diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 65a0a05b..7809fecd 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -3,7 +3,7 @@ variables: LC_ALL: C.UTF-8 GIT_STRATEGY: fetch DOCKER_CMD: docker --config="$HOME/.docker/$CI_JOB_ID/" - IMG_BASE: registry.labs.nic.cz/labs/bird + IMG_BASE: registry.nic.cz/labs/bird TOOLS_DIR: /var/lib/gitlab-runner/bird-tools stages: @@ -16,7 +16,7 @@ stages: stage: image allow_failure: true script: - - $DOCKER_CMD login -u gitlab-ci-token -p $CI_BUILD_TOKEN registry.labs.nic.cz + - $DOCKER_CMD login -u gitlab-ci-token -p $CI_BUILD_TOKEN registry.nic.cz # Make sure we refresh the base image if it updates (eg. security updates, etc) # If we do just the build, cache is always reused and the freshness of the # base image is never checked. However, pull always asks and updates the @@ -164,9 +164,9 @@ docker_ubuntu-20_04-amd64: IMG_NAME: "ubuntu-20.04-amd64" <<: *docker_build -docker_ubuntu-20_10-amd64: +docker_ubuntu-21_10-amd64: variables: - IMG_NAME: "ubuntu-20.10-amd64" + IMG_NAME: "ubuntu-21.10-amd64" <<: *docker_build # GPG error @@ -234,143 +234,143 @@ docker_opensuse-15.3-amd64: build-debian-8-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:debian-8-amd64 + image: registry.nic.cz/labs/bird:debian-8-amd64 build-debian-8-i386: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:debian-8-i386 + image: registry.nic.cz/labs/bird:debian-8-i386 build-debian-9-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:debian-9-amd64 + image: registry.nic.cz/labs/bird:debian-9-amd64 build-debian-9-i386: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:debian-9-i386 + image: registry.nic.cz/labs/bird:debian-9-i386 build-debian-10-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:debian-10-amd64 + image: registry.nic.cz/labs/bird:debian-10-amd64 build-debian-10-i386: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:debian-10-i386 + image: registry.nic.cz/labs/bird:debian-10-i386 build-debian-11-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:debian-11-amd64 + image: registry.nic.cz/labs/bird:debian-11-amd64 #build-debian-11-i386: # <<: *build-linux -# image: registry.labs.nic.cz/labs/bird:debian-11-i386 +# image: registry.nic.cz/labs/bird:debian-11-i386 build-debian-testing-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:debian-testing-amd64 + image: registry.nic.cz/labs/bird:debian-testing-amd64 #build-debian-testing-i386: # <<: *build-linux -# image: registry.labs.nic.cz/labs/bird:debian-testing-i386 +# image: registry.nic.cz/labs/bird:debian-testing-i386 build-fedora-25-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:fedora-25-amd64 + image: registry.nic.cz/labs/bird:fedora-25-amd64 build-fedora-26-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:fedora-26-amd64 + image: registry.nic.cz/labs/bird:fedora-26-amd64 build-fedora-27-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:fedora-27-amd64 + image: registry.nic.cz/labs/bird:fedora-27-amd64 build-fedora-28-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:fedora-28-amd64 + image: registry.nic.cz/labs/bird:fedora-28-amd64 build-fedora-29-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:fedora-29-amd64 + image: registry.nic.cz/labs/bird:fedora-29-amd64 build-fedora-30-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:fedora-30-amd64 + image: registry.nic.cz/labs/bird:fedora-30-amd64 build-fedora-31-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:fedora-31-amd64 + image: registry.nic.cz/labs/bird:fedora-31-amd64 build-fedora-32-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:fedora-32-amd64 + image: registry.nic.cz/labs/bird:fedora-32-amd64 build-fedora-33-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:fedora-33-amd64 + image: registry.nic.cz/labs/bird:fedora-33-amd64 build-fedora-34-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:fedora-33-amd64 + image: registry.nic.cz/labs/bird:fedora-33-amd64 build-centos-7-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:centos-7-amd64 + image: registry.nic.cz/labs/bird:centos-7-amd64 build-centos-8-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:centos-8-amd64 + image: registry.nic.cz/labs/bird:centos-8-amd64 build-ubuntu-14_04-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:ubuntu-14.04-amd64 + image: registry.nic.cz/labs/bird:ubuntu-14.04-amd64 build-ubuntu-16_04-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:ubuntu-16.04-amd64 + image: registry.nic.cz/labs/bird:ubuntu-16.04-amd64 build-ubuntu-18_04-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:ubuntu-18.04-amd64 + image: registry.nic.cz/labs/bird:ubuntu-18.04-amd64 build-ubuntu-20_04-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:ubuntu-20.04-amd64 + image: registry.nic.cz/labs/bird:ubuntu-20.04-amd64 -build-ubuntu-20_10-amd64: +build-ubuntu-21_10-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:ubuntu-20.10-amd64 + image: registry.nic.cz/labs/bird:ubuntu-21.10-amd64 #build-ubuntu-21_04-amd64: # <<: *build-linux -# image: registry.labs.nic.cz/labs/bird:ubuntu-21.04-amd64 +# image: registry.nic.cz/labs/bird:ubuntu-21.04-amd64 build-opensuse-15.0-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:opensuse-15.0-amd64 + image: registry.nic.cz/labs/bird:opensuse-15.0-amd64 build-opensuse-15.1-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:opensuse-15.1-amd64 + image: registry.nic.cz/labs/bird:opensuse-15.1-amd64 build-opensuse-15.2-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:opensuse-15.2-amd64 + image: registry.nic.cz/labs/bird:opensuse-15.2-amd64 build-opensuse-15.3-amd64: <<: *build-linux - image: registry.labs.nic.cz/labs/bird:opensuse-15.3-amd64 + image: registry.nic.cz/labs/bird:opensuse-15.3-amd64 -build-freebsd-11-amd64: - <<: *build-base - tags: - - freebsd - - amd64 +#build-freebsd-11-amd64: +# <<: *build-base +# tags: +# - freebsd +# - amd64 -build-freebsd-11-i386: - <<: *build-base - tags: - - freebsd - - i386 +#build-freebsd-11-i386: +# <<: *build-base +# tags: +# - freebsd +# - i386 .pkg-deb: &pkg-deb @@ -408,112 +408,113 @@ build-freebsd-11-i386: #pkg-debian-8-amd64: # <<: *pkg-deb # needs: [build-debian-8-amd64] -# image: registry.labs.nic.cz/labs/bird:debian-8-amd64 +# image: registry.nic.cz/labs/bird:debian-8-amd64 # Dpkg error: PATH is not set #pkg-debian-8-i386: # <<: *pkg-deb # needs: [build-debian-8-i386] -# image: registry.labs.nic.cz/labs/bird:debian-8-i386 +# image: registry.nic.cz/labs/bird:debian-8-i386 # Dpkg error: PATH is not set pkg-debian-9-amd64: <<: *pkg-deb needs: [build-debian-9-amd64] - image: registry.labs.nic.cz/labs/bird:debian-9-amd64 + image: registry.nic.cz/labs/bird:debian-9-amd64 # Dpkg error: PATH is not set pkg-debian-9-i386: <<: *pkg-deb needs: [build-debian-9-i386] - image: registry.labs.nic.cz/labs/bird:debian-9-i386 + image: registry.nic.cz/labs/bird:debian-9-i386 pkg-debian-10-amd64: <<: *pkg-deb needs: [build-debian-10-amd64] - image: registry.labs.nic.cz/labs/bird:debian-10-amd64 + image: registry.nic.cz/labs/bird:debian-10-amd64 pkg-debian-10-i386: <<: *pkg-deb needs: [build-debian-10-i386] - image: registry.labs.nic.cz/labs/bird:debian-10-i386 + image: registry.nic.cz/labs/bird:debian-10-i386 pkg-debian-11-amd64: <<: *pkg-deb needs: [build-debian-11-amd64] - image: registry.labs.nic.cz/labs/bird:debian-11-amd64 + image: registry.nic.cz/labs/bird:debian-11-amd64 pkg-fedora-30-amd64: <<: *pkg-rpm-wa needs: [build-fedora-30-amd64] - image: registry.labs.nic.cz/labs/bird:fedora-30-amd64 + image: registry.nic.cz/labs/bird:fedora-30-amd64 pkg-fedora-31-amd64: <<: *pkg-rpm-wa needs: [build-fedora-31-amd64] - image: registry.labs.nic.cz/labs/bird:fedora-31-amd64 + image: registry.nic.cz/labs/bird:fedora-31-amd64 pkg-fedora-32-amd64: <<: *pkg-rpm-wa needs: [build-fedora-32-amd64] - image: registry.labs.nic.cz/labs/bird:fedora-32-amd64 + image: registry.nic.cz/labs/bird:fedora-32-amd64 pkg-fedora-33-amd64: <<: *pkg-rpm-wa needs: [build-fedora-33-amd64] - image: registry.labs.nic.cz/labs/bird:fedora-33-amd64 + image: registry.nic.cz/labs/bird:fedora-33-amd64 pkg-fedora-34-amd64: <<: *pkg-rpm needs: [build-fedora-34-amd64] - image: registry.labs.nic.cz/labs/bird:fedora-34-amd64 + image: registry.nic.cz/labs/bird:fedora-34-amd64 pkg-centos-7-amd64: <<: *pkg-rpm-wa variables: LC_ALL: en_US.UTF-8 needs: [build-centos-7-amd64] - image: registry.labs.nic.cz/labs/bird:centos-7-amd64 + image: registry.nic.cz/labs/bird:centos-7-amd64 pkg-centos-8-amd64: <<: *pkg-rpm-wa needs: [build-centos-8-amd64] - image: registry.labs.nic.cz/labs/bird:centos-8-amd64 + image: registry.nic.cz/labs/bird:centos-8-amd64 pkg-ubuntu-18.04-amd64: <<: *pkg-deb needs: [build-ubuntu-18_04-amd64] - image: registry.labs.nic.cz/labs/bird:ubuntu-18.04-amd64 + image: registry.nic.cz/labs/bird:ubuntu-18.04-amd64 pkg-ubuntu-20.04-amd64: <<: *pkg-deb needs: [build-ubuntu-20_04-amd64] - image: registry.labs.nic.cz/labs/bird:ubuntu-20.04-amd64 + image: registry.nic.cz/labs/bird:ubuntu-20.04-amd64 -pkg-ubuntu-20.10-amd64: + +pkg-ubuntu-21.10-amd64: <<: *pkg-deb - needs: [build-ubuntu-20_10-amd64] - image: registry.labs.nic.cz/labs/bird:ubuntu-20.10-amd64 + needs: [build-ubuntu-21_10-amd64] + image: registry.nic.cz/labs/bird:ubuntu-21.10-amd64 #pkg-ubuntu-21.04-amd64: # <<: *pkg-deb # needs: [build-ubuntu-21_04-amd64] -# image: registry.labs.nic.cz/labs/bird:ubuntu-21.04-amd64 +# image: registry.nic.cz/labs/bird:ubuntu-21.04-amd64 pkg-opensuse-15.1-amd64: <<: *pkg-rpm-wa needs: [build-opensuse-15.1-amd64] - image: registry.labs.nic.cz/labs/bird:opensuse-15.1-amd64 + image: registry.nic.cz/labs/bird:opensuse-15.1-amd64 pkg-opensuse-15.2-amd64: <<: *pkg-rpm-wa needs: [build-opensuse-15.2-amd64] - image: registry.labs.nic.cz/labs/bird:opensuse-15.2-amd64 + image: registry.nic.cz/labs/bird:opensuse-15.2-amd64 pkg-opensuse-15.3-amd64: <<: *pkg-rpm-wa needs: [build-opensuse-15.3-amd64] - image: registry.labs.nic.cz/labs/bird:opensuse-15.3-amd64 + image: registry.nic.cz/labs/bird:opensuse-15.3-amd64 build-birdlab: @@ -582,6 +583,11 @@ test-ospf-custom: variables: TEST_NAME: cf-ospf-custom +test-ospf-area: + <<: *test-base + variables: + TEST_NAME: cf-ospf-area + test-ospf-vrf: <<: *test-base variables: @@ -636,3 +642,8 @@ test-babel-auth: <<: *test-base variables: TEST_NAME: cf-babel-auth + +test-rip-base: + <<: *test-base + variables: + TEST_NAME: cf-rip-base diff --git a/Makefile.in b/Makefile.in index e0ff4a1d..0d55807b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -82,6 +82,9 @@ conf-lex-targets := $(addprefix $(objdir)/conf/,cf-lex.o) conf-y-targets := $(addprefix $(objdir)/conf/,cf-parse.y keywords.h commands.h) cf-local = $(conf-y-targets): $(s)config.Y +# nest/Makefile declarations needed for all other modules +proto-build-c := $(addprefix $(objdir)/nest/,proto-build.c) + src-o-files = $(patsubst %.c,$(o)%.o,$(src)) tests-target-files = $(patsubst %.c,$(o)%,$(tests_src)) @@ -95,6 +98,13 @@ else o = $(patsubst $(srcdir)%,$(objdir)%,$(s)) endif +define proto-build_in = +PROTO_BUILD += $(1) +$(proto-build-c): $(lastword $(MAKEFILE_LIST)) +endef + +proto-build = $(eval $(call proto-build_in,$(1))) + define clean_in = clean:: rm -f $(addprefix $(o),$(1)) diff --git a/NEWS b/NEWS index 4a85c365..c1e51af2 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,38 @@ +Version 2.0.9 (2022-02-09) + o BGP: Flowspec validation procedure + o Babel: MAC authentication support + o Routing table configuration blocks + o Optional prefix trie in routing table for faster LPM/interval queries + o CLI: New 'show route in ' command + o Filter: Faster (16-way) prefix sets + o Filter: MPLS label route attribute + o Filter: Operators to pick community components + o Filter: Operators to find minimum and maximum element of lists + o BGP: New 'free bind' option + o BGP: Log route updates that were changed to withdraws + o BGP: Improved 'invalid next hop' error reporting + o OSPF: Allow ifaces with host address as unnumbered PtP or PtMP ifaces + o OSPF: All packets on PtP networks should be sent to AllSPFRouters address + o Scripts for apkg-powered upstream packaging for deb and rpm + o Support for Blake2s and Blake2b hash functions + o Security keys / passwords can be entered in hexadecimal digits + o Memory statistics split into Effective and Overhead + o Linux: New option 'netlink rx buffer' to specify netlink socket buffer size + o BSD: Assume onlink flag on ifaces with only host addresses + o Many bugfixes + + Notes: + + For OSPF on PtP network, BIRD now sends all packets to multicast AllSPFRouters + address (as required in RFC 2328 8.1). This likely breaks setups with multiple + neighbors on a network configured as PtP, which worked in previous versions. + Such links should be configured as PtMP. + + Since Linux 5.3, netlink socket can be flooded by route cache entries during + route table scan. This version mitigates that issue by using strict netlink + filtering. + + Version 2.0.8 (2021-03-18) o Automatic channel reloads based on RPKI changes o Multiple static routes with the same network diff --git a/aclocal.m4 b/aclocal.m4 index 1613d680..58c48791 100644 --- a/aclocal.m4 +++ b/aclocal.m4 @@ -1,6 +1,32 @@ dnl ** Additional Autoconf tests for BIRD configure script dnl ** (c) 1999 Martin Mares +AC_DEFUN([BIRD_CHECK_POINTER_ALIGNMENT], +[ + AC_CACHE_CHECK( + [how pointers are aligned], + [bird_cv_pointer_alignment], + AC_COMPILE_IFELSE([ + AC_LANG_PROGRAM( + [ + _Static_assert(_Alignof(void *) == 8, "bad"); + ], [] + ) + ], + [bird_cv_pointer_alignment=8], + AC_COMPILE_IFELSE([ + AC_LANG_PROGRAM( + [ + _Static_assert(_Alignof(void *) == 4, "bad"); + ], [] + ) + ], + [bird_cv_pointer_alignment=4], + [bird_cv_pointer_alignment=unknown] + )) + ) +]) + AC_DEFUN([BIRD_CHECK_THREAD_LOCAL], [ AC_CACHE_CHECK( diff --git a/bird-gdb.py b/bird-gdb.py index 3cf65a9c..62c0ec87 100644 --- a/bird-gdb.py +++ b/bird-gdb.py @@ -25,7 +25,6 @@ class BIRDFValPrinter(BIRDPrinter): "T_ENUM_RTS": "i", "T_ENUM_BGP_ORIGIN": "i", "T_ENUM_SCOPE": "i", - "T_ENUM_RTC": "i", "T_ENUM_RTD": "i", "T_ENUM_ROA": "i", "T_ENUM_NETTYPE": "i", diff --git a/client/client.c b/client/client.c index 97cf6639..934e16e0 100644 --- a/client/client.c +++ b/client/client.c @@ -50,6 +50,7 @@ static byte *server_read_pos = server_read_buf; int init = 1; /* During intial sequence */ int busy = 1; /* Executing BIRD command */ int interactive; /* Whether stdin is terminal */ +int last_code; /* Last return code */ static int num_lines, skip_input; int term_lns, term_cls; @@ -196,7 +197,7 @@ init_commands(void) { /* Initial command is finished and we want to exit */ cleanup(); - exit(0); + exit((last_code < 8000) ? 0 : 1); } input_init(); @@ -283,6 +284,8 @@ server_got_reply(char *x) if (code) PRINTF(len, "%s\n", verbose ? x : x+5); + last_code = code; + if (x[4] == ' ') { busy = 0; diff --git a/conf/cf-lex.l b/conf/cf-lex.l index 704a1750..11bcdb18 100644 --- a/conf/cf-lex.l +++ b/conf/cf-lex.l @@ -42,7 +42,7 @@ #define PARSER 1 #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "filter/filter.h" #include "filter/f-inst.h" @@ -77,19 +77,22 @@ static uint cf_hash(const byte *c); #define SYM_NEXT(n) n->next #define SYM_EQ(a,s1,b,s2) !strcmp(a,b) && s1 == s2 #define SYM_FN(k,s) cf_hash(k) -#define SYM_ORDER 6 /* Initial */ +#define SYM_ORDER 4 /* Initial */ #define SYM_REHASH sym_rehash -#define SYM_PARAMS /8, *1, 2, 2, 6, 20 +#define SYM_PARAMS /8, *1, 2, 2, 4, 20 HASH_DEFINE_REHASH_FN(SYM, struct symbol) HASH(struct keyword) kw_hash; - +HASH(struct ea_class) ea_name_hash; struct sym_scope *conf_this_scope; +static struct sym_scope global_root_scope__init = { .active = 1, }; +struct sym_scope *global_root_scope = &global_root_scope__init; + linpool *cfg_mem; int (*cf_read_hook)(byte *buf, unsigned int max, int fd); @@ -255,7 +258,7 @@ WHITE [ \t] return IP4; } -{XIGIT}{2}(:{XIGIT}{2}|{XIGIT}{2}){15,} { +{XIGIT}{2}((:{XIGIT}{2}){15,}|({XIGIT}{2}){15,}) { char *s = yytext; size_t len = 0, i; struct bytestring *bytes; @@ -347,7 +350,7 @@ else: { return DDOT; } -[={}:;,.()+*/%<>~\[\]?!\|-] { +[={}:;,.()+*/%<>~\[\]?!\|&-] { return yytext[0]; } @@ -587,41 +590,58 @@ cf_new_symbol(const byte *c) *s = (struct symbol) { .scope = conf_this_scope, .class = SYM_VOID, }; strcpy(s->name, c); - if (!new_config->sym_hash.data) - HASH_INIT(new_config->sym_hash, new_config->pool, SYM_ORDER); + if (!conf_this_scope->hash.data) + HASH_INIT(conf_this_scope->hash, new_config->pool, SYM_ORDER); - HASH_INSERT2(new_config->sym_hash, SYM, new_config->pool, s); + HASH_INSERT2(conf_this_scope->hash, SYM, new_config->pool, s); - add_tail(&(new_config->symbols), &(s->n)); + if (conf_this_scope == new_config->root_scope) + add_tail(&(new_config->symbols), &(s->n)); return s; } +static struct symbol * +cf_root_symbol(const byte *c) +{ + uint l = strlen(c); + if (l > SYM_MAX_LEN) + bug("Root symbol %s too long", c); + + struct symbol *s = mb_alloc(&root_pool, sizeof(struct symbol) + l + 1); + *s = (struct symbol) { .scope = global_root_scope, .class = SYM_VOID, }; + memcpy(s->name, c, l+1); + + if (!global_root_scope->hash.data) + HASH_INIT(global_root_scope->hash, &root_pool, SYM_ORDER); + + HASH_INSERT2(global_root_scope->hash, SYM, &root_pool, s); + return s; +} + + /** - * cf_find_symbol - find a symbol by name - * @cfg: specificed config + * cf_find_symbol_scope - find a symbol by name + * @scope: config scope * @c: symbol name * - * This functions searches the symbol table in the config @cfg for a symbol of - * given name. First it examines the current scope, then the second recent one + * This functions searches the symbol table in the scope @scope for a symbol of + * given name. First it examines the current scope, then the underlying one * and so on until it either finds the symbol and returns a pointer to its * &symbol structure or reaches the end of the scope chain and returns %NULL to * signify no match. */ struct symbol * -cf_find_symbol(const struct config *cfg, const byte *c) +cf_find_symbol_scope(const struct sym_scope *scope, const byte *c) { struct symbol *s; - if (cfg->sym_hash.data && - (s = HASH_FIND(cfg->sym_hash, SYM, c, 1))) - return s; - - /* In CLI command parsing, fallback points to the current config, otherwise it is NULL. */ - if (cfg->fallback && - cfg->fallback->sym_hash.data && - (s = HASH_FIND(cfg->fallback->sym_hash, SYM, c, 1))) - return s; + /* Find the symbol here or anywhere below */ + while (scope) + if (scope->hash.data && (s = HASH_FIND(scope->hash, SYM, c, 1))) + return s; + else + scope = scope->next; return NULL; } @@ -638,7 +658,7 @@ cf_find_symbol(const struct config *cfg, const byte *c) struct symbol * cf_get_symbol(const byte *c) { - return cf_find_symbol(new_config, c) ?: cf_new_symbol(c); + return cf_find_symbol_scope(conf_this_scope, c) ?: cf_new_symbol(c); } /** @@ -654,7 +674,7 @@ cf_localize_symbol(struct symbol *sym) /* If the symbol type is void, it has been recently allocated just in this scope. */ if (!sym->class) return sym; - + /* If the scope is the current, it is already defined in this scope. */ if (sym->scope == conf_this_scope) cf_error("Symbol already defined"); @@ -689,10 +709,7 @@ cf_lex_symbol(const char *data) struct symbol *sym = cf_get_symbol(data); cf_lval.s = sym; - if (sym->class != SYM_VOID) - return CF_SYM_KNOWN; - - /* Is it a keyword? */ + /* Is it a keyword? Prefer the keyword. */ struct keyword *k = HASH_FIND(kw_hash, KW, data); if (k) { @@ -705,9 +722,11 @@ cf_lex_symbol(const char *data) } } - /* OK, undefined symbol */ - cf_lval.s = sym; - return CF_SYM_UNDEFINED; + /* OK, only a symbol. */ + if (sym->class == SYM_VOID) + return CF_SYM_UNDEFINED; + else + return CF_SYM_KNOWN; } static void @@ -720,6 +739,34 @@ cf_lex_init_kh(void) HASH_INSERT(kw_hash, KW, k); } +void +ea_lex_register(struct ea_class *def) +{ + struct symbol *sym = cf_root_symbol(def->name); + sym->class = SYM_ATTRIBUTE; + sym->attribute = def; + def->sym = sym; +} + +void +ea_lex_unregister(struct ea_class *def) +{ + struct symbol *sym = def->sym; + HASH_REMOVE2(global_root_scope->hash, SYM, &root_pool, sym); + mb_free(sym); + def->sym = NULL; +} + +struct ea_class * +ea_class_find_by_name(const char *name) +{ + struct symbol *sym = cf_find_symbol(global_root_scope, name); + if (!sym || (sym->class != SYM_ATTRIBUTE)) + return NULL; + else + return sym->attribute; +} + /** * cf_lex_init - initialize the lexer * @is_cli: true if we're going to parse CLI command, false for configuration @@ -753,6 +800,11 @@ cf_lex_init(int is_cli, struct config *c) c->root_scope = cfg_allocz(sizeof(struct sym_scope)); conf_this_scope = c->root_scope; conf_this_scope->active = 1; + + if (is_cli) + conf_this_scope->next = config->root_scope; + else + conf_this_scope->next = global_root_scope; } /** diff --git a/conf/conf.c b/conf/conf.c index 58abcde1..580a6472 100644 --- a/conf/conf.c +++ b/conf/conf.c @@ -46,7 +46,7 @@ #undef LOCAL_DEBUG #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "lib/resource.h" @@ -168,7 +168,6 @@ int cli_parse(struct config *c) { int done = 0; - c->fallback = config; new_config = c; cfg_mem = c->mem; if (setjmp(conf_jmpbuf)) @@ -179,7 +178,6 @@ cli_parse(struct config *c) done = 1; cleanup: - c->fallback = NULL; new_config = NULL; cfg_mem = NULL; return done; @@ -520,6 +518,8 @@ order_shutdown(int gr) memcpy(c, config, sizeof(struct config)); init_list(&c->protos); init_list(&c->tables); + init_list(&c->symbols); + memset(c->def_tables, 0, sizeof(c->def_tables)); c->shutdown = 1; c->gr_down = gr; diff --git a/conf/conf.h b/conf/conf.h index 55cb9c58..4ccaa54e 100644 --- a/conf/conf.h +++ b/conf/conf.h @@ -16,7 +16,6 @@ #include "lib/timer.h" /* Configuration structure */ - struct config { pool *pool; /* Pool the configuration is stored in */ linpool *mem; /* Linear pool containing configuration data */ @@ -45,6 +44,7 @@ struct config { int cli_debug; /* Tracing of CLI connections and commands */ int latency_debug; /* I/O loop tracks duration of each event */ + int pipe_debug; /* Track route propagation through pipes */ u32 latency_limit; /* Events with longer duration are logged (us) */ u32 watchdog_warning; /* I/O loop watchdog limit for warning (us) */ u32 watchdog_timeout; /* Watchdog timeout (in seconds, 0 = disabled) */ @@ -54,8 +54,7 @@ struct config { char *err_file_name; /* File name containing error */ char *file_name; /* Name of main configuration file */ int file_fd; /* File descriptor of main configuration file */ - HASH(struct symbol) sym_hash; /* Lexer: symbol hash table */ - struct config *fallback; /* Link to regular config for CLI parsing */ + struct sym_scope *root_scope; /* Scope for root symbols */ int obstacle_count; /* Number of items blocking freeing of this config */ int shutdown; /* This is a pseudo-config for daemon shutdown */ @@ -122,7 +121,7 @@ struct symbol { const struct f_line *function; /* For SYM_FUNCTION */ const struct filter *filter; /* For SYM_FILTER */ struct rtable_config *table; /* For SYM_TABLE */ - struct f_dynamic_attr *attribute; /* For SYM_ATTRIBUTE */ + struct ea_class *attribute; /* For SYM_ATTRIBUTE */ struct f_val *val; /* For SYM_CONSTANT */ uint offset; /* For SYM_VARIABLE */ }; @@ -133,10 +132,15 @@ struct symbol { struct sym_scope { struct sym_scope *next; /* Next on scope stack */ struct symbol *name; /* Name of this scope */ + + HASH(struct symbol) hash; /* Local symbol hash */ + uint slots; /* Variable slots */ int active; /* Currently entered */ }; +extern struct sym_scope *global_root_scope; + struct bytestring { size_t length; byte data[]; @@ -185,7 +189,14 @@ int cf_lex(void); void cf_lex_init(int is_cli, struct config *c); void cf_lex_unwind(void); -struct symbol *cf_find_symbol(const struct config *cfg, const byte *c); +struct symbol *cf_find_symbol_scope(const struct sym_scope *scope, const byte *c); +static inline struct symbol *cf_find_symbol_cfg(const struct config *cfg, const byte *c) +{ return cf_find_symbol_scope(cfg->root_scope, c); } + +#define cf_find_symbol(where, what) _Generic(*(where), \ + struct config: cf_find_symbol_cfg, \ + struct sym_scope: cf_find_symbol_scope \ + )((where), (what)) struct symbol *cf_get_symbol(const byte *c); struct symbol *cf_default_name(char *template, int *counter); diff --git a/conf/confbase.Y b/conf/confbase.Y index 6985783b..9a83083c 100644 --- a/conf/confbase.Y +++ b/conf/confbase.Y @@ -18,7 +18,7 @@ CF_HDR #include "lib/string.h" #include "nest/protocol.h" #include "nest/iface.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/bfd.h" #include "nest/cli.h" #include "filter/filter.h" @@ -71,8 +71,9 @@ CF_DECLS } xp; enum filter_return fret; enum ec_subtype ecs; - struct f_dynamic_attr fda; + struct ea_class *ea_class; struct f_static_attr fsa; + struct f_attr_bit fab; struct f_lval flv; struct f_line *fl; const struct filter *f; @@ -91,7 +92,7 @@ CF_DECLS struct proto_spec ps; struct channel_limit cl; struct timeformat *tf; - mpls_label_stack *mls; + struct adata *ad; struct bytestring *bs; } @@ -112,14 +113,15 @@ CF_DECLS %type ipa %type net_ip4_ net_ip6_ net_ip6 net_ip_ net_ip net_or_ipa %type net_ net_any net_vpn4_ net_vpn6_ net_vpn_ net_roa4_ net_roa6_ net_roa_ net_ip6_sadr_ net_mpls_ -%type label_stack_start label_stack +%type label_stack_start label_stack %type text opttext -%type symbol +%type symbol symbol_known toksym %nonassoc PREFIX_DUMMY %left AND OR %nonassoc '=' '<' '>' '~' GEQ LEQ NEQ NMA PO PC +%left '|' '&' %left '+' '-' %left '*' '/' '%' %left '!' @@ -151,16 +153,16 @@ conf: definition ; definition: DEFINE symbol '=' term ';' { - struct f_val *val = cfg_allocz(sizeof(struct f_val)); - if (f_eval(f_linearize($4), cfg_mem, val) > F_RETURN) cf_error("Runtime error"); - cf_define_symbol($2, SYM_CONSTANT | val->type, val, val); + struct f_val val; + if (f_eval(f_linearize($4), &val) > F_RETURN) cf_error("Runtime error"); + cf_define_symbol($2, SYM_CONSTANT | val.type, val, lp_val_copy(cfg_mem, &val)); } ; expr: NUM | '(' term ')' { $$ = f_eval_int(f_linearize($2)); } - | CF_SYM_KNOWN { + | symbol_known { if ($1->class != (SYM_CONSTANT | T_INT)) cf_error("Number constant expected"); $$ = SYM_VAL($1).i; } ; @@ -171,7 +173,9 @@ expr_us: | expr US { $$ = $1 US_; } ; -symbol: CF_SYM_UNDEFINED | CF_SYM_KNOWN ; +toksym: FROM ; +symbol: CF_SYM_UNDEFINED | CF_SYM_KNOWN | toksym ; +symbol_known: CF_SYM_KNOWN | toksym ; /* Switches */ @@ -347,17 +351,19 @@ net_or_ipa: label_stack_start: NUM { - $$ = cfg_allocz(sizeof(mpls_label_stack)); - $$->len = 1; - $$->stack[0] = $1; + $$ = cfg_allocz(ADATA_SIZE(MPLS_MAX_LABEL_STACK * sizeof(u32))); + $$->length = sizeof(u32); + *((u32 *)$$->data) = $1; }; label_stack: label_stack_start | label_stack '/' NUM { - if ($1->len >= MPLS_MAX_LABEL_STACK) + if ($1->length >= MPLS_MAX_LABEL_STACK * sizeof(u32)) cf_error("Too many labels in stack"); - $1->stack[$1->len++] = $3; + + *((u32 *)($$->data + $1->length)) = $3; + $1->length += sizeof(u32); $$ = $1; } ; diff --git a/conf/gen_keywords.m4 b/conf/gen_keywords.m4 index 0c1dc545..53226e4d 100644 --- a/conf/gen_keywords.m4 +++ b/conf/gen_keywords.m4 @@ -26,8 +26,7 @@ m4_define(CF_DEFINES, `m4_divert(-1)') m4_define(CF_handle_kw, `m4_divert(1){ "m4_translit($1,[[A-Z]],[[a-z]])", $1, NULL }, m4_divert(-1)') m4_define(CF_keywd, `m4_ifdef([[CF_tok_$1]],,[[m4_define([[CF_tok_$1]],1)CF_handle_kw($1)]])') -m4_define(CF_KEYWORDS, `m4_define([[CF_toks]],[[]])CF_iterate([[CF_keywd]], [[$@]])m4_ifelse(CF_toks,,,%token[[]]CF_toks -)DNL') +m4_define(CF_KEYWORDS, `CF_iterate([[CF_keywd]], [[$@]])DNL') # CLI commands generate keywords as well m4_define(CF_CLI, `CF_KEYWORDS(m4_translit($1, [[ ]], [[,]])) diff --git a/conf/gen_parser.m4 b/conf/gen_parser.m4 index 5b378a93..af4b1455 100644 --- a/conf/gen_parser.m4 +++ b/conf/gen_parser.m4 @@ -31,7 +31,7 @@ m4_define(CF_iterate, `m4_define([[CF_iter]], m4_defn([[$1]]))CF_itera($2)') # Keywords act as untyped %token m4_define(CF_keywd, `m4_ifdef([[CF_tok_$1]],,[[m4_define([[CF_tok_$1]],1)m4_define([[CF_toks]],CF_toks $1)]])') -m4_define(CF_KEYWORDS, `m4_define([[CF_toks]],[[]])CF_iterate([[CF_keywd]], [[$@]])m4_ifelse(CF_toks,,,%token[[]]CF_toks +m4_define(CF_KEYWORDS, `m4_define([[CF_toks]],[[]])CF_iterate([[CF_keywd]], [[$@]])m4_ifelse(CF_toks,,,%token[[]]CF_toks )DNL') # CLI commands diff --git a/configure.ac b/configure.ac index 64181d29..321bed95 100644 --- a/configure.ac +++ b/configure.ac @@ -360,6 +360,13 @@ AC_C_BIGENDIAN( [AC_MSG_ERROR([Cannot determine CPU endianity.])] ) +BIRD_CHECK_POINTER_ALIGNMENT +if test "$bird_cv_pointer_alignment" = "unknown" ; then + AC_MSG_ERROR([Couldn't determine pointer alignment]) +else + AC_DEFINE_UNQUOTED([CPU_POINTER_ALIGNMENT], [$bird_cv_pointer_alignment], [Pointer alignment for macro usage]) +fi + BIRD_CHECK_ANDROID_GLOB if test "$bird_cv_lib_glob" = no ; then AC_MSG_ERROR([glob.h not found.]) diff --git a/distro/pkg/rpm/bird.service b/distro/pkg/rpm/bird.service index fa203c78..26bcb8a0 100644 --- a/distro/pkg/rpm/bird.service +++ b/distro/pkg/rpm/bird.service @@ -5,8 +5,9 @@ After=network.target [Service] Type=simple +ExecStartPre=/usr/sbin/bird -p ExecStart=/usr/sbin/bird -f -u bird -g bird -ExecReload=/bin/kill -HUP $MAINPID +ExecReload=/usr/sbin/birdc configure Restart=on-failure [Install] diff --git a/doc/bird.sgml b/doc/bird.sgml index a2138b55..d17de23f 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -14,7 +14,7 @@ configuration - something in config which is not keyword. (set-fill-column 80) - Copyright 1999,2000 Pavel Machek , distribute under GPL version 2 or later. + Copyright 1999 - 2022 CZ.NIC, z.s.p.o , distribute under GPL version 2 or later. --> @@ -23,7 +23,6 @@ configuration - something in config which is not keyword. BIRD 2.0 User's Guide <author> Ondrej Filip <it/<feela@network.cz>/, -Pavel Machek <it/<pavel@ucw.cz>/, Martin Mares <it/<mj@ucw.cz>/, Maria Matejka <it/<mq@jmq.cz>/, Ondrej Zajicek <it/<santiago@crfreenet.org>/ @@ -145,13 +144,6 @@ BIRD executable by configuring out routing protocols you don't use, and <p>You can pass several command-line options to bird: <descrip> - <tag><label id="argv-block">-B <m/exp/</tag> - allocate memory using 2^<cf/exp/ byte sized blocks; - if you're expecting high memory load, raise this to - reduce number of allocated memory pages. For a million routes - in one table, the recommended setting is 18. - Default is your system page size, typically 12 for 4096 bytes. - <tag><label id="argv-config">-c <m/config name/</tag> use given configuration file instead of <it/prefix/<file>/etc/bird.conf</file>. @@ -259,16 +251,9 @@ The global best route selection algorithm is (roughly) as follows: </itemize> <p><label id="dsc-table-sorted">Usually, a routing table just chooses a selected -route from a list of entries for one network. But if the <cf/sorted/ option is -activated, these lists of entries are kept completely sorted (according to -preference or some protocol-dependent metric). This is needed for some features -of some protocols (e.g. <cf/secondary/ option of BGP protocol, which allows to -accept not just a selected route, but the first route (in the sorted list) that -is accepted by filters), but it is incompatible with some other features (e.g. -<cf/deterministic med/ option of BGP protocol, which activates a way of choosing -selected route that cannot be described using comparison and ordering). Minor -advantage is that routes are shown sorted in <cf/show route/, minor disadvantage -is that it is slightly more computationally expensive. +route from a list of entries for one network. Optionally, these lists of entries +are kept completely sorted (according to preference or some protocol-dependent +metric). See <ref id="rtable-sorted" name="sorted"> table option for details. <sect>Routes and network types <label id="routes"> @@ -635,18 +620,73 @@ include "tablename.conf";; <cf/protocol/ times, and the <cf/iso long ms/ format for <cf/base/ and <cf/log/ times. - <tag><label id="opt-table"><m/nettype/ table <m/name/ [sorted]</tag> - Create a new routing table. The default routing tables <cf/master4/ and - <cf/master6/ are created implicitly, other routing tables have to be - added by this command. Option <cf/sorted/ can be used to enable sorting - of routes, see <ref id="dsc-table-sorted" name="sorted table"> - description for details. + <tag><label id="opt-table"><m/nettype/ table <m/name/ [ { <m/option/; [<m/.../] } ]</tag> + Define a new routing table. The default routing tables <cf/master4/ and + <cf/master6/ are defined implicitly, other routing tables have to be + defined by this option. See the <ref id="rtable-opts" + name="routing table configuration section"> for routing table options. <tag><label id="opt-eval">eval <m/expr/</tag> Evaluates given filter expression. It is used by the developers for testing of filters. </descrip> +<sect>Routing table options +<label id="rtable-opts"> + +<p>Most routing tables do not need any options and are defined without an option +block, but there are still some options to tweak routing table behavior. Note +that implicit tables (<cf/master4/ and <cf/master6/) can be redefined in order +to set options. + +<descrip> + <tag><label id="rtable-sorted">sorted <m/switch/</tag> + Usually, a routing table just chooses the selected (best) route from a + list of routes for each network, while keeping remaining routes unsorted. + If enabled, these lists of routes are kept completely sorted (according + to preference or some protocol-dependent metric). + + This is needed for some protocol features (e.g. <cf/secondary/ option of + BGP protocol, which allows to accept not just a selected route, but the + first route (in the sorted list) that is accepted by filters), but it is + incompatible with some other features (e.g. <cf/deterministic med/ + option of BGP protocol, which activates a way of choosing selected route + that cannot be described using comparison and ordering). Minor advantage + is that routes are shown sorted in <cf/show route/, minor disadvantage + is that it is slightly more computationally expensive. Default: off. + + <tag><label id="rtable-trie">trie <m/switch/</tag> + BIRD routing tables are implemented with hash tables, which is efficient + for exact-match lookups, but inconvenient for longest-match lookups or + interval lookups (finding superprefix or subprefixes). This option + activates additional trie structure that is used to accelerate these + lookups, while using the hash table for exact-match lookups. + + This has advantage for <ref id="rpki" name="RPKI"> (on ROA tables), + for <ref id="bgp-gateway" name="recursive next-hops"> (on IGP tables), + and is required for <ref id="bgp-validate" name="flowspec validation"> + (on base IP tables). Another advantage is that interval results (like + from <cf/show route in .../ command) are lexicographically sorted. The + disadvantage is that trie-enabled routing tables require more memory, + which may be an issue especially in multi-table setups. Default: off. + + <tag><label id="rtable-min-settle-time">min settle time <m/time/</tag> + Specify a minimum value of the settle time. When a ROA table changes, + automatic <ref id="proto-rpki-reload" name="RPKI reload"> may be + triggered, after a short settle time. Minimum settle time is a delay + from the last ROA table change to wait for more updates. Default: 1 s. + + + <tag><label id="rtable-max-settle-time">max settle time <m/time/</tag> + Specify a maximum value of the settle time. When a ROA table changes, + automatic <ref id="proto-rpki-reload" name="RPKI reload"> may be + triggered, after a short settle time. Maximum settle time is an upper + limit to the settle time from the initial ROA table change even if + there are consecutive updates gradually renewing the settle time. + Default: 20 s. +</descrip> + + <sect>Protocol options <label id="protocol-opts"> @@ -1299,6 +1339,9 @@ in the foot). The same syntax can also be used to construct a pair from two arbitrary integer expressions (for example <cf/(1+2,a)/). + Operators <cf/.asn/ and <cf/.data/ can be used to extract corresponding + components of a pair: <cf>(<m/asn/, <m/data/)</cf>. + <tag><label id="type-quad">quad</tag> This is a dotted quad of numbers used to represent router IDs (and others). Each component can have a value from 0 to 255. Literals of @@ -1389,6 +1432,10 @@ in the foot). pairs, LCs can be constructed using expressions for its parts, (e.g. <cf/(myas, 10+20, 3*10)/, where <cf/myas/ is an integer variable). + Operators <cf/.asn/, <cf/.data1/, and <cf/.data2/ can be used + to extract corresponding components of LCs: + <cf>(<m/asn/, <m/data1/, <m/data2/)</cf>. + <tag><label id="type-set">int|pair|quad|ip|prefix|ec|lc|enum set</tag> Filters recognize four types of sets. Sets are similar to strings: you can pass them around but you can't modify them. Literals of type <cf>int @@ -1532,7 +1579,7 @@ in the foot). Clist is similar to a set, except that unlike other sets, it can be modified. The type is used for community list (a set of pairs) and for cluster list (a set of quads). There exist no literals of this type. - There are three special operators on clists: + There are special operators on clists: <cf><m/C/.len</cf> returns the length of clist <m/C/. @@ -1559,6 +1606,15 @@ in the foot). <cf><m/C/.add(<m/P/);</cf> if <m/C/ is appropriate route attribute (for example <cf/bgp_community/). Similarly for <cf/delete/ and <cf/filter/. + <cf><m/C/.min</cf> returns the minimum element of clist <m/C/. + + <cf><m/C/.max</cf> returns the maximum element of clist <m/C/. + + Operators <cf/.min/, <cf/.max/ can be used together with <cf/filter/ + to extract the community from the specific subset of communities + (e.g. localpref or prepend) without the need to check every possible + value (e.g. <cf/filter(bgp_community, [(23456, 1000..1099)]).min/). + <tag><label id="type-eclist">eclist</tag> Eclist is a data type used for BGP extended community lists. Eclists are very similar to clists, but they are sets of ECs instead of pairs. @@ -1667,17 +1723,8 @@ Common route attributes are: primary key of the routing table. Read-only. (See the <ref id="routes" name="chapter about routes">.) - <tag><label id="rta-scope"><m/enum/ scope</tag> - The scope of the route. Possible values: <cf/SCOPE_HOST/ for routes - local to this host, <cf/SCOPE_LINK/ for those specific for a physical - link, <cf/SCOPE_SITE/ and <cf/SCOPE_ORGANIZATION/ for private routes and - <cf/SCOPE_UNIVERSE/ for globally visible routes. This attribute is not - interpreted by BIRD and can be used to mark routes in filters. The - default value for new routes is <cf/SCOPE_UNIVERSE/. - <tag><label id="rta-preference"><m/int/ preference</tag> - Preference of the route. Valid values are 0-65535. (See the chapter - about routing tables.) + Preference of the route. <tag><label id="rta-from"><m/ip/ from</tag> The router which the route has originated from. @@ -2097,6 +2144,13 @@ protocol bfd [<name>] { to configure separate BFD protocol instances for IPv4 and for IPv6 sessions. + <tag><label id="bfd-strict-bind">strict bind <m/switch/</tag> + Specify whether each BFD interface should use a separate listening + socket bound to its local address, or just use a shared listening socket + accepting all addresses. Binding to a specific address could be useful + in cases like running multiple BIRD instances on a machine, each + handling a different set of interfaces. Default: disabled. + <tag><label id="bfd-iface">interface <m/pattern/ [, <m/.../] { <m/options/ }</tag> Interface definitions allow to specify options for sessions associated with such interfaces and also may contain interface specific options. @@ -2281,6 +2335,7 @@ avoid routing loops. <item> <rfc id="8092"> - BGP Large Communities Attribute <item> <rfc id="8203"> - BGP Administrative Shutdown Communication <item> <rfc id="8212"> - Default EBGP Route Propagation Behavior without Policies +<item> <rfc id="9117"> - Revised Validation Procedure for BGP Flow Specifications </itemize> <sect1>Route selection rules @@ -2403,6 +2458,12 @@ using the following configuration parameters: same address family and using the same local port) should have set <cf/strict bind/, or none of them. Default: disabled. + <tag><label id="bgp-free-bind">free bind <m/switch/</tag> + Use IP_FREEBIND socket option for the listening socket, which allows + binding to an IP address not (yet) assigned to an interface. Note that + all BGP instances that share a listening socket should have the same + value of the <cf/freebind/ option. Default: disabled. + <tag><label id="bgp-check-link">check link <M>switch</M></tag> BGP could use hardware link state into consideration. If enabled, BIRD tracks the link state of the associated interface and when link @@ -2614,13 +2675,6 @@ using the following configuration parameters: disabled. Default: on, with automatic fallback to off when received capability-related error. - <tag><label id="bgp-advertise-ipv4">advertise ipv4 <m/switch/</tag> - Advertise IPv4 multiprotocol capability. This is not a correct behavior - according to the strict interpretation of <rfc id="4760">, but it is - widespread and required by some BGP implementations (Cisco and Quagga). - This option is relevant to IPv4 mode with enabled capability - advertisement only. Default: on. - <tag><label id="bgp-advertise-hostname">advertise hostname <m/switch/</tag> Advertise hostname capability along with the hostname. Default: off. @@ -2666,7 +2720,7 @@ using the following configuration parameters: <tag><label id="bgp-error-wait-time">error wait time <m/number/,<m/number/</tag> Minimum and maximum delay in seconds between a protocol failure (either - local or reported by the peer) and automatic restart. Doesn't apply + local or reported by the peer) and automatic restart. Doesn not apply when <cf/disable after error/ is configured. If consecutive errors happen, the delay is increased exponentially until it reaches the maximum. Default: 60, 300. @@ -2844,6 +2898,31 @@ be used in explicit configuration. explicitly (to conserve memory). This option requires that the connected routing table is <ref id="dsc-table-sorted" name="sorted">. Default: off. + <tag><label id="bgp-validate">validate <m/switch/</tag> + Apply flowspec validation procedure as described in <rfc id="8955"> + section 6 and <rfc id="9117">. The Validation procedure enforces that + only routers in the forwarding path for a network can originate flowspec + rules for that network. The validation procedure should be used for EBGP + to prevent injection of malicious flowspec rules from outside, but it + should also be used for IBGP to ensure that selected flowspec rules are + consistent with selected IP routes. The validation procedure uses an IP + routing table (<ref id="bgp-base-table" name="base table">, see below) + against which flowspec rules are validated. This option is limited to + flowspec channels. Default: off (for compatibility reasons). + + Note that currently the flowspec validation does not work reliably + together with <ref id="bgp-import-table" name="import table"> option + enabled on flowspec channels. + + <tag><label id="bgp-base-table">base table <m/name/</tag> + Specifies an IP table used for the flowspec validation procedure. The + table must have enabled <cf/trie/ option, otherwise the validation + procedure would not work. The type of the table must be <cf/ipv4/ for + <cf/flow4/ channels and <cf/ipv6/ for <cf/flow6/ channels. This option + is limited to flowspec channels. Default: the main table of the + <cf/ipv4/ / <cf/ipv6/ channel of the same BGP instance, or the + <cf/master4/ / <cf/master6/ table if there is no such channel. + <tag><label id="bgp-extended-next-hop">extended next hop <m/switch/</tag> BGP expects that announced next hops have the same address family as associated network prefixes. This option provides an extension to use @@ -3240,6 +3319,12 @@ channels. allows to specify a limit on maximal number of nexthops in one route. By default, multipath merging is disabled. If enabled, default value of the limit is 16. + + <tag><label id="krt-netlink-rx-buffer">netlink rx buffer <m/number/</tag> (Linux) + Set kernel receive buffer size (in bytes) for the netlink socket. The default + value is OS-dependent (from the <file>/proc/sys/net/core/rmem_default</file> + file), If you get some "Kernel dropped some netlink message ..." warnings, + you may increase this value. </descrip> <sect1>Attributes @@ -4124,6 +4209,14 @@ include standard channel config options; see the example below. <tag><label id="pipe-peer-table">peer table <m/table/</tag> Defines secondary routing table to connect to. The primary one is selected by the <cf/table/ keyword. + + <tag><label id="pipe-max-generation">max generation <m/expr/</tag> + Sets maximal generation of route that may pass through this pipe. + The generation value is increased by one by each pipe on its path. + Not meeting this requirement causes an error message complaining about + an overpiped route. If you have long chains of pipes, you probably want + to raise this value; anyway the default of 16 should be enough for even + most strange uses. Maximum is 254. </descrip> <sect1>Attributes @@ -5157,7 +5250,7 @@ Note that for negated matches, value must be either zero or equal to bitmask <cf>port 1..1023,1194,3306</cf>). <tag><label id="flow-dport">dport <m/numbers-match/</tag> - Set a mating destination port numbers (e.g. <cf>dport 49151</cf>). + Set a matching destination port numbers (e.g. <cf>dport 49151</cf>). <tag><label id="flow-sport">sport <m/numbers-match/</tag> Set a matching source port numbers (e.g. <cf>sport = 0</cf>). @@ -5348,15 +5441,15 @@ name="atrey.karlin.mff.cuni.cz:/pub/rfc">). </book> <!-- -LocalWords: GPL IPv GateD BGPv RIPv OSPFv Linux sgml html dvi sgmltools Pavel +LocalWords: GPL IPv GateD BGPv RIPv OSPFv Linux sgml html dvi sgmltools LocalWords: linuxdoc dtd descrip config conf syslog stderr auth ospf bgp Mbps LocalWords: router's eval expr num birdc ctl UNIX if's enums bool int ip GCC LocalWords: len ipaddress pxlen netmask enum bgppath bgpmask clist gw md eth -LocalWords: RTS printn quitbird iBGP AS'es eBGP RFC multiprotocol IGP Machek +LocalWords: RTS printn quitbird iBGP AS'es eBGP RFC multiprotocol IGP LocalWords: EGP misconfigurations keepalive pref aggr aggregator BIRD's RTC LocalWords: OS'es AS's multicast nolisten misconfigured UID blackhole MRTD MTU LocalWords: uninstalls ethernets IP binutils ANYCAST anycast dest RTD ICMP rfc LocalWords: compat multicasts nonbroadcast pointopoint loopback sym stats LocalWords: Perl SIGHUP dd mm yy HH MM SS EXT IA UNICAST multihop Discriminator txt -LocalWords: proto wildcard Ondrej Filip +LocalWords: proto wildcard --> diff --git a/doc/prog-head.sgml b/doc/prog-head.sgml index 0eec367e..4daca3a3 100644 --- a/doc/prog-head.sgml +++ b/doc/prog-head.sgml @@ -12,9 +12,9 @@ <title>BIRD Programmer's Documentation <author> Ondrej Filip <it/<feela@network.cz>/, -Pavel Machek <it/<pavel@ucw.cz>/, Martin Mares <it/<mj@ucw.cz>/, Ondrej Zajicek <it/<santiago@crfreenet.org>/ +Maria Matejka <it/<mq@jmq.cz>/, </author> <abstract> diff --git a/filter/config.Y b/filter/config.Y index 8034b790..ca792593 100644 --- a/filter/config.Y +++ b/filter/config.Y @@ -22,6 +22,13 @@ static inline u32 pair_b(u32 p) { return p & 0xFFFF; } #define f_generate_complex(fi_code, da, arg) \ f_new_inst(FI_EA_SET, f_new_inst(fi_code, f_new_inst(FI_EA_GET, da), arg), da) +#define f_generate_complex_sym(fi_code, sym, arg) ({ \ + if (sym->class != SYM_ATTRIBUTE) \ + cf_error("Can't empty %s: not an attribute", sym->name); \ + f_generate_complex(fi_code, sym->attribute, arg); \ +}) + + /* * Sets and their items are during parsing handled as lists, linked * through left ptr. The first item in a list also contains a pointer @@ -161,28 +168,32 @@ f_new_lc_item(u32 f1, u32 t1, u32 f2, u32 t2, u32 f3, u32 t3) } static inline struct f_inst * -f_generate_empty(struct f_dynamic_attr dyn) +f_generate_empty(const struct symbol *sym) { - struct f_val empty; + if (sym->class != SYM_ATTRIBUTE) + cf_error("Can't empty %s: not an attribute", sym->name); - switch (dyn.type & EAF_TYPE_MASK) { - case EAF_TYPE_AS_PATH: - empty = f_const_empty_path; - break; - case EAF_TYPE_INT_SET: - empty = f_const_empty_clist; - break; - case EAF_TYPE_EC_SET: - empty = f_const_empty_eclist; - break; - case EAF_TYPE_LC_SET: - empty = f_const_empty_lclist; - break; - default: - cf_error("Can't empty that attribute"); - } + const struct ea_class *def = sym->attribute; + const struct f_val *empty = f_get_empty(def->type); + if (!empty) + cf_error("Can't empty attribute %s", def->name); - return f_new_inst(FI_EA_SET, f_new_inst(FI_CONSTANT, empty), dyn); + return f_new_inst(FI_EA_SET, f_new_inst(FI_CONSTANT, *empty), def); +} + +static inline struct f_inst * +f_implicit_roa_check(struct rtable_config *tab) +{ + const struct ea_class *def = ea_class_find("bgp_path"); + if (!def) + cf_error("Fatal: Couldn't find BGP path attribute definition."); + + struct f_static_attr fsa = f_new_static_attr(T_NET, SA_NET, 1); + + return f_new_inst(FI_ROA_CHECK, + f_new_inst(FI_RTA_GET, fsa), + f_new_inst(FI_AS_PATH_LAST, f_new_inst(FI_EA_GET, def)), + tab); } /* @@ -274,14 +285,15 @@ CF_KEYWORDS(FUNCTION, PRINT, PRINTN, UNSET, RETURN, SET, STRING, BGPMASK, BGPPATH, CLIST, ECLIST, LCLIST, IF, THEN, ELSE, CASE, TRUE, FALSE, RT, RO, UNKNOWN, GENERIC, - FROM, GW, NET, MASK, PROTO, SOURCE, SCOPE, DEST, IFNAME, IFINDEX, WEIGHT, GW_MPLS, - PREFERENCE, + FROM, GW, NET, MASK, PROTO, SCOPE, DEST, IFNAME, IFINDEX, WEIGHT, GW_MPLS, ROA_CHECK, ASN, SRC, DST, IS_V4, IS_V6, LEN, MAXLEN, + DATA, DATA1, DATA2, DEFINED, - ADD, DELETE, CONTAINS, RESET, - PREPEND, FIRST, LAST, LAST_NONAGGREGATED, MATCH, + ADD, DELETE, RESET, + PREPEND, FIRST, LAST, LAST_NONAGGREGATED, + MIN, MAX, EMPTY, FILTER, WHERE, EVAL, ATTRIBUTE, BT_ASSERT, BT_TEST_SUITE, BT_CHECK_ASSIGN, BT_TEST_SAME, FORMAT, STACKS) @@ -291,8 +303,8 @@ CF_KEYWORDS(FUNCTION, PRINT, PRINTN, UNSET, RETURN, %type <xp> cmds_int cmd_prep %type <x> term block cmd cmds constant constructor print_list var_list function_call symbol_value bgp_path_expr bgp_path bgp_path_tail -%type <fda> dynamic_attr %type <fsa> static_attr +%type <fab> attr_bit %type <f> filter where_filter %type <fl> filter_body function_body %type <flv> lvalue @@ -333,12 +345,19 @@ filter_eval: conf: custom_attr ; custom_attr: ATTRIBUTE type symbol ';' { - cf_define_symbol($3, SYM_ATTRIBUTE, attribute, ca_lookup(new_config->pool, $3->name, $2)->fda); + if (($3->class == SYM_ATTRIBUTE) && ($3->scope == new_config->root_scope)) + cf_error("Duplicate attribute %s definition", $3->name); + + cf_define_symbol($3, SYM_ATTRIBUTE, attribute, + ea_register_alloc(new_config->pool, (struct ea_class) { + .name = $3->name, + .type = $2, + })->class); }; conf: bt_test_suite ; bt_test_suite: - BT_TEST_SUITE '(' CF_SYM_KNOWN ',' text ')' { + BT_TEST_SUITE '(' symbol_known ',' text ')' { cf_assert_symbol($3, SYM_FUNCTION); struct f_bt_test_suite *t = cfg_allocz(sizeof(struct f_bt_test_suite)); t->fn = $3->function; @@ -351,7 +370,7 @@ bt_test_suite: conf: bt_test_same ; bt_test_same: - BT_TEST_SAME '(' CF_SYM_KNOWN ',' CF_SYM_KNOWN ',' NUM ')' { + BT_TEST_SAME '(' symbol_known ',' symbol_known ',' NUM ')' { cf_assert_symbol($3, SYM_FUNCTION); cf_assert_symbol($5, SYM_FUNCTION); struct f_bt_test_suite *t = cfg_allocz(sizeof(struct f_bt_test_suite)); @@ -429,7 +448,7 @@ function_vars: filter_body: function_body ; filter: - CF_SYM_KNOWN { + symbol_known { cf_assert_symbol($1, SYM_FILTER); $$ = $1->filter; } @@ -527,10 +546,10 @@ set_atom: | VPN_RD { $$.type = T_RD; $$.val.ec = $1; } | ENUM { $$.type = pair_a($1); $$.val.i = pair_b($1); } | '(' term ')' { - if (f_eval(f_linearize($2), cfg_mem, &($$)) > F_RETURN) cf_error("Runtime error"); + if (f_eval(f_linearize($2), &($$)) > F_RETURN) cf_error("Runtime error"); if (!f_valid_set_type($$.type)) cf_error("Set-incompatible type"); } - | CF_SYM_KNOWN { + | symbol_known { cf_assert_symbol($1, SYM_CONSTANT); if (!f_valid_set_type(SYM_TYPE($1))) cf_error("%s: set-incompatible type", $1->name); $$ = *$1->val; @@ -700,7 +719,7 @@ var_list: /* EMPTY */ { $$ = NULL; } | var_list ',' term { $$ = $3; $$->next = $1; } function_call: - CF_SYM_KNOWN '(' var_list ')' { + symbol_known '(' var_list ')' { if ($1->class != SYM_FUNCTION) cf_error("You can't call something which is not a function. Really."); @@ -724,7 +743,7 @@ function_call: } ; -symbol_value: CF_SYM_KNOWN +symbol_value: symbol_known { switch ($1->class) { case SYM_CONSTANT_RANGE: @@ -734,7 +753,7 @@ symbol_value: CF_SYM_KNOWN $$ = f_new_inst(FI_VAR_GET, $1); break; case SYM_ATTRIBUTE: - $$ = f_new_inst(FI_EA_GET, *$1->attribute); + $$ = f_new_inst(FI_EA_GET, $1->attribute); break; default: cf_error("Can't get value of symbol %s", $1->name); @@ -743,17 +762,13 @@ symbol_value: CF_SYM_KNOWN ; static_attr: - FROM { $$ = f_new_static_attr(T_IP, SA_FROM, 0); } - | GW { $$ = f_new_static_attr(T_IP, SA_GW, 0); } + GW { $$ = f_new_static_attr(T_IP, SA_GW, 0); } | NET { $$ = f_new_static_attr(T_NET, SA_NET, 1); } | PROTO { $$ = f_new_static_attr(T_STRING, SA_PROTO, 1); } - | SOURCE { $$ = f_new_static_attr(T_ENUM_RTS, SA_SOURCE, 1); } - | SCOPE { $$ = f_new_static_attr(T_ENUM_SCOPE, SA_SCOPE, 0); } | DEST { $$ = f_new_static_attr(T_ENUM_RTD, SA_DEST, 0); } | IFNAME { $$ = f_new_static_attr(T_STRING, SA_IFNAME, 0); } | IFINDEX { $$ = f_new_static_attr(T_INT, SA_IFINDEX, 1); } | WEIGHT { $$ = f_new_static_attr(T_INT, SA_WEIGHT, 0); } - | PREFERENCE { $$ = f_new_static_attr(T_INT, SA_PREF, 0); } | GW_MPLS { $$ = f_new_static_attr(T_INT, SA_GW_MPLS, 0); } ; @@ -763,6 +778,8 @@ term: | term '-' term { $$ = f_new_inst(FI_SUBTRACT, $1, $3); } | term '*' term { $$ = f_new_inst(FI_MULTIPLY, $1, $3); } | term '/' term { $$ = f_new_inst(FI_DIVIDE, $1, $3); } + | term '&' term { $$ = f_new_inst(FI_BITAND, $1, $3); } + | term '|' term { $$ = f_new_inst(FI_BITOR, $1, $3); } | term AND term { $$ = f_new_inst(FI_AND, $1, $3); } | term OR term { $$ = f_new_inst(FI_OR, $1, $3); } | term '=' term { $$ = f_new_inst(FI_EQ, $1, $3); } @@ -781,8 +798,10 @@ term: | constructor { $$ = $1; } | static_attr { $$ = f_new_inst(FI_RTA_GET, $1); } - - | dynamic_attr { $$ = f_new_inst(FI_EA_GET, $1); } + | attr_bit { + struct f_inst *c = f_new_inst(FI_CONSTANT, (struct f_val) { .type = T_INT, .val.i = (1U << $1.bit)}); + $$ = f_new_inst(FI_EQ, c, f_new_inst(FI_BITAND, f_new_inst(FI_EA_GET, $1.class), c)); + } | term '.' IS_V4 { $$ = f_new_inst(FI_IS_V4, $1); } | term '.' TYPE { $$ = f_new_inst(FI_TYPE, $1); } @@ -790,13 +809,18 @@ term: | term '.' RD { $$ = f_new_inst(FI_ROUTE_DISTINGUISHER, $1); } | term '.' LEN { $$ = f_new_inst(FI_LENGTH, $1); } | term '.' MAXLEN { $$ = f_new_inst(FI_ROA_MAXLEN, $1); } - | term '.' ASN { $$ = f_new_inst(FI_ROA_ASN, $1); } + | term '.' ASN { $$ = f_new_inst(FI_ASN, $1); } | term '.' SRC { $$ = f_new_inst(FI_NET_SRC, $1); } | term '.' DST { $$ = f_new_inst(FI_NET_DST, $1); } | term '.' MASK '(' term ')' { $$ = f_new_inst(FI_IP_MASK, $1, $5); } | term '.' FIRST { $$ = f_new_inst(FI_AS_PATH_FIRST, $1); } | term '.' LAST { $$ = f_new_inst(FI_AS_PATH_LAST, $1); } | term '.' LAST_NONAGGREGATED { $$ = f_new_inst(FI_AS_PATH_LAST_NAG, $1); } + | term '.' DATA { $$ = f_new_inst(FI_PAIR_DATA, $1); } + | term '.' DATA1 { $$ = f_new_inst(FI_LC_DATA1, $1); } + | term '.' DATA2 { $$ = f_new_inst(FI_LC_DATA2, $1); } + | term '.' MIN { $$ = f_new_inst(FI_MIN, $1); } + | term '.' MAX { $$ = f_new_inst(FI_MAX, $1); } /* Communities */ /* This causes one shift/reduce conflict @@ -815,13 +839,11 @@ term: | DELETE '(' term ',' term ')' { $$ = f_new_inst(FI_CLIST_DEL, $3, $5); } | FILTER '(' term ',' term ')' { $$ = f_new_inst(FI_CLIST_FILTER, $3, $5); } - | ROA_CHECK '(' rtable ')' { $$ = f_new_inst(FI_ROA_CHECK_IMPLICIT, $3); } - | ROA_CHECK '(' rtable ',' term ',' term ')' { $$ = f_new_inst(FI_ROA_CHECK_EXPLICIT, $5, $7, $3); } + | ROA_CHECK '(' rtable ')' { $$ = f_implicit_roa_check($3); } + | ROA_CHECK '(' rtable ',' term ',' term ')' { $$ = f_new_inst(FI_ROA_CHECK, $5, $7, $3); } | FORMAT '(' term ')' { $$ = f_new_inst(FI_FORMAT, $3); } -/* | term '.' LEN { $$->code = P('P','l'); } */ - | function_call ; @@ -848,13 +870,15 @@ cmd: | IF term THEN block ELSE block { $$ = f_new_inst(FI_CONDITION, $2, $4, $6); } - | CF_SYM_KNOWN '=' term ';' { + | symbol_known '=' term ';' { switch ($1->class) { case SYM_VARIABLE_RANGE: $$ = f_new_inst(FI_VAR_SET, $3, $1); break; case SYM_ATTRIBUTE: - $$ = f_new_inst(FI_EA_SET, $3, *$1->attribute); + if ($1->attribute->readonly) + cf_error("Attribute %s is read-only", $1->attribute->name); + $$ = f_new_inst(FI_EA_SET, $3, $1->attribute); break; default: cf_error("Can't assign to symbol %s", $1->name); @@ -864,16 +888,25 @@ cmd: DBG( "Ook, we'll return the value\n" ); $$ = f_new_inst(FI_RETURN, $2); } - | dynamic_attr '=' term ';' { - $$ = f_new_inst(FI_EA_SET, $3, $1); - } | static_attr '=' term ';' { if ($1.readonly) cf_error( "This static attribute is read-only."); $$ = f_new_inst(FI_RTA_SET, $3, $1); } - | UNSET '(' dynamic_attr ')' ';' { - $$ = f_new_inst(FI_EA_UNSET, $3); + | UNSET '(' symbol_known ')' ';' { + if ($3->class != SYM_ATTRIBUTE) + cf_error("Can't unset %s", $3->name); + if ($3->attribute->readonly) + cf_error("Attribute %s is read-only", $3->attribute->name); + $$ = f_new_inst(FI_EA_UNSET, $3->attribute); + } + | attr_bit '=' term ';' { + $$ = f_new_inst(FI_CONDITION, $3, + f_generate_complex(FI_BITOR, $1.class, + f_new_inst(FI_CONSTANT, (struct f_val) { .type = T_INT, .val.i = (1U << $1.bit)})), + f_generate_complex(FI_BITAND, $1.class, + f_new_inst(FI_CONSTANT, (struct f_val) { .type = T_INT, .val.i = ~(1U << $1.bit)})) + ); } | break_command print_list ';' { struct f_inst *breaker = f_new_inst(FI_DIE, $1); @@ -898,11 +931,11 @@ cmd: $$ = f_new_inst(FI_SWITCH, $2, build_tree($4)); } - | dynamic_attr '.' EMPTY ';' { $$ = f_generate_empty($1); } - | dynamic_attr '.' PREPEND '(' term ')' ';' { $$ = f_generate_complex( FI_PATH_PREPEND, $1, $5 ); } - | dynamic_attr '.' ADD '(' term ')' ';' { $$ = f_generate_complex( FI_CLIST_ADD, $1, $5 ); } - | dynamic_attr '.' DELETE '(' term ')' ';' { $$ = f_generate_complex( FI_CLIST_DEL, $1, $5 ); } - | dynamic_attr '.' FILTER '(' term ')' ';' { $$ = f_generate_complex( FI_CLIST_FILTER, $1, $5 ); } + | symbol_known '.' EMPTY ';' { $$ = f_generate_empty($1); } + | symbol_known '.' PREPEND '(' term ')' ';' { $$ = f_generate_complex_sym( FI_PATH_PREPEND, $1, $5 ); } + | symbol_known '.' ADD '(' term ')' ';' { $$ = f_generate_complex_sym( FI_CLIST_ADD, $1, $5 ); } + | symbol_known '.' DELETE '(' term ')' ';' { $$ = f_generate_complex_sym( FI_CLIST_DEL, $1, $5 ); } + | symbol_known '.' FILTER '(' term ')' ';' { $$ = f_generate_complex_sym( FI_CLIST_FILTER, $1, $5 ); } | BT_ASSERT '(' get_cf_position term get_cf_position ')' ';' { $$ = assert_done($4, $3 + 1, $5 - 1); } | BT_CHECK_ASSIGN '(' get_cf_position lvalue get_cf_position ',' term ')' ';' { $$ = assert_assign(&$4, $7, $3 + 1, $5 - 1); } ; @@ -913,8 +946,17 @@ get_cf_position: }; lvalue: - CF_SYM_KNOWN { cf_assert_symbol($1, SYM_VARIABLE); $$ = (struct f_lval) { .type = F_LVAL_VARIABLE, .sym = $1 }; } + symbol_known { + switch ($1->class) { + case SYM_VARIABLE_RANGE: + $$ = (struct f_lval) { .type = F_LVAL_VARIABLE, .sym = $1 }; + break; + case SYM_ATTRIBUTE: + $$ = (struct f_lval) { .type = F_LVAL_EA, .da = $1->attribute }; + break; + } + } | static_attr { $$ = (struct f_lval) { .type = F_LVAL_SA, .sa = $1 }; } - | dynamic_attr { $$ = (struct f_lval) { .type = F_LVAL_EA, .da = $1 }; }; + ; CF_END diff --git a/filter/data.c b/filter/data.c index 7c33d2cb..9dab1915 100644 --- a/filter/data.c +++ b/filter/data.c @@ -16,10 +16,10 @@ #include "lib/unaligned.h" #include "lib/net.h" #include "lib/ip.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" -#include "nest/attrs.h" +#include "lib/attrs.h" #include "conf/conf.h" #include "filter/filter.h" #include "filter/f-inst.h" @@ -27,6 +27,8 @@ static const char * const f_type_str[] = { [T_VOID] = "void", + [T_OPAQUE] = "opaque byte string", + [T_IFACE] = "interface", [T_INT] = "int", [T_BOOL] = "bool", @@ -36,7 +38,6 @@ static const char * const f_type_str[] = { [T_ENUM_RTS] = "enum rts", [T_ENUM_BGP_ORIGIN] = "enum bgp_origin", [T_ENUM_SCOPE] = "enum scope", - [T_ENUM_RTC] = "enum rtc", [T_ENUM_RTD] = "enum rtd", [T_ENUM_ROA] = "enum roa", [T_ENUM_NETTYPE] = "enum nettype", @@ -47,6 +48,7 @@ static const char * const f_type_str[] = { [T_NET] = "prefix", [T_STRING] = "string", [T_PATH_MASK] = "bgpmask", + [T_PATH_MASK_ITEM] = "bgpmask item", [T_PATH] = "bgppath", [T_CLIST] = "clist", [T_EC] = "ec", @@ -54,18 +56,26 @@ static const char * const f_type_str[] = { [T_LC] = "lc", [T_LCLIST] = "lclist", [T_RD] = "rd", + + [T_SET] = "set", + [T_PREFIX_SET] = "prefix set", }; const char * -f_type_name(enum f_type t) +f_type_name(btype t) { - if (t < ARRAY_SIZE(f_type_str)) - return f_type_str[t] ?: "?"; + return (t < ARRAY_SIZE(f_type_str)) ? (f_type_str[t] ?: "?") : "?"; +} - if ((t == T_SET) || (t == T_PREFIX_SET)) - return "set"; - - return "?"; +btype +f_type_element_type(btype t) +{ + switch(t) { + case T_CLIST: return T_PAIR; + case T_ECLIST: return T_EC; + case T_LCLIST: return T_LC; + default: return T_VOID; + }; } const struct f_val f_const_empty_path = { @@ -82,14 +92,6 @@ const struct f_val f_const_empty_path = { .val.ad = &null_adata, }; -static struct adata * -adata_empty(struct linpool *pool, int l) -{ - struct adata *res = lp_alloc(pool, sizeof(struct adata) + l); - res->length = l; - return res; -} - static void pm_format(const struct f_path_mask *p, buffer *buf) { @@ -412,7 +414,7 @@ clist_filter(struct linpool *pool, const struct adata *list, const struct f_val if (nl == list->length) return list; - struct adata *res = adata_empty(pool, nl); + struct adata *res = lp_alloc_adata(pool, nl); memcpy(res->data, tmp, nl); return res; } @@ -446,7 +448,7 @@ eclist_filter(struct linpool *pool, const struct adata *list, const struct f_val if (nl == list->length) return list; - struct adata *res = adata_empty(pool, nl); + struct adata *res = lp_alloc_adata(pool, nl); memcpy(res->data, tmp, nl); return res; } @@ -478,7 +480,7 @@ lclist_filter(struct linpool *pool, const struct adata *list, const struct f_val if (nl == list->length) return list; - struct adata *res = adata_empty(pool, nl); + struct adata *res = lp_alloc_adata(pool, nl); memcpy(res->data, tmp, nl); return res; } @@ -599,3 +601,75 @@ val_dump(const struct f_val *v) { return val_dump_buffer; } + +struct f_val * +lp_val_copy(struct linpool *lp, const struct f_val *v) +{ + switch (v->type) + { + case T_VOID: + case T_BOOL: + case T_INT: + case T_IP: + case T_PAIR: + case T_QUAD: + case T_EC: + case T_LC: + case T_RD: + case T_ENUM: + case T_PATH_MASK_ITEM: + /* These aren't embedded but there is no need to copy them */ + case T_SET: + case T_PREFIX_SET: + case T_PATH_MASK: + case T_IFACE: + { + struct f_val *out = lp_alloc(lp, sizeof(*out)); + *out = *v; + return out; + } + + case T_NET: + { + struct { + struct f_val val; + net_addr net[0]; + } *out = lp_alloc(lp, sizeof(*out) + v->val.net->length); + out->val = *v; + out->val.val.net = out->net; + net_copy(out->net, v->val.net); + return &out->val; + } + + case T_STRING: + { + uint len = strlen(v->val.s); + struct { + struct f_val val; + char buf[0]; + } *out = lp_alloc(lp, sizeof(*out) + len + 1); + out->val = *v; + out->val.val.s = out->buf; + memcpy(out->buf, v->val.s, len+1); + return &out->val; + } + + case T_PATH: + case T_CLIST: + case T_ECLIST: + case T_LCLIST: + { + struct { + struct f_val val; + struct adata ad; + } *out = lp_alloc(lp, sizeof(*out) + v->val.ad->length); + out->val = *v; + out->val.val.ad = &out->ad; + memcpy(&out->ad, v->val.ad, v->val.ad->length); + return &out->val; + } + + default: + bug("Unknown type in value copy: %d", v->type); + } +} diff --git a/filter/data.h b/filter/data.h index 45246f9f..23db4a85 100644 --- a/filter/data.h +++ b/filter/data.h @@ -11,110 +11,37 @@ #define _BIRD_FILTER_DATA_H_ #include "nest/bird.h" - -/* Type numbers must be in 0..0xff range */ -#define T_MASK 0xff - -/* Internal types */ -enum f_type { -/* Nothing. Simply nothing. */ - T_VOID = 0, - -/* User visible types, which fit in int */ - T_INT = 0x10, - T_BOOL = 0x11, - T_PAIR = 0x12, /* Notice that pair is stored as integer: first << 16 | second */ - T_QUAD = 0x13, - -/* Put enumerational types in 0x30..0x3f range */ - T_ENUM_LO = 0x30, - T_ENUM_HI = 0x3f, - - T_ENUM_RTS = 0x30, - T_ENUM_BGP_ORIGIN = 0x31, - T_ENUM_SCOPE = 0x32, - T_ENUM_RTC = 0x33, - T_ENUM_RTD = 0x34, - T_ENUM_ROA = 0x35, - T_ENUM_NETTYPE = 0x36, - T_ENUM_RA_PREFERENCE = 0x37, - T_ENUM_AF = 0x38, - -/* new enums go here */ - T_ENUM_EMPTY = 0x3f, /* Special hack for atomic_aggr */ - -#define T_ENUM T_ENUM_LO ... T_ENUM_HI - -/* Bigger ones */ - T_IP = 0x20, - T_NET = 0x21, - T_STRING = 0x22, - T_PATH_MASK = 0x23, /* mask for BGP path */ - T_PATH = 0x24, /* BGP path */ - T_CLIST = 0x25, /* Community list */ - T_EC = 0x26, /* Extended community value, u64 */ - T_ECLIST = 0x27, /* Extended community list */ - T_LC = 0x28, /* Large community value, lcomm */ - T_LCLIST = 0x29, /* Large community list */ - T_RD = 0x2a, /* Route distinguisher for VPN addresses */ - T_PATH_MASK_ITEM = 0x2b, /* Path mask item for path mask constructors */ - - T_SET = 0x80, - T_PREFIX_SET = 0x81, -} PACKED; +#include "lib/type.h" /* Filter value; size of this affects filter memory consumption */ struct f_val { - enum f_type type; /* T_* */ - union { - uint i; - u64 ec; - lcomm lc; - ip_addr ip; - const net_addr *net; - const char *s; - const struct f_tree *t; - const struct f_trie *ti; - const struct adata *ad; - const struct f_path_mask *path_mask; - struct f_path_mask_item pmi; - } val; + btype type; /* T_* */ + union bval_long val; }; -/* Dynamic attribute definition (eattrs) */ -struct f_dynamic_attr { - u8 type; /* EA type (EAF_*) */ - u8 bit; /* For bitfield accessors */ - enum f_type f_type; /* Filter type */ - uint ea_code; /* EA code */ -}; +#define fputip(a) ({ ip_addr *ax = falloc(sizeof(*ax)); *ax = (a); ax; }) enum f_sa_code { - SA_FROM = 1, - SA_GW, + SA_GW = 1, SA_NET, SA_PROTO, - SA_SOURCE, - SA_SCOPE, SA_DEST, SA_IFNAME, SA_IFINDEX, SA_WEIGHT, - SA_PREF, SA_GW_MPLS, } PACKED; /* Static attribute definition (members of struct rta) */ struct f_static_attr { - enum f_type f_type; /* Filter type */ + btype type; /* Data type */ enum f_sa_code sa_code; /* Static attribute id */ - int readonly:1; /* Don't allow writing */ + int readonly:1; /* Don't allow writing */ }; /* Filter l-value type */ enum f_lval_type { F_LVAL_VARIABLE, - F_LVAL_PREFERENCE, F_LVAL_SA, F_LVAL_EA, }; @@ -124,7 +51,7 @@ struct f_lval { enum f_lval_type type; union { struct symbol *sym; - struct f_dynamic_attr da; + const struct ea_class *da; struct f_static_attr sa; }; }; @@ -141,18 +68,23 @@ struct f_tree { void *data; }; +#define TRIE_STEP 4 +#define TRIE_STACK_LENGTH 33 + struct f_trie_node4 { ip4_addr addr, mask, accept; - uint plen; - struct f_trie_node4 *c[2]; + u16 plen; + u16 local; + struct f_trie_node4 *c[1 << TRIE_STEP]; }; struct f_trie_node6 { ip6_addr addr, mask, accept; - uint plen; - struct f_trie_node6 *c[2]; + u16 plen; + u16 local; + struct f_trie_node6 *c[1 << TRIE_STEP]; }; struct f_trie_node @@ -169,9 +101,20 @@ struct f_trie u8 zero; s8 ipv4; /* -1 for undefined / empty */ u16 data_size; /* Additional data for each trie node */ + u32 prefix_count; /* Works only for restricted tries (pxlen == l == h) */ struct f_trie_node root; /* Root trie node */ }; +struct f_trie_walk_state +{ + u8 ipv4; + u8 accept_length; /* Current inter-node prefix position */ + u8 start_pos; /* Initial prefix position in stack[0] */ + u8 local_pos; /* Current intra-node prefix position */ + u8 stack_pos; /* Current node in stack below */ + const struct f_trie_node *stack[TRIE_STACK_LENGTH]; +}; + struct f_tree *f_new_tree(void); struct f_tree *build_tree(struct f_tree *); const struct f_tree *find_tree(const struct f_tree *t, const struct f_val *val); @@ -182,12 +125,75 @@ void tree_walk(const struct f_tree *t, void (*hook)(const struct f_tree *, void struct f_trie *f_new_trie(linpool *lp, uint data_size); void *trie_add_prefix(struct f_trie *t, const net_addr *n, uint l, uint h); int trie_match_net(const struct f_trie *t, const net_addr *n); +int trie_match_longest_ip4(const struct f_trie *t, const net_addr_ip4 *net, net_addr_ip4 *dst, ip4_addr *found0); +int trie_match_longest_ip6(const struct f_trie *t, const net_addr_ip6 *net, net_addr_ip6 *dst, ip6_addr *found0); +void trie_walk_init(struct f_trie_walk_state *s, const struct f_trie *t, const net_addr *from); +int trie_walk_next(struct f_trie_walk_state *s, net_addr *net); int trie_same(const struct f_trie *t1, const struct f_trie *t2); void trie_format(const struct f_trie *t, buffer *buf); +static inline int +trie_match_next_longest_ip4(net_addr_ip4 *n, ip4_addr *found) +{ + while (n->pxlen) + { + n->pxlen--; + ip4_clrbit(&n->prefix, n->pxlen); + + if (ip4_getbit(*found, n->pxlen)) + return 1; + } + + return 0; +} + +static inline int +trie_match_next_longest_ip6(net_addr_ip6 *n, ip6_addr *found) +{ + while (n->pxlen) + { + n->pxlen--; + ip6_clrbit(&n->prefix, n->pxlen); + + if (ip6_getbit(*found, n->pxlen)) + return 1; + } + + return 0; +} + + +#define TRIE_WALK_TO_ROOT_IP4(trie, net, dst) ({ \ + net_addr_ip4 dst; \ + ip4_addr _found; \ + for (int _n = trie_match_longest_ip4(trie, net, &dst, &_found); \ + _n; \ + _n = trie_match_next_longest_ip4(&dst, &_found)) + +#define TRIE_WALK_TO_ROOT_IP6(trie, net, dst) ({ \ + net_addr_ip6 dst; \ + ip6_addr _found; \ + for (int _n = trie_match_longest_ip6(trie, net, &dst, &_found); \ + _n; \ + _n = trie_match_next_longest_ip6(&dst, &_found)) + +#define TRIE_WALK_TO_ROOT_END }) + + +#define TRIE_WALK(trie, net, from) ({ \ + net_addr net; \ + struct f_trie_walk_state tws_; \ + trie_walk_init(&tws_, trie, from); \ + while (trie_walk_next(&tws_, &net)) + +#define TRIE_WALK_END }) + + #define F_CMP_ERROR 999 -const char *f_type_name(enum f_type t); +const char *f_type_name(btype t); + +enum btype f_type_element_type(btype t); int val_same(const struct f_val *v1, const struct f_val *v2); int val_compare(const struct f_val *v1, const struct f_val *v2); @@ -195,6 +201,8 @@ void val_format(const struct f_val *v, buffer *buf); char *val_format_str(struct linpool *lp, const struct f_val *v); const char *val_dump(const struct f_val *v); +struct f_val *lp_val_copy(struct linpool *lp, const struct f_val *v); + static inline int val_is_ip4(const struct f_val *v) { return (v->type == T_IP) && ipa_is_ip4(v->val.ip); } int val_in_range(const struct f_val *v1, const struct f_val *v2); @@ -220,7 +228,17 @@ undef_value(struct f_val v) } extern const struct f_val f_const_empty_path, f_const_empty_clist, f_const_empty_eclist, f_const_empty_lclist; +static inline const struct f_val *f_get_empty(btype t) +{ + switch (t) { + case T_PATH: return &f_const_empty_path; + case T_CLIST: return &f_const_empty_clist; + case T_ECLIST: return &f_const_empty_eclist; + case T_LCLIST: return &f_const_empty_lclist; + default: return NULL; + } +} -enum filter_return f_eval(const struct f_line *expr, struct linpool *tmp_pool, struct f_val *pres); +enum filter_return f_eval(const struct f_line *expr, struct f_val *pres); #endif diff --git a/filter/decl.m4 b/filter/decl.m4 index 44537aaa..c59cd7f3 100644 --- a/filter/decl.m4 +++ b/filter/decl.m4 @@ -94,7 +94,7 @@ FID_DUMP_BODY()m4_dnl debug("%s" $4 "\n", INDENT, $5); ]]) FID_INTERPRET_EXEC()m4_dnl -const $1 $2 = whati->$2 +$1 $2 = whati->$2 FID_INTERPRET_BODY') # Instruction arguments are needed only until linearization is done. @@ -256,7 +256,7 @@ FID_INTERPRET_BODY()') m4_define(SYMBOL, `FID_MEMBER(struct symbol *, sym, [[strcmp(f1->sym->name, f2->sym->name) || (f1->sym->class != f2->sym->class)]], "symbol %s", item->sym->name)') m4_define(RTC, `FID_MEMBER(struct rtable_config *, rtc, [[strcmp(f1->rtc->name, f2->rtc->name)]], "route table %s", item->rtc->name)') m4_define(STATIC_ATTR, `FID_MEMBER(struct f_static_attr, sa, f1->sa.sa_code != f2->sa.sa_code,,)') -m4_define(DYNAMIC_ATTR, `FID_MEMBER(struct f_dynamic_attr, da, f1->da.ea_code != f2->da.ea_code,,)') +m4_define(DYNAMIC_ATTR, `FID_MEMBER(const struct ea_class *, da, f1->da != f2->da,,)') m4_define(ACCESS_RTE, `FID_HIC(,[[do { if (!fs->rte) runtime("No route to access"); } while (0)]],NEVER_CONSTANT())') # 2) Code wrapping @@ -490,7 +490,7 @@ fi_constant(struct f_inst *what, struct f_val val) } static int -f_const_promotion(struct f_inst *arg, enum f_type want) +f_const_promotion(struct f_inst *arg, btype want) { if (arg->fi_code != FI_CONSTANT) return 0; @@ -640,7 +640,7 @@ FID_WR_PUT(4)m4_dnl struct f_inst { struct f_inst *next; /* Next instruction */ enum f_instruction_code fi_code; /* Instruction code */ - enum f_type type; /* Type of returned value, if known */ + btype type; /* Type of returned value, if known */ int size; /* How many instructions are underneath */ int lineno; /* Line number */ union { diff --git a/filter/f-inst.c b/filter/f-inst.c index 00e22383..1690c9f6 100644 --- a/filter/f-inst.c +++ b/filter/f-inst.c @@ -240,6 +240,16 @@ if (v2.val.i == 0) runtime( "Mother told me not to divide by 0" ); RESULT(T_INT, i, v1.val.i / v2.val.i); } + INST(FI_BITOR, 2, 1) { + ARG(1,T_INT); + ARG(2,T_INT); + RESULT(T_INT, i, v1.val.i | v2.val.i); + } + INST(FI_BITAND, 2, 1) { + ARG(1,T_INT); + ARG(2,T_INT); + RESULT(T_INT, i, v1.val.i & v2.val.i); + } INST(FI_AND, 1, 1) { ARG(1,T_BOOL); ARG_TYPE_STATIC(2,T_BOOL); @@ -519,78 +529,100 @@ { STATIC_ATTR; ACCESS_RTE; - struct rta *rta = fs->rte->attrs; + ACCESS_EATTRS; switch (sa.sa_code) { - case SA_FROM: RESULT(sa.f_type, ip, rta->from); break; - case SA_GW: RESULT(sa.f_type, ip, rta->nh.gw); break; - case SA_NET: RESULT(sa.f_type, net, fs->rte->net); break; - case SA_PROTO: RESULT(sa.f_type, s, fs->rte->src->proto->name); break; - case SA_SOURCE: RESULT(sa.f_type, i, rta->source); break; - case SA_SCOPE: RESULT(sa.f_type, i, rta->scope); break; - case SA_DEST: RESULT(sa.f_type, i, rta->dest); break; - case SA_IFNAME: RESULT(sa.f_type, s, rta->nh.iface ? rta->nh.iface->name : ""); break; - case SA_IFINDEX: RESULT(sa.f_type, i, rta->nh.iface ? rta->nh.iface->index : 0); break; - case SA_WEIGHT: RESULT(sa.f_type, i, rta->nh.weight + 1); break; - case SA_PREF: RESULT(sa.f_type, i, rta->pref); break; - case SA_GW_MPLS: RESULT(sa.f_type, i, rta->nh.labels ? rta->nh.label[0] : MPLS_NULL); break; - + case SA_NET: RESULT(sa.type, net, fs->rte->net); break; + case SA_PROTO: RESULT(sa.type, s, fs->rte->src->proto->name); break; default: - bug("Invalid static attribute access (%u/%u)", sa.f_type, sa.sa_code); + { + struct eattr *nhea = ea_find(*fs->eattrs, &ea_gen_nexthop); + struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; + struct nexthop *nh = nhad ? &nhad->nh : NULL; + + switch (sa.sa_code) + { + case SA_DEST: + RESULT(sa.type, i, nhad ? + (NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest) + : RTD_NONE); + break; + case SA_GW: + RESULT(sa.type, ip, nh ? nh->gw : IPA_NONE); + break; + case SA_IFNAME: + RESULT(sa.type, s, (nh && nh->iface) ? nh->iface->name : ""); + break; + case SA_IFINDEX: + RESULT(sa.type, i, (nh && nh->iface) ? nh->iface->index : 0); + break; + case SA_WEIGHT: + RESULT(sa.type, i, (nh ? nh->weight : 0) + 1); + break; + case SA_GW_MPLS: + RESULT(sa.type, i, (nh && nh->labels) ? nh->label[0] : MPLS_NULL); + break; + default: + bug("Invalid static attribute access (%u/%u)", sa.type, sa.sa_code); + } + } } } } INST(FI_RTA_SET, 1, 0) { ACCESS_RTE; + ACCESS_EATTRS; ARG_ANY(1); STATIC_ATTR; - ARG_TYPE(1, sa.f_type); + ARG_TYPE(1, sa.type); f_rta_cow(fs); { - struct rta *rta = fs->rte->attrs; + union { + struct nexthop_adata nha; + struct { + struct adata ad; + struct nexthop nh; + u32 label; + }; + } nha; + + nha.ad = (struct adata) { + .length = sizeof (struct nexthop_adata) - sizeof (struct adata), + }; + + eattr *a = NULL; switch (sa.sa_code) { - case SA_FROM: - rta->from = v1.val.ip; - break; - - case SA_GW: - { - ip_addr ip = v1.val.ip; - struct iface *ifa = ipa_is_link_local(ip) ? rta->nh.iface : NULL; - neighbor *n = neigh_find(fs->rte->src->proto, ip, ifa, 0); - if (!n || (n->scope == SCOPE_HOST)) - runtime( "Invalid gw address" ); - - rta->dest = RTD_UNICAST; - rta->nh.gw = ip; - rta->nh.iface = n->iface; - rta->nh.next = NULL; - rta->hostentry = NULL; - rta->nh.labels = 0; - } - break; - - case SA_SCOPE: - rta->scope = v1.val.i; - break; - case SA_DEST: { int i = v1.val.i; if ((i != RTD_BLACKHOLE) && (i != RTD_UNREACHABLE) && (i != RTD_PROHIBIT)) runtime( "Destination can be changed only to blackhole, unreachable or prohibit" ); - rta->dest = i; - rta->nh.gw = IPA_NONE; - rta->nh.iface = NULL; - rta->nh.next = NULL; - rta->hostentry = NULL; - rta->nh.labels = 0; + nha.nha.dest = i; + nha.ad.length = NEXTHOP_DEST_SIZE; + break; + } + case SA_GW: + { + struct eattr *nh_ea = ea_find(*fs->eattrs, &ea_gen_nexthop); + + ip_addr ip = v1.val.ip; + struct iface *ifa = (ipa_is_link_local(ip) && nh_ea) ? + ((struct nexthop_adata *) nh_ea->u.ptr)->nh.iface : NULL; + + neighbor *n = neigh_find(fs->rte->src->proto, ip, ifa, 0); + if (!n || (n->scope == SCOPE_HOST)) + runtime( "Invalid gw address" ); + + nha.nh = (struct nexthop) { + .gw = ip, + .iface = n->iface, + }; } break; @@ -600,12 +632,9 @@ if (!ifa) runtime( "Invalid iface name" ); - rta->dest = RTD_UNICAST; - rta->nh.gw = IPA_NONE; - rta->nh.iface = ifa; - rta->nh.next = NULL; - rta->hostentry = NULL; - rta->nh.labels = 0; + nha.nh = (struct nexthop) { + .iface = ifa, + }; } break; @@ -614,13 +643,20 @@ if (v1.val.i >= 0x100000) runtime( "Invalid MPLS label" ); + struct eattr *nh_ea = ea_find(*fs->eattrs, &ea_gen_nexthop); + if (!nh_ea) + runtime( "No nexthop to add a MPLS label to" ); + + nha.nh = ((struct nexthop_adata *) nh_ea->u.ptr)->nh; + if (v1.val.i != MPLS_NULL) { - rta->nh.label[0] = v1.val.i; - rta->nh.labels = 1; + nha.nh.label[0] = v1.val.i; + nha.nh.labels = 1; + nha.ad.length = sizeof nha - sizeof (struct adata); } else - rta->nh.labels = 0; + nha.nh.labels = 0; } break; @@ -629,22 +665,36 @@ int i = v1.val.i; if (i < 1 || i > 256) runtime( "Setting weight value out of bounds" ); - if (rta->dest != RTD_UNICAST) + + struct eattr *nh_ea = ea_find(*fs->eattrs, &ea_gen_nexthop); + if (!nh_ea) + runtime( "No nexthop to set weight on" ); + + struct nexthop_adata *nhad = (struct nexthop_adata *) nh_ea->u.ptr; + if (!NEXTHOP_IS_REACHABLE(nhad)) runtime( "Setting weight needs regular nexthop " ); + struct nexthop_adata *nhax = (struct nexthop_adata *) tmp_copy_adata(&nhad->ad); + /* Set weight on all next hops */ - for (struct nexthop *nh = &rta->nh; nh; nh = nh->next) + NEXTHOP_WALK(nh, nhax) nh->weight = i - 1; + + a = ea_set_attr(fs->eattrs, + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, &nhax->ad)); } break; - case SA_PREF: - rta->pref = v1.val.i; - break; - default: - bug("Invalid static attribute access (%u/%u)", sa.f_type, sa.sa_code); + bug("Invalid static attribute access (%u/%u)", sa.type, sa.sa_code); } + + if (!a) + a = ea_set_attr(fs->eattrs, + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, tmp_copy_adata(&nha.ad))); + + a->originated = 1; + a->fresh = 1; } } @@ -652,74 +702,30 @@ DYNAMIC_ATTR; ACCESS_RTE; ACCESS_EATTRS; - RESULT_TYPE(da.f_type); + RESULT_TYPE(da->type); { - eattr *e = ea_find(*fs->eattrs, da.ea_code); + const struct f_val *empty; + const eattr *e = ea_find(*fs->eattrs, da->id); - if (!e) { - /* A special case: undefined as_path looks like empty as_path */ - if (da.type == EAF_TYPE_AS_PATH) { - RESULT_(T_PATH, ad, &null_adata); - break; + if (e) + { + ASSERT_DIE(e->type == da->type); + + switch (e->type) { + case T_IP: + RESULT_(T_IP, ip, *((const ip_addr *) e->u.ptr->data)); + break; + default: + RESULT_VAL([[(struct f_val) { + .type = e->type, + .val.bval = e->u, + }]]); } - - /* The same special case for int_set */ - if (da.type == EAF_TYPE_INT_SET) { - RESULT_(T_CLIST, ad, &null_adata); - break; - } - - /* The same special case for ec_set */ - if (da.type == EAF_TYPE_EC_SET) { - RESULT_(T_ECLIST, ad, &null_adata); - break; - } - - /* The same special case for lc_set */ - if (da.type == EAF_TYPE_LC_SET) { - RESULT_(T_LCLIST, ad, &null_adata); - break; - } - - /* Undefined value */ - RESULT_VOID; - break; } - - switch (e->type & EAF_TYPE_MASK) { - case EAF_TYPE_INT: - RESULT_(da.f_type, i, e->u.data); - break; - case EAF_TYPE_ROUTER_ID: - RESULT_(T_QUAD, i, e->u.data); - break; - case EAF_TYPE_OPAQUE: - RESULT_(T_ENUM_EMPTY, i, 0); - break; - case EAF_TYPE_IP_ADDRESS: - RESULT_(T_IP, ip, *((ip_addr *) e->u.ptr->data)); - break; - case EAF_TYPE_AS_PATH: - RESULT_(T_PATH, ad, e->u.ptr); - break; - case EAF_TYPE_BITFIELD: - RESULT_(T_BOOL, i, !!(e->u.data & (1u << da.bit))); - break; - case EAF_TYPE_INT_SET: - RESULT_(T_CLIST, ad, e->u.ptr); - break; - case EAF_TYPE_EC_SET: - RESULT_(T_ECLIST, ad, e->u.ptr); - break; - case EAF_TYPE_LC_SET: - RESULT_(T_LCLIST, ad, e->u.ptr); - break; - case EAF_TYPE_UNDEF: + else if (empty = f_get_empty(da->type)) + RESULT_VAL(*empty); + else RESULT_VOID; - break; - default: - bug("Unknown dynamic attribute type"); - } } } @@ -728,62 +734,34 @@ ACCESS_EATTRS; ARG_ANY(1); DYNAMIC_ATTR; - ARG_TYPE(1, da.f_type); + ARG_TYPE(1, da->type); { - struct ea_list *l = lp_alloc(fs->pool, sizeof(struct ea_list) + sizeof(eattr)); + struct eattr *a; - l->next = NULL; - l->flags = EALF_SORTED; - l->count = 1; - l->attrs[0].id = da.ea_code; - l->attrs[0].flags = 0; - l->attrs[0].type = da.type | EAF_ORIGINATED | EAF_FRESH; + if (da->type >= EAF_TYPE__MAX) + bug("Unsupported attribute type"); - switch (da.type) { - case EAF_TYPE_INT: - case EAF_TYPE_ROUTER_ID: - l->attrs[0].u.data = v1.val.i; - break; + f_rta_cow(fs); - case EAF_TYPE_OPAQUE: + switch (da->type) { + case T_OPAQUE: + case T_IFACE: runtime( "Setting opaque attribute is not allowed" ); break; - case EAF_TYPE_IP_ADDRESS:; - int len = sizeof(ip_addr); - struct adata *ad = lp_alloc(fs->pool, sizeof(struct adata) + len); - ad->length = len; - (* (ip_addr *) ad->data) = v1.val.ip; - l->attrs[0].u.ptr = ad; - break; - - case EAF_TYPE_AS_PATH: - case EAF_TYPE_INT_SET: - case EAF_TYPE_EC_SET: - case EAF_TYPE_LC_SET: - l->attrs[0].u.ptr = v1.val.ad; - break; - - case EAF_TYPE_BITFIELD: - { - /* First, we have to find the old value */ - eattr *e = ea_find(*fs->eattrs, da.ea_code); - u32 data = e ? e->u.data : 0; - - if (v1.val.i) - l->attrs[0].u.data = data | (1u << da.bit); - else - l->attrs[0].u.data = data & ~(1u << da.bit); - } + case T_IP: + a = ea_set_attr(fs->eattrs, + EA_LITERAL_STORE_ADATA(da, 0, &v1.val.ip, sizeof(ip_addr))); break; default: - bug("Unknown dynamic attribute type"); + a = ea_set_attr(fs->eattrs, + EA_LITERAL_GENERIC(da->id, da->type, 0, .u = v1.val.bval)); + break; } - f_rta_cow(fs); - l->next = *fs->eattrs; - *fs->eattrs = l; + a->originated = 1; + a->fresh = 1; } } @@ -792,21 +770,8 @@ ACCESS_RTE; ACCESS_EATTRS; - { - struct ea_list *l = lp_alloc(fs->pool, sizeof(struct ea_list) + sizeof(eattr)); - - l->next = NULL; - l->flags = EALF_SORTED; - l->count = 1; - l->attrs[0].id = da.ea_code; - l->attrs[0].flags = 0; - l->attrs[0].type = EAF_TYPE_UNDEF | EAF_ORIGINATED | EAF_FRESH; - l->attrs[0].u.data = 0; - - f_rta_cow(fs); - l->next = *fs->eattrs; - *fs->eattrs = l; - } + f_rta_cow(fs); + ea_unset_attr(fs->eattrs, 1, da); } INST(FI_LENGTH, 1, 1) { /* Get length of */ @@ -901,14 +866,31 @@ ((net_addr_roa6 *) v1.val.net)->max_pxlen); } - INST(FI_ROA_ASN, 1, 1) { /* Get ROA ASN */ - ARG(1, T_NET); - if (!net_is_roa(v1.val.net)) - runtime( "ROA expected" ); + INST(FI_ASN, 1, 1) { /* Get ROA ASN or community ASN part */ + ARG_ANY(1); + RESULT_TYPE(T_INT); + switch(v1.type) + { + case T_NET: + if (!net_is_roa(v1.val.net)) + runtime( "ROA expected" ); - RESULT(T_INT, i, (v1.val.net->type == NET_ROA4) ? - ((net_addr_roa4 *) v1.val.net)->asn : - ((net_addr_roa6 *) v1.val.net)->asn); + RESULT_(T_INT, i, (v1.val.net->type == NET_ROA4) ? + ((net_addr_roa4 *) v1.val.net)->asn : + ((net_addr_roa6 *) v1.val.net)->asn); + break; + + case T_PAIR: + RESULT_(T_INT, i, v1.val.i >> 16); + break; + + case T_LC: + RESULT_(T_INT, i, v1.val.lc.asn); + break; + + default: + runtime( "Net, pair or lc expected" ); + } } INST(FI_IP, 1, 1) { /* Convert prefix to ... */ @@ -942,6 +924,89 @@ RESULT(T_INT, i, as_path_get_last_nonaggregated(v1.val.ad)); } + INST(FI_PAIR_DATA, 1, 1) { /* Get data part from the standard community */ + ARG(1, T_PAIR); + RESULT(T_INT, i, v1.val.i & 0xFFFF); + } + + INST(FI_LC_DATA1, 1, 1) { /* Get data1 part from the large community */ + ARG(1, T_LC); + RESULT(T_INT, i, v1.val.lc.ldp1); + } + + INST(FI_LC_DATA2, 1, 1) { /* Get data2 part from the large community */ + ARG(1, T_LC); + RESULT(T_INT, i, v1.val.lc.ldp2); + } + + INST(FI_MIN, 1, 1) { /* Get minimum element from set */ + ARG_ANY(1); + RESULT_TYPE(f_type_element_type(v1.type)); + switch(v1.type) + { + case T_CLIST: + { + u32 val = 0; + int_set_min(v1.val.ad, &val); + RESULT_(T_PAIR, i, val); + } + break; + + case T_ECLIST: + { + u64 val = 0; + ec_set_min(v1.val.ad, &val); + RESULT_(T_EC, ec, val); + } + break; + + case T_LCLIST: + { + lcomm val = { 0, 0, 0 }; + lc_set_min(v1.val.ad, &val); + RESULT_(T_LC, lc, val); + } + break; + + default: + runtime( "Clist or lclist expected" ); + } + } + + INST(FI_MAX, 1, 1) { /* Get maximum element from set */ + ARG_ANY(1); + RESULT_TYPE(f_type_element_type(v1.type)); + switch(v1.type) + { + case T_CLIST: + { + u32 val = 0; + int_set_max(v1.val.ad, &val); + RESULT_(T_PAIR, i, val); + } + break; + + case T_ECLIST: + { + u64 val = 0; + ec_set_max(v1.val.ad, &val); + RESULT_(T_EC, ec, val); + } + break; + + case T_LCLIST: + { + lcomm val = { 0, 0, 0 }; + lc_set_max(v1.val.ad, &val); + RESULT_(T_LC, lc, val); + } + break; + + default: + runtime( "Clist or lclist expected" ); + } + } + INST(FI_RETURN, 1, 1) { NEVER_CONSTANT; /* Acquire the return value */ @@ -1208,37 +1273,7 @@ runtime("Can't filter non-[e|l]clist"); } - INST(FI_ROA_CHECK_IMPLICIT, 0, 1) { /* ROA Check */ - NEVER_CONSTANT; - RTC(1); - struct rtable *table = rtc->table; - ACCESS_RTE; - ACCESS_EATTRS; - const net_addr *net = fs->rte->net; - - /* We ignore temporary attributes, probably not a problem here */ - /* 0x02 is a value of BA_AS_PATH, we don't want to include BGP headers */ - eattr *e = ea_find(*fs->eattrs, EA_CODE(PROTOCOL_BGP, 0x02)); - - if (!e || ((e->type & EAF_TYPE_MASK) != EAF_TYPE_AS_PATH)) - runtime("Missing AS_PATH attribute"); - - u32 as = 0; - as_path_get_last(e->u.ptr, &as); - - if (!table) - runtime("Missing ROA table"); - - if (table->addr_type != NET_ROA4 && table->addr_type != NET_ROA6) - runtime("Table type must be either ROA4 or ROA6"); - - if (table->addr_type != (net->type == NET_IP4 ? NET_ROA4 : NET_ROA6)) - RESULT(T_ENUM_ROA, i, ROA_UNKNOWN); /* Prefix and table type mismatch */ - else - RESULT(T_ENUM_ROA, i, [[ net_roa_check(table, net, as) ]]); - } - - INST(FI_ROA_CHECK_EXPLICIT, 2, 1) { /* ROA Check */ + INST(FI_ROA_CHECK, 2, 1) { /* ROA Check */ NEVER_CONSTANT; ARG(1, T_NET); ARG(2, T_INT); diff --git a/filter/f-inst.h b/filter/f-inst.h index df45f88e..047a66c9 100644 --- a/filter/f-inst.h +++ b/filter/f-inst.h @@ -87,15 +87,17 @@ void f_add_lines(const struct f_line_item *what, struct filter_iterator *fit); struct filter *f_new_where(struct f_inst *); -static inline struct f_dynamic_attr f_new_dynamic_attr(u8 type, enum f_type f_type, uint code) /* Type as core knows it, type as filters know it, and code of dynamic attribute */ -{ return (struct f_dynamic_attr) { .type = type, .f_type = f_type, .ea_code = code }; } /* f_type currently unused; will be handy for static type checking */ -static inline struct f_dynamic_attr f_new_dynamic_attr_bit(u8 bit, enum f_type f_type, uint code) /* Type as core knows it, type as filters know it, and code of dynamic attribute */ -{ return (struct f_dynamic_attr) { .type = EAF_TYPE_BITFIELD, .bit = bit, .f_type = f_type, .ea_code = code }; } /* f_type currently unused; will be handy for static type checking */ -static inline struct f_static_attr f_new_static_attr(int f_type, int code, int readonly) -{ return (struct f_static_attr) { .f_type = f_type, .sa_code = code, .readonly = readonly }; } -struct f_inst *f_generate_complex(enum f_instruction_code fi_code, struct f_dynamic_attr da, struct f_inst *argument); +static inline struct f_static_attr f_new_static_attr(btype type, int code, int readonly) +{ return (struct f_static_attr) { .type = type, .sa_code = code, .readonly = readonly }; } struct f_inst *f_generate_roa_check(struct rtable_config *table, struct f_inst *prefix, struct f_inst *asn); +struct f_attr_bit { + const struct ea_class *class; + uint bit; +}; + +#define f_new_dynamic_attr_bit(_bit, _name) ((struct f_attr_bit) { .bit = _bit, .class = ea_class_find(_name) }) + /* Hook for call bt_assert() function in configuration */ extern void (*bt_assert_hook)(int result, const struct f_line_item *assert); diff --git a/filter/f-util.c b/filter/f-util.c index 410999a6..fb93ee80 100644 --- a/filter/f-util.c +++ b/filter/f-util.c @@ -2,7 +2,7 @@ * Filters: utility functions * * Copyright 1998 Pavel Machek <pavel@ucw.cz> - * 2017 Jan Maria Matejka <mq@ucw.cz> + * 2017 Maria Matejka <mq@ucw.cz> * * Can be freely distributed and used under the terms of the GNU GPL. */ @@ -13,7 +13,7 @@ #include "filter/f-inst.h" #include "lib/idm.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #define P(a,b) ((a<<8) | b) @@ -40,144 +40,3 @@ struct filter *f_new_where(struct f_inst *where) f->root = f_linearize(cond); return f; } - -#define CA_KEY(n) n->name, n->fda.type -#define CA_NEXT(n) n->next -#define CA_EQ(na,ta,nb,tb) (!strcmp(na,nb) && (ta == tb)) -#define CA_FN(n,t) (mem_hash(n, strlen(n)) ^ (t*0xaae99453U)) -#define CA_ORDER 8 /* Fixed */ - -struct ca_storage { - struct ca_storage *next; - struct f_dynamic_attr fda; - u32 uc; - char name[0]; -}; - -HASH(struct ca_storage) ca_hash; - -static struct idm ca_idm; -static struct ca_storage **ca_storage; -static uint ca_storage_max; - -static void -ca_free(resource *r) -{ - struct custom_attribute *ca = (void *) r; - struct ca_storage *cas = HASH_FIND(ca_hash, CA, ca->name, ca->fda->type); - ASSERT(cas); - - ca->name = NULL; - ca->fda = NULL; - if (!--cas->uc) { - uint id = EA_CUSTOM_ID(cas->fda.ea_code); - idm_free(&ca_idm, id); - HASH_REMOVE(ca_hash, CA, cas); - ca_storage[id] = NULL; - mb_free(cas); - } -} - -static void -ca_dump(resource *r) -{ - struct custom_attribute *ca = (void *) r; - debug("name \"%s\" id 0x%04x ea_type 0x%02x f_type 0x%02x\n", - ca->name, ca->fda->ea_code, ca->fda->type, ca->fda->f_type); -} - -static struct resclass ca_class = { - .name = "Custom attribute", - .size = sizeof(struct custom_attribute), - .free = ca_free, - .dump = ca_dump, - .lookup = NULL, - .memsize = NULL, -}; - -struct custom_attribute * -ca_lookup(pool *p, const char *name, int f_type) -{ - int ea_type; - - switch (f_type) { - case T_INT: - ea_type = EAF_TYPE_INT; - break; - case T_IP: - ea_type = EAF_TYPE_IP_ADDRESS; - break; - case T_QUAD: - ea_type = EAF_TYPE_ROUTER_ID; - break; - case T_PATH: - ea_type = EAF_TYPE_AS_PATH; - break; - case T_CLIST: - ea_type = EAF_TYPE_INT_SET; - break; - case T_ECLIST: - ea_type = EAF_TYPE_EC_SET; - break; - case T_LCLIST: - ea_type = EAF_TYPE_LC_SET; - break; - default: - cf_error("Custom route attribute of unsupported type"); - } - - static int inited = 0; - if (!inited) { - idm_init(&ca_idm, &root_pool, 8); - HASH_INIT(ca_hash, &root_pool, CA_ORDER); - - ca_storage_max = 256; - ca_storage = mb_allocz(&root_pool, sizeof(struct ca_storage *) * ca_storage_max); - - inited++; - } - - struct ca_storage *cas = HASH_FIND(ca_hash, CA, name, ea_type); - if (cas) { - cas->uc++; - } else { - - uint id = idm_alloc(&ca_idm); - - if (id >= EA_CUSTOM_BIT) - cf_error("Too many custom attributes."); - - if (id >= ca_storage_max) { - ca_storage_max *= 2; - ca_storage = mb_realloc(ca_storage, sizeof(struct ca_storage *) * ca_storage_max * 2); - } - - cas = mb_allocz(&root_pool, sizeof(struct ca_storage) + strlen(name) + 1); - cas->fda = f_new_dynamic_attr(ea_type, f_type, EA_CUSTOM(id)); - cas->uc = 1; - - strcpy(cas->name, name); - ca_storage[id] = cas; - - HASH_INSERT(ca_hash, CA, cas); - } - - struct custom_attribute *ca = ralloc(p, &ca_class); - ca->fda = &(cas->fda); - ca->name = cas->name; - return ca; -} - -const char * -ea_custom_name(uint ea) -{ - uint id = EA_CUSTOM_ID(ea); - if (id >= ca_storage_max) - return NULL; - - if (!ca_storage[id]) - return NULL; - - return ca_storage[id]->name; -} - diff --git a/filter/filter.c b/filter/filter.c index 6f1e6ea0..124c9932 100644 --- a/filter/filter.c +++ b/filter/filter.c @@ -35,10 +35,10 @@ #include "lib/ip.h" #include "lib/net.h" #include "lib/flowspec.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" -#include "nest/attrs.h" +#include "lib/attrs.h" #include "conf/conf.h" #include "filter/filter.h" #include "filter/f-inst.h" @@ -79,9 +79,6 @@ struct filter_state { /* Cached pointer to ea_list */ struct ea_list **eattrs; - /* Linpool for adata allocation */ - struct linpool *pool; - /* Buffer for log output */ struct buffer buf; @@ -117,7 +114,7 @@ f_rta_cow(struct filter_state *fs) * at the end of f_run()), also the lock of hostentry is inherited (we * suppose hostentry is not changed by filters). */ - fs->rte->attrs = rta_do_cow(fs->rte->attrs, fs->pool); + fs->rte->attrs = rta_do_cow(fs->rte->attrs, tmp_linpool); /* Re-cache the ea_list */ f_cache_eattrs(fs); @@ -185,8 +182,8 @@ interpret(struct filter_state *fs, const struct f_line *line, struct f_val *val) return F_ERROR; \ } while(0) -#define falloc(size) lp_alloc(fs->pool, size) -#define fpool fs->pool +#define falloc(size) tmp_alloc(size) +#define fpool tmp_linpool #define ACCESS_EATTRS do { if (!fs->eattrs) f_cache_eattrs(fs); } while (0) @@ -237,7 +234,7 @@ interpret(struct filter_state *fs, const struct f_line *line, struct f_val *val) * tmp_pool, otherwise the filters may modify it. */ enum filter_return -f_run(const struct filter *filter, struct rte *rte, struct linpool *tmp_pool, int flags) +f_run(const struct filter *filter, struct rte *rte, int flags) { if (filter == FILTER_ACCEPT) return F_ACCEPT; @@ -250,7 +247,6 @@ f_run(const struct filter *filter, struct rte *rte, struct linpool *tmp_pool, in /* Initialize the filter state */ filter_state = (struct filter_state) { .rte = rte, - .pool = tmp_pool, .flags = flags, }; @@ -285,11 +281,10 @@ f_run(const struct filter *filter, struct rte *rte, struct linpool *tmp_pool, in */ enum filter_return -f_eval_rte(const struct f_line *expr, struct rte *rte, struct linpool *tmp_pool) +f_eval_rte(const struct f_line *expr, struct rte *rte) { filter_state = (struct filter_state) { .rte = rte, - .pool = tmp_pool, }; f_stack_init(filter_state); @@ -308,11 +303,9 @@ f_eval_rte(const struct f_line *expr, struct rte *rte, struct linpool *tmp_pool) * @pres: here the output will be stored */ enum filter_return -f_eval(const struct f_line *expr, struct linpool *tmp_pool, struct f_val *pres) +f_eval(const struct f_line *expr, struct f_val *pres) { - filter_state = (struct filter_state) { - .pool = tmp_pool, - }; + filter_state = (struct filter_state) {}; f_stack_init(filter_state); @@ -331,9 +324,7 @@ uint f_eval_int(const struct f_line *expr) { /* Called independently in parse-time to eval expressions */ - filter_state = (struct filter_state) { - .pool = cfg_mem, - }; + filter_state = (struct filter_state) {}; f_stack_init(filter_state); @@ -354,10 +345,10 @@ f_eval_int(const struct f_line *expr) * f_eval_buf - get a value of a term and print it to the supplied buffer */ enum filter_return -f_eval_buf(const struct f_line *expr, struct linpool *tmp_pool, buffer *buf) +f_eval_buf(const struct f_line *expr, buffer *buf) { struct f_val val; - enum filter_return fret = f_eval(expr, tmp_pool, &val); + enum filter_return fret = f_eval(expr, &val); if (fret <= F_RETURN) val_format(&val, buf); return fret; @@ -425,6 +416,23 @@ filter_commit(struct config *new, struct config *old) } } +void channel_filter_dump(const struct filter *f) +{ + if (f == FILTER_ACCEPT) + debug(" ALL"); + else if (f == FILTER_REJECT) + debug(" NONE"); + else if (f == FILTER_UNDEF) + debug(" UNDEF"); + else if (f->sym) { + ASSERT(f->sym->filter == f); + debug(" named filter %s", f->sym->name); + } else { + debug("\n"); + f_dump_line(f->root, 2); + } +} + void filters_dump_all(void) { struct symbol *sym; @@ -444,19 +452,10 @@ void filters_dump_all(void) struct channel *c; WALK_LIST(c, sym->proto->proto->channels) { debug(" Channel %s (%s) IMPORT", c->name, net_label[c->net_type]); - if (c->in_filter == FILTER_ACCEPT) - debug(" ALL\n"); - else if (c->in_filter == FILTER_REJECT) - debug(" NONE\n"); - else if (c->in_filter == FILTER_UNDEF) - debug(" UNDEF\n"); - else if (c->in_filter->sym) { - ASSERT(c->in_filter->sym->filter == c->in_filter); - debug(" named filter %s\n", c->in_filter->sym->name); - } else { - debug("\n"); - f_dump_line(c->in_filter->root, 2); - } + channel_filter_dump(c->in_filter); + debug(" EXPORT", c->name, net_label[c->net_type]); + channel_filter_dump(c->out_filter); + debug("\n"); } } } diff --git a/filter/filter.h b/filter/filter.h index 9964831c..3f2e62eb 100644 --- a/filter/filter.h +++ b/filter/filter.h @@ -13,8 +13,8 @@ #include "lib/resource.h" #include "lib/ip.h" #include "lib/macro.h" -#include "nest/route.h" -#include "nest/attrs.h" +#include "nest/rt.h" +#include "lib/attrs.h" /* Possible return values of filter execution */ enum filter_return { @@ -51,10 +51,10 @@ struct filter { struct rte; -enum filter_return f_run(const struct filter *filter, struct rte *rte, struct linpool *tmp_pool, int flags); -enum filter_return f_eval_rte(const struct f_line *expr, struct rte *rte, struct linpool *tmp_pool); +enum filter_return f_run(const struct filter *filter, struct rte *rte, int flags); +enum filter_return f_eval_rte(const struct f_line *expr, struct rte *rte); uint f_eval_int(const struct f_line *expr); -enum filter_return f_eval_buf(const struct f_line *expr, struct linpool *tmp_pool, buffer *buf); +enum filter_return f_eval_buf(const struct f_line *expr, buffer *buf); const char *filter_name(const struct filter *filter); int filter_same(const struct filter *new, const struct filter *old); @@ -70,13 +70,4 @@ void filters_dump_all(void); #define FF_SILENT 2 /* Silent filter execution */ -/* Custom route attributes */ -struct custom_attribute { - resource r; - struct f_dynamic_attr *fda; - const char *name; -}; - -struct custom_attribute *ca_lookup(pool *p, const char *name, int ea_type); - #endif diff --git a/filter/filter_test.c b/filter/filter_test.c index 7e4af092..63764964 100644 --- a/filter/filter_test.c +++ b/filter/filter_test.c @@ -46,9 +46,7 @@ run_function(const void *arg) if (t->cmp) return t->result == f_same(t->fn, t->cmp); - linpool *tmp = lp_new_default(&root_pool); - enum filter_return fret = f_eval(t->fn, tmp, NULL); - rfree(tmp); + enum filter_return fret = f_eval(t->fn, NULL); return (fret < F_REJECT); } diff --git a/filter/test.conf b/filter/test.conf index 93e7a770..21e7330f 100644 --- a/filter/test.conf +++ b/filter/test.conf @@ -9,7 +9,109 @@ router id 62.168.0.1; /* We have to setup any protocol */ protocol device { } +/* Setting some custom attributes, enough to force BIRD to reallocate the attribute idmap */ +attribute int test_ca_int1; +attribute int test_ca_int2; +attribute int test_ca_int3; +attribute int test_ca_int4; +attribute int test_ca_int5; +attribute int test_ca_int6; +attribute int test_ca_int7; +attribute int test_ca_int8; +attribute int test_ca_int9; +attribute int test_ca_int10; +attribute ip test_ca_ip1; +attribute ip test_ca_ip2; +attribute ip test_ca_ip3; +attribute ip test_ca_ip4; +attribute ip test_ca_ip5; +attribute ip test_ca_ip6; +attribute ip test_ca_ip7; +attribute ip test_ca_ip8; +attribute ip test_ca_ip9; +attribute ip test_ca_ip10; + +attribute quad test_ca_quad1; +attribute quad test_ca_quad2; +attribute quad test_ca_quad3; +attribute quad test_ca_quad4; +attribute quad test_ca_quad5; +attribute quad test_ca_quad6; +attribute quad test_ca_quad7; +attribute quad test_ca_quad8; +attribute quad test_ca_quad9; +attribute quad test_ca_quad10; + +attribute bgppath test_ca_bgppath1; +attribute bgppath test_ca_bgppath2; +attribute bgppath test_ca_bgppath3; +attribute bgppath test_ca_bgppath4; +attribute bgppath test_ca_bgppath5; +attribute bgppath test_ca_bgppath6; +attribute bgppath test_ca_bgppath7; +attribute bgppath test_ca_bgppath8; +attribute bgppath test_ca_bgppath9; +attribute bgppath test_ca_bgppath10; + +attribute clist test_ca_clist1; +attribute clist test_ca_clist2; +attribute clist test_ca_clist3; +attribute clist test_ca_clist4; +attribute clist test_ca_clist5; +attribute clist test_ca_clist6; +attribute clist test_ca_clist7; +attribute clist test_ca_clist8; +attribute clist test_ca_clist9; +attribute clist test_ca_clist10; + +attribute eclist test_ca_eclist1; +attribute eclist test_ca_eclist2; +attribute eclist test_ca_eclist3; +attribute eclist test_ca_eclist4; +attribute eclist test_ca_eclist5; +attribute eclist test_ca_eclist6; +attribute eclist test_ca_eclist7; +attribute eclist test_ca_eclist8; +attribute eclist test_ca_eclist9; +attribute eclist test_ca_eclist10; + +attribute lclist test_ca_lclist1; +attribute lclist test_ca_lclist2; +attribute lclist test_ca_lclist3; +attribute lclist test_ca_lclist4; +attribute lclist test_ca_lclist5; +attribute lclist test_ca_lclist6; +attribute lclist test_ca_lclist7; +attribute lclist test_ca_lclist8; +attribute lclist test_ca_lclist9; +attribute lclist test_ca_lclist10; + +attribute lclist test_ca_lclist_max1; +attribute lclist test_ca_lclist_max2; +attribute lclist test_ca_lclist_max3; +attribute lclist test_ca_lclist_max4; +attribute lclist test_ca_lclist_max5; +attribute lclist test_ca_lclist_max6; +attribute lclist test_ca_lclist_max7; +attribute lclist test_ca_lclist_max8; +attribute lclist test_ca_lclist_max9; +attribute lclist test_ca_lclist_max10; +attribute lclist test_ca_lclist_max11; +attribute lclist test_ca_lclist_max12; +attribute lclist test_ca_lclist_max13; +attribute lclist test_ca_lclist_max14; +attribute lclist test_ca_lclist_max15; +attribute lclist test_ca_lclist_max16; +attribute lclist test_ca_lclist_max17; +attribute lclist test_ca_lclist_max18; +attribute lclist test_ca_lclist_max19; +attribute lclist test_ca_lclist_max20; +attribute lclist test_ca_lclist_max21; + + +/* Uncomment this to get an error */ +#attribute int bgp_path; /* * Common definitions and functions @@ -111,6 +213,14 @@ int i; bt_assert(!(i = 4)); bt_assert(1 <= 1); bt_assert(!(1234 < 1234)); + + bt_assert(10 - 5 = 5); + bt_assert(4294967295 + 1 = 0); + bt_assert(6*9=54); + bt_assert(984/41 = 24); + bt_assert(123/45 = 2); + bt_assert(0xfee1a | 0xbeef = 0xffeff); + bt_assert(0xfee1a & 0xbeef = 0xae0a); } bt_test_suite(t_int, "Testing integers"); @@ -335,6 +445,26 @@ ip p; p = 1234:5678::; bt_assert(!p.is_v4); bt_assert(p.mask(24) = 1234:5600::); + + p = 1:2:3:4:5:6:7:8; + bt_assert(!p.is_v4); + bt_assert(format(p) = "1:2:3:4:5:6:7:8"); + bt_assert(p.mask(64) = 1:2:3:4::); + + p = 10:20:30:40:50:60:70:80; + bt_assert(!p.is_v4); + bt_assert(format(p) = "10:20:30:40:50:60:70:80"); + bt_assert(p.mask(64) = 10:20:30:40::); + + p = 1090:20a0:30b0:40c0:50d0:60e0:70f0:8000; + bt_assert(!p.is_v4); + bt_assert(format(p) = "1090:20a0:30b0:40c0:50d0:60e0:70f0:8000"); + bt_assert(p.mask(64) = 1090:20a0:30b0:40c0::); + + p = ::fffe:6:c0c:936d:88c7:35d3; + bt_assert(!p.is_v4); + bt_assert(format(p) = "::fffe:6:c0c:936d:88c7:35d3"); + bt_assert(p.mask(64) = 0:0:fffe:6::); } bt_test_suite(t_ip, "Testing ip address"); @@ -378,9 +508,9 @@ bt_test_suite(t_ip_set, "Testing sets of ip address"); function t_enum() { - bt_assert(format(RTS_STATIC) = "(enum 30)1"); - bt_assert(format(NET_IP4) = "(enum 36)1"); - bt_assert(format(NET_VPN6) = "(enum 36)4"); + bt_assert(format(RTS_STATIC) = "(enum 31)1"); + bt_assert(format(NET_IP4) = "(enum 3b)1"); + bt_assert(format(NET_VPN6) = "(enum 3b)4"); bt_assert(RTS_STATIC ~ [RTS_STATIC, RTS_DEVICE]); bt_assert(RTS_BGP !~ [RTS_STATIC, RTS_DEVICE]); @@ -478,6 +608,33 @@ prefix set pxs; bt_assert(1.2.0.0/16 ~ [ 1.0.0.0/8{ 15 , 17 } ]); bt_assert([ 10.0.0.0/8{ 15 , 17 } ] != [ 11.0.0.0/8{ 15 , 17 } ]); + + /* Formatting of prefix sets, some cases are a bit strange */ + bt_assert(format([ 0.0.0.0/0 ]) = "[0.0.0.0/0]"); + bt_assert(format([ 10.10.0.0/32 ]) = "[10.10.0.0/32{0.0.0.1}]"); + bt_assert(format([ 10.10.0.0/17 ]) = "[10.10.0.0/17{0.0.128.0}]"); + bt_assert(format([ 10.10.0.0/17{17,19} ]) = "[10.10.0.0/17{0.0.224.0}]"); # 224 = 128+64+32 + bt_assert(format([ 10.10.128.0/17{18,19} ]) = "[10.10.128.0/18{0.0.96.0}, 10.10.192.0/18{0.0.96.0}]"); # 96 = 64+32 + bt_assert(format([ 10.10.64.0/18- ]) = "[0.0.0.0/0, 0.0.0.0/1{128.0.0.0}, 0.0.0.0/2{64.0.0.0}, 0.0.0.0/3{32.0.0.0}, 10.10.0.0/16{255.255.0.0}, 10.10.0.0/17{0.0.128.0}, 10.10.64.0/18{0.0.64.0}]"); + bt_assert(format([ 10.10.64.0/18+ ]) = "[10.10.64.0/18{0.0.96.0}, 10.10.64.0/20{0.0.31.255}, 10.10.80.0/20{0.0.31.255}, 10.10.96.0/20{0.0.31.255}, 10.10.112.0/20{0.0.31.255}]"); + + bt_assert(format([ 10.10.160.0/19 ]) = "[10.10.160.0/19{0.0.32.0}]"); + bt_assert(format([ 10.10.160.0/19{19,22} ]) = "[10.10.160.0/19{0.0.32.0}, 10.10.160.0/20{0.0.28.0}, 10.10.176.0/20{0.0.28.0}]"); # 28 = 16+8+4 + bt_assert(format([ 10.10.160.0/19+ ]) = "[10.10.160.0/19{0.0.32.0}, 10.10.160.0/20{0.0.31.255}, 10.10.176.0/20{0.0.31.255}]"); + + bt_assert(format([ ::/0 ]) = "[::/0]"); + bt_assert(format([ 11:22:33:44:55:66:77:88/128 ]) = "[11:22:33:44:55:66:77:88/128{::1}]"); + bt_assert(format([ 11:22:33:44::/64 ]) = "[11:22:33:44::/64{0:0:0:1::}]"); + bt_assert(format([ 11:22:33:44::/64+ ]) = "[11:22:33:44::/64{::1:ffff:ffff:ffff:ffff}]"); + + bt_assert(format([ 11:22:33:44::/65 ]) = "[11:22:33:44::/65{::8000:0:0:0}]"); + bt_assert(format([ 11:22:33:44::/65{65,67} ]) = "[11:22:33:44::/65{::e000:0:0:0}]"); # e = 8+4+2 + bt_assert(format([ 11:22:33:44:8000::/65{66,67} ]) = "[11:22:33:44:8000::/66{::6000:0:0:0}, 11:22:33:44:c000::/66{::6000:0:0:0}]"); # 6 = 4+2 + bt_assert(format([ 11:22:33:44:4000::/66- ]) = "[::/0, ::/1{8000::}, ::/2{4000::}, ::/3{2000::}, 11:22:33:44::/64{ffff:ffff:ffff:ffff::}, 11:22:33:44::/65{::8000:0:0:0}, 11:22:33:44:4000::/66{::4000:0:0:0}]"); + bt_assert(format([ 11:22:33:44:4000::/66+ ]) = "[11:22:33:44:4000::/66{::6000:0:0:0}, 11:22:33:44:4000::/68{::1fff:ffff:ffff:ffff}, 11:22:33:44:5000::/68{::1fff:ffff:ffff:ffff}, 11:22:33:44:6000::/68{::1fff:ffff:ffff:ffff}, 11:22:33:44:7000::/68{::1fff:ffff:ffff:ffff}]"); + bt_assert(format([ 11:22:33:44:c000::/67 ]) = "[11:22:33:44:c000::/67{::2000:0:0:0}]"); + bt_assert(format([ 11:22:33:44:c000::/67{67,71} ]) = "[11:22:33:44:c000::/67{::2000:0:0:0}, 11:22:33:44:c000::/68{::1e00:0:0:0}, 11:22:33:44:d000::/68{::1e00:0:0:0}]"); + bt_assert(format([ 11:22:33:44:c000::/67+ ]) = "[11:22:33:44:c000::/67{::2000:0:0:0}, 11:22:33:44:c000::/68{::1fff:ffff:ffff:ffff}, 11:22:33:44:d000::/68{::1fff:ffff:ffff:ffff}]"); } bt_test_suite(t_prefix_set, "Testing prefix sets"); @@ -557,6 +714,12 @@ prefix set pxs; bt_assert(2000::/29 !~ pxs); bt_assert(1100::/10 !~ pxs); bt_assert(2010::/26 !~ pxs); + + pxs = [ 52E0::/13{13,128} ]; + bt_assert(52E7:BE81:379B:E6FD:541F:B0D0::/93 ~ pxs); + + pxs = [ 41D8:8718::/30{0,30}, 413A:99A8:6C00::/38{38,128} ]; + bt_assert(4180::/9 ~ pxs); } bt_test_suite(t_prefix6_set, "Testing prefix IPv6 sets"); @@ -683,6 +846,11 @@ clist l; clist l2; clist r; { + bt_assert((10, 20).asn = 10); + bt_assert((10, 20).data = 20); + bt_assert(p23.asn = 2); + bt_assert(p23.data = 3); + l = - empty -; bt_assert(l !~ [(*,*)]); bt_assert((l ~ [(*,*)]) != (l !~ [(*,*)])); @@ -775,6 +943,12 @@ clist r; r = filter(l, [(3,1), (*,2)]); bt_assert(r = add(add(-empty-, (3,1)), (3,2))); bt_assert(format(r) = "(clist (3,1) (3,2))"); + + # minimim & maximum element + r = add(add(add(add(add(-empty-, (2,1)), (1,3)), (2,2)), (3,1)), (2,3)); + bt_assert(format(r) = "(clist (2,1) (1,3) (2,2) (3,1) (2,3))"); + bt_assert(r.min = (1,3)); + bt_assert(r.max = (3,1)); } bt_test_suite(t_clist, "Testing lists of communities"); @@ -880,6 +1054,12 @@ eclist r; r = filter(el, [(rt, 10, 1), (rt, 10, 25..30), (ro, 10, 40)]); bt_assert(r = add(add(--empty--, (rt, 10, 1)), (rt, 10, 30))); bt_assert(format(r) = "(eclist (rt, 10, 1) (rt, 10, 30))"); + + # minimim & maximum element + r = add(add(add(add(add(--empty--, (rt, 2, 1)), (rt, 1, 3)), (rt, 2, 2)), (rt, 3, 1)), (rt, 2, 3)); + bt_assert(format(r) = "(eclist (rt, 2, 1) (rt, 1, 3) (rt, 2, 2) (rt, 3, 1) (rt, 2, 3))"); + bt_assert(r.min = (rt, 1, 3)); + bt_assert(r.max = (rt, 3, 1)); } bt_test_suite(t_eclist, "Testing lists of extended communities"); @@ -939,6 +1119,10 @@ lclist r; bt_assert(---empty--- = ---empty---); bt_assert((10, 20, 30) !~ ---empty---); + bt_assert((10, 20, 30).asn = 10); + bt_assert((10, 20, 30).data1 = 20); + bt_assert((10, 20, 30).data2 = 30); + ll = --- empty ---; ll = add(ll, (ten, 20, 30)); ll = add(ll, (1000, 2000, 3000)); @@ -985,6 +1169,12 @@ lclist r; r = filter(ll, [(5..15, *, *), (20, 15..25, *)]); bt_assert(r = add(add(---empty---, (10, 10, 10)), (20, 20, 20))); bt_assert(format(r) = "(lclist (10, 10, 10) (20, 20, 20))"); + + # minimim & maximum element + r = add(add(add(add(add(---empty---, (2, 3, 3)), (1, 2, 3)), (2, 3, 1)), (3, 1, 2)), (2, 1, 3)); + bt_assert(format(r) = "(lclist (2, 3, 3) (1, 2, 3) (2, 3, 1) (3, 1, 2) (2, 1, 3))"); + bt_assert(r.min = (1, 2, 3)); + bt_assert(r.max = (3, 1, 2)); } bt_test_suite(t_lclist, "Testing lists of large communities"); @@ -1243,6 +1433,7 @@ function __test2() filter testf int j; +bool t; { print "Heya, filtering route to ", net.ip, " prefixlen ", net.len, " source ", source; print "This route was from ", from; @@ -1254,6 +1445,56 @@ int j; rip_metric = 14; unset(rip_metric); + test_ca_int1 = 42; + test_ca_ip2 = 1.3.5.7; + test_ca_quad3 = 2.4.6.8; + test_ca_bgppath4 = +empty+; + test_ca_clist5 = -empty-; + test_ca_eclist6 = --empty--; + test_ca_lclist7 = ---empty---; + + igp_metric = 53; + babel_metric = 64; + t = defined(babel_router_id); + j = babel_seqno; + + bgp_origin = ORIGIN_IGP; + bgp_path = +empty+; + bgp_next_hop = 3456:789a:bcde:f012::3456:789a; + bgp_med = 71; + bgp_local_pref = 942; + t = defined(bgp_atomic_aggr); + t = defined(bgp_aggregator); + bgp_community = -empty-; + bgp_originator_id = 9.7.5.3; + bgp_cluster_list = -empty-; + t = defined(bgp_mp_reach_nlri); + t = defined(bgp_mp_unreach_nlri); + bgp_ext_community = --empty--; + bgp_as4_path = +empty+; + t = defined(bgp_as4_aggregator); + t = defined(bgp_aigp); + bgp_large_community = ---empty---; + t = defined(bgp_mpls_label_stack); + + ospf_metric1 = 64; + ospf_metric2 = 111; + ospf_tag = 654432; + + radv_preference = RA_PREF_LOW; + radv_lifetime = 28; + + rip_metric = 2; + rip_tag = 4; + t = defined(rip_from); + + krt_source = 17; + krt_metric = 19; + +# krt_lock_mtu = false; +# krt_lock_window = true; +# krt_lock_rtt = krt_lock_rttvar && krt_lock_sstresh || krt_lock_cwnd; + accept "ok I take that"; } diff --git a/filter/test.conf2 b/filter/test.conf2 index e95f9563..9fc8330f 100644 --- a/filter/test.conf2 +++ b/filter/test.conf2 @@ -38,12 +38,6 @@ protocol static { print from; from = 1.2.3.4; print from; - print scope; - scope = SCOPE_HOST; - print scope; - if !(scope ~ [ SCOPE_HOST, SCOPE_SITE ]) then { - print "Failed in test"; - } preference = 15; print preference; diff --git a/filter/tree_test.c b/filter/tree_test.c index 6472d17e..05702f81 100644 --- a/filter/tree_test.c +++ b/filter/tree_test.c @@ -19,10 +19,7 @@ static void start_conf_env(void) { bt_bird_init(); - - pool *p = rp_new(&root_pool, "helper_pool"); - linpool *l = lp_new_default(p); - cfg_mem = l; + cfg_mem = tmp_linpool; } static struct f_tree * diff --git a/filter/trie.c b/filter/trie.c index 1a4e1ac3..12ba0b82 100644 --- a/filter/trie.c +++ b/filter/trie.c @@ -1,7 +1,8 @@ /* * Filters: Trie for prefix sets * - * Copyright 2009 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2009--2021 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2009--2021 CZ.NIC z.s.p.o. * * Can be freely distributed and used under the terms of the GNU GPL. */ @@ -9,53 +10,68 @@ /** * DOC: Trie for prefix sets * - * We use a (compressed) trie to represent prefix sets. Every node - * in the trie represents one prefix (&addr/&plen) and &plen also - * indicates the index of the bit in the address that is used to - * branch at the node. If we need to represent just a set of - * prefixes, it would be simple, but we have to represent a - * set of prefix patterns. Each prefix pattern consists of - * &ppaddr/&pplen and two integers: &low and &high, and a prefix - * &paddr/&plen matches that pattern if the first MIN(&plen, &pplen) - * bits of &paddr and &ppaddr are the same and &low <= &plen <= &high. + * We use a (compressed) trie to represent prefix sets. Every node in the trie + * represents one prefix (&addr/&plen) and &plen also indicates the index of + * bits in the address that are used to branch at the node. Note that such + * prefix is not necessary a member of the prefix set, it is just a canonical + * prefix associated with a node. Prefix lengths of nodes are aligned to + * multiples of &TRIE_STEP (4) and there is 16-way branching in each + * node. Therefore, we say that a node is associated with a range of prefix + * lengths (&plen .. &plen + TRIE_STEP - 1). * - * We use a bitmask (&accept) to represent accepted prefix lengths - * at a node. As there are 33 prefix lengths (0..32 for IPv4), but - * there is just one prefix of zero length in the whole trie so we - * have &zero flag in &f_trie (indicating whether the trie accepts - * prefix 0.0.0.0/0) as a special case, and &accept bitmask + * The prefix set is not just a set of prefixes, it is defined by a set of + * prefix patterns. Each prefix pattern consists of &ppaddr/&pplen and two + * integers: &low and &high. The tested prefix &paddr/&plen matches that pattern + * if the first MIN(&plen, &pplen) bits of &paddr and &ppaddr are the same and + * &low <= &plen <= &high. + * + * There are two ways to represent accepted prefixes for a node. First, there is + * a bitmask &local, which represents independently all 15 prefixes that extend + * the canonical prefix of the node and are within a range of prefix lengths + * associated with the node. E.g., for node 10.0.0.0/8 they are 10.0.0.0/8, + * 10.0.0.0/9, 10.128.0.0/9, .. 10.224.0.0/11. This order (first by length, then + * lexicographically) is used for indexing the bitmask &local, starting at + * position 1. I.e., index is 2^(plen - base) + offset within the same length, + * see function trie_local_mask6() for details. + * + * Second, we use a bitmask &accept to represent accepted prefix lengths at a + * node. The bit is set means that all prefixes of given length that are either + * subprefixes or superprefixes of the canonical prefix are accepted. As there + * are 33 prefix lengths (0..32 for IPv4), but there is just one prefix of zero + * length in the whole trie so we have &zero flag in &f_trie (indicating whether + * the trie accepts prefix 0.0.0.0/0) as a special case, and &accept bitmask * represents accepted prefix lengths from 1 to 32. * - * There are two cases in prefix matching - a match when the length - * of the prefix is smaller that the length of the prefix pattern, - * (&plen < &pplen) and otherwise. The second case is simple - we - * just walk through the trie and look at every visited node - * whether that prefix accepts our prefix length (&plen). The - * first case is tricky - we don't want to examine every descendant - * of a final node, so (when we create the trie) we have to propagate - * that information from nodes to their ascendants. + * One complication is handling of prefix patterns with unaligned prefix length. + * When such pattern is to be added, we add a primary node above (with rounded + * down prefix length &nlen) and a set of secondary nodes below (with rounded up + * prefix lengths &slen). Accepted prefix lengths of the original prefix pattern + * are then represented in different places based on their lengths. For prefixes + * shorter than &nlen, it is &accept bitmask of the primary node, for prefixes + * between &nlen and &slen - 1 it is &local bitmask of the primary node, and for + * prefixes longer of equal &slen it is &accept bitmasks of secondary nodes. * - * Suppose that we have two masks (M1 and M2) for a node. Mask M1 - * represents accepted prefix lengths by just the node and mask M2 - * represents accepted prefix lengths by the node or any of its - * descendants. Therefore M2 is a bitwise or of M1 and children's - * M2 and this is a maintained invariant during trie building. - * Basically, when we want to match a prefix, we walk through the trie, - * check mask M1 for our prefix length and when we came to - * final node, we check mask M2. + * There are two cases in prefix matching - a match when the length of the + * prefix is smaller that the length of the prefix pattern, (&plen < &pplen) and + * otherwise. The second case is simple - we just walk through the trie and look + * at every visited node whether that prefix accepts our prefix length (&plen). + * The first case is tricky - we do not want to examine every descendant of a + * final node, so (when we create the trie) we have to propagate that + * information from nodes to their ascendants. * - * There are two differences in the real implementation. First, - * we use a compressed trie so there is a case that we skip our - * final node (if it is not in the trie) and we came to node that - * is either extension of our prefix, or completely out of path - * In the first case, we also have to check M2. + * There are two kinds of propagations - propagation from child's &accept + * bitmask to parent's &accept bitmask, and propagation from child's &accept + * bitmask to parent's &local bitmask. The first kind is simple - as all + * superprefixes of a parent are also all superprefixes of appropriate length of + * a child, then we can just add (by bitwise or) a child &accept mask masked by + * parent prefix length mask to the parent &accept mask. This handles prefixes + * shorter than node &plen. * - * Second, we really need not to maintain two separate bitmasks. - * Checks for mask M1 are always larger than &applen and we need - * just the first &pplen bits of mask M2 (if trie compression - * hadn't been used it would suffice to know just $applen-th bit), - * so we have to store them together in &accept mask - the first - * &pplen bits of mask M2 and then mask M1. + * The second kind of propagation is necessary to handle superprefixes of a + * child that are represented by parent &local mask - that are in the range of + * prefix lengths associated with the parent. For each accepted (by child + * &accept mask) prefix length from that range, we need to set appropriate bit + * in &local mask. See function trie_amask_to_local() for details. * * There are four cases when we walk through a trie: * @@ -65,8 +81,32 @@ * - we are beyond the end of path (node length > &plen) * - we are still on path and keep walking (node length < &plen) * - * The walking code in trie_match_prefix() is structured according to - * these cases. + * The walking code in trie_match_net() is structured according to these cases. + * + * Iteration over prefixes in a trie can be done using TRIE_WALK() macro, or + * directly using trie_walk_init() and trie_walk_next() functions. The second + * approach allows suspending the iteration and continuing in it later. + * Prefixes are enumerated in the usual lexicographic order and may be + * restricted to a subset of the trie (all subnets of a specified prefix). + * + * Note that the trie walk does not reliably enumerate `implicit' prefixes + * defined by &low and &high fields in prefix patterns, it is supposed to be + * used on tries constructed from `explicit' prefixes (&low == &plen == &high + * in call to trie_add_prefix()). + * + * The trie walk has three basic state variables stored in the struct + * &f_trie_walk_state -- the current node in &stack[stack_pos], &accept_length + * for iteration over inter-node prefixes (non-branching prefixes on compressed + * path between the current node and its parent node, stored in the bitmap + * &accept of the current node) and &local_pos for iteration over intra-node + * prefixes (stored in the bitmap &local). + * + * The trie also supports longest-prefix-match query by trie_match_longest_ip4() + * and it can be extended to iteration over all covering prefixes for a given + * prefix (from longest to shortest) using TRIE_WALK_TO_ROOT_IP4() macro. There + * are also IPv6 versions (for practical reasons, these functions and macros are + * separate for IPv4 and IPv6). There is the same limitation to enumeration of + * `implicit' prefixes like with the previous TRIE_WALK() macro. */ #include "nest/bird.h" @@ -86,7 +126,10 @@ #define ipa_mkmask(x) ip6_mkmask(x) #define ipa_masklen(x) ip6_masklen(&x) #define ipa_pxlen(x,y) ip6_pxlen(x,y) -#define ipa_getbit(x,n) ip6_getbit(x,n) +#define ipa_getbit(a,p) ip6_getbit(a,p) +#define ipa_getbits(a,p,n) ip6_getbits(a,p,n) +#define ipa_setbits(a,p,n) ip6_setbits(a,p,n) +#define trie_local_mask(a,b,c) trie_local_mask6(a,b,c) #define ipt_from_ip4(x) _MI6(_I(x), 0, 0, 0) #define ipt_to_ip4(x) _MI4(_I0(x)) @@ -109,10 +152,11 @@ f_new_trie(linpool *lp, uint data_size) } static inline struct f_trie_node4 * -new_node4(struct f_trie *t, int plen, ip4_addr paddr, ip4_addr pmask, ip4_addr amask) +new_node4(struct f_trie *t, uint plen, uint local, ip4_addr paddr, ip4_addr pmask, ip4_addr amask) { struct f_trie_node4 *n = lp_allocz(t->lp, sizeof(struct f_trie_node4) + t->data_size); n->plen = plen; + n->local = local; n->addr = paddr; n->mask = pmask; n->accept = amask; @@ -120,10 +164,11 @@ new_node4(struct f_trie *t, int plen, ip4_addr paddr, ip4_addr pmask, ip4_addr a } static inline struct f_trie_node6 * -new_node6(struct f_trie *t, int plen, ip6_addr paddr, ip6_addr pmask, ip6_addr amask) +new_node6(struct f_trie *t, uint plen, uint local, ip6_addr paddr, ip6_addr pmask, ip6_addr amask) { struct f_trie_node6 *n = lp_allocz(t->lp, sizeof(struct f_trie_node6) + t->data_size); n->plen = plen; + n->local = local; n->addr = paddr; n->mask = pmask; n->accept = amask; @@ -131,24 +176,24 @@ new_node6(struct f_trie *t, int plen, ip6_addr paddr, ip6_addr pmask, ip6_addr a } static inline struct f_trie_node * -new_node(struct f_trie *t, int plen, ip_addr paddr, ip_addr pmask, ip_addr amask) +new_node(struct f_trie *t, uint plen, uint local, ip_addr paddr, ip_addr pmask, ip_addr amask) { if (t->ipv4) - return (struct f_trie_node *) new_node4(t, plen, ipt_to_ip4(paddr), ipt_to_ip4(pmask), ipt_to_ip4(amask)); + return (struct f_trie_node *) new_node4(t, plen, local, ipt_to_ip4(paddr), ipt_to_ip4(pmask), ipt_to_ip4(amask)); else - return (struct f_trie_node *) new_node6(t, plen, ipa_to_ip6(paddr), ipa_to_ip6(pmask), ipa_to_ip6(amask)); + return (struct f_trie_node *) new_node6(t, plen, local, ipa_to_ip6(paddr), ipa_to_ip6(pmask), ipa_to_ip6(amask)); } static inline void attach_node4(struct f_trie_node4 *parent, struct f_trie_node4 *child) { - parent->c[ip4_getbit(child->addr, parent->plen) ? 1 : 0] = child; + parent->c[ip4_getbits(child->addr, parent->plen, TRIE_STEP)] = child; } static inline void attach_node6(struct f_trie_node6 *parent, struct f_trie_node6 *child) { - parent->c[ip6_getbit(child->addr, parent->plen) ? 1 : 0] = child; + parent->c[ip6_getbits(child->addr, parent->plen, TRIE_STEP)] = child; } static inline void @@ -160,10 +205,182 @@ attach_node(struct f_trie_node *parent, struct f_trie_node *child, int v4) attach_node6(&parent->v6, &child->v6); } + +/* + * Internal prefixes of a node a represented by the local bitmask, each bit for + * one prefix. Bit 0 is unused, Bit 1 is for the main prefix of the node, + * remaining bits correspond to subprefixes by this pattern: + * + * 1 + * 2 3 + * 4 5 6 7 + * 8 9 A B C D E F + * + * E.g. for 10.0.0.0/8 node, the 10.64.0.0/10 would be position 5. + */ + +/* + * Compute appropriate mask representing prefix px/plen in local bitmask of node + * with prefix length nlen. Assuming that nlen <= plen < (nlen + TRIE_STEP). + */ +static inline uint +trie_local_mask4(ip4_addr px, uint plen, uint nlen) +{ + uint step = plen - nlen; + uint pos = (1u << step) + ip4_getbits(px, nlen, step); + return 1u << pos; +} + +static inline uint +trie_local_mask6(ip6_addr px, uint plen, uint nlen) +{ + uint step = plen - nlen; + uint pos = (1u << step) + ip6_getbits(px, nlen, step); + return 1u << pos; +} + +/* + * Compute an appropriate local mask (for a node with prefix length nlen) + * representing prefixes of px that are accepted by amask and fall within the + * range associated with that node. Used for propagation of child accept mask + * to parent local mask. + */ +static inline uint +trie_amask_to_local(ip_addr px, ip_addr amask, uint nlen) +{ + uint local = 0; + + for (uint plen = MAX(nlen, 1); plen < (nlen + TRIE_STEP); plen++) + if (ipa_getbit(amask, plen - 1)) + local |= trie_local_mask(px, plen, nlen); + + return local; +} + +/* + * Compute a bitmask representing a level of subprefixes (of the same length), + * using specified position as a root. E.g., level 2 from root position 3 would + * be bit positions C-F, returned as bitmask 0xf000. + */ +static inline uint +trie_level_mask(uint pos, uint level) +{ + return ((1u << (1u << level)) - 1) << (pos << level); +} + + #define GET_ADDR(N,F,X) ((X) ? ipt_from_ip4((N)->v4.F) : ipa_from_ip6((N)->v6.F)) #define SET_ADDR(N,F,X,V) ({ if (X) (N)->v4.F =ipt_to_ip4(V); else (N)->v6.F =ipa_to_ip6(V); }) -#define GET_CHILD(N,F,X,I) ((X) ? (struct f_trie_node *) (N)->v4.c[I] : (struct f_trie_node *) (N)->v6.c[I]) +#define GET_LOCAL(N,X) ((X) ? (N)->v4.local : (N)->v6.local) +#define ADD_LOCAL(N,X,V) ({ uint v_ = (V); if (X) (N)->v4.local |= v_; else (N)->v6.local |= v_; }) + +#define GET_CHILD(N,X,I) ((X) ? (struct f_trie_node *) (N)->v4.c[I] : (struct f_trie_node *) (N)->v6.c[I]) + + +static void * +trie_add_node(struct f_trie *t, uint plen, ip_addr px, uint local, uint l, uint h) +{ + uint l_ = l ? (l - 1) : 0; + ip_addr amask = (l_ < h) ? ipa_xor(ipa_mkmask(l_), ipa_mkmask(h)) : IPA_NONE; + ip_addr pmask = ipa_mkmask(plen); + ip_addr paddr = ipa_and(px, pmask); + struct f_trie_node *o = NULL; + struct f_trie_node *n = &t->root; + int v4 = t->ipv4; + + /* Add all bits for each active level (0x0002 0x000c 0x00f0 0xff00) */ + for (uint i = 0; i < TRIE_STEP; i++) + if ((l <= (plen + i)) && ((plen + i) <= h)) + local |= trie_level_mask(1, i); + + DBG("Insert node %I/%u (%I %x)\n", paddr, plen, amask, local); + while (n) + { + ip_addr naddr = GET_ADDR(n, addr, v4); + ip_addr nmask = GET_ADDR(n, mask, v4); + ip_addr accept = GET_ADDR(n, accept, v4); + ip_addr cmask = ipa_and(nmask, pmask); + uint nlen = v4 ? n->v4.plen : n->v6.plen; + + DBG("Found node %I/%u (%I %x)\n", + naddr, nlen, accept, v4 ? n->v4.local : n->v6.local); + + if (ipa_compare(ipa_and(paddr, cmask), ipa_and(naddr, cmask))) + { + /* We are out of path - we have to add branching node 'b' + between node 'o' and node 'n', and attach new node 'a' + as the other child of 'b'. */ + int blen = ROUND_DOWN_POW2(ipa_pxlen(paddr, naddr), TRIE_STEP); + ip_addr bmask = ipa_mkmask(blen); + ip_addr baddr = ipa_and(px, bmask); + + /* Merge accept masks from children to get accept mask for node 'b' */ + ip_addr baccm = ipa_and(ipa_or(amask, accept), bmask); + uint bloc = trie_amask_to_local(naddr, accept, blen) | + trie_amask_to_local(paddr, amask, blen); + + struct f_trie_node *a = new_node(t, plen, local, paddr, pmask, amask); + struct f_trie_node *b = new_node(t, blen, bloc, baddr, bmask, baccm); + attach_node(o, b, v4); + attach_node(b, n, v4); + attach_node(b, a, v4); + t->prefix_count++; + + DBG("Case 1\n"); + return a; + } + + if (plen < nlen) + { + /* We add new node 'a' between node 'o' and node 'n' */ + amask = ipa_or(amask, ipa_and(accept, pmask)); + local |= trie_amask_to_local(naddr, accept, plen); + struct f_trie_node *a = new_node(t, plen, local, paddr, pmask, amask); + attach_node(o, a, v4); + attach_node(a, n, v4); + t->prefix_count++; + + DBG("Case 2\n"); + return a; + } + + if (plen == nlen) + { + /* We already found added node in trie. Just update accept and local mask */ + accept = ipa_or(accept, amask); + SET_ADDR(n, accept, v4, accept); + + if ((GET_LOCAL(n, v4) & local) != local) + t->prefix_count++; + + ADD_LOCAL(n, v4, local); + + DBG("Case 3\n"); + return n; + } + + /* Update accept mask part M2 and go deeper */ + accept = ipa_or(accept, ipa_and(amask, nmask)); + SET_ADDR(n, accept, v4, accept); + ADD_LOCAL(n, v4, trie_amask_to_local(paddr, amask, nlen)); + + DBG("Step %u\n", ipa_getbits(paddr, nlen)); + + /* n->plen < plen and plen <= 32 (128) */ + o = n; + n = GET_CHILD(n, v4, ipa_getbits(paddr, nlen, TRIE_STEP)); + } + + /* We add new tail node 'a' after node 'o' */ + struct f_trie_node *a = new_node(t, plen, local, paddr, pmask, amask); + attach_node(o, a, v4); + t->prefix_count++; + + DBG("Case 4\n"); + return a; +} + /** * trie_add_prefix * @t: trie to add to @@ -180,7 +397,6 @@ attach_node(struct f_trie_node *parent, struct f_trie_node *child, int v4) * a pointer to the root node is returned. Returns NULL when called with * mismatched IPv4/IPv6 net type. */ - void * trie_add_prefix(struct f_trie *t, const net_addr *net, uint l, uint h) { @@ -190,9 +406,23 @@ trie_add_prefix(struct f_trie *t, const net_addr *net, uint l, uint h) switch (net->type) { - case NET_IP4: px = ipt_from_ip4(net4_prefix(net)); v4 = 1; break; - case NET_IP6: px = ipa_from_ip6(net6_prefix(net)); v4 = 0; break; - default: bug("invalid type"); + case NET_IP4: + case NET_VPN4: + case NET_ROA4: + px = ipt_from_ip4(net4_prefix(net)); + v4 = 1; + break; + + case NET_IP6: + case NET_VPN6: + case NET_ROA6: + case NET_IP6_SADR: + px = ipa_from_ip6(net6_prefix(net)); + v4 = 0; + break; + + default: + bug("invalid type"); } if (t->ipv4 != v4) @@ -203,112 +433,97 @@ trie_add_prefix(struct f_trie *t, const net_addr *net, uint l, uint h) return NULL; } + DBG("\nInsert net %N (%u-%u)\n", net, l, h); + if (l == 0) t->zero = 1; - else - l--; if (h < plen) plen = h; - ip_addr amask = ipa_xor(ipa_mkmask(l), ipa_mkmask(h)); - ip_addr pmask = ipa_mkmask(plen); - ip_addr paddr = ipa_and(px, pmask); - struct f_trie_node *o = NULL; - struct f_trie_node *n = &t->root; + /* Primary node length, plen rounded down */ + uint nlen = ROUND_DOWN_POW2(plen, TRIE_STEP); - while (n) - { - ip_addr naddr = GET_ADDR(n, addr, v4); - ip_addr nmask = GET_ADDR(n, mask, v4); - ip_addr accept = GET_ADDR(n, accept, v4); - ip_addr cmask = ipa_and(nmask, pmask); - uint nlen = v4 ? n->v4.plen : n->v6.plen; + if (plen == nlen) + return trie_add_node(t, nlen, px, 0, l, h); - if (ipa_compare(ipa_and(paddr, cmask), ipa_and(naddr, cmask))) - { - /* We are out of path - we have to add branching node 'b' - between node 'o' and node 'n', and attach new node 'a' - as the other child of 'b'. */ - int blen = ipa_pxlen(paddr, naddr); - ip_addr bmask = ipa_mkmask(blen); - ip_addr baddr = ipa_and(px, bmask); + /* Secondary node length, plen rouned up */ + uint slen = nlen + TRIE_STEP; + void *node = NULL; - /* Merge accept masks from children to get accept mask for node 'b' */ - ip_addr baccm = ipa_and(ipa_or(amask, accept), bmask); + /* + * For unaligned prefix lengths it is more complicated. We need to encode + * matching prefixes of lengths from l to h. There are three cases of lengths: + * + * 1) 0..nlen are encoded by the accept mask of the primary node + * 2) nlen..(slen-1) are encoded by the local mask of the primary node + * 3) slen..max are encoded in secondary nodes + */ - struct f_trie_node *a = new_node(t, plen, paddr, pmask, amask); - struct f_trie_node *b = new_node(t, blen, baddr, bmask, baccm); - attach_node(o, b, v4); - attach_node(b, n, v4); - attach_node(b, a, v4); - return a; - } + if (l < slen) + { + uint local = 0; - if (plen < nlen) - { - /* We add new node 'a' between node 'o' and node 'n' */ - amask = ipa_or(amask, ipa_and(accept, pmask)); - struct f_trie_node *a = new_node(t, plen, paddr, pmask, amask); - attach_node(o, a, v4); - attach_node(a, n, v4); - return a; - } + /* Compute local bits for accepted nlen..(slen-1) prefixes */ + for (uint i = 0; i < TRIE_STEP; i++) + if ((l <= (nlen + i)) && ((nlen + i) <= h)) + { + uint pos = (1u << i) + ipa_getbits(px, nlen, i); + uint len = ((nlen + i) <= plen) ? 1 : (1u << (nlen + i - plen)); - if (plen == nlen) - { - /* We already found added node in trie. Just update accept mask */ - accept = ipa_or(accept, amask); - SET_ADDR(n, accept, v4, accept); - return n; - } + /* We need to fill 'len' bits starting at 'pos' position */ + local |= ((1u << len) - 1) << pos; + } - /* Update accept mask part M2 and go deeper */ - accept = ipa_or(accept, ipa_and(amask, nmask)); - SET_ADDR(n, accept, v4, accept); + /* Add the primary node */ + node = trie_add_node(t, nlen, px, local, l, nlen); + } - /* n->plen < plen and plen <= 32 (128) */ - o = n; - n = GET_CHILD(n, c, v4, ipa_getbit(paddr, nlen) ? 1 : 0); - } + if (slen <= h) + { + uint l2 = MAX(l, slen); + uint max = (1u << (slen - plen)); - /* We add new tail node 'a' after node 'o' */ - struct f_trie_node *a = new_node(t, plen, paddr, pmask, amask); - attach_node(o, a, v4); + /* Add secondary nodes */ + for (uint i = 0; i < max; i++) + node = trie_add_node(t, slen, ipa_setbits(px, slen - 1, i), 0, l2, h); + } - return a; + return node; } + static int trie_match_net4(const struct f_trie *t, ip4_addr px, uint plen) { - ip4_addr pmask = ip4_mkmask(plen); - ip4_addr paddr = ip4_and(px, pmask); - if (plen == 0) return t->zero; int plentest = plen - 1; + uint nlen = ROUND_DOWN_POW2(plen, TRIE_STEP); + uint local = trie_local_mask4(px, plen, nlen); const struct f_trie_node4 *n = &t->root.v4; while (n) { - ip4_addr cmask = ip4_and(n->mask, pmask); - /* We are out of path */ - if (ip4_compare(ip4_and(paddr, cmask), ip4_and(n->addr, cmask))) + if (!ip4_prefix_equal(px, n->addr, MIN(plen, n->plen))) return 0; + /* Check local mask */ + if ((n->plen == nlen) && (n->local & local)) + return 1; + /* Check accept mask */ if (ip4_getbit(n->accept, plentest)) return 1; /* We finished trie walk and still no match */ - if (plen <= n->plen) + if (nlen <= n->plen) return 0; /* Choose children */ - n = n->c[(ip4_getbit(paddr, n->plen)) ? 1 : 0]; + n = n->c[ip4_getbits(px, n->plen, TRIE_STEP)]; } return 0; @@ -317,33 +532,34 @@ trie_match_net4(const struct f_trie *t, ip4_addr px, uint plen) static int trie_match_net6(const struct f_trie *t, ip6_addr px, uint plen) { - ip6_addr pmask = ip6_mkmask(plen); - ip6_addr paddr = ip6_and(px, pmask); - if (plen == 0) return t->zero; int plentest = plen - 1; + uint nlen = ROUND_DOWN_POW2(plen, TRIE_STEP); + uint local = trie_local_mask6(px, plen, nlen); const struct f_trie_node6 *n = &t->root.v6; while (n) { - ip6_addr cmask = ip6_and(n->mask, pmask); - /* We are out of path */ - if (ip6_compare(ip6_and(paddr, cmask), ip6_and(n->addr, cmask))) + if (!ip6_prefix_equal(px, n->addr, MIN(plen, n->plen))) return 0; + /* Check local mask */ + if ((n->plen == nlen) && (n->local & local)) + return 1; + /* Check accept mask */ if (ip6_getbit(n->accept, plentest)) return 1; /* We finished trie walk and still no match */ - if (plen <= n->plen) + if (nlen <= n->plen) return 0; /* Choose children */ - n = n->c[(ip6_getbit(paddr, n->plen)) ? 1 : 0]; + n = n->c[ip6_getbits(px, n->plen, TRIE_STEP)]; } return 0; @@ -378,6 +594,412 @@ trie_match_net(const struct f_trie *t, const net_addr *n) } } + +/** + * trie_match_longest_ip4 + * @t: trie + * @net: net address + * @dst: return value + * @found0: optional returned bitmask of found nodes + * + * Perform longest prefix match for the address @net and return the resulting + * prefix in the buffer @dst. The bitmask @found0 is used to report lengths of + * prefixes on the path from the root to the resulting prefix. E.g., if there is + * also a /20 shorter matching prefix, then 20-th bit is set in @found0. This + * can be used to enumerate all matching prefixes for the network @net using + * function trie_match_next_longest_ip4() or macro TRIE_WALK_TO_ROOT_IP4(). + * + * This function assumes IPv4 trie, there is also an IPv6 variant. The @net + * argument is typed as net_addr_ip4, but would accept any IPv4-based net_addr, + * like net4_prefix(). Anyway, returned @dst is always net_addr_ip4. + * + * Result: 1 if a matching prefix was found, 0 if not. + */ +int +trie_match_longest_ip4(const struct f_trie *t, const net_addr_ip4 *net, net_addr_ip4 *dst, ip4_addr *found0) +{ + ASSERT(t->ipv4); + + const ip4_addr prefix = net->prefix; + const int pxlen = net->pxlen; + + const struct f_trie_node4 *n = &t->root.v4; + int len = 0; + + ip4_addr found = IP4_NONE; + int last = -1; + + while (n) + { + /* We are out of path */ + if (!ip4_prefix_equal(prefix, n->addr, MIN(pxlen, n->plen))) + goto done; + + /* Check accept mask */ + for (; len < n->plen; len++) + { + if (len > pxlen) + goto done; + + if (ip4_getbit(n->accept, len - 1)) + { + /* len is always < 32 due to len < n->plen */ + ip4_setbit(&found, len); + last = len; + } + } + + /* Special case for max length, there is only one valid local position */ + if (len == IP4_MAX_PREFIX_LENGTH) + { + if (n->local & (1u << 1)) + last = len; + + goto done; + } + + /* Check local mask */ + for (int pos = 1; pos < (1 << TRIE_STEP); pos = 2 * pos + ip4_getbit(prefix, len), len++) + { + if (len > pxlen) + goto done; + + if (n->local & (1u << pos)) + { + /* len is always < 32 due to special case above */ + ip4_setbit(&found, len); + last = len; + } + } + + /* Choose child */ + n = n->c[ip4_getbits(prefix, n->plen, TRIE_STEP)]; + } + +done: + if (last < 0) + return 0; + + *dst = NET_ADDR_IP4(ip4_and(prefix, ip4_mkmask(last)), last); + + if (found0) + *found0 = found; + + return 1; +} + + +/** + * trie_match_longest_ip6 + * @t: trie + * @net: net address + * @dst: return value + * @found0: optional returned bitmask of found nodes + * + * Perform longest prefix match for the address @net and return the resulting + * prefix in the buffer @dst. The bitmask @found0 is used to report lengths of + * prefixes on the path from the root to the resulting prefix. E.g., if there is + * also a /20 shorter matching prefix, then 20-th bit is set in @found0. This + * can be used to enumerate all matching prefixes for the network @net using + * function trie_match_next_longest_ip6() or macro TRIE_WALK_TO_ROOT_IP6(). + * + * This function assumes IPv6 trie, there is also an IPv4 variant. The @net + * argument is typed as net_addr_ip6, but would accept any IPv6-based net_addr, + * like net6_prefix(). Anyway, returned @dst is always net_addr_ip6. + * + * Result: 1 if a matching prefix was found, 0 if not. + */ +int +trie_match_longest_ip6(const struct f_trie *t, const net_addr_ip6 *net, net_addr_ip6 *dst, ip6_addr *found0) +{ + ASSERT(!t->ipv4); + + const ip6_addr prefix = net->prefix; + const int pxlen = net->pxlen; + + const struct f_trie_node6 *n = &t->root.v6; + int len = 0; + + ip6_addr found = IP6_NONE; + int last = -1; + + while (n) + { + /* We are out of path */ + if (!ip6_prefix_equal(prefix, n->addr, MIN(pxlen, n->plen))) + goto done; + + /* Check accept mask */ + for (; len < n->plen; len++) + { + if (len > pxlen) + goto done; + + if (ip6_getbit(n->accept, len - 1)) + { + /* len is always < 128 due to len < n->plen */ + ip6_setbit(&found, len); + last = len; + } + } + + /* Special case for max length, there is only one valid local position */ + if (len == IP6_MAX_PREFIX_LENGTH) + { + if (n->local & (1u << 1)) + last = len; + + goto done; + } + + /* Check local mask */ + for (int pos = 1; pos < (1 << TRIE_STEP); pos = 2 * pos + ip6_getbit(prefix, len), len++) + { + if (len > pxlen) + goto done; + + if (n->local & (1u << pos)) + { + /* len is always < 128 due to special case above */ + ip6_setbit(&found, len); + last = len; + } + } + + /* Choose child */ + n = n->c[ip6_getbits(prefix, n->plen, TRIE_STEP)]; + } + +done: + if (last < 0) + return 0; + + *dst = NET_ADDR_IP6(ip6_and(prefix, ip6_mkmask(last)), last); + + if (found0) + *found0 = found; + + return 1; +} + +#define SAME_PREFIX(A,B,X,L) ((X) ? ip4_prefix_equal((A)->v4.addr, net4_prefix(B), (L)) : ip6_prefix_equal((A)->v6.addr, net6_prefix(B), (L))) +#define GET_NET_BITS(N,X,A,B) ((X) ? ip4_getbits(net4_prefix(N), (A), (B)) : ip6_getbits(net6_prefix(N), (A), (B))) + +/** + * trie_walk_init + * @s: walk state + * @t: trie + * @net: optional subnet for walk + * + * Initialize walk state for subsequent walk through nodes of the trie @t by + * trie_walk_next(). The argument @net allows to restrict walk to given subnet, + * otherwise full walk over all nodes is used. This is done by finding node at + * or below @net and starting position in it. + */ +void +trie_walk_init(struct f_trie_walk_state *s, const struct f_trie *t, const net_addr *net) +{ + *s = (struct f_trie_walk_state) { + .ipv4 = t->ipv4, + .accept_length = 0, + .start_pos = 1, + .local_pos = 1, + .stack_pos = 0, + .stack[0] = &t->root + }; + + if (!net) + return; + + /* We want to find node of level at least plen */ + int plen = ROUND_DOWN_POW2(net->pxlen, TRIE_STEP); + const struct f_trie_node *n = &t->root; + const int v4 = t->ipv4; + + while (n) + { + int nlen = v4 ? n->v4.plen : n->v6.plen; + + /* We are out of path */ + if (!SAME_PREFIX(n, net, v4, MIN(net->pxlen, nlen))) + break; + + /* We found final node */ + if (nlen >= plen) + { + if (nlen == plen) + { + /* Find proper local_pos, while accept_length is not used */ + int step = net->pxlen - plen; + s->start_pos = s->local_pos = (1u << step) + GET_NET_BITS(net, v4, plen, step); + s->accept_length = plen; + } + else + { + /* Start from pos 1 in local node, but first try accept mask */ + s->accept_length = net->pxlen; + } + + s->stack[0] = n; + return; + } + + /* Choose child */ + n = GET_CHILD(n, v4, GET_NET_BITS(net, v4, nlen, TRIE_STEP)); + } + + s->stack[0] = NULL; + return; +} + +#define GET_ACCEPT_BIT(N,X,B) ((X) ? ip4_getbit((N)->v4.accept, (B)) : ip6_getbit((N)->v6.accept, (B))) +#define GET_LOCAL_BIT(N,X,B) (((X) ? (N)->v4.local : (N)->v6.local) & (1u << (B))) + +/** + * trie_walk_next + * @s: walk state + * @net: return value + * + * Find the next prefix in the trie walk and return it in the buffer @net. + * Prefixes are walked in the usual lexicographic order and may be restricted + * to a subset of the trie during walk setup by trie_walk_init(). Note that the + * trie walk does not iterate reliably over 'implicit' prefixes defined by &low + * and &high fields in prefix patterns, it is supposed to be used on tries + * constructed from 'explicit' prefixes (&low == &plen == &high in call to + * trie_add_prefix()). + * + * Result: 1 if the next prefix was found, 0 for the end of walk. + */ +int +trie_walk_next(struct f_trie_walk_state *s, net_addr *net) +{ + const struct f_trie_node *n = s->stack[s->stack_pos]; + int len = s->accept_length; + int pos = s->local_pos; + int v4 = s->ipv4; + + /* + * The walk has three basic state variables -- n, len and pos. In each node n, + * we first walk superprefixes (by len in &accept bitmask), and then we walk + * internal positions (by pos in &local bitmask). These positions are: + * + * 1 + * 2 3 + * 4 5 6 7 + * 8 9 A B C D E F + * + * We walk them depth-first, including virtual positions 10-1F that are + * equivalent of position 1 in child nodes 0-F. + */ + + if (!n) + { + memset(net, 0, v4 ? sizeof(net_addr_ip4) : sizeof(net_addr_ip6)); + return 0; + } + +next_node:; + /* Current node prefix length */ + int nlen = v4 ? n->v4.plen : n->v6.plen; + + /* First, check for accept prefix */ + for (; len < nlen; len++) + if (GET_ACCEPT_BIT(n, v4, len - 1)) + { + if (v4) + net_fill_ip4(net, ip4_and(n->v4.addr, ip4_mkmask(len)), len); + else + net_fill_ip6(net, ip6_and(n->v6.addr, ip6_mkmask(len)), len); + + s->local_pos = pos; + s->accept_length = len + 1; + return 1; + } + +next_pos: + /* Bottom of this node */ + if (pos >= (1 << TRIE_STEP)) + { + const struct f_trie_node *child = GET_CHILD(n, v4, pos - (1 << TRIE_STEP)); + int dir = 0; + + /* No child node */ + if (!child) + { + /* Step up until return from left child (pos is even) */ + do + { + /* Step up from start node */ + if ((s->stack_pos == 0) && (pos == s->start_pos)) + { + s->stack[0] = NULL; + memset(net, 0, v4 ? sizeof(net_addr_ip4) : sizeof(net_addr_ip6)); + return 0; + } + + /* Top of this node */ + if (pos == 1) + { + ASSERT(s->stack_pos); + const struct f_trie_node *old = n; + + /* Move to parent node */ + s->stack_pos--; + n = s->stack[s->stack_pos]; + nlen = v4 ? n->v4.plen : n->v6.plen; + + pos = v4 ? + ip4_getbits(old->v4.addr, nlen, TRIE_STEP) : + ip6_getbits(old->v6.addr, nlen, TRIE_STEP); + pos += (1 << TRIE_STEP); + len = nlen; + + ASSERT(GET_CHILD(n, v4, pos - (1 << TRIE_STEP)) == old); + } + + /* Step up */ + dir = pos % 2; + pos = pos / 2; + } + while (dir); + + /* Continue with step down to the right child */ + pos = 2 * pos + 1; + goto next_pos; + } + + /* Move to child node */ + pos = 1; + len = nlen + TRIE_STEP; + + s->stack_pos++; + n = s->stack[s->stack_pos] = child; + goto next_node; + } + + /* Check for local prefix */ + if (GET_LOCAL_BIT(n, v4, pos)) + { + /* Convert pos to address of local network */ + int x = (pos >= 2) + (pos >= 4) + (pos >= 8); + int y = pos & ((1u << x) - 1); + + if (v4) + net_fill_ip4(net, !x ? n->v4.addr : ip4_setbits(n->v4.addr, nlen + x - 1, y), nlen + x); + else + net_fill_ip6(net, !x ? n->v6.addr : ip6_setbits(n->v6.addr, nlen + x - 1, y), nlen + x); + + s->local_pos = 2 * pos; + s->accept_length = len; + return 1; + } + + /* Step down */ + pos = 2 * pos; + goto next_pos; +} + + static int trie_node_same4(const struct f_trie_node4 *t1, const struct f_trie_node4 *t2) { @@ -392,7 +1014,11 @@ trie_node_same4(const struct f_trie_node4 *t1, const struct f_trie_node4 *t2) (! ip4_equal(t1->accept, t2->accept))) return 0; - return trie_node_same4(t1->c[0], t2->c[0]) && trie_node_same4(t1->c[1], t2->c[1]); + for (uint i = 0; i < (1 << TRIE_STEP); i++) + if (! trie_node_same4(t1->c[i], t2->c[i])) + return 0; + + return 1; } static int @@ -409,7 +1035,11 @@ trie_node_same6(const struct f_trie_node6 *t1, const struct f_trie_node6 *t2) (! ip6_equal(t1->accept, t2->accept))) return 0; - return trie_node_same6(t1->c[0], t2->c[0]) && trie_node_same6(t1->c[1], t2->c[1]); + for (uint i = 0; i < (1 << TRIE_STEP); i++) + if (! trie_node_same6(t1->c[i], t2->c[i])) + return 0; + + return 1; } /** @@ -431,30 +1061,70 @@ trie_same(const struct f_trie *t1, const struct f_trie *t2) return trie_node_same6(&t1->root.v6, &t2->root.v6); } -static void -trie_node_format4(const struct f_trie_node4 *t, buffer *buf) -{ - if (t == NULL) - return; - if (ip4_nonzero(t->accept)) - buffer_print(buf, "%I4/%d{%I4}, ", t->addr, t->plen, t->accept); - - trie_node_format4(t->c[0], buf); - trie_node_format4(t->c[1], buf); -} +static const u8 log2[16] = {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3}; static void -trie_node_format6(const struct f_trie_node6 *t, buffer *buf) +trie_node_format(const struct f_trie_node *n, buffer *buf, int v4) { - if (t == NULL) + if (n == NULL) return; - if (ip6_nonzero(t->accept)) - buffer_print(buf, "%I6/%d{%I6}, ", t->addr, t->plen, t->accept); + if (v4) + { + if (ip4_nonzero(n->v4.accept)) + buffer_print(buf, "%I4/%d{%I4}, ", n->v4.addr, n->v4.plen, n->v4.accept); + } + else + { + if (ip6_nonzero(n->v6.accept)) + buffer_print(buf, "%I6/%d{%I6}, ", n->v6.addr, n->v6.plen, n->v6.accept); + } - trie_node_format6(t->c[0], buf); - trie_node_format6(t->c[1], buf); + int nlen = v4 ? n->v4.plen : n->v6.plen; + uint local = v4 ? n->v4.local : n->v6.local; + + for (int i = (nlen ? 0 : 1); i < TRIE_STEP; i++) + if (GET_ACCEPT_BIT(n, v4, nlen + i - 1)) + local &= ~trie_level_mask(1, i); + + for (int pos = 2; local && (pos < (1 << TRIE_STEP)); pos++) + if (local & (1u << pos)) + { + int lvl = log2[pos]; + int plen = nlen + lvl; + + int i; + for (i = 0; lvl + i < TRIE_STEP; i++) + { + uint lmask = trie_level_mask(pos, i); + + if ((local & lmask) != lmask) + break; + + local &= ~lmask; + } + + uint addr_bits = pos & ((1u << lvl) - 1); + uint accept_bits = (1u << i) - 1; + int h = plen + i - 1; + + if (v4) + { + ip4_addr addr = ip4_setbits(n->v4.addr, plen - 1, addr_bits); + ip4_addr mask = ip4_setbits(IP4_NONE, h - 1, accept_bits); + buffer_print(buf, "%I4/%d{%I4}, ", addr, plen, mask); + } + else + { + ip6_addr addr = ip6_setbits(n->v6.addr, plen - 1, addr_bits); + ip6_addr mask = ip6_setbits(IP6_NONE, h - 1, accept_bits); + buffer_print(buf, "%I6/%d{%I6}, ", addr, plen, mask); + } + } + + for (int i = 0; i < (1 << TRIE_STEP); i++) + trie_node_format(GET_CHILD(n, v4, i), buf, v4); } /** @@ -472,10 +1142,7 @@ trie_format(const struct f_trie *t, buffer *buf) if (t->zero) buffer_print(buf, "%I/%d, ", t->ipv4 ? IPA_NONE4 : IPA_NONE6, 0); - if (t->ipv4) - trie_node_format4(&t->root.v4, buf); - else - trie_node_format6(&t->root.v6, buf); + trie_node_format(&t->root, buf, t->ipv4); if (buf->pos == buf->end) return; diff --git a/filter/trie_test.c b/filter/trie_test.c index b2b36716..dc791280 100644 --- a/filter/trie_test.c +++ b/filter/trie_test.c @@ -14,9 +14,12 @@ #include "conf/conf.h" #define TESTS_NUM 10 -#define PREFIXES_NUM 10 +#define PREFIXES_NUM 32 #define PREFIX_TESTS_NUM 10000 +#define PREFIX_BENCH_NUM 100000000 +#define TRIE_BUFFER_SIZE 1024 +#define TEST_BUFFER_SIZE (1024*1024) #define BIG_BUFFER_SIZE 10000 /* Wrapping structure for storing f_prefixes structures in list */ @@ -31,146 +34,849 @@ xrandom(u32 max) return (bt_random() % max); } +static inline uint +get_exp_random(void) +{ + uint r, n = 0; + + for (r = bt_random(); r & 1; r = r >> 1) + n++; + + return n; +} + static int -is_prefix_included(list *prefixes, struct f_prefix *needle) +compare_prefixes(const void *a, const void *b) +{ + return net_compare(&((const struct f_prefix *) a)->net, + &((const struct f_prefix *) b)->net); +} + +static inline int +matching_ip4_nets(const net_addr_ip4 *a, const net_addr_ip4 *b) +{ + ip4_addr cmask = ip4_mkmask(MIN(a->pxlen, b->pxlen)); + return ip4_compare(ip4_and(a->prefix, cmask), ip4_and(b->prefix, cmask)) == 0; +} + +static inline int +matching_ip6_nets(const net_addr_ip6 *a, const net_addr_ip6 *b) +{ + ip6_addr cmask = ip6_mkmask(MIN(a->pxlen, b->pxlen)); + return ip6_compare(ip6_and(a->prefix, cmask), ip6_and(b->prefix, cmask)) == 0; +} + +static inline int +matching_nets(const net_addr *a, const net_addr *b) +{ + if (a->type != b->type) + return 0; + + return (a->type == NET_IP4) ? + matching_ip4_nets((const net_addr_ip4 *) a, (const net_addr_ip4 *) b) : + matching_ip6_nets((const net_addr_ip6 *) a, (const net_addr_ip6 *) b); +} + +static int +is_prefix_included(list *prefixes, const net_addr *needle) { struct f_prefix_node *n; WALK_LIST(n, *prefixes) - { - ip6_addr cmask = ip6_mkmask(MIN(n->prefix.net.pxlen, needle->net.pxlen)); - - ip6_addr ip = net6_prefix(&n->prefix.net); - ip6_addr needle_ip = net6_prefix(&needle->net); - - if ((ipa_compare(ipa_and(ip, cmask), ipa_and(needle_ip, cmask)) == 0) && - (n->prefix.lo <= needle->net.pxlen) && (needle->net.pxlen <= n->prefix.hi)) + if (matching_nets(&n->prefix.net, needle) && + (n->prefix.lo <= needle->pxlen) && (needle->pxlen <= n->prefix.hi)) { - bt_debug("FOUND\t" PRIip6 "/%d %d-%d\n", ARGip6(net6_prefix(&n->prefix.net)), n->prefix.net.pxlen, n->prefix.lo, n->prefix.hi); + char buf[64]; + bt_format_net(buf, 64, &n->prefix.net); + bt_debug("FOUND %s %d-%d\n", buf, n->prefix.lo, n->prefix.hi); + return 1; /* OK */ } - } + return 0; /* FAIL */ } -static struct f_prefix -get_random_ip6_prefix(void) +static void +get_random_net(net_addr *net, int v6) { - struct f_prefix p; - u8 pxlen = xrandom(120)+8; - ip6_addr ip6 = ip6_build(bt_random(),bt_random(),bt_random(),bt_random()); - net_addr_ip6 net6 = NET_ADDR_IP6(ip6, pxlen); - - p.net = *((net_addr*) &net6); - - if (bt_random() % 2) + if (!v6) { - p.lo = 0; - p.hi = p.net.pxlen; + uint pxlen = xrandom(24)+8; + ip4_addr ip4 = ip4_from_u32((u32) bt_random()); + net_fill_ip4(net, ip4_and(ip4, ip4_mkmask(pxlen)), pxlen); } else { - p.lo = p.net.pxlen; - p.hi = net_max_prefix_length[p.net.type]; + uint pxlen = xrandom(120)+8; + ip6_addr ip6 = ip6_build(bt_random(), bt_random(), bt_random(), bt_random()); + net_fill_ip6(net, ip6_and(ip6, ip6_mkmask(pxlen)), pxlen); } - - return p; } static void -generate_random_ipv6_prefixes(list *prefixes) +get_random_prefix(struct f_prefix *px, int v6, int tight) { - int i; - for (i = 0; i < PREFIXES_NUM; i++) + get_random_net(&px->net, v6); + + if (tight) { - struct f_prefix f = get_random_ip6_prefix(); - - struct f_prefix_node *px = calloc(1, sizeof(struct f_prefix_node)); - px->prefix = f; - - bt_debug("ADD\t" PRIip6 "/%d %d-%d\n", ARGip6(net6_prefix(&px->prefix.net)), px->prefix.net.pxlen, px->prefix.lo, px->prefix.hi); - add_tail(prefixes, &px->n); + px->lo = px->hi = px->net.pxlen; + } + else if (bt_random() % 2) + { + px->lo = 0; + px->hi = px->net.pxlen; + } + else + { + px->lo = px->net.pxlen; + px->hi = net_max_prefix_length[px->net.type]; } } +static void +get_random_ip4_subnet(net_addr_ip4 *net, const net_addr_ip4 *src, int pxlen) +{ + *net = NET_ADDR_IP4(ip4_and(src->prefix, ip4_mkmask(pxlen)), pxlen); + + if (pxlen > src->pxlen) + { + ip4_addr rnd = ip4_from_u32((u32) bt_random()); + ip4_addr mask = ip4_xor(ip4_mkmask(src->pxlen), ip4_mkmask(pxlen)); + net->prefix = ip4_or(net->prefix, ip4_and(rnd, mask)); + } +} + +static void +get_random_ip6_subnet(net_addr_ip6 *net, const net_addr_ip6 *src, int pxlen) +{ + *net = NET_ADDR_IP6(ip6_and(src->prefix, ip6_mkmask(pxlen)), pxlen); + + if (pxlen > src->pxlen) + { + ip6_addr rnd = ip6_build(bt_random(), bt_random(), bt_random(), bt_random()); + ip6_addr mask = ip6_xor(ip6_mkmask(src->pxlen), ip6_mkmask(pxlen)); + net->prefix = ip6_or(net->prefix, ip6_and(rnd, mask)); + } +} + +static void +get_random_subnet(net_addr *net, const net_addr *src, int pxlen) +{ + if (src->type == NET_IP4) + get_random_ip4_subnet((net_addr_ip4 *) net, (const net_addr_ip4 *) src, pxlen); + else + get_random_ip6_subnet((net_addr_ip6 *) net, (const net_addr_ip6 *) src, pxlen); +} + +static void +get_inner_net(net_addr *net, const struct f_prefix *src) +{ + int pxlen, step; + + if (bt_random() % 2) + { + step = get_exp_random(); + step = MIN(step, src->hi - src->lo); + pxlen = (bt_random() % 2) ? (src->lo + step) : (src->hi - step); + } + else + pxlen = src->lo + bt_random() % (src->hi - src->lo + 1); + + get_random_subnet(net, &src->net, pxlen); +} + +static void +swap_random_bits_ip4(net_addr_ip4 *net, int num) +{ + for (int i = 0; i < num; i++) + { + ip4_addr swap = IP4_NONE; + ip4_setbit(&swap, bt_random() % net->pxlen); + net->prefix = ip4_xor(net->prefix, swap); + } +} + +static void +swap_random_bits_ip6(net_addr_ip6 *net, int num) +{ + for (int i = 0; i < num; i++) + { + ip6_addr swap = IP6_NONE; + ip6_setbit(&swap, bt_random() % net->pxlen); + net->prefix = ip6_xor(net->prefix, swap); + } +} + +static void +swap_random_bits(net_addr *net, int num) +{ + if (net->type == NET_IP4) + swap_random_bits_ip4((net_addr_ip4 *) net, num); + else + swap_random_bits_ip6((net_addr_ip6 *) net, num); +} + +static void +get_outer_net(net_addr *net, const struct f_prefix *src) +{ + int pxlen, step; + int inside = 0; + int max = net_max_prefix_length[src->net.type]; + + if ((src->lo > 0) && (bt_random() % 3)) + { + step = 1 + get_exp_random(); + step = MIN(step, src->lo); + pxlen = src->lo - step; + } + else if ((src->hi < max) && (bt_random() % 2)) + { + step = 1 + get_exp_random(); + step = MIN(step, max - src->hi); + pxlen = src->hi + step; + } + else + { + pxlen = src->lo + bt_random() % (src->hi - src->lo + 1); + inside = 1; + } + + get_random_subnet(net, &src->net, pxlen); + + /* Perhaps swap some bits in prefix */ + if ((net->pxlen > 0) && (inside || (bt_random() % 4))) + swap_random_bits(net, 1 + get_exp_random()); +} + +static list * +make_random_prefix_list(int num, int v6, int tight) +{ + list *prefixes = lp_allocz(tmp_linpool, sizeof(struct f_prefix_node)); + init_list(prefixes); + + for (int i = 0; i < num; i++) + { + struct f_prefix_node *px = lp_allocz(tmp_linpool, sizeof(struct f_prefix_node)); + get_random_prefix(&px->prefix, v6, tight); + add_tail(prefixes, &px->n); + + char buf[64]; + bt_format_net(buf, 64, &px->prefix.net); + bt_debug("ADD %s{%d,%d}\n", buf, px->prefix.lo, px->prefix.hi); + } + + return prefixes; +} + +static struct f_trie * +make_trie_from_prefix_list(list *prefixes) +{ + struct f_trie *trie = f_new_trie(tmp_linpool, 0); + + struct f_prefix_node *n; + WALK_LIST(n, *prefixes) + trie_add_prefix(trie, &n->prefix.net, n->prefix.lo, n->prefix.hi); + + return trie; +} + +/* + * Read sequence of prefixes from file handle and return prefix list. + * Each prefix is on one line, sequence terminated by empty line or eof. + * Arg @plus means prefix should include all longer ones. + */ +static list * +read_prefix_list(FILE *f, int v6, int plus) +{ + ASSERT(!v6); + + uint a0, a1, a2, a3, pl; + char s[32]; + int n; + + list *pxlist = lp_allocz(tmp_linpool, sizeof(struct f_prefix_node)); + init_list(pxlist); + + errno = 0; + while (fgets(s, 32, f)) + { + if (s[0] == '\n') + return pxlist; + + n = sscanf(s, "%u.%u.%u.%u/%u", &a0, &a1, &a2, &a3, &pl); + + if (n != 5) + bt_abort_msg("Invalid content of trie_data"); + + struct f_prefix_node *px = lp_allocz(tmp_linpool, sizeof(struct f_prefix_node)); + net_fill_ip4(&px->prefix.net, ip4_build(a0, a1, a2, a3), pl); + px->prefix.lo = pl; + px->prefix.hi = plus ? IP4_MAX_PREFIX_LENGTH : pl; + add_tail(pxlist, &px->n); + + char buf[64]; + bt_format_net(buf, 64, &px->prefix.net); + bt_debug("ADD %s{%d,%d}\n", buf, px->prefix.lo, px->prefix.hi); + } + + bt_syscall(errno, "fgets()"); + return EMPTY_LIST(*pxlist) ? NULL : pxlist; +} + +/* + * Open file, read multiple sequences of prefixes from it. Fill @data with + * prefix lists and @trie with generated tries. Return number of sequences / + * tries. Use separate linpool @lp0 for prefix lists and @lp1 for tries. + * Arg @plus means prefix should include all longer ones. + */ static int -t_match_net(void) +read_prefix_file(const char *filename, int plus, + list *data[], struct f_trie *trie[]) +{ + FILE *f = fopen(filename, "r"); + bt_syscall(!f, "fopen(%s)", filename); + + int n = 0; + list *pxlist; + while (pxlist = read_prefix_list(f, 0, plus)) + { + data[n] = pxlist; + trie[n] = make_trie_from_prefix_list(pxlist); + bt_debug("NEXT\n"); + n++; + } + + fclose(f); + bt_debug("DONE reading %d tries\n", n); + + return n; +} + +/* + * Select random subset of @dn prefixes from prefix list @src of length @sn, + * and store them to buffer @dst (of size @dn). Prefixes may be chosen multiple + * times. Randomize order of prefixes in @dst buffer. + */ +static void +select_random_prefix_subset(list *src[], net_addr dst[], int sn, int dn) +{ + int pn = 0; + + if (!dn) + return; + + /* Compute total prefix number */ + for (int i = 0; i < sn; i++) + pn += list_length(src[i]); + + /* Change of selecting a prefix */ + int rnd = (pn / dn) + 10; + int n = 0; + + /* Iterate indefinitely over src array */ + for (int i = 0; 1; i++, i = (i < sn) ? i : 0) + { + struct f_prefix_node *px; + WALK_LIST(px, *src[i]) + { + if (xrandom(rnd) != 0) + continue; + + net_copy(&dst[n], &px->prefix.net); + n++; + + /* We have enough */ + if (n == dn) + goto done; + } + } + +done: + /* Shuffle networks */ + for (int i = 0; i < dn; i++) + { + int j = xrandom(dn); + + if (i == j) + continue; + + net_addr tmp; + net_copy(&tmp, &dst[i]); + net_copy(&dst[i], &dst[j]); + net_copy(&dst[j], &tmp); + } +} + +/* Fill @dst buffer with @dn randomly generated /32 prefixes */ +static void +make_random_addresses(net_addr dst[], int dn) +{ + for (int i = 0; i < dn; i++) + net_fill_ip4(&dst[i], ip4_from_u32((u32) bt_random()), IP4_MAX_PREFIX_LENGTH); +} + +static void +test_match_net(list *prefixes, struct f_trie *trie, const net_addr *net) +{ + char buf[64]; + bt_format_net(buf, 64, net); + bt_debug("TEST %s\n", buf); + + int should_be = is_prefix_included(prefixes, net); + int is_there = trie_match_net(trie, net); + + bt_assert_msg(should_be == is_there, "Prefix %s %s match", buf, + (should_be ? "should" : "should not")); +} + +static int +t_match_random_net(void) { bt_bird_init(); bt_config_parse(BT_CONFIG_SIMPLE); - uint round; - for (round = 0; round < TESTS_NUM; round++) + int v6 = 0; + for (int round = 0; round < TESTS_NUM; round++) { - list prefixes; /* of structs f_extended_prefix */ - init_list(&prefixes); - struct f_trie *trie = f_new_trie(config->mem, 0); + list *prefixes = make_random_prefix_list(PREFIXES_NUM, v6, 0); + struct f_trie *trie = make_trie_from_prefix_list(prefixes); - generate_random_ipv6_prefixes(&prefixes); - struct f_prefix_node *n; - WALK_LIST(n, prefixes) + for (int i = 0; i < PREFIX_TESTS_NUM; i++) { - trie_add_prefix(trie, &n->prefix.net, n->prefix.lo, n->prefix.hi); + net_addr net; + get_random_net(&net, v6); + test_match_net(prefixes, trie, &net); } - int i; - for (i = 0; i < PREFIX_TESTS_NUM; i++) - { - struct f_prefix f = get_random_ip6_prefix(); - bt_debug("TEST\t" PRIip6 "/%d\n", ARGip6(net6_prefix(&f.net)), f.net.pxlen); - - int should_be = is_prefix_included(&prefixes, &f); - int is_there = trie_match_net(trie, &f.net); - bt_assert_msg(should_be == is_there, "Prefix " PRIip6 "/%d %s", ARGip6(net6_prefix(&f.net)), f.net.pxlen, (should_be ? "should be found in trie" : "should not be found in trie")); - } - - struct f_prefix_node *nxt; - WALK_LIST_DELSAFE(n, nxt, prefixes) - { - free(n); - } + v6 = !v6; + tmp_flush(); } bt_bird_cleanup(); return 1; } +static int +t_match_inner_net(void) +{ + bt_bird_init(); + bt_config_parse(BT_CONFIG_SIMPLE); + + int v6 = 0; + for (int round = 0; round < TESTS_NUM; round++) + { + list *prefixes = make_random_prefix_list(PREFIXES_NUM, v6, 0); + struct f_trie *trie = make_trie_from_prefix_list(prefixes); + + struct f_prefix_node *n = HEAD(*prefixes); + for (int i = 0; i < PREFIX_TESTS_NUM; i++) + { + net_addr net; + get_inner_net(&net, &n->prefix); + test_match_net(prefixes, trie, &net); + + n = NODE_VALID(NODE_NEXT(n)) ? NODE_NEXT(n) : HEAD(*prefixes); + } + + v6 = !v6; + tmp_flush(); + } + + bt_bird_cleanup(); + return 1; +} + +static int +t_match_outer_net(void) +{ + bt_bird_init(); + bt_config_parse(BT_CONFIG_SIMPLE); + + int v6 = 0; + for (int round = 0; round < TESTS_NUM; round++) + { + list *prefixes = make_random_prefix_list(PREFIXES_NUM, v6, 0); + struct f_trie *trie = make_trie_from_prefix_list(prefixes); + + struct f_prefix_node *n = HEAD(*prefixes); + for (int i = 0; i < PREFIX_TESTS_NUM; i++) + { + net_addr net; + get_outer_net(&net, &n->prefix); + test_match_net(prefixes, trie, &net); + + n = NODE_VALID(NODE_NEXT(n)) ? NODE_NEXT(n) : HEAD(*prefixes); + } + + v6 = !v6; + tmp_flush(); + } + + v6 = !v6; + bt_bird_cleanup(); + return 1; +} + +/* + * Read prefixes from @filename, build set of tries, prepare test data and do + * PREFIX_BENCH_NUM trie lookups. With @plus = 0, use random subset of known + * prefixes as test data, with @plus = 1, use randomly generated /32 prefixes + * as test data. + */ +static int +benchmark_trie_dataset(const char *filename, int plus) +{ + int n = 0; + list *data[TRIE_BUFFER_SIZE]; + struct f_trie *trie[TRIE_BUFFER_SIZE]; + net_addr *nets; + + bt_reset_suite_case_timer(); + bt_log_suite_case_result(1, "Reading %s", filename, n); + n = read_prefix_file(filename, plus, data, trie); + bt_log_suite_case_result(1, "Read prefix data, %d lists, ", n); + + size_t trie_size = rmemsize(tmp_linpool).effective * 1000 / (1024*1024); + bt_log_suite_case_result(1, "Trie size %u.%03u MB", + (uint) (trie_size / 1000), (uint) (trie_size % 1000)); + + int t = PREFIX_BENCH_NUM / n; + int tb = MIN(t, TEST_BUFFER_SIZE); + nets = tmp_alloc(tb * sizeof(net_addr)); + + if (!plus) + select_random_prefix_subset(data, nets, n, tb); + else + make_random_addresses(nets, tb); + + bt_log_suite_case_result(1, "Make test data, %d (%d) tests", t, tb); + bt_reset_suite_case_timer(); + + /* + int match = 0; + for (int i = 0; i < t; i++) + for (int j = 0; j < n; j++) + test_match_net(data[j], trie[j], &nets[i]); + */ + + int match = 0; + for (int i = 0; i < t; i++) + for (int j = 0; j < n; j++) + if (trie_match_net(trie[j], &nets[i % TEST_BUFFER_SIZE])) + match++; + + bt_log_suite_case_result(1, "Matching done, %d / %d matches", match, t * n); + + tmp_flush(); + return 1; +} + +static int UNUSED +t_bench_trie_datasets_subset(void) +{ + bt_bird_init(); + bt_config_parse(BT_CONFIG_SIMPLE); + + /* Specific datasets, not included */ + benchmark_trie_dataset("trie-data-bgp-1", 0); + benchmark_trie_dataset("trie-data-bgp-10", 0); + benchmark_trie_dataset("trie-data-bgp-100", 0); + benchmark_trie_dataset("trie-data-bgp-1000", 0); + + bt_bird_cleanup(); + + return 1; +} + +static int UNUSED +t_bench_trie_datasets_random(void) +{ + bt_bird_init(); + bt_config_parse(BT_CONFIG_SIMPLE); + + /* Specific datasets, not included */ + benchmark_trie_dataset("trie-data-bgp-1", 1); + benchmark_trie_dataset("trie-data-bgp-10", 1); + benchmark_trie_dataset("trie-data-bgp-100", 1); + benchmark_trie_dataset("trie-data-bgp-1000", 1); + + bt_bird_cleanup(); + + return 1; +} + + static int t_trie_same(void) { bt_bird_init(); bt_config_parse(BT_CONFIG_SIMPLE); - int round; - for (round = 0; round < TESTS_NUM*4; round++) + int v6 = 0; + for (int round = 0; round < TESTS_NUM*4; round++) { - struct f_trie * trie1 = f_new_trie(config->mem, 0); - struct f_trie * trie2 = f_new_trie(config->mem, 0); - - list prefixes; /* a list of f_extended_prefix structures */ - init_list(&prefixes); - int i; - for (i = 0; i < 100; i++) - generate_random_ipv6_prefixes(&prefixes); + list *prefixes = make_random_prefix_list(100 * PREFIXES_NUM, v6, 0); + struct f_trie *trie1 = f_new_trie(tmp_linpool, 0); + struct f_trie *trie2 = f_new_trie(tmp_linpool, 0); struct f_prefix_node *n; - WALK_LIST(n, prefixes) - { + WALK_LIST(n, *prefixes) trie_add_prefix(trie1, &n->prefix.net, n->prefix.lo, n->prefix.hi); - } - WALK_LIST_BACKWARDS(n, prefixes) - { + + WALK_LIST_BACKWARDS(n, *prefixes) trie_add_prefix(trie2, &n->prefix.net, n->prefix.lo, n->prefix.hi); - } bt_assert(trie_same(trie1, trie2)); - struct f_prefix_node *nxt; - WALK_LIST_DELSAFE(n, nxt, prefixes) - { - free(n); - } + v6 = !v6; + tmp_flush(); } + bt_bird_cleanup(); + return 1; +} + +static inline void +log_networks(const net_addr *a, const net_addr *b) +{ + if (bt_verbose >= BT_VERBOSE_ABSOLUTELY_ALL) + { + char buf0[64]; + char buf1[64]; + bt_format_net(buf0, 64, a); + bt_format_net(buf1, 64, b); + bt_debug("Found %s expected %s\n", buf0, buf1); + } +} + +static int +t_trie_walk(void) +{ + bt_bird_init(); + bt_config_parse(BT_CONFIG_SIMPLE); + + for (int round = 0; round < TESTS_NUM*8; round++) + { + int level = round / TESTS_NUM; + int v6 = level % 2; + int num = PREFIXES_NUM * (int[]){1, 10, 100, 1000}[level / 2]; + int pos = 0, end = 0; + list *prefixes = make_random_prefix_list(num, v6, 1); + struct f_trie *trie = make_trie_from_prefix_list(prefixes); + struct f_prefix *pxset = malloc((num + 1) * sizeof(struct f_prefix)); + + struct f_prefix_node *n; + WALK_LIST(n, *prefixes) + pxset[pos++] = n->prefix; + memset(&pxset[pos], 0, sizeof (struct f_prefix)); + + qsort(pxset, num, sizeof(struct f_prefix), compare_prefixes); + + + /* Full walk */ + bt_debug("Full walk (round %d, %d nets)\n", round, num); + + pos = 0; + uint pxc = 0; + TRIE_WALK(trie, net, NULL) + { + log_networks(&net, &pxset[pos].net); + bt_assert(net_equal(&net, &pxset[pos].net)); + + /* Skip possible duplicates */ + while (net_equal(&pxset[pos].net, &pxset[pos + 1].net)) + pos++; + + pos++; + pxc++; + } + TRIE_WALK_END; + + bt_assert(pos == num); + bt_assert(pxc == trie->prefix_count); + bt_debug("Full walk done\n"); + + + /* Prepare net for subnet walk - start with random prefix */ + pos = bt_random() % num; + end = pos + (int[]){2, 2, 3, 4}[level / 2]; + end = MIN(end, num); + + struct f_prefix from = pxset[pos]; + + /* Find a common superprefix to several subsequent prefixes */ + for (; pos < end; pos++) + { + if (net_equal(&from.net, &pxset[pos].net)) + continue; + + int common = !v6 ? + ip4_pxlen(net4_prefix(&from.net), net4_prefix(&pxset[pos].net)) : + ip6_pxlen(net6_prefix(&from.net), net6_prefix(&pxset[pos].net)); + from.net.pxlen = MIN(from.net.pxlen, common); + + if (!v6) + ((net_addr_ip4 *) &from.net)->prefix = + ip4_and(net4_prefix(&from.net), net4_prefix(&pxset[pos].net)); + else + ((net_addr_ip6 *) &from.net)->prefix = + ip6_and(net6_prefix(&from.net), net6_prefix(&pxset[pos].net)); + } + + /* Fix irrelevant bits */ + if (!v6) + ((net_addr_ip4 *) &from.net)->prefix = + ip4_and(net4_prefix(&from.net), ip4_mkmask(net4_pxlen(&from.net))); + else + ((net_addr_ip6 *) &from.net)->prefix = + ip6_and(net6_prefix(&from.net), ip6_mkmask(net6_pxlen(&from.net))); + + + /* Find initial position for final prefix */ + for (pos = 0; pos < num; pos++) + if (compare_prefixes(&pxset[pos], &from) >= 0) + break; + + int p0 = pos; + char buf0[64]; + bt_format_net(buf0, 64, &from.net); + bt_debug("Subnet walk for %s (round %d, %d nets)\n", buf0, round, num); + + /* Subnet walk */ + TRIE_WALK(trie, net, &from.net) + { + log_networks(&net, &pxset[pos].net); + bt_assert(net_equal(&net, &pxset[pos].net)); + bt_assert(net_in_netX(&net, &from.net)); + + /* Skip possible duplicates */ + while (net_equal(&pxset[pos].net, &pxset[pos + 1].net)) + pos++; + + pos++; + } + TRIE_WALK_END; + + bt_assert((pos == num) || !net_in_netX(&pxset[pos].net, &from.net)); + bt_debug("Subnet walk done for %s (found %d nets)\n", buf0, pos - p0); + + tmp_flush(); + } + + bt_bird_cleanup(); + return 1; +} + +static int +find_covering_nets(struct f_prefix *prefixes, int num, const net_addr *net, net_addr *found) +{ + struct f_prefix key; + net_addr *n = &key.net; + int found_num = 0; + + net_copy(n, net); + + while (1) + { + struct f_prefix *px = + bsearch(&key, prefixes, num, sizeof(struct f_prefix), compare_prefixes); + + if (px) + { + net_copy(&found[found_num], n); + found_num++; + } + + if (n->pxlen == 0) + return found_num; + + n->pxlen--; + + if (n->type == NET_IP4) + ip4_clrbit(&((net_addr_ip4 *) n)->prefix, n->pxlen); + else + ip6_clrbit(&((net_addr_ip6 *) n)->prefix, n->pxlen); + } +} + +static int +t_trie_walk_to_root(void) +{ + bt_bird_init(); + bt_config_parse(BT_CONFIG_SIMPLE); + + for (int round = 0; round < TESTS_NUM * 4; round++) + { + int level = round / TESTS_NUM; + int v6 = level % 2; + int num = PREFIXES_NUM * (int[]){32, 512}[level / 2]; + int pos = 0; + int st = 0, sn = 0, sm = 0; + + list *prefixes = make_random_prefix_list(num, v6, 1); + struct f_trie *trie = make_trie_from_prefix_list(prefixes); + struct f_prefix *pxset = malloc((num + 1) * sizeof(struct f_prefix)); + + struct f_prefix_node *pxn; + WALK_LIST(pxn, *prefixes) + pxset[pos++] = pxn->prefix; + memset(&pxset[pos], 0, sizeof (struct f_prefix)); + + qsort(pxset, num, sizeof(struct f_prefix), compare_prefixes); + + int i; + for (i = 0; i < (PREFIX_TESTS_NUM / 10); i++) + { + net_addr from; + get_random_net(&from, v6); + + net_addr found[129]; + int found_num = find_covering_nets(pxset, num, &from, found); + int n = 0; + + if (bt_verbose >= BT_VERBOSE_ABSOLUTELY_ALL) + { + char buf[64]; + bt_format_net(buf, 64, &from); + bt_debug("Lookup for %s (expect %d)\n", buf, found_num); + } + + /* Walk to root, separate for IPv4 and IPv6 */ + if (!v6) + { + TRIE_WALK_TO_ROOT_IP4(trie, (net_addr_ip4 *) &from, net) + { + log_networks((net_addr *) &net, &found[n]); + bt_assert((n < found_num) && net_equal((net_addr *) &net, &found[n])); + n++; + } + TRIE_WALK_TO_ROOT_END; + } + else + { + TRIE_WALK_TO_ROOT_IP6(trie, (net_addr_ip6 *) &from, net) + { + log_networks((net_addr *) &net, &found[n]); + bt_assert((n < found_num) && net_equal((net_addr *) &net, &found[n])); + n++; + } + TRIE_WALK_TO_ROOT_END; + } + + bt_assert(n == found_num); + + /* Stats */ + st += n; + sn += !!n; + sm = MAX(sm, n); + } + + bt_debug("Success in %d / %d, sum %d, max %d\n", sn, i, st, sm); + + tmp_flush(); + } + + bt_bird_cleanup(); return 1; } @@ -179,8 +885,15 @@ main(int argc, char *argv[]) { bt_init(argc, argv); - bt_test_suite(t_match_net, "Testing random prefix matching"); + bt_test_suite(t_match_random_net, "Testing random prefix matching"); + bt_test_suite(t_match_inner_net, "Testing random inner prefix matching"); + bt_test_suite(t_match_outer_net, "Testing random outer prefix matching"); bt_test_suite(t_trie_same, "A trie filled forward should be same with a trie filled backward."); + bt_test_suite(t_trie_walk, "Testing TRIE_WALK() on random tries"); + bt_test_suite(t_trie_walk_to_root, "Testing TRIE_WALK_TO_ROOT() on random tries"); + + // bt_test_suite(t_bench_trie_datasets_subset, "Benchmark tries from datasets by random subset of nets"); + // bt_test_suite(t_bench_trie_datasets_random, "Benchmark tries from datasets by generated addresses"); return bt_exit_value(); } diff --git a/lib/Makefile b/lib/Makefile index 4378a7bd..15f757d9 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -1,7 +1,7 @@ -src := bitmap.c bitops.c blake2s.c blake2b.c checksum.c event.c flowspec.c idm.c ip.c lists.c mac.c md5.c mempool.c net.c patmatch.c printf.c resource.c sha1.c sha256.c sha512.c slab.c slists.c strtoul.c tbf.c timer.c xmalloc.c +src := a-path.c a-set.c bitmap.c bitops.c blake2s.c blake2b.c checksum.c event.c flowspec.c idm.c ip.c lists.c mac.c md5.c mempool.c net.c patmatch.c printf.c resource.c sha1.c sha256.c sha512.c slab.c slists.c strtoul.c tbf.c timer.c xmalloc.c obj := $(src-o-files) $(all-daemon) -tests_src := bitmap_test.c heap_test.c buffer_test.c event_test.c flowspec_test.c bitops_test.c patmatch_test.c fletcher16_test.c slist_test.c checksum_test.c lists_test.c mac_test.c ip_test.c hash_test.c printf_test.c +tests_src := a-set_test.c a-path_test.c bitmap_test.c heap_test.c buffer_test.c event_test.c flowspec_test.c bitops_test.c patmatch_test.c fletcher16_test.c slist_test.c checksum_test.c lists_test.c mac_test.c ip_test.c hash_test.c printf_test.c slab_test.c type_test.c tests_targets := $(tests_targets) $(tests-target-files) tests_objs := $(tests_objs) $(src-o-files) diff --git a/nest/a-path.c b/lib/a-path.c similarity index 99% rename from nest/a-path.c rename to lib/a-path.c index 2e34a3d1..0eca8475 100644 --- a/nest/a-path.c +++ b/lib/a-path.c @@ -8,8 +8,8 @@ */ #include "nest/bird.h" -#include "nest/route.h" -#include "nest/attrs.h" +#include "nest/rt.h" +#include "lib/attrs.h" #include "lib/resource.h" #include "lib/unaligned.h" #include "lib/string.h" @@ -591,7 +591,7 @@ as_path_match_set(const struct adata *path, const struct f_tree *set) p += 2; for (i=0; i<n; i++) { - struct f_val v = {T_INT, .val.i = get_as(p)}; + struct f_val v = { .type = T_INT, .val.i = get_as(p)}; if (find_tree(set, &v)) return 1; p += BS; @@ -631,7 +631,7 @@ as_path_filter(struct linpool *pool, const struct adata *path, const struct f_tr if (set) { - struct f_val v = {T_INT, .val.i = as}; + struct f_val v = { .type = T_INT, .val.i = as}; match = !!find_tree(set, &v); } else diff --git a/nest/a-path_test.c b/lib/a-path_test.c similarity index 88% rename from nest/a-path_test.c rename to lib/a-path_test.c index 9ed0a786..38f77642 100644 --- a/nest/a-path_test.c +++ b/lib/a-path_test.c @@ -9,8 +9,8 @@ #include "test/birdtest.h" #include "test/bt-utils.h" -#include "nest/route.h" -#include "nest/attrs.h" +#include "nest/rt.h" +#include "lib/attrs.h" #include "lib/resource.h" #define TESTS_NUM 30 @@ -23,8 +23,6 @@ static int t_as_path_match(void) { - resource_init(); - int round; for (round = 0; round < TESTS_NUM; round++) { @@ -32,14 +30,13 @@ t_as_path_match(void) struct adata *as_path = &empty_as_path; u32 first_prepended, last_prepended; first_prepended = last_prepended = 0; - struct linpool *lp = lp_new_default(&root_pool); struct f_path_mask *mask = alloca(sizeof(struct f_path_mask) + AS_PATH_LENGTH * sizeof(struct f_path_mask_item)); mask->len = AS_PATH_LENGTH; for (int i = AS_PATH_LENGTH - 1; i >= 0; i--) { u32 val = bt_random(); - as_path = as_path_prepend(lp, as_path, val); + as_path = as_path_prepend(tmp_linpool, as_path, val); bt_debug("Prepending ASN: %10u \n", val); if (i == 0) @@ -61,7 +58,7 @@ t_as_path_match(void) bt_assert(as_path_get_last(as_path, &asn)); bt_assert_msg(asn == first_prepended, "as_path_get_last() should return the first prepended ASN"); - rfree(lp); + tmp_flush(); } return 1; @@ -70,16 +67,13 @@ t_as_path_match(void) static int t_path_format(void) { - resource_init(); - struct adata empty_as_path = {}; struct adata *as_path = &empty_as_path; - struct linpool *lp = lp_new_default(&root_pool); uint i; for (i = 4294967285; i <= 4294967294; i++) { - as_path = as_path_prepend(lp, as_path, i); + as_path = as_path_prepend(tmp_linpool, as_path, i); bt_debug("Prepending ASN: %10u \n", i); } @@ -97,7 +91,7 @@ t_path_format(void) as_path_format(as_path, buf2, SMALL_BUFFER_SIZE); bt_assert_msg(strcmp(buf2, "4294967294 42...") == 0, "Small Buffer(%zu): '%s'", strlen(buf2), buf2); - rfree(lp); + tmp_flush(); return 1; } @@ -116,11 +110,8 @@ count_asn_in_array(const u32 *array, u32 asn) static int t_path_include(void) { - resource_init(); - struct adata empty_as_path = {}; struct adata *as_path = &empty_as_path; - struct linpool *lp = lp_new_default(&root_pool); u32 as_nums[AS_PATH_LENGTH] = {}; int i; @@ -128,7 +119,7 @@ t_path_include(void) { u32 val = bt_random(); as_nums[i] = val; - as_path = as_path_prepend(lp, as_path, val); + as_path = as_path_prepend(tmp_linpool, as_path, val); } for (i = 0; i < AS_PATH_LENGTH; i++) @@ -136,8 +127,8 @@ t_path_include(void) int counts_of_contains = count_asn_in_array(as_nums, as_nums[i]); bt_assert_msg(as_path_contains(as_path, as_nums[i], counts_of_contains), "AS Path should contains %d-times number %d", counts_of_contains, as_nums[i]); - bt_assert(as_path_filter(lp, as_path, NULL, as_nums[i], 0) != NULL); - bt_assert(as_path_filter(lp, as_path, NULL, as_nums[i], 1) != NULL); + bt_assert(as_path_filter(tmp_linpool, as_path, NULL, as_nums[i], 0) != NULL); + bt_assert(as_path_filter(tmp_linpool, as_path, NULL, as_nums[i], 1) != NULL); } for (i = 0; i < 10000; i++) @@ -152,7 +143,7 @@ t_path_include(void) bt_assert_msg(result == 0, "As path should not contain the number %u", test_val); } - rfree(lp); + tmp_flush(); return 1; } @@ -161,16 +152,13 @@ t_path_include(void) static int t_as_path_converting(void) { - resource_init(); - struct adata empty_as_path = {}; struct adata *as_path = &empty_as_path; - struct linpool *lp = lp_new_default(&root_pool); #define AS_PATH_LENGTH_FOR_CONVERTING_TEST 10 int i; for (i = 0; i < AS_PATH_LENGTH_FOR_CONVERTING_TEST; i++) - as_path = as_path_prepend(lp, as_path, i); + as_path = as_path_prepend(tmp_linpool, as_path, i); bt_debug("data length: %u \n", as_path->length); diff --git a/nest/a-set.c b/lib/a-set.c similarity index 84% rename from nest/a-set.c rename to lib/a-set.c index 1186eb56..8ede9b83 100644 --- a/nest/a-set.c +++ b/lib/a-set.c @@ -10,8 +10,8 @@ #include <stdlib.h> #include "nest/bird.h" -#include "nest/route.h" -#include "nest/attrs.h" +#include "nest/rt.h" +#include "lib/attrs.h" #include "lib/resource.h" #include "lib/string.h" @@ -516,6 +516,48 @@ int_set_sort(struct linpool *pool, const struct adata *src) return dst; } +int +int_set_min(const struct adata *list, u32 *val) +{ + if (!list) + return 0; + + u32 *l = (u32 *) list->data; + int len = int_set_get_size(list); + int i; + + if (len < 1) + return 0; + + *val = *l++; + for (i = 1; i < len; i++, l++) + if (int_set_cmp(val, l) > 0) + *val = *l; + + return 1; +} + +int +int_set_max(const struct adata *list, u32 *val) +{ + if (!list) + return 0; + + u32 *l = (u32 *) list->data; + int len = int_set_get_size(list); + int i; + + if (len < 1) + return 0; + + *val = *l++; + for (i = 1; i < len; i++, l++) + if (int_set_cmp(val, l) < 0) + *val = *l; + + return 1; +} + static int ec_set_cmp(const void *X, const void *Y) @@ -541,6 +583,50 @@ ec_set_sort_x(struct adata *set) qsort(set->data, set->length / 8, 8, ec_set_cmp); } +int +ec_set_min(const struct adata *list, u64 *val) +{ + if (!list) + return 0; + + u32 *l = int_set_get_data(list); + int len = int_set_get_size(list); + int i; + + if (len < 1) + return 0; + + u32 *res = l; l += 2; + for (i = 2; i < len; i += 2, l += 2) + if (ec_set_cmp(res, l) > 0) + res = l; + + *val = ec_generic(res[0], res[1]); + return 1; +} + +int +ec_set_max(const struct adata *list, u64 *val) +{ + if (!list) + return 0; + + u32 *l = int_set_get_data(list); + int len = int_set_get_size(list); + int i; + + if (len < 1) + return 0; + + u32 *res = l; l += 2; + for (i = 2; i < len; i += 2, l += 2) + if (ec_set_cmp(res, l) < 0) + res = l; + + *val = ec_generic(res[0], res[1]); + return 1; +} + static int lc_set_cmp(const void *X, const void *Y) @@ -563,3 +649,47 @@ lc_set_sort(struct linpool *pool, const struct adata *src) qsort(dst->data, dst->length / LCOMM_LENGTH, LCOMM_LENGTH, lc_set_cmp); return dst; } + +int +lc_set_min(const struct adata *list, lcomm *val) +{ + if (!list) + return 0; + + u32 *l = int_set_get_data(list); + int len = int_set_get_size(list); + int i; + + if (len < 1) + return 0; + + u32 *res = l; l += 3; + for (i = 3; i < len; i += 3, l += 3) + if (lc_set_cmp(res, l) > 0) + res = l; + + *val = (lcomm) { res[0], res[1], res[2] }; + return 1; +} + +int +lc_set_max(const struct adata *list, lcomm *val) +{ + if (!list) + return 0; + + u32 *l = int_set_get_data(list); + int len = int_set_get_size(list); + int i; + + if (len < 1) + return 0; + + u32 *res = l; l += 3; + for (i = 3; i < len; i += 3, l += 3) + if (lc_set_cmp(res, l) < 0) + res = l; + + *val = (lcomm) { res[0], res[1], res[2] }; + return 1; +} diff --git a/nest/a-set_test.c b/lib/a-set_test.c similarity index 80% rename from nest/a-set_test.c rename to lib/a-set_test.c index 96b6a727..3280031f 100644 --- a/nest/a-set_test.c +++ b/lib/a-set_test.c @@ -10,8 +10,8 @@ #include "test/bt-utils.h" #include "lib/net.h" -#include "nest/route.h" -#include "nest/attrs.h" +#include "nest/rt.h" +#include "lib/attrs.h" #include "lib/resource.h" #define SET_SIZE 10 @@ -25,8 +25,6 @@ static byte buf[BUFFER_SIZE] = {}; #define SET_SIZE_FOR_FORMAT_OUTPUT 10 -struct linpool *lp; - enum set_type { SET_TYPE_INT, @@ -38,24 +36,23 @@ generate_set_sequence(enum set_type type, int len) { struct adata empty_as_path = {}; set_sequence = set_sequence_same = set_sequence_higher = set_random = &empty_as_path; - lp = lp_new_default(&root_pool); int i; for (i = 0; i < len; i++) { if (type == SET_TYPE_INT) { - set_sequence = int_set_add(lp, set_sequence, i); - set_sequence_same = int_set_add(lp, set_sequence_same, i); - set_sequence_higher = int_set_add(lp, set_sequence_higher, i + SET_SIZE); - set_random = int_set_add(lp, set_random, bt_random()); + set_sequence = int_set_add(tmp_linpool, set_sequence, i); + set_sequence_same = int_set_add(tmp_linpool, set_sequence_same, i); + set_sequence_higher = int_set_add(tmp_linpool, set_sequence_higher, i + SET_SIZE); + set_random = int_set_add(tmp_linpool, set_random, bt_random()); } else if (type == SET_TYPE_EC) { - set_sequence = ec_set_add(lp, set_sequence, i); - set_sequence_same = ec_set_add(lp, set_sequence_same, i); - set_sequence_higher = ec_set_add(lp, set_sequence_higher, i + SET_SIZE); - set_random = ec_set_add(lp, set_random, (bt_random() << 32 | bt_random())); + set_sequence = ec_set_add(tmp_linpool, set_sequence, i); + set_sequence_same = ec_set_add(tmp_linpool, set_sequence_same, i); + set_sequence_higher = ec_set_add(tmp_linpool, set_sequence_higher, i + SET_SIZE); + set_random = ec_set_add(tmp_linpool, set_random, (bt_random() << 32 | bt_random())); } else bt_abort_msg("This should be unreachable"); @@ -71,7 +68,6 @@ t_set_int_contains(void) { int i; - resource_init(); generate_set_sequence(SET_TYPE_INT, SET_SIZE); bt_assert(int_set_get_size(set_sequence) == SET_SIZE); @@ -85,33 +81,29 @@ t_set_int_contains(void) for (i = 0; i < SET_SIZE; i++) bt_assert_msg(data[i] == i, "(data[i] = %d) == i = %d)", data[i], i); - rfree(lp); return 1; } static int t_set_int_union(void) { - resource_init(); generate_set_sequence(SET_TYPE_INT, SET_SIZE); const struct adata *set_union; - set_union = int_set_union(lp, set_sequence, set_sequence_same); + set_union = int_set_union(tmp_linpool, set_sequence, set_sequence_same); bt_assert(int_set_get_size(set_union) == SET_SIZE); bt_assert(int_set_format(set_union, 0, 2, buf, BUFFER_SIZE) == 0); - set_union = int_set_union(lp, set_sequence, set_sequence_higher); + set_union = int_set_union(tmp_linpool, set_sequence, set_sequence_higher); bt_assert_msg(int_set_get_size(set_union) == SET_SIZE*2, "int_set_get_size(set_union) %d, SET_SIZE*2 %d", int_set_get_size(set_union), SET_SIZE*2); bt_assert(int_set_format(set_union, 0, 2, buf, BUFFER_SIZE) == 0); - rfree(lp); return 1; } static int t_set_int_format(void) { - resource_init(); generate_set_sequence(SET_TYPE_INT, SET_SIZE_FOR_FORMAT_OUTPUT); bt_assert(int_set_format(set_sequence, 0, 0, buf, BUFFER_SIZE) == 0); @@ -125,21 +117,19 @@ t_set_int_format(void) bt_assert(int_set_format(set_sequence, 1, 0, buf, BUFFER_SIZE) == 0); bt_assert(strcmp(buf, "(0,0) (0,1) (0,2) (0,3) (0,4) (0,5) (0,6) (0,7) (0,8) (0,9)") == 0); - rfree(lp); return 1; } static int t_set_int_delete(void) { - resource_init(); generate_set_sequence(SET_TYPE_INT, SET_SIZE); const struct adata *deleting_sequence = set_sequence; u32 i; for (i = 0; i < SET_SIZE; i++) { - deleting_sequence = int_set_del(lp, deleting_sequence, i); + deleting_sequence = int_set_del(tmp_linpool, deleting_sequence, i); bt_assert_msg(int_set_get_size(deleting_sequence) == (int) (SET_SIZE-1-i), "int_set_get_size(deleting_sequence) %d == SET_SIZE-1-i %d", int_set_get_size(deleting_sequence), @@ -160,7 +150,6 @@ t_set_ec_contains(void) { u32 i; - resource_init(); generate_set_sequence(SET_TYPE_EC, SET_SIZE); bt_assert(ec_set_get_size(set_sequence) == SET_SIZE); @@ -174,62 +163,54 @@ t_set_ec_contains(void) // for (i = 0; i < SET_SIZE; i++) // bt_assert_msg(data[i] == (SET_SIZE-1-i), "(data[i] = %d) == ((SET_SIZE-1-i) = %d)", data[i], SET_SIZE-1-i); - rfree(lp); return 1; } static int t_set_ec_union(void) { - resource_init(); generate_set_sequence(SET_TYPE_EC, SET_SIZE); const struct adata *set_union; - set_union = ec_set_union(lp, set_sequence, set_sequence_same); + set_union = ec_set_union(tmp_linpool, set_sequence, set_sequence_same); bt_assert(ec_set_get_size(set_union) == SET_SIZE); bt_assert(ec_set_format(set_union, 0, buf, BUFFER_SIZE) == 0); - set_union = ec_set_union(lp, set_sequence, set_sequence_higher); + set_union = ec_set_union(tmp_linpool, set_sequence, set_sequence_higher); bt_assert_msg(ec_set_get_size(set_union) == SET_SIZE*2, "ec_set_get_size(set_union) %d, SET_SIZE*2 %d", ec_set_get_size(set_union), SET_SIZE*2); bt_assert(ec_set_format(set_union, 0, buf, BUFFER_SIZE) == 0); - rfree(lp); return 1; } static int t_set_ec_format(void) { - resource_init(); - const struct adata empty_as_path = {}; set_sequence = set_sequence_same = set_sequence_higher = set_random = &empty_as_path; - lp = lp_new_default(&root_pool); u64 i = 0; - set_sequence = ec_set_add(lp, set_sequence, i); + set_sequence = ec_set_add(tmp_linpool, set_sequence, i); for (i = 1; i < SET_SIZE_FOR_FORMAT_OUTPUT; i++) - set_sequence = ec_set_add(lp, set_sequence, i + ((i%2) ? ((u64)EC_RO << 48) : ((u64)EC_RT << 48))); + set_sequence = ec_set_add(tmp_linpool, set_sequence, i + ((i%2) ? ((u64)EC_RO << 48) : ((u64)EC_RT << 48))); bt_assert(ec_set_format(set_sequence, 0, buf, BUFFER_SIZE) == 0); bt_assert_msg(strcmp(buf, "(unknown 0x0, 0, 0) (ro, 0, 1) (rt, 0, 2) (ro, 0, 3) (rt, 0, 4) (ro, 0, 5) (rt, 0, 6) (ro, 0, 7) (rt, 0, 8) (ro, 0, 9)") == 0, "ec_set_format() returns '%s'", buf); - rfree(lp); return 1; } static int t_set_ec_delete(void) { - resource_init(); generate_set_sequence(SET_TYPE_EC, SET_SIZE); const struct adata *deleting_sequence = set_sequence; u32 i; for (i = 0; i < SET_SIZE; i++) { - deleting_sequence = ec_set_del(lp, deleting_sequence, i); + deleting_sequence = ec_set_del(tmp_linpool, deleting_sequence, i); bt_assert_msg(ec_set_get_size(deleting_sequence) == (int) (SET_SIZE-1-i), "ec_set_get_size(deleting_sequence) %d == SET_SIZE-1-i %d", ec_set_get_size(deleting_sequence), SET_SIZE-1-i); diff --git a/nest/attrs.h b/lib/attrs.h similarity index 85% rename from nest/attrs.h rename to lib/attrs.h index 50da817b..79b7b14a 100644 --- a/nest/attrs.h +++ b/lib/attrs.h @@ -11,7 +11,39 @@ #include <stdint.h> #include "lib/unaligned.h" -#include "nest/route.h" + +typedef struct adata { + uint length; /* Length of data */ + byte data[0]; +} adata; + +#define ADATA_SIZE(s) BIRD_CPU_ALIGN(sizeof(struct adata) + s) + +extern const adata null_adata; /* adata of length 0 */ + +static inline struct adata * +lp_alloc_adata(struct linpool *pool, uint len) +{ + struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len); + ad->length = len; + return ad; +} + +static inline struct adata * +lp_store_adata(struct linpool *pool, const void *buf, uint len) +{ + struct adata *ad = lp_alloc_adata(pool, len); + memcpy(ad->data, buf, len); + return ad; +} + +#define tmp_alloc_adata(len) lp_alloc_adata(tmp_linpool, len) +#define tmp_store_adata(buf, len) lp_store_adata(tmp_linpool, buf, len) +#define tmp_copy_adata(ad) tmp_store_adata((ad)->data, (ad)->length) + +static inline int adata_same(const struct adata *a, const struct adata *b) +{ return (a->length == b->length && !memcmp(a->data, b->data, a->length)); } + /* a-path.c */ @@ -218,6 +250,12 @@ struct adata *ec_set_del_nontrans(struct linpool *pool, const struct adata *set) struct adata *int_set_sort(struct linpool *pool, const struct adata *src); struct adata *ec_set_sort(struct linpool *pool, const struct adata *src); struct adata *lc_set_sort(struct linpool *pool, const struct adata *src); +int int_set_min(const struct adata *list, u32 *val); +int ec_set_min(const struct adata *list, u64 *val); +int lc_set_min(const struct adata *list, lcomm *val); +int int_set_max(const struct adata *list, u32 *val); +int ec_set_max(const struct adata *list, u64 *val); +int lc_set_max(const struct adata *list, lcomm *val); void ec_set_sort_x(struct adata *set); /* Sort in place */ diff --git a/lib/birdlib.h b/lib/birdlib.h index 431b7c0d..9b6e4a16 100644 --- a/lib/birdlib.h +++ b/lib/birdlib.h @@ -9,16 +9,29 @@ #ifndef _BIRD_BIRDLIB_H_ #define _BIRD_BIRDLIB_H_ +#include "sysdep/config.h" #include "lib/alloca.h" /* Ugly structure offset handling macros */ -struct align_probe { char x; long int y; }; - #define OFFSETOF(s, i) ((size_t) &((s *)0)->i) #define SKIP_BACK(s, i, p) ((s *)((char *)p - OFFSETOF(s, i))) #define BIRD_ALIGN(s, a) (((s)+a-1)&~(a-1)) -#define CPU_STRUCT_ALIGN (sizeof(struct align_probe)) +#define CPU_STRUCT_ALIGN (MAX_(_Alignof(void*), _Alignof(u64))) +#define BIRD_CPU_ALIGN(s) BIRD_ALIGN((s), CPU_STRUCT_ALIGN) + +/* Structure item alignment macros */ + +#define PADDING_NAME(id) _padding_##id +#define PADDING_(id, sz) u8 PADDING_NAME(id)[sz] + +#if CPU_POINTER_ALIGNMENT == 4 +#define PADDING(id, n32, n64) PADDING_(id, n32) +#elif CPU_POINTER_ALIGNMENT == 8 +#define PADDING(id, n32, n64) PADDING_(id, n64) +#else +#error "Strange CPU pointer alignment: " CPU_POINTER_ALIGNMENT +#endif /* Utility macros */ @@ -32,6 +45,9 @@ struct align_probe { char x; long int y; }; #define MAX(a,b) MAX_(a,b) #endif +#define ROUND_DOWN_POW2(a,b) ((a) & ~((b)-1)) +#define ROUND_UP_POW2(a,b) (((a)+((b)-1)) & ~((b)-1)) + #define U64(c) UINT64_C(c) #define ABS(a) ((a)>=0 ? (a) : -(a)) #define DELTA(a,b) (((a)>=(b))?(a)-(b):(b)-(a)) diff --git a/lib/bitmap_test.c b/lib/bitmap_test.c index 0595a4d0..07860c94 100644 --- a/lib/bitmap_test.c +++ b/lib/bitmap_test.c @@ -24,7 +24,6 @@ t_bmap_set_clear_random(void) { struct bmap b; - resource_init(); bmap_init(&b, &root_pool, 1024); char expected[MAX_NUM] = {}; @@ -60,7 +59,6 @@ t_hmap_set_clear_random(void) { struct hmap b; - resource_init(); hmap_init(&b, &root_pool, 1024); char expected[MAX_NUM] = {}; @@ -119,7 +117,6 @@ t_hmap_set_clear_fill(void) { struct hmap b; - resource_init(); hmap_init(&b, &root_pool, 1024); char expected[MAX_NUM] = {}; diff --git a/lib/buffer_test.c b/lib/buffer_test.c index 5b7de330..0629e901 100644 --- a/lib/buffer_test.c +++ b/lib/buffer_test.c @@ -41,7 +41,6 @@ fill_expected_array(void) static void init_buffer(void) { - resource_init(); buffer_pool = &root_pool; BUFFER_INIT(buf, buffer_pool, MAX_NUM); } diff --git a/lib/event.c b/lib/event.c index 273447e0..33dc00b0 100644 --- a/lib/event.c +++ b/lib/event.c @@ -157,6 +157,7 @@ ev_run_list(event_list *l) io_log_event(e->hook, e->data); ev_run(e); + tmp_flush(); } return !EMPTY_LIST(*l); @@ -184,6 +185,7 @@ ev_run_list_limited(event_list *l, uint limit) io_log_event(e->hook, e->data); ev_run(e); + tmp_flush(); limit--; } diff --git a/lib/event_test.c b/lib/event_test.c index e1215bba..3070327d 100644 --- a/lib/event_test.c +++ b/lib/event_test.c @@ -15,7 +15,7 @@ #include "nest/locks.h" #include "sysdep/unix/unix.h" #include "nest/iface.h" -#include "nest/route.h" +#include "nest/rt.h" #define MAX_NUM 4 @@ -53,11 +53,10 @@ t_ev_run_list(void) { int i; - resource_init(); olock_init(); timer_init(); - io_init(); rt_init(); + io_init(); if_init(); // roa_init(); config_init(); diff --git a/lib/fib.h b/lib/fib.h new file mode 100644 index 00000000..bec2a8d4 --- /dev/null +++ b/lib/fib.h @@ -0,0 +1,119 @@ +/* + * BIRD Internet Routing Daemon -- Network prefix storage + * + * (c) 1998--2000 Martin Mares <mj@ucw.cz> + * (c) 2022 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_LIB_FIB_H_ +#define _BIRD_LIB_FIB_H_ + +/* + * BIRD FIBs are generic data structure for storing network prefixes. + * Also used for the master routing table. Currently implemented as + * a hash table. + * + * Available operations: + * - insertion of new entry + * - deletion of entry + * - searching for entry by network prefix + * - asynchronous retrieval of fib contents + */ + +struct fib; + +struct fib_node { + struct fib_node *next; /* Next in hash chain */ + struct fib_iterator *readers; /* List of readers of this node */ + net_addr addr[0]; +}; + +struct fib_iterator { /* See lib/slists.h for an explanation */ + struct fib_iterator *prev, *next; /* Must be synced with struct fib_node! */ + byte efef; /* 0xff to distinguish between iterator and node */ + byte pad[3]; + struct fib_node *node; /* Or NULL if freshly merged */ + uint hash; +}; + +typedef void (*fib_init_fn)(struct fib *, void *); + +struct fib { + pool *fib_pool; /* Pool holding all our data */ + slab *fib_slab; /* Slab holding all fib nodes */ + struct fib_node **hash_table; /* Node hash table */ + uint hash_size; /* Number of hash table entries (a power of two) */ + uint hash_order; /* Binary logarithm of hash_size */ + uint hash_shift; /* 32 - hash_order */ + uint addr_type; /* Type of address data stored in fib (NET_*) */ + uint node_size; /* FIB node size, 0 for nonuniform */ + uint node_offset; /* Offset of fib_node struct inside of user data */ + uint entries; /* Number of entries */ + uint entries_min, entries_max; /* Entry count limits (else start rehashing) */ + fib_init_fn init; /* Constructor */ +}; + +static inline void * fib_node_to_user(struct fib *f, struct fib_node *e) +{ return e ? (void *) ((char *) e - f->node_offset) : NULL; } + +static inline struct fib_node * fib_user_to_node(struct fib *f, void *e) +{ return e ? (void *) ((char *) e + f->node_offset) : NULL; } + +void fib_init(struct fib *f, pool *p, uint addr_type, uint node_size, uint node_offset, uint hash_order, fib_init_fn init); +void *fib_find(struct fib *, const net_addr *); /* Find or return NULL if doesn't exist */ +void *fib_get_chain(struct fib *f, const net_addr *a); /* Find first node in linked list from hash table */ +void *fib_get(struct fib *, const net_addr *); /* Find or create new if nonexistent */ +void *fib_route(struct fib *, const net_addr *); /* Longest-match routing lookup */ +void fib_delete(struct fib *, void *); /* Remove fib entry */ +void fib_free(struct fib *); /* Destroy the fib */ +void fib_check(struct fib *); /* Consistency check for debugging */ + +void fit_init(struct fib_iterator *, struct fib *); /* Internal functions, don't call */ +struct fib_node *fit_get(struct fib *, struct fib_iterator *); +void fit_put(struct fib_iterator *, struct fib_node *); +void fit_put_next(struct fib *f, struct fib_iterator *i, struct fib_node *n, uint hpos); +void fit_put_end(struct fib_iterator *i); +void fit_copy(struct fib *f, struct fib_iterator *dst, struct fib_iterator *src); + + +#define FIB_WALK(fib, type, z) do { \ + struct fib_node *fn_, **ff_ = (fib)->hash_table; \ + uint count_ = (fib)->hash_size; \ + type *z; \ + while (count_--) \ + for (fn_ = *ff_++; z = fib_node_to_user(fib, fn_); fn_=fn_->next) + +#define FIB_WALK_END } while (0) + +#define FIB_ITERATE_INIT(it, fib) fit_init(it, fib) + +#define FIB_ITERATE_START(fib, it, type, z) do { \ + struct fib_node *fn_ = fit_get(fib, it); \ + uint count_ = (fib)->hash_size; \ + uint hpos_ = (it)->hash; \ + type *z; \ + for(;;) { \ + if (!fn_) \ + { \ + if (++hpos_ >= count_) \ + break; \ + fn_ = (fib)->hash_table[hpos_]; \ + continue; \ + } \ + z = fib_node_to_user(fib, fn_); + +#define FIB_ITERATE_END fn_ = fn_->next; } } while(0) + +#define FIB_ITERATE_PUT(it) fit_put(it, fn_) + +#define FIB_ITERATE_PUT_NEXT(it, fib) fit_put_next(fib, it, fn_, hpos_) + +#define FIB_ITERATE_PUT_END(it) fit_put_end(it) + +#define FIB_ITERATE_UNLINK(it, fib) fit_get(fib, it) + +#define FIB_ITERATE_COPY(dst, src, fib) fit_copy(fib, dst, src) + +#endif diff --git a/lib/flowspec_test.c b/lib/flowspec_test.c index ed4afe51..03649b99 100644 --- a/lib/flowspec_test.c +++ b/lib/flowspec_test.c @@ -446,10 +446,7 @@ t_validation6(void) static int t_builder4(void) { - resource_init(); - struct flow_builder *fb = flow_builder_init(&root_pool); - linpool *lp = lp_new_default(&root_pool); /* Expectation */ @@ -492,7 +489,7 @@ t_builder4(void) flow_builder_set_type(fb, FLOW_TYPE_TCP_FLAGS); flow_builder_add_op_val(fb, 0, 0x55); - net_addr_flow4 *res = flow_builder4_finalize(fb, lp); + net_addr_flow4 *res = flow_builder4_finalize(fb, tmp_linpool); bt_assert(memcmp(res, expect, expect->length) == 0); @@ -529,8 +526,6 @@ t_builder6(void) { net_addr_ip6 ip; - resource_init(); - linpool *lp = lp_new_default(&root_pool); struct flow_builder *fb = flow_builder_init(&root_pool); fb->ipv6 = 1; @@ -574,7 +569,7 @@ t_builder6(void) flow_builder_set_type(fb, FLOW_TYPE_LABEL); flow_builder_add_op_val(fb, 0, 0x55); - net_addr_flow6 *res = flow_builder6_finalize(fb, lp); + net_addr_flow6 *res = flow_builder6_finalize(fb, tmp_linpool); bt_assert(memcmp(res, expect, expect->length) == 0); /* Reverse order */ @@ -601,7 +596,7 @@ t_builder6(void) flow_builder_set_type(fb, FLOW_TYPE_DST_PREFIX); flow_builder6_add_pfx(fb, &ip, 61); - res = flow_builder6_finalize(fb, lp); + res = flow_builder6_finalize(fb, tmp_linpool); bt_assert(memcmp(res, expect, expect->length) == 0); return 1; diff --git a/lib/hash_test.c b/lib/hash_test.c index 59beb7c0..4bce7017 100644 --- a/lib/hash_test.c +++ b/lib/hash_test.c @@ -61,7 +61,6 @@ dump_nodes(void) static void init_hash_(uint order) { - resource_init(); my_pool = rp_new(&root_pool, "Test pool"); HASH_INIT(hash, my_pool, order); diff --git a/lib/ip.c b/lib/ip.c index fcc72caf..4c5fa47f 100644 --- a/lib/ip.c +++ b/lib/ip.c @@ -85,25 +85,29 @@ ip4_classify(ip4_addr ad) u32 a = _I(ad); u32 b = a >> 24U; - if (b && b <= 0xdf) + if (b < 0xe0) { - if (b == 0x7f) + if (b == 0x00) /* 0.0.0.0/8 This network */ + return IADDR_INVALID; + + if (b == 0x7f) /* 127.0.0.0/8 Loopback address */ return IADDR_HOST | SCOPE_HOST; - else if ((b == 0x0a) || - ((a & 0xffff0000) == 0xc0a80000) || - ((a & 0xfff00000) == 0xac100000)) + + if ((b == 0x0a) || /* 10.0.0.0/8 Private range */ + ((a & 0xffff0000) == 0xc0a80000) || /* 192.168.0.0/16 Private range */ + ((a & 0xfff00000) == 0xac100000)) /* 172.16.0.0/12 Private range */ return IADDR_HOST | SCOPE_SITE; - else - return IADDR_HOST | SCOPE_UNIVERSE; + + return IADDR_HOST | SCOPE_UNIVERSE; } - if (b >= 0xe0 && b <= 0xef) + if (b < 0xf0) /* 224.0.0.0/4 Multicast address */ return IADDR_MULTICAST | SCOPE_UNIVERSE; - if (a == 0xffffffff) + if (a == 0xffffffff) /* 255.255.255.255 Broadcast address */ return IADDR_BROADCAST | SCOPE_LINK; - return IADDR_INVALID; + return IADDR_HOST | SCOPE_SITE; /* 240.0.0.0/4 Reserved / private */ } int diff --git a/lib/ip.h b/lib/ip.h index 5b179acb..20e7a336 100644 --- a/lib/ip.h +++ b/lib/ip.h @@ -279,11 +279,35 @@ static inline uint ip6_pxlen(ip6_addr a, ip6_addr b) return 32 * i + 31 - u32_log2(a.addr[i] ^ b.addr[i]); } +static inline int ip4_prefix_equal(ip4_addr a, ip4_addr b, uint n) +{ + return (_I(a) ^ _I(b)) < ((u64) 1 << (32 - n)); +} + +static inline int ip6_prefix_equal(ip6_addr a, ip6_addr b, uint n) +{ + uint n0 = n / 32; + uint n1 = n % 32; + + return + ((n0 <= 0) || (_I0(a) == _I0(b))) && + ((n0 <= 1) || (_I1(a) == _I1(b))) && + ((n0 <= 2) || (_I2(a) == _I2(b))) && + ((n0 <= 3) || (_I3(a) == _I3(b))) && + (!n1 || ((a.addr[n0] ^ b.addr[n0]) < (1u << (32 - n1)))); +} + static inline u32 ip4_getbit(ip4_addr a, uint pos) -{ return _I(a) & (0x80000000 >> pos); } +{ return (_I(a) >> (31 - pos)) & 1; } + +static inline u32 ip4_getbits(ip4_addr a, uint pos, uint n) +{ return (_I(a) >> ((32 - n) - pos)) & ((1u << n) - 1); } static inline u32 ip6_getbit(ip6_addr a, uint pos) -{ return a.addr[pos / 32] & (0x80000000 >> (pos % 32)); } +{ return (a.addr[pos / 32] >> (31 - (pos % 32))) & 0x1; } + +static inline u32 ip6_getbits(ip6_addr a, uint pos, uint n) +{ return (a.addr[pos / 32] >> ((32 - n) - (pos % 32))) & ((1u << n) - 1); } static inline u32 ip4_setbit(ip4_addr *a, uint pos) { return _I(*a) |= (0x80000000 >> pos); } @@ -297,6 +321,13 @@ static inline u32 ip4_clrbit(ip4_addr *a, uint pos) static inline u32 ip6_clrbit(ip6_addr *a, uint pos) { return a->addr[pos / 32] &= ~(0x80000000 >> (pos % 32)); } +static inline ip4_addr ip4_setbits(ip4_addr a, uint pos, uint val) +{ _I(a) |= val << (31 - pos); return a; } + +static inline ip6_addr ip6_setbits(ip6_addr a, uint pos, uint val) +{ a.addr[pos / 32] |= val << (31 - pos % 32); return a; } + + static inline ip4_addr ip4_opposite_m1(ip4_addr a) { return _MI4(_I(a) ^ 1); } @@ -331,11 +362,7 @@ static inline ip6_addr ip6_hton(ip6_addr a) static inline ip6_addr ip6_ntoh(ip6_addr a) { return _MI6(ntohl(_I0(a)), ntohl(_I1(a)), ntohl(_I2(a)), ntohl(_I3(a))); } -#define MPLS_MAX_LABEL_STACK 8 -typedef struct mpls_label_stack { - uint len; - u32 stack[MPLS_MAX_LABEL_STACK]; -} mpls_label_stack; +#define MPLS_MAX_LABEL_STACK 16 static inline int mpls_get(const char *buf, int buflen, u32 *stack) diff --git a/lib/ip_test.c b/lib/ip_test.c index 36d10d68..eee0a427 100644 --- a/lib/ip_test.c +++ b/lib/ip_test.c @@ -167,6 +167,70 @@ t_ip6_ntop(void) return bt_assert_batch(test_vectors, test_ipa_ntop, bt_fmt_ipa, bt_fmt_str); } +static int +t_ip4_prefix_equal(void) +{ + bt_assert( ip4_prefix_equal(ip4_from_u32(0x12345678), ip4_from_u32(0x1234ffff), 16)); + bt_assert(!ip4_prefix_equal(ip4_from_u32(0x12345678), ip4_from_u32(0x1234ffff), 17)); + bt_assert( ip4_prefix_equal(ip4_from_u32(0x12345678), ip4_from_u32(0x12345000), 21)); + bt_assert(!ip4_prefix_equal(ip4_from_u32(0x12345678), ip4_from_u32(0x12345000), 22)); + + bt_assert( ip4_prefix_equal(ip4_from_u32(0x00000000), ip4_from_u32(0xffffffff), 0)); + bt_assert( ip4_prefix_equal(ip4_from_u32(0x12345678), ip4_from_u32(0x12345678), 0)); + + bt_assert( ip4_prefix_equal(ip4_from_u32(0x12345678), ip4_from_u32(0x12345678), 32)); + bt_assert(!ip4_prefix_equal(ip4_from_u32(0x12345678), ip4_from_u32(0x12345679), 32)); + bt_assert(!ip4_prefix_equal(ip4_from_u32(0x12345678), ip4_from_u32(0x92345678), 32)); + + return 1; +} + +static int +t_ip6_prefix_equal(void) +{ + bt_assert( ip6_prefix_equal(ip6_build(0x20010db8, 0x12345678, 0x10101010, 0x20202020), + ip6_build(0x20010db8, 0x1234ffff, 0xfefefefe, 0xdcdcdcdc), + 48)); + + bt_assert(!ip6_prefix_equal(ip6_build(0x20010db8, 0x12345678, 0x10101010, 0x20202020), + ip6_build(0x20010db8, 0x1234ffff, 0xfefefefe, 0xdcdcdcdc), + 49)); + + bt_assert(!ip6_prefix_equal(ip6_build(0x20010db8, 0x12345678, 0x10101010, 0x20202020), + ip6_build(0x20020db8, 0x12345678, 0xfefefefe, 0xdcdcdcdc), + 48)); + + bt_assert( ip6_prefix_equal(ip6_build(0x20010db8, 0x12345678, 0x10101010, 0x20202020), + ip6_build(0x20010db8, 0x12345678, 0xfefefefe, 0xdcdcdcdc), + 64)); + + bt_assert(!ip6_prefix_equal(ip6_build(0x20010db8, 0x12345678, 0x10101010, 0x20202020), + ip6_build(0x20010db8, 0x1234567e, 0xfefefefe, 0xdcdcdcdc), + 64)); + + bt_assert( ip6_prefix_equal(ip6_build(0x20010db8, 0x12345678, 0x10101010, 0x20002020), + ip6_build(0x20010db8, 0x12345678, 0x10101010, 0x20202020), + 106)); + + bt_assert(!ip6_prefix_equal(ip6_build(0x20010db8, 0x12345678, 0x10101010, 0x20002020), + ip6_build(0x20010db8, 0x12345678, 0x10101010, 0x20202020), + 107)); + + bt_assert( ip6_prefix_equal(ip6_build(0xfeef0db8, 0x87654321, 0x10101010, 0x20202020), + ip6_build(0x20010db8, 0x12345678, 0xfefefefe, 0xdcdcdcdc), + 0)); + + bt_assert( ip6_prefix_equal(ip6_build(0x20010db8, 0x12345678, 0x10101010, 0x20202020), + ip6_build(0x20010db8, 0x12345678, 0x10101010, 0x20202020), + 128)); + + bt_assert(!ip6_prefix_equal(ip6_build(0x20010db8, 0x12345678, 0x10101010, 0x20202020), + ip6_build(0x20010db8, 0x12345678, 0x10101010, 0x20202021), + 128)); + + return 1; +} + int main(int argc, char *argv[]) { @@ -176,6 +240,8 @@ main(int argc, char *argv[]) bt_test_suite(t_ip6_pton, "Converting IPv6 string to ip6_addr struct"); bt_test_suite(t_ip4_ntop, "Converting ip4_addr struct to IPv4 string"); bt_test_suite(t_ip6_ntop, "Converting ip6_addr struct to IPv6 string"); + bt_test_suite(t_ip4_prefix_equal, "Testing ip4_prefix_equal()"); + bt_test_suite(t_ip6_prefix_equal, "Testing ip6_prefix_equal()"); return bt_exit_value(); } diff --git a/lib/lists.h b/lib/lists.h index 479f4ed1..7e6d5467 100644 --- a/lib/lists.h +++ b/lib/lists.h @@ -42,6 +42,7 @@ typedef union list { /* In fact two overlayed nodes */ }; } list; +#define STATIC_LIST_INIT(name) name = { .head = &name.tail_node, .tail = &name.head_node, .null = NULL } #define NODE (node *) #define HEAD(list) ((void *)((list).head)) diff --git a/lib/mempool.c b/lib/mempool.c index 8f300b81..33eaec86 100644 --- a/lib/mempool.c +++ b/lib/mempool.c @@ -27,26 +27,24 @@ struct lp_chunk { struct lp_chunk *next; - uint size; uintptr_t data_align[0]; byte data[0]; }; -const int lp_chunk_size = sizeof(struct lp_chunk); +#define LP_DATA_SIZE (page_size - OFFSETOF(struct lp_chunk, data)) struct linpool { resource r; byte *ptr, *end; - pool *p; struct lp_chunk *first, *current; /* Normal (reusable) chunks */ struct lp_chunk *first_large; /* Large chunks */ - uint chunk_size, threshold, total:31, use_pages:1, total_large; + uint total, total_large; }; static void lp_free(resource *); static void lp_dump(resource *); static resource *lp_lookup(resource *, unsigned long); -static size_t lp_memsize(resource *r); +static struct resmem lp_memsize(resource *r); static struct resclass lp_class = { "LinPool", @@ -60,26 +58,14 @@ static struct resclass lp_class = { /** * lp_new - create a new linear memory pool * @p: pool - * @blk: block size * * lp_new() creates a new linear memory pool resource inside the pool @p. - * The linear pool consists of a list of memory chunks of size at least - * @blk. + * The linear pool consists of a list of memory chunks of page size. */ linpool -*lp_new(pool *p, uint blk) +*lp_new(pool *p) { - linpool *m = ralloc(p, &lp_class); - m->p = p; - if (!blk) - { - m->use_pages = 1; - blk = page_size - lp_chunk_size; - } - - m->chunk_size = blk; - m->threshold = 3*blk/4; - return m; + return ralloc(p, &lp_class); } /** @@ -110,14 +96,13 @@ lp_alloc(linpool *m, uint size) else { struct lp_chunk *c; - if (size >= m->threshold) + if (size > LP_DATA_SIZE) { /* Too large => allocate large chunk */ c = xmalloc(sizeof(struct lp_chunk) + size); m->total_large += size; c->next = m->first_large; m->first_large = c; - c->size = size; } else { @@ -129,14 +114,10 @@ lp_alloc(linpool *m, uint size) else { /* Need to allocate a new chunk */ - if (m->use_pages) - c = alloc_page(m->p); - else - c = xmalloc(sizeof(struct lp_chunk) + m->chunk_size); + c = alloc_page(); - m->total += m->chunk_size; + m->total += LP_DATA_SIZE; c->next = NULL; - c->size = m->chunk_size; if (m->current) m->current->next = c; @@ -145,7 +126,7 @@ lp_alloc(linpool *m, uint size) } m->current = c; m->ptr = c->data + size; - m->end = c->data + m->chunk_size; + m->end = c->data + LP_DATA_SIZE; } return c->data; } @@ -207,7 +188,7 @@ lp_flush(linpool *m) /* Move ptr to the first chunk and free all large chunks */ m->current = c = m->first; m->ptr = c ? c->data : NULL; - m->end = c ? c->data + m->chunk_size : NULL; + m->end = c ? c->data + LP_DATA_SIZE : NULL; while (c = m->first_large) { @@ -230,6 +211,7 @@ lp_save(linpool *m, lp_state *p) { p->current = m->current; p->large = m->first_large; + p->total_large = m->total_large; p->ptr = m->ptr; } @@ -251,12 +233,12 @@ lp_restore(linpool *m, lp_state *p) /* Move ptr to the saved pos and free all newer large chunks */ m->current = c = p->current; m->ptr = p->ptr; - m->end = c ? c->data + m->chunk_size : NULL; + m->end = c ? c->data + LP_DATA_SIZE : NULL; + m->total_large = p->total_large; while ((c = m->first_large) && (c != p->large)) { m->first_large = c->next; - m->total_large -= c->size; xfree(c); } } @@ -270,10 +252,7 @@ lp_free(resource *r) for(d=m->first; d; d = c) { c = d->next; - if (m->use_pages) - free_page(m->p, d); - else - xfree(d); + free_page(d); } for(d=m->first_large; d; d = c) { @@ -293,30 +272,33 @@ lp_dump(resource *r) ; for(cntl=0, c=m->first_large; c; c=c->next, cntl++) ; - debug("(chunk=%d threshold=%d count=%d+%d total=%d+%d)\n", - m->chunk_size, - m->threshold, + debug("(count=%d+%d total=%d+%d)\n", cnt, cntl, m->total, m->total_large); } -static size_t +static struct resmem lp_memsize(resource *r) { linpool *m = (linpool *) r; - struct lp_chunk *c; - int cnt = 0; + struct resmem sz = { + .overhead = sizeof(struct linpool) + ALLOC_OVERHEAD, + .effective = m->total_large, + }; - for(c=m->first; c; c=c->next) - cnt++; - for(c=m->first_large; c; c=c->next) - cnt++; + for (struct lp_chunk *c = m->first_large; c; c = c->next) + sz.overhead += sizeof(struct lp_chunk) + ALLOC_OVERHEAD; - return ALLOC_OVERHEAD + sizeof(struct linpool) + - cnt * (ALLOC_OVERHEAD + sizeof(struct lp_chunk)) + - m->total + m->total_large; + uint regular = 0; + for (struct lp_chunk *c = m->first; c; c = c->next) + regular++; + + sz.effective += LP_DATA_SIZE * regular; + sz.overhead += (sizeof(struct lp_chunk) + ALLOC_OVERHEAD) * regular; + + return sz; } @@ -327,10 +309,7 @@ lp_lookup(resource *r, unsigned long a) struct lp_chunk *c; for(c=m->first; c; c=c->next) - if ((unsigned long) c->data <= a && (unsigned long) c->data + c->size > a) - return r; - for(c=m->first_large; c; c=c->next) - if ((unsigned long) c->data <= a && (unsigned long) c->data + c->size > a) + if ((unsigned long) c->data <= a && (unsigned long) c->data + LP_DATA_SIZE > a) return r; return NULL; } diff --git a/lib/net.h b/lib/net.h index 8eb4c7b9..9f4a00ad 100644 --- a/lib/net.h +++ b/lib/net.h @@ -38,6 +38,7 @@ #define NB_IP (NB_IP4 | NB_IP6) #define NB_VPN (NB_VPN4 | NB_VPN6) +#define NB_ROA (NB_ROA4 | NB_ROA6) #define NB_FLOW (NB_FLOW4 | NB_FLOW6) #define NB_DEST (NB_IP | NB_IP6_SADR | NB_VPN | NB_MPLS) #define NB_ANY 0xffffffff diff --git a/lib/printf.c b/lib/printf.c index 236df427..424d545f 100644 --- a/lib/printf.c +++ b/lib/printf.c @@ -568,3 +568,51 @@ buffer_puts(buffer *buf, const char *str) buf->pos = (bp < be) ? bp : buf->end; } + +#define POOL_PRINTF_MAXBUF 1024 + +char *mb_vsprintf(pool *p, const char *fmt, va_list args) +{ + char buf[POOL_PRINTF_MAXBUF]; + int count = bvsnprintf(buf, POOL_PRINTF_MAXBUF, fmt, args); + + if (count < 0) + bug("Attempted to mb_vsprintf() a too long string"); + + char *out = mb_alloc(p, count + 1); + memcpy(out, buf, count + 1); + return out; +} + +char *mb_sprintf(pool *p, const char *fmt, ...) +{ + va_list args; + char *out; + va_start(args, fmt); + out = mb_vsprintf(p, fmt, args); + va_end(args); + return out; +} + +char *lp_vsprintf(linpool *p, const char *fmt, va_list args) +{ + char buf[POOL_PRINTF_MAXBUF]; + int count = bvsnprintf(buf, POOL_PRINTF_MAXBUF, fmt, args); + + if (count < 0) + bug("Attempted to mb_vsprintf() a too long string"); + + char *out = lp_alloc(p, count + 1); + memcpy(out, buf, count + 1); + return out; +} + +char *lp_sprintf(linpool *p, const char *fmt, ...) +{ + va_list args; + char *out; + va_start(args, fmt); + out = lp_vsprintf(p, fmt, args); + va_end(args); + return out; +} diff --git a/lib/resource.c b/lib/resource.c index e80b315b..c31d9889 100644 --- a/lib/resource.c +++ b/lib/resource.c @@ -2,6 +2,7 @@ * BIRD Resource Manager * * (c) 1998--2000 Martin Mares <mj@ucw.cz> + * (c) 2021 Maria Matejka <mq@jmq.cz> * * Can be freely distributed and used under the terms of the GNU GPL. */ @@ -31,22 +32,13 @@ struct pool { resource r; list inside; - struct pool_pages *pages; const char *name; }; -struct pool_pages { - uint free; - uint used; - void *ptr[0]; -}; - -#define POOL_PAGES_MAX ((page_size - sizeof(struct pool_pages)) / sizeof (void *)) - static void pool_dump(resource *); static void pool_free(resource *); static resource *pool_lookup(resource *, unsigned long); -static size_t pool_memsize(resource *P); +static struct resmem pool_memsize(resource *P); static struct resclass pool_class = { "Pool", @@ -59,9 +51,6 @@ static struct resclass pool_class = { pool root_pool; -void *alloc_sys_page(void); -void free_sys_page(void *); - static int indent; /** @@ -81,6 +70,20 @@ rp_new(pool *p, const char *name) return z; } +pool * +rp_newf(pool *p, const char *fmt, ...) +{ + pool *z = rp_new(p, NULL); + + va_list args; + va_start(args, fmt); + z->name = mb_vsprintf(p, fmt, args); + va_end(args); + + return z; +} + + static void pool_free(resource *P) { @@ -94,14 +97,6 @@ pool_free(resource *P) xfree(r); r = rr; } - - if (p->pages) - { - ASSERT_DIE(!p->pages->used); - for (uint i=0; i<p->pages->free; i++) - free_sys_page(p->pages->ptr[i]); - free_sys_page(p->pages); - } } static void @@ -117,18 +112,22 @@ pool_dump(resource *P) indent -= 3; } -static size_t +static struct resmem pool_memsize(resource *P) { pool *p = (pool *) P; resource *r; - size_t sum = sizeof(pool) + ALLOC_OVERHEAD; + struct resmem sum = { + .effective = 0, + .overhead = sizeof(pool) + ALLOC_OVERHEAD, + }; WALK_LIST(r, p->inside) - sum += rmemsize(r); - - if (p->pages) - sum += page_size * (p->pages->used + p->pages->free + 1); + { + struct resmem add = rmemsize(r); + sum.effective += add.effective; + sum.overhead += add.overhead; + } return sum; } @@ -216,14 +215,17 @@ rdump(void *res) debug("NULL\n"); } -size_t +struct resmem rmemsize(void *res) { resource *r = res; if (!r) - return 0; + return (struct resmem) {}; if (!r->class->memsize) - return r->class->size + ALLOC_OVERHEAD; + return (struct resmem) { + .effective = r->class->size - sizeof(resource), + .overhead = ALLOC_OVERHEAD + sizeof(resource), + }; return r->class->memsize(r); } @@ -282,11 +284,33 @@ rlookup(unsigned long a) void resource_init(void) { + resource_sys_init(); + root_pool.r.class = &pool_class; root_pool.name = "Root"; init_list(&root_pool.inside); + tmp_init(&root_pool); } +_Thread_local struct tmp_resources tmp_res; + +void +tmp_init(pool *p) +{ + tmp_res.lp = lp_new_default(p); + tmp_res.parent = p; + tmp_res.pool = rp_new(p, "TMP"); +} + +void +tmp_flush(void) +{ + lp_flush(tmp_linpool); + rfree(tmp_res.pool); + tmp_res.pool = rp_new(tmp_res.parent, "TMP"); +} + + /** * DOC: Memory blocks * @@ -328,11 +352,14 @@ mbl_lookup(resource *r, unsigned long a) return NULL; } -static size_t +static struct resmem mbl_memsize(resource *r) { struct mblock *m = (struct mblock *) r; - return ALLOC_OVERHEAD + sizeof(struct mblock) + m->size; + return (struct resmem) { + .effective = m->size, + .overhead = ALLOC_OVERHEAD + sizeof(struct mblock), + }; } static struct resclass mb_class = { @@ -416,21 +443,6 @@ mb_realloc(void *m, unsigned size) return b->data; } -/** - * mb_move - move a memory block - * @m: memory block - * @p: target pool - * - * mb_move() moves the given memory block to another pool in the same way - * as rmove() moves a plain resource. - */ -void -mb_move(void *m, pool *p) -{ - struct mblock *b = SKIP_BACK(struct mblock, data, m); - rmove(b, p); -} - /** * mb_free - free a memory block @@ -448,39 +460,6 @@ mb_free(void *m) rfree(b); } -void * -alloc_page(pool *p) -{ - if (!p->pages) - { - p->pages = alloc_sys_page(); - p->pages->free = 0; - p->pages->used = 1; - } - else - p->pages->used++; - - if (p->pages->free) - { - void *ptr = p->pages->ptr[--p->pages->free]; - bzero(ptr, page_size); - return ptr; - } - else - return alloc_sys_page(); -} - -void -free_page(pool *p, void *ptr) -{ - ASSERT_DIE(p->pages); - p->pages->used--; - - if (p->pages->free >= POOL_PAGES_MAX) - return free_sys_page(ptr); - else - p->pages->ptr[p->pages->free++] = ptr; -} #define STEP_UP(x) ((x) + (x)/2 + 4) diff --git a/lib/resource.h b/lib/resource.h index 26030aea..4cedbf00 100644 --- a/lib/resource.h +++ b/lib/resource.h @@ -2,6 +2,7 @@ * BIRD Resource Manager * * (c) 1998--1999 Martin Mares <mj@ucw.cz> + * (c) 2021 Maria Matejka <mq@jmq.cz> * * Can be freely distributed and used under the terms of the GNU GPL. */ @@ -11,6 +12,11 @@ #include "lib/lists.h" +struct resmem { + size_t effective; /* Memory actually used for data storage */ + size_t overhead; /* Overhead memory imposed by allocator strategies */ +}; + /* Resource */ typedef struct resource { @@ -26,11 +32,11 @@ struct resclass { void (*free)(resource *); /* Freeing function */ void (*dump)(resource *); /* Dump to debug output */ resource *(*lookup)(resource *, unsigned long); /* Look up address (only for debugging) */ - size_t (*memsize)(resource *); /* Return size of memory used by the resource, may be NULL */ + struct resmem (*memsize)(resource *); /* Return size of memory used by the resource, may be NULL */ }; /* Estimate of system allocator overhead per item, for memory consumtion stats */ -#define ALLOC_OVERHEAD 8 +#define ALLOC_OVERHEAD 16 /* Generic resource manipulation */ @@ -38,9 +44,10 @@ typedef struct pool pool; void resource_init(void); pool *rp_new(pool *, const char *); /* Create new pool */ +pool *rp_newf(pool *, const char *, ...); /* Create a new pool with a formatted string as its name */ void rfree(void *); /* Free single resource */ void rdump(void *); /* Dump to debug output */ -size_t rmemsize(void *res); /* Return size of memory used by the resource */ +struct resmem rmemsize(void *res); /* Return size of memory used by the resource */ void rlookup(unsigned long); /* Look up address (only for debugging) */ void rmove(void *, pool *); /* Move to a different pool */ @@ -53,7 +60,6 @@ extern pool root_pool; void *mb_alloc(pool *, unsigned size); void *mb_allocz(pool *, unsigned size); void *mb_realloc(void *m, unsigned size); -void mb_move(void *, pool *); void mb_free(void *); /* Memory pools with linear allocation */ @@ -63,9 +69,10 @@ typedef struct linpool linpool; typedef struct lp_state { void *current, *large; byte *ptr; + uint total_large; } lp_state; -linpool *lp_new(pool *, unsigned blk); +linpool *lp_new(pool *); void *lp_alloc(linpool *, unsigned size); /* Aligned */ void *lp_allocu(linpool *, unsigned size); /* Unaligned */ void *lp_allocz(linpool *, unsigned size); /* With clear */ @@ -73,10 +80,23 @@ void lp_flush(linpool *); /* Free everything, but leave linpool */ void lp_save(linpool *m, lp_state *p); /* Save state */ void lp_restore(linpool *m, lp_state *p); /* Restore state */ -extern const int lp_chunk_size; -#define LP_GAS 1024 -#define LP_GOOD_SIZE(x) (((x + LP_GAS - 1) & (~(LP_GAS - 1))) - lp_chunk_size) -#define lp_new_default(p) lp_new(p, 0) +struct tmp_resources { + pool *pool, *parent; + linpool *lp; +}; + +extern _Thread_local struct tmp_resources tmp_res; + +#define tmp_linpool tmp_res.lp +#define tmp_alloc(sz) lp_alloc(tmp_linpool, sz) +#define tmp_allocu(sz) lp_allocu(tmp_linpool, sz) +#define tmp_allocz(sz) lp_allocz(tmp_linpool, sz) + +void tmp_init(pool *p); +void tmp_flush(void); + + +#define lp_new_default lp_new /* Slabs */ @@ -85,7 +105,7 @@ typedef struct slab slab; slab *sl_new(pool *, unsigned size); void *sl_alloc(slab *); void *sl_allocz(slab *); -void sl_free(slab *, void *); +void sl_free(void *); /* * Low-level memory allocation functions, please don't use @@ -94,12 +114,12 @@ void sl_free(slab *, void *); void buffer_realloc(void **buf, unsigned *size, unsigned need, unsigned item_size); -extern long page_size; - /* Allocator of whole pages; for use in slabs and other high-level allocators. */ -void *alloc_page(pool *); -void free_page(pool *, void *); -#define PAGE_HEAD(x) ((void *) (((intptr_t) (x)) & ~(page_size-1))) +extern long page_size; +void *alloc_page(void); +void free_page(void *); + +void resource_sys_init(void); #ifdef HAVE_LIBDMALLOC /* diff --git a/lib/route.h b/lib/route.h new file mode 100644 index 00000000..283a3760 --- /dev/null +++ b/lib/route.h @@ -0,0 +1,433 @@ +/* + * BIRD Internet Routing Daemon -- Routing data structures + * + * (c) 1998--2000 Martin Mares <mj@ucw.cz> + * (c) 2022 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_LIB_ROUTE_H_ +#define _BIRD_LIB_ROUTE_H_ + +#include "lib/type.h" + +struct network; +struct proto; +struct cli; + + +typedef struct rte { + struct rta *attrs; /* Attributes of this route */ + const net_addr *net; /* Network this RTE belongs to */ + struct rte_src *src; /* Route source that created the route */ + struct rt_import_hook *sender; /* Import hook used to send the route to the routing table */ + btime lastmod; /* Last modified (set by table) */ + u32 id; /* Table specific route id */ + byte flags; /* Table-specific flags */ + byte pflags; /* Protocol-specific flags */ + u8 generation; /* If this route import is based on other previously exported route, + this value should be 1 + MAX(generation of the parent routes). + Otherwise the route is independent and this value is zero. */ +} rte; + +#define REF_FILTERED 2 /* Route is rejected by import filter */ +#define REF_STALE 4 /* Route is stale in a refresh cycle */ +#define REF_DISCARD 8 /* Route is scheduled for discard */ +#define REF_MODIFY 16 /* Route is scheduled for modify */ + +/* Route is valid for propagation (may depend on other flags in the future), accepts NULL */ +static inline int rte_is_valid(rte *r) { return r && !(r->flags & REF_FILTERED); } + +/* Route just has REF_FILTERED flag */ +static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED); } + +struct rte_src { + struct rte_src *next; /* Hash chain */ + struct proto *proto; /* Protocol the source is based on */ + u32 private_id; /* Private ID, assigned by the protocol */ + u32 global_id; /* Globally unique ID of the source */ + unsigned uc; /* Use count */ +}; + + +struct rte_src *rt_find_source(struct proto *p, u32 id); +struct rte_src *rt_get_source(struct proto *p, u32 id); +static inline void rt_lock_source(struct rte_src *src) { src->uc++; } +static inline void rt_unlock_source(struct rte_src *src) { src->uc--; } +void rt_prune_sources(void); + +/* + * Route Attributes + * + * Beware: All standard BGP attributes must be represented here instead + * of making them local to the route. This is needed to ensure proper + * construction of BGP route attribute lists. + */ + +/* Nexthop structure */ +struct nexthop { + ip_addr gw; /* Next hop */ + struct iface *iface; /* Outgoing interface */ + byte flags; + byte weight; + byte labels; /* Number of all labels */ + u32 label[0]; +}; + +/* For packing one into eattrs */ +struct nexthop_adata { + struct adata ad; + /* There is either a set of nexthops or a special destination (RTD_*) */ + union { + struct nexthop nh; + uint dest; + }; +}; + +#define NEXTHOP_DEST_SIZE (OFFSETOF(struct nexthop_adata, dest) + sizeof(uint) - OFFSETOF(struct adata, data)) +#define NEXTHOP_DEST_LITERAL(x) ((struct nexthop_adata) { \ + .ad.length = NEXTHOP_DEST_SIZE, .dest = (x), }) + +#define RNF_ONLINK 0x1 /* Gateway is onlink regardless of IP ranges */ + + +typedef struct rta { + struct rta *next, **pprev; /* Hash chain */ + u32 uc; /* Use count */ + u32 hash_key; /* Hash over important fields */ + struct ea_list *eattrs; /* Extended Attribute chain */ + u16 cached:1; /* Are attributes cached? */ +} rta; + +#define RTS_STATIC 1 /* Normal static route */ +#define RTS_INHERIT 2 /* Route inherited from kernel */ +#define RTS_DEVICE 3 /* Device route */ +#define RTS_STATIC_DEVICE 4 /* Static device route */ +#define RTS_REDIRECT 5 /* Learned via redirect */ +#define RTS_RIP 6 /* RIP route */ +#define RTS_OSPF 7 /* OSPF route */ +#define RTS_OSPF_IA 8 /* OSPF inter-area route */ +#define RTS_OSPF_EXT1 9 /* OSPF external route type 1 */ +#define RTS_OSPF_EXT2 10 /* OSPF external route type 2 */ +#define RTS_BGP 11 /* BGP route */ +#define RTS_PIPE 12 /* Inter-table wormhole */ +#define RTS_BABEL 13 /* Babel route */ +#define RTS_RPKI 14 /* Route Origin Authorization */ +#define RTS_PERF 15 /* Perf checker */ +#define RTS_MAX 16 + +#define RTD_NONE 0 /* Undefined next hop */ +#define RTD_UNICAST 1 /* A standard next hop */ +#define RTD_BLACKHOLE 2 /* Silently drop packets */ +#define RTD_UNREACHABLE 3 /* Reject as unreachable */ +#define RTD_PROHIBIT 4 /* Administratively prohibited */ +#define RTD_MAX 5 + +extern const char * rta_dest_names[RTD_MAX]; + +static inline const char *rta_dest_name(uint n) +{ return (n < RTD_MAX) ? rta_dest_names[n] : "???"; } + + +/* + * Extended Route Attributes + */ + +typedef struct eattr { + word id; /* EA_CODE(PROTOCOL_..., protocol-dependent ID) */ + byte flags; /* Protocol-dependent flags */ + byte type; /* Attribute type */ + byte rfu:5; + byte originated:1; /* The attribute has originated locally */ + byte fresh:1; /* An uncached attribute (e.g. modified in export filter) */ + byte undef:1; /* Explicitly undefined */ + + PADDING(unused, 3, 3); + + union bval u; +} eattr; + + +#define EA_CODE_MASK 0xffff +#define EA_ALLOW_UNDEF 0x10000 /* ea_find: allow EAF_TYPE_UNDEF */ +#define EA_BIT(n) ((n) << 24) /* Used in bitfield accessors */ +#define EA_BIT_GET(ea) ((ea) >> 24) + +typedef struct ea_list { + struct ea_list *next; /* In case we have an override list */ + byte flags; /* Flags: EALF_... */ + byte rfu; + word count; /* Number of attributes */ + eattr attrs[0]; /* Attribute definitions themselves */ +} ea_list; + +#define EALF_SORTED 1 /* Attributes are sorted by code */ +#define EALF_BISECT 2 /* Use interval bisection for searching */ +#define EALF_CACHED 4 /* Attributes belonging to cached rta */ + +struct ea_class { +#define EA_CLASS_INSIDE \ + const char *name; /* Name (both print and filter) */ \ + struct symbol *sym; /* Symbol to export to configs */ \ + uint id; /* Autoassigned attribute ID */ \ + uint uc; /* Reference count */ \ + btype type; /* Data type ID */ \ + uint readonly:1; /* This attribute can't be changed by filters */ \ + uint conf:1; /* Requested by config */ \ + void (*format)(const eattr *ea, byte *buf, uint size); \ + void (*stored)(const eattr *ea); /* When stored into global hash */ \ + void (*freed)(const eattr *ea); /* When released from global hash */ \ + + EA_CLASS_INSIDE; +}; + +struct ea_class_ref { + resource r; + struct ea_class *class; +}; + +void ea_register_init(struct ea_class *); +struct ea_class_ref *ea_register_alloc(pool *, struct ea_class); + +#define EA_REGISTER_ALL_HELPER(x) ea_register_init(x); +#define EA_REGISTER_ALL(...) MACRO_FOREACH(EA_REGISTER_ALL_HELPER, __VA_ARGS__) + +struct ea_class *ea_class_find_by_id(uint id); +struct ea_class *ea_class_find_by_name(const char *name); +static inline struct ea_class *ea_class_self(struct ea_class *self) { return self; } +#define ea_class_find(_arg) _Generic((_arg), \ + uint: ea_class_find_by_id, \ + word: ea_class_find_by_id, \ + char *: ea_class_find_by_name, \ + const char *: ea_class_find_by_name, \ + struct ea_class *: ea_class_self)(_arg) + +struct ea_walk_state { + ea_list *eattrs; /* Ccurrent ea_list, initially set by caller */ + eattr *ea; /* Current eattr, initially NULL */ + u32 visited[4]; /* Bitfield, limiting max to 128 */ +}; + +#define ea_find(_l, _arg) _Generic((_arg), uint: ea_find_by_id, struct ea_class *: ea_find_by_class, char *: ea_find_by_name)(_l, _arg) +eattr *ea_find_by_id(ea_list *, unsigned ea); +static inline eattr *ea_find_by_class(ea_list *l, const struct ea_class *def) +{ return ea_find_by_id(l, def->id); } +static inline eattr *ea_find_by_name(ea_list *l, const char *name) +{ + const struct ea_class *def = ea_class_find_by_name(name); + return def ? ea_find_by_class(l, def) : NULL; +} + +#define ea_get_int(_l, _ident, _def) ({ \ + struct ea_class *cls = ea_class_find((_ident)); \ + ASSERT_DIE(cls->type & EAF_EMBEDDED); \ + const eattr *ea = ea_find((_l), cls->id); \ + (ea ? ea->u.data : (_def)); \ + }) + +#define ea_get_ip(_l, _ident, _def) ({ \ + struct ea_class *cls = ea_class_find((_ident)); \ + ASSERT_DIE(cls->type == T_IP); \ + const eattr *ea = ea_find((_l), cls->id); \ + (ea ? *((const ip_addr *) ea->u.ptr->data) : (_def)); \ + }) + +eattr *ea_walk(struct ea_walk_state *s, uint id, uint max); +void ea_dump(ea_list *); +int ea_same(ea_list *x, ea_list *y); /* Test whether two ea_lists are identical */ +uint ea_hash(ea_list *e); /* Calculate 16-bit hash value */ +ea_list *ea_append(ea_list *to, ea_list *what); +void ea_format_bitfield(const struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max); + +/* Normalize ea_list; allocates the result from tmp_linpool */ +ea_list *ea_normalize(const ea_list *e); + +uint ea_list_size(ea_list *); +void ea_list_copy(ea_list *dest, ea_list *src, uint size); + +#define EA_LOCAL_LIST(N) struct { ea_list l; eattr a[N]; } + +#define EA_LITERAL_EMBEDDED(_class, _flags, _val) ({ \ + btype _type = (_class)->type; \ + ASSERT_DIE(_type & EAF_EMBEDDED); \ + EA_LITERAL_GENERIC((_class)->id, _type, _flags, .u.i = _val); \ + }) + +#define EA_LITERAL_STORE_ADATA(_class, _flags, _buf, _len) ({ \ + btype _type = (_class)->type; \ + ASSERT_DIE(!(_type & EAF_EMBEDDED)); \ + EA_LITERAL_GENERIC((_class)->id, _type, _flags, .u.ad = tmp_store_adata((_buf), (_len))); \ + }) + +#define EA_LITERAL_DIRECT_ADATA(_class, _flags, _adata) ({ \ + btype _type = (_class)->type; \ + ASSERT_DIE(!(_type & EAF_EMBEDDED)); \ + EA_LITERAL_GENERIC((_class)->id, _type, _flags, .u.ad = _adata); \ + }) + +#define EA_LITERAL_GENERIC(_id, _type, _flags, ...) \ + ((eattr) { .id = _id, .type = _type, .flags = _flags, __VA_ARGS__ }) + +static inline eattr * +ea_set_attr(ea_list **to, eattr a) +{ + EA_LOCAL_LIST(1) *ea = tmp_alloc(sizeof(*ea)); + *ea = (typeof(*ea)) { + .l.flags = EALF_SORTED, + .l.count = 1, + .l.next = *to, + .a[0] = a, + }; + + *to = &ea->l; + return &ea->a[0]; +} + +static inline void +ea_unset_attr(ea_list **to, _Bool local, const struct ea_class *def) +{ + ea_set_attr(to, EA_LITERAL_GENERIC(def->id, 0, 0, + .fresh = local, .originated = local, .undef = 1)); +} + +static inline void +ea_set_attr_u32(ea_list **to, const struct ea_class *def, uint flags, u64 data) +{ ea_set_attr(to, EA_LITERAL_EMBEDDED(def, flags, data)); } + +static inline void +ea_set_attr_data(ea_list **to, const struct ea_class *def, uint flags, const void *data, uint len) +{ ea_set_attr(to, EA_LITERAL_STORE_ADATA(def, flags, data, len)); } + +static inline void +ea_copy_attr(ea_list **to, ea_list *from, const struct ea_class *def) +{ + eattr *e = ea_find_by_class(from, def); + if (e) + if (e->type & EAF_EMBEDDED) + ea_set_attr_u32(to, def, e->flags, e->u.data); + else + ea_set_attr_data(to, def, e->flags, e->u.ptr->data, e->u.ptr->length); + else + ea_unset_attr(to, 0, def); +} + +/* + * Common route attributes + */ + +/* Preference: first-order comparison */ +extern struct ea_class ea_gen_preference; +static inline u32 rt_get_preference(rte *rt) +{ return ea_get_int(rt->attrs->eattrs, &ea_gen_preference, 0); } + +/* IGP metric: second-order comparison */ +extern struct ea_class ea_gen_igp_metric; +u32 rt_get_igp_metric(const rte *rt); +#define IGP_METRIC_UNKNOWN 0x80000000 /* Default igp_metric used when no other + protocol-specific metric is availabe */ + +/* From: Advertising router */ +extern struct ea_class ea_gen_from; + +/* Source: An old method to devise the route source protocol and kind. + * To be superseded in a near future by something more informative. */ +extern struct ea_class ea_gen_source; +static inline u32 rt_get_source_attr(const rte *rt) +{ return ea_get_int(rt->attrs->eattrs, &ea_gen_source, 0); } + +/* Flowspec validation result */ +enum flowspec_valid { + FLOWSPEC_UNKNOWN = 0, + FLOWSPEC_VALID = 1, + FLOWSPEC_INVALID = 2, + FLOWSPEC__MAX, +}; + +extern const char * flowspec_valid_names[FLOWSPEC__MAX]; +static inline const char *flowspec_valid_name(enum flowspec_valid v) +{ return (v < FLOWSPEC__MAX) ? flowspec_valid_names[v] : "???"; } + +extern struct ea_class ea_gen_flowspec_valid; +static inline enum flowspec_valid rt_get_flowspec_valid(rte *rt) +{ return ea_get_int(rt->attrs->eattrs, &ea_gen_flowspec_valid, FLOWSPEC_UNKNOWN); } + +/* Next hop: For now, stored as adata */ +extern struct ea_class ea_gen_nexthop; + +static inline void ea_set_dest(struct ea_list **to, uint flags, uint dest) +{ + struct nexthop_adata nhad = NEXTHOP_DEST_LITERAL(dest); + ea_set_attr_data(to, &ea_gen_nexthop, flags, &nhad.ad.data, nhad.ad.length); +} + +/* Next hop structures */ + +#define NEXTHOP_ALIGNMENT (_Alignof(struct nexthop)) +#define NEXTHOP_MAX_SIZE (sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK) +#define NEXTHOP_SIZE(_nh) NEXTHOP_SIZE_CNT(((_nh)->labels)) +#define NEXTHOP_SIZE_CNT(cnt) BIRD_ALIGN((sizeof(struct nexthop) + sizeof(u32) * (cnt)), NEXTHOP_ALIGNMENT) +#define nexthop_size(nh) NEXTHOP_SIZE((nh)) + +#define NEXTHOP_NEXT(_nh) ((void *) (_nh) + NEXTHOP_SIZE(_nh)) +#define NEXTHOP_END(_nhad) ((_nhad)->ad.data + (_nhad)->ad.length) +#define NEXTHOP_VALID(_nh, _nhad) ((void *) (_nh) < (void *) NEXTHOP_END(_nhad)) +#define NEXTHOP_ONE(_nhad) (NEXTHOP_NEXT(&(_nhad)->nh) == NEXTHOP_END(_nhad)) + +#define NEXTHOP_WALK(_iter, _nhad) for ( \ + struct nexthop *_iter = &(_nhad)->nh; \ + (void *) _iter < (void *) NEXTHOP_END(_nhad); \ + _iter = NEXTHOP_NEXT(_iter)) + + +static inline int nexthop_same(struct nexthop_adata *x, struct nexthop_adata *y) +{ return adata_same(&x->ad, &y->ad); } +struct nexthop_adata *nexthop_merge(struct nexthop_adata *x, struct nexthop_adata *y, int max, linpool *lp); +struct nexthop_adata *nexthop_sort(struct nexthop_adata *x, linpool *lp); +int nexthop_is_sorted(struct nexthop_adata *x); + +#define NEXTHOP_IS_REACHABLE(nhad) ((nhad)->ad.length > NEXTHOP_DEST_SIZE) + +/* Route has regular, reachable nexthop (i.e. not RTD_UNREACHABLE and like) */ +static inline int rte_is_reachable(rte *r) +{ + eattr *nhea = ea_find(r->attrs->eattrs, &ea_gen_nexthop); + if (!nhea) + return 0; + + struct nexthop_adata *nhad = (void *) nhea->u.ptr; + return NEXTHOP_IS_REACHABLE(nhad); +} + +static inline int nhea_dest(eattr *nhea) +{ + if (!nhea) + return RTD_NONE; + + struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; + if (NEXTHOP_IS_REACHABLE(nhad)) + return RTD_UNICAST; + else + return nhad->dest; +} + +static inline int rte_dest(const rte *r) +{ + return nhea_dest(ea_find(r->attrs->eattrs, &ea_gen_nexthop)); +} + +void rta_init(void); +#define rta_size(...) (sizeof(rta)) +#define RTA_MAX_SIZE (sizeof(rta)) +rta *rta_lookup(rta *); /* Get rta equivalent to this one, uc++ */ +static inline int rta_is_cached(rta *r) { return r->cached; } +static inline rta *rta_clone(rta *r) { r->uc++; return r; } +void rta__free(rta *r); +static inline void rta_free(rta *r) { if (r && !--r->uc) rta__free(r); } +rta *rta_do_cow(rta *o, linpool *lp); +static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? rta_do_cow(r, lp) : r; } +void rta_dump(rta *); +void rta_dump_all(void); +void rta_show(struct cli *, rta *); + +#endif diff --git a/lib/slab.c b/lib/slab.c index 6348e29b..38d10626 100644 --- a/lib/slab.c +++ b/lib/slab.c @@ -32,6 +32,7 @@ #include "nest/bird.h" #include "lib/resource.h" #include "lib/string.h" +#include "lib/tlists.h" #undef FAKE_SLAB /* Turn on if you want to debug memory allocations */ @@ -42,7 +43,7 @@ static void slab_free(resource *r); static void slab_dump(resource *r); static resource *slab_lookup(resource *r, unsigned long addr); -static size_t slab_memsize(resource *r); +static struct resmem slab_memsize(resource *r); #ifdef FAKE_SLAB @@ -98,7 +99,7 @@ sl_allocz(slab *s) } void -sl_free(slab *s, void *oo) +sl_free(void *oo) { struct sl_obj *o = SKIP_BACK(struct sl_obj, data, oo); @@ -128,7 +129,7 @@ slab_dump(resource *r) debug("(%d objects per %d bytes)\n", cnt, s->size); } -static size_t +static struct resmem slab_memsize(resource *r) { slab *s = (slab *) r; @@ -138,7 +139,10 @@ slab_memsize(resource *r) WALK_LIST(o, s->objs) cnt++; - return ALLOC_OVERHEAD + sizeof(struct slab) + cnt * (ALLOC_OVERHEAD + s->size); + return (struct resmem) { + .effective = cnt * s->size, + .overhead = ALLOC_OVERHEAD + sizeof(struct slab) + cnt * ALLOC_OVERHEAD, + }; } @@ -150,12 +154,38 @@ slab_memsize(resource *r) #define MAX_EMPTY_HEADS 1 +enum sl_head_state { + slh_empty = 2, + slh_partial = 0, + slh_full = 1, +} PACKED; + +struct sl_head { + struct slab *slab; + TLIST_NODE(sl_head, struct sl_head) n; + u16 num_full; + enum sl_head_state state; + u32 used_bits[0]; +}; + +struct sl_alignment { /* Magic structure for testing of alignment */ + byte data; + int x[0]; +}; + +#define TLIST_PREFIX sl_head +#define TLIST_TYPE struct sl_head +#define TLIST_ITEM n +#define TLIST_WANT_WALK +#define TLIST_WANT_ADD_HEAD + +#include "lib/tlists.h" + struct slab { resource r; - pool *p; uint obj_size, head_size, head_bitfield_len; uint objs_per_slab, num_empty_heads, data_size; - list empty_heads, partial_heads, full_heads; + struct sl_head_list empty_heads, partial_heads, full_heads; }; static struct resclass sl_class = { @@ -167,18 +197,15 @@ static struct resclass sl_class = { slab_memsize }; -struct sl_head { - node n; - u32 num_full; - u32 used_bits[0]; -}; +#define SL_GET_HEAD(x) ((struct sl_head *) (((uintptr_t) (x)) & ~(page_size-1))) -struct sl_alignment { /* Magic structure for testing of alignment */ - byte data; - int x[0]; -}; +#define SL_HEAD_CHANGE_STATE(_s, _h, _from, _to) ({ \ + ASSERT_DIE(_h->state == slh_##_from); \ + sl_head_rem_node(&_s->_from##_heads, _h); \ + sl_head_add_head(&_s->_to##_heads, _h); \ + _h->state = slh_##_to; \ + }) -#define SL_GET_HEAD(x) ((struct sl_head *) PAGE_HEAD(x)) /** * sl_new - create a new Slab @@ -192,10 +219,9 @@ slab * sl_new(pool *p, uint size) { slab *s = ralloc(p, &sl_class); - s->p = p; uint align = sizeof(struct sl_alignment); - if (align < sizeof(int)) - align = sizeof(int); + if (align < sizeof(void *)) + align = sizeof(void *); s->data_size = size; size = (size + align - 1) / align * align; s->obj_size = size; @@ -216,9 +242,6 @@ sl_new(pool *p, uint size) bug("Slab: object too large"); s->num_empty_heads = 0; - init_list(&s->empty_heads); - init_list(&s->partial_heads); - init_list(&s->full_heads); return s; } @@ -235,8 +258,7 @@ sl_alloc(slab *s) struct sl_head *h; redo: - h = HEAD(s->partial_heads); - if (!h->n.next) + if (!(h = s->partial_heads.first)) goto no_partial; okay: for (uint i=0; i<s->head_bitfield_len; i++) @@ -256,26 +278,27 @@ okay: return out; } - rem_node(&h->n); - add_tail(&s->full_heads, &h->n); + SL_HEAD_CHANGE_STATE(s, h, partial, full); goto redo; no_partial: - h = HEAD(s->empty_heads); - if (h->n.next) + if (h = s->empty_heads.first) { - rem_node(&h->n); - add_head(&s->partial_heads, &h->n); + SL_HEAD_CHANGE_STATE(s, h, empty, partial); s->num_empty_heads--; goto okay; } - h = alloc_page(s->p); + + h = alloc_page(); + ASSERT_DIE(SL_GET_HEAD(h) == h); + #ifdef POISON memset(h, 0xba, page_size); #endif - ASSERT_DIE(SL_GET_HEAD(h) == h); + memset(h, 0, s->head_size); - add_head(&s->partial_heads, &h->n); + h->slab = s; + sl_head_add_head(&s->partial_heads, h); goto okay; } @@ -304,9 +327,10 @@ sl_allocz(slab *s) * and returns it back to the Slab @s. */ void -sl_free(slab *s, void *oo) +sl_free(void *oo) { struct sl_head *h = SL_GET_HEAD(oo); + struct slab *s = h->slab; #ifdef POISON memset(oo, 0xdb, s->data_size); @@ -319,24 +343,22 @@ sl_free(slab *s, void *oo) h->used_bits[pos / 32] &= ~(1 << (pos % 32)); - if (h->num_full-- == s->objs_per_slab) - { - rem_node(&h->n); - add_head(&s->partial_heads, &h->n); - } + if ((h->num_full-- == s->objs_per_slab) && (h->state == slh_full)) + SL_HEAD_CHANGE_STATE(s, h, full, partial); else if (!h->num_full) { - rem_node(&h->n); + sl_head_rem_node(&s->partial_heads, h); if (s->num_empty_heads >= MAX_EMPTY_HEADS) { #ifdef POISON memset(h, 0xde, page_size); #endif - free_page(s->p, h); + free_page(h); } else { - add_head(&s->empty_heads, &h->n); + sl_head_add_head(&s->empty_heads, h); + h->state = slh_empty; s->num_empty_heads++; } } @@ -346,14 +368,13 @@ static void slab_free(resource *r) { slab *s = (slab *) r; - struct sl_head *h, *g; - WALK_LIST_DELSAFE(h, g, s->empty_heads) - free_page(s->p, h); - WALK_LIST_DELSAFE(h, g, s->partial_heads) - free_page(s->p, h); - WALK_LIST_DELSAFE(h, g, s->full_heads) - free_page(s->p, h); + WALK_TLIST_DELSAFE(sl_head, h, &s->empty_heads) + free_page(h); + WALK_TLIST_DELSAFE(sl_head, h, &s->partial_heads) + free_page(h); + WALK_TLIST_DELSAFE(sl_head, h, &s->full_heads) + free_page(h); } static void @@ -361,45 +382,53 @@ slab_dump(resource *r) { slab *s = (slab *) r; int ec=0, pc=0, fc=0; - struct sl_head *h; - WALK_LIST(h, s->empty_heads) + WALK_TLIST(sl_head, h, &s->empty_heads) ec++; - WALK_LIST(h, s->partial_heads) + WALK_TLIST(sl_head, h, &s->partial_heads) pc++; - WALK_LIST(h, s->full_heads) + WALK_TLIST(sl_head, h, &s->full_heads) fc++; debug("(%de+%dp+%df blocks per %d objs per %d bytes)\n", ec, pc, fc, s->objs_per_slab, s->obj_size); } -static size_t +static struct resmem slab_memsize(resource *r) { slab *s = (slab *) r; size_t heads = 0; - struct sl_head *h; - WALK_LIST(h, s->empty_heads) - heads++; - WALK_LIST(h, s->partial_heads) - heads++; - WALK_LIST(h, s->full_heads) + WALK_TLIST(sl_head, h, &s->full_heads) heads++; -// return ALLOC_OVERHEAD + sizeof(struct slab) + heads * (ALLOC_OVERHEAD + page_size); - return ALLOC_OVERHEAD + sizeof(struct slab); /* The page sizes are accounted for in the pool */ + size_t items = heads * s->objs_per_slab; + + WALK_TLIST(sl_head, h, &s->partial_heads) + { + heads++; + items += h->num_full; + } + + WALK_TLIST(sl_head, h, &s->empty_heads) + heads++; + + size_t eff = items * s->data_size; + + return (struct resmem) { + .effective = eff, + .overhead = ALLOC_OVERHEAD + sizeof(struct slab) + heads * page_size - eff, + }; } static resource * slab_lookup(resource *r, unsigned long a) { slab *s = (slab *) r; - struct sl_head *h; - WALK_LIST(h, s->partial_heads) + WALK_TLIST(sl_head, h, &s->partial_heads) if ((unsigned long) h < a && (unsigned long) h + page_size < a) return r; - WALK_LIST(h, s->full_heads) + WALK_TLIST(sl_head, h, &s->full_heads) if ((unsigned long) h < a && (unsigned long) h + page_size < a) return r; return NULL; diff --git a/lib/slab_test.c b/lib/slab_test.c new file mode 100644 index 00000000..803d0215 --- /dev/null +++ b/lib/slab_test.c @@ -0,0 +1,171 @@ +/* + * BIRD Library -- Slab Alloc / Dealloc Tests + * + * (c) 2022 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include "test/birdtest.h" +#include "lib/resource.h" +#include "lib/bitops.h" + +static const int sizes[] = { + 8, 12, 18, 27, 41, 75, 131, 269, +}; + +#define TEST_SIZE 1024 * 128 +#define ITEMS(sz) TEST_SIZE / ( (sz) >> u32_log2((sz))/2 ) + +struct test_request { + int size; + enum strategy { + TEST_NONE, + TEST_FORWARDS, + TEST_BACKWARDS, + TEST_RANDOM, + TEST_MIXED, + TEST__MAX, + } strategy; +}; + +const char * const strategy_name[TEST__MAX] = { + [TEST_FORWARDS] = "forwards", + [TEST_BACKWARDS] = "backwards", + [TEST_RANDOM] = "random", + [TEST_MIXED] = "mixed", +}; + +static inline byte *test_alloc(slab *s, int sz, struct resmem *sliz) +{ + byte *out = sl_alloc(s); + + for (int p=0; p < sz; p++) + out[p] = p & 0xff; + + struct resmem ns = rmemsize((resource *) s); + + bt_assert(sliz->effective + sz == ns.effective); + bt_assert((sliz->overhead - sz - ns.overhead) % page_size == 0); + + *sliz = ns; + + return out; +} + +static inline void test_free(slab *s, byte *block, int sz, struct resmem *sliz) +{ + for (int p=0; p < sz; p++) + { + bt_assert(block[p] == (p & 0xff)); + block[p]++; + } + + sl_free(block); + + struct resmem ns = rmemsize((resource *) s); + + bt_assert(sliz->effective - sz == ns.effective); + bt_assert((sliz->overhead + sz - ns.overhead) % page_size == 0); + + *sliz = ns; +} + +static inline struct resmem get_memsize(slab *s) +{ + struct resmem sz = rmemsize((resource *) s); + bt_assert(sz.effective == 0); + return sz; +} + +static int +t_slab(const void *data) +{ + const struct test_request *tr = data; + int sz = tr->size; + + slab *s = sl_new(&root_pool, sz); + struct resmem sliz = get_memsize(s); + + int n = ITEMS(sz); + byte **block = mb_alloc(&root_pool, n * sizeof(*block)); + + switch (tr->strategy) { + case TEST_FORWARDS: + for (int i = 0; i < n; i++) + block[i] = test_alloc(s, sz, &sliz); + + for (int i = 0; i < n; i++) + test_free(s, block[i], sz, &sliz); + + break; + + case TEST_BACKWARDS: + for (int i = 0; i < n; i++) + block[i] = test_alloc(s, sz, &sliz); + + for (int i = n - 1; i >= 0; i--) + test_free(s, block[i], sz, &sliz); + + break; + + case TEST_RANDOM: + for (int i = 0; i < n; i++) + block[i] = test_alloc(s, sz, &sliz); + + for (int i = 0; i < n; i++) + { + int pos = bt_random() % (n - i); + test_free(s, block[pos], sz, &sliz); + if (pos != n - i - 1) + block[pos] = block[n - i - 1]; + } + + break; + + case TEST_MIXED: + { + int cur = 0; + int pending = n; + + while (cur + pending > 0) { + int action = bt_random() % (cur + pending); + + if (action < cur) { + test_free(s, block[action], sz, &sliz); + if (action != --cur) + block[action] = block[cur]; + } else { + block[cur++] = test_alloc(s, sz, &sliz); + pending--; + } + } + + break; + } + + default: bug("This shouldn't happen"); + } + + mb_free(block); + return 1; +} +int main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + struct test_request tr; + + for (uint i = 0; i < sizeof(sizes) / sizeof(*sizes); i++) + for (uint strategy = TEST_FORWARDS; strategy < TEST__MAX; strategy++) + { + tr = (struct test_request) { + .size = sizes[i], + .strategy = strategy, + }; + bt_test_suite_arg(t_slab, &tr, "Slab allocator test, size=%d, strategy=%s", + tr.size, strategy_name[strategy]); + } + + return bt_exit_value(); +} diff --git a/lib/socket.h b/lib/socket.h index 96fedeeb..0b6ac589 100644 --- a/lib/socket.h +++ b/lib/socket.h @@ -123,6 +123,7 @@ extern int sk_priority_control; /* Suggested priority for control traffic, shou #define SKF_TTL_RX 0x08 /* Report TTL / Hop Limit for RX packets */ #define SKF_BIND 0x10 /* Bind datagram socket to given source address */ #define SKF_HIGH_PORT 0x20 /* Choose port from high range if possible */ +#define SKF_FREEBIND 0x40 /* Allow socket to bind to a nonlocal address */ #define SKF_THREAD 0x100 /* Socked used in thread, Do not add to main loop */ #define SKF_TRUNCATED 0x200 /* Received packet was truncated, set by IO layer */ diff --git a/lib/string.h b/lib/string.h index 976b1c24..2829943d 100644 --- a/lib/string.h +++ b/lib/string.h @@ -20,6 +20,11 @@ int bvsprintf(char *str, const char *fmt, va_list args); int bsnprintf(char *str, int size, const char *fmt, ...); int bvsnprintf(char *str, int size, const char *fmt, va_list args); +char *mb_sprintf(pool *p, const char *fmt, ...); +char *mb_vsprintf(pool *p, const char *fmt, va_list args); +char *lp_sprintf(linpool *p, const char *fmt, ...); +char *lp_vsprintf(linpool *p, const char *fmt, va_list args); + int buffer_vprint(buffer *buf, const char *fmt, va_list args); int buffer_print(buffer *buf, const char *fmt, ...); void buffer_puts(buffer *buf, const char *str); diff --git a/lib/timer.c b/lib/timer.c index 381163d0..c47e0bbc 100644 --- a/lib/timer.c +++ b/lib/timer.c @@ -233,6 +233,7 @@ timers_fire(struct timeloop *loop) io_log_event(t->hook, t->data); t->hook(t); + tmp_flush(); } } diff --git a/lib/tlists.h b/lib/tlists.h new file mode 100644 index 00000000..e1ed79ea --- /dev/null +++ b/lib/tlists.h @@ -0,0 +1,172 @@ +/* + * BIRD Library -- Typed Linked Lists + * + * (c) 2022 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + * + * + * This implementation of linked lists forces its members to be + * typed. On the other hand, it needs to be implemented as ugly macros to + * keep the needed genericity. + * + * Usage: + * 1. Include this file + * 2. Define the node structure + * 3. For every list type you need to define: + * A. #define TLIST_PREFIX and other macros + * B. Include this file once again + * + * Macros to define: + * TLIST_PREFIX: prefix to prepend to everything generated + * TLIST_TYPE: the actual node type + * TLIST_ITEM: where the tlist structure is + * TLIST_WANT_WALK: if defined, generates a helper functions for list walking macros + * TLIST_WANT_ADD_HEAD: if defined, TLIST_PREFIX_add_head() is generated to + * add an item to the beginning of the list + * TLIST_WANT_ADD_TAIL: if defined, TLIST_PREFIX_add_tail() is generated to + * add an item to the end of the list + * + * TLIST_PREFIX_rem_node() is generated always. + * + * All these macros are #undef-ed by including this file. + * + * Example: + * + * #include "lib/tlists.h" + * + * struct foo { + * ... + * TLIST_NODE(bar, struct foo) baz; + * ... + * }; + * + * #define TLIST_PREFIX bar + * #define TLIST_TYPE struct foo + * #define TLIST_ITEM baz + * + * #define TLIST_WANT_WALK + * #define TLIST_WANT_ADD_HEAD + * + * #include "lib/tlists.h" + * + * ... + * (end of example) + * + */ + +#ifdef _BIRD_LIB_TLISTS_H_ +# ifdef TLIST_PREFIX + +/* Check for mandatory arguments */ +#ifndef TLIST_TYPE +#error "TLIST_TYPE must be defined" +#endif +#ifndef TLIST_ITEM +#error "TLIST_ITEM must be defined" +#endif +#ifndef TLIST_PREFIX +#error "TLIST_PREFIX must be defined" +#endif + +#define TLIST_NAME(x) MACRO_CONCAT_AFTER(TLIST_PREFIX,_##x) +#ifndef TLIST_LIST_STRUCT +#define TLIST_LIST_STRUCT TLIST_NAME(list) +#endif + +typedef struct TLIST_LIST_STRUCT { + TLIST_TYPE *first; + TLIST_TYPE *last; +} TLIST_LIST_STRUCT; + +#ifdef TLIST_WANT_WALK +static inline struct TLIST_NAME(node) * TLIST_NAME(node_get)(TLIST_TYPE *node) +{ return &(node->TLIST_ITEM); } +#endif + +#ifdef TLIST_WANT_ADD_HEAD +static inline void TLIST_NAME(add_head)(TLIST_LIST_STRUCT *list, TLIST_TYPE *node) +{ + ASSERT_DIE(!node->TLIST_ITEM.prev && !node->TLIST_ITEM.next); + if (node->TLIST_ITEM.next = list->first) + list->first->TLIST_ITEM.prev = node; + else + list->last = node; + list->first = node; +} +#endif + +#ifdef TLIST_WANT_ADD_TAIL +static inline void TLIST_NAME(add_tail)(TLIST_LIST_STRUCT *list, TLIST_TYPE *node) +{ + ASSERT_DIE(!node->TLIST_ITEM.prev && !node->TLIST_ITEM.next); + if (node->TLIST_ITEM.prev = list->last) + list->last->TLIST_ITEM.next = node; + else + list->first = node; + list->last = node; +} +#endif + +static inline void TLIST_NAME(rem_node)(TLIST_LIST_STRUCT *list, TLIST_TYPE *node) +{ + if (node->TLIST_ITEM.prev) + node->TLIST_ITEM.prev->TLIST_ITEM.next = node->TLIST_ITEM.next; + else + { + ASSERT_DIE(list->first == node); + list->first = node->TLIST_ITEM.next; + } + + if (node->TLIST_ITEM.next) + node->TLIST_ITEM.next->TLIST_ITEM.prev = node->TLIST_ITEM.prev; + else + { + ASSERT_DIE(list->last == node); + list->last = node->TLIST_ITEM.prev; + } + + node->TLIST_ITEM.next = node->TLIST_ITEM.prev = NULL; +} + +#undef TLIST_PREFIX +#undef TLIST_NAME +#undef TLIST_LIST_STRUCT +#undef TLIST_TYPE +#undef TLIST_ITEM +#undef TLIST_WANT_ADD_HEAD +#undef TLIST_WANT_ADD_TAIL + +# endif +#else +#define _BIRD_LIB_TLISTS_H_ + +#include "lib/macro.h" + +#if defined(TLIST_NAME) || defined(TLIST_PREFIX) +#error "You should first include lib/tlists.h without requesting a TLIST" +#endif + +#define TLIST_NODE(_name, _type) struct _name##_node { _type *next; _type *prev; } +#define TLIST_LIST(_name) struct _name##_list + +/* Use ->first and ->last to access HEAD and TAIL */ +#define THEAD(_name, _list) (_list)->first +#define TTAIL(_name, _list) (_list)->last + +/* Walkaround macros: simple and resilient to node removal */ +#define WALK_TLIST(_name, _node, _list) \ + for (typeof((_list)->first) _node = (_list)->first; \ + _node; _node = _name##_node_get((_node))->next) + +#define WALK_TLIST_DELSAFE(_name, _node, _list) \ + for (typeof((_list)->first) _node = (_list)->first, \ + _helper = _node ? _name##_node_get((_list)->first)->next : NULL; \ + _node; \ + (_node = _helper) ? (_helper = _name##_node_get(_helper)->next) : 0) + +/* Empty check */ +#define EMPTY_TLIST(_name, _list) (!(_list)->first) + +#endif + diff --git a/lib/type.h b/lib/type.h new file mode 100644 index 00000000..b54744c1 --- /dev/null +++ b/lib/type.h @@ -0,0 +1,112 @@ +/* + * BIRD Internet Routing Daemon -- Internal Data Types + * + * (c) 2022 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_TYPE_H_ +#define _BIRD_TYPE_H_ + +#include "lib/birdlib.h" +#include "lib/attrs.h" + +union bval { +#define BVAL_ITEMS \ + struct { \ + u32 data; /* Integer type inherited from eattrs */ \ + PADDING(data, 0, 4); /* Must be padded on 64-bits */ \ + }; \ + struct { \ + u32 i; /* Integer type inherited from filters */ \ + PADDING(i, 0, 4); /* Must be padded on 64-bits */ \ + }; \ + const struct adata *ptr; /* Generic attribute data inherited from eattrs */ \ + const struct adata *ad; /* Generic attribute data inherited from filters */ \ + + BVAL_ITEMS; +}; + +union bval_long { + union bval bval; /* For direct assignments */ + BVAL_ITEMS; /* For item-wise access */ + + u64 ec; + lcomm lc; + ip_addr ip; + const net_addr *net; + const char *s; + const struct f_tree *t; + const struct f_trie *ti; + const struct f_path_mask *path_mask; + struct f_path_mask_item pmi; +}; + + +/* Internal types */ +enum btype { +/* Nothing. Simply nothing. */ + T_VOID = 0, + +/* Something but inaccessible. */ + T_OPAQUE = 0x02, /* Opaque byte string (not filterable) */ + T_IFACE = 0x0c, /* Pointer to an interface (inside adata) */ + T_NEXTHOP_LIST = 0x2c, /* The whole nexthop block */ + T_HOSTENTRY = 0x2e, /* Hostentry with possible MPLS labels */ + +/* Types shared with eattrs */ + T_INT = 0x01, /* 32-bit unsigned integer number */ + T_IP = 0x04, /* IP address */ + T_QUAD = 0x05, /* Router ID (IPv4 address) */ + T_PATH = 0x06, /* BGP AS path (encoding per RFC 1771:4.3) */ + T_CLIST = 0x0a, /* Set of u32's (e.g., a community list) */ + T_ECLIST = 0x0e, /* Set of pairs of u32's - ext. community list */ + T_LCLIST = 0x08, /* Set of triplets of u32's - large community list */ + + T_ENUM_BGP_ORIGIN = 0x11, /* BGP Origin enum */ + T_ENUM_RA_PREFERENCE = 0x13, /* RA Preference enum */ + T_ENUM_FLOWSPEC_VALID = 0x15, /* Flowspec validation result */ + +#define EAF_TYPE__MAX 0x1f +#define EAF_EMBEDDED 0x01 /* Data stored in eattr.u.data (part of type spec) */ + /* Otherwise, attribute data is adata */ + +/* Other user visible types which fit in int */ + T_BOOL = 0xa0, + T_PAIR = 0xa4, /* Notice that pair is stored as integer: first << 16 | second */ + +/* Put enumerational types in 0x20..0x3f range */ + T_ENUM_LO = 0x10, + T_ENUM_HI = 0x3f, + + T_ENUM_RTS = 0x31, + T_ENUM_SCOPE = 0x33, + T_ENUM_RTD = 0x37, + T_ENUM_ROA = 0x39, + T_ENUM_NETTYPE = 0x3b, + T_ENUM_AF = 0x3d, + +/* new enums go here */ + +#define T_ENUM T_ENUM_LO ... T_ENUM_HI + +/* Bigger ones */ + T_NET = 0xb0, + T_STRING = 0xb4, + T_PATH_MASK = 0xb8, /* mask for BGP path */ + T_EC = 0xbc, /* Extended community value, u64 */ + T_LC = 0xc0, /* Large community value, lcomm */ + T_RD = 0xc4, /* Route distinguisher for VPN addresses */ + T_PATH_MASK_ITEM = 0xc8, /* Path mask item for path mask constructors */ + + T_SET = 0x80, + T_PREFIX_SET = 0x84, +} PACKED; + +typedef enum btype btype; + +STATIC_ASSERT(sizeof(btype) == sizeof(byte)); + + +#endif diff --git a/lib/type_test.c b/lib/type_test.c new file mode 100644 index 00000000..b526db69 --- /dev/null +++ b/lib/type_test.c @@ -0,0 +1,79 @@ +/* + * BIRD Library -- Data Type Alignment Tests + * + * (c) 2022 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include "test/birdtest.h" +#include "lib/type.h" +#include "lib/route.h" + +#define CHECK_ONE(val) \ + for (uint i=0; i<sizeof(val); i++) \ + bt_assert(((const u8 *) &val)[i] == (u8) ~0); + +#define SET_PADDING(val, name) \ + for (uint i=0; i<sizeof(val.PADDING_NAME(name)); i++) \ + val.PADDING_NAME(name)[i] = ~0; + + +static int +t_bval(void) +{ + union bval v; + + memset(&v, 0, sizeof(v)); + v.data = ~0; + SET_PADDING(v, data); + CHECK_ONE(v); + + memset(&v, 0, sizeof(v)); + v.i = ~0; + SET_PADDING(v, i); + CHECK_ONE(v); + + memset(&v, 0, sizeof(v)); + v.ptr = (void *) ~0; + CHECK_ONE(v); + + memset(&v, 0, sizeof(v)); + v.ad = (void *) ~0; + CHECK_ONE(v); + + return 1; +} + +static int +t_eattr(void) +{ + struct eattr e; + memset(&e, 0, sizeof(e)); + + e.id = ~0; + e.flags = ~0; + e.type = ~0; + e.rfu = ~0; + e.originated = ~0; + e.fresh = ~0; + e.undef = ~0; + memset(&e.u, ~0, sizeof(e.u)); /* Assumes t_bval passed */ + + SET_PADDING(e, unused); + + CHECK_ONE(e); + + return 1; +} + + +int main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + bt_test_suite(t_bval, "Structure alignment test: bval"); + bt_test_suite(t_eattr, "Structure alignment test: eattr"); + + return bt_exit_value(); +} diff --git a/misc/bird.spec b/misc/bird.spec index 26b43011..425a6340 100644 --- a/misc/bird.spec +++ b/misc/bird.spec @@ -1,6 +1,6 @@ Summary: BIRD Internet Routing Daemon Name: bird -Version: 2.0.8 +Version: 2.0.9 Release: 1 Copyright: GPL Group: Networking/Daemons diff --git a/misc/docker/ubuntu-20.10-amd64/Dockerfile b/misc/docker/ubuntu-21.10-amd64/Dockerfile similarity index 85% rename from misc/docker/ubuntu-20.10-amd64/Dockerfile rename to misc/docker/ubuntu-21.10-amd64/Dockerfile index 19cb1b85..aa0987b6 100644 --- a/misc/docker/ubuntu-20.10-amd64/Dockerfile +++ b/misc/docker/ubuntu-21.10-amd64/Dockerfile @@ -1,9 +1,10 @@ -FROM ubuntu:20.10 +FROM ubuntu:21.10 ENV DEBIAN_FRONTEND=noninteractive RUN sed -i 's/deb.debian.org/ftp.cz.debian.org/' /etc/apt/sources.list RUN apt-get -y update RUN apt-get -y upgrade -RUN apt-get -y install \ +RUN apt-get -y --no-install-recommends install \ + tzdata \ build-essential \ flex \ bison \ diff --git a/nest/Makefile b/nest/Makefile index 884d3950..0350c3b6 100644 --- a/nest/Makefile +++ b/nest/Makefile @@ -1,8 +1,13 @@ -src := a-path.c a-set.c cli.c cmds.c iface.c locks.c neighbor.c password.c proto.c rt-attr.c rt-dev.c rt-fib.c rt-show.c rt-table.c +src := cli.c cmds.c iface.c locks.c neighbor.c password.c proto.c proto-build.c rt-attr.c rt-dev.c rt-fib.c rt-show.c rt-table.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,dev_build) -tests_src := a-set_test.c a-path_test.c +$(proto-build-c): $(lastword $(MAKEFILE_LIST)) + $(E)echo GEN $@ + $(Q)echo "#include \"lib/birdlib.h\"\n$(patsubst %,void %(void);\n,$(PROTO_BUILD)) void protos_build_gen(void) { $(patsubst %, %();\n,$(PROTO_BUILD))}" > $@ + +tests_src := tests_targets := $(tests_targets) $(tests-target-files) tests_objs := $(tests_objs) $(src-o-files) diff --git a/nest/bird.h b/nest/bird.h index 55712abe..931974a0 100644 --- a/nest/bird.h +++ b/nest/bird.h @@ -9,7 +9,6 @@ #ifndef _BIRD_BIRD_H_ #define _BIRD_BIRD_H_ -#include "sysdep/config.h" #include "lib/birdlib.h" #include "lib/ip.h" #include "lib/net.h" diff --git a/nest/cmds.c b/nest/cmds.c index 18f39eb5..092be48a 100644 --- a/nest/cmds.c +++ b/nest/cmds.c @@ -8,7 +8,7 @@ #include "nest/bird.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "conf/conf.h" #include "nest/cmds.h" @@ -51,47 +51,80 @@ cmd_show_symbols(struct sym_show_data *sd) cli_msg(1010, "%-8s\t%s", sd->sym->name, cf_symbol_class_name(sd->sym)); else { - HASH_WALK(config->sym_hash, next, sym) - { - if (!sym->scope->active) - continue; + for (const struct sym_scope *scope = config->root_scope; scope; scope = scope->next) + HASH_WALK(scope->hash, next, sym) + { + if (!sym->scope->active) + continue; - if (sd->type && (sym->class != sd->type)) - continue; + if (sd->type && (sym->class != sd->type)) + continue; - cli_msg(-1010, "%-8s\t%s", sym->name, cf_symbol_class_name(sym)); - } - HASH_WALK_END; + cli_msg(-1010, "%-8s\t%s", sym->name, cf_symbol_class_name(sym)); + } + HASH_WALK_END; cli_msg(0, ""); } } -static void -print_size(char *dsc, size_t val) +#define SIZE_SUFFIX " kMGT" +#define SIZE_FORMAT "% 4u.%1u % 1cB" +#define SIZE_ARGS(a) (a).val, (a).decimal, SIZE_SUFFIX[(a).magnitude] + +struct size_args { + u64 val:48; + u64 decimal:8; + u64 magnitude:8; +}; + +static struct size_args +get_size_args(u64 val) { - char *px = " kMG"; - int i = 0; - while ((val >= 10000) && (i < 3)) +#define VALDEC 10 /* One decimal place */ + val *= VALDEC; + + uint i = 0; + while ((val >= 10000 * VALDEC) && (i < 4)) { val = (val + 512) / 1024; i++; } - cli_msg(-1018, "%-17s %4u %cB", dsc, (unsigned) val, px[i]); + return (struct size_args) { + .val = (val / VALDEC), + .decimal = (val % VALDEC), + .magnitude = i, + }; +} + +static void +print_size(char *dsc, struct resmem vals) +{ + struct size_args effective = get_size_args(vals.effective); + struct size_args overhead = get_size_args(vals.overhead); + + cli_msg(-1018, "%-17s " SIZE_FORMAT " " SIZE_FORMAT, dsc, SIZE_ARGS(effective), SIZE_ARGS(overhead)); } extern pool *rt_table_pool; extern pool *rta_pool; +extern uint *pages_kept; void cmd_show_memory(void) { cli_msg(-1018, "BIRD memory usage"); + cli_msg(-1018, "%-17s Effective Overhead", ""); print_size("Routing tables:", rmemsize(rt_table_pool)); print_size("Route attributes:", rmemsize(rta_pool)); print_size("Protocols:", rmemsize(proto_pool)); - print_size("Total:", rmemsize(&root_pool)); + struct resmem total = rmemsize(&root_pool); +#ifdef HAVE_MMAP + print_size("Standby memory:", (struct resmem) { .overhead = page_size * *pages_kept }); + total.overhead += page_size * *pages_kept; +#endif + print_size("Total:", total); cli_msg(0, ""); } @@ -101,7 +134,7 @@ cmd_eval(const struct f_line *expr) buffer buf; LOG_BUFFER_INIT(buf); - if (f_eval_buf(expr, this_cli->parser_pool, &buf) > F_RETURN) + if (f_eval_buf(expr, &buf) > F_RETURN) { cli_msg(8008, "runtime error"); return; diff --git a/nest/config.Y b/nest/config.Y index ef3e27c0..7c68a09a 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -17,6 +17,7 @@ CF_HDR CF_DEFINES +static struct rtable_config *this_table; static struct proto_config *this_proto; static struct channel_config *this_channel; static struct iface_patt *this_ipatt; @@ -111,19 +112,20 @@ proto_postconfig(void) CF_DECLS -CF_KEYWORDS(ROUTER, ID, HOSTNAME, PROTOCOL, TEMPLATE, PREFERENCE, DISABLED, DEBUG, ALL, OFF, DIRECT) +CF_KEYWORDS(ROUTER, ID, HOSTNAME, PROTOCOL, TEMPLATE, PREFERENCE, DISABLED, DEBUG, ALL, OFF, DIRECT, PIPE) CF_KEYWORDS(INTERFACE, IMPORT, EXPORT, FILTER, NONE, VRF, DEFAULT, TABLE, STATES, ROUTES, FILTERS) CF_KEYWORDS(IPV4, IPV6, VPN4, VPN6, ROA4, ROA6, FLOW4, FLOW6, SADR, MPLS) CF_KEYWORDS(RECEIVE, LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE, KEEP, FILTERED, RPKI) CF_KEYWORDS(PASSWORD, KEY, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, CHANNELS, INTERFACES) CF_KEYWORDS(ALGORITHM, KEYED, HMAC, MD5, SHA1, SHA256, SHA384, SHA512, BLAKE2S128, BLAKE2S256, BLAKE2B256, BLAKE2B512) -CF_KEYWORDS(PRIMARY, STATS, COUNT, BY, FOR, COMMANDS, PREEXPORT, NOEXPORT, EXPORTED, GENERATE) -CF_KEYWORDS(BGP, PASSWORDS, DESCRIPTION, SORTED) -CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CLASS, DSCP) +CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, IN, COMMANDS, PREEXPORT, NOEXPORT, EXPORTED, GENERATE) +CF_KEYWORDS(BGP, PASSWORDS, DESCRIPTION) +CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, CLASS, DSCP) CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, ROUTE, PROTOCOL, BASE, LOG, S, MS, US) -CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, FLUSH, AS) +CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, AS) CF_KEYWORDS(MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE) CF_KEYWORDS(CHECK, LINK) +CF_KEYWORDS(SORTED, TRIE, MIN, MAX, SETTLE, TIME) /* For r_args_channel */ CF_KEYWORDS(IPV4, IPV4_MC, IPV4_MPLS, IPV6, IPV6_MC, IPV6_MPLS, IPV6_SADR, VPN4, VPN4_MC, VPN4_MPLS, VPN6, VPN6_MC, VPN6_MPLS, ROA4, ROA6, FLOW4, FLOW6, MPLS, PRI, SEC) @@ -131,7 +133,7 @@ CF_KEYWORDS(IPV4, IPV4_MC, IPV4_MPLS, IPV6, IPV6_MC, IPV6_MPLS, IPV6_SADR, VPN4, CF_ENUM(T_ENUM_RTS, RTS_, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT, RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE, BABEL) CF_ENUM(T_ENUM_SCOPE, SCOPE_, HOST, LINK, SITE, ORGANIZATION, UNIVERSE, UNDEFINED) -CF_ENUM(T_ENUM_RTD, RTD_, UNICAST, BLACKHOLE, UNREACHABLE, PROHIBIT) +CF_ENUM(T_ENUM_RTD, RTD_, BLACKHOLE, UNREACHABLE, PROHIBIT) CF_ENUM(T_ENUM_ROA, ROA_, UNKNOWN, VALID, INVALID) CF_ENUM_PX(T_ENUM_AF, AF_, AFI_, IPV4, IPV6) @@ -141,7 +143,7 @@ CF_ENUM_PX(T_ENUM_AF, AF_, AFI_, IPV4, IPV6) %type <s> optproto %type <ra> r_args %type <sd> sym_args -%type <i> proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_mode limit_action net_type table_sorted tos password_algorithm +%type <i> proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_mode limit_action net_type tos password_algorithm %type <ps> proto_patt proto_patt2 %type <cc> channel_start proto_channel %type <cl> limit_spec @@ -206,16 +208,37 @@ CF_ENUM(T_ENUM_NETTYPE, NET_, IP4, IP6, VPN4, VPN6, ROA4, ROA6, FLOW4, FLOW6, IP conf: table ; -table_sorted: - { $$ = 0; } - | SORTED { $$ = 1; } +table: table_start table_sorted table_opt_list ; + +table_start: net_type TABLE symbol { + this_table = rt_new_table($3, $1); + } ; -table: net_type TABLE symbol table_sorted { - struct rtable_config *cf; - cf = rt_new_table($3, $1); - cf->sorted = $4; +table_sorted: + /* empty */ + | SORTED { this_table->sorted = 1; } + ; + +table_opt: + SORTED bool { this_table->sorted = $2; } + | TRIE bool { + if (!net_val_match(this_table->addr_type, NB_IP | NB_VPN | NB_ROA | NB_IP6_SADR)) + cf_error("Trie option not supported for %s table", net_label[this_table->addr_type]); + this_table->trie_used = $2; } + | MIN SETTLE TIME expr_us { this_table->min_settle_time = $4; } + | MAX SETTLE TIME expr_us { this_table->max_settle_time = $4; } + ; + +table_opts: + /* empty */ + | table_opts table_opt ';' + ; + +table_opt_list: + /* empty */ + | '{' table_opts '}' ; @@ -348,6 +371,7 @@ debug_default: DEBUG PROTOCOLS debug_mask { new_config->proto_default_debug = $3; } | DEBUG CHANNELS debug_mask { new_config->channel_default_debug = $3; } | DEBUG COMMANDS expr { new_config->cli_debug = $3; } + | DEBUG PIPE bool { new_config->pipe_debug = $3; } ; /* MRTDUMP PROTOCOLS is in systep/unix/config.Y */ @@ -615,20 +639,28 @@ r_args: $$ = cfg_allocz(sizeof(struct rt_show_data)); init_list(&($$->tables)); $$->filter = FILTER_ACCEPT; - $$->running_on_config = new_config->fallback; + $$->running_on_config = config; } | r_args net_any { $$ = $1; if ($$->addr) cf_error("Only one prefix expected"); $$->addr = $2; + $$->addr_mode = RSD_ADDR_EQUAL; } | r_args FOR r_args_for { $$ = $1; if ($$->addr) cf_error("Only one prefix expected"); - $$->show_for = 1; $$->addr = $3; + $$->addr_mode = RSD_ADDR_FOR; } - | r_args TABLE CF_SYM_KNOWN { + | r_args IN net_any { + $$ = $1; + if ($$->addr) cf_error("Only one prefix expected"); + if (!net_type_match($3, NB_IP)) cf_error("Only IP networks accepted for 'in' argument"); + $$->addr = $3; + $$->addr_mode = RSD_ADDR_IN; + } +| r_args TABLE symbol_known { cf_assert_symbol($3, SYM_TABLE); $$ = $1; rt_show_add_table($$, $3->table->table); @@ -673,7 +705,7 @@ r_args: $$ = $1; $$->filtered = 1; } - | r_args export_mode CF_SYM_KNOWN { + | r_args export_mode symbol_known { cf_assert_symbol($3, SYM_PROTO); struct proto_config *c = (struct proto_config *) $3->proto; $$ = $1; @@ -690,7 +722,7 @@ r_args: $$->export_channel = $3; $$->tables_defined_by = RSD_TDB_INDIRECT; } - | r_args PROTOCOL CF_SYM_KNOWN { + | r_args PROTOCOL symbol_known { cf_assert_symbol($3, SYM_PROTO); struct proto_config *c = (struct proto_config *) $3->proto; $$ = $1; @@ -819,8 +851,10 @@ CF_CLI(DUMP NEIGHBORS,,, [[Dump neighbor cache]]) { neigh_dump_all(); cli_msg(0, ""); } ; CF_CLI(DUMP ATTRIBUTES,,, [[Dump attribute cache]]) { rta_dump_all(); cli_msg(0, ""); } ; -CF_CLI(DUMP ROUTES,,, [[Dump routing table]]) +CF_CLI(DUMP ROUTES,,, [[Dump routes]]) { rt_dump_all(); cli_msg(0, ""); } ; +CF_CLI(DUMP TABLES,,, [[Dump table connections]]) +{ rt_dump_hooks_all(); cli_msg(0, ""); } ; CF_CLI(DUMP PROTOCOLS,,, [[Dump protocol information]]) { protos_dump_all(); cli_msg(0, ""); } ; CF_CLI(DUMP FILTER ALL,,, [[Dump all filters in linearized form]]) @@ -890,9 +924,6 @@ proto_patt2: | TEXT { $$.ptr = $1; $$.patt = 1; } ; -dynamic_attr: IGP_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_GEN_IGP_METRIC); } ; - - CF_CODE CF_END diff --git a/nest/iface.c b/nest/iface.c index 83a633a3..682340c5 100644 --- a/nest/iface.c +++ b/nest/iface.c @@ -591,7 +591,7 @@ ifa_update(struct ifa *a) if (ipa_equal(b->brd, a->brd) && ipa_equal(b->opposite, a->opposite) && b->scope == a->scope && - !((b->flags ^ a->flags) & IA_PEER)) + !((b->flags ^ a->flags) & (IA_SECONDARY | IA_PEER | IA_HOST))) { b->flags |= IA_UPDATED; return b; diff --git a/nest/limit.h b/nest/limit.h new file mode 100644 index 00000000..5838ad3b --- /dev/null +++ b/nest/limit.h @@ -0,0 +1,49 @@ +/* + * BIRD Internet Routing Daemon -- Limits + * + * (c) 1998--2000 Martin Mares <mj@ucw.cz> + * (c) 2021 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_LIMIT_H_ +#define _BIRD_LIMIT_H_ + +struct limit { + u32 max; + u32 count; + int (*action)(struct limit *, void *data); +}; + +static inline int limit_do_action(struct limit *l, void *data) +{ + return l->action ? l->action(l, data) : 1; +} + +static inline int limit_push(struct limit *l, void *data) +{ + if ((l->count >= l->max) && limit_do_action(l, data)) + return 1; + + l->count++; + return 0; +} + +static inline void limit_pop(struct limit *l) +{ + --l->count; +} + +static inline void limit_reset(struct limit *l) +{ + l->count = 0; +} + +static inline void limit_update(struct limit *l, void *data, u32 max) +{ + if (l->count > (l->max = max)) + limit_do_action(l, data); +} + +#endif diff --git a/nest/neighbor.c b/nest/neighbor.c index 1a31fb79..7cf9c85d 100644 --- a/nest/neighbor.c +++ b/nest/neighbor.c @@ -345,7 +345,7 @@ neigh_free(neighbor *n) { rem_node(&n->n); rem_node(&n->if_n); - sl_free(neigh_slab, n); + sl_free(n); } /** diff --git a/nest/proto.c b/nest/proto.c index 7cfb1555..77817888 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -16,17 +16,16 @@ #include "lib/timer.h" #include "lib/string.h" #include "conf/conf.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/iface.h" #include "nest/cli.h" #include "filter/filter.h" #include "filter/f-inst.h" pool *proto_pool; -list proto_list; +list STATIC_LIST_INIT(proto_list); -static list protocol_list; -struct protocol *class_to_protocol[PROTOCOL__MAX]; +static list STATIC_LIST_INIT(protocol_list); #define CD(c, msg, args...) ({ if (c->debug & D_STATES) log(L_TRACE "%s.%s: " msg, c->proto->name, c->name ?: "?", ## args); }) #define PD(p, msg, args...) ({ if (p->debug & D_STATES) log(L_TRACE "%s: " msg, p->name, ## args); }) @@ -43,8 +42,7 @@ static int graceful_restart_state; static u32 graceful_restart_locks; static char *p_states[] = { "DOWN", "START", "UP", "STOP" }; -static char *c_states[] = { "DOWN", "START", "UP", "FLUSHING" }; -static char *e_states[] = { "DOWN", "FEEDING", "READY" }; +static char *c_states[] = { "DOWN", "START", "UP", "STOP", "RESTART" }; extern struct protocol proto_unix_iface; @@ -52,15 +50,17 @@ static void channel_request_reload(struct channel *c); static void proto_shutdown_loop(timer *); static void proto_rethink_goal(struct proto *p); static char *proto_state_name(struct proto *p); -static void channel_verify_limits(struct channel *c); -static inline void channel_reset_limit(struct channel_limit *l); - +static void channel_init_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf); +static void channel_update_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf); +static void channel_reset_limit(struct channel *c, struct limit *l, int dir); +static void channel_feed_end(struct channel *c); +static void channel_export_stopped(struct rt_export_request *req); static inline int proto_is_done(struct proto *p) { return (p->proto_state == PS_DOWN) && (p->active_channels == 0); } static inline int channel_is_active(struct channel *c) -{ return (c->channel_state == CS_START) || (c->channel_state == CS_UP); } +{ return (c->channel_state != CS_DOWN); } static inline int channel_reloadable(struct channel *c) { return c->proto->reload_routes && c->reloadable; } @@ -68,10 +68,46 @@ static inline int channel_reloadable(struct channel *c) static inline void channel_log_state_change(struct channel *c) { - if (c->export_state) - CD(c, "State changed to %s/%s", c_states[c->channel_state], e_states[c->export_state]); - else - CD(c, "State changed to %s", c_states[c->channel_state]); + CD(c, "State changed to %s", c_states[c->channel_state]); +} + +void +channel_import_log_state_change(struct rt_import_request *req, u8 state) +{ + struct channel *c = SKIP_BACK(struct channel, in_req, req); + CD(c, "Channel import state changed to %s", rt_import_state_name(state)); +} + +void +channel_export_log_state_change(struct rt_export_request *req, u8 state) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + CD(c, "Channel export state changed to %s", rt_export_state_name(state)); + + switch (state) + { + case TES_FEEDING: + if (c->proto->feed_begin) + c->proto->feed_begin(c, !c->refeeding); + break; + case TES_READY: + channel_feed_end(c); + break; + } +} + +static void +channel_dump_import_req(struct rt_import_request *req) +{ + struct channel *c = SKIP_BACK(struct channel, in_req, req); + debug(" Channel %s.%s import request %p\n", c->proto->name, c->name, req); +} + +static void +channel_dump_export_req(struct rt_export_request *req) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + debug(" Channel %s.%s export request %p\n", c->proto->name, c->name, req); } static void @@ -141,6 +177,15 @@ proto_find_channel_by_name(struct proto *p, const char *n) return NULL; } +rte * channel_preimport(struct rt_import_request *req, rte *new, rte *old); + +void rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); +void rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); +void rt_feed_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); +void rt_notify_accepted(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); +void rt_notify_merged(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); + + /** * proto_add_channel - connect protocol to a routing table * @p: protocol instance @@ -165,12 +210,14 @@ proto_add_channel(struct proto *p, struct channel_config *cf) c->channel = cf->channel; c->proto = p; c->table = cf->table->table; + rt_lock_table(c->table); c->in_filter = cf->in_filter; c->out_filter = cf->out_filter; - c->rx_limit = cf->rx_limit; - c->in_limit = cf->in_limit; - c->out_limit = cf->out_limit; + + channel_init_limit(c, &c->rx_limit, PLD_RX, &cf->rx_limit); + channel_init_limit(c, &c->in_limit, PLD_IN, &cf->in_limit); + channel_init_limit(c, &c->out_limit, PLD_OUT, &cf->out_limit); c->net_type = cf->net_type; c->ra_mode = cf->ra_mode; @@ -181,7 +228,6 @@ proto_add_channel(struct proto *p, struct channel_config *cf) c->rpki_reload = cf->rpki_reload; c->channel_state = CS_DOWN; - c->export_state = ES_DOWN; c->last_state_change = current_time(); c->reloadable = 1; @@ -203,6 +249,7 @@ proto_remove_channel(struct proto *p UNUSED, struct channel *c) CD(c, "Removed", c->name); + rt_unlock_table(c->table); rem_node(&c->n); mb_free(c); } @@ -223,7 +270,7 @@ proto_pause_channels(struct proto *p) struct channel *c; WALK_LIST(c, p->channels) if (!c->disabled && channel_is_active(c)) - channel_set_state(c, CS_START); + channel_set_state(c, CS_PAUSE); } static void @@ -232,7 +279,7 @@ proto_stop_channels(struct proto *p) struct channel *c; WALK_LIST(c, p->channels) if (!c->disabled && channel_is_active(c)) - channel_set_state(c, CS_FLUSHING); + channel_set_state(c, CS_STOP); } static void @@ -243,71 +290,6 @@ proto_remove_channels(struct proto *p) proto_remove_channel(p, c); } -static void -channel_schedule_feed(struct channel *c, int initial) -{ - // DBG("%s: Scheduling meal\n", p->name); - ASSERT(c->channel_state == CS_UP); - - c->export_state = ES_FEEDING; - c->refeeding = !initial; - - ev_schedule_work(c->feed_event); -} - -static void -channel_feed_loop(void *ptr) -{ - struct channel *c = ptr; - - if (c->export_state != ES_FEEDING) - return; - - /* Start feeding */ - if (!c->feed_active) - { - if (c->proto->feed_begin) - c->proto->feed_begin(c, !c->refeeding); - - c->refeed_pending = 0; - } - - // DBG("Feeding protocol %s continued\n", p->name); - if (!rt_feed_channel(c)) - { - ev_schedule_work(c->feed_event); - return; - } - - /* Reset export limit if the feed ended with acceptable number of exported routes */ - struct channel_limit *l = &c->out_limit; - if (c->refeeding && - (l->state == PLS_BLOCKED) && - (c->refeed_count <= l->limit) && - (c->stats.exp_routes <= l->limit)) - { - log(L_INFO "Protocol %s resets route export limit (%u)", c->proto->name, l->limit); - channel_reset_limit(&c->out_limit); - - /* Continue in feed - it will process routing table again from beginning */ - c->refeed_count = 0; - ev_schedule_work(c->feed_event); - return; - } - - // DBG("Feeding protocol %s finished\n", p->name); - c->export_state = ES_READY; - channel_log_state_change(c); - - if (c->proto->feed_end) - c->proto->feed_end(c); - - /* Restart feeding */ - if (c->refeed_pending) - channel_request_feeding(c); -} - - static void channel_roa_in_changed(struct rt_subscription *s) { @@ -326,14 +308,12 @@ static void channel_roa_out_changed(struct rt_subscription *s) { struct channel *c = s->data; - int active = (c->export_state == ES_FEEDING); + CD(c, "Feeding triggered by RPKI change"); - CD(c, "Feeding triggered by RPKI change%s", active ? " - already active" : ""); + c->refeed_pending = 1; - if (!active) - channel_request_feeding(c); - else - c->refeed_pending = 1; + if (c->out_req.hook) + rt_stop_export(&c->out_req, channel_export_stopped); } /* Temporary code, subscriptions should be changed to resources */ @@ -409,14 +389,8 @@ channel_roa_subscribe_filter(struct channel *c, int dir) { switch (fi->fi_code) { - case FI_ROA_CHECK_IMPLICIT: - tab = fi->i_FI_ROA_CHECK_IMPLICIT.rtc->table; - if (valid) channel_roa_subscribe(c, tab, dir); - found = 1; - break; - - case FI_ROA_CHECK_EXPLICIT: - tab = fi->i_FI_ROA_CHECK_EXPLICIT.rtc->table; + case FI_ROA_CHECK: + tab = fi->i_FI_ROA_CHECK.rtc->table; if (valid) channel_roa_subscribe(c, tab, dir); found = 1; break; @@ -445,32 +419,189 @@ channel_roa_unsubscribe_all(struct channel *c) } static void -channel_start_export(struct channel *c) +channel_start_import(struct channel *c) { - ASSERT(c->channel_state == CS_UP); - ASSERT(c->export_state == ES_DOWN); + if (c->in_req.hook) + { + log(L_WARN "%s.%s: Attempted to start channel's already started import", c->proto->name, c->name); + return; + } - channel_schedule_feed(c, 1); /* Sets ES_FEEDING */ + int nlen = strlen(c->name) + strlen(c->proto->name) + 2; + char *rn = mb_allocz(c->proto->pool, nlen); + bsprintf(rn, "%s.%s", c->proto->name, c->name); + + c->in_req = (struct rt_import_request) { + .name = rn, + .trace_routes = c->debug | c->proto->debug, + .dump_req = channel_dump_import_req, + .log_state_change = channel_import_log_state_change, + .preimport = channel_preimport, + .rte_modify = c->proto->rte_modify, + }; + + ASSERT(c->channel_state == CS_UP); + + channel_reset_limit(c, &c->rx_limit, PLD_RX); + channel_reset_limit(c, &c->in_limit, PLD_IN); + + memset(&c->import_stats, 0, sizeof(struct channel_import_stats)); + + DBG("%s.%s: Channel start import req=%p\n", c->proto->name, c->name, &c->in_req); + rt_request_import(c->table, &c->in_req); } static void -channel_stop_export(struct channel *c) +channel_start_export(struct channel *c) { - /* Need to abort feeding */ - if (c->export_state == ES_FEEDING) - rt_feed_channel_abort(c); + if (c->out_req.hook) + { + log(L_WARN "%s.%s: Attempted to start channel's already started export", c->proto->name, c->name); + return; + } - c->export_state = ES_DOWN; - c->stats.exp_routes = 0; - bmap_reset(&c->export_map, 1024); + ASSERT(c->channel_state == CS_UP); + int nlen = strlen(c->name) + strlen(c->proto->name) + 2; + char *rn = mb_allocz(c->proto->pool, nlen); + bsprintf(rn, "%s.%s", c->proto->name, c->name); + + c->out_req = (struct rt_export_request) { + .name = rn, + .trace_routes = c->debug | c->proto->debug, + .dump_req = channel_dump_export_req, + .log_state_change = channel_export_log_state_change, + }; + + bmap_init(&c->export_map, c->proto->pool, 1024); + bmap_init(&c->export_reject_map, c->proto->pool, 1024); + + channel_reset_limit(c, &c->out_limit, PLD_OUT); + + memset(&c->export_stats, 0, sizeof(struct channel_export_stats)); + + switch (c->ra_mode) { + case RA_OPTIMAL: + c->out_req.export_one = rt_notify_optimal; + break; + case RA_ANY: + c->out_req.export_one = rt_notify_any; + c->out_req.export_bulk = rt_feed_any; + break; + case RA_ACCEPTED: + c->out_req.export_bulk = rt_notify_accepted; + break; + case RA_MERGED: + c->out_req.export_bulk = rt_notify_merged; + break; + default: + bug("Unknown route announcement mode"); + } + + DBG("%s.%s: Channel start export req=%p\n", c->proto->name, c->name, &c->out_req); + rt_request_export(c->table, &c->out_req); } +static void +channel_check_stopped(struct channel *c) +{ + switch (c->channel_state) + { + case CS_STOP: + if (c->out_req.hook || c->in_req.hook) + return; + + channel_set_state(c, CS_DOWN); + ev_schedule(c->proto->event); + + break; + case CS_PAUSE: + if (c->out_req.hook) + return; + + channel_set_state(c, CS_START); + break; + default: + bug("Stopped channel in a bad state: %d", c->channel_state); + } + + DBG("%s.%s: Channel requests/hooks stopped (in state %s)\n", c->proto->name, c->name, c_states[c->channel_state]); +} + +void +channel_import_stopped(struct rt_import_request *req) +{ + struct channel *c = SKIP_BACK(struct channel, in_req, req); + + req->hook = NULL; + + if (c->in_table) + rt_prune_sync(c->in_table, 1); + + mb_free(c->in_req.name); + c->in_req.name = NULL; + + channel_check_stopped(c); +} + +static void +channel_export_stopped(struct rt_export_request *req) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + + /* The hook has already stopped */ + req->hook = NULL; + + if (c->refeed_pending) + { + c->refeeding = 1; + c->refeed_pending = 0; + rt_request_export(c->table, req); + return; + } + + /* Free the routes from out_table */ + if (c->out_table) + rt_prune_sync(c->out_table, 1); + + mb_free(c->out_req.name); + c->out_req.name = NULL; + + channel_check_stopped(c); +} + +static void +channel_feed_end(struct channel *c) +{ + struct rt_export_request *req = &c->out_req; + + /* Reset export limit if the feed ended with acceptable number of exported routes */ + struct limit *l = &c->out_limit; + if (c->refeeding && + (c->limit_active & (1 << PLD_OUT)) && + (c->refeed_count <= l->max) && + (l->count <= l->max)) + { + log(L_INFO "Protocol %s resets route export limit (%u)", c->proto->name, l->max); + + c->refeed_pending = 1; + rt_stop_export(req, channel_export_stopped); + return; + } + + if (c->proto->feed_end) + c->proto->feed_end(c); + + if (c->refeed_pending) + rt_stop_export(req, channel_export_stopped); + else + c->refeeding = 0; +} /* Called by protocol for reload from in_table */ void channel_schedule_reload(struct channel *c) { - ASSERT(c->channel_state == CS_UP); + ASSERT(c->in_req.hook); rt_reload_channel_abort(c); ev_schedule_work(c->reload_event); @@ -496,23 +627,6 @@ channel_reload_loop(void *ptr) channel_request_reload(c); } -static void -channel_reset_import(struct channel *c) -{ - /* Need to abort feeding */ - ev_postpone(c->reload_event); - rt_reload_channel_abort(c); - - rt_prune_sync(c->in_table, 1); -} - -static void -channel_reset_export(struct channel *c) -{ - /* Just free the routes */ - rt_prune_sync(c->out_table, 1); -} - /* Called by protocol to activate in_table */ void channel_setup_in_table(struct channel *c) @@ -544,20 +658,11 @@ channel_setup_out_table(struct channel *c) static void channel_do_start(struct channel *c) { - rt_lock_table(c->table); - add_tail(&c->table->channels, &c->table_node); c->proto->active_channels++; - c->feed_event = ev_new_init(c->proto->pool, channel_feed_loop, c); - - bmap_init(&c->export_map, c->proto->pool, 1024); - memset(&c->stats, 0, sizeof(struct proto_stats)); - - channel_reset_limit(&c->rx_limit); - channel_reset_limit(&c->in_limit); - channel_reset_limit(&c->out_limit); - CALL(c->channel->start, c); + + channel_start_import(c); } static void @@ -572,9 +677,31 @@ channel_do_up(struct channel *c) } static void -channel_do_flush(struct channel *c) +channel_do_pause(struct channel *c) { - rt_schedule_prune(c->table); + /* Need to abort feeding */ + if (c->reload_event) + { + ev_postpone(c->reload_event); + rt_reload_channel_abort(c); + } + + /* Stop export */ + if (c->out_req.hook) + rt_stop_export(&c->out_req, channel_export_stopped); + + channel_roa_unsubscribe_all(c); + + bmap_free(&c->export_map); + bmap_free(&c->export_reject_map); +} + +static void +channel_do_stop(struct channel *c) +{ + /* Stop import */ + if (c->in_req.hook) + rt_stop_import(&c->in_req, channel_import_stopped); c->gr_wait = 0; if (c->gr_lock) @@ -583,28 +710,21 @@ channel_do_flush(struct channel *c) CALL(c->channel->shutdown, c); /* This have to be done in here, as channel pool is freed before channel_do_down() */ - bmap_free(&c->export_map); c->in_table = NULL; c->reload_event = NULL; c->out_table = NULL; - - channel_roa_unsubscribe_all(c); } static void channel_do_down(struct channel *c) { - ASSERT(!c->feed_active && !c->reload_active); + ASSERT(!c->reload_active); - rem_node(&c->table_node); - rt_unlock_table(c->table); c->proto->active_channels--; - if ((c->stats.imp_routes + c->stats.filt_routes) != 0) - log(L_ERR "%s: Channel %s is down but still has some routes", c->proto->name, c->name); - // bmap_free(&c->export_map); - memset(&c->stats, 0, sizeof(struct proto_stats)); + memset(&c->import_stats, 0, sizeof(struct channel_import_stats)); + memset(&c->export_stats, 0, sizeof(struct channel_export_stats)); c->in_table = NULL; c->reload_event = NULL; @@ -623,7 +743,6 @@ void channel_set_state(struct channel *c, uint state) { uint cs = c->channel_state; - uint es = c->export_state; DBG("%s reporting channel %s state transition %s -> %s\n", c->proto->name, c->name, c_states[cs], c_states[state]); if (state == cs) @@ -635,20 +754,11 @@ channel_set_state(struct channel *c, uint state) switch (state) { case CS_START: - ASSERT(cs == CS_DOWN || cs == CS_UP); + ASSERT(cs == CS_DOWN || cs == CS_PAUSE); if (cs == CS_DOWN) channel_do_start(c); - if (es != ES_DOWN) - channel_stop_export(c); - - if (c->in_table && (cs == CS_UP)) - channel_reset_import(c); - - if (c->out_table && (cs == CS_UP)) - channel_reset_export(c); - break; case CS_UP: @@ -663,23 +773,24 @@ channel_set_state(struct channel *c, uint state) channel_do_up(c); break; - case CS_FLUSHING: - ASSERT(cs == CS_START || cs == CS_UP); + case CS_PAUSE: + ASSERT(cs == CS_UP); - if (es != ES_DOWN) - channel_stop_export(c); + if (cs == CS_UP) + channel_do_pause(c); + break; - if (c->in_table && (cs == CS_UP)) - channel_reset_import(c); + case CS_STOP: + ASSERT(cs == CS_UP || cs == CS_START || cs == CS_PAUSE); - if (c->out_table && (cs == CS_UP)) - channel_reset_export(c); + if (cs == CS_UP) + channel_do_pause(c); - channel_do_flush(c); + channel_do_stop(c); break; case CS_DOWN: - ASSERT(cs == CS_FLUSHING); + ASSERT(cs == CS_STOP); channel_do_down(c); break; @@ -704,35 +815,16 @@ channel_set_state(struct channel *c, uint state) void channel_request_feeding(struct channel *c) { - ASSERT(c->channel_state == CS_UP); + ASSERT(c->out_req.hook); - CD(c, "Feeding requested"); - - /* Do nothing if we are still waiting for feeding */ - if (c->export_state == ES_DOWN) - return; - - /* If we are already feeding, we want to restart it */ - if (c->export_state == ES_FEEDING) - { - /* Unless feeding is in initial state */ - if (!c->feed_active) - return; - - rt_feed_channel_abort(c); - } - - /* Track number of exported routes during refeed */ - c->refeed_count = 0; - - channel_schedule_feed(c, 0); /* Sets ES_FEEDING */ - channel_log_state_change(c); + c->refeed_pending = 1; + rt_stop_export(&c->out_req, channel_export_stopped); } static void channel_request_reload(struct channel *c) { - ASSERT(c->channel_state == CS_UP); + ASSERT(c->in_req.hook); ASSERT(channel_reloadable(c)); CD(c, "Reload requested"); @@ -743,8 +835,8 @@ channel_request_reload(struct channel *c) * Should this be done before reload_routes() hook? * Perhaps, but routes are updated asynchronously. */ - channel_reset_limit(&c->rx_limit); - channel_reset_limit(&c->in_limit); + channel_reset_limit(c, &c->rx_limit, PLD_RX); + channel_reset_limit(c, &c->in_limit, PLD_IN); } const struct channel_class channel_basic = { @@ -847,19 +939,19 @@ channel_reconfigure(struct channel *c, struct channel_config *cf) /* Reconfigure channel fields */ c->in_filter = cf->in_filter; c->out_filter = cf->out_filter; - c->rx_limit = cf->rx_limit; - c->in_limit = cf->in_limit; - c->out_limit = cf->out_limit; + + channel_update_limit(c, &c->rx_limit, PLD_RX, &cf->rx_limit); + channel_update_limit(c, &c->in_limit, PLD_IN, &cf->in_limit); + channel_update_limit(c, &c->out_limit, PLD_OUT, &cf->out_limit); // c->ra_mode = cf->ra_mode; c->merge_limit = cf->merge_limit; c->preference = cf->preference; c->debug = cf->debug; + c->in_req.trace_routes = c->out_req.trace_routes = c->debug | c->proto->debug; c->in_keep_filtered = cf->in_keep_filtered; c->rpki_reload = cf->rpki_reload; - channel_verify_limits(c); - /* Execute channel-specific reconfigure hook */ if (c->channel->reconfigure && !c->channel->reconfigure(c, cf, &import_changed, &export_changed)) return 0; @@ -971,8 +1063,8 @@ proto_event(void *ptr) if (proto_is_done(p)) { - if (p->proto->cleanup) - p->proto->cleanup(p); + rfree(p->pool); + p->pool = NULL; p->active = 0; proto_log_state_change(p); @@ -1524,7 +1616,7 @@ graceful_restart_done(timer *t UNUSED) WALK_LIST(c, p->channels) { /* Resume postponed export of routes */ - if ((c->channel_state == CS_UP) && c->gr_wait && c->proto->rt_notify) + if ((c->channel_state == CS_UP) && c->gr_wait && p->rt_notify) channel_start_export(c); /* Cleanup */ @@ -1614,7 +1706,11 @@ protos_dump_all(void) struct proto *p; WALK_LIST(p, proto_list) { - debug(" protocol %s state %s\n", p->name, p_states[p->proto_state]); +#define DPF(x) (p->x ? " " #x : "") + debug(" protocol %s (%p) state %s with %d active channels flags: %s%s%s%s%s\n", + p->name, p, p_states[p->proto_state], p->active_channels, + DPF(disabled), DPF(active), DPF(do_start), DPF(do_stop), DPF(reconfiguring)); +#undef DPF struct channel *c; WALK_LIST(c, p->channels) @@ -1624,6 +1720,9 @@ protos_dump_all(void) debug("\tInput filter: %s\n", filter_name(c->in_filter)); if (c->out_filter) debug("\tOutput filter: %s\n", filter_name(c->out_filter)); + debug("\tChannel state: %s/%s/%s\n", c_states[c->channel_state], + c->in_req.hook ? rt_import_state_name(rt_import_get_state(c->in_req.hook)) : "-", + c->out_req.hook ? rt_export_state_name(rt_export_get_state(c->out_req.hook)) : "-"); } if (p->proto->dump && (p->proto_state != PS_DOWN)) @@ -1643,14 +1742,13 @@ void proto_build(struct protocol *p) { add_tail(&protocol_list, &p->n); - ASSERT(p->class); - ASSERT(!class_to_protocol[p->class]); - class_to_protocol[p->class] = p; } /* FIXME: convert this call to some protocol hook */ extern void bfd_init_all(void); +void protos_build_gen(void); + /** * protos_build - build a protocol list * @@ -1663,44 +1761,7 @@ extern void bfd_init_all(void); void protos_build(void) { - init_list(&proto_list); - init_list(&protocol_list); - - proto_build(&proto_device); -#ifdef CONFIG_RADV - proto_build(&proto_radv); -#endif -#ifdef CONFIG_RIP - proto_build(&proto_rip); -#endif -#ifdef CONFIG_STATIC - proto_build(&proto_static); -#endif -#ifdef CONFIG_MRT - proto_build(&proto_mrt); -#endif -#ifdef CONFIG_OSPF - proto_build(&proto_ospf); -#endif -#ifdef CONFIG_PIPE - proto_build(&proto_pipe); -#endif -#ifdef CONFIG_BGP - proto_build(&proto_bgp); -#endif -#ifdef CONFIG_BFD - proto_build(&proto_bfd); - bfd_init_all(); -#endif -#ifdef CONFIG_BABEL - proto_build(&proto_babel); -#endif -#ifdef CONFIG_RPKI - proto_build(&proto_rpki); -#endif -#ifdef CONFIG_PERF - proto_build(&proto_perf); -#endif + protos_build_gen(); proto_pool = rp_new(&root_pool, "Protocols"); proto_shutdown_timer = tm_new(proto_pool); @@ -1780,88 +1841,104 @@ proto_set_message(struct proto *p, char *msg, int len) } -static const char * -channel_limit_name(struct channel_limit *l) -{ - const char *actions[] = { - [PLA_WARN] = "warn", - [PLA_BLOCK] = "block", - [PLA_RESTART] = "restart", - [PLA_DISABLE] = "disable", - }; +static const char * channel_limit_name[] = { + [PLA_WARN] = "warn", + [PLA_BLOCK] = "block", + [PLA_RESTART] = "restart", + [PLA_DISABLE] = "disable", +}; - return actions[l->action]; -} -/** - * channel_notify_limit: notify about limit hit and take appropriate action - * @c: channel - * @l: limit being hit - * @dir: limit direction (PLD_*) - * @rt_count: the number of routes - * - * The function is called by the route processing core when limit @l - * is breached. It activates the limit and tooks appropriate action - * according to @l->action. - */ -void -channel_notify_limit(struct channel *c, struct channel_limit *l, int dir, u32 rt_count) +static void +channel_log_limit(struct channel *c, struct limit *l, int dir) { const char *dir_name[PLD_MAX] = { "receive", "import" , "export" }; - const byte dir_down[PLD_MAX] = { PDC_RX_LIMIT_HIT, PDC_IN_LIMIT_HIT, PDC_OUT_LIMIT_HIT }; - struct proto *p = c->proto; - - if (l->state == PLS_BLOCKED) - return; - - /* For warning action, we want the log message every time we hit the limit */ - if (!l->state || ((l->action == PLA_WARN) && (rt_count == l->limit))) - log(L_WARN "Protocol %s hits route %s limit (%d), action: %s", - p->name, dir_name[dir], l->limit, channel_limit_name(l)); - - switch (l->action) - { - case PLA_WARN: - l->state = PLS_ACTIVE; - break; - - case PLA_BLOCK: - l->state = PLS_BLOCKED; - break; - - case PLA_RESTART: - case PLA_DISABLE: - l->state = PLS_BLOCKED; - if (p->proto_state == PS_UP) - proto_schedule_down(p, l->action == PLA_RESTART, dir_down[dir]); - break; - } + log(L_WARN "Channel %s.%s hits route %s limit (%d), action: %s", + c->proto->name, c->name, dir_name[dir], l->max, channel_limit_name[c->limit_actions[dir]]); } static void -channel_verify_limits(struct channel *c) +channel_activate_limit(struct channel *c, struct limit *l, int dir) { - struct channel_limit *l; - u32 all_routes = c->stats.imp_routes + c->stats.filt_routes; + if (c->limit_active & (1 << dir)) + return; - l = &c->rx_limit; - if (l->action && (all_routes > l->limit)) - channel_notify_limit(c, l, PLD_RX, all_routes); - - l = &c->in_limit; - if (l->action && (c->stats.imp_routes > l->limit)) - channel_notify_limit(c, l, PLD_IN, c->stats.imp_routes); - - l = &c->out_limit; - if (l->action && (c->stats.exp_routes > l->limit)) - channel_notify_limit(c, l, PLD_OUT, c->stats.exp_routes); + c->limit_active |= (1 << dir); + channel_log_limit(c, l, dir); } -static inline void -channel_reset_limit(struct channel_limit *l) +static int +channel_limit_warn(struct limit *l, void *data) { - if (l->action) - l->state = PLS_INITIAL; + struct channel_limit_data *cld = data; + struct channel *c = cld->c; + int dir = cld->dir; + + channel_log_limit(c, l, dir); + + return 0; +} + +static int +channel_limit_block(struct limit *l, void *data) +{ + struct channel_limit_data *cld = data; + struct channel *c = cld->c; + int dir = cld->dir; + + channel_activate_limit(c, l, dir); + + return 1; +} + +static const byte chl_dir_down[PLD_MAX] = { PDC_RX_LIMIT_HIT, PDC_IN_LIMIT_HIT, PDC_OUT_LIMIT_HIT }; + +static int +channel_limit_down(struct limit *l, void *data) +{ + struct channel_limit_data *cld = data; + struct channel *c = cld->c; + struct proto *p = c->proto; + int dir = cld->dir; + + channel_activate_limit(c, l, dir); + + if (p->proto_state == PS_UP) + proto_schedule_down(p, c->limit_actions[dir] == PLA_RESTART, chl_dir_down[dir]); + + return 1; +} + +static int (*channel_limit_action[])(struct limit *, void *) = { + [PLA_NONE] = NULL, + [PLA_WARN] = channel_limit_warn, + [PLA_BLOCK] = channel_limit_block, + [PLA_RESTART] = channel_limit_down, + [PLA_DISABLE] = channel_limit_down, +}; + +static void +channel_update_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf) +{ + l->action = channel_limit_action[cf->action]; + c->limit_actions[dir] = cf->action; + + struct channel_limit_data cld = { .c = c, .dir = dir }; + limit_update(l, &cld, cf->action ? cf->limit : ~((u32) 0)); +} + +static void +channel_init_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf) +{ + channel_reset_limit(c, l, dir); + channel_update_limit(c, l, dir, cf); +} + +static void +channel_reset_limit(struct channel *c, struct limit *l, int dir) +{ + limit_reset(l); + c->limit_active &= ~(1 << dir); } static inline void @@ -1913,8 +1990,6 @@ proto_do_down(struct proto *p) { p->down_code = 0; neigh_prune(); - rfree(p->pool); - p->pool = NULL; /* Shutdown is finished in the protocol event */ if (proto_is_done(p)) @@ -2009,38 +2084,58 @@ proto_state_name(struct proto *p) static void channel_show_stats(struct channel *c) { - struct proto_stats *s = &c->stats; + struct channel_import_stats *ch_is = &c->import_stats; + struct channel_export_stats *ch_es = &c->export_stats; + struct rt_import_stats *rt_is = c->in_req.hook ? &c->in_req.hook->stats : NULL; + struct rt_export_stats *rt_es = c->out_req.hook ? &c->out_req.hook->stats : NULL; + +#define SON(ie, item) ((ie) ? (ie)->item : 0) +#define SCI(item) SON(ch_is, item) +#define SCE(item) SON(ch_es, item) +#define SRI(item) SON(rt_is, item) +#define SRE(item) SON(rt_es, item) + + u32 rx_routes = c->rx_limit.count; + u32 in_routes = c->in_limit.count; + u32 out_routes = c->out_limit.count; if (c->in_keep_filtered) cli_msg(-1006, " Routes: %u imported, %u filtered, %u exported, %u preferred", - s->imp_routes, s->filt_routes, s->exp_routes, s->pref_routes); + in_routes, (rx_routes - in_routes), out_routes, SRI(pref)); else cli_msg(-1006, " Routes: %u imported, %u exported, %u preferred", - s->imp_routes, s->exp_routes, s->pref_routes); + in_routes, out_routes, SRI(pref)); - cli_msg(-1006, " Route change stats: received rejected filtered ignored accepted"); - cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u", - s->imp_updates_received, s->imp_updates_invalid, - s->imp_updates_filtered, s->imp_updates_ignored, - s->imp_updates_accepted); - cli_msg(-1006, " Import withdraws: %10u %10u --- %10u %10u", - s->imp_withdraws_received, s->imp_withdraws_invalid, - s->imp_withdraws_ignored, s->imp_withdraws_accepted); - cli_msg(-1006, " Export updates: %10u %10u %10u --- %10u", - s->exp_updates_received, s->exp_updates_rejected, - s->exp_updates_filtered, s->exp_updates_accepted); - cli_msg(-1006, " Export withdraws: %10u --- --- --- %10u", - s->exp_withdraws_received, s->exp_withdraws_accepted); + cli_msg(-1006, " Route change stats: received rejected filtered ignored RX limit IN limit accepted"); + cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u %10u %10u", + SCI(updates_received), SCI(updates_invalid), + SCI(updates_filtered), SRI(updates_ignored), + SCI(updates_limited_rx), SCI(updates_limited_in), + SRI(updates_accepted)); + cli_msg(-1006, " Import withdraws: %10u %10u --- %10u --- %10u", + SCI(withdraws_received), SCI(withdraws_invalid), + SRI(withdraws_ignored), SRI(withdraws_accepted)); + cli_msg(-1006, " Export updates: %10u %10u %10u --- %10u %10u", + SRE(updates_received), SCE(updates_rejected), + SCE(updates_filtered), SCE(updates_limited), SCE(updates_accepted)); + cli_msg(-1006, " Export withdraws: %10u --- --- --- ---%10u", + SRE(withdraws_received), SCE(withdraws_accepted)); + +#undef SRI +#undef SRE +#undef SCI +#undef SCE +#undef SON } void -channel_show_limit(struct channel_limit *l, const char *dsc) +channel_show_limit(struct limit *l, const char *dsc, int active, int action) { if (!l->action) return; - cli_msg(-1006, " %-16s%d%s", dsc, l->limit, l->state ? " [HIT]" : ""); - cli_msg(-1006, " Action: %s", channel_limit_name(l)); + cli_msg(-1006, " %-16s%d%s", dsc, l->max, active ? " [HIT]" : ""); + cli_msg(-1006, " Action: %s", channel_limit_name[action]); } void @@ -2048,6 +2143,8 @@ channel_show_info(struct channel *c) { cli_msg(-1006, " Channel %s", c->name); cli_msg(-1006, " State: %s", c_states[c->channel_state]); + cli_msg(-1006, " Import state: %s", rt_import_state_name(rt_import_get_state(c->in_req.hook))); + cli_msg(-1006, " Export state: %s", rt_export_state_name(rt_export_get_state(c->out_req.hook))); cli_msg(-1006, " Table: %s", c->table->name); cli_msg(-1006, " Preference: %d", c->preference); cli_msg(-1006, " Input filter: %s", filter_name(c->in_filter)); @@ -2058,9 +2155,9 @@ channel_show_info(struct channel *c) c->gr_lock ? " pending" : "", c->gr_wait ? " waiting" : ""); - channel_show_limit(&c->rx_limit, "Receive limit:"); - channel_show_limit(&c->in_limit, "Import limit:"); - channel_show_limit(&c->out_limit, "Export limit:"); + channel_show_limit(&c->rx_limit, "Receive limit:", c->limit_active & (1 << PLD_RX), c->limit_actions[PLD_RX]); + channel_show_limit(&c->in_limit, "Import limit:", c->limit_active & (1 << PLD_IN), c->limit_actions[PLD_IN]); + channel_show_limit(&c->out_limit, "Export limit:", c->limit_active & (1 << PLD_OUT), c->limit_actions[PLD_OUT]); if (c->channel_state != CS_DOWN) channel_show_stats(c); diff --git a/nest/protocol.h b/nest/protocol.h index 80b4509b..aeb60ac6 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -12,7 +12,8 @@ #include "lib/lists.h" #include "lib/resource.h" #include "lib/event.h" -#include "nest/route.h" +#include "nest/rt.h" +#include "nest/limit.h" #include "conf/conf.h" struct iface; @@ -37,38 +38,20 @@ struct symbol; * Routing Protocol */ -enum protocol_class { - PROTOCOL_NONE, - PROTOCOL_BABEL, - PROTOCOL_BFD, - PROTOCOL_BGP, - PROTOCOL_DEVICE, - PROTOCOL_DIRECT, - PROTOCOL_KERNEL, - PROTOCOL_OSPF, - PROTOCOL_MRT, - PROTOCOL_PERF, - PROTOCOL_PIPE, - PROTOCOL_RADV, - PROTOCOL_RIP, - PROTOCOL_RPKI, - PROTOCOL_STATIC, - PROTOCOL__MAX -}; - -extern struct protocol *class_to_protocol[PROTOCOL__MAX]; struct protocol { node n; char *name; char *template; /* Template for automatic generation of names */ int name_counter; /* Counter for automatic name generation */ - enum protocol_class class; /* Machine readable protocol class */ uint preference; /* Default protocol preference */ uint channel_mask; /* Mask of accepted channel types (NB_*) */ uint proto_size; /* Size of protocol data structure */ uint config_size; /* Size of protocol config data structure */ + uint eattr_begin; /* First ID of registered eattrs */ + uint eattr_end; /* End of eattr id zone */ + void (*preconfig)(struct protocol *, struct config *); /* Just before configuring */ void (*postconfig)(struct proto_config *); /* After configuring each instance */ struct proto * (*init)(struct proto_config *); /* Create new instance */ @@ -76,16 +59,15 @@ struct protocol { void (*dump)(struct proto *); /* Debugging dump */ int (*start)(struct proto *); /* Start the instance */ int (*shutdown)(struct proto *); /* Stop the instance */ - void (*cleanup)(struct proto *); /* Called after shutdown when protocol became hungry/down */ void (*get_status)(struct proto *, byte *buf); /* Get instance status (for `show protocols' command) */ void (*get_route_info)(struct rte *, byte *buf); /* Get route information (for `show route' command) */ - int (*get_attr)(const struct eattr *, byte *buf, int buflen); /* ASCIIfy dynamic attribute (returns GA_*) */ +// int (*get_attr)(const struct eattr *, byte *buf, int buflen); /* ASCIIfy dynamic attribute (returns GA_*) */ void (*show_proto_info)(struct proto *); /* Show protocol info (for `show protocols all' command) */ void (*copy_config)(struct proto_config *, struct proto_config *); /* Copy config from given protocol instance */ }; -void protos_build(void); -void proto_build(struct protocol *); +void protos_build(void); /* Called from sysdep to initialize protocols */ +void proto_build(struct protocol *); /* Called from protocol to register itself */ void protos_preconfig(struct config *); void protos_commit(struct config *new, struct config *old, int force_restart, int type); struct proto * proto_spawn(struct proto_config *cf, uint disabled); @@ -132,31 +114,6 @@ struct proto_config { }; /* Protocol statistics */ -struct proto_stats { - /* Import - from protocol to core */ - u32 imp_routes; /* Number of routes successfully imported to the (adjacent) routing table */ - u32 filt_routes; /* Number of routes rejected in import filter but kept in the routing table */ - u32 pref_routes; /* Number of routes selected as best in the (adjacent) routing table */ - u32 imp_updates_received; /* Number of route updates received */ - u32 imp_updates_invalid; /* Number of route updates rejected as invalid */ - u32 imp_updates_filtered; /* Number of route updates rejected by filters */ - u32 imp_updates_ignored; /* Number of route updates rejected as already in route table */ - u32 imp_updates_accepted; /* Number of route updates accepted and imported */ - u32 imp_withdraws_received; /* Number of route withdraws received */ - u32 imp_withdraws_invalid; /* Number of route withdraws rejected as invalid */ - u32 imp_withdraws_ignored; /* Number of route withdraws rejected as already not in route table */ - u32 imp_withdraws_accepted; /* Number of route withdraws accepted and processed */ - - /* Export - from core to protocol */ - u32 exp_routes; /* Number of routes successfully exported to the protocol */ - u32 exp_updates_received; /* Number of route updates received */ - u32 exp_updates_rejected; /* Number of route updates rejected by protocol */ - u32 exp_updates_filtered; /* Number of route updates rejected by filters */ - u32 exp_updates_accepted; /* Number of route updates accepted and exported */ - u32 exp_withdraws_received; /* Number of route withdraws received */ - u32 exp_withdraws_accepted; /* Number of route withdraws accepted and processed */ -}; - struct proto { node n; /* Node in global proto_list */ struct protocol *proto; /* Protocol */ @@ -235,7 +192,7 @@ struct proto { struct rte *(*rte_modify)(struct rte *, struct linpool *); void (*rte_insert)(struct network *, struct rte *); void (*rte_remove)(struct network *, struct rte *); - u32 (*rte_igp_metric)(struct rte *); + u32 (*rte_igp_metric)(const struct rte *); /* Hic sunt protocol-specific data */ }; @@ -275,7 +232,7 @@ void channel_graceful_restart_unlock(struct channel *c); #define DEFAULT_GR_WAIT 240 -void channel_show_limit(struct channel_limit *l, const char *dsc); +void channel_show_limit(struct limit *l, const char *dsc, int active, int action); void channel_show_info(struct channel *c); void channel_cmd_debug(struct channel *c, uint mask); @@ -430,18 +387,29 @@ extern struct proto_config *cf_dev_proto; #define PLA_RESTART 4 /* Force protocol restart */ #define PLA_DISABLE 5 /* Shutdown and disable protocol */ -#define PLS_INITIAL 0 /* Initial limit state after protocol start */ -#define PLS_ACTIVE 1 /* Limit was hit */ -#define PLS_BLOCKED 2 /* Limit is active and blocking new routes */ - struct channel_limit { u32 limit; /* Maximum number of prefixes */ u8 action; /* Action to take (PLA_*) */ - u8 state; /* State of limit (PLS_*) */ }; -void channel_notify_limit(struct channel *c, struct channel_limit *l, int dir, u32 rt_count); +struct channel_limit_data { + struct channel *c; + int dir; +}; +#define CLP__RX(_c) (&(_c)->rx_limit) +#define CLP__IN(_c) (&(_c)->in_limit) +#define CLP__OUT(_c) (&(_c)->out_limit) + + +#if 0 +#define CHANNEL_LIMIT_LOG(_c, _dir, _op) log(L_TRACE "%s.%s: %s limit %s %u", (_c)->proto->name, (_c)->name, #_dir, _op, (CLP__##_dir(_c))->count) +#else +#define CHANNEL_LIMIT_LOG(_c, _dir, _op) +#endif + +#define CHANNEL_LIMIT_PUSH(_c, _dir) ({ CHANNEL_LIMIT_LOG(_c, _dir, "push from"); struct channel_limit_data cld = { .c = (_c), .dir = PLD_##_dir }; limit_push(CLP__##_dir(_c), &cld); }) +#define CHANNEL_LIMIT_POP(_c, _dir) ({ limit_pop(CLP__##_dir(_c)); CHANNEL_LIMIT_LOG(_c, _dir, "pop to"); }) /* * Channels @@ -484,6 +452,7 @@ struct channel_config { struct proto_config *parent; /* Where channel is defined (proto or template) */ struct rtable_config *table; /* Table we're attached to */ const struct filter *in_filter, *out_filter; /* Attached filters */ + struct channel_limit rx_limit; /* Limit for receiving routes from protocol (relevant when in_keep_filtered is active) */ struct channel_limit in_limit; /* Limit for importing routes from protocol */ @@ -500,7 +469,6 @@ struct channel_config { struct channel { node n; /* Node in proto->channels */ - node table_node; /* Node in table->channels */ const char *name; /* Channel name (may be NULL) */ const struct channel_class *channel; @@ -509,14 +477,39 @@ struct channel { struct rtable *table; const struct filter *in_filter; /* Input filter */ const struct filter *out_filter; /* Output filter */ - struct bmap export_map; /* Keeps track which routes passed export filter */ - struct channel_limit rx_limit; /* Receive limit (for in_keep_filtered) */ - struct channel_limit in_limit; /* Input limit */ - struct channel_limit out_limit; /* Output limit */ + struct bmap export_map; /* Keeps track which routes were really exported */ + struct bmap export_reject_map; /* Keeps track which routes were rejected by export filter */ + + struct limit rx_limit; /* Receive limit (for in_keep_filtered) */ + struct limit in_limit; /* Input limit */ + struct limit out_limit; /* Output limit */ + + u8 limit_actions[PLD_MAX]; /* Limit actions enum */ + u8 limit_active; /* Flags for active limits */ + + struct channel_import_stats { + /* Import - from protocol to core */ + u32 updates_received; /* Number of route updates received */ + u32 updates_invalid; /* Number of route updates rejected as invalid */ + u32 updates_filtered; /* Number of route updates rejected by filters */ + u32 updates_limited_rx; /* Number of route updates exceeding the rx_limit */ + u32 updates_limited_in; /* Number of route updates exceeding the in_limit */ + u32 withdraws_received; /* Number of route withdraws received */ + u32 withdraws_invalid; /* Number of route withdraws rejected as invalid */ + } import_stats; + + struct channel_export_stats { + /* Export - from core to protocol */ + u32 updates_rejected; /* Number of route updates rejected by protocol */ + u32 updates_filtered; /* Number of route updates rejected by filters */ + u32 updates_accepted; /* Number of route updates accepted and exported */ + u32 updates_limited; /* Number of route updates exceeding the out_limit */ + u32 withdraws_accepted; /* Number of route withdraws accepted and processed */ + } export_stats; + + struct rt_import_request in_req; /* Table import connection */ + struct rt_export_request out_req; /* Table export connection */ - struct event *feed_event; /* Event responsible for feeding */ - struct fib_iterator feed_fit; /* Routing table iterator used during feeding */ - struct proto_stats stats; /* Per-channel protocol statistics */ u32 refeed_count; /* Number of routes exported during refeed regardless of out_limit */ u8 net_type; /* Routing table network type (NET_*), 0 for undefined */ @@ -529,10 +522,7 @@ struct channel { u8 stale; /* Used in reconfiguration */ u8 channel_state; - u8 export_state; /* Route export state (ES_*, see below) */ - u8 feed_active; - u8 flush_active; - u8 refeeding; /* We are refeeding (valid only if export_state == ES_FEEDING) */ + u8 refeeding; /* Refeeding the channel. */ u8 reloadable; /* Hook reload_routes() is allowed on the channel */ u8 gr_lock; /* Graceful restart mechanism should wait for this channel */ u8 gr_wait; /* Route export to channel is postponed until graceful restart */ @@ -580,34 +570,34 @@ struct channel { * restricted by that and is on volition of the protocol. Generally, channels * are opened in protocols' start() hooks when going to PS_UP. * - * CS_FLUSHING - The transitional state between initialized channel and closed + * CS_STOP - The transitional state between initialized channel and closed * channel. The channel is still initialized, but no route exchange is allowed. * Instead, the associated table is running flush loop to remove routes imported * through the channel. After that, the channel changes state to CS_DOWN and * is detached from the table (the table is unlocked and the channel is unlinked - * from it). Unlike other states, the CS_FLUSHING state is not explicitly + * from it). Unlike other states, the CS_STOP state is not explicitly * entered or left by the protocol. A protocol may request to close a channel * (by calling channel_close()), which causes the channel to change state to - * CS_FLUSHING and later to CS_DOWN. Also note that channels are closed + * CS_STOP and later to CS_DOWN. Also note that channels are closed * automatically by the core when the protocol is going down. * + * CS_PAUSE - Almost the same as CS_STOP, just the table import is kept and + * the table export is stopped before transitioning to CS_START. + * * Allowed transitions: * * CS_DOWN -> CS_START / CS_UP - * CS_START -> CS_UP / CS_FLUSHING - * CS_UP -> CS_START / CS_FLUSHING - * CS_FLUSHING -> CS_DOWN (automatic) + * CS_START -> CS_UP / CS_STOP + * CS_UP -> CS_PAUSE / CS_STOP + * CS_PAUSE -> CS_START (automatic) + * CS_STOP -> CS_DOWN (automatic) */ #define CS_DOWN 0 #define CS_START 1 #define CS_UP 2 -#define CS_FLUSHING 3 - -#define ES_DOWN 0 -#define ES_FEEDING 1 -#define ES_READY 2 - +#define CS_STOP 3 +#define CS_PAUSE 4 struct channel_config *proto_cf_find_channel(struct proto_config *p, uint net_type); static inline struct channel_config *proto_cf_main_channel(struct proto_config *pc) @@ -625,7 +615,7 @@ void channel_schedule_reload(struct channel *c); static inline void channel_init(struct channel *c) { channel_set_state(c, CS_START); } static inline void channel_open(struct channel *c) { channel_set_state(c, CS_UP); } -static inline void channel_close(struct channel *c) { channel_set_state(c, CS_FLUSHING); } +static inline void channel_close(struct channel *c) { channel_set_state(c, CS_STOP); } void channel_request_feeding(struct channel *c); void *channel_config_new(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto); diff --git a/nest/route.h b/nest/route.h deleted file mode 100644 index ade14857..00000000 --- a/nest/route.h +++ /dev/null @@ -1,727 +0,0 @@ -/* - * BIRD Internet Routing Daemon -- Routing Table - * - * (c) 1998--2000 Martin Mares <mj@ucw.cz> - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#ifndef _BIRD_ROUTE_H_ -#define _BIRD_ROUTE_H_ - -#include "lib/lists.h" -#include "lib/bitmap.h" -#include "lib/resource.h" -#include "lib/net.h" - -struct ea_list; -struct protocol; -struct proto; -struct rte_src; -struct symbol; -struct timer; -struct filter; -struct cli; - -/* - * Generic data structure for storing network prefixes. Also used - * for the master routing table. Currently implemented as a hash - * table. - * - * Available operations: - * - insertion of new entry - * - deletion of entry - * - searching for entry by network prefix - * - asynchronous retrieval of fib contents - */ - -struct fib_node { - struct fib_node *next; /* Next in hash chain */ - struct fib_iterator *readers; /* List of readers of this node */ - net_addr addr[0]; -}; - -struct fib_iterator { /* See lib/slists.h for an explanation */ - struct fib_iterator *prev, *next; /* Must be synced with struct fib_node! */ - byte efef; /* 0xff to distinguish between iterator and node */ - byte pad[3]; - struct fib_node *node; /* Or NULL if freshly merged */ - uint hash; -}; - -typedef void (*fib_init_fn)(void *); - -struct fib { - pool *fib_pool; /* Pool holding all our data */ - slab *fib_slab; /* Slab holding all fib nodes */ - struct fib_node **hash_table; /* Node hash table */ - uint hash_size; /* Number of hash table entries (a power of two) */ - uint hash_order; /* Binary logarithm of hash_size */ - uint hash_shift; /* 32 - hash_order */ - uint addr_type; /* Type of address data stored in fib (NET_*) */ - uint node_size; /* FIB node size, 0 for nonuniform */ - uint node_offset; /* Offset of fib_node struct inside of user data */ - uint entries; /* Number of entries */ - uint entries_min, entries_max; /* Entry count limits (else start rehashing) */ - fib_init_fn init; /* Constructor */ -}; - -static inline void * fib_node_to_user(struct fib *f, struct fib_node *e) -{ return e ? (void *) ((char *) e - f->node_offset) : NULL; } - -static inline struct fib_node * fib_user_to_node(struct fib *f, void *e) -{ return e ? (void *) ((char *) e + f->node_offset) : NULL; } - -void fib_init(struct fib *f, pool *p, uint addr_type, uint node_size, uint node_offset, uint hash_order, fib_init_fn init); -void *fib_find(struct fib *, const net_addr *); /* Find or return NULL if doesn't exist */ -void *fib_get_chain(struct fib *f, const net_addr *a); /* Find first node in linked list from hash table */ -void *fib_get(struct fib *, const net_addr *); /* Find or create new if nonexistent */ -void *fib_route(struct fib *, const net_addr *); /* Longest-match routing lookup */ -void fib_delete(struct fib *, void *); /* Remove fib entry */ -void fib_free(struct fib *); /* Destroy the fib */ -void fib_check(struct fib *); /* Consistency check for debugging */ - -void fit_init(struct fib_iterator *, struct fib *); /* Internal functions, don't call */ -struct fib_node *fit_get(struct fib *, struct fib_iterator *); -void fit_put(struct fib_iterator *, struct fib_node *); -void fit_put_next(struct fib *f, struct fib_iterator *i, struct fib_node *n, uint hpos); -void fit_put_end(struct fib_iterator *i); -void fit_copy(struct fib *f, struct fib_iterator *dst, struct fib_iterator *src); - - -#define FIB_WALK(fib, type, z) do { \ - struct fib_node *fn_, **ff_ = (fib)->hash_table; \ - uint count_ = (fib)->hash_size; \ - type *z; \ - while (count_--) \ - for (fn_ = *ff_++; z = fib_node_to_user(fib, fn_); fn_=fn_->next) - -#define FIB_WALK_END } while (0) - -#define FIB_ITERATE_INIT(it, fib) fit_init(it, fib) - -#define FIB_ITERATE_START(fib, it, type, z) do { \ - struct fib_node *fn_ = fit_get(fib, it); \ - uint count_ = (fib)->hash_size; \ - uint hpos_ = (it)->hash; \ - type *z; \ - for(;;) { \ - if (!fn_) \ - { \ - if (++hpos_ >= count_) \ - break; \ - fn_ = (fib)->hash_table[hpos_]; \ - continue; \ - } \ - z = fib_node_to_user(fib, fn_); - -#define FIB_ITERATE_END fn_ = fn_->next; } } while(0) - -#define FIB_ITERATE_PUT(it) fit_put(it, fn_) - -#define FIB_ITERATE_PUT_NEXT(it, fib) fit_put_next(fib, it, fn_, hpos_) - -#define FIB_ITERATE_PUT_END(it) fit_put_end(it) - -#define FIB_ITERATE_UNLINK(it, fib) fit_get(fib, it) - -#define FIB_ITERATE_COPY(dst, src, fib) fit_copy(fib, dst, src) - - -/* - * Master Routing Tables. Generally speaking, each of them contains a FIB - * with each entry pointing to a list of route entries representing routes - * to given network (with the selected one at the head). - * - * Each of the RTE's contains variable data (the preference and protocol-dependent - * metrics) and a pointer to a route attribute block common for many routes). - * - * It's guaranteed that there is at most one RTE for every (prefix,proto) pair. - */ - -struct rtable_config { - node n; - char *name; - struct rtable *table; - struct proto_config *krt_attached; /* Kernel syncer attached to this table */ - uint addr_type; /* Type of address data stored in table (NET_*) */ - int gc_max_ops; /* Maximum number of operations before GC is run */ - int gc_min_time; /* Minimum time between two consecutive GC runs */ - byte sorted; /* Routes of network are sorted according to rte_better() */ - byte internal; /* Internal table of a protocol */ - btime min_settle_time; /* Minimum settle time for notifications */ - btime max_settle_time; /* Maximum settle time for notifications */ -}; - -typedef struct rtable { - resource r; - node n; /* Node in list of all tables */ - pool *rp; /* Resource pool to allocate everything from, including itself */ - struct slab *rte_slab; /* Slab to allocate route objects */ - struct fib fib; - char *name; /* Name of this table */ - list channels; /* List of attached channels (struct channel) */ - uint addr_type; /* Type of address data stored in table (NET_*) */ - int pipe_busy; /* Pipe loop detection */ - int use_count; /* Number of protocols using this table */ - u32 rt_count; /* Number of routes in the table */ - - byte internal; /* Internal table of a protocol */ - - struct hmap id_map; - struct hostcache *hostcache; - struct rtable_config *config; /* Configuration of this table */ - struct config *deleted; /* Table doesn't exist in current configuration, - * delete as soon as use_count becomes 0 and remove - * obstacle from this routing table. - */ - struct event *rt_event; /* Routing table event */ - btime last_rt_change; /* Last time when route changed */ - btime base_settle_time; /* Start time of rtable settling interval */ - btime gc_time; /* Time of last GC */ - int gc_counter; /* Number of operations since last GC */ - byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */ - byte hcu_scheduled; /* Hostcache update is scheduled */ - byte nhu_state; /* Next Hop Update state */ - struct fib_iterator prune_fit; /* Rtable prune FIB iterator */ - struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */ - - list subscribers; /* Subscribers for notifications */ - struct timer *settle_timer; /* Settle time for notifications */ -} rtable; - -struct rt_subscription { - node n; - rtable *tab; - void (*hook)(struct rt_subscription *b); - void *data; -}; - -#define NHU_CLEAN 0 -#define NHU_SCHEDULED 1 -#define NHU_RUNNING 2 -#define NHU_DIRTY 3 - -typedef struct network { - struct rte_storage *routes; /* Available routes for this network */ - struct fib_node n; /* FIB flags reserved for kernel syncer */ -} net; - -struct hostcache { - slab *slab; /* Slab holding all hostentries */ - struct hostentry **hash_table; /* Hash table for hostentries */ - unsigned hash_order, hash_shift; - unsigned hash_max, hash_min; - unsigned hash_items; - linpool *lp; /* Linpool for trie */ - struct f_trie *trie; /* Trie of prefixes that might affect hostentries */ - list hostentries; /* List of all hostentries */ - byte update_hostcache; -}; - -struct hostentry { - node ln; - ip_addr addr; /* IP address of host, part of key */ - ip_addr link; /* (link-local) IP address of host, used as gw - if host is directly attached */ - struct rtable *tab; /* Dependent table, part of key */ - struct hostentry *next; /* Next in hash chain */ - unsigned hash_key; /* Hash key */ - unsigned uc; /* Use count */ - struct rta *src; /* Source rta entry */ - byte dest; /* Chosen route destination type (RTD_...) */ - byte nexthop_linkable; /* Nexthop list is completely non-device */ - u32 igp_metric; /* Chosen route IGP metric */ -}; - -typedef struct rte { - struct rta *attrs; /* Attributes of this route */ - const net_addr *net; /* Network this RTE belongs to */ - struct rte_src *src; /* Route source that created the route */ - struct channel *sender; /* Channel used to send the route to the routing table */ - btime lastmod; /* Last modified (set by table) */ - u32 id; /* Table specific route id */ - byte flags; /* Table-specific flags */ - byte pflags; /* Protocol-specific flags */ -} rte; - -struct rte_storage { - struct rte_storage *next; /* Next in chain */ - struct rte rte; /* Route data */ -}; - -#define RTE_COPY(r, l) ((r) ? (((*(l)) = (r)->rte), (l)) : NULL) -#define RTE_OR_NULL(r) ((r) ? &((r)->rte) : NULL) - -#define REF_FILTERED 2 /* Route is rejected by import filter */ -#define REF_STALE 4 /* Route is stale in a refresh cycle */ -#define REF_DISCARD 8 /* Route is scheduled for discard */ -#define REF_MODIFY 16 /* Route is scheduled for modify */ - -/* Route is valid for propagation (may depend on other flags in the future), accepts NULL */ -static inline int rte_is_valid(rte *r) { return r && !(r->flags & REF_FILTERED); } - -/* Route just has REF_FILTERED flag */ -static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED); } - - -/* Types of route announcement, also used as flags */ -#define RA_UNDEF 0 /* Undefined RA type */ -#define RA_OPTIMAL 1 /* Announcement of optimal route change */ -#define RA_ACCEPTED 2 /* Announcement of first accepted route */ -#define RA_ANY 3 /* Announcement of any route change */ -#define RA_MERGED 4 /* Announcement of optimal route merged with next ones */ - -/* Return value of preexport() callback */ -#define RIC_ACCEPT 1 /* Accepted by protocol */ -#define RIC_PROCESS 0 /* Process it through import filter */ -#define RIC_REJECT -1 /* Rejected by protocol */ -#define RIC_DROP -2 /* Silently dropped by protocol */ - -/** - * rte_update - enter a new update to a routing table - * @c: channel doing the update - * @net: network address - * @rte: a &rte representing the new route - * @src: old route source identifier - * - * This function imports a new route to the appropriate table (via the channel). - * Table keys are @net (obligatory) and @rte->attrs->src. - * Both the @net and @rte pointers can be local. - * - * The route attributes (@rte->attrs) are obligatory. They can be also allocated - * locally. Anyway, if you use an already-cached attribute object, you shall - * call rta_clone() on that object yourself. (This semantics may change in future.) - * - * If the route attributes are local, you may set @rte->attrs->src to NULL, then - * the protocol's default route source will be supplied. - * - * When rte_update() gets a route, it automatically validates it. This includes - * checking for validity of the given network and next hop addresses and also - * checking for host-scope or link-scope routes. Then the import filters are - * processed and if accepted, the route is passed to route table recalculation. - * - * The accepted routes are then inserted into the table, replacing the old route - * for the same @net identified by @src. Then the route is announced - * to all the channels connected to the table using the standard export mechanism. - * Setting @rte to NULL makes this a withdraw, otherwise @rte->src must be the same - * as @src. - * - * All memory used for temporary allocations is taken from a special linpool - * @rte_update_pool and freed when rte_update() finishes. - */ -void rte_update(struct channel *c, const net_addr *net, struct rte *rte, struct rte_src *src); - -extern list routing_tables; -struct config; - -void rt_init(void); -void rt_preconfig(struct config *); -void rt_commit(struct config *new, struct config *old); -void rt_lock_table(rtable *); -void rt_unlock_table(rtable *); -void rt_subscribe(rtable *tab, struct rt_subscription *s); -void rt_unsubscribe(struct rt_subscription *s); -rtable *rt_setup(pool *, struct rtable_config *); -static inline void rt_shutdown(rtable *r) { rfree(r->rp); } - -static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); } -static inline net *net_find_valid(rtable *tab, const net_addr *addr) -{ net *n = net_find(tab, addr); return (n && n->routes && rte_is_valid(&n->routes->rte)) ? n : NULL; } -static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } -void *net_route(rtable *tab, const net_addr *n); -int net_roa_check(rtable *tab, const net_addr *n, u32 asn); -int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter); -rte *rt_export_merged(struct channel *c, net *net, linpool *pool, int silent); -void rt_refresh_begin(rtable *t, struct channel *c); -void rt_refresh_end(rtable *t, struct channel *c); -void rt_modify_stale(rtable *t, struct channel *c); -void rt_schedule_prune(rtable *t); -void rte_dump(struct rte_storage *); -void rte_free(struct rte_storage *, rtable *); -struct rte_storage *rte_store(const rte *, net *net, rtable *); -void rt_dump(rtable *); -void rt_dump_all(void); -int rt_feed_channel(struct channel *c); -void rt_feed_channel_abort(struct channel *c); -int rt_reload_channel(struct channel *c); -void rt_reload_channel_abort(struct channel *c); -void rt_prune_sync(rtable *t, int all); -int rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old, struct rte_storage **old_exported, int refeed); -struct rtable_config *rt_new_table(struct symbol *s, uint addr_type); - - -/* Default limit for ECMP next hops, defined in sysdep code */ -extern const int rt_default_ecmp; - -struct rt_show_data_rtable { - node n; - rtable *table; - struct channel *export_channel; -}; - -struct rt_show_data { - net_addr *addr; - list tables; - struct rt_show_data_rtable *tab; /* Iterator over table list */ - struct rt_show_data_rtable *last_table; /* Last table in output */ - struct fib_iterator fit; /* Iterator over networks in table */ - int verbose, tables_defined_by; - const struct filter *filter; - struct proto *show_protocol; - struct proto *export_protocol; - struct channel *export_channel; - struct config *running_on_config; - struct krt_proto *kernel; - int export_mode, primary_only, filtered, stats, show_for; - - int table_open; /* Iteration (fit) is open */ - int net_counter, rt_counter, show_counter, table_counter; - int net_counter_last, rt_counter_last, show_counter_last; -}; - -void rt_show(struct rt_show_data *); -struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, rtable *t); - -/* Value of table definition mode in struct rt_show_data */ -#define RSD_TDB_DEFAULT 0 /* no table specified */ -#define RSD_TDB_INDIRECT 0 /* show route ... protocol P ... */ -#define RSD_TDB_ALL RSD_TDB_SET /* show route ... table all ... */ -#define RSD_TDB_DIRECT RSD_TDB_SET | RSD_TDB_NMN /* show route ... table X table Y ... */ - -#define RSD_TDB_SET 0x1 /* internal: show empty tables */ -#define RSD_TDB_NMN 0x2 /* internal: need matching net */ - -/* Value of export_mode in struct rt_show_data */ -#define RSEM_NONE 0 /* Export mode not used */ -#define RSEM_PREEXPORT 1 /* Routes ready for export, before filtering */ -#define RSEM_EXPORT 2 /* Routes accepted by export filter */ -#define RSEM_NOEXPORT 3 /* Routes rejected by export filter */ -#define RSEM_EXPORTED 4 /* Routes marked in export map */ - -/* - * Route Attributes - * - * Beware: All standard BGP attributes must be represented here instead - * of making them local to the route. This is needed to ensure proper - * construction of BGP route attribute lists. - */ - -/* Nexthop structure */ -struct nexthop { - ip_addr gw; /* Next hop */ - struct iface *iface; /* Outgoing interface */ - struct nexthop *next; - byte flags; - byte weight; - byte labels_orig; /* Number of labels before hostentry was applied */ - byte labels; /* Number of all labels */ - u32 label[0]; -}; - -#define RNF_ONLINK 0x1 /* Gateway is onlink regardless of IP ranges */ - - -struct rte_src { - struct rte_src *next; /* Hash chain */ - struct proto *proto; /* Protocol the source is based on */ - u32 private_id; /* Private ID, assigned by the protocol */ - u32 global_id; /* Globally unique ID of the source */ - unsigned uc; /* Use count */ -}; - - -typedef struct rta { - struct rta *next, **pprev; /* Hash chain */ - u32 uc; /* Use count */ - u32 hash_key; /* Hash over important fields */ - struct ea_list *eattrs; /* Extended Attribute chain */ - struct hostentry *hostentry; /* Hostentry for recursive next-hops */ - ip_addr from; /* Advertising router */ - u32 igp_metric; /* IGP metric to next hop (for iBGP routes) */ - u16 cached:1; /* Are attributes cached? */ - u16 source:7; /* Route source (RTS_...) */ - u16 scope:4; /* Route scope (SCOPE_... -- see ip.h) */ - u16 dest:4; /* Route destination type (RTD_...) */ - word pref; - struct nexthop nh; /* Next hop */ -} rta; - -#define RTS_STATIC 1 /* Normal static route */ -#define RTS_INHERIT 2 /* Route inherited from kernel */ -#define RTS_DEVICE 3 /* Device route */ -#define RTS_STATIC_DEVICE 4 /* Static device route */ -#define RTS_REDIRECT 5 /* Learned via redirect */ -#define RTS_RIP 6 /* RIP route */ -#define RTS_OSPF 7 /* OSPF route */ -#define RTS_OSPF_IA 8 /* OSPF inter-area route */ -#define RTS_OSPF_EXT1 9 /* OSPF external route type 1 */ -#define RTS_OSPF_EXT2 10 /* OSPF external route type 2 */ -#define RTS_BGP 11 /* BGP route */ -#define RTS_PIPE 12 /* Inter-table wormhole */ -#define RTS_BABEL 13 /* Babel route */ -#define RTS_RPKI 14 /* Route Origin Authorization */ -#define RTS_PERF 15 /* Perf checker */ -#define RTS_MAX 16 - -#define RTD_NONE 0 /* Undefined next hop */ -#define RTD_UNICAST 1 /* Next hop is neighbor router */ -#define RTD_BLACKHOLE 2 /* Silently drop packets */ -#define RTD_UNREACHABLE 3 /* Reject as unreachable */ -#define RTD_PROHIBIT 4 /* Administratively prohibited */ -#define RTD_MAX 5 - -#define IGP_METRIC_UNKNOWN 0x80000000 /* Default igp_metric used when no other - protocol-specific metric is availabe */ - - -extern const char * rta_dest_names[RTD_MAX]; - -static inline const char *rta_dest_name(uint n) -{ return (n < RTD_MAX) ? rta_dest_names[n] : "???"; } - -/* Route has regular, reachable nexthop (i.e. not RTD_UNREACHABLE and like) */ -static inline int rte_is_reachable(rte *r) -{ return r->attrs->dest == RTD_UNICAST; } - - -/* - * Extended Route Attributes - */ - -typedef struct eattr { - word id; /* EA_CODE(PROTOCOL_..., protocol-dependent ID) */ - byte flags; /* Protocol-dependent flags */ - byte type; /* Attribute type and several flags (EAF_...) */ - union { - uintptr_t data; - const struct adata *ptr; /* Attribute data elsewhere */ - } u; -} eattr; - - -#define EA_CODE(proto,id) (((proto) << 8) | (id)) -#define EA_ID(ea) ((ea) & 0xff) -#define EA_PROTO(ea) ((ea) >> 8) -#define EA_CUSTOM(id) ((id) | EA_CUSTOM_BIT) -#define EA_IS_CUSTOM(ea) ((ea) & EA_CUSTOM_BIT) -#define EA_CUSTOM_ID(ea) ((ea) & ~EA_CUSTOM_BIT) - -const char *ea_custom_name(uint ea); - -#define EA_GEN_IGP_METRIC EA_CODE(PROTOCOL_NONE, 0) - -#define EA_CODE_MASK 0xffff -#define EA_CUSTOM_BIT 0x8000 -#define EA_ALLOW_UNDEF 0x10000 /* ea_find: allow EAF_TYPE_UNDEF */ -#define EA_BIT(n) ((n) << 24) /* Used in bitfield accessors */ -#define EA_BIT_GET(ea) ((ea) >> 24) - -#define EAF_TYPE_MASK 0x1f /* Mask with this to get type */ -#define EAF_TYPE_INT 0x01 /* 32-bit unsigned integer number */ -#define EAF_TYPE_OPAQUE 0x02 /* Opaque byte string (not filterable) */ -#define EAF_TYPE_IP_ADDRESS 0x04 /* IP address */ -#define EAF_TYPE_ROUTER_ID 0x05 /* Router ID (IPv4 address) */ -#define EAF_TYPE_AS_PATH 0x06 /* BGP AS path (encoding per RFC 1771:4.3) */ -#define EAF_TYPE_BITFIELD 0x09 /* 32-bit embedded bitfield */ -#define EAF_TYPE_INT_SET 0x0a /* Set of u32's (e.g., a community list) */ -#define EAF_TYPE_PTR 0x0d /* Pointer to an object */ -#define EAF_TYPE_EC_SET 0x0e /* Set of pairs of u32's - ext. community list */ -#define EAF_TYPE_LC_SET 0x12 /* Set of triplets of u32's - large community list */ -#define EAF_TYPE_UNDEF 0x1f /* `force undefined' entry */ -#define EAF_EMBEDDED 0x01 /* Data stored in eattr.u.data (part of type spec) */ -#define EAF_VAR_LENGTH 0x02 /* Attribute length is variable (part of type spec) */ -#define EAF_ORIGINATED 0x20 /* The attribute has originated locally */ -#define EAF_FRESH 0x40 /* An uncached attribute (e.g. modified in export filter) */ - -typedef struct adata { - uint length; /* Length of data */ - byte data[0]; -} adata; - -extern const adata null_adata; /* adata of length 0 */ - -static inline struct adata * -lp_alloc_adata(struct linpool *pool, uint len) -{ - struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len); - ad->length = len; - return ad; -} - -static inline int adata_same(const struct adata *a, const struct adata *b) -{ return (a->length == b->length && !memcmp(a->data, b->data, a->length)); } - - -typedef struct ea_list { - struct ea_list *next; /* In case we have an override list */ - byte flags; /* Flags: EALF_... */ - byte rfu; - word count; /* Number of attributes */ - eattr attrs[0]; /* Attribute definitions themselves */ -} ea_list; - -#define EALF_SORTED 1 /* Attributes are sorted by code */ -#define EALF_BISECT 2 /* Use interval bisection for searching */ -#define EALF_CACHED 4 /* Attributes belonging to cached rta */ - -struct rte_src *rt_find_source(struct proto *p, u32 id); -struct rte_src *rt_get_source(struct proto *p, u32 id); -static inline void rt_lock_source(struct rte_src *src) { src->uc++; } -static inline void rt_unlock_source(struct rte_src *src) { src->uc--; } -void rt_prune_sources(void); - -struct ea_walk_state { - ea_list *eattrs; /* Ccurrent ea_list, initially set by caller */ - eattr *ea; /* Current eattr, initially NULL */ - u32 visited[4]; /* Bitfield, limiting max to 128 */ -}; - -eattr *ea_find(ea_list *, unsigned ea); -eattr *ea_walk(struct ea_walk_state *s, uint id, uint max); -uintptr_t ea_get_int(ea_list *, unsigned ea, uintptr_t def); -void ea_dump(ea_list *); -void ea_sort(ea_list *); /* Sort entries in all sub-lists */ -unsigned ea_scan(ea_list *); /* How many bytes do we need for merged ea_list */ -void ea_merge(ea_list *from, ea_list *to); /* Merge sub-lists to allocated buffer */ -int ea_same(ea_list *x, ea_list *y); /* Test whether two ea_lists are identical */ -uint ea_hash(ea_list *e); /* Calculate 16-bit hash value */ -ea_list *ea_append(ea_list *to, ea_list *what); -void ea_format_bitfield(const struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max); - -#define ea_normalize(ea) do { \ - if (ea->next) { \ - ea_list *t = alloca(ea_scan(ea)); \ - ea_merge(ea, t); \ - ea = t; \ - } \ - ea_sort(ea); \ - if (ea->count == 0) \ - ea = NULL; \ -} while(0) \ - -static inline eattr * -ea_set_attr(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, uintptr_t val) -{ - ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr)); - eattr *e = &a->attrs[0]; - - a->flags = EALF_SORTED; - a->count = 1; - a->next = *to; - *to = a; - - e->id = id; - e->type = type; - e->flags = flags; - - if (type & EAF_EMBEDDED) - e->u.data = (u32) val; - else - e->u.ptr = (struct adata *) val; - - return e; -} - -static inline void -ea_set_attr_u32(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, u32 val) -{ ea_set_attr(to, pool, id, flags, type, (uintptr_t) val); } - -static inline void -ea_set_attr_ptr(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, struct adata *val) -{ ea_set_attr(to, pool, id, flags, type, (uintptr_t) val); } - -static inline void -ea_set_attr_data(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, void *data, uint len) -{ - struct adata *a = lp_alloc_adata(pool, len); - memcpy(a->data, data, len); - ea_set_attr(to, pool, id, flags, type, (uintptr_t) a); -} - - -#define NEXTHOP_MAX_SIZE (sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK) - -static inline size_t nexthop_size(const struct nexthop *nh) -{ return sizeof(struct nexthop) + sizeof(u32)*nh->labels; } -int nexthop__same(struct nexthop *x, struct nexthop *y); /* Compare multipath nexthops */ -static inline int nexthop_same(struct nexthop *x, struct nexthop *y) -{ return (x == y) || nexthop__same(x, y); } -struct nexthop *nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp); -struct nexthop *nexthop_sort(struct nexthop *x); -static inline void nexthop_link(struct rta *a, struct nexthop *from) -{ memcpy(&a->nh, from, nexthop_size(from)); } -void nexthop_insert(struct nexthop **n, struct nexthop *y); -int nexthop_is_sorted(struct nexthop *x); - -void rta_init(void); -static inline size_t rta_size(const rta *a) { return sizeof(rta) + sizeof(u32)*a->nh.labels; } -#define RTA_MAX_SIZE (sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK) -rta *rta_lookup(rta *); /* Get rta equivalent to this one, uc++ */ -static inline int rta_is_cached(rta *r) { return r->cached; } -static inline rta *rta_clone(rta *r) { r->uc++; return r; } -void rta__free(rta *r); -static inline void rta_free(rta *r) { if (r && !--r->uc) rta__free(r); } -rta *rta_do_cow(rta *o, linpool *lp); -static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? rta_do_cow(r, lp) : r; } -void rta_dump(rta *); -void rta_dump_all(void); -void rta_show(struct cli *, rta *); - -u32 rt_get_igp_metric(rte *); -struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); -void rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls); - -static inline void -rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls) -{ - rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ll, dep), mls); -} - -/* - * rta_set_recursive_next_hop() acquires hostentry from hostcache and fills - * rta->hostentry field. New hostentry has zero use count. Cached rta locks its - * hostentry (increases its use count), uncached rta does not lock it. Hostentry - * with zero use count is removed asynchronously during host cache update, - * therefore it is safe to hold such hostentry temorarily. Hostentry holds a - * lock for a 'source' rta, mainly to share multipath nexthops. - * - * There is no need to hold a lock for hostentry->dep table, because that table - * contains routes responsible for that hostentry, and therefore is non-empty if - * given hostentry has non-zero use count. If the hostentry has zero use count, - * the entry is removed before dep is referenced. - * - * The protocol responsible for routes with recursive next hops should hold a - * lock for a 'source' table governing that routes (argument tab to - * rta_set_recursive_next_hop()), because its routes reference hostentries - * (through rta) related to the governing table. When all such routes are - * removed, rtas are immediately removed achieving zero uc. Then the 'source' - * table lock could be immediately released, although hostentries may still - * exist - they will be freed together with the 'source' table. - */ - -static inline void rt_lock_hostentry(struct hostentry *he) { if (he) he->uc++; } -static inline void rt_unlock_hostentry(struct hostentry *he) { if (he) he->uc--; } - -/* - * Default protocol preferences - */ - -#define DEF_PREF_DIRECT 240 /* Directly connected */ -#define DEF_PREF_STATIC 200 /* Static route */ -#define DEF_PREF_OSPF 150 /* OSPF intra-area, inter-area and type 1 external routes */ -#define DEF_PREF_BABEL 130 /* Babel */ -#define DEF_PREF_RIP 120 /* RIP */ -#define DEF_PREF_BGP 100 /* BGP */ -#define DEF_PREF_RPKI 100 /* RPKI */ -#define DEF_PREF_INHERITED 10 /* Routes inherited from other routing daemons */ - -/* - * Route Origin Authorization - */ - -#define ROA_UNKNOWN 0 -#define ROA_VALID 1 -#define ROA_INVALID 2 - -#endif diff --git a/nest/rt-attr.c b/nest/rt-attr.c index 1bece201..f548a575 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -45,11 +45,11 @@ */ #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "nest/cli.h" -#include "nest/attrs.h" +#include "lib/attrs.h" #include "lib/alloca.h" #include "lib/hash.h" #include "lib/idm.h" @@ -57,9 +57,25 @@ #include "lib/string.h" #include <stddef.h> +#include <stdlib.h> const adata null_adata; /* adata of length 0 */ +struct ea_class ea_gen_igp_metric = { + .name = "igp_metric", + .type = T_INT, +}; + +struct ea_class ea_gen_preference = { + .name = "preference", + .type = T_INT, +}; + +struct ea_class ea_gen_from = { + .name = "from", + .type = T_IP, +}; + const char * const rta_src_names[RTS_MAX] = { [RTS_STATIC] = "static", [RTS_INHERIT] = "inherit", @@ -77,6 +93,71 @@ const char * const rta_src_names[RTS_MAX] = { [RTS_RPKI] = "RPKI", }; +static void +ea_gen_source_format(const eattr *a, byte *buf, uint size) +{ + if ((a->u.data >= RTS_MAX) || !rta_src_names[a->u.data]) + bsnprintf(buf, size, "unknown"); + else + bsnprintf(buf, size, "%s", rta_src_names[a->u.data]); +} + +struct ea_class ea_gen_source = { + .name = "source", + .type = T_ENUM_RTS, + .readonly = 1, + .format = ea_gen_source_format, +}; + +struct ea_class ea_gen_nexthop = { + .name = "nexthop", + .type = T_NEXTHOP_LIST, +}; + +/* + * ea_set_hostentry() acquires hostentry from hostcache. + * New hostentry has zero use count. Cached rta locks its + * hostentry (increases its use count), uncached rta does not lock it. + * Hostentry with zero use count is removed asynchronously + * during host cache update, therefore it is safe to hold + * such hostentry temporarily as long as you hold the table lock. + * + * There is no need to hold a lock for hostentry->dep table, because that table + * contains routes responsible for that hostentry, and therefore is non-empty if + * given hostentry has non-zero use count. If the hostentry has zero use count, + * the entry is removed before dep is referenced. + * + * The protocol responsible for routes with recursive next hops should hold a + * lock for a 'source' table governing that routes (argument tab), + * because its routes reference hostentries related to the governing table. + * When all such routes are + * removed, rtas are immediately removed achieving zero uc. Then the 'source' + * table lock could be immediately released, although hostentries may still + * exist - they will be freed together with the 'source' table. + */ + + static void +ea_gen_hostentry_stored(const eattr *ea) +{ + struct hostentry_adata *had = (struct hostentry_adata *) ea->u.ptr; + had->he->uc++; +} + +static void +ea_gen_hostentry_freed(const eattr *ea) +{ + struct hostentry_adata *had = (struct hostentry_adata *) ea->u.ptr; + had->he->uc--; +} + +struct ea_class ea_gen_hostentry = { + .name = "hostentry", + .type = T_HOSTENTRY, + .readonly = 1, + .stored = ea_gen_hostentry_stored, + .freed = ea_gen_hostentry_freed, +}; + const char * rta_dest_names[RTD_MAX] = { [RTD_NONE] = "", [RTD_UNICAST] = "unicast", @@ -85,10 +166,21 @@ const char * rta_dest_names[RTD_MAX] = { [RTD_PROHIBIT] = "prohibited", }; +struct ea_class ea_gen_flowspec_valid = { + .name = "flowspec_valid", + .type = T_ENUM_FLOWSPEC_VALID, + .readonly = 1, +}; + +const char * flowspec_valid_names[FLOWSPEC__MAX] = { + [FLOWSPEC_UNKNOWN] = "unknown", + [FLOWSPEC_VALID] = "", + [FLOWSPEC_INVALID] = "invalid", +}; + pool *rta_pool; -static slab *rta_slab_[4]; -static slab *nexthop_slab_[4]; +static slab *rta_slab; static slab *rte_src_slab; static struct idm src_ids; @@ -154,7 +246,7 @@ rt_prune_sources(void) { HASH_DO_REMOVE(src_hash, RSH, sp); idm_free(&src_ids, src->global_id); - sl_free(rte_src_slab, src); + sl_free(src); } } HASH_WALK_FILTER_END; @@ -167,50 +259,10 @@ rt_prune_sources(void) * Multipath Next Hop */ -static inline u32 -nexthop_hash(struct nexthop *x) -{ - u32 h = 0; - for (; x; x = x->next) - { - h ^= ipa_hash(x->gw) ^ (h << 5) ^ (h >> 9); - - for (int i = 0; i < x->labels; i++) - h ^= x->label[i] ^ (h << 6) ^ (h >> 7); - } - - return h; -} - -int -nexthop__same(struct nexthop *x, struct nexthop *y) -{ - for (; x && y; x = x->next, y = y->next) - { - if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) || - (x->flags != y->flags) || (x->weight != y->weight) || - (x->labels_orig != y->labels_orig) || (x->labels != y->labels)) - return 0; - - for (int i = 0; i < x->labels; i++) - if (x->label[i] != y->label[i]) - return 0; - } - - return x == y; -} - static int nexthop_compare_node(const struct nexthop *x, const struct nexthop *y) { int r; - - if (!x) - return 1; - - if (!y) - return -1; - /* Should we also compare flags ? */ r = ((int) y->weight) - ((int) x->weight); @@ -235,23 +287,16 @@ nexthop_compare_node(const struct nexthop *x, const struct nexthop *y) return ((int) x->iface->index) - ((int) y->iface->index); } -static inline struct nexthop * -nexthop_copy_node(const struct nexthop *src, linpool *lp) +static int +nexthop_compare_qsort(const void *x, const void *y) { - struct nexthop *n = lp_alloc(lp, nexthop_size(src)); - - memcpy(n, src, nexthop_size(src)); - n->next = NULL; - - return n; + return nexthop_compare_node( *(const struct nexthop **) x, *(const struct nexthop **) y ); } /** * nexthop_merge - merge nexthop lists * @x: list 1 * @y: list 2 - * @rx: reusability of list @x - * @ry: reusability of list @y * @max: max number of nexthops * @lp: linpool for allocating nexthops * @@ -268,139 +313,226 @@ nexthop_copy_node(const struct nexthop *src, linpool *lp) * resulting list is no longer needed. When reusability is not set, the * corresponding lists are not modified nor linked from the resulting list. */ -struct nexthop * -nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp) +struct nexthop_adata * +nexthop_merge(struct nexthop_adata *xin, struct nexthop_adata *yin, int max, linpool *lp) { - struct nexthop *root = NULL; - struct nexthop **n = &root; + uint outlen = ADATA_SIZE(xin->ad.length) + ADATA_SIZE(yin->ad.length); + struct nexthop_adata *out = lp_alloc(lp, outlen); + out->ad.length = outlen - sizeof (struct adata); - while ((x || y) && max--) + struct nexthop *x = &xin->nh, *y = &yin->nh, *cur = &out->nh; + int xvalid, yvalid; + + while (max--) { - int cmp = nexthop_compare_node(x, y); + xvalid = NEXTHOP_VALID(x, xin); + yvalid = NEXTHOP_VALID(y, yin); - if (cmp < 0) - { - ASSUME(x); - *n = rx ? x : nexthop_copy_node(x, lp); - x = x->next; - } - else if (cmp > 0) - { - ASSUME(y); - *n = ry ? y : nexthop_copy_node(y, lp); - y = y->next; - } - else - { - ASSUME(x && y); - *n = rx ? x : (ry ? y : nexthop_copy_node(x, lp)); - x = x->next; - y = y->next; - } - n = &((*n)->next); - } - *n = NULL; - - return root; -} - -void -nexthop_insert(struct nexthop **n, struct nexthop *x) -{ - for (; *n; n = &((*n)->next)) - { - int cmp = nexthop_compare_node(*n, x); - - if (cmp < 0) - continue; - else if (cmp > 0) + if (!xvalid && !yvalid) break; + + ASSUME(NEXTHOP_VALID(cur, out)); + + int cmp = !xvalid ? 1 : !yvalid ? -1 : nexthop_compare_node(x, y); + + if (cmp < 0) + { + ASSUME(NEXTHOP_VALID(x, xin)); + memcpy(cur, x, nexthop_size(x)); + x = NEXTHOP_NEXT(x); + } + else if (cmp > 0) + { + ASSUME(NEXTHOP_VALID(y, yin)); + memcpy(cur, y, nexthop_size(y)); + y = NEXTHOP_NEXT(y); + } else - return; + { + ASSUME(NEXTHOP_VALID(x, xin)); + memcpy(cur, x, nexthop_size(x)); + x = NEXTHOP_NEXT(x); + + ASSUME(NEXTHOP_VALID(y, yin)); + y = NEXTHOP_NEXT(y); + } + cur = NEXTHOP_NEXT(cur); } - x->next = *n; - *n = x; + out->ad.length = (void *) cur - (void *) out->ad.data; + + return out; } -struct nexthop * -nexthop_sort(struct nexthop *x) +struct nexthop_adata * +nexthop_sort(struct nexthop_adata *nhad, linpool *lp) { - struct nexthop *s = NULL; + /* Count the nexthops */ + uint cnt = 0; + NEXTHOP_WALK(nh, nhad) + cnt++; - /* Simple insert-sort */ - while (x) + if (cnt <= 1) + return nhad; + + /* Get pointers to them */ + struct nexthop **sptr = tmp_alloc(cnt * sizeof(struct nexthop *)); + + uint i = 0; + NEXTHOP_WALK(nh, nhad) + sptr[i++] = nh; + + /* Sort the pointers */ + qsort(sptr, cnt, sizeof(struct nexthop *), nexthop_compare_qsort); + + /* Allocate the output */ + struct nexthop_adata *out = (struct nexthop_adata *) lp_alloc_adata(lp, nhad->ad.length); + struct nexthop *dest = &out->nh; + + /* Deduplicate nexthops while storing them */ + for (uint i = 0; i < cnt; i++) { - struct nexthop *n = x; - x = n->next; - n->next = NULL; + if (i && !nexthop_compare_node(sptr[i], sptr[i-1])) + continue; - nexthop_insert(&s, n); + memcpy(dest, sptr[i], NEXTHOP_SIZE(sptr[i])); + dest = NEXTHOP_NEXT(dest); } - return s; + out->ad.length = (void *) dest - (void *) out->ad.data; + return out; } int -nexthop_is_sorted(struct nexthop *x) +nexthop_is_sorted(struct nexthop_adata *nhad) { - for (; x && x->next; x = x->next) - if (nexthop_compare_node(x, x->next) >= 0) + struct nexthop *prev = NULL; + NEXTHOP_WALK(nh, nhad) + { + if (prev && (nexthop_compare_node(prev, nh) >= 0)) return 0; + prev = nh; + } + return 1; } -static inline slab * -nexthop_slab(struct nexthop *nh) -{ - return nexthop_slab_[MIN(nh->labels, 3)]; -} - -static struct nexthop * -nexthop_copy(struct nexthop *o) -{ - struct nexthop *first = NULL; - struct nexthop **last = &first; - - for (; o; o = o->next) - { - struct nexthop *n = sl_allocz(nexthop_slab(o)); - n->gw = o->gw; - n->iface = o->iface; - n->next = NULL; - n->flags = o->flags; - n->weight = o->weight; - n->labels_orig = o->labels_orig; - n->labels = o->labels; - for (int i=0; i<o->labels; i++) - n->label[i] = o->label[i]; - - *last = n; - last = &(n->next); - } - - return first; -} - -static void -nexthop_free(struct nexthop *o) -{ - struct nexthop *n; - - while (o) - { - n = o->next; - sl_free(nexthop_slab(o), o); - o = n; - } -} - - /* * Extended Attributes */ +#define EA_CLASS_INITIAL_MAX 128 +static struct ea_class **ea_class_global = NULL; +static uint ea_class_max; +static struct idm ea_class_idm; + +/* Config parser lex register function */ +void ea_lex_register(struct ea_class *def); +void ea_lex_unregister(struct ea_class *def); + +static void +ea_class_free(struct ea_class *cl) +{ + /* No more ea class references. Unregister the attribute. */ + idm_free(&ea_class_idm, cl->id); + ea_class_global[cl->id] = NULL; + ea_lex_unregister(cl); +} + +static void +ea_class_ref_free(resource *r) +{ + struct ea_class_ref *ref = SKIP_BACK(struct ea_class_ref, r, r); + if (!--ref->class->uc) + ea_class_free(ref->class); +} + +static void +ea_class_ref_dump(resource *r) +{ + struct ea_class_ref *ref = SKIP_BACK(struct ea_class_ref, r, r); + debug("name \"%s\", type=%d\n", ref->class->name, ref->class->type); +} + +static struct resclass ea_class_ref_class = { + .name = "Attribute class reference", + .size = sizeof(struct ea_class_ref), + .free = ea_class_ref_free, + .dump = ea_class_ref_dump, + .lookup = NULL, + .memsize = NULL, +}; + +static void +ea_class_init(void) +{ + idm_init(&ea_class_idm, rta_pool, EA_CLASS_INITIAL_MAX); + ea_class_global = mb_allocz(rta_pool, + sizeof(*ea_class_global) * (ea_class_max = EA_CLASS_INITIAL_MAX)); +} + +static struct ea_class_ref * +ea_ref_class(pool *p, struct ea_class *def) +{ + def->uc++; + struct ea_class_ref *ref = ralloc(p, &ea_class_ref_class); + ref->class = def; + return ref; +} + +static struct ea_class_ref * +ea_register(pool *p, struct ea_class *def) +{ + def->id = idm_alloc(&ea_class_idm); + + ASSERT_DIE(ea_class_global); + while (def->id >= ea_class_max) + ea_class_global = mb_realloc(ea_class_global, sizeof(*ea_class_global) * (ea_class_max *= 2)); + + ASSERT_DIE(def->id < ea_class_max); + ea_class_global[def->id] = def; + + ea_lex_register(def); + + return ea_ref_class(p, def); +} + +struct ea_class_ref * +ea_register_alloc(pool *p, struct ea_class cl) +{ + struct ea_class *clp = ea_class_find_by_name(cl.name); + if (clp && clp->type == cl.type) + return ea_ref_class(p, clp); + + uint namelen = strlen(cl.name) + 1; + + struct { + struct ea_class cl; + char name[0]; + } *cla = mb_alloc(rta_pool, sizeof(struct ea_class) + namelen); + cla->cl = cl; + memcpy(cla->name, cl.name, namelen); + cla->cl.name = cla->name; + + return ea_register(p, &cla->cl); +} + +void +ea_register_init(struct ea_class *clp) +{ + ASSERT_DIE(!ea_class_find_by_name(clp->name)); + ea_register(&root_pool, clp); +} + +struct ea_class * +ea_class_find_by_id(uint id) +{ + ASSERT_DIE(id < ea_class_max); + ASSERT_DIE(ea_class_global[id]); + return ea_class_global[id]; +} + static inline eattr * ea__find(ea_list *e, unsigned id) { @@ -444,12 +576,11 @@ ea__find(ea_list *e, unsigned id) * to its &eattr structure or %NULL if no such attribute exists. */ eattr * -ea_find(ea_list *e, unsigned id) +ea_find_by_id(ea_list *e, unsigned id) { eattr *a = ea__find(e, id & EA_CODE_MASK); - if (a && (a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF && - !(id & EA_ALLOW_UNDEF)) + if (a && a->undef && !(id & EA_ALLOW_UNDEF)) return NULL; return a; } @@ -516,7 +647,7 @@ ea_walk(struct ea_walk_state *s, uint id, uint max) BIT32_SET(s->visited, n); - if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF) + if (a->undef) continue; s->eattrs = e; @@ -530,25 +661,6 @@ ea_walk(struct ea_walk_state *s, uint id, uint max) return NULL; } -/** - * ea_get_int - fetch an integer attribute - * @e: attribute list - * @id: attribute ID - * @def: default value - * - * This function is a shortcut for retrieving a value of an integer attribute - * by calling ea_find() to find the attribute, extracting its value or returning - * a provided default if no such attribute is present. - */ -uintptr_t -ea_get_int(ea_list *e, unsigned id, uintptr_t def) -{ - eattr *a = ea_find(e, id); - if (!a) - return def; - return a->u.data; -} - static inline void ea_do_sort(ea_list *e) { @@ -616,14 +728,17 @@ ea_do_prune(ea_list *e) /* Now s0 is the most recent version, s[-1] the oldest one */ /* Drop undefs */ - if ((s0->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF) + if (s0->undef) continue; /* Copy the newest version to destination */ *d = *s0; /* Preserve info whether it originated locally */ - d->type = (d->type & ~(EAF_ORIGINATED|EAF_FRESH)) | (s[-1].type & EAF_ORIGINATED); + d->originated = s[-1].originated; + + /* Not fresh any more, we prefer surstroemming */ + d->fresh = 0; /* Next destination */ d++; @@ -643,7 +758,7 @@ ea_do_prune(ea_list *e) * If an attribute occurs multiple times in a single &ea_list, * ea_sort() leaves only the first (the only significant) occurrence. */ -void +static void ea_sort(ea_list *e) { while (e) @@ -667,8 +782,8 @@ ea_sort(ea_list *e) * This function calculates an upper bound of the size of * a given &ea_list after merging with ea_merge(). */ -unsigned -ea_scan(ea_list *e) +static unsigned +ea_scan(const ea_list *e) { unsigned cnt = 0; @@ -694,8 +809,8 @@ ea_scan(ea_list *e) * segments with ea_merge() and finally sort and prune the result * by calling ea_sort(). */ -void -ea_merge(ea_list *e, ea_list *t) +static void +ea_merge(const ea_list *e, ea_list *t) { eattr *d = t->attrs; @@ -711,6 +826,16 @@ ea_merge(ea_list *e, ea_list *t) } } +ea_list * +ea_normalize(const ea_list *e) +{ + ea_list *t = tmp_alloc(ea_scan(e)); + ea_merge(e, t); + ea_sort(t); + + return t->count ? t : NULL; +} + /** * ea_same - compare two &ea_list's * @x: attribute list @@ -737,39 +862,47 @@ ea_same(ea_list *x, ea_list *y) if (a->id != b->id || a->flags != b->flags || a->type != b->type || + a->originated != b->originated || + a->fresh != b->fresh || + a->undef != b->undef || ((a->type & EAF_EMBEDDED) ? a->u.data != b->u.data : !adata_same(a->u.ptr, b->u.ptr))) return 0; } return 1; } -static inline ea_list * -ea_list_copy(ea_list *o) +uint +ea_list_size(ea_list *o) { - ea_list *n; - unsigned i, adpos, elen; + unsigned i, elen; - if (!o) - return NULL; - ASSERT(!o->next); - elen = adpos = sizeof(ea_list) + sizeof(eattr) * o->count; + ASSERT_DIE(o); + ASSERT_DIE(!o->next); + elen = BIRD_CPU_ALIGN(sizeof(ea_list) + sizeof(eattr) * o->count); for(i=0; i<o->count; i++) { eattr *a = &o->attrs[i]; if (!(a->type & EAF_EMBEDDED)) - elen += sizeof(struct adata) + a->u.ptr->length; + elen += ADATA_SIZE(a->u.ptr->length); } - n = mb_alloc(rta_pool, elen); + return elen; +} + +void +ea_list_copy(ea_list *n, ea_list *o, uint elen) +{ + uint adpos = sizeof(ea_list) + sizeof(eattr) * o->count; memcpy(n, o, adpos); - n->flags |= EALF_CACHED; - for(i=0; i<o->count; i++) + adpos = BIRD_CPU_ALIGN(adpos); + + for(uint i=0; i<o->count; i++) { eattr *a = &n->attrs[i]; if (!(a->type & EAF_EMBEDDED)) { - unsigned size = sizeof(struct adata) + a->u.ptr->length; + unsigned size = ADATA_SIZE(a->u.ptr->length); ASSERT_DIE(adpos + size <= elen); struct adata *d = ((void *) n) + adpos; @@ -779,8 +912,41 @@ ea_list_copy(ea_list *o) adpos += size; } } + ASSERT_DIE(adpos == elen); - return n; +} + +static void +ea_list_ref(ea_list *l) +{ + for(uint i=0; i<l->count; i++) + { + eattr *a = &l->attrs[i]; + ASSERT_DIE(a->id < ea_class_max); + + struct ea_class *cl = ea_class_global[a->id]; + ASSERT_DIE(cl && cl->uc); + + CALL(cl->stored, a); + cl->uc++; + } +} + +static void +ea_list_unref(ea_list *l) +{ + for(uint i=0; i<l->count; i++) + { + eattr *a = &l->attrs[i]; + ASSERT_DIE(a->id < ea_class_max); + + struct ea_class *cl = ea_class_global[a->id]; + ASSERT_DIE(cl && cl->uc); + + CALL(cl->freed, a); + if (!--cl->uc) + ea_class_free(cl); + } } static inline void @@ -788,23 +954,12 @@ ea_free(ea_list *o) { if (o) { + ea_list_unref(o); ASSERT(!o->next); mb_free(o); } } -static int -get_generic_attr(const eattr *a, byte **buf, int buflen UNUSED) -{ - if (a->id == EA_GEN_IGP_METRIC) - { - *buf += bsprintf(*buf, "igp_metric"); - return GA_NAME; - } - - return GA_UNKNOWN; -} - void ea_format_bitfield(const struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max) { @@ -904,78 +1059,75 @@ ea_show_lc_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte void ea_show(struct cli *c, const eattr *e) { - struct protocol *p; - int status = GA_UNKNOWN; const struct adata *ad = (e->type & EAF_EMBEDDED) ? NULL : e->u.ptr; byte buf[CLI_MSG_SIZE]; byte *pos = buf, *end = buf + sizeof(buf); - if (EA_IS_CUSTOM(e->id)) - { - const char *name = ea_custom_name(e->id); - if (name) - { - pos += bsprintf(pos, "%s", name); - status = GA_NAME; - } - else - pos += bsprintf(pos, "%02x.", EA_PROTO(e->id)); - } - else if (p = class_to_protocol[EA_PROTO(e->id)]) - { - pos += bsprintf(pos, "%s.", p->name); - if (p->get_attr) - status = p->get_attr(e, pos, end - pos); - pos += strlen(pos); - } - else if (EA_PROTO(e->id)) - pos += bsprintf(pos, "%02x.", EA_PROTO(e->id)); - else - status = get_generic_attr(e, &pos, end - pos); + ASSERT_DIE(e->id < ea_class_max); - if (status < GA_NAME) - pos += bsprintf(pos, "%02x", EA_ID(e->id)); - if (status < GA_FULL) - { - *pos++ = ':'; - *pos++ = ' '; - switch (e->type & EAF_TYPE_MASK) - { - case EAF_TYPE_INT: + struct ea_class *cls = ea_class_global[e->id]; + ASSERT_DIE(cls); + + pos += bsprintf(pos, "%s", cls->name); + + *pos++ = ':'; + *pos++ = ' '; + + if (e->undef) + bsprintf(pos, "undefined (should not happen)"); + else if (cls->format) + cls->format(e, buf, end - buf); + else + switch (e->type) + { + case T_INT: bsprintf(pos, "%u", e->u.data); break; - case EAF_TYPE_OPAQUE: + case T_OPAQUE: opaque_format(ad, pos, end - pos); break; - case EAF_TYPE_IP_ADDRESS: + case T_IP: bsprintf(pos, "%I", *(ip_addr *) ad->data); break; - case EAF_TYPE_ROUTER_ID: + case T_QUAD: bsprintf(pos, "%R", e->u.data); break; - case EAF_TYPE_AS_PATH: + case T_PATH: as_path_format(ad, pos, end - pos); break; - case EAF_TYPE_BITFIELD: - bsprintf(pos, "%08x", e->u.data); - break; - case EAF_TYPE_INT_SET: + case T_CLIST: ea_show_int_set(c, ad, 1, pos, buf, end); return; - case EAF_TYPE_EC_SET: + case T_ECLIST: ea_show_ec_set(c, ad, pos, buf, end); return; - case EAF_TYPE_LC_SET: + case T_LCLIST: ea_show_lc_set(c, ad, pos, buf, end); return; - case EAF_TYPE_UNDEF: default: bsprintf(pos, "<type %02x>", e->type); - } - } + } + cli_printf(c, -1012, "\t%s", buf); } +static void +nexthop_dump(const struct adata *ad) +{ + struct nexthop_adata *nhad = (struct nexthop_adata *) ad; + + debug(":"); + + NEXTHOP_WALK(nh, nhad) + { + if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw); + if (nh->labels) debug(" L %d", nh->label[0]); + for (int i=1; i<nh->labels; i++) + debug("/%d", nh->label[i]); + debug(" [%s]", nh->iface ? nh->iface->name : "???"); + } +} + /** * ea_dump - dump an extended attribute * @e: attribute to be dumped @@ -1002,12 +1154,17 @@ ea_dump(ea_list *e) for(i=0; i<e->count; i++) { eattr *a = &e->attrs[i]; - debug(" %02x:%02x.%02x", EA_PROTO(a->id), EA_ID(a->id), a->flags); - debug("=%c", "?iO?I?P???S?????" [a->type & EAF_TYPE_MASK]); - if (a->type & EAF_ORIGINATED) + debug(" %04x.%02x", a->id, a->flags); + debug("=%c", + "?iO?IRP???S??pE?" + "??L???N?????????" + "?o???r??????????" [a->type]); + if (a->originated) debug("o"); if (a->type & EAF_EMBEDDED) debug(":%08x", a->u.data); + else if (a->id == ea_gen_nexthop.id) + nexthop_dump(a->u.ptr); else { int j, len = a->u.ptr->length; @@ -1037,6 +1194,7 @@ ea_hash(ea_list *e) if (e) /* Assuming chain of length 1 */ { + ASSERT_DIE(!e->next); for(i=0; i<e->count; i++) { struct eattr *a = &e->attrs[i]; @@ -1100,50 +1258,30 @@ rta_alloc_hash(void) static inline uint rta_hash(rta *a) { - u64 h; - mem_hash_init(&h); -#define MIX(f) mem_hash_mix(&h, &(a->f), sizeof(a->f)); -#define BMIX(f) mem_hash_mix_num(&h, a->f); - MIX(hostentry); - MIX(from); - MIX(igp_metric); - BMIX(source); - BMIX(scope); - BMIX(dest); - MIX(pref); -#undef MIX - - return mem_hash_value(&h) ^ nexthop_hash(&(a->nh)) ^ ea_hash(a->eattrs); + return ea_hash(a->eattrs); } static inline int rta_same(rta *x, rta *y) { - return (x->source == y->source && - x->scope == y->scope && - x->dest == y->dest && - x->igp_metric == y->igp_metric && - ipa_equal(x->from, y->from) && - x->hostentry == y->hostentry && - nexthop_same(&(x->nh), &(y->nh)) && - ea_same(x->eattrs, y->eattrs)); -} - -static inline slab * -rta_slab(rta *a) -{ - return rta_slab_[a->nh.labels > 2 ? 3 : a->nh.labels]; + return ea_same(x->eattrs, y->eattrs); } static rta * rta_copy(rta *o) { - rta *r = sl_alloc(rta_slab(o)); + rta *r = sl_alloc(rta_slab); memcpy(r, o, rta_size(o)); r->uc = 1; - r->nh.next = nexthop_copy(o->nh.next); - r->eattrs = ea_list_copy(o->eattrs); + if (!r->eattrs) + return r; + + uint elen = ea_list_size(o->eattrs); + r->eattrs = mb_alloc(rta_pool, elen); + ea_list_copy(r->eattrs, o->eattrs, elen); + ea_list_ref(r->eattrs); + r->eattrs->flags |= EALF_CACHED; return r; } @@ -1199,7 +1337,7 @@ rta_lookup(rta *o) ASSERT(!o->cached); if (o->eattrs) - ea_normalize(o->eattrs); + o->eattrs = ea_normalize(o->eattrs); h = rta_hash(o); for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next) @@ -1209,7 +1347,6 @@ rta_lookup(rta *o) r = rta_copy(o); r->hash_key = h; r->cached = 1; - rt_lock_hostentry(r->hostentry); rta_insert(r); if (++rta_cache_count > rta_cache_limit) @@ -1226,12 +1363,9 @@ rta__free(rta *a) *a->pprev = a->next; if (a->next) a->next->pprev = a->pprev; - rt_unlock_hostentry(a->hostentry); - if (a->nh.next) - nexthop_free(a->nh.next); ea_free(a->eattrs); a->cached = 0; - sl_free(rta_slab(a), a); + sl_free(a); } rta * @@ -1239,12 +1373,6 @@ rta_do_cow(rta *o, linpool *lp) { rta *r = lp_alloc(lp, rta_size(o)); memcpy(r, o, rta_size(o)); - for (struct nexthop **nhn = &(r->nh.next), *nho = o->nh.next; nho; nho = nho->next) - { - *nhn = lp_alloc(lp, nexthop_size(nho)); - memcpy(*nhn, nho, nexthop_size(nho)); - nhn = &((*nhn)->next); - } r->cached = 0; r->uc = 0; return r; @@ -1259,27 +1387,10 @@ rta_do_cow(rta *o, linpool *lp) void rta_dump(rta *a) { - static char *rts[] = { "", "RTS_STATIC", "RTS_INHERIT", "RTS_DEVICE", - "RTS_STAT_DEV", "RTS_REDIR", "RTS_RIP", - "RTS_OSPF", "RTS_OSPF_IA", "RTS_OSPF_EXT1", - "RTS_OSPF_EXT2", "RTS_BGP", "RTS_PIPE", "RTS_BABEL" }; - static char *rtd[] = { "", " DEV", " HOLE", " UNREACH", " PROHIBIT" }; - - debug("pref=%d uc=%d %s %s%s h=%04x", - a->pref, a->uc, rts[a->source], ip_scope_text(a->scope), - rtd[a->dest], a->hash_key); + debug("uc=%d h=%04x", + a->uc, a->hash_key); if (!a->cached) debug(" !CACHED"); - debug(" <-%I", a->from); - if (a->dest == RTD_UNICAST) - for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) - { - if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw); - if (nh->labels) debug(" L %d", nh->label[0]); - for (int i=1; i<nh->labels; i++) - debug("/%d", nh->label[i]); - debug(" [%s]", nh->iface ? nh->iface->name : "???"); - } if (a->eattrs) { debug(" EA: "); @@ -1313,8 +1424,6 @@ rta_dump_all(void) void rta_show(struct cli *c, rta *a) { - cli_printf(c, -1008, "\tType: %s %s", rta_src_names[a->source], ip_scope_text(a->scope)); - for(ea_list *eal = a->eattrs; eal; eal=eal->next) for(int i=0; i<eal->count; i++) ea_show(c, &eal->attrs[i]); @@ -1331,18 +1440,19 @@ rta_init(void) { rta_pool = rp_new(&root_pool, "Attributes"); - rta_slab_[0] = sl_new(rta_pool, sizeof(rta)); - rta_slab_[1] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)); - rta_slab_[2] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*2); - rta_slab_[3] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK); - - nexthop_slab_[0] = sl_new(rta_pool, sizeof(struct nexthop)); - nexthop_slab_[1] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)); - nexthop_slab_[2] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*2); - nexthop_slab_[3] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK); + rta_slab = sl_new(rta_pool, sizeof(rta)); rta_alloc_hash(); rte_src_init(); + ea_class_init(); + + ea_register_init(&ea_gen_preference); + ea_register_init(&ea_gen_igp_metric); + ea_register_init(&ea_gen_from); + ea_register_init(&ea_gen_source); + ea_register_init(&ea_gen_nexthop); + ea_register_init(&ea_gen_hostentry); + ea_register_init(&ea_gen_flowspec_valid); } /* diff --git a/nest/rt-dev.c b/nest/rt-dev.c index 5d1e57b3..fa224f9a 100644 --- a/nest/rt-dev.c +++ b/nest/rt-dev.c @@ -18,7 +18,7 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/rt-dev.h" #include "conf/conf.h" #include "lib/resource.h" @@ -79,14 +79,16 @@ dev_ifa_notify(struct proto *P, uint flags, struct ifa *ad) /* Use iface ID as local source ID */ struct rte_src *src = rt_get_source(P, ad->iface->index); - rta a0 = { - .pref = c->preference, - .source = RTS_DEVICE, - .scope = SCOPE_UNIVERSE, - .dest = RTD_UNICAST, - .nh.iface = ad->iface, + rta a0 = {}; + struct nexthop_adata nhad = { + .nh = { .iface = ad->iface, }, + .ad = { .length = (void *) NEXTHOP_NEXT(&nhad.nh) - (void *) nhad.ad.data, }, }; + ea_set_attr_u32(&a0.eattrs, &ea_gen_preference, 0, c->preference); + ea_set_attr_u32(&a0.eattrs, &ea_gen_source, 0, RTS_DEVICE); + ea_set_attr_data(&a0.eattrs, &ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length); + rte e0 = { .attrs = rta_lookup(&a0), .src = src, @@ -184,7 +186,6 @@ dev_copy_config(struct proto_config *dest, struct proto_config *src) struct protocol proto_device = { .name = "Direct", .template = "direct%d", - .class = PROTOCOL_DIRECT, .preference = DEF_PREF_DIRECT, .channel_mask = NB_IP | NB_IP6_SADR, .proto_size = sizeof(struct rt_dev_proto), @@ -194,3 +195,9 @@ struct protocol proto_device = { .reconfigure = dev_reconfigure, .copy_config = dev_copy_config }; + +void +dev_build(void) +{ + proto_build(&proto_device); +} diff --git a/nest/rt-fib.c b/nest/rt-fib.c index a7f70371..801561da 100644 --- a/nest/rt-fib.c +++ b/nest/rt-fib.c @@ -55,7 +55,7 @@ #undef LOCAL_DEBUG #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "lib/string.h" /* @@ -331,7 +331,7 @@ fib_get(struct fib *f, const net_addr *a) memset(b, 0, f->node_offset); if (f->init) - f->init(b); + f->init(f, b); if (f->entries++ > f->entries_max) fib_rehash(f, HASH_HI_STEP); @@ -475,7 +475,7 @@ fib_delete(struct fib *f, void *E) } if (f->fib_slab) - sl_free(f->fib_slab, E); + sl_free(E); else mb_free(E); diff --git a/nest/rt-show.c b/nest/rt-show.c index ae5000f5..cc5a9a10 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -10,11 +10,12 @@ #undef LOCAL_DEBUG #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/cli.h" #include "nest/iface.h" #include "filter/filter.h" +#include "filter/data.h" #include "sysdep/unix/krt.h" static void @@ -44,32 +45,38 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary rta *a = e->attrs; int sync_error = d->kernel ? krt_get_sync_error(d->kernel, e) : 0; void (*get_route_info)(struct rte *, byte *buf); - struct nexthop *nh; + eattr *nhea = net_type_match(e->net, NB_DEST) ? + ea_find(a->eattrs, &ea_gen_nexthop) : NULL; + struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; + int dest = nhad ? (NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest) : RTD_NONE; + int flowspec_valid = net_is_flow(e->net) ? rt_get_flowspec_valid(e) : FLOWSPEC_UNKNOWN; tm_format_time(tm, &config->tf_route, e->lastmod); - if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->nh.gw)) - bsprintf(from, " from %I", a->from); + ip_addr a_from = ea_get_ip(a->eattrs, &ea_gen_from, IPA_NONE); + if (ipa_nonzero(a_from) && (!nhad || !ipa_equal(a_from, nhad->nh.gw))) + bsprintf(from, " from %I", a_from); else from[0] = 0; /* Need to normalize the extended attributes */ if (d->verbose && !rta_is_cached(a) && a->eattrs) - ea_normalize(a->eattrs); + a->eattrs = ea_normalize(a->eattrs); get_route_info = e->src->proto->proto->get_route_info; if (get_route_info) get_route_info(e, info); else - bsprintf(info, " (%d)", a->pref); + bsprintf(info, " (%d)", rt_get_preference(e)); if (d->last_table != d->tab) rt_show_table(c, d); - cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia, rta_dest_name(a->dest), - e->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); + cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia, + net_is_flow(e->net) ? flowspec_valid_name(flowspec_valid) : rta_dest_name(dest), + e->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); - if (a->dest == RTD_UNICAST) - for (nh = &(a->nh); nh; nh = nh->next) + if (dest == RTD_UNICAST) + NEXTHOP_WALK(nh, nhad) { char mpls[MPLS_MAX_LABEL_STACK*12 + 5], *lsp = mpls; char *onlink = (nh->flags & RNF_ONLINK) ? " onlink" : ""; @@ -83,7 +90,7 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary } *lsp = '\0'; - if (a->nh.next) + if (!NEXTHOP_ONE(nhad)) bsprintf(weight, " weight %d", nh->weight + 1); if (ipa_nonzero(nh->gw)) @@ -98,6 +105,29 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary rta_show(c, a); } +static uint +rte_feed_count(net *n) +{ + uint count = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (rte_is_valid(RTE_OR_NULL(e))) + count++; + return count; +} + +static void +rte_feed_obtain(net *n, rte **feed, uint count) +{ + uint i = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (rte_is_valid(RTE_OR_NULL(e))) + { + ASSERT_DIE(i < count); + feed[i++] = &e->rte; + } + ASSERT_DIE(i == count); +} + static void rt_show_net(struct cli *c, net *n, struct rt_show_data *d) { @@ -109,10 +139,9 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) ASSUME(!d->export_mode || ec); int first = 1; + int first_show = 1; int pass = 0; - bsnprintf(ia, sizeof(ia), "%N", n->n.addr); - for (struct rte_storage *er = n->routes; er; er = er->next) { if (rte_is_filtered(&er->rte) != d->filtered) @@ -128,7 +157,7 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) struct rte e = er->rte; /* Export channel is down, do not try to export routes to it */ - if (ec && (ec->export_state == ES_DOWN)) + if (ec && !ec->out_req.hook) goto skip; if (d->export_mode == RSEM_EXPORTED) @@ -143,7 +172,14 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) { /* Special case for merged export */ pass = 1; - rte *em = rt_export_merged(ec, n, c->show_pool, 1); + uint count = rte_feed_count(n); + if (!count) + goto skip; + + rte **feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(n, feed, count); + rte *em = rt_export_merged(ec, feed, count, c->show_pool, 1); + if (em) e = *em; else @@ -168,7 +204,7 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) * command may change the export filter and do not update routes. */ int do_export = (ic > 0) || - (f_run(ec->out_filter, &e, c->show_pool, FF_SILENT) <= F_ACCEPT); + (f_run(ec->out_filter, &e, FF_SILENT) <= F_ACCEPT); if (do_export != (d->export_mode == RSEM_EXPORT)) goto skip; @@ -181,14 +217,21 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) if (d->show_protocol && (d->show_protocol != e.src->proto)) goto skip; - if (f_run(d->filter, &e, c->show_pool, 0) > F_ACCEPT) + if (f_run(d->filter, &e, 0) > F_ACCEPT) goto skip; if (d->stats < 2) + { + if (first_show) + net_format(n->n.addr, ia, sizeof(ia)); + else + ia[0] = 0; + rt_show_rte(c, ia, &e, d, (n->routes == er)); + first_show = 0; + } d->show_counter++; - ia[0] = 0; skip: lp_flush(c->show_pool); @@ -205,9 +248,12 @@ rt_show_cleanup(struct cli *c) struct rt_show_data_rtable *tab; /* Unlink the iterator */ - if (d->table_open) + if (d->table_open && !d->trie_walk) fit_get(&d->tab->table->fib, &d->fit); + if (d->walk_lock) + rt_unlock_trie(d->tab->table, d->walk_lock); + /* Unlock referenced tables */ WALK_LIST(tab, d->tables) rt_unlock_table(tab->table); @@ -217,12 +263,13 @@ static void rt_show_cont(struct cli *c) { struct rt_show_data *d = c->rover; + struct rtable *tab = d->tab->table; #ifdef DEBUGGING unsigned max = 4; #else unsigned max = 64; #endif - struct fib *fib = &d->tab->table->fib; + struct fib *fib = &tab->fib; struct fib_iterator *it = &d->fit; if (d->running_on_config && (d->running_on_config != config)) @@ -233,7 +280,22 @@ rt_show_cont(struct cli *c) if (!d->table_open) { - FIB_ITERATE_INIT(&d->fit, &d->tab->table->fib); + /* We use either trie-based walk or fib-based walk */ + d->trie_walk = tab->trie && + (d->addr_mode == RSD_ADDR_IN) && + net_val_match(tab->addr_type, NB_IP); + + if (d->trie_walk && !d->walk_state) + d->walk_state = lp_allocz(c->parser_pool, sizeof (struct f_trie_walk_state)); + + if (d->trie_walk) + { + d->walk_lock = rt_lock_trie(tab); + trie_walk_init(d->walk_state, tab->trie, d->addr); + } + else + FIB_ITERATE_INIT(&d->fit, &tab->fib); + d->table_open = 1; d->table_counter++; d->kernel = rt_show_get_kernel(d); @@ -246,16 +308,44 @@ rt_show_cont(struct cli *c) rt_show_table(c, d); } - FIB_ITERATE_START(fib, it, net, n) + if (d->trie_walk) { - if (!max--) + /* Trie-based walk */ + net_addr addr; + while (trie_walk_next(d->walk_state, &addr)) { - FIB_ITERATE_PUT(it); - return; + net *n = net_find(tab, &addr); + if (!n) + continue; + + rt_show_net(c, n, d); + + if (!--max) + return; } - rt_show_net(c, n, d); + + rt_unlock_trie(tab, d->walk_lock); + d->walk_lock = NULL; + } + else + { + /* fib-based walk */ + FIB_ITERATE_START(fib, it, net, n) + { + if ((d->addr_mode == RSD_ADDR_IN) && (!net_in_netX(n->n.addr, d->addr))) + goto next; + + if (!max--) + { + FIB_ITERATE_PUT(it); + return; + } + rt_show_net(c, n, d); + + next:; + } + FIB_ITERATE_END; } - FIB_ITERATE_END; if (d->stats) { @@ -264,7 +354,7 @@ rt_show_cont(struct cli *c) cli_printf(c, -1007, "%d of %d routes for %d networks in table %s", d->show_counter - d->show_counter_last, d->rt_counter - d->rt_counter_last, - d->net_counter - d->net_counter_last, d->tab->table->name); + d->net_counter - d->net_counter_last, tab->name); } d->kernel = NULL; @@ -315,7 +405,7 @@ rt_show_get_default_tables(struct rt_show_data *d) { WALK_LIST(c, d->export_protocol->channels) { - if (c->export_state == ES_DOWN) + if (!c->out_req.hook) continue; tab = rt_show_add_table(d, c->table); @@ -395,7 +485,7 @@ rt_show(struct rt_show_data *d) rt_show_prepare_tables(d); - if (!d->addr) + if (!d->addr || (d->addr_mode == RSD_ADDR_IN)) { WALK_LIST(tab, d->tables) rt_lock_table(tab->table); @@ -413,7 +503,7 @@ rt_show(struct rt_show_data *d) d->tab = tab; d->kernel = rt_show_get_kernel(d); - if (d->show_for) + if (d->addr_mode == RSD_ADDR_FOR) n = net_route(tab->table, d->addr); else n = net_find(tab->table, d->addr); diff --git a/nest/rt-table.c b/nest/rt-table.c index ee69d7c4..946f4021 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -26,12 +26,72 @@ * (see the route attribute module for a precise explanation) holding the * remaining route attributes which are expected to be shared by multiple * routes in order to conserve memory. + * + * There are several mechanisms that allow automatic update of routes in one + * routing table (dst) as a result of changes in another routing table (src). + * They handle issues of recursive next hop resolving, flowspec validation and + * RPKI validation. + * + * The first such mechanism is handling of recursive next hops. A route in the + * dst table has an indirect next hop address, which is resolved through a route + * in the src table (which may also be the same table) to get an immediate next + * hop. This is implemented using structure &hostcache attached to the src + * table, which contains &hostentry structures for each tracked next hop + * address. These structures are linked from recursive routes in dst tables, + * possibly multiple routes sharing one hostentry (as many routes may have the + * same indirect next hop). There is also a trie in the hostcache, which matches + * all prefixes that may influence resolving of tracked next hops. + * + * When a best route changes in the src table, the hostcache is notified using + * rt_notify_hostcache(), which immediately checks using the trie whether the + * change is relevant and if it is, then it schedules asynchronous hostcache + * recomputation. The recomputation is done by rt_update_hostcache() (called + * from rt_event() of src table), it walks through all hostentries and resolves + * them (by rt_update_hostentry()). It also updates the trie. If a change in + * hostentry resolution was found, then it schedules asynchronous nexthop + * recomputation of associated dst table. That is done by rt_next_hop_update() + * (called from rt_event() of dst table), it iterates over all routes in the dst + * table and re-examines their hostentries for changes. Note that in contrast to + * hostcache update, next hop update can be interrupted by main loop. These two + * full-table walks (over hostcache and dst table) are necessary due to absence + * of direct lookups (route -> affected nexthop, nexthop -> its route). + * + * The second mechanism is for flowspec validation, where validity of flowspec + * routes depends of resolving their network prefixes in IP routing tables. This + * is similar to the recursive next hop mechanism, but simpler as there are no + * intermediate hostcache and hostentries (because flows are less likely to + * share common net prefix than routes sharing a common next hop). In src table, + * there is a list of dst tables (list flowspec_links), this list is updated by + * flowpsec channels (by rt_flowspec_link() and rt_flowspec_unlink() during + * channel start/stop). Each dst table has its own trie of prefixes that may + * influence validation of flowspec routes in it (flowspec_trie). + * + * When a best route changes in the src table, rt_flowspec_notify() immediately + * checks all dst tables from the list using their tries to see whether the + * change is relevant for them. If it is, then an asynchronous re-validation of + * flowspec routes in the dst table is scheduled. That is also done by function + * rt_next_hop_update(), like nexthop recomputation above. It iterates over all + * flowspec routes and re-validates them. It also recalculates the trie. + * + * Note that in contrast to the hostcache update, here the trie is recalculated + * during the rt_next_hop_update(), which may be interleaved with IP route + * updates. The trie is flushed at the beginning of recalculation, which means + * that such updates may use partial trie to see if they are relevant. But it + * works anyway! Either affected flowspec was already re-validated and added to + * the trie, then IP route change would match the trie and trigger a next round + * of re-validation, or it was not yet re-validated and added to the trie, but + * will be re-validated later in this round anyway. + * + * The third mechanism is used for RPKI re-validation of IP routes and it is the + * simplest. It is just a list of subscribers in src table, who are notified + * when any change happened, but only after a settle time. Also, in RPKI case + * the dst is not a table, but a channel, who refeeds routes through a filter. */ #undef LOCAL_DEBUG #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "lib/resource.h" @@ -44,54 +104,241 @@ #include "lib/hash.h" #include "lib/string.h" #include "lib/alloca.h" +#include "lib/flowspec.h" + +#ifdef CONFIG_BGP +#include "proto/bgp/bgp.h" +#endif pool *rt_table_pool; static linpool *rte_update_pool; list routing_tables; +list deleted_routing_tables; static void rt_free_hostcache(rtable *tab); static void rt_notify_hostcache(rtable *tab, net *net); static void rt_update_hostcache(rtable *tab); static void rt_next_hop_update(rtable *tab); +static inline void rt_next_hop_resolve_rte(rte *r); +static inline void rt_flowspec_resolve_rte(rte *r, struct channel *c); static inline void rt_prune_table(rtable *tab); static inline void rt_schedule_notify(rtable *tab); +static void rt_flowspec_notify(rtable *tab, net *net); +static void rt_feed_channel(void *); +const char *rt_import_state_name_array[TIS_MAX] = { + [TIS_DOWN] = "DOWN", + [TIS_UP] = "UP", + [TIS_STOP] = "STOP", + [TIS_FLUSHING] = "FLUSHING", + [TIS_WAITING] = "WAITING", + [TIS_CLEARED] = "CLEARED", +}; -/* Like fib_route(), but skips empty net entries */ -static inline void * -net_route_ip4(rtable *t, net_addr_ip4 *n) +const char *rt_export_state_name_array[TES_MAX] = { + [TES_DOWN] = "DOWN", + [TES_HUNGRY] = "HUNGRY", + [TES_FEEDING] = "FEEDING", + [TES_READY] = "READY", + [TES_STOP] = "STOP" +}; + +const char *rt_import_state_name(u8 state) { - net *r; + if (state >= TIS_MAX) + return "!! INVALID !!"; + else + return rt_import_state_name_array[state]; +} - while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0)) +const char *rt_export_state_name(u8 state) +{ + if (state >= TES_MAX) + return "!! INVALID !!"; + else + return rt_export_state_name_array[state]; +} + +static inline struct rte_storage *rt_next_hop_update_rte(rtable *tab, net *n, rte *old); +static struct hostentry *rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); + +static void +net_init_with_trie(struct fib *f, void *N) +{ + rtable *tab = SKIP_BACK(rtable, fib, f); + net *n = N; + + if (tab->trie) + trie_add_prefix(tab->trie, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen); + + if (tab->trie_new) + trie_add_prefix(tab->trie_new, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen); +} + +static inline net * +net_route_ip4_trie(rtable *t, const net_addr_ip4 *n0) +{ + TRIE_WALK_TO_ROOT_IP4(t->trie, n0, n) { - n->pxlen--; - ip4_clrbit(&n->prefix, n->pxlen); + net *r; + if (r = net_find_valid(t, (net_addr *) &n)) + return r; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + +static inline net * +net_route_vpn4_trie(rtable *t, const net_addr_vpn4 *n0) +{ + TRIE_WALK_TO_ROOT_IP4(t->trie, (const net_addr_ip4 *) n0, px) + { + net_addr_vpn4 n = NET_ADDR_VPN4(px.prefix, px.pxlen, n0->rd); + + net *r; + if (r = net_find_valid(t, (net_addr *) &n)) + return r; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + +static inline net * +net_route_ip6_trie(rtable *t, const net_addr_ip6 *n0) +{ + TRIE_WALK_TO_ROOT_IP6(t->trie, n0, n) + { + net *r; + if (r = net_find_valid(t, (net_addr *) &n)) + return r; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + +static inline net * +net_route_vpn6_trie(rtable *t, const net_addr_vpn6 *n0) +{ + TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px) + { + net_addr_vpn6 n = NET_ADDR_VPN6(px.prefix, px.pxlen, n0->rd); + + net *r; + if (r = net_find_valid(t, (net_addr *) &n)) + return r; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + +static inline void * +net_route_ip6_sadr_trie(rtable *t, const net_addr_ip6_sadr *n0) +{ + TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px) + { + net_addr_ip6_sadr n = NET_ADDR_IP6_SADR(px.prefix, px.pxlen, n0->src_prefix, n0->src_pxlen); + net *best = NULL; + int best_pxlen = 0; + + /* We need to do dst first matching. Since sadr addresses are hashed on dst + prefix only, find the hash table chain and go through it to find the + match with the longest matching src prefix. */ + for (struct fib_node *fn = fib_get_chain(&t->fib, (net_addr *) &n); fn; fn = fn->next) + { + net_addr_ip6_sadr *a = (void *) fn->addr; + + if (net_equal_dst_ip6_sadr(&n, a) && + net_in_net_src_ip6_sadr(&n, a) && + (a->src_pxlen >= best_pxlen)) + { + best = fib_node_to_user(&t->fib, fn); + best_pxlen = a->src_pxlen; + } + } + + if (best) + return best; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + +static inline net * +net_route_ip4_fib(rtable *t, const net_addr_ip4 *n0) +{ + net_addr_ip4 n; + net_copy_ip4(&n, n0); + + net *r; + while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0)) + { + n.pxlen--; + ip4_clrbit(&n.prefix, n.pxlen); + } + + return r; +} + +static inline net * +net_route_vpn4_fib(rtable *t, const net_addr_vpn4 *n0) +{ + net_addr_vpn4 n; + net_copy_vpn4(&n, n0); + + net *r; + while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0)) + { + n.pxlen--; + ip4_clrbit(&n.prefix, n.pxlen); + } + + return r; +} + +static inline net * +net_route_ip6_fib(rtable *t, const net_addr_ip6 *n0) +{ + net_addr_ip6 n; + net_copy_ip6(&n, n0); + + net *r; + while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0)) + { + n.pxlen--; + ip6_clrbit(&n.prefix, n.pxlen); + } + + return r; +} + +static inline net * +net_route_vpn6_fib(rtable *t, const net_addr_vpn6 *n0) +{ + net_addr_vpn6 n; + net_copy_vpn6(&n, n0); + + net *r; + while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0)) + { + n.pxlen--; + ip6_clrbit(&n.prefix, n.pxlen); } return r; } static inline void * -net_route_ip6(rtable *t, net_addr_ip6 *n) +net_route_ip6_sadr_fib(rtable *t, const net_addr_ip6_sadr *n0) { - net *r; - - while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0)) - { - n->pxlen--; - ip6_clrbit(&n->prefix, n->pxlen); - } - - return r; -} - -static inline void * -net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n) -{ - struct fib_node *fn; + net_addr_ip6_sadr n; + net_copy_ip6_sadr(&n, n0); while (1) { @@ -100,13 +347,13 @@ net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n) /* We need to do dst first matching. Since sadr addresses are hashed on dst prefix only, find the hash table chain and go through it to find the - match with the smallest matching src prefix. */ - for (fn = fib_get_chain(&t->fib, (net_addr *) n); fn; fn = fn->next) + match with the longest matching src prefix. */ + for (struct fib_node *fn = fib_get_chain(&t->fib, (net_addr *) &n); fn; fn = fn->next) { net_addr_ip6_sadr *a = (void *) fn->addr; - if (net_equal_dst_ip6_sadr(n, a) && - net_in_net_src_ip6_sadr(n, a) && + if (net_equal_dst_ip6_sadr(&n, a) && + net_in_net_src_ip6_sadr(&n, a) && (a->src_pxlen >= best_pxlen)) { best = fib_node_to_user(&t->fib, fn); @@ -117,38 +364,52 @@ net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n) if (best) return best; - if (!n->dst_pxlen) + if (!n.dst_pxlen) break; - n->dst_pxlen--; - ip6_clrbit(&n->dst_prefix, n->dst_pxlen); + n.dst_pxlen--; + ip6_clrbit(&n.dst_prefix, n.dst_pxlen); } return NULL; } -void * +net * net_route(rtable *tab, const net_addr *n) { ASSERT(tab->addr_type == n->type); - net_addr *n0 = alloca(n->length); - net_copy(n0, n); - switch (n->type) { case NET_IP4: + if (tab->trie) + return net_route_ip4_trie(tab, (net_addr_ip4 *) n); + else + return net_route_ip4_fib (tab, (net_addr_ip4 *) n); + case NET_VPN4: - case NET_ROA4: - return net_route_ip4(tab, (net_addr_ip4 *) n0); + if (tab->trie) + return net_route_vpn4_trie(tab, (net_addr_vpn4 *) n); + else + return net_route_vpn4_fib (tab, (net_addr_vpn4 *) n); case NET_IP6: + if (tab->trie) + return net_route_ip6_trie(tab, (net_addr_ip6 *) n); + else + return net_route_ip6_fib (tab, (net_addr_ip6 *) n); + case NET_VPN6: - case NET_ROA6: - return net_route_ip6(tab, (net_addr_ip6 *) n0); + if (tab->trie) + return net_route_vpn6_trie(tab, (net_addr_vpn6 *) n); + else + return net_route_vpn6_fib (tab, (net_addr_vpn6 *) n); case NET_IP6_SADR: - return net_route_ip6_sadr(tab, (net_addr_ip6_sadr *) n0); + if (tab->trie) + return net_route_ip6_sadr_trie(tab, (net_addr_ip6_sadr *) n); + else + return net_route_ip6_sadr_fib (tab, (net_addr_ip6_sadr *) n); default: return NULL; @@ -157,7 +418,35 @@ net_route(rtable *tab, const net_addr *n) static int -net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn) +net_roa_check_ip4_trie(rtable *tab, const net_addr_ip4 *px, u32 asn) +{ + int anything = 0; + + TRIE_WALK_TO_ROOT_IP4(tab->trie, px, px0) + { + net_addr_roa4 roa0 = NET_ADDR_ROA4(px0.prefix, px0.pxlen, 0, 0); + + struct fib_node *fn; + for (fn = fib_get_chain(&tab->fib, (net_addr *) &roa0); fn; fn = fn->next) + { + net_addr_roa4 *roa = (void *) fn->addr; + net *r = fib_node_to_user(&tab->fib, fn); + + if (net_equal_prefix_roa4(roa, &roa0) && r->routes && rte_is_valid(&r->routes->rte)) + { + anything = 1; + if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) + return ROA_VALID; + } + } + } + TRIE_WALK_TO_ROOT_END; + + return anything ? ROA_INVALID : ROA_UNKNOWN; +} + +static int +net_roa_check_ip4_fib(rtable *tab, const net_addr_ip4 *px, u32 asn) { struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0); struct fib_node *fn; @@ -189,7 +478,35 @@ net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn) } static int -net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn) +net_roa_check_ip6_trie(rtable *tab, const net_addr_ip6 *px, u32 asn) +{ + int anything = 0; + + TRIE_WALK_TO_ROOT_IP6(tab->trie, px, px0) + { + net_addr_roa6 roa0 = NET_ADDR_ROA6(px0.prefix, px0.pxlen, 0, 0); + + struct fib_node *fn; + for (fn = fib_get_chain(&tab->fib, (net_addr *) &roa0); fn; fn = fn->next) + { + net_addr_roa6 *roa = (void *) fn->addr; + net *r = fib_node_to_user(&tab->fib, fn); + + if (net_equal_prefix_roa6(roa, &roa0) && r->routes && rte_is_valid(&r->routes->rte)) + { + anything = 1; + if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) + return ROA_VALID; + } + } + } + TRIE_WALK_TO_ROOT_END; + + return anything ? ROA_INVALID : ROA_UNKNOWN; +} + +static int +net_roa_check_ip6_fib(rtable *tab, const net_addr_ip6 *px, u32 asn) { struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0); struct fib_node *fn; @@ -239,9 +556,19 @@ int net_roa_check(rtable *tab, const net_addr *n, u32 asn) { if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4)) - return net_roa_check_ip4(tab, (const net_addr_ip4 *) n, asn); + { + if (tab->trie) + return net_roa_check_ip4_trie(tab, (const net_addr_ip4 *) n, asn); + else + return net_roa_check_ip4_fib (tab, (const net_addr_ip4 *) n, asn); + } else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6)) - return net_roa_check_ip6(tab, (const net_addr_ip6 *) n, asn); + { + if (tab->trie) + return net_roa_check_ip6_trie(tab, (const net_addr_ip6 *) n, asn); + else + return net_roa_check_ip6_fib (tab, (const net_addr_ip6 *) n, asn); + } else return ROA_UNKNOWN; /* Should not happen */ } @@ -293,11 +620,11 @@ rte_store(const rte *r, net *net, rtable *tab) */ void -rte_free(struct rte_storage *e, rtable *tab) +rte_free(struct rte_storage *e) { rt_unlock_source(e->rte.src); rta_free(e->rte.attrs); - sl_free(tab->rte_slab, e); + sl_free(e); } static int /* Actually better or at least as good as */ @@ -310,9 +637,12 @@ rte_better(rte *new, rte *old) if (!rte_is_valid(new)) return 0; - if (new->attrs->pref > old->attrs->pref) + u32 np = rt_get_preference(new); + u32 op = rt_get_preference(old); + + if (np > op) return 1; - if (new->attrs->pref < old->attrs->pref) + if (np < op) return 0; if (new->src->proto->proto != old->src->proto->proto) { @@ -336,7 +666,7 @@ rte_mergable(rte *pri, rte *sec) if (!rte_is_valid(pri) || !rte_is_valid(sec)) return 0; - if (pri->attrs->pref != sec->attrs->pref) + if (rt_get_preference(pri) != rt_get_preference(sec)) return 0; if (pri->src->proto->proto != sec->src->proto->proto) @@ -349,153 +679,185 @@ rte_mergable(rte *pri, rte *sec) } static void -rte_trace(struct channel *c, rte *e, int dir, char *msg) +rte_trace(const char *name, const rte *e, int dir, const char *msg) { - log(L_TRACE "%s.%s %c %s %N %uL %uG %s", - c->proto->name, c->name ?: "?", dir, msg, e->net, e->src->private_id, e->src->global_id, - rta_dest_name(e->attrs->dest)); + log(L_TRACE "%s %c %s %N %uL %uG %s", + name, dir, msg, e->net, e->src->private_id, e->src->global_id, + rta_dest_name(rte_dest(e))); } static inline void -rte_trace_in(uint flag, struct channel *c, rte *e, char *msg) +channel_rte_trace_in(uint flag, struct channel *c, const rte *e, const char *msg) { if ((c->debug & flag) || (c->proto->debug & flag)) - rte_trace(c, e, '>', msg); + rte_trace(c->in_req.name, e, '>', msg); } static inline void -rte_trace_out(uint flag, struct channel *c, rte *e, char *msg) +channel_rte_trace_out(uint flag, struct channel *c, const rte *e, const char *msg) { if ((c->debug & flag) || (c->proto->debug & flag)) - rte_trace(c, e, '<', msg); + rte_trace(c->out_req.name, e, '<', msg); +} + +static inline void +rt_rte_trace_in(uint flag, struct rt_import_request *req, const rte *e, const char *msg) +{ + if (req->trace_routes & flag) + rte_trace(req->name, e, '>', msg); +} + +#if 0 +// seems to be unused at all +static inline void +rt_rte_trace_out(uint flag, struct rt_export_request *req, const rte *e, const char *msg) +{ + if (req->trace_routes & flag) + rte_trace(req->name, e, '<', msg); +} +#endif + +static uint +rte_feed_count(net *n) +{ + uint count = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (rte_is_valid(RTE_OR_NULL(e))) + count++; + return count; +} + +static void +rte_feed_obtain(net *n, struct rte **feed, uint count) +{ + uint i = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (rte_is_valid(RTE_OR_NULL(e))) + { + ASSERT_DIE(i < count); + feed[i++] = &e->rte; + } + ASSERT_DIE(i == count); } static rte * -export_filter_(struct channel *c, rte *rt, linpool *pool, int silent) +export_filter(struct channel *c, rte *rt, int silent) { struct proto *p = c->proto; const struct filter *filter = c->out_filter; - struct proto_stats *stats = &c->stats; - int v; + struct channel_export_stats *stats = &c->export_stats; - v = p->preexport ? p->preexport(c, rt) : 0; + /* Do nothing if we have already rejected the route */ + if (silent && bmap_test(&c->export_reject_map, rt->id)) + goto reject_noset; + + int v = p->preexport ? p->preexport(c, rt) : 0; if (v < 0) { if (silent) - goto reject; + goto reject_noset; - stats->exp_updates_rejected++; + stats->updates_rejected++; if (v == RIC_REJECT) - rte_trace_out(D_FILTERS, c, rt, "rejected by protocol"); - goto reject; + channel_rte_trace_out(D_FILTERS, c, rt, "rejected by protocol"); + goto reject_noset; + } if (v > 0) { if (!silent) - rte_trace_out(D_FILTERS, c, rt, "forced accept by protocol"); + channel_rte_trace_out(D_FILTERS, c, rt, "forced accept by protocol"); goto accept; } v = filter && ((filter == FILTER_REJECT) || - (f_run(filter, rt, pool, + (f_run(filter, rt, (silent ? FF_SILENT : 0)) > F_ACCEPT)); if (v) { if (silent) goto reject; - stats->exp_updates_filtered++; - rte_trace_out(D_FILTERS, c, rt, "filtered out"); + stats->updates_filtered++; + channel_rte_trace_out(D_FILTERS, c, rt, "filtered out"); goto reject; } accept: + /* We have accepted the route */ + bmap_clear(&c->export_reject_map, rt->id); return rt; reject: + /* We have rejected the route by filter */ + bmap_set(&c->export_reject_map, rt->id); + +reject_noset: /* Discard temporary rte */ return NULL; } -static inline rte * -export_filter(struct channel *c, rte *rt, int silent) -{ - return export_filter_(c, rt, rte_update_pool, silent); -} - static void -do_rt_notify(struct channel *c, const net_addr *net, rte *new, rte *old, int refeed) +do_rt_notify(struct channel *c, const net_addr *net, rte *new, const rte *old) { struct proto *p = c->proto; - struct proto_stats *stats = &c->stats; + struct channel_export_stats *stats = &c->export_stats; - if (refeed && new) + if (c->refeeding && new) c->refeed_count++; - /* Apply export limit */ - struct channel_limit *l = &c->out_limit; - if (l->action && !old && new) - { - if (stats->exp_routes >= l->limit) - channel_notify_limit(c, l, PLD_OUT, stats->exp_routes); - - if (l->state == PLS_BLOCKED) + if (!old && new) + if (CHANNEL_LIMIT_PUSH(c, OUT)) { - stats->exp_updates_rejected++; - rte_trace_out(D_FILTERS, c, new, "rejected [limit]"); + stats->updates_rejected++; + channel_rte_trace_out(D_FILTERS, c, new, "rejected [limit]"); return; } - } + + if (!new && old) + CHANNEL_LIMIT_POP(c, OUT); /* Apply export table */ struct rte_storage *old_exported = NULL; if (c->out_table) { - if (!rte_update_out(c, net, new, old, &old_exported, refeed)) + if (!rte_update_out(c, net, new, old, &old_exported)) + { + channel_rte_trace_out(D_ROUTES, c, new, "idempotent"); return; + } } if (new) - stats->exp_updates_accepted++; + stats->updates_accepted++; else - stats->exp_withdraws_accepted++; + stats->withdraws_accepted++; if (old) - { bmap_clear(&c->export_map, old->id); - stats->exp_routes--; - } if (new) - { bmap_set(&c->export_map, new->id); - stats->exp_routes++; - } if (p->debug & D_ROUTES) { if (new && old) - rte_trace_out(D_ROUTES, c, new, "replaced"); + channel_rte_trace_out(D_ROUTES, c, new, "replaced"); else if (new) - rte_trace_out(D_ROUTES, c, new, "added"); + channel_rte_trace_out(D_ROUTES, c, new, "added"); else if (old) - rte_trace_out(D_ROUTES, c, old, "removed"); + channel_rte_trace_out(D_ROUTES, c, old, "removed"); } p->rt_notify(p, c, net, new, old_exported ? &old_exported->rte : old); if (c->out_table && old_exported) - rte_free(old_exported, c->out_table); + rte_free(old_exported); } static void -rt_notify_basic(struct channel *c, const net_addr *net, rte *new, rte *old, int refeed) +rt_notify_basic(struct channel *c, const net_addr *net, rte *new, rte *old) { - if (new) - c->stats.exp_updates_received++; - else - c->stats.exp_withdraws_received++; - if (new) new = export_filter(c, new, 0); @@ -505,176 +867,250 @@ rt_notify_basic(struct channel *c, const net_addr *net, rte *new, rte *old, int if (!new && !old) return; - do_rt_notify(c, net, new, old, refeed); + do_rt_notify(c, net, new, old); } -static void -rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, int refeed) +void +rt_notify_accepted(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe, + struct rte **feed, uint count) { - // struct proto *p = c->proto; - rte nb0; - rte *new_best = NULL; - rte *old_best = NULL; - int new_first = 0; + struct channel *c = SKIP_BACK(struct channel, out_req, req); - /* - * We assume that there are no changes in net route order except (added) - * new_changed and (removed) old_changed. Therefore, the function is not - * compatible with deterministic_med (where nontrivial reordering can happen - * as a result of a route change) and with recomputation of recursive routes - * due to next hop update (where many routes can be changed in one step). - * - * Note that we need this assumption just for optimizations, we could just - * run full new_best recomputation otherwise. - * - * There are three cases: - * feed or old_best is old_changed -> we need to recompute new_best - * old_best is before new_changed -> new_best is old_best, ignore - * old_best is after new_changed -> try new_changed, otherwise old_best - */ + rte nb0, *new_best = NULL; + const rte *old_best = NULL; - if (net->routes) - c->stats.exp_updates_received++; - else - c->stats.exp_withdraws_received++; - - /* Find old_best - either old_changed, or route for net->routes */ - if (old_changed && bmap_test(&c->export_map, old_changed->id)) - old_best = old_changed; - else + for (uint i = 0; i < count; i++) { - for (struct rte_storage *r = net->routes; r && rte_is_valid(&r->rte); r = r->next) + if (!rte_is_valid(feed[i])) + continue; + + /* Has been already rejected, won't bother with it */ + if (!c->refeeding && bmap_test(&c->export_reject_map, feed[i]->id)) + continue; + + /* Previously exported */ + if (!old_best && bmap_test(&c->export_map, feed[i]->id)) { - if (bmap_test(&c->export_map, r->rte.id)) + /* is still best */ + if (!new_best) { - old_best = &r->rte; - break; + DBG("rt_notify_accepted: idempotent\n"); + goto done; } - /* Note if new_changed found before old_best */ - if (&r->rte == new_changed) - new_first = 1; + /* is superseded */ + old_best = feed[i]; + break; + } + + /* Have no new best route yet */ + if (!new_best) + { + /* Try this route not seen before */ + nb0 = *feed[i]; + new_best = export_filter(c, &nb0, 0); + DBG("rt_notify_accepted: checking route id %u: %s\n", feed[i]->id, new_best ? "ok" : "no"); } } - /* Find new_best */ - if ((new_changed == old_changed) || (old_best == old_changed)) - { - /* Feed or old_best changed -> find first accepted by filters */ - for (struct rte_storage *r = net->routes; r && rte_is_valid(&r->rte); r = r->next) - if (new_best = export_filter(c, ((nb0 = r->rte), &nb0), 0)) + /* Check obsolete routes for previously exported */ + if (!old_best) + if (rpe && rpe->old && bmap_test(&c->export_map, rpe->old->rte.id)) + old_best = &rpe->old->rte; + +/* for (; rpe; rpe = atomic_load_explicit(&rpe->next, memory_order_relaxed)) + { + if (rpe->old && bmap_test(&hook->accept_map, rpe->old->id)) + { + old_best = &rpe->old.rte; break; - } - else - { - /* Other cases -> either new_changed, or old_best (and nothing changed) */ - if (new_first && (new_changed = export_filter(c, new_changed, 0))) - new_best = new_changed; - else - return; - } + } + if (rpe == rpe_last) + break; + } + */ + + /* Nothing to export */ if (!new_best && !old_best) - return; + { + DBG("rt_notify_accepted: nothing to export\n"); + goto done; + } - do_rt_notify(c, net->n.addr, new_best, old_best, refeed); -} + do_rt_notify(c, n, new_best, old_best); - -static struct nexthop * -nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max) -{ - return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool); +done: + /* Drop the old stored rejection if applicable. + * new->id == old->id happens when updating hostentries. */ + if (rpe && rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id))) + bmap_clear(&c->export_reject_map, rpe->old->rte.id); } rte * -rt_export_merged(struct channel *c, net *net, linpool *pool, int silent) +rt_export_merged(struct channel *c, struct rte **feed, uint count, linpool *pool, int silent) { - // struct proto *p = c->proto; - struct nexthop *nhs = NULL; - _Thread_local static rte rme; - struct rte_storage *best0 = net->routes; - rte *best; + _Thread_local static rte rloc; - if (!best0 || !rte_is_valid(&best0->rte)) + // struct proto *p = c->proto; + struct nexthop_adata *nhs = NULL; + rte *best0 = feed[0]; + rte *best = NULL; + + if (!rte_is_valid(best0)) return NULL; - best = export_filter_(c, ((rme = best0->rte), &rme), pool, silent); + /* Already rejected, no need to re-run the filter */ + if (!c->refeeding && bmap_test(&c->export_reject_map, best0->id)) + return NULL; - if (!best || !rte_is_reachable(best)) + rloc = *best0; + best = export_filter(c, &rloc, silent); + + if (!best) + /* Best route doesn't pass the filter */ + return NULL; + + if (!rte_is_reachable(best)) + /* Unreachable routes can't be merged */ return best; - for (struct rte_storage *rt0 = best0->next; rt0; rt0 = rt0->next) + for (uint i = 1; i < count; i++) { - if (!rte_mergable(best, &rt0->rte)) + if (!rte_mergable(best0, feed[i])) continue; - rte rnh = rt0->rte; - rte *rt = export_filter_(c, &rnh, pool, 1); + rte tmp0 = *feed[i]; + rte *tmp = export_filter(c, &tmp0, 1); - if (!rt) + if (!tmp || !rte_is_reachable(tmp)) continue; - if (rte_is_reachable(rt)) - nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit); + eattr *nhea = ea_find(tmp->attrs->eattrs, &ea_gen_nexthop); + ASSERT_DIE(nhea); + + if (nhs) + nhs = nexthop_merge(nhs, (struct nexthop_adata *) nhea->u.ptr, c->merge_limit, pool); + else + nhs = (struct nexthop_adata *) nhea->u.ptr; } if (nhs) { - nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit); + eattr *nhea = ea_find(best->attrs->eattrs, &ea_gen_nexthop); + ASSERT_DIE(nhea); - if (nhs->next) - { - best->attrs = rta_cow(best->attrs, pool); - nexthop_link(best->attrs, nhs); - } + nhs = nexthop_merge(nhs, (struct nexthop_adata *) nhea->u.ptr, c->merge_limit, pool); + + best->attrs = rta_cow(best->attrs, pool); + ea_set_attr(&best->attrs->eattrs, + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, &nhs->ad)); } return best; } - -static void -rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed, - rte *new_best, rte *old_best, int refeed) +void +rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe, + struct rte **feed, uint count) { - /* We assume that all rte arguments are either NULL or rte_is_valid() */ + struct channel *c = SKIP_BACK(struct channel, out_req, req); - /* This check should be done by the caller */ - if (!new_best && !old_best) - return; + // struct proto *p = c->proto; +#if 0 /* TODO: Find whether this check is possible when processing multiple changes at once. */ /* Check whether the change is relevant to the merged route */ if ((new_best == old_best) && (new_changed != old_changed) && !rte_mergable(new_best, new_changed) && !rte_mergable(old_best, old_changed)) return; +#endif - if (new_best) - c->stats.exp_updates_received++; - else - c->stats.exp_withdraws_received++; + rte *old_best = NULL; + /* Find old best route */ + for (uint i = 0; i < count; i++) + if (bmap_test(&c->export_map, feed[i]->id)) + { + old_best = feed[i]; + break; + } + + /* Check obsolete routes for previously exported */ + if (!old_best) + if (rpe && rpe->old && bmap_test(&c->export_map, rpe->old->rte.id)) + old_best = &rpe->old->rte; + +/* for (; rpe; rpe = atomic_load_explicit(&rpe->next, memory_order_relaxed)) + { + if (rpe->old && bmap_test(&hook->accept_map, rpe->old->id)) + { + old_best = &rpe->old.rte; + break; + } + + if (rpe == rpe_last) + break; + } + */ /* Prepare new merged route */ - if (new_best) - new_best = rt_export_merged(c, net, rte_update_pool, 0); + rte *new_merged = count ? rt_export_merged(c, feed, count, rte_update_pool, 0) : NULL; - /* Check old merged route */ - if (old_best && !bmap_test(&c->export_map, old_best->id)) - old_best = NULL; + if (new_merged || old_best) + do_rt_notify(c, n, new_merged, old_best); - if (!new_best && !old_best) - return; - - do_rt_notify(c, net->n.addr, new_best, old_best, refeed); + /* Drop the old stored rejection if applicable. + * new->id == old->id happens when updating hostentries. */ + if (rpe && rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id))) + bmap_clear(&c->export_reject_map, rpe->old->rte.id); } +void +rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + rte n0; + + if (rpe->new_best != rpe->old_best) + rt_notify_basic(c, net, RTE_COPY(rpe->new_best, &n0), RTE_OR_NULL(rpe->old_best)); + + /* Drop the old stored rejection if applicable. + * new->id == old->id happens when updating hostentries. */ + if (rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id))) + bmap_clear(&c->export_reject_map, rpe->old->rte.id); +} + +void +rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + rte n0; + + if (rpe->new != rpe->old) + rt_notify_basic(c, net, RTE_COPY(rpe->new, &n0), RTE_OR_NULL(rpe->old)); + + /* Drop the old stored rejection if applicable. + * new->id == old->id happens when updating hostentries. */ + if (rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id))) + bmap_clear(&c->export_reject_map, rpe->old->rte.id); +} + +void +rt_feed_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + + for (uint i=0; i<count; i++) + { + rte n0 = *feed[i]; + rt_notify_basic(c, net, &n0, NULL); + } +} /** * rte_announce - announce a routing table change * @tab: table the route has been added to - * @type: type of route announcement (RA_UNDEF or RA_ANY) * @net: network in question * @new: the new or changed route * @old: the previous route replaced by the new one @@ -690,13 +1126,6 @@ rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed * and @new_best and @old_best describes best routes. Other routes are not * affected, but in sorted table the order of other routes might change. * - * Second, There is a bulk change of multiple routes in @net, with shared best - * route selection. In such case separate route changes are described using - * @type of %RA_ANY, with @new and @old specifying the changed route, while - * @new_best and @old_best are NULL. After that, another notification is done - * where @new_best and @old_best are filled (may be the same), but @new and @old - * are NULL. - * * The function announces the change to all associated channels. For each * channel, an appropriate preprocessing is done according to channel &ra_mode. * For example, %RA_OPTIMAL channels receive just changes of best routes. @@ -711,19 +1140,19 @@ rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed * done outside of scope of rte_announce(). */ static void -rte_announce(rtable *tab, uint type, net *net, struct rte_storage *new, struct rte_storage *old, +rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage *old, struct rte_storage *new_best, struct rte_storage *old_best) { - if (!new || !rte_is_valid(&new->rte)) + if (!rte_is_valid(RTE_OR_NULL(new))) new = NULL; - if (!old || !rte_is_valid(&old->rte)) + if (!rte_is_valid(RTE_OR_NULL(old))) old = NULL; - if (!new_best || !rte_is_valid(&new_best->rte)) + if (!rte_is_valid(RTE_OR_NULL(new_best))) new_best = NULL; - if (!old_best || !rte_is_valid(&old_best->rte)) + if (!rte_is_valid(RTE_OR_NULL(old_best))) old_best = NULL; if (!new && !old && !new_best && !old_best) @@ -732,51 +1161,50 @@ rte_announce(rtable *tab, uint type, net *net, struct rte_storage *new, struct r if (new_best != old_best) { if (new_best) - new_best->rte.sender->stats.pref_routes++; + new_best->rte.sender->stats.pref++; if (old_best) - old_best->rte.sender->stats.pref_routes--; + old_best->rte.sender->stats.pref--; if (tab->hostcache) rt_notify_hostcache(tab, net); + + if (!EMPTY_LIST(tab->flowspec_links)) + rt_flowspec_notify(tab, net); } rt_schedule_notify(tab); - struct channel *c; node *n; - WALK_LIST2(c, n, tab->channels, table_node) + struct rt_pending_export rpe = { .new = new, .old = old, .new_best = new_best, .old_best = old_best }; + uint count = rte_feed_count(net); + rte **feed = NULL; + if (count) { - if (c->export_state == ES_DOWN) + feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(net, feed, count); + } + + struct rt_export_hook *eh; + WALK_LIST(eh, tab->exports) + { + if (eh->export_state == TES_STOP) continue; - if (type && (type != c->ra_mode)) - continue; + if (new) + eh->stats.updates_received++; + else + eh->stats.withdraws_received++; - rte n0; - switch (c->ra_mode) - { - case RA_OPTIMAL: - if (new_best != old_best) - rt_notify_basic(c, net->n.addr, RTE_COPY(new_best, &n0), RTE_OR_NULL(old_best), 0); - break; - - case RA_ANY: - if (new != old) - rt_notify_basic(c, net->n.addr, RTE_COPY(new, &n0), RTE_OR_NULL(old), 0); - break; - - case RA_ACCEPTED: - rt_notify_accepted(c, net, RTE_OR_NULL(new), RTE_OR_NULL(old), 0); - break; - - case RA_MERGED: - rt_notify_merged(c, net, RTE_OR_NULL(new), RTE_OR_NULL(old), RTE_OR_NULL(new_best), RTE_OR_NULL(old_best), 0); - break; - } + if (eh->req->export_one) + eh->req->export_one(eh->req, net->n.addr, &rpe); + else if (eh->req->export_bulk) + eh->req->export_bulk(eh->req, net->n.addr, &rpe, feed, count); + else + bug("Export request must always provide an export method"); } } static inline int -rte_validate(rte *e) +rte_validate(struct channel *ch, rte *e) { int c; const net_addr *n = e->net; @@ -784,7 +1212,7 @@ rte_validate(rte *e) if (!net_validate(n)) { log(L_WARN "Ignoring bogus prefix %N received via %s", - n, e->sender->proto->name); + n, ch->proto->name); return 0; } @@ -794,21 +1222,34 @@ rte_validate(rte *e) if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) { log(L_WARN "Ignoring bogus route %N received via %s", - n, e->sender->proto->name); + n, ch->proto->name); return 0; } - if (net_type_match(n, NB_DEST) == !e->attrs->dest) + if (net_type_match(n, NB_DEST)) { - log(L_WARN "Ignoring route %N with invalid dest %d received via %s", - n, e->attrs->dest, e->sender->proto->name); - return 0; - } + eattr *nhea = ea_find(e->attrs->eattrs, &ea_gen_nexthop); + int dest = nhea_dest(nhea); - if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh))) + if (dest == RTD_NONE) + { + log(L_WARN "Ignoring route %N with no destination received via %s", + n, ch->proto->name); + return 0; + } + + if ((dest == RTD_UNICAST) && + !nexthop_is_sorted((struct nexthop_adata *) nhea->u.ptr)) + { + log(L_WARN "Ignoring unsorted multipath route %N received via %s", + n, ch->proto->name); + return 0; + } + } + else if (ea_find(e->attrs->eattrs, &ea_gen_nexthop)) { - log(L_WARN "Ignoring unsorted multipath route %N received via %s", - n, e->sender->proto->name); + log(L_WARN "Ignoring route %N having a nexthop attribute received via %s", + n, ch->proto->name); return 0; } @@ -829,12 +1270,11 @@ rte_same(rte *x, rte *y) static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); } static void -rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) +rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *src) { - struct proto *p = c->proto; + struct rt_import_request *req = c->req; struct rtable *table = c->table; - struct proto_stats *stats = &c->stats; - static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS; + struct rt_import_stats *stats = &c->stats; struct rte_storage *old_best_stored = net->routes, *old_stored = NULL; rte *old_best = old_best_stored ? &old_best_stored->rte : NULL; rte *old = NULL; @@ -846,22 +1286,20 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) { old = &(old_stored = (*before_old))->rte; - /* If there is the same route in the routing table but from - * a different sender, then there are two paths from the - * source protocol to this routing table through transparent - * pipes, which is not allowed. - * - * We log that and ignore the route. If it is withdraw, we - * ignore it completely (there might be 'spurious withdraws', - * see FIXME in do_rte_announce()) - */ - if (old->sender->proto != p) - { - if (new) - log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s", - net->n.addr, table->name); - return; - } + /* If there is the same route in the routing table but from + * a different sender, then there are two paths from the + * source protocol to this routing table through transparent + * pipes, which is not allowed. + * We log that and ignore the route. */ + if (old->sender != c) + { + if (!old->generation && !new->generation) + bug("Two protocols claim to author a route with the same rte_src in table %s: %N %s/%u:%u", + c->table->name, net->n.addr, old->src->proto->name, old->src->private_id, old->src->global_id); + + log_rl(&table->rl_pipe, L_ERR "Route source collision in table %s: %N %s/%u:%u", + c->table->name, net->n.addr, old->src->proto->name, old->src->private_id, old->src->global_id); + } if (new && rte_same(old, new)) { @@ -871,97 +1309,39 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) if (!rte_is_filtered(new)) { - stats->imp_updates_ignored++; - rte_trace_in(D_ROUTES, c, new, "ignored"); + stats->updates_ignored++; + rt_rte_trace_in(D_ROUTES, req, new, "ignored"); } + } - return; - } - - *before_old = (*before_old)->next; - table->rt_count--; + *before_old = (*before_old)->next; + table->rt_count--; } if (!old && !new) { - stats->imp_withdraws_ignored++; + stats->withdraws_ignored++; return; } + if (req->preimport) + new = req->preimport(req, new, old); + int new_ok = rte_is_ok(new); int old_ok = rte_is_ok(old); - struct channel_limit *l = &c->rx_limit; - if (l->action && !old && new && !c->in_table) - { - u32 all_routes = stats->imp_routes + stats->filt_routes; - - if (all_routes >= l->limit) - channel_notify_limit(c, l, PLD_RX, all_routes); - - if (l->state == PLS_BLOCKED) - { - /* In receive limit the situation is simple, old is NULL so - we just free new and exit like nothing happened */ - - stats->imp_updates_ignored++; - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - return; - } - } - - l = &c->in_limit; - if (l->action && !old_ok && new_ok) - { - if (stats->imp_routes >= l->limit) - channel_notify_limit(c, l, PLD_IN, stats->imp_routes); - - if (l->state == PLS_BLOCKED) - { - /* In import limit the situation is more complicated. We - shouldn't just drop the route, we should handle it like - it was filtered. We also have to continue the route - processing if old or new is non-NULL, but we should exit - if both are NULL as this case is probably assumed to be - already handled. */ - - stats->imp_updates_ignored++; - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - - if (c->in_keep_filtered) - new->flags |= REF_FILTERED; - else - new = NULL; - - /* Note that old && !new could be possible when - c->in_keep_filtered changed in the recent past. */ - - if (!old && !new) - return; - - new_ok = 0; - goto skip_stats1; - } - } - if (new_ok) - stats->imp_updates_accepted++; + stats->updates_accepted++; else if (old_ok) - stats->imp_withdraws_accepted++; + stats->withdraws_accepted++; else - stats->imp_withdraws_ignored++; + stats->withdraws_ignored++; if (old_ok || new_ok) table->last_rt_change = current_time(); - skip_stats1:; struct rte_storage *new_stored = new ? rte_store(new, net, table) : NULL; - if (new) - rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++; - if (old) - rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--; - if (table->config->sorted) { /* If routes are sorted, just insert new route to appropriate position */ @@ -1064,23 +1444,20 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) } /* Log the route change */ - if ((c->debug & D_ROUTES) || (p->debug & D_ROUTES)) + if (new_ok) + rt_rte_trace_in(D_ROUTES, req, &new_stored->rte, new_stored == net->routes ? "added [best]" : "added"); + else if (old_ok) { - if (new_ok) - rte_trace(c, &new_stored->rte, '>', new_stored == net->routes ? "added [best]" : "added"); - else if (old_ok) - { - if (old != old_best) - rte_trace(c, old, '>', "removed"); - else if (net->routes && rte_is_ok(&net->routes->rte)) - rte_trace(c, old, '>', "removed [replaced]"); - else - rte_trace(c, old, '>', "removed [sole]"); - } + if (old != old_best) + rt_rte_trace_in(D_ROUTES, req, old, "removed"); + else if (net->routes && rte_is_ok(&net->routes->rte)) + rt_rte_trace_in(D_ROUTES, req, old, "removed [replaced]"); + else + rt_rte_trace_in(D_ROUTES, req, old, "removed [sole]"); } /* Propagate the route change */ - rte_announce(table, RA_UNDEF, net, new_stored, old_stored, + rte_announce(table, net, new_stored, old_stored, net->routes, old_best_stored); if (!net->routes && @@ -1088,17 +1465,20 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) (table->gc_time + table->config->gc_min_time <= current_time())) rt_schedule_prune(table); +#if 0 + /* Enable and reimplement these callbacks if anybody wants to use them */ if (old_ok && p->rte_remove) p->rte_remove(net, old); if (new_ok && p->rte_insert) p->rte_insert(net, &new_stored->rte); +#endif if (old) { if (!new_stored) hmap_clear(&table->id_map, old->id); - rte_free(old_stored, table); + rte_free(old_stored); } } @@ -1117,100 +1497,134 @@ rte_update_unlock(void) lp_flush(rte_update_pool); } -static int rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); +rte * +channel_preimport(struct rt_import_request *req, rte *new, rte *old) +{ + struct channel *c = SKIP_BACK(struct channel, in_req, req); + + if (new && !old) + if (CHANNEL_LIMIT_PUSH(c, RX)) + return NULL; + + if (!new && old) + CHANNEL_LIMIT_POP(c, RX); + + int new_in = new && !rte_is_filtered(new); + int old_in = old && !rte_is_filtered(old); + + if (new_in && !old_in) + if (CHANNEL_LIMIT_PUSH(c, IN)) + if (c->in_keep_filtered) + { + new->flags |= REF_FILTERED; + return new; + } + else + return NULL; + + if (!new_in && old_in) + CHANNEL_LIMIT_POP(c, IN); + + return new; +} + +static void rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); void rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) { + if (!c->in_req.hook) + return; + + ASSERT(c->channel_state == CS_UP); + if (c->in_table && !rte_update_in(c, n, new, src)) return; - // struct proto *p = c->proto; - struct proto_stats *stats = &c->stats; - const struct filter *filter = c->in_filter; - net *nn; + return rte_update_direct(c, n, new, src); +} - ASSERT(c->channel_state == CS_UP); +static void +rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) +{ + const struct filter *filter = c->in_filter; + struct channel_import_stats *stats = &c->import_stats; rte_update_lock(); if (new) { new->net = n; - new->sender = c; - stats->imp_updates_received++; - if (!rte_validate(new)) + int fr; + + stats->updates_received++; + if ((filter == FILTER_REJECT) || + ((fr = f_run(filter, new, 0)) > F_ACCEPT)) { - rte_trace_in(D_FILTERS, c, new, "invalid"); - stats->imp_updates_invalid++; - goto drop; - } - - if (filter == FILTER_REJECT) - { - stats->imp_updates_filtered++; - rte_trace_in(D_FILTERS, c, new, "filtered out"); - - if (! c->in_keep_filtered) - goto drop; - - /* new is a private copy, i could modify it */ - new->flags |= REF_FILTERED; - } - else if (filter) - { - int fr = f_run(filter, new, rte_update_pool, 0); - if (fr > F_ACCEPT) - { - stats->imp_updates_filtered++; - rte_trace_in(D_FILTERS, c, new, "filtered out"); - - if (! c->in_keep_filtered) - goto drop; + stats->updates_filtered++; + channel_rte_trace_in(D_FILTERS, c, new, "filtered out"); + if (c->in_keep_filtered) new->flags |= REF_FILTERED; - } + else + new = NULL; + } + + if (new) + if (net_is_flow(n)) + rt_flowspec_resolve_rte(new, c); + else + rt_next_hop_resolve_rte(new); + + if (new && !rte_validate(c, new)) + { + channel_rte_trace_in(D_FILTERS, c, new, "invalid"); + stats->updates_invalid++; + new = NULL; } - /* Use the actual struct network, not the dummy one */ - nn = net_get(c->table, n); - new->net = nn->n.addr; } else - { - stats->imp_withdraws_received++; + stats->withdraws_received++; - if (!(nn = net_find(c->table, n)) || !src) - { - stats->imp_withdraws_ignored++; - rte_update_unlock(); - return; - } + rte_import(&c->in_req, n, new, src); + + rte_update_unlock(); +} + +void +rte_import(struct rt_import_request *req, const net_addr *n, rte *new, struct rte_src *src) +{ + struct rt_import_hook *hook = req->hook; + if (!hook) + return; + + net *nn; + if (new) + { + /* Use the actual struct network, not the dummy one */ + nn = net_get(hook->table, n); + new->net = nn->n.addr; + new->sender = hook; + } + else if (!(nn = net_find(hook->table, n))) + { + req->hook->stats.withdraws_ignored++; + return; } - recalc: /* And recalculate the best route */ - rte_recalculate(c, nn, new, src); - - rte_update_unlock(); - return; - - drop: - new = NULL; - if (nn = net_find(c->table, n)) - goto recalc; - - rte_update_unlock(); + rte_recalculate(hook, nn, new, src); } /* Independent call to rte_announce(), used from next hop recalculation, outside of rte_update(). new must be non-NULL */ static inline void -rte_announce_i(rtable *tab, uint type, net *net, struct rte_storage *new, struct rte_storage *old, +rte_announce_i(rtable *tab, net *net, struct rte_storage *new, struct rte_storage *old, struct rte_storage *new_best, struct rte_storage *old_best) { rte_update_lock(); - rte_announce(tab, type, net, new, old, new_best, old_best); + rte_announce(tab, net, new, old, new_best, old_best); rte_update_unlock(); } @@ -1228,7 +1642,7 @@ rte_modify(net *net, rte *old) { rte_update_lock(); - rte *new = old->sender->proto->rte_modify(old, rte_update_pool); + rte *new = old->sender->req->rte_modify(old, rte_update_pool); if (new != old) { if (new) @@ -1246,26 +1660,144 @@ rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filte { net *n = net_find(t, a); - if (!n || !n->routes) + if (!n || !rte_is_valid(RTE_OR_NULL(n->routes))) return 0; rte rt = n->routes->rte; - if (!rte_is_valid(&rt)) - return 0; - rte_update_lock(); /* Rest is stripped down export_filter() */ int v = c->proto->preexport ? c->proto->preexport(c, &rt) : 0; if (v == RIC_PROCESS) - v = (f_run(filter, &rt, rte_update_pool, FF_SILENT) <= F_ACCEPT); + v = (f_run(filter, &rt, FF_SILENT) <= F_ACCEPT); rte_update_unlock(); return v > 0; } +static void +rt_export_stopped(void *data) +{ + struct rt_export_hook *hook = data; + rtable *tab = hook->table; + + /* Unlist */ + rem_node(&hook->n); + + /* Reporting the channel as stopped. */ + hook->stopped(hook->req); + + /* Freeing the hook together with its coroutine. */ + rfree(hook->pool); + rt_unlock_table(tab); + + DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count); +} + + +static inline void +rt_set_import_state(struct rt_import_hook *hook, u8 state) +{ + hook->last_state_change = current_time(); + hook->import_state = state; + + if (hook->req->log_state_change) + hook->req->log_state_change(hook->req, state); +} + +static inline void +rt_set_export_state(struct rt_export_hook *hook, u8 state) +{ + hook->last_state_change = current_time(); + hook->export_state = state; + + if (hook->req->log_state_change) + hook->req->log_state_change(hook->req, state); +} + +void +rt_request_import(rtable *tab, struct rt_import_request *req) +{ + rt_lock_table(tab); + + struct rt_import_hook *hook = req->hook = mb_allocz(tab->rp, sizeof(struct rt_import_hook)); + + DBG("Lock table %s for import %p req=%p uc=%u\n", tab->name, hook, req, tab->use_count); + + hook->req = req; + hook->table = tab; + + rt_set_import_state(hook, TIS_UP); + + hook->n = (node) {}; + add_tail(&tab->imports, &hook->n); +} + +void +rt_stop_import(struct rt_import_request *req, void (*stopped)(struct rt_import_request *)) +{ + ASSERT_DIE(req->hook); + struct rt_import_hook *hook = req->hook; + + rt_schedule_prune(hook->table); + + rt_set_import_state(hook, TIS_STOP); + + hook->stopped = stopped; +} + +void +rt_request_export(rtable *tab, struct rt_export_request *req) +{ + rt_lock_table(tab); + + pool *p = rp_new(tab->rp, "Export hook"); + struct rt_export_hook *hook = req->hook = mb_allocz(p, sizeof(struct rt_export_hook)); + hook->pool = p; + hook->lp = lp_new_default(p); + + hook->req = req; + hook->table = tab; + + /* stats zeroed by mb_allocz */ + + rt_set_export_state(hook, TES_HUNGRY); + + hook->n = (node) {}; + add_tail(&tab->exports, &hook->n); + + FIB_ITERATE_INIT(&hook->feed_fit, &tab->fib); + + DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count); + + rt_set_export_state(hook, TES_FEEDING); + + hook->event = ev_new_init(p, rt_feed_channel, hook); + ev_schedule_work(hook->event); +} + +void +rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_request *)) +{ + ASSERT_DIE(req->hook); + struct rt_export_hook *hook = req->hook; + + rtable *tab = hook->table; + + /* Stop feeding */ + ev_postpone(hook->event); + + if (hook->export_state == TES_FEEDING) + fit_get(&tab->fib, &hook->feed_fit); + + hook->event->hook = rt_export_stopped; + hook->stopped = stopped; + + rt_set_export_state(hook, TES_STOP); + ev_schedule(hook->event); +} /** * rt_refresh_begin - start a refresh cycle @@ -1282,12 +1814,12 @@ rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filte * flag in rt_refresh_end() and then removing such routes in the prune loop. */ void -rt_refresh_begin(rtable *t, struct channel *c) +rt_refresh_begin(rtable *t, struct rt_import_request *req) { FIB_WALK(&t->fib, net, n) { for (struct rte_storage *e = n->routes; e; e = e->next) - if (e->rte.sender == c) + if (e->rte.sender == req->hook) e->rte.flags |= REF_STALE; } FIB_WALK_END; @@ -1302,14 +1834,14 @@ rt_refresh_begin(rtable *t, struct channel *c) * hook. See rt_refresh_begin() for description of refresh cycles. */ void -rt_refresh_end(rtable *t, struct channel *c) +rt_refresh_end(rtable *t, struct rt_import_request *req) { int prune = 0; FIB_WALK(&t->fib, net, n) { for (struct rte_storage *e = n->routes; e; e = e->next) - if ((e->rte.sender == c) && (e->rte.flags & REF_STALE)) + if ((e->rte.sender == req->hook) && (e->rte.flags & REF_STALE)) { e->rte.flags |= REF_DISCARD; prune = 1; @@ -1322,14 +1854,14 @@ rt_refresh_end(rtable *t, struct channel *c) } void -rt_modify_stale(rtable *t, struct channel *c) +rt_modify_stale(rtable *t, struct rt_import_request *req) { int prune = 0; FIB_WALK(&t->fib, net, n) { for (struct rte_storage *e = n->routes; e; e = e->next) - if ((e->rte.sender == c) && (e->rte.flags & REF_STALE) && !(e->rte.flags & REF_FILTERED)) + if ((e->rte.sender == req->hook) && (e->rte.flags & REF_STALE) && !(e->rte.flags & REF_FILTERED)) { e->rte.flags |= REF_MODIFY; prune = 1; @@ -1365,7 +1897,7 @@ rte_dump(struct rte_storage *e) void rt_dump(rtable *t) { - debug("Dump of routing table <%s>\n", t->name); + debug("Dump of routing table <%s>%s\n", t->name, t->deleted ? " (deleted)" : ""); #ifdef DEBUGGING fib_check(&t->fib); #endif @@ -1391,6 +1923,54 @@ rt_dump_all(void) WALK_LIST2(t, n, routing_tables, n) rt_dump(t); + + WALK_LIST2(t, n, deleted_routing_tables, n) + rt_dump(t); +} + +void +rt_dump_hooks(rtable *tab) +{ + debug("Dump of hooks in routing table <%s>%s\n", tab->name, tab->deleted ? " (deleted)" : ""); + debug(" nhu_state=%u hcu_scheduled=%u use_count=%d rt_count=%u\n", + tab->nhu_state, tab->hcu_scheduled, tab->use_count, tab->rt_count); + debug(" last_rt_change=%t gc_time=%t gc_counter=%d prune_state=%u\n", + tab->last_rt_change, tab->gc_time, tab->gc_counter, tab->prune_state); + + struct rt_import_hook *ih; + WALK_LIST(ih, tab->imports) + { + ih->req->dump_req(ih->req); + debug(" Import hook %p requested by %p: pref=%u" + " last_state_change=%t import_state=%u stopped=%p\n", + ih, ih->req, ih->stats.pref, + ih->last_state_change, ih->import_state, ih->stopped); + } + + struct rt_export_hook *eh; + WALK_LIST(eh, tab->exports) + { + eh->req->dump_req(eh->req); + debug(" Export hook %p requested by %p:" + " refeed_pending=%u last_state_change=%t export_state=%u stopped=%p\n", + eh, eh->req, eh->refeed_pending, eh->last_state_change, eh->export_state, eh->stopped); + } + debug("\n"); +} + +void +rt_dump_hooks_all(void) +{ + rtable *t; + node *n; + + debug("Dump of all table hooks\n"); + + WALK_LIST2(t, n, routing_tables, n) + rt_dump_hooks(t); + + WALK_LIST2(t, n, deleted_routing_tables, n) + rt_dump_hooks(t); } static inline void @@ -1508,6 +2088,7 @@ rt_subscribe(rtable *tab, struct rt_subscription *s) { s->tab = tab; rt_lock_table(tab); + DBG("rt_subscribe(%s)\n", tab->name); add_tail(&tab->subscribers, &s->n); } @@ -1518,6 +2099,90 @@ rt_unsubscribe(struct rt_subscription *s) rt_unlock_table(s->tab); } +static struct rt_flowspec_link * +rt_flowspec_find_link(rtable *src, rtable *dst) +{ + struct rt_flowspec_link *ln; + WALK_LIST(ln, src->flowspec_links) + if ((ln->src == src) && (ln->dst == dst)) + return ln; + + return NULL; +} + +void +rt_flowspec_link(rtable *src, rtable *dst) +{ + ASSERT(rt_is_ip(src)); + ASSERT(rt_is_flow(dst)); + + struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst); + + if (!ln) + { + rt_lock_table(src); + rt_lock_table(dst); + + ln = mb_allocz(src->rp, sizeof(struct rt_flowspec_link)); + ln->src = src; + ln->dst = dst; + add_tail(&src->flowspec_links, &ln->n); + } + + ln->uc++; +} + +void +rt_flowspec_unlink(rtable *src, rtable *dst) +{ + struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst); + + ASSERT(ln && (ln->uc > 0)); + + ln->uc--; + + if (!ln->uc) + { + rem_node(&ln->n); + mb_free(ln); + + rt_unlock_table(src); + rt_unlock_table(dst); + } +} + +static void +rt_flowspec_notify(rtable *src, net *net) +{ + /* Only IP tables are src links */ + ASSERT(rt_is_ip(src)); + + struct rt_flowspec_link *ln; + WALK_LIST(ln, src->flowspec_links) + { + rtable *dst = ln->dst; + ASSERT(rt_is_flow(dst)); + + /* No need to inspect it further if recalculation is already active */ + if ((dst->nhu_state == NHU_SCHEDULED) || (dst->nhu_state == NHU_DIRTY)) + continue; + + if (trie_match_net(dst->flowspec_trie, net->n.addr)) + rt_schedule_nhu(dst); + } +} + +static void +rt_flowspec_reset_trie(rtable *tab) +{ + linpool *lp = tab->flowspec_trie->lp; + int ipv4 = tab->flowspec_trie->ipv4; + + lp_flush(lp); + tab->flowspec_trie = f_new_trie(lp, 0); + tab->flowspec_trie->ipv4 = ipv4; +} + static void rt_free(resource *_r) { @@ -1564,12 +2229,7 @@ static struct resclass rt_class = { rtable * rt_setup(pool *pp, struct rtable_config *cf) { - int ns = strlen("Routing table ") + strlen(cf->name) + 1; - void *nb = mb_alloc(pp, ns); - ASSERT_DIE(ns - 1 == bsnprintf(nb, ns, "Routing table %s", cf->name)); - - pool *p = rp_new(pp, nb); - mb_move(nb, p); + pool *p = rp_newf(pp, "Routing table %s", cf->name); rtable *t = ralloc(p, &rt_class); t->rp = p; @@ -1582,9 +2242,20 @@ rt_setup(pool *pp, struct rtable_config *cf) fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL); + if (cf->trie_used) + { + t->trie = f_new_trie(lp_new_default(p), 0); + t->trie->ipv4 = net_val_match(t->addr_type, NB_IP4 | NB_VPN4 | NB_ROA4); + + t->fib.init = net_init_with_trie; + } + + init_list(&t->flowspec_links); + if (!(t->internal = cf->internal)) { - init_list(&t->channels); + init_list(&t->imports); + init_list(&t->exports); hmap_init(&t->id_map, p, 1024); hmap_set(&t->id_map, 0); @@ -1592,6 +2263,14 @@ rt_setup(pool *pp, struct rtable_config *cf) t->rt_event = ev_new_init(p, rt_event, t); t->last_rt_change = t->gc_time = current_time(); + + t->rl_pipe = (struct tbf) TBF_DEFAULT_LOG_LIMITS; + + if (rt_is_flow(t)) + { + t->flowspec_trie = f_new_trie(lp_new_default(p), 0); + t->flowspec_trie->ipv4 = (t->addr_type == NET_FLOW4); + } } return t; @@ -1610,6 +2289,7 @@ rt_init(void) rt_table_pool = rp_new(&root_pool, "Routing tables"); rte_update_pool = lp_new_default(rt_table_pool); init_list(&routing_tables); + init_list(&deleted_routing_tables); } @@ -1631,9 +2311,9 @@ static void rt_prune_table(rtable *tab) { struct fib_iterator *fit = &tab->prune_fit; - int limit = 512; + int limit = 2000; - struct channel *c; + struct rt_import_hook *ih; node *n, *x; DBG("Pruning route table %s\n", tab->name); @@ -1647,29 +2327,36 @@ rt_prune_table(rtable *tab) if (tab->prune_state == 1) { /* Mark channels to flush */ - WALK_LIST2(c, n, tab->channels, table_node) - if (c->channel_state == CS_FLUSHING) - c->flush_active = 1; + WALK_LIST2(ih, n, tab->imports, n) + if (ih->import_state == TIS_STOP) + rt_set_import_state(ih, TIS_FLUSHING); FIB_ITERATE_INIT(fit, &tab->fib); tab->prune_state = 2; + + if (tab->prune_trie) + { + /* Init prefix trie pruning */ + tab->trie_new = f_new_trie(lp_new_default(tab->rp), 0); + tab->trie_new->ipv4 = tab->trie->ipv4; + } } again: FIB_ITERATE_START(&tab->fib, fit, net, n) { rescan: + if (limit <= 0) + { + FIB_ITERATE_PUT(fit); + ev_schedule(tab->rt_event); + return; + } + for (struct rte_storage *e=n->routes; e; e=e->next) { - if (e->rte.sender->flush_active || (e->rte.flags & REF_DISCARD)) + if ((e->rte.sender->import_state == TIS_FLUSHING) || (e->rte.flags & REF_DISCARD)) { - if (limit <= 0) - { - FIB_ITERATE_PUT(fit); - ev_schedule(tab->rt_event); - return; - } - rte_discard(n, &e->rte); limit--; @@ -1678,13 +2365,6 @@ again: if (e->rte.flags & REF_MODIFY) { - if (limit <= 0) - { - FIB_ITERATE_PUT(fit); - ev_schedule(tab->rt_event); - return; - } - rte_modify(n, &e->rte); limit--; @@ -1698,6 +2378,12 @@ again: fib_delete(&tab->fib, n); goto again; } + + if (tab->trie_new) + { + trie_add_prefix(tab->trie_new, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen); + limit--; + } } FIB_ITERATE_END; @@ -1711,23 +2397,117 @@ again: /* state change 2->0, 3->1 */ tab->prune_state &= 1; - if (tab->prune_state > 0) - ev_schedule(tab->rt_event); + if (tab->trie_new) + { + /* Finish prefix trie pruning */ + + if (!tab->trie_lock_count) + { + rfree(tab->trie->lp); + } + else + { + ASSERT(!tab->trie_old); + tab->trie_old = tab->trie; + tab->trie_old_lock_count = tab->trie_lock_count; + tab->trie_lock_count = 0; + } + + tab->trie = tab->trie_new; + tab->trie_new = NULL; + tab->prune_trie = 0; + } + else + { + /* Schedule prefix trie pruning */ + if (tab->trie && !tab->trie_old && (tab->trie->prefix_count > (2 * tab->fib.entries))) + { + /* state change 0->1, 2->3 */ + tab->prune_state |= 1; + tab->prune_trie = 1; + } + } - /* FIXME: This should be handled in a better way */ rt_prune_sources(); /* Close flushed channels */ - WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node) - if (c->flush_active) - { - c->flush_active = 0; - channel_set_state(c, CS_DOWN); - } - - return; + WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n) + if (ih->import_state == TIS_FLUSHING) + { + rt_set_import_state(ih, TIS_CLEARED); + ih->stopped(ih->req); + rem_node(&ih->n); + mb_free(ih); + rt_unlock_table(tab); + } } +/** + * rt_lock_trie - lock a prefix trie of a routing table + * @tab: routing table with prefix trie to be locked + * + * The prune loop may rebuild the prefix trie and invalidate f_trie_walk_state + * structures. Therefore, asynchronous walks should lock the prefix trie using + * this function. That allows the prune loop to rebuild the trie, but postpones + * its freeing until all walks are done (unlocked by rt_unlock_trie()). + * + * Return a current trie that will be locked, the value should be passed back to + * rt_unlock_trie() for unlocking. + * + */ +struct f_trie * +rt_lock_trie(rtable *tab) +{ + ASSERT(tab->trie); + + tab->trie_lock_count++; + return tab->trie; +} + +/** + * rt_unlock_trie - unlock a prefix trie of a routing table + * @tab: routing table with prefix trie to be locked + * @trie: value returned by matching rt_lock_trie() + * + * Done for trie locked by rt_lock_trie() after walk over the trie is done. + * It may free the trie and schedule next trie pruning. + */ +void +rt_unlock_trie(rtable *tab, struct f_trie *trie) +{ + ASSERT(trie); + + if (trie == tab->trie) + { + /* Unlock the current prefix trie */ + ASSERT(tab->trie_lock_count); + tab->trie_lock_count--; + } + else if (trie == tab->trie_old) + { + /* Unlock the old prefix trie */ + ASSERT(tab->trie_old_lock_count); + tab->trie_old_lock_count--; + + /* Free old prefix trie that is no longer needed */ + if (!tab->trie_old_lock_count) + { + rfree(tab->trie_old->lp); + tab->trie_old = NULL; + + /* Kick prefix trie pruning that was postponed */ + if (tab->trie && (tab->trie->prefix_count > (2 * tab->fib.entries))) + { + tab->prune_trie = 1; + rt_schedule_prune(tab); + } + } + } + else + log(L_BUG "Invalid arg to rt_unlock_trie()"); +} + + void rt_preconfig(struct config *c) { @@ -1743,171 +2523,425 @@ rt_preconfig(struct config *c) * triggered by rt_schedule_nhu(). */ -static inline int -rta_next_hop_outdated(rta *a) +void +ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]) { - struct hostentry *he = a->hostentry; + struct { + struct adata ad; + struct hostentry *he; + u32 labels[lnum]; + } *head = (void *) tmp_alloc_adata(sizeof *head - sizeof(struct adata)); - if (!he) - return 0; + head->he = rt_get_hostentry(tab, gw, ll, dep); + memcpy(head->labels, labels, lnum * sizeof(u32)); - if (!he->src) - return a->dest != RTD_UNREACHABLE; - - return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) || - (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh)); + ea_set_attr(to, EA_LITERAL_DIRECT_ADATA( + &ea_gen_hostentry, 0, &head->ad)); } -void -rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls) + +static void +rta_apply_hostentry(rta *a, struct hostentry_adata *head) { - a->hostentry = he; - a->dest = he->dest; - a->igp_metric = he->igp_metric; + struct hostentry *he = head->he; + u32 *labels = head->labels; + u32 lnum = (u32 *) (head->ad.data + head->ad.length) - labels; - if (a->dest != RTD_UNICAST) + ea_set_attr_u32(&a->eattrs, &ea_gen_igp_metric, 0, he->igp_metric); + + if (!he->src) { - /* No nexthop */ -no_nexthop: - a->nh = (struct nexthop) {}; - if (mls) - { /* Store the label stack for later changes */ - a->nh.labels_orig = a->nh.labels = mls->len; - memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32)); - } + ea_set_dest(&a->eattrs, 0, RTD_UNREACHABLE); return; } - if (((!mls) || (!mls->len)) && he->nexthop_linkable) + eattr *he_nh_ea = ea_find(he->src->eattrs, &ea_gen_nexthop); + ASSERT_DIE(he_nh_ea); + + struct nexthop_adata *nhad = (struct nexthop_adata *) he_nh_ea->u.ptr; + int idest = nhea_dest(he_nh_ea); + + if ((idest != RTD_UNICAST) || + !lnum && he->nexthop_linkable) { /* Just link the nexthop chain, no label append happens. */ - memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh))); + ea_copy_attr(&a->eattrs, he->src->eattrs, &ea_gen_nexthop); return; } - struct nexthop *nhp = NULL, *nhr = NULL; - int skip_nexthop = 0; + uint total_size = OFFSETOF(struct nexthop_adata, nh); - for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next) + NEXTHOP_WALK(nh, nhad) { - if (skip_nexthop) - skip_nexthop--; - else + if (nh->labels + lnum > MPLS_MAX_LABEL_STACK) { - nhr = nhp; - nhp = (nhp ? (nhp->next = lp_alloc(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh)); + log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)", + nh->labels, lnum, nh->labels + lnum, MPLS_MAX_LABEL_STACK); + continue; } - memset(nhp, 0, NEXTHOP_MAX_SIZE); - nhp->iface = nh->iface; - nhp->weight = nh->weight; + total_size += NEXTHOP_SIZE_CNT(nh->labels + lnum); + } - if (mls) + if (total_size == OFFSETOF(struct nexthop_adata, nh)) + { + log(L_WARN "No valid nexthop remaining, setting route unreachable"); + + struct nexthop_adata nha = { + .ad.length = NEXTHOP_DEST_SIZE, + .dest = RTD_UNREACHABLE, + }; + + ea_set_attr_data(&a->eattrs, &ea_gen_nexthop, 0, &nha.ad.data, nha.ad.length); + return; + } + + struct nexthop_adata *new = (struct nexthop_adata *) tmp_alloc_adata(total_size); + struct nexthop *dest = &new->nh; + + NEXTHOP_WALK(nh, nhad) + { + if (nh->labels + lnum > MPLS_MAX_LABEL_STACK) + continue; + + memcpy(dest, nh, NEXTHOP_SIZE(nh)); + if (lnum) { - nhp->labels = nh->labels + mls->len; - nhp->labels_orig = mls->len; - if (nhp->labels <= MPLS_MAX_LABEL_STACK) - { - memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */ - memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32)); /* Then the bottom labels */ - } - else - { - log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)", - nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK); - skip_nexthop++; - continue; - } - } - else if (nh->labels) - { - nhp->labels = nh->labels; - nhp->labels_orig = 0; - memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); + memcpy(&(dest->label[dest->labels]), labels, lnum * sizeof labels[0]); + dest->labels += lnum; } if (ipa_nonzero(nh->gw)) - { - nhp->gw = nh->gw; /* Router nexthop */ - nhp->flags |= (nh->flags & RNF_ONLINK); - } + /* Router nexthop */ + dest->flags = (dest->flags & RNF_ONLINK); else if (!(nh->iface->flags & IF_MULTIACCESS) || (nh->iface->flags & IF_LOOPBACK)) - nhp->gw = IPA_NONE; /* PtP link - no need for nexthop */ + dest->gw = IPA_NONE; /* PtP link - no need for nexthop */ else if (ipa_nonzero(he->link)) - nhp->gw = he->link; /* Device nexthop with link-local address known */ + dest->gw = he->link; /* Device nexthop with link-local address known */ else - nhp->gw = he->addr; /* Device nexthop with link-local address unknown */ + dest->gw = he->addr; /* Device nexthop with link-local address unknown */ + + dest = NEXTHOP_NEXT(dest); } - if (skip_nexthop) - if (nhr) - nhr->next = NULL; - else - { - a->dest = RTD_UNREACHABLE; - log(L_WARN "No valid nexthop remaining, setting route unreachable"); - goto no_nexthop; - } + /* Fix final length */ + new->ad.length = (void *) dest - (void *) new->ad.data; + ea_set_attr(&a->eattrs, EA_LITERAL_DIRECT_ADATA( + &ea_gen_nexthop, 0, &new->ad)); +} + +static inline struct hostentry_adata * +rta_next_hop_outdated(rta *a) +{ + /* First retrieve the hostentry */ + eattr *heea = ea_find(a->eattrs, &ea_gen_hostentry); + if (!heea) + return NULL; + + struct hostentry_adata *head = (struct hostentry_adata *) heea->u.ptr; + + /* If no nexthop is present, we have to create one */ + eattr *a_nh_ea = ea_find(a->eattrs, &ea_gen_nexthop); + if (!a_nh_ea) + return head; + + struct nexthop_adata *nhad = (struct nexthop_adata *) a_nh_ea->u.ptr; + + /* Shortcut for unresolvable hostentry */ + if (!head->he->src) + return NEXTHOP_IS_REACHABLE(nhad) ? head : NULL; + + /* Comparing our nexthop with the hostentry nexthop */ + eattr *he_nh_ea = ea_find(head->he->src->eattrs, &ea_gen_nexthop); + + return ( + (ea_get_int(a->eattrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN) != head->he->igp_metric) || + (!head->he->nexthop_linkable) || + (!he_nh_ea != !a_nh_ea) || + (he_nh_ea && a_nh_ea && !adata_same(he_nh_ea->u.ptr, a_nh_ea->u.ptr))) + ? head : NULL; } static inline struct rte_storage * rt_next_hop_update_rte(rtable *tab, net *n, rte *old) { - rta *a = alloca(RTA_MAX_SIZE); - memcpy(a, old->attrs, rta_size(old->attrs)); + struct hostentry_adata *head = rta_next_hop_outdated(old->attrs); + if (!head) + return NULL; - mpls_label_stack mls = { .len = a->nh.labels_orig }; - memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32)); - - rta_apply_hostentry(a, old->attrs->hostentry, &mls); - a->cached = 0; + rta a = *old->attrs; + a.cached = 0; + rta_apply_hostentry(&a, head); rte e0 = *old; - e0.attrs = a; + e0.attrs = &a; return rte_store(&e0, n, tab); } +static inline void +rt_next_hop_resolve_rte(rte *r) +{ + eattr *heea = ea_find(r->attrs->eattrs, &ea_gen_hostentry); + if (!heea) + return; + + struct hostentry_adata *head = (struct hostentry_adata *) heea->u.ptr; + + if (r->attrs->cached) + { + rta *a = tmp_alloc(RTA_MAX_SIZE); + *a = *r->attrs; + a->cached = 0; + r->attrs = a; + } + + rta_apply_hostentry(r->attrs, head); +} + +#ifdef CONFIG_BGP + +static inline int +net_flow_has_dst_prefix(const net_addr *n) +{ + ASSUME(net_is_flow(n)); + + if (n->pxlen) + return 1; + + if (n->type == NET_FLOW4) + { + const net_addr_flow4 *n4 = (void *) n; + return (n4->length > sizeof(net_addr_flow4)) && (n4->data[0] == FLOW_TYPE_DST_PREFIX); + } + else + { + const net_addr_flow6 *n6 = (void *) n; + return (n6->length > sizeof(net_addr_flow6)) && (n6->data[0] == FLOW_TYPE_DST_PREFIX); + } +} + +static inline int +rta_as_path_is_empty(rta *a) +{ + eattr *e = ea_find(a->eattrs, "bgp_path"); + return !e || (as_path_getlen(e->u.ptr) == 0); +} + +static inline u32 +rta_get_first_asn(rta *a) +{ + eattr *e = ea_find(a->eattrs, "bgp_path"); + u32 asn; + + return (e && as_path_get_first_regular(e->u.ptr, &asn)) ? asn : 0; +} + +static inline enum flowspec_valid +rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, int interior) +{ + ASSERT(rt_is_ip(tab_ip)); + ASSERT(rt_is_flow(tab_flow)); + ASSERT(tab_ip->trie); + + /* RFC 8955 6. a) Flowspec has defined dst prefix */ + if (!net_flow_has_dst_prefix(n)) + return FLOWSPEC_INVALID; + + /* RFC 9117 4.1. Accept AS_PATH is empty (fr */ + if (interior && rta_as_path_is_empty(a)) + return FLOWSPEC_VALID; + + + /* RFC 8955 6. b) Flowspec and its best-match route have the same originator */ + + /* Find flowspec dst prefix */ + net_addr dst; + if (n->type == NET_FLOW4) + net_fill_ip4(&dst, net4_prefix(n), net4_pxlen(n)); + else + net_fill_ip6(&dst, net6_prefix(n), net6_pxlen(n)); + + /* Find best-match BGP unicast route for flowspec dst prefix */ + net *nb = net_route(tab_ip, &dst); + const rte *rb = nb ? &nb->routes->rte : NULL; + + /* Register prefix to trie for tracking further changes */ + int max_pxlen = (n->type == NET_FLOW4) ? IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH; + trie_add_prefix(tab_flow->flowspec_trie, &dst, (nb ? nb->n.addr->pxlen : 0), max_pxlen); + + /* No best-match BGP route -> no flowspec */ + if (!rb || (rt_get_source_attr(rb) != RTS_BGP)) + return FLOWSPEC_INVALID; + + /* Find ORIGINATOR_ID values */ + u32 orig_a = ea_get_int(a->eattrs, "bgp_originator_id", 0); + u32 orig_b = ea_get_int(rb->attrs->eattrs, "bgp_originator_id", 0); + + /* Originator is either ORIGINATOR_ID (if present), or BGP neighbor address (if not) */ + if ((orig_a != orig_b) || (!orig_a && !orig_b && !ipa_equal( + ea_get_ip(a->eattrs, &ea_gen_from, IPA_NONE), + ea_get_ip(rb->attrs->eattrs, &ea_gen_from, IPA_NONE) + ))) + return FLOWSPEC_INVALID; + + + /* Find ASN of the best-match route, for use in next checks */ + u32 asn_b = rta_get_first_asn(rb->attrs); + if (!asn_b) + return FLOWSPEC_INVALID; + + /* RFC 9117 4.2. For EBGP, flowspec and its best-match route are from the same AS */ + if (!interior && (rta_get_first_asn(a) != asn_b)) + return FLOWSPEC_INVALID; + + /* RFC 8955 6. c) More-specific routes are from the same AS as the best-match route */ + TRIE_WALK(tab_ip->trie, subnet, &dst) + { + net *nc = net_find_valid(tab_ip, &subnet); + if (!nc) + continue; + + const rte *rc = &nc->routes->rte; + if (rt_get_source_attr(rc) != RTS_BGP) + return FLOWSPEC_INVALID; + + if (rta_get_first_asn(rc->attrs) != asn_b) + return FLOWSPEC_INVALID; + } + TRIE_WALK_END; + + return FLOWSPEC_VALID; +} + +#endif /* CONFIG_BGP */ + +static struct rte_storage * +rt_flowspec_update_rte(rtable *tab, net *n, rte *r) +{ +#ifdef CONFIG_BGP + if (rt_get_source_attr(r) != RTS_BGP) + return NULL; + + struct bgp_channel *bc = (struct bgp_channel *) SKIP_BACK(struct channel, in_req, r->sender->req); + if (!bc->base_table) + return NULL; + + struct bgp_proto *p = SKIP_BACK(struct bgp_proto, p, bc->c.proto); + + enum flowspec_valid old = rt_get_flowspec_valid(r), + valid = rt_flowspec_check(bc->base_table, tab, n->n.addr, r->attrs, p->is_interior); + + if (old == valid) + return NULL; + + rta *a = alloca(RTA_MAX_SIZE); + *a = *r->attrs; + a->cached = 0; + + ea_set_attr_u32(&a->eattrs, &ea_gen_flowspec_valid, 0, valid); + + rte new; + memcpy(&new, r, sizeof(rte)); + new.attrs = a; + + return rte_store(&new, n, tab); +#else + return NULL; +#endif +} + +static inline void +rt_flowspec_resolve_rte(rte *r, struct channel *c) +{ +#ifdef CONFIG_BGP + enum flowspec_valid valid, old = rt_get_flowspec_valid(r); + struct bgp_channel *bc = (struct bgp_channel *) c; + + if ( (rt_get_source_attr(r) == RTS_BGP) + && (c->channel == &channel_bgp) + && (bc->base_table)) + { + struct bgp_proto *p = SKIP_BACK(struct bgp_proto, p, bc->c.proto); + valid = rt_flowspec_check( + bc->base_table, + c->in_req.hook->table, + r->net, r->attrs, p->is_interior); + } + else + valid = FLOWSPEC_UNKNOWN; + + if (valid == old) + return; + + if (r->attrs->cached) + { + rta *a = tmp_alloc(RTA_MAX_SIZE); + *a = *r->attrs; + a->cached = 0; + r->attrs = a; + } + + if (valid == FLOWSPEC_UNKNOWN) + ea_unset_attr(&r->attrs->eattrs, 0, &ea_gen_flowspec_valid); + else + ea_set_attr_u32(&r->attrs->eattrs, &ea_gen_flowspec_valid, 0, valid); +#endif +} + static inline int rt_next_hop_update_net(rtable *tab, net *n) { - struct rte_storage **k, *e, *new, *old_best, **new_best; + struct rte_storage *new; int count = 0; - int free_old_best = 0; + int is_flow = net_is_flow(n->n.addr); - old_best = n->routes; + struct rte_storage *old_best = n->routes; if (!old_best) return 0; - for (k = &n->routes; e = *k; k = &e->next) - if (rta_next_hop_outdated(e->rte.attrs)) - { - new = rt_next_hop_update_rte(tab, n, &e->rte); - new->next = e->next; - *k = new; - - rte_trace_in(D_ROUTES, new->rte.sender, &new->rte, "updated"); - rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL); - - /* Call a pre-comparison hook */ - /* Not really an efficient way to compute this */ - if (e->rte.src->proto->rte_recalculate) - e->rte.src->proto->rte_recalculate(tab, n, &new->rte, &e->rte, NULL); - - if (e != old_best) - rte_free(e, tab); - else /* Freeing of the old best rte is postponed */ - free_old_best = 1; - - e = new; - count++; - } + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) + if (is_flow || rta_next_hop_outdated(e->rte.attrs)) + count++; if (!count) return 0; + struct rte_multiupdate { + struct rte_storage *old, *new; + } *updates = alloca(sizeof(struct rte_multiupdate) * count); + + int pos = 0; + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) + if (is_flow || rta_next_hop_outdated(e->rte.attrs)) + { + struct rte_storage *new = is_flow + ? rt_flowspec_update_rte(tab, n, &e->rte) + : rt_next_hop_update_rte(tab, n, &e->rte); + + if (!new) + continue; + + /* Call a pre-comparison hook */ + /* Not really an efficient way to compute this */ + if (e->rte.src->proto->rte_recalculate) + e->rte.src->proto->rte_recalculate(tab, n, &new->rte, &e->rte, &old_best->rte); + + updates[pos++] = (struct rte_multiupdate) { + .old = e, + .new = new, + }; + + /* Replace the route in the list */ + new->next = e->next; + *k = e = new; + } + + ASSERT_DIE(pos <= count); + count = pos; + /* Find the new best route */ - new_best = NULL; - for (k = &n->routes; e = *k; k = &e->next) + struct rte_storage **new_best = NULL; + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) { if (!new_best || rte_better(&e->rte, &(*new_best)->rte)) new_best = k; @@ -1922,15 +2956,17 @@ rt_next_hop_update_net(rtable *tab, net *n) n->routes = new; } - /* Announce the new best route */ - if (new != old_best) - rte_trace_in(D_ROUTES, new->rte.sender, &new->rte, "updated [best]"); + /* Announce the changes */ + for (int i=0; i<count; i++) + { + _Bool nb = (new == updates[i].new), ob = (old_best == updates[i].old); + const char *best_indicator[2][2] = { { "updated", "updated [-best]" }, { "updated [+best]", "updated [best]" } }; + rt_rte_trace_in(D_ROUTES, updates[i].new->rte.sender->req, &updates[i].new->rte, best_indicator[nb][ob]); + rte_announce_i(tab, n, updates[i].new, updates[i].old, new, old_best); + } - /* Propagate changes */ - rte_announce_i(tab, RA_UNDEF, n, NULL, NULL, n->routes, old_best); - - if (free_old_best) - rte_free(old_best, tab); + for (int i=0; i<count; i++) + rte_free(updates[i].old); return count; } @@ -1948,6 +2984,9 @@ rt_next_hop_update(rtable *tab) { FIB_ITERATE_INIT(fit, &tab->fib); tab->nhu_state = NHU_RUNNING; + + if (tab->flowspec_trie) + rt_flowspec_reset_trie(tab); } FIB_ITERATE_START(&tab->fib, fit, net, n) @@ -2036,6 +3075,22 @@ rt_unlock_table(rtable *r) } } +static int +rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old) +{ + if ((new->addr_type != old->addr_type) || + (new->sorted != old->sorted) || + (new->trie_used != old->trie_used)) + return 0; + + DBG("\t%s: same\n", new->name); + new->table = tab; + tab->name = new->name; + tab->config = new; + + return 1; +} + static struct rtable_config * rt_find_table_config(struct config *cf, char *name) { @@ -2065,28 +3120,19 @@ rt_commit(struct config *new, struct config *old) { WALK_LIST(o, old->tables) { - rtable *ot = o->table; - if (!ot->deleted) - { - r = rt_find_table_config(new, o->name); - if (r && (r->addr_type == o->addr_type) && !new->shutdown) - { - DBG("\t%s: same\n", o->name); - r->table = ot; - ot->name = r->name; - ot->config = r; - if (o->sorted != r->sorted) - log(L_WARN "Reconfiguration of rtable sorted flag not implemented"); - } - else - { - DBG("\t%s: deleted\n", o->name); - ot->deleted = old; - config_add_obstacle(old); - rt_lock_table(ot); - rt_unlock_table(ot); - } - } + rtable *tab = o->table; + if (tab->deleted) + continue; + + r = rt_find_table_config(new, o->name); + if (r && !new->shutdown && rt_reconfigure(tab, r, o)) + continue; + + DBG("\t%s: deleted\n", o->name); + tab->deleted = old; + config_add_obstacle(old); + rt_lock_table(tab); + rt_unlock_table(tab); } } @@ -2100,22 +3146,6 @@ rt_commit(struct config *new, struct config *old) DBG("\tdone\n"); } -static inline void -do_feed_channel(struct channel *c, net *n, rte *e) -{ - rte_update_lock(); - if (c->ra_mode == RA_ACCEPTED) - rt_notify_accepted(c, n, NULL, NULL, c->refeeding); - else if (c->ra_mode == RA_MERGED) - rt_notify_merged(c, n, NULL, NULL, e, e, c->refeeding); - else /* RA_BASIC */ - { - rte e0 = *e; - rt_notify_basic(c, n->n.addr, &e0, &e0, c->refeeding); - } - rte_update_unlock(); -} - /** * rt_feed_channel - advertise all routes to a channel * @c: channel to be fed @@ -2125,79 +3155,55 @@ do_feed_channel(struct channel *c, net *n, rte *e) * has something to do. (We avoid transferring all the routes in single pass in * order not to monopolize CPU time.) */ -int -rt_feed_channel(struct channel *c) +static void +rt_feed_channel(void *data) { + struct rt_export_hook *c = data; + struct fib_iterator *fit = &c->feed_fit; int max_feed = 256; - ASSERT(c->export_state == ES_FEEDING); - - if (!c->feed_active) - { - FIB_ITERATE_INIT(fit, &c->table->fib); - c->feed_active = 1; - } + ASSERT(c->export_state == TES_FEEDING); FIB_ITERATE_START(&c->table->fib, fit, net, n) { - struct rte_storage *e = n->routes; if (max_feed <= 0) { FIB_ITERATE_PUT(fit); - return 0; + ev_schedule_work(c->event); + return; } - if ((c->ra_mode == RA_OPTIMAL) || - (c->ra_mode == RA_ACCEPTED) || - (c->ra_mode == RA_MERGED)) - if (e && rte_is_valid(&e->rte)) - { - /* In the meantime, the protocol may fell down */ - if (c->export_state != ES_FEEDING) - goto done; + if (c->export_state != TES_FEEDING) + goto done; - do_feed_channel(c, n, &e->rte); - max_feed--; - } - - if (c->ra_mode == RA_ANY) - for(e = n->routes; e; e = e->next) - { - /* In the meantime, the protocol may fell down */ - if (c->export_state != ES_FEEDING) - goto done; - - if (!rte_is_valid(&e->rte)) - continue; - - do_feed_channel(c, n, &e->rte); - max_feed--; - } + if (c->req->export_bulk) + { + uint count = rte_feed_count(n); + if (count) + { + rte_update_lock(); + rte **feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(n, feed, count); + struct rt_pending_export rpe = { .new_best = n->routes }; + c->req->export_bulk(c->req, n->n.addr, &rpe, feed, count); + max_feed -= count; + rte_update_unlock(); + } + } + else if (n->routes && rte_is_valid(&n->routes->rte)) + { + rte_update_lock(); + struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes }; + c->req->export_one(c->req, n->n.addr, &rpe); + max_feed--; + rte_update_unlock(); + } } FIB_ITERATE_END; done: - c->feed_active = 0; - return 1; -} - -/** - * rt_feed_baby_abort - abort protocol feeding - * @c: channel - * - * This function is called by the protocol code when the protocol stops or - * ceases to exist during the feeding. - */ -void -rt_feed_channel_abort(struct channel *c) -{ - if (c->feed_active) - { - /* Unlink the iterator */ - fit_get(&c->table->fib, &c->feed_fit); - c->feed_active = 0; - } + rt_set_export_state(c, TES_READY); } @@ -2205,7 +3211,7 @@ rt_feed_channel_abort(struct channel *c) * Import table */ -static int +int rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) { struct rtable *tab = c->in_table; @@ -2238,6 +3244,9 @@ rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *sr goto drop_update; } + if (!new) + CHANNEL_LIMIT_POP(c, RX); + /* Move iterator if needed */ if (*pos == c->reload_next_rte) c->reload_next_rte = (*pos)->next; @@ -2245,10 +3254,21 @@ rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *sr /* Remove the old rte */ struct rte_storage *del = *pos; *pos = (*pos)->next; - rte_free(del, tab); + rte_free(del); tab->rt_count--; } - else if (!new) + else if (new) + { + if (CHANNEL_LIMIT_PUSH(c, RX)) + { + /* Required by rte_trace_in() */ + new->net = n; + + channel_rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); + goto drop_update; + } + } + else goto drop_withdraw; if (!new) @@ -2259,25 +3279,8 @@ rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *sr return 1; } - struct channel_limit *l = &c->rx_limit; - if (l->action && !*pos) - { - if (tab->rt_count >= l->limit) - channel_notify_limit(c, l, PLD_RX, tab->rt_count); - - if (l->state == PLS_BLOCKED) - { - /* Required by rte_trace_in() */ - new->net = n; - - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - goto drop_update; - } - } - /* Insert the new rte */ struct rte_storage *e = rte_store(new, net, tab); - e->rte.sender = c; e->rte.lastmod = current_time(); e->next = *pos; *pos = e; @@ -2285,8 +3288,8 @@ rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *sr return 1; drop_update: - c->stats.imp_updates_received++; - c->stats.imp_updates_ignored++; + c->import_stats.updates_received++; + c->in_req.hook->stats.updates_ignored++; if (!net->routes) fib_delete(&tab->fib, net); @@ -2294,8 +3297,8 @@ drop_update: return 0; drop_withdraw: - c->stats.imp_withdraws_received++; - c->stats.imp_withdraws_ignored++; + c->import_stats.withdraws_received++; + c->in_req.hook->stats.withdraws_ignored++; return 0; } @@ -2325,7 +3328,7 @@ rt_reload_channel(struct channel *c) } rte r = e->rte; - rte_update(c, r.net, &r, r.src); + rte_update_direct(c, r.net, &r, r.src); } c->reload_next_rte = NULL; @@ -2375,7 +3378,7 @@ again: if (all || (e->rte.flags & (REF_STALE | REF_DISCARD))) { *ee = e->next; - rte_free(e, t); + rte_free(e); t->rt_count--; } else @@ -2398,7 +3401,7 @@ again: */ int -rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, struct rte_storage **old_exported, int refeed) +rte_update_out(struct channel *c, const net_addr *n, rte *new, const rte *old0, struct rte_storage **old_exported) { struct rtable *tab = c->out_table; struct rte_src *src; @@ -2415,7 +3418,7 @@ rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, struct src = old0->src; if (!net) - goto drop_withdraw; + goto drop; } /* Find the old rte */ @@ -2425,7 +3428,7 @@ rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, struct if (old = *pos) { if (new && rte_same(&(*pos)->rte, new)) - goto drop_update; + goto drop; /* Remove the old rte */ *pos = old->next; @@ -2436,7 +3439,7 @@ rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, struct if (!new) { if (!old) - goto drop_withdraw; + goto drop; if (!net->routes) fib_delete(&tab->fib, net); @@ -2452,13 +3455,36 @@ rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, struct tab->rt_count++; return 1; -drop_update: - return refeed; - -drop_withdraw: +drop: return 0; } +void +rt_refeed_channel(struct channel *c) +{ + if (!c->out_table) + { + channel_request_feeding(c); + return; + } + + ASSERT_DIE(c->ra_mode != RA_ANY); + + c->proto->feed_begin(c, 0); + + FIB_WALK(&c->out_table->fib, net, n) + { + if (!n->routes) + continue; + + rte e = n->routes->rte; + c->proto->rt_notify(c->proto, c, n->n.addr, &e, NULL); + } + FIB_WALK_END; + + c->proto->feed_end(c); +} + /* * Hostcache @@ -2555,7 +3581,7 @@ hc_delete_hostentry(struct hostcache *hc, pool *p, struct hostentry *he) rem_node(&he->ln); hc_remove(hc, he); - sl_free(hc->slab, he); + sl_free(he); hc->hash_items--; if (hc->hash_items < hc->hash_min) @@ -2572,7 +3598,7 @@ rt_init_hostcache(rtable *tab) hc_alloc_table(hc, tab->rp, HC_DEF_ORDER); hc->slab = sl_new(tab->rp, sizeof(struct hostentry)); - hc->lp = lp_new(tab->rp, LP_GOOD_SIZE(1024)); + hc->lp = lp_new(tab->rp); hc->trie = f_new_trie(hc->lp, 0); tab->hostcache = hc; @@ -2624,14 +3650,14 @@ if_local_addr(ip_addr a, struct iface *i) } u32 -rt_get_igp_metric(rte *rt) +rt_get_igp_metric(const rte *rt) { - eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC); + eattr *ea = ea_find(rt->attrs->eattrs, "igp_metric"); if (ea) return ea->u.data; - if (rt->attrs->source == RTS_DEVICE) + if (rt_get_source_attr(rt) == RTS_DEVICE) return 0; if (rt->src->proto->rte_igp_metric) @@ -2649,7 +3675,6 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) /* Reset the hostentry */ he->src = NULL; - he->dest = RTD_UNREACHABLE; he->nexthop_linkable = 0; he->igp_metric = 0; @@ -2662,7 +3687,7 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) rta *a = e->rte.attrs; pxlen = n->n.addr->pxlen; - if (a->hostentry) + if (ea_find(a->eattrs, &ea_gen_hostentry)) { /* Recursive route should not depend on another recursive route */ log(L_WARN "Next hop address %I resolvable through recursive route for %N", @@ -2670,9 +3695,12 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) goto done; } - if (a->dest == RTD_UNICAST) - { - for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) + eattr *nhea = ea_find(a->eattrs, &ea_gen_nexthop); + ASSERT_DIE(nhea); + struct nexthop_adata *nhad = (void *) nhea->u.ptr; + + if (NEXTHOP_IS_REACHABLE(nhad)) + NEXTHOP_WALK(nh, nhad) if (ipa_zero(nh->gw)) { if (if_local_addr(he->addr, nh->iface)) @@ -2685,10 +3713,8 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) direct++; } - } he->src = rta_clone(a); - he->dest = a->dest; he->nexthop_linkable = !direct; he->igp_metric = rt_get_igp_metric(&e->rte); } @@ -2728,7 +3754,7 @@ rt_update_hostcache(rtable *tab) tab->hcu_scheduled = 0; } -struct hostentry * +static struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) { struct hostentry *he; diff --git a/nest/rt.h b/nest/rt.h new file mode 100644 index 00000000..eb868aa7 --- /dev/null +++ b/nest/rt.h @@ -0,0 +1,486 @@ +/* + * BIRD Internet Routing Daemon -- Routing Table + * + * (c) 1998--2000 Martin Mares <mj@ucw.cz> + * (c) 2019--2021 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_NEST_RT_H_ +#define _BIRD_NEST_RT_H_ + +#include "lib/lists.h" +#include "lib/bitmap.h" +#include "lib/resource.h" +#include "lib/net.h" +#include "lib/type.h" +#include "lib/fib.h" +#include "lib/route.h" + +struct ea_list; +struct protocol; +struct proto; +struct channel; +struct rte_src; +struct symbol; +struct timer; +struct filter; +struct f_trie; +struct f_trie_walk_state; +struct cli; + +/* + * Master Routing Tables. Generally speaking, each of them contains a FIB + * with each entry pointing to a list of route entries representing routes + * to given network (with the selected one at the head). + * + * Each of the RTE's contains variable data (the preference and protocol-dependent + * metrics) and a pointer to a route attribute block common for many routes). + * + * It's guaranteed that there is at most one RTE for every (prefix,proto) pair. + */ + +struct rtable_config { + node n; + char *name; + struct rtable *table; + struct proto_config *krt_attached; /* Kernel syncer attached to this table */ + uint addr_type; /* Type of address data stored in table (NET_*) */ + int gc_max_ops; /* Maximum number of operations before GC is run */ + int gc_min_time; /* Minimum time between two consecutive GC runs */ + byte sorted; /* Routes of network are sorted according to rte_better() */ + byte internal; /* Internal table of a protocol */ + byte trie_used; /* Rtable has attached trie */ + btime min_settle_time; /* Minimum settle time for notifications */ + btime max_settle_time; /* Maximum settle time for notifications */ +}; + +typedef struct rtable { + resource r; + node n; /* Node in list of all tables */ + pool *rp; /* Resource pool to allocate everything from, including itself */ + struct slab *rte_slab; /* Slab to allocate route objects */ + struct fib fib; + struct f_trie *trie; /* Trie of prefixes defined in fib */ + char *name; /* Name of this table */ + uint addr_type; /* Type of address data stored in table (NET_*) */ + int use_count; /* Number of protocols using this table */ + u32 rt_count; /* Number of routes in the table */ + + list imports; /* Registered route importers */ + list exports; /* Registered route exporters */ + + struct hmap id_map; + struct hostcache *hostcache; + struct rtable_config *config; /* Configuration of this table */ + struct config *deleted; /* Table doesn't exist in current configuration, + * delete as soon as use_count becomes 0 and remove + * obstacle from this routing table. + */ + struct event *rt_event; /* Routing table event */ + btime last_rt_change; /* Last time when route changed */ + btime base_settle_time; /* Start time of rtable settling interval */ + btime gc_time; /* Time of last GC */ + int gc_counter; /* Number of operations since last GC */ + byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */ + byte prune_trie; /* Prune prefix trie during next table prune */ + byte hcu_scheduled; /* Hostcache update is scheduled */ + byte nhu_state; /* Next Hop Update state */ + byte internal; /* This table is internal for some other object */ + struct fib_iterator prune_fit; /* Rtable prune FIB iterator */ + struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */ + struct f_trie *trie_new; /* New prefix trie defined during pruning */ + struct f_trie *trie_old; /* Old prefix trie waiting to be freed */ + u32 trie_lock_count; /* Prefix trie locked by walks */ + u32 trie_old_lock_count; /* Old prefix trie locked by walks */ + struct tbf rl_pipe; /* Rate limiting token buffer for pipe collisions */ + + list subscribers; /* Subscribers for notifications */ + struct timer *settle_timer; /* Settle time for notifications */ + list flowspec_links; /* List of flowspec links, src for NET_IPx and dst for NET_FLOWx */ + struct f_trie *flowspec_trie; /* Trie for evaluation of flowspec notifications */ +} rtable; + +struct rt_subscription { + node n; + rtable *tab; + void (*hook)(struct rt_subscription *b); + void *data; +}; + +struct rt_flowspec_link { + node n; + rtable *src; + rtable *dst; + u32 uc; +}; + +#define NHU_CLEAN 0 +#define NHU_SCHEDULED 1 +#define NHU_RUNNING 2 +#define NHU_DIRTY 3 + +typedef struct network { + struct rte_storage *routes; /* Available routes for this network */ + struct fib_node n; /* FIB flags reserved for kernel syncer */ +} net; + +struct hostcache { + slab *slab; /* Slab holding all hostentries */ + struct hostentry **hash_table; /* Hash table for hostentries */ + unsigned hash_order, hash_shift; + unsigned hash_max, hash_min; + unsigned hash_items; + linpool *lp; /* Linpool for trie */ + struct f_trie *trie; /* Trie of prefixes that might affect hostentries */ + list hostentries; /* List of all hostentries */ + byte update_hostcache; +}; + +struct hostentry { + node ln; + ip_addr addr; /* IP address of host, part of key */ + ip_addr link; /* (link-local) IP address of host, used as gw + if host is directly attached */ + struct rtable *tab; /* Dependent table, part of key */ + struct hostentry *next; /* Next in hash chain */ + unsigned hash_key; /* Hash key */ + unsigned uc; /* Use count */ + struct rta *src; /* Source rta entry */ + byte nexthop_linkable; /* Nexthop list is completely non-device */ + u32 igp_metric; /* Chosen route IGP metric */ +}; + +struct rte_storage { + struct rte_storage *next; /* Next in chain */ + struct rte rte; /* Route data */ +}; + +#define RTE_COPY(r, l) ((r) ? (((*(l)) = (r)->rte), (l)) : NULL) +#define RTE_OR_NULL(r) ((r) ? &((r)->rte) : NULL) + +/* Table-channel connections */ + +struct rt_import_request { + struct rt_import_hook *hook; /* The table part of importer */ + char *name; + u8 trace_routes; + + void (*dump_req)(struct rt_import_request *req); + void (*log_state_change)(struct rt_import_request *req, u8 state); + /* Preimport is called when the @new route is just-to-be inserted, replacing @old. + * Return a route (may be different or modified in-place) to continue or NULL to withdraw. */ + struct rte *(*preimport)(struct rt_import_request *req, struct rte *new, struct rte *old); + struct rte *(*rte_modify)(struct rte *, struct linpool *); +}; + +struct rt_import_hook { + node n; + rtable *table; /* The connected table */ + struct rt_import_request *req; /* The requestor */ + + struct rt_import_stats { + /* Import - from protocol to core */ + u32 pref; /* Number of routes selected as best in the (adjacent) routing table */ + u32 updates_ignored; /* Number of route updates rejected as already in route table */ + u32 updates_accepted; /* Number of route updates accepted and imported */ + u32 withdraws_ignored; /* Number of route withdraws rejected as already not in route table */ + u32 withdraws_accepted; /* Number of route withdraws accepted and processed */ + } stats; + + btime last_state_change; /* Time of last state transition */ + + u8 import_state; /* IS_* */ + + void (*stopped)(struct rt_import_request *); /* Stored callback when import is stopped */ +}; + +struct rt_pending_export { + struct rte_storage *new, *new_best, *old, *old_best; +}; + +struct rt_export_request { + struct rt_export_hook *hook; /* Table part of the export */ + char *name; + u8 trace_routes; + + /* There are two methods of export. You can either request feeding every single change + * or feeding the whole route feed. In case of regular export, &export_one is preferred. + * Anyway, when feeding, &export_bulk is preferred, falling back to &export_one. + * Thus, for RA_OPTIMAL, &export_one is only set, + * for RA_MERGED and RA_ACCEPTED, &export_bulk is only set + * and for RA_ANY, both are set to accomodate for feeding all routes but receiving single changes + */ + void (*export_one)(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); + void (*export_bulk)(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); + + void (*dump_req)(struct rt_export_request *req); + void (*log_state_change)(struct rt_export_request *req, u8); +}; + +struct rt_export_hook { + node n; + rtable *table; /* The connected table */ + + pool *pool; + linpool *lp; + + struct rt_export_request *req; /* The requestor */ + + struct rt_export_stats { + /* Export - from core to protocol */ + u32 updates_received; /* Number of route updates received */ + u32 withdraws_received; /* Number of route withdraws received */ + } stats; + + struct fib_iterator feed_fit; /* Routing table iterator used during feeding */ + + btime last_state_change; /* Time of last state transition */ + + u8 refeed_pending; /* Refeeding and another refeed is scheduled */ + u8 export_state; /* Route export state (TES_*, see below) */ + + struct event *event; /* Event running all the export operations */ + + void (*stopped)(struct rt_export_request *); /* Stored callback when export is stopped */ +}; + +#define TIS_DOWN 0 +#define TIS_UP 1 +#define TIS_STOP 2 +#define TIS_FLUSHING 3 +#define TIS_WAITING 4 +#define TIS_CLEARED 5 +#define TIS_MAX 6 + +#define TES_DOWN 0 +#define TES_HUNGRY 1 +#define TES_FEEDING 2 +#define TES_READY 3 +#define TES_STOP 4 +#define TES_MAX 5 + +void rt_request_import(rtable *tab, struct rt_import_request *req); +void rt_request_export(rtable *tab, struct rt_export_request *req); + +void rt_stop_import(struct rt_import_request *, void (*stopped)(struct rt_import_request *)); +void rt_stop_export(struct rt_export_request *, void (*stopped)(struct rt_export_request *)); + +const char *rt_import_state_name(u8 state); +const char *rt_export_state_name(u8 state); + +static inline u8 rt_import_get_state(struct rt_import_hook *ih) { return ih ? ih->import_state : TIS_DOWN; } +static inline u8 rt_export_get_state(struct rt_export_hook *eh) { return eh ? eh->export_state : TES_DOWN; } + +void rte_import(struct rt_import_request *req, const net_addr *net, rte *new, struct rte_src *src); + +/* Types of route announcement, also used as flags */ +#define RA_UNDEF 0 /* Undefined RA type */ +#define RA_OPTIMAL 1 /* Announcement of optimal route change */ +#define RA_ACCEPTED 2 /* Announcement of first accepted route */ +#define RA_ANY 3 /* Announcement of any route change */ +#define RA_MERGED 4 /* Announcement of optimal route merged with next ones */ + +/* Return value of preexport() callback */ +#define RIC_ACCEPT 1 /* Accepted by protocol */ +#define RIC_PROCESS 0 /* Process it through import filter */ +#define RIC_REJECT -1 /* Rejected by protocol */ +#define RIC_DROP -2 /* Silently dropped by protocol */ + +#define rte_update channel_rte_import +/** + * rte_update - enter a new update to a routing table + * @c: channel doing the update + * @net: network address + * @rte: a &rte representing the new route + * @src: old route source identifier + * + * This function imports a new route to the appropriate table (via the channel). + * Table keys are @net (obligatory) and @rte->attrs->src. + * Both the @net and @rte pointers can be local. + * + * The route attributes (@rte->attrs) are obligatory. They can be also allocated + * locally. Anyway, if you use an already-cached attribute object, you shall + * call rta_clone() on that object yourself. (This semantics may change in future.) + * + * If the route attributes are local, you may set @rte->attrs->src to NULL, then + * the protocol's default route source will be supplied. + * + * When rte_update() gets a route, it automatically validates it. This includes + * checking for validity of the given network and next hop addresses and also + * checking for host-scope or link-scope routes. Then the import filters are + * processed and if accepted, the route is passed to route table recalculation. + * + * The accepted routes are then inserted into the table, replacing the old route + * for the same @net identified by @src. Then the route is announced + * to all the channels connected to the table using the standard export mechanism. + * Setting @rte to NULL makes this a withdraw, otherwise @rte->src must be the same + * as @src. + * + * All memory used for temporary allocations is taken from a special linpool + * @rte_update_pool and freed when rte_update() finishes. + */ +void rte_update(struct channel *c, const net_addr *net, struct rte *rte, struct rte_src *src); + +extern list routing_tables; +struct config; + +void rt_init(void); +void rt_preconfig(struct config *); +void rt_commit(struct config *new, struct config *old); +void rt_lock_table(rtable *); +void rt_unlock_table(rtable *); +struct f_trie * rt_lock_trie(rtable *tab); +void rt_unlock_trie(rtable *tab, struct f_trie *trie); +void rt_subscribe(rtable *tab, struct rt_subscription *s); +void rt_unsubscribe(struct rt_subscription *s); +void rt_flowspec_link(rtable *src, rtable *dst); +void rt_flowspec_unlink(rtable *src, rtable *dst); +rtable *rt_setup(pool *, struct rtable_config *); +static inline void rt_shutdown(rtable *r) { rfree(r->rp); } + +static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); } +static inline net *net_find_valid(rtable *tab, const net_addr *addr) +{ net *n = net_find(tab, addr); return (n && n->routes && rte_is_valid(&n->routes->rte)) ? n : NULL; } +static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } +net *net_get(rtable *tab, const net_addr *addr); +net *net_route(rtable *tab, const net_addr *n); +int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter); +rte *rt_export_merged(struct channel *c, rte ** feed, uint count, linpool *pool, int silent); +void rt_refresh_begin(rtable *t, struct rt_import_request *); +void rt_refresh_end(rtable *t, struct rt_import_request *); +void rt_modify_stale(rtable *t, struct rt_import_request *); +void rt_schedule_prune(rtable *t); +void rte_dump(struct rte_storage *); +void rte_free(struct rte_storage *); +struct rte_storage *rte_store(const rte *, net *net, rtable *); +void rt_dump(rtable *); +void rt_dump_all(void); +void rt_dump_hooks(rtable *); +void rt_dump_hooks_all(void); +int rt_reload_channel(struct channel *c); +void rt_reload_channel_abort(struct channel *c); +void rt_refeed_channel(struct channel *c); +void rt_prune_sync(rtable *t, int all); +int rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); +int rte_update_out(struct channel *c, const net_addr *n, rte *new, const rte *old, struct rte_storage **old_exported); +struct rtable_config *rt_new_table(struct symbol *s, uint addr_type); + +static inline int rt_is_ip(rtable *tab) +{ return (tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6); } + +static inline int rt_is_vpn(rtable *tab) +{ return (tab->addr_type == NET_VPN4) || (tab->addr_type == NET_VPN6); } + +static inline int rt_is_roa(rtable *tab) +{ return (tab->addr_type == NET_ROA4) || (tab->addr_type == NET_ROA6); } + +static inline int rt_is_flow(rtable *tab) +{ return (tab->addr_type == NET_FLOW4) || (tab->addr_type == NET_FLOW6); } + + +/* Default limit for ECMP next hops, defined in sysdep code */ +extern const int rt_default_ecmp; + +struct rt_show_data_rtable { + node n; + rtable *table; + struct channel *export_channel; +}; + +struct rt_show_data { + net_addr *addr; + list tables; + struct rt_show_data_rtable *tab; /* Iterator over table list */ + struct rt_show_data_rtable *last_table; /* Last table in output */ + struct fib_iterator fit; /* Iterator over networks in table */ + struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */ + struct f_trie *walk_lock; /* Locked trie for walking */ + int verbose, tables_defined_by; + const struct filter *filter; + struct proto *show_protocol; + struct proto *export_protocol; + struct channel *export_channel; + struct config *running_on_config; + struct krt_proto *kernel; + struct rt_export_hook *kernel_export_hook; + int export_mode, addr_mode, primary_only, filtered, stats; + + int table_open; /* Iteration (fit) is open */ + int trie_walk; /* Current table is iterated using trie */ + int net_counter, rt_counter, show_counter, table_counter; + int net_counter_last, rt_counter_last, show_counter_last; +}; + +void rt_show(struct rt_show_data *); +struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, rtable *t); + +/* Value of table definition mode in struct rt_show_data */ +#define RSD_TDB_DEFAULT 0 /* no table specified */ +#define RSD_TDB_INDIRECT 0 /* show route ... protocol P ... */ +#define RSD_TDB_ALL RSD_TDB_SET /* show route ... table all ... */ +#define RSD_TDB_DIRECT RSD_TDB_SET | RSD_TDB_NMN /* show route ... table X table Y ... */ + +#define RSD_TDB_SET 0x1 /* internal: show empty tables */ +#define RSD_TDB_NMN 0x2 /* internal: need matching net */ + +/* Value of addr_mode */ +#define RSD_ADDR_EQUAL 1 /* Exact query - show route <addr> */ +#define RSD_ADDR_FOR 2 /* Longest prefix match - show route for <addr> */ +#define RSD_ADDR_IN 3 /* Interval query - show route in <addr> */ + +/* Value of export_mode in struct rt_show_data */ +#define RSEM_NONE 0 /* Export mode not used */ +#define RSEM_PREEXPORT 1 /* Routes ready for export, before filtering */ +#define RSEM_EXPORT 2 /* Routes accepted by export filter */ +#define RSEM_NOEXPORT 3 /* Routes rejected by export filter */ +#define RSEM_EXPORTED 4 /* Routes marked in export map */ + +/* Host entry: Resolve hook for recursive nexthops */ +extern struct ea_class ea_gen_hostentry; +struct hostentry_adata { + adata ad; + struct hostentry *he; + u32 labels[0]; +}; + +void +ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]); + +/* +struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); +void rta_apply_hostentry(rta *a, struct hostentry *he, u32 lnum, u32 labels[lnum]); + +static inline void +rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]) +{ + rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ll, dep), lnum, labels); +} +*/ + +/* + * Default protocol preferences + */ + +#define DEF_PREF_DIRECT 240 /* Directly connected */ +#define DEF_PREF_STATIC 200 /* Static route */ +#define DEF_PREF_OSPF 150 /* OSPF intra-area, inter-area and type 1 external routes */ +#define DEF_PREF_BABEL 130 /* Babel */ +#define DEF_PREF_RIP 120 /* RIP */ +#define DEF_PREF_BGP 100 /* BGP */ +#define DEF_PREF_RPKI 100 /* RPKI */ +#define DEF_PREF_INHERITED 10 /* Routes inherited from other routing daemons */ +#define DEF_PREF_UNKNOWN 0 /* Routes with no preference set */ + +/* + * Route Origin Authorization + */ + +#define ROA_UNKNOWN 0 +#define ROA_VALID 1 +#define ROA_INVALID 2 + +int net_roa_check(rtable *tab, const net_addr *n, u32 asn); + +#endif diff --git a/proto/babel/Makefile b/proto/babel/Makefile index 06b58e95..ae6aeaf2 100644 --- a/proto/babel/Makefile +++ b/proto/babel/Makefile @@ -2,5 +2,6 @@ src := babel.c packets.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,babel_build) tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/babel/babel.c b/proto/babel/babel.c index 03c85b7d..65d2567e 100644 --- a/proto/babel/babel.c +++ b/proto/babel/babel.c @@ -37,6 +37,7 @@ #include <stdlib.h> #include "babel.h" +#include "lib/macro.h" #define LOG_PKT_AUTH(msg, args...) \ log_rl(&p->log_pkt_tbf, L_AUTH "%s: " msg, p->p.name, args) @@ -58,12 +59,14 @@ static void babel_update_cost(struct babel_neighbor *n); static inline void babel_kick_timer(struct babel_proto *p); static inline void babel_iface_kick_timer(struct babel_iface *ifa); +static struct ea_class ea_babel_metric, ea_babel_router_id, ea_babel_seqno; + /* * Functions to maintain data structures */ static void -babel_init_entry(void *E) +babel_init_entry(struct fib *f UNUSED, void *E) { struct babel_entry *e = E; @@ -119,7 +122,7 @@ babel_get_source(struct babel_proto *p, struct babel_entry *e, u64 router_id) } static void -babel_expire_sources(struct babel_proto *p, struct babel_entry *e) +babel_expire_sources(struct babel_proto *p UNUSED, struct babel_entry *e) { struct babel_source *n, *nx; btime now_ = current_time(); @@ -129,7 +132,7 @@ babel_expire_sources(struct babel_proto *p, struct babel_entry *e) if (n->expires && n->expires <= now_) { rem_node(NODE n); - sl_free(p->source_slab, n); + sl_free(n); } } } @@ -174,7 +177,7 @@ babel_retract_route(struct babel_proto *p, struct babel_route *r) } static void -babel_flush_route(struct babel_proto *p, struct babel_route *r) +babel_flush_route(struct babel_proto *p UNUSED, struct babel_route *r) { DBG("Babel: Flush route %N router_id %lR neigh %I\n", r->e->n.addr, r->router_id, r->neigh->addr); @@ -185,7 +188,7 @@ babel_flush_route(struct babel_proto *p, struct babel_route *r) if (r->e->selected == r) r->e->selected = NULL; - sl_free(p->route_slab, r); + sl_free(r); } static void @@ -336,13 +339,13 @@ found: } static void -babel_remove_seqno_request(struct babel_proto *p, struct babel_seqno_request *sr) +babel_remove_seqno_request(struct babel_proto *p UNUSED, struct babel_seqno_request *sr) { if (sr->nbr) rem_node(&sr->nbr_node); rem_node(NODE sr); - sl_free(p->seqno_slab, sr); + sl_free(sr); } static int @@ -640,37 +643,14 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e) if (r) { - rta a0 = { - .source = RTS_BABEL, - .scope = SCOPE_UNIVERSE, - .dest = RTD_UNICAST, - .pref = c->preference, - .from = r->neigh->addr, - .nh.gw = r->next_hop, - .nh.iface = r->neigh->ifa->iface, - .eattrs = alloca(sizeof(ea_list) + 3*sizeof(eattr)), - }; - - *a0.eattrs = (ea_list) { .count = 3 }; - a0.eattrs->attrs[0] = (eattr) { - .id = EA_BABEL_METRIC, - .type = EAF_TYPE_INT, - .u.data = r->metric, - }; - - struct adata *ad = alloca(sizeof(struct adata) + sizeof(u64)); - ad->length = sizeof(u64); - memcpy(ad->data, &(r->router_id), sizeof(u64)); - a0.eattrs->attrs[1] = (eattr) { - .id = EA_BABEL_ROUTER_ID, - .type = EAF_TYPE_OPAQUE, - .u.ptr = ad, - }; - - a0.eattrs->attrs[2] = (eattr) { - .id = EA_BABEL_SEQNO, - .type = EAF_TYPE_INT, - .u.data = r->seqno, + struct nexthop_adata nhad = { + .nh = { + .gw = r->next_hop, + .iface = r->neigh->ifa->iface, + }, + .ad = { + .length = sizeof nhad - sizeof nhad.ad, + }, }; /* @@ -679,7 +659,25 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e) * have routing work. */ if (!neigh_find(&p->p, r->next_hop, r->neigh->ifa->iface, 0)) - a0.nh.flags = RNF_ONLINK; + nhad.nh.flags = RNF_ONLINK; + + struct { + ea_list l; + eattr a[7]; + } eattrs = { + .l.count = ARRAY_SIZE(eattrs.a), + .a = { + EA_LITERAL_EMBEDDED(&ea_gen_preference, 0, c->preference), + EA_LITERAL_STORE_ADATA(&ea_gen_from, 0, &r->neigh->addr, sizeof(r->neigh->addr)), + EA_LITERAL_EMBEDDED(&ea_gen_source, 0, RTS_BABEL), + EA_LITERAL_STORE_ADATA(&ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length), + EA_LITERAL_EMBEDDED(&ea_babel_metric, 0, r->metric), + EA_LITERAL_STORE_ADATA(&ea_babel_router_id, 0, &r->router_id, sizeof(r->router_id)), + EA_LITERAL_EMBEDDED(&ea_babel_seqno, 0, r->seqno), + } + }; + + rta a0 = { .eattrs = &eattrs.l, }; rte e0 = { .attrs = &a0, @@ -692,12 +690,11 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e) else if (e->valid && (e->router_id != p->router_id)) { /* Unreachable */ - rta a0 = { - .source = RTS_BABEL, - .scope = SCOPE_UNIVERSE, - .dest = RTD_UNREACHABLE, - .pref = 1, - }; + rta a0 = {}; + + ea_set_attr_u32(&a0.eattrs, &ea_gen_preference, 0, 1); + ea_set_attr_u32(&a0.eattrs, &ea_gen_source, 0, RTS_BABEL); + ea_set_dest(&a0.eattrs, 0, RTD_UNREACHABLE); rte e0 = { .attrs = &a0, @@ -862,14 +859,14 @@ babel_send_ihus(struct babel_iface *ifa) } static void -babel_send_hello(struct babel_iface *ifa) +babel_send_hello(struct babel_iface *ifa, uint interval) { struct babel_proto *p = ifa->proto; union babel_msg msg = {}; msg.type = BABEL_TLV_HELLO; msg.hello.seqno = ifa->hello_seqno++; - msg.hello.interval = ifa->cf->hello_interval; + msg.hello.interval = interval ?: ifa->cf->hello_interval; TRACE(D_PACKETS, "Sending hello on %s with seqno %d interval %t", ifa->ifname, msg.hello.seqno, (btime) msg.hello.interval); @@ -1577,7 +1574,7 @@ babel_iface_timer(timer *t) if (now_ >= ifa->next_hello) { - babel_send_hello(ifa); + babel_send_hello(ifa, 0); ifa->next_hello += hello_period * (1 + (now_ - ifa->next_hello) / hello_period); } @@ -1624,7 +1621,7 @@ babel_iface_start(struct babel_iface *ifa) tm_start(ifa->timer, 100 MS); ifa->up = 1; - babel_send_hello(ifa); + babel_send_hello(ifa, 0); babel_send_wildcard_retraction(ifa); babel_send_wildcard_request(ifa); babel_send_update(ifa, 0); /* Full update */ @@ -1919,7 +1916,7 @@ babel_reconfigure_ifaces(struct babel_proto *p, struct babel_config *cf) struct babel_iface *ifa = babel_find_iface(p, iface); struct babel_iface_config *ic = (void *) iface_patt_find(&cf->iface_list, iface, NULL); - if (ic && iface_is_valid(p, iface)) + if (ic && !iface_is_valid(p, iface)) ic = NULL; if (ifa && ic) @@ -2031,39 +2028,42 @@ static void babel_get_route_info(rte *rte, byte *buf) { u64 rid = 0; - eattr *e = ea_find(rte->attrs->eattrs, EA_BABEL_ROUTER_ID); + eattr *e = ea_find(rte->attrs->eattrs, &ea_babel_router_id); if (e) memcpy(&rid, e->u.ptr->data, sizeof(u64)); - buf += bsprintf(buf, " (%d/%d) [%lR]", rte->attrs->pref, - ea_get_int(rte->attrs->eattrs, EA_BABEL_METRIC, BABEL_INFINITY), rid); + buf += bsprintf(buf, " (%d/%d) [%lR]", + rt_get_preference(rte), + ea_get_int(rte->attrs->eattrs, &ea_babel_metric, BABEL_INFINITY), rid); } -static int -babel_get_attr(const eattr *a, byte *buf, int buflen UNUSED) +static void +babel_router_id_format(const eattr *a, byte *buf, uint len) { - switch (a->id) - { - case EA_BABEL_SEQNO: - return GA_FULL; - - case EA_BABEL_METRIC: - bsprintf(buf, "metric: %d", a->u.data); - return GA_FULL; - - case EA_BABEL_ROUTER_ID: - { - u64 rid = 0; - memcpy(&rid, a->u.ptr->data, sizeof(u64)); - bsprintf(buf, "router_id: %lR", rid); - return GA_FULL; - } - - default: - return GA_UNKNOWN; - } + u64 rid = 0; + memcpy(&rid, a->u.ptr->data, sizeof(u64)); + bsnprintf(buf, len, "%lR", rid); } +static struct ea_class ea_babel_metric = { + .name = "babel_metric", + .type = T_INT, +}; + +static struct ea_class ea_babel_router_id = { + .name = "babel_router_id", + .type = T_OPAQUE, + .readonly = 1, + .format = babel_router_id_format, +}; + +static struct ea_class ea_babel_seqno = { + .name = "babel_seqno", + .type = T_INT, + .readonly = 1, +}; + + void babel_show_interfaces(struct proto *P, const char *iff) { @@ -2262,9 +2262,13 @@ babel_kick_timer(struct babel_proto *p) static int babel_preexport(struct channel *c, struct rte *new) { - struct rta *a = new->attrs; + if (new->src->proto != c->proto) + return 0; + /* Reject our own unreachable routes */ - if ((a->dest == RTD_UNREACHABLE) && (new->src->proto == c->proto)) + eattr *ea = ea_find(new->attrs->eattrs, &ea_gen_nexthop); + struct nexthop_adata *nhad = (void *) ea->u.ptr; + if (!NEXTHOP_IS_REACHABLE(nhad)) return -1; return 0; @@ -2285,13 +2289,13 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, const net_addr *net, { /* Update */ uint rt_seqno; - uint rt_metric = ea_get_int(new->attrs->eattrs, EA_BABEL_METRIC, 0); + uint rt_metric = ea_get_int(new->attrs->eattrs, &ea_babel_metric, 0); u64 rt_router_id = 0; if (new->src->proto == P) { - rt_seqno = ea_find(new->attrs->eattrs, EA_BABEL_SEQNO)->u.data; - eattr *e = ea_find(new->attrs->eattrs, EA_BABEL_ROUTER_ID); + rt_seqno = ea_get_int(new->attrs->eattrs, &ea_babel_seqno, 0); + eattr *e = ea_find(new->attrs->eattrs, &ea_babel_router_id); if (e) memcpy(&rt_router_id, e->u.ptr->data, sizeof(u64)); } @@ -2342,16 +2346,16 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, const net_addr *net, static int babel_rte_better(struct rte *new, struct rte *old) { - uint new_metric = ea_find(new->attrs->eattrs, EA_BABEL_SEQNO)->u.data; - uint old_metric = ea_find(old->attrs->eattrs, EA_BABEL_SEQNO)->u.data; + uint new_metric = ea_get_int(new->attrs->eattrs, &ea_babel_metric, BABEL_INFINITY); + uint old_metric = ea_get_int(old->attrs->eattrs, &ea_babel_metric, BABEL_INFINITY); return new_metric < old_metric; } static u32 -babel_rte_igp_metric(struct rte *rt) +babel_rte_igp_metric(const rte *rt) { - return ea_get_int(rt->attrs->eattrs, EA_BABEL_METRIC, BABEL_INFINITY); + return ea_get_int(rt->attrs->eattrs, &ea_babel_metric, BABEL_INFINITY); } @@ -2435,6 +2439,11 @@ babel_iface_shutdown(struct babel_iface *ifa) { if (ifa->sk) { + /* + * Retract all our routes and lower the hello interval so peers' neighbour + * state expires quickly + */ + babel_send_hello(ifa, BABEL_MIN_INTERVAL); babel_send_wildcard_retraction(ifa); babel_send_queue(ifa); } @@ -2479,11 +2488,9 @@ babel_reconfigure(struct proto *P, struct proto_config *CF) return 1; } - struct protocol proto_babel = { .name = "Babel", .template = "babel%d", - .class = PROTOCOL_BABEL, .preference = DEF_PREF_BABEL, .channel_mask = NB_IP | NB_IP6_SADR, .proto_size = sizeof(struct babel_proto), @@ -2495,5 +2502,16 @@ struct protocol proto_babel = { .shutdown = babel_shutdown, .reconfigure = babel_reconfigure, .get_route_info = babel_get_route_info, - .get_attr = babel_get_attr }; + +void +babel_build(void) +{ + proto_build(&proto_babel); + + EA_REGISTER_ALL( + &ea_babel_metric, + &ea_babel_router_id, + &ea_babel_seqno + ); +} diff --git a/proto/babel/babel.h b/proto/babel/babel.h index 8b6da3c8..a980d1da 100644 --- a/proto/babel/babel.h +++ b/proto/babel/babel.h @@ -16,7 +16,7 @@ #include "nest/bird.h" #include "nest/cli.h" #include "nest/iface.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/locks.h" #include "nest/password.h" @@ -26,10 +26,6 @@ #include "lib/string.h" #include "lib/timer.h" -#define EA_BABEL_METRIC EA_CODE(PROTOCOL_BABEL, 0) -#define EA_BABEL_ROUTER_ID EA_CODE(PROTOCOL_BABEL, 1) -#define EA_BABEL_SEQNO EA_CODE(PROTOCOL_BABEL, 2) - #define BABEL_MAGIC 42 #define BABEL_VERSION 2 #define BABEL_PORT 6696 diff --git a/proto/babel/config.Y b/proto/babel/config.Y index 05210fa4..a4350eed 100644 --- a/proto/babel/config.Y +++ b/proto/babel/config.Y @@ -24,7 +24,7 @@ CF_DECLS CF_KEYWORDS(BABEL, INTERFACE, METRIC, RXCOST, HELLO, UPDATE, INTERVAL, PORT, TYPE, WIRED, WIRELESS, RX, TX, BUFFER, PRIORITY, LENGTH, CHECK, LINK, - NEXT, HOP, IPV4, IPV6, BABEL_METRIC, SHOW, INTERFACES, NEIGHBORS, + NEXT, HOP, IPV4, IPV6, SHOW, INTERFACES, NEIGHBORS, ENTRIES, RANDOMIZE, ROUTER, ID, AUTHENTICATION, NONE, MAC, PERMISSIVE) CF_GRAMMAR @@ -163,8 +163,6 @@ babel_iface_opt_list: babel_iface: babel_iface_start iface_patt_list_nopx babel_iface_opt_list babel_iface_finish; -dynamic_attr: BABEL_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_BABEL_METRIC); } ; - CF_CLI_HELP(SHOW BABEL, ..., [[Show information about Babel protocol]]); CF_CLI(SHOW BABEL INTERFACES, optproto opttext, [<name>] [\"<interface>\"], [[Show information about Babel interfaces]]) diff --git a/proto/babel/packets.c b/proto/babel/packets.c index f13410e2..d4acc170 100644 --- a/proto/babel/packets.c +++ b/proto/babel/packets.c @@ -1318,7 +1318,6 @@ babel_send_to(struct babel_iface *ifa, ip_addr dest) static uint babel_write_queue(struct babel_iface *ifa, list *queue) { - struct babel_proto *p = ifa->proto; struct babel_write_state state = { .next_hop_ip6 = ifa->addr }; if (EMPTY_LIST(*queue)) @@ -1346,7 +1345,7 @@ babel_write_queue(struct babel_iface *ifa, list *queue) pos += len; rem_node(NODE msg); - sl_free(p->msg_slab, msg); + sl_free(msg); } pos += babel_auth_add_tlvs(ifa, (struct babel_tlv *) pos, end - pos); @@ -1507,13 +1506,13 @@ babel_process_packet(struct babel_iface *ifa, else if (res == PARSE_IGNORE) { DBG("Babel: Ignoring TLV of type %d\n", tlv->type); - sl_free(p->msg_slab, msg); + sl_free(msg); } else /* PARSE_ERROR */ { LOG_PKT("Bad TLV from %I via %s type %d pos %d - parse error", saddr, ifa->iface->name, tlv->type, (int) ((byte *)tlv - (byte *)pkt)); - sl_free(p->msg_slab, msg); + sl_free(msg); break; } } @@ -1525,7 +1524,7 @@ babel_process_packet(struct babel_iface *ifa, if (tlv_data[msg->msg.type].handle_tlv) tlv_data[msg->msg.type].handle_tlv(&msg->msg, ifa); rem_node(NODE msg); - sl_free(p->msg_slab, msg); + sl_free(msg); } } @@ -2011,7 +2010,7 @@ babel_auth_sign(struct babel_iface *ifa, ip_addr dest) } DBG("Added MAC signatures (%d bytes) on ifa %s for dest %I\n", - tot_len, ifa->ifname, dest); + pos - (pkt + len), ifa->ifname, dest); return pos - (pkt + len); } diff --git a/proto/bfd/Makefile b/proto/bfd/Makefile index 402122fc..dbdc0a09 100644 --- a/proto/bfd/Makefile +++ b/proto/bfd/Makefile @@ -2,5 +2,6 @@ src := bfd.c io.c packets.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,bfd_build) -tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c index dac184c5..871ecf69 100644 --- a/proto/bfd/bfd.c +++ b/proto/bfd/bfd.c @@ -113,8 +113,8 @@ #define HASH_IP_EQ(a1,n1,a2,n2) ipa_equal(a1, a2) && n1 == n2 #define HASH_IP_FN(a,n) ipa_hash(a) ^ u32_hash(n) -static list bfd_proto_list; -static list bfd_wait_list; +static list STATIC_LIST_INIT(bfd_proto_list); +static list STATIC_LIST_INIT(bfd_wait_list); const char *bfd_state_names[] = { "AdminDown", "Down", "Init", "Up" }; @@ -508,7 +508,7 @@ bfd_remove_session(struct bfd_proto *p, struct bfd_session *s) HASH_REMOVE(p->session_hash_id, HASH_ID, s); HASH_REMOVE(p->session_hash_ip, HASH_IP, s); - sl_free(p->session_slab, s); + sl_free(s); TRACE(D_EVENTS, "Session to %I removed", ip); @@ -582,6 +582,9 @@ bfd_get_iface(struct bfd_proto *p, ip_addr local, struct iface *iface) ifa->sk = bfd_open_tx_sk(p, local, iface); ifa->uc = 1; + if (cf->strict_bind) + ifa->rx = bfd_open_rx_sk_bound(p, local, iface); + add_tail(&p->iface_list, &ifa->n); return ifa; @@ -599,6 +602,12 @@ bfd_free_iface(struct bfd_iface *ifa) rfree(ifa->sk); } + if (ifa->rx) + { + sk_stop(ifa->rx); + rfree(ifa->rx); + } + rem_node(&ifa->n); mb_free(ifa); } @@ -998,13 +1007,6 @@ bfd_notify_init(struct bfd_proto *p) * BFD protocol glue */ -void -bfd_init_all(void) -{ - init_list(&bfd_proto_list); - init_list(&bfd_wait_list); -} - static struct proto * bfd_init(struct proto_config *c) { @@ -1038,17 +1040,20 @@ bfd_start(struct proto *P) birdloop_enter(p->loop); - if (cf->accept_ipv4 && cf->accept_direct) - p->rx4_1 = bfd_open_rx_sk(p, 0, SK_IPV4); + if (!cf->strict_bind) + { + if (cf->accept_ipv4 && cf->accept_direct) + p->rx4_1 = bfd_open_rx_sk(p, 0, SK_IPV4); - if (cf->accept_ipv4 && cf->accept_multihop) - p->rx4_m = bfd_open_rx_sk(p, 1, SK_IPV4); + if (cf->accept_ipv4 && cf->accept_multihop) + p->rx4_m = bfd_open_rx_sk(p, 1, SK_IPV4); - if (cf->accept_ipv6 && cf->accept_direct) - p->rx6_1 = bfd_open_rx_sk(p, 0, SK_IPV6); + if (cf->accept_ipv6 && cf->accept_direct) + p->rx6_1 = bfd_open_rx_sk(p, 0, SK_IPV6); - if (cf->accept_ipv6 && cf->accept_multihop) - p->rx6_m = bfd_open_rx_sk(p, 1, SK_IPV6); + if (cf->accept_ipv6 && cf->accept_multihop) + p->rx6_m = bfd_open_rx_sk(p, 1, SK_IPV6); + } birdloop_leave(p->loop); @@ -1102,7 +1107,8 @@ bfd_reconfigure(struct proto *P, struct proto_config *c) if ((new->accept_ipv4 != old->accept_ipv4) || (new->accept_ipv6 != old->accept_ipv6) || (new->accept_direct != old->accept_direct) || - (new->accept_multihop != old->accept_multihop)) + (new->accept_multihop != old->accept_multihop) || + (new->strict_bind != old->strict_bind)) return 0; birdloop_mask_wakeups(p->loop); @@ -1177,7 +1183,6 @@ bfd_show_sessions(struct proto *P) struct protocol proto_bfd = { .name = "BFD", .template = "bfd%d", - .class = PROTOCOL_BFD, .proto_size = sizeof(struct bfd_proto), .config_size = sizeof(struct bfd_config), .init = bfd_init, @@ -1186,3 +1191,9 @@ struct protocol proto_bfd = { .reconfigure = bfd_reconfigure, .copy_config = bfd_copy_config, }; + +void +bfd_build(void) +{ + proto_build(&proto_bfd); +} diff --git a/proto/bfd/bfd.h b/proto/bfd/bfd.h index 91fdaa60..60b7916c 100644 --- a/proto/bfd/bfd.h +++ b/proto/bfd/bfd.h @@ -13,7 +13,7 @@ #include "nest/cli.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/password.h" #include "conf/conf.h" #include "lib/hash.h" @@ -47,6 +47,7 @@ struct bfd_config u8 accept_ipv6; u8 accept_direct; u8 accept_multihop; + u8 strict_bind; }; struct bfd_iface_config @@ -116,6 +117,7 @@ struct bfd_iface struct bfd_proto *bfd; sock *sk; + sock *rx; u32 uc; u8 changed; }; @@ -221,6 +223,7 @@ void bfd_show_sessions(struct proto *P); /* packets.c */ void bfd_send_ctl(struct bfd_proto *p, struct bfd_session *s, int final); sock * bfd_open_rx_sk(struct bfd_proto *p, int multihop, int inet_version); +sock * bfd_open_rx_sk_bound(struct bfd_proto *p, ip_addr local, struct iface *ifa); sock * bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa); diff --git a/proto/bfd/config.Y b/proto/bfd/config.Y index df1cba42..70461872 100644 --- a/proto/bfd/config.Y +++ b/proto/bfd/config.Y @@ -23,7 +23,8 @@ CF_DECLS CF_KEYWORDS(BFD, MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE, INTERFACE, MULTIHOP, NEIGHBOR, DEV, LOCAL, AUTHENTICATION, - NONE, SIMPLE, METICULOUS, KEYED, MD5, SHA1, IPV4, IPV6, DIRECT) + NONE, SIMPLE, METICULOUS, KEYED, MD5, SHA1, IPV4, IPV6, DIRECT, + STRICT, BIND) %type <iface> bfd_neigh_iface %type <a> bfd_neigh_local @@ -48,6 +49,7 @@ bfd_proto_item: | INTERFACE bfd_iface | MULTIHOP bfd_multihop | NEIGHBOR bfd_neighbor + | STRICT BIND bool { BFD_CFG->strict_bind = $3; } ; bfd_proto_opts: diff --git a/proto/bfd/io.c b/proto/bfd/io.c index 1cd9365a..e696cc89 100644 --- a/proto/bfd/io.c +++ b/proto/bfd/io.c @@ -482,6 +482,8 @@ birdloop_main(void *arg) birdloop_set_current(loop); + tmp_init(loop->pool); + pthread_mutex_lock(&loop->mutex); while (1) { diff --git a/proto/bfd/packets.c b/proto/bfd/packets.c index 7618e20f..5f10734c 100644 --- a/proto/bfd/packets.c +++ b/proto/bfd/packets.c @@ -366,7 +366,9 @@ bfd_rx_hook(sock *sk, uint len) if (ps > BFD_STATE_DOWN) DROP("invalid init state", ps); - uint ifindex = (sk->sport == BFD_CONTROL_PORT) ? sk->lifindex : 0; + uint ifindex = (sk->sport == BFD_CONTROL_PORT) ? + (sk->iface ? sk->iface->index : sk->lifindex) : + 0; s = bfd_find_session_by_addr(p, sk->faddr, ifindex); /* FIXME: better session matching and message */ @@ -438,6 +440,38 @@ bfd_open_rx_sk(struct bfd_proto *p, int multihop, int af) return NULL; } +sock * +bfd_open_rx_sk_bound(struct bfd_proto *p, ip_addr local, struct iface *ifa) +{ + sock *sk = sk_new(p->tpool); + sk->type = SK_UDP; + sk->saddr = local; + sk->sport = ifa ? BFD_CONTROL_PORT : BFD_MULTI_CTL_PORT; + sk->iface = ifa; + sk->vrf = p->p.vrf; + sk->data = p; + + sk->rbsize = BFD_MAX_LEN; + sk->rx_hook = bfd_rx_hook; + sk->err_hook = bfd_err_hook; + + /* TODO: configurable ToS and priority */ + sk->tos = IP_PREC_INTERNET_CONTROL; + sk->priority = sk_priority_control; + sk->flags = SKF_THREAD | SKF_BIND | (ifa ? SKF_TTL_RX : 0); + + if (sk_open(sk) < 0) + goto err; + + sk_start(sk); + return sk; + + err: + sk_log_error(sk, p->p.name); + rfree(sk); + return NULL; +} + sock * bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa) { diff --git a/proto/bgp/Makefile b/proto/bgp/Makefile index 00aaef5e..2a4cc99c 100644 --- a/proto/bgp/Makefile +++ b/proto/bgp/Makefile @@ -2,5 +2,6 @@ src := attrs.c bgp.c packets.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,bgp_build) -tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 90490b4f..0b715eaa 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -15,12 +15,13 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" -#include "nest/attrs.h" +#include "nest/rt.h" +#include "lib/attrs.h" #include "conf/conf.h" #include "lib/resource.h" #include "lib/string.h" #include "lib/unaligned.h" +#include "lib/macro.h" #include "bgp.h" @@ -45,9 +46,9 @@ * * export - Hook that validates and normalizes attribute during export phase. * Receives eattr, may modify it (e.g., sort community lists for canonical - * representation), UNSET() it (e.g., skip empty lists), or WITHDRAW() it if - * necessary. May assume that eattr has value valid w.r.t. its type, but may be - * invalid w.r.t. BGP constraints. Optional. + * representation), UNSET() it (e.g., skip empty lists), or REJECT() the route + * if necessary. May assume that eattr has value valid w.r.t. its type, but may + * be invalid w.r.t. BGP constraints. Optional. * * encode - Hook that converts internal representation to external one during * packet writing. Receives eattr and puts it in the buffer (including attribute @@ -64,37 +65,72 @@ * format - Optional hook that converts eattr to textual representation. */ - -struct bgp_attr_desc { - const char *name; - uint type; - uint flags; - void (*export)(struct bgp_export_state *s, eattr *a); - int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size); - void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to); - void (*format)(const eattr *ea, byte *buf, uint size); +union bgp_attr_desc { + struct ea_class class; + struct { + EA_CLASS_INSIDE; + uint flags; + void (*export)(struct bgp_export_state *s, eattr *a); + int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size); + void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to); + }; }; -static const struct bgp_attr_desc bgp_attr_table[]; - -static inline int bgp_attr_known(uint code); - -eattr * -bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val) +static union bgp_attr_desc bgp_attr_table[]; +static inline const union bgp_attr_desc *bgp_find_attr_desc(eattr *a) { - ASSERT(bgp_attr_known(code)); + const struct ea_class *class = ea_class_find(a->id); - return ea_set_attr( - attrs, - pool, - EA_CODE(PROTOCOL_BGP, code), - flags & ~BAF_EXT_LEN, - bgp_attr_table[code].type, - val - ); + if ((class < &bgp_attr_table[0].class) || (class >= &bgp_attr_table[BGP_ATTR_MAX].class)) + return NULL; + + return (const union bgp_attr_desc *) class; } +#define BGP_EA_ID(code) (bgp_attr_table[code].id) +#define EA_BGP_ID(code) (((union bgp_attr_desc *) ea_class_find(code)) - bgp_attr_table) +void bgp_set_attr_u32(ea_list **to, uint code, uint flags, u32 val) +{ + const union bgp_attr_desc *desc = &bgp_attr_table[code]; + + ea_set_attr(to, EA_LITERAL_EMBEDDED( + &desc->class, + flags & ~BAF_EXT_LEN, + val + )); +} + +void bgp_set_attr_ptr(ea_list **to, uint code, uint flags, const struct adata *ad) +{ + const union bgp_attr_desc *desc = &bgp_attr_table[code]; + + ea_set_attr(to, EA_LITERAL_DIRECT_ADATA( + &desc->class, + flags & ~BAF_EXT_LEN, + ad + )); +} + +void +bgp_set_attr_data(ea_list **to, uint code, uint flags, void *data, uint len) +{ + const union bgp_attr_desc *desc = &bgp_attr_table[code]; + + ea_set_attr(to, EA_LITERAL_STORE_ADATA( + &desc->class, + flags & ~BAF_EXT_LEN, + data, + len + )); +} + +void +bgp_unset_attr(ea_list **to, uint code) +{ + const union bgp_attr_desc *desc = &bgp_attr_table[code]; + ea_unset_attr(to, 0, &desc->class); +} #define REPORT(msg, args...) \ ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); }) @@ -106,7 +142,10 @@ bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintp ({ REPORT(msg, ## args); s->err_withdraw = 1; return; }) #define UNSET(a) \ - ({ a->type = EAF_TYPE_UNDEF; return; }) + ({ a->undef = 1; return; }) + +#define REJECT(msg, args...) \ + ({ log(L_ERR "%s: " msg, s->proto->p.name, ## args); s->err_reject = 1; return; }) #define NEW_BGP "Discarding %s attribute received from AS4-aware neighbor" #define BAD_EBGP "Discarding %s attribute received from EBGP neighbor" @@ -148,7 +187,7 @@ bgp_encode_u8(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size) if (size < (3+1)) return -1; - bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 1); + bgp_put_attr_hdr3(buf, EA_BGP_ID(a->id), a->flags, 1); buf[3] = a->u.data; return 3+1; @@ -160,7 +199,7 @@ bgp_encode_u32(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size) if (size < (3+4)) return -1; - bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 4); + bgp_put_attr_hdr3(buf, EA_BGP_ID(a->id), a->flags, 4); put_u32(buf+3, a->u.data); return 3+4; @@ -174,7 +213,7 @@ bgp_encode_u32s(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size if (size < (4+len)) return -1; - uint hdr = bgp_put_attr_hdr(buf, EA_ID(a->id), a->flags, len); + uint hdr = bgp_put_attr_hdr(buf, EA_BGP_ID(a->id), a->flags, len); put_u32s(buf + hdr, (u32 *) a->u.ptr->data, len / 4); return hdr + len; @@ -195,7 +234,7 @@ bgp_put_attr(byte *buf, uint size, uint code, uint flags, const byte *data, uint static int bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size) { - return bgp_put_attr(buf, size, EA_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length); + return bgp_put_attr(buf, size, EA_BGP_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length); } @@ -333,9 +372,11 @@ bgp_aigp_set_metric(struct linpool *pool, const struct adata *ad, u64 metric) } int -bgp_total_aigp_metric_(struct rta *a, u64 *metric, const struct adata **ad) +bgp_total_aigp_metric_(const rte *e, u64 *metric, const struct adata **ad) { - eattr *ea = ea_find(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_AIGP)); + rta *a = e->attrs; + + eattr *ea = ea_find(a->eattrs, BGP_EA_ID(BA_AIGP)); if (!ea) return 0; @@ -344,7 +385,7 @@ bgp_total_aigp_metric_(struct rta *a, u64 *metric, const struct adata **ad) return 0; u64 aigp = get_u64(b + 3); - u64 step = a->igp_metric; + u64 step = rt_get_igp_metric(e); if (!rta_resolvable(a) || (step >= IGP_METRIC_UNKNOWN)) step = BGP_AIGP_MAX; @@ -363,7 +404,7 @@ bgp_total_aigp_metric_(struct rta *a, u64 *metric, const struct adata **ad) static inline int bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad) { - if (e->attrs->source == RTS_BGP) + if (rt_get_source_attr(e) == RTS_BGP) return 0; *metric = rt_get_igp_metric(e); @@ -372,9 +413,9 @@ bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad) } u32 -bgp_rte_igp_metric(struct rte *rt) +bgp_rte_igp_metric(const rte *rt) { - u64 metric = bgp_total_aigp_metric(rt->attrs); + u64 metric = bgp_total_aigp_metric(rt); return (u32) MIN(metric, (u64) IGP_METRIC_UNKNOWN); } @@ -387,7 +428,7 @@ static void bgp_export_origin(struct bgp_export_state *s, eattr *a) { if (a->u.data > 2) - WITHDRAW(BAD_VALUE, "ORIGIN", a->u.data); + REJECT(BAD_VALUE, "ORIGIN", a->u.data); } static void @@ -399,7 +440,7 @@ bgp_decode_origin(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte if (data[0] > 2) WITHDRAW(BAD_VALUE, "ORIGIN", data[0]); - bgp_set_attr_u32(to, s->pool, BA_ORIGIN, flags, data[0]); + bgp_set_attr_u32(to, BA_ORIGIN, flags, data[0]); } static void @@ -467,7 +508,7 @@ bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte !bgp_as_path_first_as_equal(data, len, p->remote_as)) WITHDRAW("Malformed AS_PATH attribute - %s", "First AS differs from neigbor AS"); - bgp_set_attr_data(to, s->pool, BA_AS_PATH, flags, data, len); + bgp_set_attr_data(to, BA_AS_PATH, flags, data, len); } @@ -539,7 +580,7 @@ bgp_decode_med(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *da WITHDRAW(BAD_LENGTH, "MULTI_EXIT_DISC", len); u32 val = get_u32(data); - bgp_set_attr_u32(to, s->pool, BA_MULTI_EXIT_DISC, flags, val); + bgp_set_attr_u32(to, BA_MULTI_EXIT_DISC, flags, val); } @@ -560,7 +601,7 @@ bgp_decode_local_pref(struct bgp_parse_state *s, uint code UNUSED, uint flags, b WITHDRAW(BAD_LENGTH, "LOCAL_PREF", len); u32 val = get_u32(data); - bgp_set_attr_u32(to, s->pool, BA_LOCAL_PREF, flags, val); + bgp_set_attr_u32(to, BA_LOCAL_PREF, flags, val); } @@ -570,7 +611,7 @@ bgp_decode_atomic_aggr(struct bgp_parse_state *s, uint code UNUSED, uint flags, if (len != 0) DISCARD(BAD_LENGTH, "ATOMIC_AGGR", len); - bgp_set_attr_data(to, s->pool, BA_ATOMIC_AGGR, flags, NULL, 0); + bgp_set_attr_data(to, BA_ATOMIC_AGGR, flags, NULL, 0); } static int @@ -604,7 +645,7 @@ bgp_decode_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, b len = aggregator_16to32(data, src); } - bgp_set_attr_data(to, s->pool, BA_AGGREGATOR, flags, data, len); + bgp_set_attr_data(to, BA_AGGREGATOR, flags, data, len); } static void @@ -633,7 +674,7 @@ bgp_decode_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, by struct adata *ad = lp_alloc_adata(s->pool, len); get_u32s(data, (u32 *) ad->data, len / 4); - bgp_set_attr_ptr(to, s->pool, BA_COMMUNITY, flags, ad); + bgp_set_attr_ptr(to, BA_COMMUNITY, flags, ad); } @@ -654,7 +695,7 @@ bgp_decode_originator_id(struct bgp_parse_state *s, uint code UNUSED, uint flags WITHDRAW(BAD_LENGTH, "ORIGINATOR_ID", len); u32 val = get_u32(data); - bgp_set_attr_u32(to, s->pool, BA_ORIGINATOR_ID, flags, val); + bgp_set_attr_u32(to, BA_ORIGINATOR_ID, flags, val); } @@ -679,7 +720,7 @@ bgp_decode_cluster_list(struct bgp_parse_state *s, uint code UNUSED, uint flags, struct adata *ad = lp_alloc_adata(s->pool, len); get_u32s(data, (u32 *) ad->data, len / 4); - bgp_set_attr_ptr(to, s->pool, BA_CLUSTER_LIST, flags, ad); + bgp_set_attr_ptr(to, BA_CLUSTER_LIST, flags, ad); } static void @@ -798,7 +839,7 @@ bgp_decode_ext_community(struct bgp_parse_state *s, uint code UNUSED, uint flags struct adata *ad = lp_alloc_adata(s->pool, len); get_u32s(data, (u32 *) ad->data, len / 4); - bgp_set_attr_ptr(to, s->pool, BA_EXT_COMMUNITY, flags, ad); + bgp_set_attr_ptr(to, BA_EXT_COMMUNITY, flags, ad); } @@ -811,7 +852,7 @@ bgp_decode_as4_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flag if (len != 8) DISCARD(BAD_LENGTH, "AS4_AGGREGATOR", len); - bgp_set_attr_data(to, s->pool, BA_AS4_AGGREGATOR, flags, data, len); + bgp_set_attr_data(to, BA_AS4_AGGREGATOR, flags, data, len); } static void @@ -841,7 +882,7 @@ bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byt a = as_path_strip_confed(s->pool, a); } - bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a); + bgp_set_attr_ptr(to, BA_AS4_PATH, flags, a); } @@ -865,7 +906,7 @@ bgp_decode_aigp(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *d if (!bgp_aigp_valid(data, len, err, sizeof(err))) DISCARD("Malformed AIGP attribute - %s", err); - bgp_set_attr_data(to, s->pool, BA_AIGP, flags, data, len); + bgp_set_attr_data(to, BA_AIGP, flags, data, len); } static void @@ -897,7 +938,7 @@ bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint fla struct adata *ad = lp_alloc_adata(s->pool, len); get_u32s(data, (u32 *) ad->data, len / 4); - bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad); + bgp_set_attr_ptr(to, BA_LARGE_COMMUNITY, flags, ad); } static void @@ -909,20 +950,20 @@ bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a) /* Perhaps we should just ignore it? */ if (!s->mpls) - WITHDRAW("Unexpected MPLS stack"); + REJECT("Unexpected MPLS stack"); /* Empty MPLS stack is not allowed */ if (!lnum) - WITHDRAW("Malformed MPLS stack - empty"); + REJECT("Malformed MPLS stack - empty"); /* This is ugly, but we must ensure that labels fit into NLRI field */ if ((24*lnum + (net_is_vpn(n) ? 64 : 0) + net_pxlen(n)) > 255) - WITHDRAW("Malformed MPLS stack - too many labels (%u)", lnum); + REJECT("Malformed MPLS stack - too many labels (%u)", lnum); for (uint i = 0; i < lnum; i++) { if (labels[i] > 0xfffff) - WITHDRAW("Malformed MPLS stack - invalid label (%u)", labels[i]); + REJECT("Malformed MPLS stack - invalid label (%u)", labels[i]); /* TODO: Check for special-purpose label values? */ } @@ -970,10 +1011,29 @@ bgp_format_mpls_label_stack(const eattr *a, byte *buf, uint size) } static inline void -bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to) +bgp_export_unknown(struct bgp_export_state *s UNUSED, eattr *a) { + if (!(a->flags & BAF_TRANSITIVE)) + UNSET(a); + + a->flags |= BAF_PARTIAL; +} + +static inline void +bgp_decode_unknown(struct bgp_parse_state *s UNUSED, uint code, uint flags, byte *data, uint len, ea_list **to) +{ + if (!(flags & BAF_OPTIONAL)) + WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags); + /* Cannot use bgp_set_attr_data() as it works on known attributes only */ - ea_set_attr_data(to, s->pool, EA_CODE(PROTOCOL_BGP, code), flags, EAF_TYPE_OPAQUE, data, len); + ea_set_attr_data(to, &bgp_attr_table[code].class, flags, data, len); +} + +static inline void +bgp_format_unknown(const eattr *a, byte *buf, uint size) +{ + if (a->flags & BAF_TRANSITIVE) + bsnprintf(buf, size, "(transitive)"); } @@ -981,10 +1041,10 @@ bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, * Attribute table */ -static const struct bgp_attr_desc bgp_attr_table[] = { +static union bgp_attr_desc bgp_attr_table[BGP_ATTR_MAX] = { [BA_ORIGIN] = { - .name = "origin", - .type = EAF_TYPE_INT, + .name = "bgp_origin", + .type = T_ENUM_BGP_ORIGIN, .flags = BAF_TRANSITIVE, .export = bgp_export_origin, .encode = bgp_encode_u8, @@ -992,69 +1052,69 @@ static const struct bgp_attr_desc bgp_attr_table[] = { .format = bgp_format_origin, }, [BA_AS_PATH] = { - .name = "as_path", - .type = EAF_TYPE_AS_PATH, + .name = "bgp_path", + .type = T_PATH, .flags = BAF_TRANSITIVE, .encode = bgp_encode_as_path, .decode = bgp_decode_as_path, }, [BA_NEXT_HOP] = { - .name = "next_hop", - .type = EAF_TYPE_IP_ADDRESS, + .name = "bgp_next_hop", + .type = T_IP, .flags = BAF_TRANSITIVE, .encode = bgp_encode_next_hop, .decode = bgp_decode_next_hop, .format = bgp_format_next_hop, }, [BA_MULTI_EXIT_DISC] = { - .name = "med", - .type = EAF_TYPE_INT, + .name = "bgp_med", + .type = T_INT, .flags = BAF_OPTIONAL, .encode = bgp_encode_u32, .decode = bgp_decode_med, }, [BA_LOCAL_PREF] = { - .name = "local_pref", - .type = EAF_TYPE_INT, + .name = "bgp_local_pref", + .type = T_INT, .flags = BAF_TRANSITIVE, .export = bgp_export_local_pref, .encode = bgp_encode_u32, .decode = bgp_decode_local_pref, }, [BA_ATOMIC_AGGR] = { - .name = "atomic_aggr", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_atomic_aggr", + .type = T_OPAQUE, .flags = BAF_TRANSITIVE, .encode = bgp_encode_raw, .decode = bgp_decode_atomic_aggr, }, [BA_AGGREGATOR] = { - .name = "aggregator", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_aggregator", + .type = T_OPAQUE, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .encode = bgp_encode_aggregator, .decode = bgp_decode_aggregator, .format = bgp_format_aggregator, }, [BA_COMMUNITY] = { - .name = "community", - .type = EAF_TYPE_INT_SET, + .name = "bgp_community", + .type = T_CLIST, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .export = bgp_export_community, .encode = bgp_encode_u32s, .decode = bgp_decode_community, }, [BA_ORIGINATOR_ID] = { - .name = "originator_id", - .type = EAF_TYPE_ROUTER_ID, + .name = "bgp_originator_id", + .type = T_QUAD, .flags = BAF_OPTIONAL, .export = bgp_export_originator_id, .encode = bgp_encode_u32, .decode = bgp_decode_originator_id, }, [BA_CLUSTER_LIST] = { - .name = "cluster_list", - .type = EAF_TYPE_INT_SET, + .name = "bgp_cluster_list", + .type = T_CLIST, .flags = BAF_OPTIONAL, .export = bgp_export_cluster_list, .encode = bgp_encode_u32s, @@ -1062,43 +1122,43 @@ static const struct bgp_attr_desc bgp_attr_table[] = { .format = bgp_format_cluster_list, }, [BA_MP_REACH_NLRI] = { - .name = "mp_reach_nlri", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_mp_reach_nlri", + .type = T_OPAQUE, .flags = BAF_OPTIONAL, .decode = bgp_decode_mp_reach_nlri, }, [BA_MP_UNREACH_NLRI] = { - .name = "mp_unreach_nlri", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_mp_unreach_nlri", + .type = T_OPAQUE, .flags = BAF_OPTIONAL, .decode = bgp_decode_mp_unreach_nlri, }, [BA_EXT_COMMUNITY] = { - .name = "ext_community", - .type = EAF_TYPE_EC_SET, + .name = "bgp_ext_community", + .type = T_ECLIST, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .export = bgp_export_ext_community, .encode = bgp_encode_u32s, .decode = bgp_decode_ext_community, }, [BA_AS4_PATH] = { - .name = "as4_path", - .type = EAF_TYPE_AS_PATH, + .name = "bgp_as4_path", + .type = T_PATH, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .encode = bgp_encode_raw, .decode = bgp_decode_as4_path, }, [BA_AS4_AGGREGATOR] = { - .name = "as4_aggregator", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_as4_aggregator", + .type = T_OPAQUE, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .encode = bgp_encode_raw, .decode = bgp_decode_as4_aggregator, .format = bgp_format_aggregator, }, [BA_AIGP] = { - .name = "aigp", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_aigp", + .type = T_OPAQUE, .flags = BAF_OPTIONAL | BAF_DECODE_FLAGS, .export = bgp_export_aigp, .encode = bgp_encode_raw, @@ -1106,16 +1166,17 @@ static const struct bgp_attr_desc bgp_attr_table[] = { .format = bgp_format_aigp, }, [BA_LARGE_COMMUNITY] = { - .name = "large_community", - .type = EAF_TYPE_LC_SET, + .name = "bgp_large_community", + .type = T_LCLIST, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .export = bgp_export_large_community, .encode = bgp_encode_u32s, .decode = bgp_decode_large_community, }, [BA_MPLS_LABEL_STACK] = { - .name = "mpls_label_stack", - .type = EAF_TYPE_INT_SET, + .name = "bgp_mpls_label_stack", + .type = T_CLIST, + .readonly = 1, .export = bgp_export_mpls_label_stack, .encode = bgp_encode_mpls_label_stack, .decode = bgp_decode_mpls_label_stack, @@ -1123,12 +1184,32 @@ static const struct bgp_attr_desc bgp_attr_table[] = { }, }; -static inline int -bgp_attr_known(uint code) +eattr * +bgp_find_attr(ea_list *attrs, uint code) { - return (code < ARRAY_SIZE(bgp_attr_table)) && bgp_attr_table[code].name; + return ea_find(attrs, BGP_EA_ID(code)); } +void +bgp_register_attrs(void) +{ + for (uint i=0; i<ARRAY_SIZE(bgp_attr_table); i++) + { + if (!bgp_attr_table[i].name) + bgp_attr_table[i] = (union bgp_attr_desc) { + .name = mb_sprintf(&root_pool, "bgp_unknown_0x%02x", i), + .type = T_OPAQUE, + .flags = BAF_OPTIONAL, + .readonly = 1, + .export = bgp_export_unknown, + .encode = bgp_encode_raw, + .decode = bgp_decode_unknown, + .format = bgp_format_unknown, + }; + + ea_register_init(&bgp_attr_table[i].class); + } +} /* * Attribute export @@ -1137,38 +1218,24 @@ bgp_attr_known(uint code) static inline void bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to) { - if (EA_PROTO(a->id) != PROTOCOL_BGP) + const union bgp_attr_desc *desc = bgp_find_attr_desc(a); + if (!desc) return; - uint code = EA_ID(a->id); - - if (bgp_attr_known(code)) - { - const struct bgp_attr_desc *desc = &bgp_attr_table[code]; - - /* The flags might have been zero if the attr was added by filters */ - a->flags = (a->flags & BAF_PARTIAL) | desc->flags; - - /* Set partial bit if new opt-trans attribute is attached to non-local route */ - if ((s->src != NULL) && (a->type & EAF_ORIGINATED) && - (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE)) - a->flags |= BAF_PARTIAL; - - /* Call specific hook */ - CALL(desc->export, s, a); - - /* Attribute might become undefined in hook */ - if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF) - return; - } - else - { - /* Don't re-export unknown non-transitive attributes */ - if (!(a->flags & BAF_TRANSITIVE)) - return; + /* The flags might have been zero if the attr was added locally */ + a->flags = (a->flags & BAF_PARTIAL) | desc->flags; + /* Set partial bit if new opt-trans attribute is attached to non-local route */ + if ((s->src != NULL) && (a->originated) && + (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE)) a->flags |= BAF_PARTIAL; - } + + /* Call specific hook */ + CALL(desc->export, s, a); + + /* Attribute might become undefined in hook */ + if (a->undef) + return; /* Append updated attribute */ to->attrs[to->count++] = *a; @@ -1188,12 +1255,11 @@ bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to) * Result: one sorted attribute list segment, or NULL if attributes are unsuitable. */ static inline ea_list * -bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs) +bgp_export_attrs(struct bgp_export_state *s, const ea_list *a) { /* Merge the attribute list */ - ea_list *new = lp_alloc(s->pool, ea_scan(attrs)); - ea_merge(attrs, new); - ea_sort(new); + ea_list *new = ea_normalize(a); + ASSERT_DIE(new); uint i, count; count = new->count; @@ -1203,7 +1269,7 @@ bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs) for (i = 0; i < count; i++) bgp_export_attr(s, &new->attrs[i], new); - if (s->err_withdraw) + if (s->err_reject) return NULL; return new; @@ -1217,14 +1283,9 @@ bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs) static inline int bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size) { - ASSERT(EA_PROTO(a->id) == PROTOCOL_BGP); - - uint code = EA_ID(a->id); - - if (bgp_attr_known(code)) - return bgp_attr_table[code].encode(s, a, buf, size); - else - return bgp_encode_raw(s, a, buf, size); + const union bgp_attr_desc *desc = bgp_find_attr_desc(a); + ASSERT_DIE(desc); + return desc->encode(s, a, buf, size); } /** @@ -1289,7 +1350,7 @@ bgp_cluster_list_loopy(struct bgp_proto *p, ea_list *attrs) } static inline void -bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to) +bgp_decode_attr(struct bgp_parse_state *s, byte code, byte flags, byte *data, uint len, ea_list **to) { /* Handle duplicate attributes; RFC 7606 3 (g) */ if (BIT32_TEST(s->attrs_seen, code)) @@ -1301,24 +1362,15 @@ bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, ui } BIT32_SET(s->attrs_seen, code); - if (bgp_attr_known(code)) - { - const struct bgp_attr_desc *desc = &bgp_attr_table[code]; + ASSERT_DIE(bgp_attr_table[code].id); + const union bgp_attr_desc *desc = &bgp_attr_table[code]; - /* Handle conflicting flags; RFC 7606 3 (c) */ - if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) && - !(desc->flags & BAF_DECODE_FLAGS)) - WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags); + /* Handle conflicting flags; RFC 7606 3 (c) */ + if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) && + !(desc->flags & BAF_DECODE_FLAGS)) + WITHDRAW("Malformed %s attribute - conflicting flags (%02x, expected %02x)", desc->name, flags, desc->flags); - desc->decode(s, code, flags, data, len, to); - } - else /* Unknown attribute */ - { - if (!(flags & BAF_OPTIONAL)) - WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags); - - bgp_decode_unknown(s, code, flags, data, len, to); - } + desc->decode(s, code, flags, data, len, to); } /** @@ -1336,7 +1388,8 @@ bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len) { struct bgp_proto *p = s->proto; ea_list *attrs = NULL; - uint code, flags, alen; + uint alen; + byte code, flags; byte *pos = data; /* Parse the attributes */ @@ -1401,23 +1454,23 @@ bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len) /* Reject routes with our ASN in AS_PATH attribute */ if (bgp_as_path_loopy(p, attrs, p->local_as)) - goto withdraw; + goto loop; /* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4.0 */ if ((p->public_as != p->local_as) && bgp_as_path_loopy(p, attrs, p->public_as)) - goto withdraw; + goto loop; /* Reject routes with our Router ID in ORIGINATOR_ID attribute; RFC 4456 8 */ if (p->is_internal && bgp_originator_id_loopy(p, attrs)) - goto withdraw; + goto loop; /* Reject routes with our Cluster ID in CLUSTER_LIST attribute; RFC 4456 8 */ if (p->rr_client && bgp_cluster_list_loopy(p, attrs)) - goto withdraw; + goto loop; /* If there is no local preference, define one */ if (!BIT32_TEST(s->attrs_seen, BA_LOCAL_PREF)) - bgp_set_attr_u32(&attrs, s->pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref); + bgp_set_attr_u32(&attrs, BA_LOCAL_PREF, 0, p->cf->default_local_pref); return attrs; @@ -1434,6 +1487,10 @@ withdraw: s->err_withdraw = 1; return NULL; + +loop: + /* Loops are handled as withdraws, but ignored silently. Do not set err_withdraw. */ + return NULL; } void @@ -1443,7 +1500,7 @@ bgp_finish_attrs(struct bgp_parse_state *s, rta *a) if (BIT32_TEST(s->attrs_seen, BA_AIGP) && !s->channel->cf->aigp) { REPORT("Discarding AIGP attribute received on non-AIGP session"); - bgp_unset_attr(&a->eattrs, s->pool, BA_AIGP); + bgp_unset_attr(&a->eattrs, BA_AIGP); } } @@ -1458,7 +1515,7 @@ bgp_finish_attrs(struct bgp_parse_state *s, rta *a) #define RBH_FN(a,h) h #define RBH_REHASH bgp_rbh_rehash -#define RBH_PARAMS /8, *2, 2, 2, 8, 20 +#define RBH_PARAMS /8, *2, 2, 2, 12, 20 HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket) @@ -1491,6 +1548,7 @@ bgp_free_bucket_table(struct bgp_channel *c) static struct bgp_bucket * bgp_get_bucket(struct bgp_channel *c, ea_list *new) { + /* Hash and lookup */ u32 hash = ea_hash(new); struct bgp_bucket *b = HASH_FIND(c->bucket_hash, RBH, new, hash); @@ -1498,45 +1556,18 @@ bgp_get_bucket(struct bgp_channel *c, ea_list *new) if (b) return b; - uint ea_size = sizeof(ea_list) + new->count * sizeof(eattr); - uint ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN); - uint size = sizeof(struct bgp_bucket) + ea_size_aligned; - uint i; - byte *dest; + /* Scan the list for total size */ + uint ea_size = BIRD_CPU_ALIGN(ea_list_size(new)); + uint size = sizeof(struct bgp_bucket) + ea_size; - /* Gather total size of non-inline attributes */ - for (i = 0; i < new->count; i++) - { - eattr *a = &new->attrs[i]; - - if (!(a->type & EAF_EMBEDDED)) - size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN); - } - - /* Create the bucket */ + /* Allocate the bucket */ b = mb_alloc(c->pool, size); *b = (struct bgp_bucket) { }; init_list(&b->prefixes); b->hash = hash; - /* Copy list of extended attributes */ - memcpy(b->eattrs, new, ea_size); - dest = ((byte *) b->eattrs) + ea_size_aligned; - - /* Copy values of non-inline attributes */ - for (i = 0; i < new->count; i++) - { - eattr *a = &b->eattrs->attrs[i]; - - if (!(a->type & EAF_EMBEDDED)) - { - const struct adata *oa = a->u.ptr; - struct adata *na = (struct adata *) dest; - memcpy(na, oa, sizeof(struct adata) + oa->length); - a->u.ptr = na; - dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN); - } - } + /* Copy the ea_list */ + ea_list_copy(b->eattrs, new, ea_size); /* Insert the bucket to send queue and bucket hash */ add_tail(&c->bucket_queue, &b->send_node); @@ -1600,7 +1631,7 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) #define PXH_FN(n,i,h) h #define PXH_REHASH bgp_pxh_rehash -#define PXH_PARAMS /8, *2, 2, 2, 8, 24 +#define PXH_PARAMS /8, *2, 2, 2, 12, 24 HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix) @@ -1626,7 +1657,8 @@ bgp_free_prefix_table(struct bgp_channel *c) static struct bgp_prefix * bgp_get_prefix(struct bgp_channel *c, const net_addr *net, u32 path_id) { - u32 hash = net_hash(net) ^ u32_hash(path_id); + /* We must use a different hash function than the rtable */ + u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id)); struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash); if (px) @@ -1657,7 +1689,7 @@ bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px) HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px); if (c->prefix_slab) - sl_free(c->prefix_slab, px); + sl_free(px); else mb_free(px); } @@ -1682,6 +1714,22 @@ bgp_preexport(struct channel *c, rte *e) if (src == NULL) return 0; + /* Reject flowspec that failed validation */ + if (net_is_flow(e->net)) + switch (rt_get_flowspec_valid(e)) + { + case FLOWSPEC_VALID: + break; + case FLOWSPEC_INVALID: + return -1; + case FLOWSPEC_UNKNOWN: + ASSUME((rt_get_source_attr(e) != RTS_BGP) || + !((struct bgp_channel *) SKIP_BACK(struct channel, in_req, e->sender->req))->base_table); + break; + case FLOWSPEC__MAX: + bug("This never happens."); + } + /* IBGP route reflection, RFC 4456 */ if (p->is_internal && src->is_internal && (p->local_as == src->local_as)) { @@ -1698,7 +1746,7 @@ bgp_preexport(struct channel *c, rte *e) /* Handle well-known communities, RFC 1997 */ struct eattr *com; if (p->cf->interpret_communities && - (com = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)))) + (com = ea_find(e->attrs->eattrs, BGP_EA_ID(BA_COMMUNITY)))) { const struct adata *d = com->u.ptr; @@ -1734,7 +1782,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at /* ORIGIN attribute - mandatory, attach if missing */ if (! bgp_find_attr(attrs0, BA_ORIGIN)) - bgp_set_attr_u32(&attrs, pool, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP); + bgp_set_attr_u32(&attrs, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP); /* AS_PATH attribute - mandatory */ a = bgp_find_attr(attrs0, BA_AS_PATH); @@ -1749,24 +1797,24 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at { /* IBGP or route server -> just ensure there is one */ if (!a) - bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, &null_adata); + bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, &null_adata); } else if (p->is_interior) { /* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */ ad = as_path_prepend2(pool, ad, AS_PATH_CONFED_SEQUENCE, p->public_as); - bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad); + bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, ad); } else /* Regular EBGP (no RS, no confederation) */ { /* Regular EBGP -> prepend ASN as regular sequence */ ad = as_path_prepend2(pool, ad, AS_PATH_SEQUENCE, p->public_as); - bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad); + bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, ad); /* MULTI_EXIT_DESC attribute - accept only if set in export filter */ a = bgp_find_attr(attrs0, BA_MULTI_EXIT_DISC); - if (a && !(a->type & EAF_FRESH)) - bgp_unset_attr(&attrs, pool, BA_MULTI_EXIT_DISC); + if (a && !(a->fresh)) + bgp_unset_attr(&attrs, BA_MULTI_EXIT_DISC); } /* NEXT_HOP attribute - delegated to AF-specific hook */ @@ -1775,16 +1823,16 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at /* LOCAL_PREF attribute - required for IBGP, attach if missing */ if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF)) - bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref); + bgp_set_attr_u32(&attrs, BA_LOCAL_PREF, 0, p->cf->default_local_pref); /* AIGP attribute - accumulate local metric or originate new one */ u64 metric; if (s.local_next_hop && - (bgp_total_aigp_metric_(e->attrs, &metric, &ad) || + (bgp_total_aigp_metric_(e, &metric, &ad) || (c->cf->aigp_originate && bgp_init_aigp_metric(e, &metric, &ad)))) { ad = bgp_aigp_set_metric(pool, ad, metric); - bgp_set_attr_ptr(&attrs, pool, BA_AIGP, 0, ad); + bgp_set_attr_ptr(&attrs, BA_AIGP, 0, ad); } /* IBGP route reflection, RFC 4456 */ @@ -1792,7 +1840,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at { /* ORIGINATOR_ID attribute - attach if not already set */ if (! bgp_find_attr(attrs0, BA_ORIGINATOR_ID)) - bgp_set_attr_u32(&attrs, pool, BA_ORIGINATOR_ID, 0, src->remote_id); + bgp_set_attr_u32(&attrs, BA_ORIGINATOR_ID, 0, src->remote_id); /* CLUSTER_LIST attribute - prepend cluster ID */ a = bgp_find_attr(attrs0, BA_CLUSTER_LIST); @@ -1807,7 +1855,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at ad = int_set_prepend(pool, ad, p->rr_cluster_id); /* Should be at least one prepended cluster ID */ - bgp_set_attr_ptr(&attrs, pool, BA_CLUSTER_LIST, 0, ad); + bgp_set_attr_ptr(&attrs, BA_CLUSTER_LIST, 0, ad); } /* AS4_* transition attributes, RFC 6793 4.2.2 */ @@ -1816,15 +1864,15 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at a = bgp_find_attr(attrs, BA_AS_PATH); if (a && as_path_contains_as4(a->u.ptr)) { - bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr)); - bgp_set_attr_ptr(&attrs, pool, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr)); + bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr)); + bgp_set_attr_ptr(&attrs, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr)); } a = bgp_find_attr(attrs, BA_AGGREGATOR); if (a && aggregator_contains_as4(a->u.ptr)) { - bgp_set_attr_ptr(&attrs, pool, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr)); - bgp_set_attr_ptr(&attrs, pool, BA_AS4_AGGREGATOR, 0, a->u.ptr); + bgp_set_attr_ptr(&attrs, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr)); + bgp_set_attr_ptr(&attrs, BA_AS4_AGGREGATOR, 0, a->u.ptr); } } @@ -1849,13 +1897,15 @@ bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, c if (new) { - struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs->eattrs, bgp_linpool2); + struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs->eattrs, tmp_linpool); + + /* Error during attribute processing */ + if (!attrs) + log(L_ERR "%s: Invalid route %N withdrawn", p->p.name, n); /* If attributes are invalid, we fail back to withdraw */ buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c); path = new->src->global_id; - - lp_flush(bgp_linpool2); } else { @@ -1873,7 +1923,7 @@ bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, c static inline u32 bgp_get_neighbor(rte *r) { - eattr *e = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); + eattr *e = ea_find(r->attrs->eattrs, BGP_EA_ID(BA_AS_PATH)); u32 as; if (e && as_path_get_first_regular(e->u.ptr, &as)) @@ -1894,7 +1944,7 @@ rte_stale(rte *r) return 0; /* If staleness is unknown, compute and cache it */ - eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); + eattr *a = ea_find(r->attrs->eattrs, BGP_EA_ID(BA_COMMUNITY)); if (a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE)) { r->pflags |= BGP_REF_STALE; @@ -1940,8 +1990,8 @@ bgp_rte_better(rte *new, rte *old) return 1; /* Start with local preferences */ - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); + x = ea_find(new->attrs->eattrs, BGP_EA_ID(BA_LOCAL_PREF)); + y = ea_find(old->attrs->eattrs, BGP_EA_ID(BA_LOCAL_PREF)); n = x ? x->u.data : new_bgp->cf->default_local_pref; o = y ? y->u.data : old_bgp->cf->default_local_pref; if (n > o) @@ -1950,8 +2000,8 @@ bgp_rte_better(rte *new, rte *old) return 0; /* RFC 7311 4.1 - Apply AIGP metric */ - u64 n2 = bgp_total_aigp_metric(new->attrs); - u64 o2 = bgp_total_aigp_metric(old->attrs); + u64 n2 = bgp_total_aigp_metric(new); + u64 o2 = bgp_total_aigp_metric(old); if (n2 < o2) return 1; if (n2 > o2) @@ -1960,8 +2010,8 @@ bgp_rte_better(rte *new, rte *old) /* RFC 4271 9.1.2.2. a) Use AS path lengths */ if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths) { - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); + x = ea_find(new->attrs->eattrs, BGP_EA_ID(BA_AS_PATH)); + y = ea_find(old->attrs->eattrs, BGP_EA_ID(BA_AS_PATH)); n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; if (n < o) @@ -1971,8 +2021,8 @@ bgp_rte_better(rte *new, rte *old) } /* RFC 4271 9.1.2.2. b) Use origins */ - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); + x = ea_find(new->attrs->eattrs, BGP_EA_ID(BA_ORIGIN)); + y = ea_find(old->attrs->eattrs, BGP_EA_ID(BA_ORIGIN)); n = x ? x->u.data : ORIGIN_INCOMPLETE; o = y ? y->u.data : ORIGIN_INCOMPLETE; if (n < o) @@ -1994,8 +2044,8 @@ bgp_rte_better(rte *new, rte *old) if (new_bgp->cf->med_metric || old_bgp->cf->med_metric || (bgp_get_neighbor(new) == bgp_get_neighbor(old))) { - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); + x = ea_find(new->attrs->eattrs, BGP_EA_ID(BA_MULTI_EXIT_DISC)); + y = ea_find(old->attrs->eattrs, BGP_EA_ID(BA_MULTI_EXIT_DISC)); n = x ? x->u.data : new_bgp->cf->default_med; o = y ? y->u.data : old_bgp->cf->default_med; if (n < o) @@ -2011,8 +2061,8 @@ bgp_rte_better(rte *new, rte *old) return 1; /* RFC 4271 9.1.2.2. e) Compare IGP metrics */ - n = new_bgp->cf->igp_metric ? new->attrs->igp_metric : 0; - o = old_bgp->cf->igp_metric ? old->attrs->igp_metric : 0; + n = new_bgp->cf->igp_metric ? rt_get_igp_metric(new) : 0; + o = old_bgp->cf->igp_metric ? rt_get_igp_metric(old) : 0; if (n < o) return 1; if (n > o) @@ -2020,8 +2070,8 @@ bgp_rte_better(rte *new, rte *old) /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */ /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */ - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID)); + x = ea_find(new->attrs->eattrs, BGP_EA_ID(BA_ORIGINATOR_ID)); + y = ea_find(old->attrs->eattrs, BGP_EA_ID(BA_ORIGINATOR_ID)); n = x ? x->u.data : new_bgp->remote_id; o = y ? y->u.data : old_bgp->remote_id; @@ -2038,8 +2088,8 @@ bgp_rte_better(rte *new, rte *old) return 0; /* RFC 4456 9. b) Compare cluster list lengths */ - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST)); + x = ea_find(new->attrs->eattrs, BGP_EA_ID(BA_CLUSTER_LIST)); + y = ea_find(old->attrs->eattrs, BGP_EA_ID(BA_CLUSTER_LIST)); n = x ? int_set_get_size(x->u.ptr) : 0; o = y ? int_set_get_size(y->u.ptr) : 0; if (n < o) @@ -2061,17 +2111,20 @@ bgp_rte_mergable(rte *pri, rte *sec) u32 p, s; /* Skip suppressed routes (see bgp_rte_recalculate()) */ - /* LLGR draft - depreference stale routes */ - if (pri->pflags != sec->pflags) + if ((pri->pflags ^ sec->pflags) & BGP_REF_SUPPRESSED) return 0; /* RFC 4271 9.1.2.1. Route resolvability test */ if (rta_resolvable(pri->attrs) != rta_resolvable(sec->attrs)) return 0; + /* LLGR draft - depreference stale routes */ + if (rte_stale(pri) != rte_stale(sec)) + return 0; + /* Start with local preferences */ - x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); - y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); + x = ea_find(pri->attrs->eattrs, BGP_EA_ID(BA_LOCAL_PREF)); + y = ea_find(sec->attrs->eattrs, BGP_EA_ID(BA_LOCAL_PREF)); p = x ? x->u.data : pri_bgp->cf->default_local_pref; s = y ? y->u.data : sec_bgp->cf->default_local_pref; if (p != s) @@ -2080,8 +2133,8 @@ bgp_rte_mergable(rte *pri, rte *sec) /* RFC 4271 9.1.2.2. a) Use AS path lengths */ if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths) { - x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); - y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); + x = ea_find(pri->attrs->eattrs, BGP_EA_ID(BA_AS_PATH)); + y = ea_find(sec->attrs->eattrs, BGP_EA_ID(BA_AS_PATH)); p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; @@ -2093,8 +2146,8 @@ bgp_rte_mergable(rte *pri, rte *sec) } /* RFC 4271 9.1.2.2. b) Use origins */ - x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); - y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); + x = ea_find(pri->attrs->eattrs, BGP_EA_ID(BA_ORIGIN)); + y = ea_find(sec->attrs->eattrs, BGP_EA_ID(BA_ORIGIN)); p = x ? x->u.data : ORIGIN_INCOMPLETE; s = y ? y->u.data : ORIGIN_INCOMPLETE; if (p != s) @@ -2104,8 +2157,8 @@ bgp_rte_mergable(rte *pri, rte *sec) if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric || (bgp_get_neighbor(pri) == bgp_get_neighbor(sec))) { - x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); - y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); + x = ea_find(pri->attrs->eattrs, BGP_EA_ID(BA_MULTI_EXIT_DISC)); + y = ea_find(sec->attrs->eattrs, BGP_EA_ID(BA_MULTI_EXIT_DISC)); p = x ? x->u.data : pri_bgp->cf->default_med; s = y ? y->u.data : sec_bgp->cf->default_med; if (p != s) @@ -2117,8 +2170,8 @@ bgp_rte_mergable(rte *pri, rte *sec) return 0; /* RFC 4271 9.1.2.2. e) Compare IGP metrics */ - p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0; - s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0; + p = pri_bgp->cf->igp_metric ? rt_get_igp_metric(pri) : 0; + s = sec_bgp->cf->igp_metric ? rt_get_igp_metric(sec) : 0; if (p != s) return 0; @@ -2131,7 +2184,7 @@ bgp_rte_mergable(rte *pri, rte *sec) static inline int same_group(rte *r, u32 lpref, u32 lasn) { - return (r->attrs->pref == lpref) && (bgp_get_neighbor(r) == lasn); + return (rt_get_preference(r) == lpref) && (bgp_get_neighbor(r) == lasn); } static inline int @@ -2145,7 +2198,7 @@ int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) { rte *key = new ? new : old; - u32 lpref = key->attrs->pref; + u32 lpref = rt_get_preference(key); u32 lasn = bgp_get_neighbor(key); int old_suppressed = old ? !!(old->pflags & BGP_REF_SUPPRESSED) : 0; @@ -2212,7 +2265,7 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) /* The default case - find a new best-in-group route */ rte *r = new; /* new may not be in the list */ - for (struct rte_storage *s = net->routes; rte_is_valid(&s->rte); s = s->next) + for (struct rte_storage *s = net->routes; rte_is_valid(RTE_OR_NULL(s)); s = s->next) if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn)) { s->rte.pflags |= BGP_REF_SUPPRESSED; @@ -2229,7 +2282,7 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) new->pflags &= ~BGP_REF_SUPPRESSED; /* Found all existing routes mergable with best-in-group */ - for (struct rte_storage *s = net->routes; rte_is_valid(&s->rte); s = s->next) + for (struct rte_storage *s = net->routes; rte_is_valid(RTE_OR_NULL(s)); s = s->next) if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn)) if ((&s->rte != r) && bgp_rte_mergable(r, &s->rte)) s->rte.pflags &= ~BGP_REF_SUPPRESSED; @@ -2270,7 +2323,7 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) rte * bgp_rte_modify_stale(struct rte *r, struct linpool *pool) { - eattr *ea = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); + eattr *ea = ea_find(r->attrs->eattrs, BGP_EA_ID(BA_COMMUNITY)); const struct adata *ad = ea ? ea->u.ptr : NULL; uint flags = ea ? ea->flags : BAF_PARTIAL; @@ -2286,7 +2339,7 @@ bgp_rte_modify_stale(struct rte *r, struct linpool *pool) e0 = *r; e0.attrs = a; - bgp_set_attr_ptr(&(a->eattrs), pool, BA_COMMUNITY, flags, + bgp_set_attr_ptr(&(a->eattrs), BA_COMMUNITY, flags, int_set_add(pool, ad, BGP_COMM_LLGR_STALE)); e0.pflags |= BGP_REF_STALE; @@ -2306,8 +2359,8 @@ bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool) eattr *a4 = bgp_find_attr(*attrs, BA_AS4_AGGREGATOR); /* First, unset AS4_* attributes */ - if (p4) bgp_unset_attr(attrs, pool, BA_AS4_PATH); - if (a4) bgp_unset_attr(attrs, pool, BA_AS4_AGGREGATOR); + if (p4) bgp_unset_attr(attrs, BA_AS4_PATH); + if (a4) bgp_unset_attr(attrs, BA_AS4_AGGREGATOR); /* Handle AGGREGATOR attribute */ if (a2 && a4) @@ -2340,60 +2393,37 @@ bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool) } } -int -bgp_get_attr(const eattr *a, byte *buf, int buflen) -{ - uint i = EA_ID(a->id); - const struct bgp_attr_desc *d; - int len; - - if (bgp_attr_known(i)) - { - d = &bgp_attr_table[i]; - len = bsprintf(buf, "%s", d->name); - buf += len; - if (d->format) - { - *buf++ = ':'; - *buf++ = ' '; - d->format(a, buf, buflen - len - 2); - return GA_FULL; - } - return GA_NAME; - } - - bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : ""); - return GA_NAME; -} - void bgp_get_route_info(rte *e, byte *buf) { - eattr *p = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); - eattr *o = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); + eattr *p = ea_find(e->attrs->eattrs, BGP_EA_ID(BA_AS_PATH)); + eattr *o = ea_find(e->attrs->eattrs, BGP_EA_ID(BA_ORIGIN)); u32 origas; - buf += bsprintf(buf, " (%d", e->attrs->pref); + buf += bsprintf(buf, " (%d", rt_get_preference(e)); - if (e->pflags & BGP_REF_SUPPRESSED) - buf += bsprintf(buf, "-"); - - if (rte_stale(e)) - buf += bsprintf(buf, "s"); - - u64 metric = bgp_total_aigp_metric(e->attrs); - if (metric < BGP_AIGP_MAX) + if (!net_is_flow(e->net)) { - buf += bsprintf(buf, "/%lu", metric); - } - else if (e->attrs->igp_metric) - { - if (!rta_resolvable(e->attrs)) - buf += bsprintf(buf, "/-"); - else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN) - buf += bsprintf(buf, "/?"); - else - buf += bsprintf(buf, "/%d", e->attrs->igp_metric); + if (e->pflags & BGP_REF_SUPPRESSED) + buf += bsprintf(buf, "-"); + + if (rte_stale(e)) + buf += bsprintf(buf, "s"); + + u64 metric = bgp_total_aigp_metric(e); + if (metric < BGP_AIGP_MAX) + { + buf += bsprintf(buf, "/%lu", metric); + } + else if (metric = rt_get_igp_metric(e)) + { + if (!rta_resolvable(e->attrs)) + buf += bsprintf(buf, "/-"); + else if (metric >= IGP_METRIC_UNKNOWN) + buf += bsprintf(buf, "/?"); + else + buf += bsprintf(buf, "/%d", metric); + } } buf += bsprintf(buf, ") ["); diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 5c78bfa1..84430287 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -101,6 +101,7 @@ * RFC 8203 - BGP Administrative Shutdown Communication * RFC 8212 - Default EBGP Route Propagation Behavior without Policies * RFC 8654 - Extended Message Support for BGP + * RFC 9117 - Revised Validation Procedure for BGP Flow Specifications * draft-ietf-idr-ext-opt-param-07 * draft-uttaro-idr-bgp-persistence-04 * draft-walton-bgp-hostname-capability-02 @@ -113,7 +114,7 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "nest/locks.h" #include "conf/conf.h" @@ -125,9 +126,7 @@ #include "bgp.h" -struct linpool *bgp_linpool; /* Global temporary pool */ -struct linpool *bgp_linpool2; /* Global temporary pool for bgp_rt_notify() */ -static list bgp_sockets; /* Global list of listening sockets */ +static list STATIC_LIST_INIT(bgp_sockets); /* Global list of listening sockets */ static void bgp_connect(struct bgp_proto *p); @@ -157,16 +156,17 @@ bgp_open(struct bgp_proto *p) ip_addr addr = p->cf->strict_bind ? p->cf->local_ip : (p->ipv4 ? IPA_NONE4 : IPA_NONE6); uint port = p->cf->local_port; - - /* FIXME: Add some global init? */ - if (!bgp_linpool) - init_list(&bgp_sockets); + uint flags = p->cf->free_bind ? SKF_FREEBIND : 0; + uint flag_mask = SKF_FREEBIND; /* We assume that cf->iface is defined iff cf->local_ip is link-local */ WALK_LIST(bs, bgp_sockets) - if (ipa_equal(bs->sk->saddr, addr) && (bs->sk->sport == port) && - (bs->sk->iface == ifa) && (bs->sk->vrf == p->p.vrf)) + if (ipa_equal(bs->sk->saddr, addr) && + (bs->sk->sport == port) && + (bs->sk->iface == ifa) && + (bs->sk->vrf == p->p.vrf) && + ((bs->sk->flags & flag_mask) == flags)) { bs->uc++; p->sock = bs; @@ -180,7 +180,7 @@ bgp_open(struct bgp_proto *p) sk->sport = port; sk->iface = ifa; sk->vrf = p->p.vrf; - sk->flags = 0; + sk->flags = flags; sk->tos = IP_PREC_INTERNET_CONTROL; sk->rbsize = BGP_RX_BUFFER_SIZE; sk->tbsize = BGP_TX_BUFFER_SIZE; @@ -198,12 +198,6 @@ bgp_open(struct bgp_proto *p) add_tail(&bgp_sockets, &bs->n); - if (!bgp_linpool) - { - bgp_linpool = lp_new_default(proto_pool); - bgp_linpool2 = lp_new_default(proto_pool); - } - return 0; err: @@ -232,15 +226,6 @@ bgp_close(struct bgp_proto *p) rfree(bs->sk); rem_node(&bs->n); mb_free(bs); - - if (!EMPTY_LIST(bgp_sockets)) - return; - - rfree(bgp_linpool); - bgp_linpool = NULL; - - rfree(bgp_linpool2); - bgp_linpool2 = NULL; } static inline int @@ -775,25 +760,25 @@ bgp_handle_graceful_restart(struct bgp_proto *p) { case BGP_GRS_NONE: c->gr_active = BGP_GRS_ACTIVE; - rt_refresh_begin(c->c.table, &c->c); + rt_refresh_begin(c->c.table, &c->c.in_req); break; case BGP_GRS_ACTIVE: - rt_refresh_end(c->c.table, &c->c); - rt_refresh_begin(c->c.table, &c->c); + rt_refresh_end(c->c.table, &c->c.in_req); + rt_refresh_begin(c->c.table, &c->c.in_req); break; case BGP_GRS_LLGR: - rt_refresh_begin(c->c.table, &c->c); - rt_modify_stale(c->c.table, &c->c); + rt_refresh_begin(c->c.table, &c->c.in_req); + rt_modify_stale(c->c.table, &c->c.in_req); break; } } else { /* Just flush the routes */ - rt_refresh_begin(c->c.table, &c->c); - rt_refresh_end(c->c.table, &c->c); + rt_refresh_begin(c->c.table, &c->c.in_req); + rt_refresh_end(c->c.table, &c->c.in_req); } /* Reset bucket and prefix tables */ @@ -834,7 +819,7 @@ bgp_graceful_restart_done(struct bgp_channel *c) BGP_TRACE(D_EVENTS, "Neighbor graceful restart done"); tm_stop(c->stale_timer); - rt_refresh_end(c->c.table, &c->c); + rt_refresh_end(c->c.table, &c->c.in_req); } /** @@ -876,7 +861,7 @@ bgp_graceful_restart_timeout(timer *t) /* Channel is in GR, and supports LLGR -> start LLGR */ c->gr_active = BGP_GRS_LLGR; tm_start(c->stale_timer, c->stale_time S); - rt_modify_stale(c->c.table, &c->c); + rt_modify_stale(c->c.table, &c->c.in_req); } } else @@ -914,10 +899,10 @@ bgp_refresh_begin(struct bgp_channel *c) { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; } c->load_state = BFS_REFRESHING; - rt_refresh_begin(c->c.table, &c->c); + rt_refresh_begin(c->c.table, &c->c.in_req); if (c->c.in_table) - rt_refresh_begin(c->c.in_table, &c->c); + rt_refresh_begin(c->c.in_table, &c->c.in_req); } /** @@ -938,7 +923,7 @@ bgp_refresh_end(struct bgp_channel *c) { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; } c->load_state = BFS_NONE; - rt_refresh_end(c->c.table, &c->c); + rt_refresh_end(c->c.table, &c->c.in_req); if (c->c.in_table) rt_prune_sync(c->c.in_table, 0); @@ -1736,6 +1721,9 @@ bgp_channel_init(struct channel *C, struct channel_config *CF) if (cf->igp_table_ip6) c->igp_table_ip6 = cf->igp_table_ip6->table; + + if (cf->base_table) + c->base_table = cf->base_table->table; } static int @@ -1751,6 +1739,12 @@ bgp_channel_start(struct channel *C) if (c->igp_table_ip6) rt_lock_table(c->igp_table_ip6); + if (c->base_table) + { + rt_lock_table(c->base_table); + rt_flowspec_link(c->base_table, c->c.table); + } + c->pool = p->p.pool; // XXXX bgp_init_bucket_table(c); bgp_init_prefix_table(c); @@ -1835,6 +1829,12 @@ bgp_channel_cleanup(struct channel *C) if (c->igp_table_ip6) rt_unlock_table(c->igp_table_ip6); + if (c->base_table) + { + rt_flowspec_unlink(c->base_table, c->c.table); + rt_unlock_table(c->base_table); + } + c->index = 0; /* Cleanup rest of bgp_channel starting at pool field */ @@ -1882,6 +1882,25 @@ bgp_default_igp_table(struct bgp_config *cf, struct bgp_channel_config *cc, u32 cf_error("Undefined IGP table"); } +static struct rtable_config * +bgp_default_base_table(struct bgp_config *cf, struct bgp_channel_config *cc) +{ + /* Expected table type */ + u32 type = (cc->afi == BGP_AF_FLOW4) ? NET_IP4 : NET_IP6; + + /* First, try appropriate IP channel */ + u32 afi2 = BGP_AF(BGP_AFI(cc->afi), BGP_SAFI_UNICAST); + struct bgp_channel_config *cc2 = bgp_find_channel_config(cf, afi2); + if (cc2 && (cc2->c.table->addr_type == type)) + return cc2->c.table; + + /* Last, try default table of given type */ + struct rtable_config *tab = cf->c.global->def_tables[type]; + if (tab) + return tab; + + cf_error("Undefined base table"); +} void bgp_postconfig(struct proto_config *CF) @@ -2026,6 +2045,14 @@ bgp_postconfig(struct proto_config *CF) cf_error("Mismatched IGP table type"); } + /* Default value of base table */ + if ((BGP_SAFI(cc->afi) == BGP_SAFI_FLOW) && cc->validate && !cc->base_table) + cc->base_table = bgp_default_base_table(cf, cc); + + if (cc->base_table && !cc->base_table->trie_used) + cf_error("Flowspec validation requires base table (%s) with trie", + cc->base_table->name); + if (cf->multihop && (cc->gw_mode == GW_DIRECT)) cf_error("Multihop BGP cannot use direct gateway mode"); @@ -2094,7 +2121,7 @@ bgp_reconfigure(struct proto *P, struct proto_config *CF) return same; } -#define IGP_TABLE(cf, sym) ((cf)->igp_table_##sym ? (cf)->igp_table_##sym ->table : NULL ) +#define TABLE(cf, NAME) ((cf)->NAME ? (cf)->NAME->table : NULL ) static int bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *import_changed, int *export_changed) @@ -2105,6 +2132,7 @@ bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *impor struct bgp_channel_config *old = c->cf; if ((new->secondary != old->secondary) || + (new->validate != old->validate) || (new->gr_able != old->gr_able) || (new->llgr_able != old->llgr_able) || (new->llgr_time != old->llgr_time) || @@ -2112,8 +2140,9 @@ bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *impor (new->add_path != old->add_path) || (new->import_table != old->import_table) || (new->export_table != old->export_table) || - (IGP_TABLE(new, ip4) != IGP_TABLE(old, ip4)) || - (IGP_TABLE(new, ip6) != IGP_TABLE(old, ip6))) + (TABLE(new, igp_table_ip4) != TABLE(old, igp_table_ip4)) || + (TABLE(new, igp_table_ip6) != TABLE(old, igp_table_ip6)) || + (TABLE(new, base_table) != TABLE(old, base_table))) return 0; if (new->mandatory && !old->mandatory && (C->channel_state != CS_UP)) @@ -2438,6 +2467,9 @@ bgp_show_proto_info(struct proto *P) else cli_msg(-1006, " Neighbor address: %I%J", p->remote_ip, p->cf->iface); + if ((p->conn == &p->outgoing_conn) && (p->cf->remote_port != BGP_PORT)) + cli_msg(-1006, " Neighbor port: %u", p->cf->remote_port); + cli_msg(-1006, " Neighbor AS: %u", p->remote_as); cli_msg(-1006, " Local AS: %u", p->cf->local_as); @@ -2527,6 +2559,9 @@ bgp_show_proto_info(struct proto *P) if (c->igp_table_ip6) cli_msg(-1006, " IGP IPv6 table: %s", c->igp_table_ip6->name); + + if (c->base_table) + cli_msg(-1006, " Base table: %s", c->base_table->name); } } } @@ -2544,7 +2579,6 @@ struct channel_class channel_bgp = { struct protocol proto_bgp = { .name = "BGP", .template = "bgp%d", - .class = PROTOCOL_BGP, .preference = DEF_PREF_BGP, .channel_mask = NB_IP | NB_VPN | NB_FLOW, .proto_size = sizeof(struct bgp_proto), @@ -2556,7 +2590,12 @@ struct protocol proto_bgp = { .reconfigure = bgp_reconfigure, .copy_config = bgp_copy_config, .get_status = bgp_get_status, - .get_attr = bgp_get_attr, .get_route_info = bgp_get_route_info, .show_proto_info = bgp_show_proto_info }; + +void bgp_build(void) +{ + proto_build(&proto_bgp); + bgp_register_attrs(); +} diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index c79dd1b2..662d9d48 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -14,13 +14,12 @@ #include <stdint.h> #include <setjmp.h> #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/bfd.h" //#include "lib/lists.h" #include "lib/hash.h" #include "lib/socket.h" -struct linpool; struct eattr; @@ -86,6 +85,7 @@ struct bgp_config { int peer_type; /* Internal or external BGP (BGP_PT_*, optional) */ int multihop; /* Number of hops if multihop */ int strict_bind; /* Bind listening socket to local address */ + int free_bind; /* Bind listening socket with SKF_FREEBIND */ int ttl_security; /* Enable TTL security [RFC 5082] */ int compare_path_lengths; /* Use path lengths when selecting best route */ int med_metric; /* Compare MULTI_EXIT_DISC even between routes from differen ASes */ @@ -146,6 +146,7 @@ struct bgp_channel_config { u8 mandatory; /* Channel is mandatory in capability negotiation */ u8 gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */ u8 secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */ + u8 validate; /* Validate Flowspec per RFC 8955 (6) */ u8 gr_able; /* Allow full graceful restart for the channel */ u8 llgr_able; /* Allow full long-lived GR for the channel */ uint llgr_time; /* Long-lived graceful restart stale time */ @@ -159,6 +160,7 @@ struct bgp_channel_config { struct rtable_config *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */ struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */ + struct rtable_config *base_table; /* Base table for Flowspec validation */ }; #define BGP_PT_INTERNAL 1 @@ -344,6 +346,7 @@ struct bgp_channel { rtable *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */ rtable *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */ + rtable *base_table; /* Base table for Flowspec validation */ /* Rest are zeroed when down */ pool *pool; @@ -400,7 +403,7 @@ struct bgp_export_state { int mpls; u32 attrs_seen[1]; - uint err_withdraw; + uint err_reject; uint local_next_hop; }; @@ -426,6 +429,7 @@ struct bgp_parse_state { int as4_session; int add_path; int mpls; + int reach_nlri_step; u32 attrs_seen[256/32]; @@ -452,7 +456,6 @@ struct bgp_parse_state { uint err_subcode; jmp_buf err_jmpbuf; - struct hostentry *hostentry; adata *mpls_labels; /* Cached state for bgp_rte_update() */ @@ -493,9 +496,6 @@ bgp_parse_error(struct bgp_parse_state *s, uint subcode) longjmp(s->err_jmpbuf, 1); } -extern struct linpool *bgp_linpool; -extern struct linpool *bgp_linpool2; - void bgp_start_timer(timer *t, uint value); void bgp_check_config(struct bgp_config *c); @@ -519,7 +519,9 @@ struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id); static inline int rta_resolvable(rta *a) { - return a->dest == RTD_UNICAST; + eattr *nhea = ea_find(a->eattrs, &ea_gen_nexthop); + struct nexthop_adata *nhad = (void *) nhea->u.ptr; + return NEXTHOP_IS_REACHABLE(nhad) || (nhad->dest != RTD_UNREACHABLE); } @@ -537,34 +539,13 @@ rta_resolvable(rta *a) /* attrs.c */ -static inline eattr * -bgp_find_attr(ea_list *attrs, uint code) -{ - return ea_find(attrs, EA_CODE(PROTOCOL_BGP, code)); -} - eattr * -bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val); +bgp_find_attr(ea_list *attrs, uint code); -static inline void -bgp_set_attr_u32(ea_list **to, struct linpool *pool, uint code, uint flags, u32 val) -{ bgp_set_attr(to, pool, code, flags, (uintptr_t) val); } - -static inline void -bgp_set_attr_ptr(ea_list **to, struct linpool *pool, uint code, uint flags, const struct adata *val) -{ bgp_set_attr(to, pool, code, flags, (uintptr_t) val); } - -static inline void -bgp_set_attr_data(ea_list **to, struct linpool *pool, uint code, uint flags, void *data, uint len) -{ - struct adata *a = lp_alloc_adata(pool, len); - bmemcpy(a->data, data, len); - bgp_set_attr(to, pool, code, flags, (uintptr_t) a); -} - -static inline void -bgp_unset_attr(ea_list **to, struct linpool *pool, uint code) -{ eattr *e = bgp_set_attr(to, pool, code, 0, 0); e->type = EAF_TYPE_UNDEF; } +void bgp_set_attr_u32(ea_list **to, uint code, uint flags, u32 val); +void bgp_set_attr_ptr(ea_list **to, uint code, uint flags, const struct adata *ad); +void bgp_set_attr_data(ea_list **to, uint code, uint flags, void *data, uint len); +void bgp_unset_attr(ea_list **to, uint code); int bgp_encode_mp_reach_mrt(struct bgp_write_state *s, eattr *a, byte *buf, uint size); @@ -586,26 +567,27 @@ int bgp_rte_better(struct rte *, struct rte *); int bgp_rte_mergable(rte *pri, rte *sec); int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best); struct rte *bgp_rte_modify_stale(struct rte *r, struct linpool *pool); -u32 bgp_rte_igp_metric(struct rte *); +u32 bgp_rte_igp_metric(const rte *); void bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old); int bgp_preexport(struct channel *, struct rte *); -int bgp_get_attr(const struct eattr *e, byte *buf, int buflen); void bgp_get_route_info(struct rte *, byte *); -int bgp_total_aigp_metric_(rta *a, u64 *metric, const struct adata **ad); +int bgp_total_aigp_metric_(const rte *e, u64 *metric, const struct adata **ad); #define BGP_AIGP_METRIC 1 #define BGP_AIGP_MAX U64(0xffffffffffffffff) static inline u64 -bgp_total_aigp_metric(rta *a) +bgp_total_aigp_metric(const rte *e) { u64 metric = BGP_AIGP_MAX; const struct adata *ad; - bgp_total_aigp_metric_(a, &metric, &ad); + bgp_total_aigp_metric_(e, &metric, &ad); return metric; } +void bgp_register_attrs(void); + /* packets.c */ @@ -642,26 +624,31 @@ void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to); #define BAF_DECODE_FLAGS 0x0100 /* Private flag - attribute flags are handled by the decode hook */ -#define BA_ORIGIN 0x01 /* RFC 4271 */ /* WM */ -#define BA_AS_PATH 0x02 /* WM */ -#define BA_NEXT_HOP 0x03 /* WM */ -#define BA_MULTI_EXIT_DISC 0x04 /* ON */ -#define BA_LOCAL_PREF 0x05 /* WD */ -#define BA_ATOMIC_AGGR 0x06 /* WD */ -#define BA_AGGREGATOR 0x07 /* OT */ -#define BA_COMMUNITY 0x08 /* RFC 1997 */ /* OT */ -#define BA_ORIGINATOR_ID 0x09 /* RFC 4456 */ /* ON */ -#define BA_CLUSTER_LIST 0x0a /* RFC 4456 */ /* ON */ -#define BA_MP_REACH_NLRI 0x0e /* RFC 4760 */ -#define BA_MP_UNREACH_NLRI 0x0f /* RFC 4760 */ -#define BA_EXT_COMMUNITY 0x10 /* RFC 4360 */ -#define BA_AS4_PATH 0x11 /* RFC 6793 */ -#define BA_AS4_AGGREGATOR 0x12 /* RFC 6793 */ -#define BA_AIGP 0x1a /* RFC 7311 */ -#define BA_LARGE_COMMUNITY 0x20 /* RFC 8092 */ +enum bgp_attr_id { + BA_ORIGIN = 0x01, /* RFC 4271 */ /* WM */ + BA_AS_PATH = 0x02, /* WM */ + BA_NEXT_HOP = 0x03, /* WM */ + BA_MULTI_EXIT_DISC = 0x04, /* ON */ + BA_LOCAL_PREF = 0x05, /* WD */ + BA_ATOMIC_AGGR = 0x06, /* WD */ + BA_AGGREGATOR = 0x07, /* OT */ + BA_COMMUNITY = 0x08, /* RFC 1997 */ /* OT */ + BA_ORIGINATOR_ID = 0x09, /* RFC 4456 */ /* ON */ + BA_CLUSTER_LIST = 0x0a, /* RFC 4456 */ /* ON */ + BA_MP_REACH_NLRI = 0x0e, /* RFC 4760 */ + BA_MP_UNREACH_NLRI = 0x0f, /* RFC 4760 */ + BA_EXT_COMMUNITY = 0x10, /* RFC 4360 */ + BA_AS4_PATH = 0x11, /* RFC 6793 */ + BA_AS4_AGGREGATOR = 0x12, /* RFC 6793 */ + BA_AIGP = 0x1a, /* RFC 7311 */ + BA_LARGE_COMMUNITY = 0x20, /* RFC 8092 */ /* Bird's private internal BGP attributes */ -#define BA_MPLS_LABEL_STACK 0xfe /* MPLS label stack transfer attribute */ + BA_MPLS_LABEL_STACK = 0x100, /* MPLS label stack transfer attribute */ + +/* Maximum */ + BGP_ATTR_MAX, +}; /* BGP connection states */ diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 2dfbdca9..24f3ec8f 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -19,19 +19,18 @@ CF_DECLS CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE, MULTIHOP, STARTUP, VIA, NEXT, HOP, SELF, DEFAULT, PATH, METRIC, ERROR, - START, DELAY, FORGET, WAIT, ENABLE, DISABLE, AFTER, BGP_PATH, - BGP_LOCAL_PREF, BGP_MED, BGP_ORIGIN, BGP_NEXT_HOP, BGP_ATOMIC_AGGR, - BGP_AGGREGATOR, BGP_COMMUNITY, BGP_EXT_COMMUNITY, BGP_LARGE_COMMUNITY, + START, DELAY, FORGET, WAIT, ENABLE, DISABLE, AFTER, + BGP_LOCAL_PREF, BGP_MED, SOURCE, ADDRESS, PASSWORD, RR, RS, CLIENT, CLUSTER, ID, AS4, ADVERTISE, IPV4, CAPABILITIES, LIMIT, PASSIVE, PREFER, OLDER, MISSING, LLADDR, - DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, - BGP_CLUSTER_LIST, IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, + DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES, + IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX, GRACEFUL, RESTART, AWARE, CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY, STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6, LONG, LIVED, STALE, IMPORT, IBGP, EBGP, MANDATORY, INTERNAL, EXTERNAL, SETS, - DYNAMIC, RANGE, NAME, DIGITS, BGP_AIGP, AIGP, ORIGINATE, COST, ENFORCE, - FIRST) + DYNAMIC, RANGE, NAME, DIGITS, AIGP, ORIGINATE, COST, ENFORCE, + FIRST, FREE, VALIDATE, BASE) %type <i> bgp_nh %type <i32> bgp_afi @@ -44,6 +43,8 @@ CF_KEYWORDS(CEASE, PREFIX, LIMIT, HIT, ADMINISTRATIVE, SHUTDOWN, RESET, PEER, CF_GRAMMAR +toksym: BGP_MED | BGP_LOCAL_PREF | SOURCE ; + proto: bgp_proto '}' ; bgp_proto_start: proto_start BGP { @@ -155,6 +156,7 @@ bgp_proto: } | bgp_proto DYNAMIC NAME DIGITS expr ';' { BGP_CFG->dynamic_name_digits = $5; if ($5>10) cf_error("Dynamic name digits must be at most 10"); } | bgp_proto STRICT BIND bool ';' { BGP_CFG->strict_bind = $4; } + | bgp_proto FREE BIND bool ';' { BGP_CFG->free_bind = $4; } | bgp_proto PATH METRIC bool ';' { BGP_CFG->compare_path_lengths = $4; } | bgp_proto MED METRIC bool ';' { BGP_CFG->med_metric = $4; } | bgp_proto IGP METRIC bool ';' { BGP_CFG->igp_metric = $4; } @@ -255,6 +257,11 @@ bgp_channel_item: | GATEWAY DIRECT { BGP_CC->gw_mode = GW_DIRECT; } | GATEWAY RECURSIVE { BGP_CC->gw_mode = GW_RECURSIVE; } | SECONDARY bool { BGP_CC->secondary = $2; } + | VALIDATE bool { + BGP_CC->validate = $2; + if (BGP_SAFI(BGP_CC->afi) != BGP_SAFI_FLOW) + cf_error("Validate option limited to flowspec channels"); + } | GRACEFUL RESTART bool { BGP_CC->gr_able = $3; } | LONG LIVED GRACEFUL RESTART bool { BGP_CC->llgr_able = $5; } | LONG LIVED STALE TIME expr { BGP_CC->llgr_time = $5; } @@ -278,6 +285,16 @@ bgp_channel_item: else cf_error("Mismatched IGP table type"); } + | BASE TABLE rtable { + if (BGP_SAFI(BGP_CC->afi) != BGP_SAFI_FLOW) + cf_error("Base table option limited to flowspec channels"); + + if (((BGP_CC->afi == BGP_AF_FLOW4) && ($3->addr_type == NET_IP4)) || + ((BGP_CC->afi == BGP_AF_FLOW6) && ($3->addr_type == NET_IP6))) + BGP_CC->base_table = $3; + else + cf_error("Mismatched base table type"); + } ; bgp_channel_opts: @@ -300,36 +317,6 @@ bgp_channel_end: bgp_proto_channel: bgp_channel_start bgp_channel_opt_list bgp_channel_end; - -dynamic_attr: BGP_ORIGIN - { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_ENUM_BGP_ORIGIN, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); } ; -dynamic_attr: BGP_PATH - { $$ = f_new_dynamic_attr(EAF_TYPE_AS_PATH, T_PATH, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); } ; -dynamic_attr: BGP_NEXT_HOP - { $$ = f_new_dynamic_attr(EAF_TYPE_IP_ADDRESS, T_IP, EA_CODE(PROTOCOL_BGP, BA_NEXT_HOP)); } ; -dynamic_attr: BGP_MED - { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); } ; -dynamic_attr: BGP_LOCAL_PREF - { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); } ; -dynamic_attr: BGP_ATOMIC_AGGR - { $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_ATOMIC_AGGR)); } ; -dynamic_attr: BGP_AGGREGATOR - { $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_AGGREGATOR)); } ; -dynamic_attr: BGP_COMMUNITY - { $$ = f_new_dynamic_attr(EAF_TYPE_INT_SET, T_CLIST, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); } ; -dynamic_attr: BGP_ORIGINATOR_ID - { $$ = f_new_dynamic_attr(EAF_TYPE_ROUTER_ID, T_QUAD, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID)); } ; -dynamic_attr: BGP_CLUSTER_LIST - { $$ = f_new_dynamic_attr(EAF_TYPE_INT_SET, T_CLIST, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST)); } ; -dynamic_attr: BGP_EXT_COMMUNITY - { $$ = f_new_dynamic_attr(EAF_TYPE_EC_SET, T_ECLIST, EA_CODE(PROTOCOL_BGP, BA_EXT_COMMUNITY)); } ; -dynamic_attr: BGP_AIGP - { $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_AIGP)); } ; -dynamic_attr: BGP_LARGE_COMMUNITY - { $$ = f_new_dynamic_attr(EAF_TYPE_LC_SET, T_LCLIST, EA_CODE(PROTOCOL_BGP, BA_LARGE_COMMUNITY)); } ; - - - CF_ENUM(T_ENUM_BGP_ORIGIN, ORIGIN_, IGP, EGP, INCOMPLETE) CF_CODE diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 647551e5..9911738d 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -15,8 +15,8 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" -#include "nest/attrs.h" +#include "nest/rt.h" +#include "lib/attrs.h" #include "proto/mrt/mrt.h" #include "conf/conf.h" #include "lib/unaligned.h" @@ -932,11 +932,15 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len) #define WITHDRAW(msg, args...) \ ({ REPORT(msg, ## args); s->err_withdraw = 1; return; }) +#define REJECT(msg, args...) \ + ({ log(L_ERR "%s: " msg, s->proto->p.name, ## args); s->err_reject = 1; return; }) + #define BAD_AFI "Unexpected AF <%u/%u> in UPDATE" #define BAD_NEXT_HOP "Invalid NEXT_HOP attribute" #define NO_NEXT_HOP "Missing NEXT_HOP attribute" #define NO_LABEL_STACK "Missing MPLS stack" +#define MISMATCHED_AF " - mismatched address family (%I for %s)" static void bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll) @@ -949,67 +953,86 @@ bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll) neighbor *nbr = NULL; /* GW_DIRECT -> single_hop -> p->neigh != NULL */ - if (ipa_nonzero(gw)) + if (ipa_nonzero2(gw)) nbr = neigh_find(&p->p, gw, NULL, 0); else if (ipa_nonzero(ll)) nbr = neigh_find(&p->p, ll, p->neigh->iface, 0); + else + WITHDRAW(BAD_NEXT_HOP " - zero address"); - if (!nbr || (nbr->scope == SCOPE_HOST)) - WITHDRAW(BAD_NEXT_HOP); + if (!nbr) + WITHDRAW(BAD_NEXT_HOP " - address %I not directly reachable", ipa_nonzero(gw) ? gw : ll); - a->dest = RTD_UNICAST; - a->nh.gw = nbr->addr; - a->nh.iface = nbr->iface; - a->igp_metric = c->cf->cost; + if (nbr->scope == SCOPE_HOST) + WITHDRAW(BAD_NEXT_HOP " - address %I is local", nbr->addr); + + ea_set_attr_u32(&a->eattrs, &ea_gen_igp_metric, 0, c->cf->cost); + + struct nexthop_adata nhad = { + .nh = { + .gw = nbr->addr, + .iface = nbr->iface, + }, + .ad = { + .length = sizeof nhad - sizeof nhad.ad, + }, + }; + ea_set_attr_data(&a->eattrs, &ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length); } else /* GW_RECURSIVE */ { - if (ipa_zero(gw)) - WITHDRAW(BAD_NEXT_HOP); + if (ipa_zero2(gw)) + WITHDRAW(BAD_NEXT_HOP " - zero address"); rtable *tab = ipa_is_ip4(gw) ? c->igp_table_ip4 : c->igp_table_ip6; - s->hostentry = rt_get_hostentry(tab, gw, ll, c->c.table); - - if (!s->mpls) - rta_apply_hostentry(a, s->hostentry, NULL); - - /* With MPLS, hostentry is applied later in bgp_apply_mpls_labels() */ + if (s->mpls) + { + u32 labels[BGP_MPLS_MAX]; + ea_set_hostentry(&a->eattrs, c->c.table, tab, gw, ll, BGP_MPLS_MAX, labels); + } + else + ea_set_hostentry(&a->eattrs, c->c.table, tab, gw, ll, 0, NULL); } } static void -bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 *labels, uint lnum) +bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 lnum, u32 labels[lnum]) { if (lnum > MPLS_MAX_LABEL_STACK) { REPORT("Too many MPLS labels ($u)", lnum); - a->dest = RTD_UNREACHABLE; - a->hostentry = NULL; - a->nh = (struct nexthop) { }; + ea_set_dest(&a->eattrs, 0, RTD_UNREACHABLE); return; } /* Handle implicit NULL as empty MPLS stack */ if ((lnum == 1) && (labels[0] == BGP_MPLS_NULL)) - lnum = 0; + lnum = s->mpls_labels->length = 0; if (s->channel->cf->gw_mode == GW_DIRECT) { - a->nh.labels = lnum; - memcpy(a->nh.label, labels, 4*lnum); + eattr *e = ea_find(a->eattrs, &ea_gen_nexthop); + struct { + struct nexthop_adata nhad; + u32 labels[MPLS_MAX_LABEL_STACK]; + } nh; + + memcpy(&nh.nhad, e->u.ptr, sizeof(struct adata) + e->u.ptr->length); + nh.nhad.nh.labels = lnum; + memcpy(nh.labels, labels, lnum * sizeof(u32)); + nh.nhad.ad.length = sizeof nh.nhad + lnum * sizeof(u32); } else /* GW_RECURSIVE */ { - mpls_label_stack ms; - - ms.len = lnum; - memcpy(ms.stack, labels, 4*lnum); - rta_apply_hostentry(a, s->hostentry, &ms); + eattr *e = ea_find(a->eattrs, &ea_gen_hostentry); + ASSERT_DIE(e); + struct hostentry_adata *head = (void *) e->u.ptr; + memcpy(&head->labels, labels, lnum * sizeof(u32)); + head->ad.length = (void *)(&head->labels[lnum]) - (void *) head->ad.data; } } - static int bgp_match_src(struct bgp_export_state *s, int mode) { @@ -1039,7 +1062,7 @@ bgp_use_next_hop(struct bgp_export_state *s, eattr *a) return 1; /* Keep it when explicitly set in export filter */ - if (a->type & EAF_FRESH) + if (a->fresh) return 1; /* Check for non-matching AF */ @@ -1056,7 +1079,7 @@ bgp_use_next_hop(struct bgp_export_state *s, eattr *a) return p->neigh && (p->neigh->iface == ifa); } -static inline int +static inline struct nexthop * bgp_use_gateway(struct bgp_export_state *s) { struct bgp_proto *p = s->proto; @@ -1065,22 +1088,32 @@ bgp_use_gateway(struct bgp_export_state *s) /* Handle next hop self option - also applies to gateway */ if (c->cf->next_hop_self && bgp_match_src(s, c->cf->next_hop_self)) - return 0; + return NULL; + + eattr *nhea = ea_find(ra->eattrs, &ea_gen_nexthop); + if (!nhea) + return NULL; /* We need one valid global gateway */ - if ((ra->dest != RTD_UNICAST) || ra->nh.next || ipa_zero(ra->nh.gw) || ipa_is_link_local(ra->nh.gw)) - return 0; + struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr; + if (!NEXTHOP_IS_REACHABLE(nhad) || + !NEXTHOP_ONE(nhad) || ipa_zero(nhad->nh.gw) || + ipa_is_link_local(nhad->nh.gw)) + return NULL; /* Check for non-matching AF */ - if ((ipa_is_ip4(ra->nh.gw) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop) - return 0; + if ((ipa_is_ip4(nhad->nh.gw) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop) + return NULL; /* Use it when exported to internal peers */ if (p->is_interior) - return 1; + return &nhad->nh; /* Use it when forwarded to single-hop BGP peer on on the same iface */ - return p->neigh && (p->neigh->iface == ra->nh.iface); + if (p->neigh && (p->neigh->iface == nhad->nh.iface)) + return &nhad->nh; + + return NULL; } static void @@ -1088,31 +1121,31 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to) { if (!a || !bgp_use_next_hop(s, a)) { - if (bgp_use_gateway(s)) + struct nexthop *nhloc; + if (nhloc = bgp_use_gateway(s)) { - rta *ra = s->route->attrs; - ip_addr nh[1] = { ra->nh.gw }; - bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16); + ip_addr nh[1] = { nhloc->gw }; + bgp_set_attr_data(to, BA_NEXT_HOP, 0, nh, 16); if (s->mpls) { u32 implicit_null = BGP_MPLS_NULL; - u32 *labels = ra->nh.labels ? ra->nh.label : &implicit_null; - uint lnum = ra->nh.labels ? ra->nh.labels : 1; - bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4); + u32 *labels = nhloc->labels ? nhloc->label : &implicit_null; + uint lnum = nhloc->labels ? nhloc->labels : 1; + bgp_set_attr_data(to, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4); } } else { ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr }; - bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16); + bgp_set_attr_data(to, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16); s->local_next_hop = 1; /* TODO: Use local MPLS assigned label */ if (s->mpls) { u32 implicit_null = BGP_MPLS_NULL; - bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, &implicit_null, 4); + bgp_set_attr_data(to, BA_MPLS_LABEL_STACK, 0, &implicit_null, 4); } } } @@ -1120,28 +1153,28 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to) /* Check if next hop is valid */ a = bgp_find_attr(*to, BA_NEXT_HOP); if (!a) - WITHDRAW(NO_NEXT_HOP); + REJECT(NO_NEXT_HOP); ip_addr *nh = (void *) a->u.ptr->data; ip_addr peer = s->proto->remote_ip; uint len = a->u.ptr->length; /* Forbid zero next hop */ - if (ipa_zero(nh[0]) && ((len != 32) || ipa_zero(nh[1]))) - WITHDRAW(BAD_NEXT_HOP); + if (ipa_zero2(nh[0]) && ((len != 32) || ipa_zero(nh[1]))) + REJECT(BAD_NEXT_HOP " - zero address"); /* Forbid next hop equal to neighbor IP */ if (ipa_equal(peer, nh[0]) || ((len == 32) && ipa_equal(peer, nh[1]))) - WITHDRAW(BAD_NEXT_HOP); + REJECT(BAD_NEXT_HOP " - neighbor address %I", peer); /* Forbid next hop with non-matching AF */ if ((ipa_is_ip4(nh[0]) != bgp_channel_is_ipv4(s->channel)) && !s->channel->ext_next_hop) - WITHDRAW(BAD_NEXT_HOP); + REJECT(BAD_NEXT_HOP MISMATCHED_AF, nh[0], s->channel->desc->name); /* Just check if MPLS stack */ if (s->mpls && !bgp_find_attr(*to, BA_MPLS_LABEL_STACK)) - WITHDRAW(NO_LABEL_STACK); + REJECT(NO_LABEL_STACK); } static uint @@ -1212,11 +1245,11 @@ bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a) ad->length = 16; if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop) - WITHDRAW(BAD_NEXT_HOP); + WITHDRAW(BAD_NEXT_HOP MISMATCHED_AF, nh[0], c->desc->name); // XXXX validate next hop - bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad); + bgp_set_attr_ptr(&(a->eattrs), BA_NEXT_HOP, 0, ad); bgp_apply_next_hop(s, a, nh[0], nh[1]); } @@ -1293,11 +1326,11 @@ bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a) bgp_parse_error(s, 9); if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop) - WITHDRAW(BAD_NEXT_HOP); + WITHDRAW(BAD_NEXT_HOP MISMATCHED_AF, nh[0], c->desc->name); // XXXX validate next hop - bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad); + bgp_set_attr_ptr(&(a->eattrs), BA_NEXT_HOP, 0, ad); bgp_apply_next_hop(s, a, nh[0], nh[1]); } @@ -1322,11 +1355,11 @@ bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, ui } static void -bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to) +bgp_update_next_hop_none(struct bgp_export_state *s UNUSED, eattr *a, ea_list **to) { /* NEXT_HOP shall not pass */ if (a) - bgp_unset_attr(to, s->pool, BA_NEXT_HOP); + bgp_unset_attr(to, BA_NEXT_HOP); } @@ -1335,7 +1368,7 @@ bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to) */ static void -bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0) +bgp_rte_update(struct bgp_parse_state *s, const net_addr *n, u32 path_id, rta *a0) { if (path_id != s->last_id) { @@ -1348,6 +1381,10 @@ bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0) if (!a0) { + /* Route update was changed to withdraw */ + if (s->err_withdraw && s->reach_nlri_step) + REPORT("Invalid route %N withdrawn", n); + /* Route withdraw */ rte_update(&s->channel->c, n, NULL, s->last_src); return; @@ -1392,7 +1429,8 @@ bgp_encode_mpls_labels(struct bgp_write_state *s UNUSED, const adata *mpls, byte static void bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, rta *a) { - u32 labels[BGP_MPLS_MAX], label; + u32 labels[BGP_MPLS_MAX]; + u32 label; uint lnum = 0; do { @@ -1406,7 +1444,7 @@ bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *p /* RFC 8277 2.4 - withdraw does not have variable-size MPLS stack but fixed-size 24-bit Compatibility field, which MUST be ignored */ - if (!a && !s->err_withdraw) + if (!s->reach_nlri_step) return; } while (!(label & BGP_MPLS_BOS)); @@ -1414,19 +1452,8 @@ bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *p if (!a) return; - /* Attach MPLS attribute unless we already have one */ - if (!s->mpls_labels) - { - s->mpls_labels = lp_alloc_adata(s->pool, 4*BGP_MPLS_MAX); - bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_MPLS_LABEL_STACK, 0, s->mpls_labels); - } - - /* Overwrite data in the attribute */ - s->mpls_labels->length = 4*lnum; - memcpy(s->mpls_labels->data, labels, 4*lnum); - /* Update next hop entry in rta */ - bgp_apply_mpls_labels(s, a, labels, lnum); + bgp_apply_mpls_labels(s, a, lnum, labels); /* Attributes were changed, invalidate cached entry */ rta_free(s->cached_rta); @@ -2289,11 +2316,14 @@ bgp_create_update(struct bgp_channel *c, byte *buf) again: ; + struct lp_state tmpp; + lp_save(tmp_linpool, &tmpp); + /* Initialize write state */ struct bgp_write_state s = { .proto = p, .channel = c, - .pool = bgp_linpool, + .pool = tmp_linpool, .mp_reach = (c->afi != BGP_AF_IPV4) || c->ext_next_hop, .as4_session = p->as4_session, .add_path = c->add_path_tx, @@ -2319,6 +2349,7 @@ again: ; if (EMPTY_LIST(buck->prefixes)) { bgp_free_bucket(c, buck); + lp_restore(tmp_linpool, &tmpp); goto again; } @@ -2332,7 +2363,10 @@ again: ; bgp_defer_bucket(c, buck); if (!res) + { + lp_restore(tmp_linpool, &tmpp); goto again; + } goto done; } @@ -2343,7 +2377,7 @@ again: ; done: BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE"); p->stats.tx_updates++; - lp_flush(s.pool); + lp_restore(tmp_linpool, &tmpp); return res; } @@ -2433,11 +2467,11 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis { a = allocz(RTA_MAX_SIZE); - a->source = RTS_BGP; - a->scope = SCOPE_UNIVERSE; - a->from = s->proto->remote_ip; a->eattrs = ea; - a->pref = c->c.preference; + + ea_set_attr_data(&a->eattrs, &ea_gen_from, 0, &s->proto->remote_ip, sizeof(ip_addr)); + ea_set_attr_u32(&a->eattrs, &ea_gen_preference, 0, c->c.preference); + ea_set_attr_u32(&a->eattrs, &ea_gen_source, 0, RTS_BGP); c->desc->decode_next_hop(s, nh, nh_len, a); bgp_finish_attrs(s, a); @@ -2472,10 +2506,13 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len) bgp_start_timer(conn->hold_timer, conn->hold_time); + struct lp_state tmpp; + lp_save(tmp_linpool, &tmpp); + /* Initialize parse state */ struct bgp_parse_state s = { .proto = p, - .pool = bgp_linpool, + .pool = tmp_linpool, .as4_session = p->as4_session, }; @@ -2541,6 +2578,8 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len) if (s.mp_unreach_len) bgp_decode_nlri(&s, s.mp_unreach_af, s.mp_unreach_nlri, s.mp_unreach_len, NULL, NULL, 0); + s.reach_nlri_step = 1; + if (s.ip_reach_len) bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_reach_nlri, s.ip_reach_len, ea, s.ip_next_hop_data, s.ip_next_hop_len); @@ -2551,7 +2590,7 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len) done: rta_free(s.cached_rta); - lp_flush(s.pool); + lp_restore(tmp_linpool, &tmpp); return; } @@ -2695,7 +2734,7 @@ bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len) { case BGP_RR_REQUEST: BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH"); - channel_request_feeding(&c->c); + rt_refeed_channel(&c->c); break; case BGP_RR_BEGIN: diff --git a/proto/mrt/Makefile b/proto/mrt/Makefile index 925fb102..000e1c1c 100644 --- a/proto/mrt/Makefile +++ b/proto/mrt/Makefile @@ -2,5 +2,6 @@ src := mrt.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,mrt_build) -tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/mrt/mrt.c b/proto/mrt/mrt.c index 03f0d59e..0f53dbe7 100644 --- a/proto/mrt/mrt.c +++ b/proto/mrt/mrt.c @@ -113,13 +113,13 @@ mrt_buffer_flush(buffer *b) } #define MRT_DEFINE_TYPE(S, T) \ - static inline void mrt_put_##S##_(buffer *b, T x) \ + UNUSED static inline void mrt_put_##S##_(buffer *b, T x) \ { \ put_##S(b->pos, x); \ b->pos += sizeof(T); \ } \ \ - static inline void mrt_put_##S(buffer *b, T x) \ + UNUSED static inline void mrt_put_##S(buffer *b, T x) \ { \ mrt_buffer_need(b, sizeof(T)); \ put_##S(b->pos, x); \ @@ -431,7 +431,7 @@ mrt_rib_table_entry_bgp_attrs(struct mrt_table_dump_state *s, rte *r) /* Attribute list must be normalized for bgp_encode_attrs() */ if (!rta_is_cached(r->attrs)) - ea_normalize(eattrs); + eattrs = ea_normalize(eattrs); mrt_buffer_need(b, MRT_ATTR_BUFFER_SIZE); byte *pos = b->pos; @@ -525,7 +525,7 @@ mrt_rib_table_dump(struct mrt_table_dump_state *s, net *n, int add_path) } rte e = rt->rte; - if (f_run(s->filter, &e, s->linpool, 0) <= F_ACCEPT) + if (f_run(s->filter, &e, 0) <= F_ACCEPT) mrt_rib_table_entry(s, &e); lp_flush(s->linpool); @@ -557,8 +557,8 @@ mrt_table_dump_init(pool *pp) struct mrt_table_dump_state *s = mb_allocz(pool, sizeof(struct mrt_table_dump_state)); s->pool = pool; - s->linpool = lp_new(pool, 4080); - s->peer_lp = lp_new(pool, 4080); + s->linpool = lp_new(pool); + s->peer_lp = lp_new(pool); mrt_buffer_init(&s->buf, pool, 2 * MRT_ATTR_BUFFER_SIZE); /* We lock the current config as we may reference it indirectly by filter */ @@ -904,7 +904,6 @@ mrt_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSE struct protocol proto_mrt = { .name = "MRT", .template = "mrt%d", - .class = PROTOCOL_MRT, .proto_size = sizeof(struct mrt_proto), .config_size = sizeof(struct mrt_config), .init = mrt_init, @@ -913,3 +912,9 @@ struct protocol proto_mrt = { .reconfigure = mrt_reconfigure, .copy_config = mrt_copy_config, }; + +void +mrt_build(void) +{ + proto_build(&proto_mrt); +} diff --git a/proto/mrt/mrt.h b/proto/mrt/mrt.h index 4ff94c12..3b83aa39 100644 --- a/proto/mrt/mrt.h +++ b/proto/mrt/mrt.h @@ -13,7 +13,7 @@ #include "nest/bird.h" #include "nest/protocol.h" #include "lib/lists.h" -#include "nest/route.h" +#include "nest/rt.h" #include "lib/event.h" #include "lib/hash.h" diff --git a/proto/ospf/Makefile b/proto/ospf/Makefile index 39e74f71..85664543 100644 --- a/proto/ospf/Makefile +++ b/proto/ospf/Makefile @@ -2,5 +2,6 @@ src := dbdes.c hello.c iface.c lsack.c lsalib.c lsreq.c lsupd.c neighbor.c ospf. obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,ospf_build) -tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y index 4b7d5a36..bc3df8db 100644 --- a/proto/ospf/config.Y +++ b/proto/ospf/config.Y @@ -190,7 +190,7 @@ ospf_check_auth(void) CF_DECLS -CF_KEYWORDS(OSPF, V2, V3, OSPF_METRIC1, OSPF_METRIC2, OSPF_TAG, OSPF_ROUTER_ID) +CF_KEYWORDS(OSPF, V2, V3) CF_KEYWORDS(AREA, NEIGHBORS, RFC1583COMPAT, STUB, TICK, COST, COST2, RETRANSMIT) CF_KEYWORDS(HELLO, TRANSMIT, PRIORITY, DEAD, TYPE, BROADCAST, BCAST, DEFAULT) CF_KEYWORDS(NONBROADCAST, NBMA, POINTOPOINT, PTP, POINTOMULTIPOINT, PTMP) @@ -505,11 +505,6 @@ ospf_iface: ospf_iface_start ospf_iface_patt_list ospf_iface_opt_list { ospf_iface_finish(); } ; -dynamic_attr: OSPF_METRIC1 { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_OSPF_METRIC1); } ; -dynamic_attr: OSPF_METRIC2 { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_OSPF_METRIC2); } ; -dynamic_attr: OSPF_TAG { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_OSPF_TAG); } ; -dynamic_attr: OSPF_ROUTER_ID { $$ = f_new_dynamic_attr(EAF_TYPE_ROUTER_ID, T_QUAD, EA_OSPF_ROUTER_ID); } ; - CF_CLI_HELP(SHOW OSPF, ..., [[Show information about OSPF protocol]]); CF_CLI(SHOW OSPF, optproto, [<name>], [[Show information about OSPF protocol]]) { PROTO_WALK_CMD($3, &proto_ospf, p) ospf_sh(p); }; diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c index 22150590..8fe64d95 100644 --- a/proto/ospf/ospf.c +++ b/proto/ospf/ospf.c @@ -106,11 +106,12 @@ #include <stdlib.h> #include "ospf.h" +#include "lib/macro.h" static int ospf_preexport(struct channel *C, rte *new); static void ospf_reload_routes(struct channel *C); static int ospf_rte_better(struct rte *new, struct rte *old); -static u32 ospf_rte_igp_metric(struct rte *rt); +static u32 ospf_rte_igp_metric(const rte *rt); static void ospf_disp(timer *timer); @@ -299,7 +300,7 @@ ospf_start(struct proto *P) p->lsab_size = 256; p->lsab_used = 0; p->lsab = mb_alloc(P->pool, p->lsab_size); - p->nhpool = lp_new(P->pool, 12*sizeof(struct nexthop)); + p->nhpool = lp_new(P->pool); init_list(&(p->iface_list)); init_list(&(p->area_list)); fib_init(&p->rtf, P->pool, ospf_get_af(p), sizeof(ort), OFFSETOF(ort, fn), 0, NULL); @@ -386,23 +387,26 @@ ospf_init(struct proto_config *CF) static int ospf_rte_better(struct rte *new, struct rte *old) { - u32 new_metric1 = ea_get_int(new->attrs->eattrs, EA_OSPF_METRIC1, LSINFINITY); + u32 new_metric1 = ea_get_int(new->attrs->eattrs, &ea_ospf_metric1, LSINFINITY); if (new_metric1 == LSINFINITY) return 0; - if(new->attrs->source < old->attrs->source) return 1; - if(new->attrs->source > old->attrs->source) return 0; + u32 ns = rt_get_source_attr(new); + u32 os = rt_get_source_attr(old); - if(new->attrs->source == RTS_OSPF_EXT2) + if (ns < os) return 1; + if (ns > os) return 0; + + if (ns == RTS_OSPF_EXT2) { - u32 old_metric2 = ea_get_int(old->attrs->eattrs, EA_OSPF_METRIC2, LSINFINITY); - u32 new_metric2 = ea_get_int(new->attrs->eattrs, EA_OSPF_METRIC2, LSINFINITY); - if(new_metric2 < old_metric2) return 1; - if(new_metric2 > old_metric2) return 0; + u32 old_metric2 = ea_get_int(old->attrs->eattrs, &ea_ospf_metric2, LSINFINITY); + u32 new_metric2 = ea_get_int(new->attrs->eattrs, &ea_ospf_metric2, LSINFINITY); + if (new_metric2 < old_metric2) return 1; + if (new_metric2 > old_metric2) return 0; } - u32 old_metric1 = ea_get_int(old->attrs->eattrs, EA_OSPF_METRIC1, LSINFINITY); + u32 old_metric1 = ea_get_int(old->attrs->eattrs, &ea_ospf_metric1, LSINFINITY); if (new_metric1 < old_metric1) return 1; @@ -410,12 +414,12 @@ ospf_rte_better(struct rte *new, struct rte *old) } static u32 -ospf_rte_igp_metric(struct rte *rt) +ospf_rte_igp_metric(const rte *rt) { - if (rt->attrs->source == RTS_OSPF_EXT2) + if (rt_get_source_attr(rt) == RTS_OSPF_EXT2) return IGP_METRIC_UNKNOWN; - return ea_get_int(rt->attrs->eattrs, EA_OSPF_METRIC1, LSINFINITY); + return ea_get_int(rt->attrs->eattrs, &ea_ospf_metric1, LSINFINITY); } void @@ -570,7 +574,8 @@ ospf_get_route_info(rte * rte, byte * buf) { char *type = "<bug>"; - switch (rte->attrs->source) + uint source = rt_get_source_attr(rte); + switch (source) { case RTS_OSPF: type = "I"; @@ -587,42 +592,26 @@ ospf_get_route_info(rte * rte, byte * buf) } buf += bsprintf(buf, " %s", type); - buf += bsprintf(buf, " (%d/%d", rte->attrs->pref, ea_get_int(rte->attrs->eattrs, EA_OSPF_METRIC1, LSINFINITY)); - if (rte->attrs->source == RTS_OSPF_EXT2) - buf += bsprintf(buf, "/%d", ea_get_int(rte->attrs->eattrs, EA_OSPF_METRIC2, LSINFINITY)); + buf += bsprintf(buf, " (%d/%d", rt_get_preference(rte), ea_get_int(rte->attrs->eattrs, &ea_ospf_metric1, LSINFINITY)); + if (source == RTS_OSPF_EXT2) + buf += bsprintf(buf, "/%d", ea_get_int(rte->attrs->eattrs, &ea_ospf_metric2, LSINFINITY)); buf += bsprintf(buf, ")"); - if (rte->attrs->source == RTS_OSPF_EXT1 || rte->attrs->source == RTS_OSPF_EXT2) + if (source == RTS_OSPF_EXT1 || source == RTS_OSPF_EXT2) { - eattr *ea = ea_find(rte->attrs->eattrs, EA_OSPF_TAG); + eattr *ea = ea_find(rte->attrs->eattrs, &ea_ospf_tag); if (ea && (ea->u.data > 0)) buf += bsprintf(buf, " [%x]", ea->u.data); } - eattr *ea = ea_find(rte->attrs->eattrs, EA_OSPF_ROUTER_ID); + eattr *ea = ea_find(rte->attrs->eattrs, &ea_ospf_router_id); if (ea) buf += bsprintf(buf, " [%R]", ea->u.data); } -static int -ospf_get_attr(const eattr * a, byte * buf, int buflen UNUSED) +static void +ospf_tag_format(const eattr * a, byte * buf, uint buflen) { - switch (a->id) - { - case EA_OSPF_METRIC1: - bsprintf(buf, "metric1"); - return GA_NAME; - case EA_OSPF_METRIC2: - bsprintf(buf, "metric2"); - return GA_NAME; - case EA_OSPF_TAG: - bsprintf(buf, "tag: 0x%08x", a->u.data); - return GA_FULL; - case EA_OSPF_ROUTER_ID: - bsprintf(buf, "router_id"); - return GA_NAME; - default: - return GA_UNKNOWN; - } + bsnprintf(buf, buflen, "0x%08x", a->u.data); } static void @@ -1520,7 +1509,6 @@ ospf_sh_lsadb(struct lsadb_show_data *ld) struct protocol proto_ospf = { .name = "OSPF", .template = "ospf%d", - .class = PROTOCOL_OSPF, .preference = DEF_PREF_OSPF, .channel_mask = NB_IP, .proto_size = sizeof(struct ospf_proto), @@ -1531,6 +1519,39 @@ struct protocol proto_ospf = { .shutdown = ospf_shutdown, .reconfigure = ospf_reconfigure, .get_status = ospf_get_status, - .get_attr = ospf_get_attr, .get_route_info = ospf_get_route_info }; + +struct ea_class ea_ospf_metric1 = { + .name = "ospf_metric1", + .type = T_INT, +}; + +struct ea_class ea_ospf_metric2 = { + .name = "ospf_metric2", + .type = T_INT, +}; + +struct ea_class ea_ospf_tag = { + .name = "ospf_tag", + .type = T_INT, + .format = ospf_tag_format, +}; + +struct ea_class ea_ospf_router_id = { + .name = "ospf_router_id", + .type = T_QUAD, +}; + +void +ospf_build(void) +{ + proto_build(&proto_ospf); + + EA_REGISTER_ALL( + &ea_ospf_metric1, + &ea_ospf_metric2, + &ea_ospf_tag, + &ea_ospf_router_id + ); +} diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h index 3e704ae8..7bed5c85 100644 --- a/proto/ospf/ospf.h +++ b/proto/ospf/ospf.h @@ -22,7 +22,7 @@ #include "lib/resource.h" #include "nest/protocol.h" #include "nest/iface.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "nest/locks.h" #include "nest/bfd.h" @@ -939,12 +939,7 @@ struct lsadb_show_data { u32 router; /* Advertising router, 0 -> all */ }; - -#define EA_OSPF_METRIC1 EA_CODE(PROTOCOL_OSPF, 0) -#define EA_OSPF_METRIC2 EA_CODE(PROTOCOL_OSPF, 1) -#define EA_OSPF_TAG EA_CODE(PROTOCOL_OSPF, 2) -#define EA_OSPF_ROUTER_ID EA_CODE(PROTOCOL_OSPF, 3) - +extern struct ea_class ea_ospf_metric1, ea_ospf_metric2, ea_ospf_tag, ea_ospf_router_id; /* * For regular networks, neighbor address must match network prefix. diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c index 3e208023..1c76aee7 100644 --- a/proto/ospf/rt.c +++ b/proto/ospf/rt.c @@ -28,24 +28,30 @@ nh_is_vlink(struct nexthop *nhs) static inline int unresolved_vlink(ort *ort) { - return ort->n.nhs && nh_is_vlink(ort->n.nhs); + return ort->n.nhs && nh_is_vlink(&ort->n.nhs->nh); } -static inline struct nexthop * +static inline struct nexthop_adata * new_nexthop(struct ospf_proto *p, ip_addr gw, struct iface *iface, byte weight) { - struct nexthop *nh = lp_allocz(p->nhpool, sizeof(struct nexthop)); - nh->gw = gw; - nh->iface = iface; - nh->weight = weight; - return nh; + struct nexthop_adata *nhad = lp_alloc(p->nhpool, sizeof(struct nexthop_adata)); + *nhad = (struct nexthop_adata) { + .ad = { .length = sizeof *nhad - sizeof nhad->ad, }, + .nh = { + .gw = gw, + .iface = iface, + .weight = weight, + }, + }; + + return nhad; } /* Returns true if there are device nexthops in n */ static inline int -has_device_nexthops(const struct nexthop *n) +has_device_nexthops(struct nexthop_adata *nhad) { - for (; n; n = n->next) + NEXTHOP_WALK(n, nhad) if (ipa_zero(n->gw)) return 1; @@ -53,38 +59,22 @@ has_device_nexthops(const struct nexthop *n) } /* Replace device nexthops with nexthops to gw */ -static struct nexthop * -fix_device_nexthops(struct ospf_proto *p, const struct nexthop *n, ip_addr gw) +static struct nexthop_adata * +fix_device_nexthops(struct ospf_proto *p, struct nexthop_adata *old, ip_addr gw) { - struct nexthop *root1 = NULL; - struct nexthop *root2 = NULL; - struct nexthop **nn1 = &root1; - struct nexthop **nn2 = &root2; - if (!p->ecmp) - return new_nexthop(p, gw, n->iface, n->weight); - - /* This is a bit tricky. We cannot just copy the list and update n->gw, - because the list should stay sorted, so we create two lists, one with new - gateways and one with old ones, and then merge them. */ - - for (; n; n = n->next) { - struct nexthop *nn = new_nexthop(p, ipa_zero(n->gw) ? gw : n->gw, n->iface, n->weight); - - if (ipa_zero(n->gw)) - { - *nn1 = nn; - nn1 = &(nn->next); - } - else - { - *nn2 = nn; - nn2 = &(nn->next); - } + struct nexthop_adata *new = (struct nexthop_adata *) lp_store_adata(p->nhpool, old->ad.data, old->ad.length); + new->nh.gw = gw; + return new; } - return nexthop_merge(root1, root2, 1, 1, p->ecmp, p->nhpool); + struct nexthop_adata *tmp = (struct nexthop_adata *) tmp_copy_adata(&old->ad); + NEXTHOP_WALK(n, tmp) + if (ipa_zero(n->gw)) + n->gw = gw; + + return nexthop_sort(tmp, p->nhpool); } @@ -169,9 +159,9 @@ orta_compare(const struct ospf_proto *p, const orta *new, const orta *old) return -1; if (!new->nhs) return 1; - if (nh_is_vlink(new->nhs)) + if (nh_is_vlink(&new->nhs->nh)) return -1; - if (nh_is_vlink(old->nhs)) + if (nh_is_vlink(&old->nhs->nh)) return 1; @@ -279,11 +269,7 @@ ort_merge(struct ospf_proto *p, ort *o, const orta *new) orta *old = &o->n; if (old->nhs != new->nhs) - { - old->nhs = nexthop_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse, - p->ecmp, p->nhpool); - old->nhs_reuse = 1; - } + old->nhs = nexthop_merge(old->nhs, new->nhs, p->ecmp, p->nhpool); if (old->rid < new->rid) old->rid = new->rid; @@ -295,11 +281,7 @@ ort_merge_ext(struct ospf_proto *p, ort *o, const orta *new) orta *old = &o->n; if (old->nhs != new->nhs) - { - old->nhs = nexthop_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse, - p->ecmp, p->nhpool); - old->nhs_reuse = 1; - } + old->nhs = nexthop_merge(old->nhs, new->nhs, p->ecmp, p->nhpool); if (old->tag != new->tag) old->tag = 0; @@ -1165,7 +1147,7 @@ ospf_check_vlinks(struct ospf_proto *p) if (tmp && (tmp->color == INSPF) && ipa_nonzero(tmp->lb) && tmp->nhs) { - struct ospf_iface *nhi = ospf_iface_find(p, tmp->nhs->iface); + struct ospf_iface *nhi = ospf_iface_find(p, tmp->nhs->nh.iface); if ((ifa->state != OSPF_IS_PTP) || (ifa->vifa != nhi) @@ -1579,10 +1561,7 @@ ospf_ext_spf(struct ospf_proto *p) /* Replace device nexthops with nexthops to forwarding address from LSA */ if (has_device_nexthops(nfa.nhs)) - { nfa.nhs = fix_device_nexthops(p, nfa.nhs, rt.fwaddr); - nfa.nhs_reuse = 1; - } } if (rt.ebit) @@ -1726,10 +1705,10 @@ ospf_rt_spf(struct ospf_proto *p) static inline int -inherit_nexthops(struct nexthop *pn) +inherit_nexthops(struct nexthop_adata *pn) { /* Proper nexthops (with defined GW) or dummy vlink nexthops (without iface) */ - return pn && (ipa_nonzero(pn->gw) || !pn->iface); + return pn && (ipa_nonzero(pn->nh.gw) || !pn->nh.iface); } static inline ip_addr @@ -1744,12 +1723,12 @@ link_lsa_lladdr(struct ospf_proto *p, struct top_hash_entry *en) return ospf_is_ip4(p) ? ipa_from_ip4(ospf3_6to4(ll)) : ipa_from_ip6(ll); } -static struct nexthop * +static struct nexthop_adata * calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par, int pos, uint data, uint lif, uint nif) { struct ospf_proto *p = oa->po; - struct nexthop *pn = par->nhs; + struct nexthop_adata *pn = par->nhs; struct top_hash_entry *link = NULL; struct ospf_iface *ifa = NULL; ip_addr nh = IPA_NONE; @@ -1827,10 +1806,10 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, return NULL; } - struct nexthop *nhs = new_nexthop(p, nh, ifa->iface, ifa->ecmp_weight); + struct nexthop_adata *nhs = new_nexthop(p, nh, ifa->iface, ifa->ecmp_weight); if (ifa->addr->flags & IA_HOST) - nhs->flags = RNF_ONLINK; + nhs->nh.flags = RNF_ONLINK; return nhs; } @@ -1851,7 +1830,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, if (ipa_zero(en->lb)) goto bad; - return new_nexthop(p, en->lb, pn->iface, pn->weight); + return new_nexthop(p, en->lb, pn->nh.iface, pn->nh.weight); } else /* OSPFv3 */ { @@ -1859,7 +1838,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, * Next-hop is taken from lladdr field of Link-LSA, en->lb_id * is computed in link_back(). */ - link = ospf_hash_find(p->gr, pn->iface->index, en->lb_id, rid, LSA_T_LINK); + link = ospf_hash_find(p->gr, pn->nh.iface->index, en->lb_id, rid, LSA_T_LINK); if (!link) return NULL; @@ -1867,7 +1846,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, if (ipa_zero(nh)) return NULL; - return new_nexthop(p, nh, pn->iface, pn->weight); + return new_nexthop(p, nh, pn->nh.iface, pn->nh.weight); } } @@ -1914,7 +1893,7 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry if (!link_back(oa, en, par, lif, nif)) return; - struct nexthop *nhs = calc_next_hop(oa, en, par, pos, data, lif, nif); + struct nexthop_adata *nhs = calc_next_hop(oa, en, par, pos, data, lif, nif); if (!nhs) { log(L_WARN "%s: Cannot find next hop for LSA (Type: %04x, Id: %R, Rt: %R)", @@ -1923,7 +1902,7 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry } /* If en->dist > 0, we know that en->color == CANDIDATE and en->nhs is defined. */ - if ((dist == en->dist) && !nh_is_vlink(en->nhs)) + if ((dist == en->dist) && !nh_is_vlink(&en->nhs->nh)) { /* * For multipath, we should merge nexthops. We merge regular nexthops only. @@ -1947,13 +1926,11 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry */ /* Keep old ones */ - if (!p->ecmp || nh_is_vlink(nhs) || (nhs == en->nhs)) + if (!p->ecmp || nh_is_vlink(&nhs->nh) || (nhs == en->nhs)) return; /* Merge old and new */ - int new_reuse = (par->nhs != nhs); - en->nhs = nexthop_merge(en->nhs, nhs, en->nhs_reuse, new_reuse, p->ecmp, p->nhpool); - en->nhs_reuse = 1; + en->nhs = nexthop_merge(en->nhs, nhs, p->ecmp, p->nhpool); return; } @@ -1967,7 +1944,6 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry en->nhs = nhs; en->dist = dist; en->color = CANDIDATE; - en->nhs_reuse = (par->nhs != nhs); prev = NULL; @@ -2004,11 +1980,31 @@ static inline int ort_changed(ort *nf, rta *nr) { rta *or = nf->old_rta; - return !or || + + if (!or || (nf->n.metric1 != nf->old_metric1) || (nf->n.metric2 != nf->old_metric2) || - (nf->n.tag != nf->old_tag) || (nf->n.rid != nf->old_rid) || - (nr->source != or->source) || (nr->dest != or->dest) || - !nexthop_same(&(nr->nh), &(or->nh)); + (nf->n.tag != nf->old_tag) || (nf->n.rid != nf->old_rid)) + return 1; + + eattr *nhea_n = ea_find(nr->eattrs, &ea_gen_nexthop); + eattr *nhea_o = ea_find(or->eattrs, &ea_gen_nexthop); + if (!nhea_n != !nhea_o) + return 1; + + if (nhea_n && nhea_o) + { + struct nexthop_adata *nhad_n = (struct nexthop_adata *) nhea_n->u.ptr; + struct nexthop_adata *nhad_o = (struct nexthop_adata *) nhea_o->u.ptr; + + if (!nexthop_same(nhad_n, nhad_o)) + return 1; + } + + if ( ea_get_int(nr->eattrs, &ea_gen_source, 0) + != ea_get_int(or->eattrs, &ea_gen_source, 0)) + return 1; + + return 0; } static void @@ -2030,10 +2026,9 @@ again1: FIB_ITERATE_START(fib, &fit, ort, nf) { /* Sanity check of next-hop addresses, failure should not happen */ - if (nf->n.type) + if (nf->n.type && nf->n.nhs) { - struct nexthop *nh; - for (nh = nf->n.nhs; nh; nh = nh->next) + NEXTHOP_WALK(nh, nf->n.nhs) if (ipa_nonzero(nh->gw)) { neighbor *nbr = neigh_find(&p->p, nh->gw, nh->iface, @@ -2053,48 +2048,47 @@ again1: if (nf->n.type) /* Add the route */ { rta a0 = { - .source = nf->n.type, - .scope = SCOPE_UNIVERSE, - .dest = RTD_UNICAST, - .nh = *(nf->n.nhs), - .pref = p->p.main_channel->preference, }; + struct { + ea_list l; + eattr a[7]; + } eattrs; + + eattrs.l = (ea_list) {}; + + eattrs.a[eattrs.l.count++] = + EA_LITERAL_EMBEDDED(&ea_gen_preference, 0, p->p.main_channel->preference); + + eattrs.a[eattrs.l.count++] = + EA_LITERAL_EMBEDDED(&ea_gen_source, 0, nf->n.type); + + eattrs.a[eattrs.l.count++] = + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, &nf->n.nhs->ad); + if (reload || ort_changed(nf, &a0)) { - a0.eattrs = alloca(sizeof(ea_list) + 4 * sizeof(eattr)); - memset(a0.eattrs, 0, sizeof(ea_list)); - nf->old_metric1 = nf->n.metric1; nf->old_metric2 = nf->n.metric2; nf->old_tag = nf->n.tag; nf->old_rid = nf->n.rid; - a0.eattrs->attrs[a0.eattrs->count++] = (eattr) { - .id = EA_OSPF_METRIC1, - .type = EAF_TYPE_INT, - .u.data = nf->n.metric1, - }; + eattrs.a[eattrs.l.count++] = + EA_LITERAL_EMBEDDED(&ea_ospf_metric1, 0, nf->n.metric1); if (nf->n.type == RTS_OSPF_EXT2) - a0.eattrs->attrs[a0.eattrs->count++] = (eattr) { - .id = EA_OSPF_METRIC2, - .type = EAF_TYPE_INT, - .u.data = nf->n.metric2, - }; + eattrs.a[eattrs.l.count++] = + EA_LITERAL_EMBEDDED(&ea_ospf_metric2, 0, nf->n.metric2); if ((nf->n.type == RTS_OSPF_EXT1) || (nf->n.type == RTS_OSPF_EXT2)) - a0.eattrs->attrs[a0.eattrs->count++] = (eattr) { - .id = EA_OSPF_TAG, - .type = EAF_TYPE_INT, - .u.data = nf->n.tag, - }; + eattrs.a[eattrs.l.count++] = + EA_LITERAL_EMBEDDED(&ea_ospf_tag, 0, nf->n.tag); - a0.eattrs->attrs[a0.eattrs->count++] = (eattr) { - .id = EA_OSPF_ROUTER_ID, - .type = EAF_TYPE_ROUTER_ID, - .u.data = nf->n.rid, - }; + eattrs.a[eattrs.l.count++] = + EA_LITERAL_EMBEDDED(&ea_ospf_router_id, 0, nf->n.rid); + + ASSERT_DIE(ARRAY_SIZE(eattrs.a) >= eattrs.l.count); + a0.eattrs = &eattrs.l; rta_free(nf->old_rta); nf->old_rta = rta_lookup(&a0); diff --git a/proto/ospf/rt.h b/proto/ospf/rt.h index 094e125b..180216b7 100644 --- a/proto/ospf/rt.h +++ b/proto/ospf/rt.h @@ -18,8 +18,6 @@ typedef struct orta { u8 type; /* RTS_OSPF_* */ - u8 nhs_reuse; /* Whether nhs nodes can be reused during merging. - See a note in rt.c:add_cand() */ u32 options; /* * For ORT_ROUTER routes, options field are router-LSA style @@ -53,7 +51,7 @@ typedef struct orta struct ospf_area *oa; struct ospf_area *voa; /* Used when route is replaced in ospf_rt_sum_tr(), NULL otherwise */ - struct nexthop *nhs; /* Next hops computed during SPF */ + struct nexthop_adata *nhs; /* Next hops computed during SPF */ struct top_hash_entry *en; /* LSA responsible for this orta */ } orta; diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c index bb88d20a..6ff6a745 100644 --- a/proto/ospf/topology.c +++ b/proto/ospf/topology.c @@ -1338,8 +1338,8 @@ ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *n, rt /* Get route attributes */ rta *a = new->attrs; - eattr *m1a = ea_find(a->eattrs, EA_OSPF_METRIC1); - eattr *m2a = ea_find(a->eattrs, EA_OSPF_METRIC2); + eattr *m1a = ea_find(a->eattrs, &ea_ospf_metric1); + eattr *m2a = ea_find(a->eattrs, &ea_ospf_metric2); uint m1 = m1a ? m1a->u.data : 0; uint m2 = m2a ? m2a->u.data : 10000; @@ -1363,11 +1363,14 @@ ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *n, rt uint ebit = m2a || !m1a; uint metric = ebit ? m2 : m1; - uint tag = ea_get_int(a->eattrs, EA_OSPF_TAG, 0); + uint tag = ea_get_int(a->eattrs, &ea_ospf_tag, 0); ip_addr fwd = IPA_NONE; - if ((a->dest == RTD_UNICAST) && use_gw_for_fwaddr(p, a->nh.gw, a->nh.iface)) - fwd = a->nh.gw; + eattr *nhea = ea_find(a->eattrs, &ea_gen_nexthop); + struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr; + if (NEXTHOP_IS_REACHABLE(nhad)) + if (use_gw_for_fwaddr(p, nhad->nh.gw, nhad->nh.iface)) + fwd = nhad->nh.gw; /* NSSA-LSA with P-bit set must have non-zero forwarding address */ if (oa && ipa_zero(fwd)) @@ -2135,7 +2138,7 @@ ospf_hash_delete(struct top_graph *f, struct top_hash_entry *e) if (*ee == e) { *ee = e->next; - sl_free(f->hash_slab, e); + sl_free(e); if (f->hash_entries-- < f->hash_entries_min) ospf_top_rehash(f, -HASH_LO_STEP); return; diff --git a/proto/ospf/topology.h b/proto/ospf/topology.h index c36d0b50..3c92b431 100644 --- a/proto/ospf/topology.h +++ b/proto/ospf/topology.h @@ -28,7 +28,7 @@ struct top_hash_entry u16 next_lsa_opts; /* For postponed LSA origination */ btime inst_time; /* Time of installation into DB */ struct ort *nf; /* Reference fibnode for sum and ext LSAs, NULL for otherwise */ - struct nexthop *nhs; /* Computed nexthops - valid only in ospf_rt_spf() */ + struct nexthop_adata *nhs; /* Computed nexthops - valid only in ospf_rt_spf() */ ip_addr lb; /* In OSPFv2, link back address. In OSPFv3, any global address in the area useful for vlinks */ u32 lb_id; /* Interface ID of link back iface (for bcast or NBMA networks) */ u32 dist; /* Distance from the root */ @@ -39,8 +39,6 @@ struct top_hash_entry #define CANDIDATE 1 #define INSPF 2 u8 mode; /* LSA generated during RT calculation (LSA_RTCALC or LSA_STALE)*/ - u8 nhs_reuse; /* Whether nhs nodes can be reused during merging. - See a note in rt.c:add_cand() */ }; diff --git a/proto/perf/Makefile b/proto/perf/Makefile index 7877fb19..42051f43 100644 --- a/proto/perf/Makefile +++ b/proto/perf/Makefile @@ -2,5 +2,6 @@ src := perf.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,perf_build) tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/perf/perf.c b/proto/perf/perf.c index 8b2cb69f..8a883dd7 100644 --- a/proto/perf/perf.c +++ b/proto/perf/perf.c @@ -18,7 +18,7 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "conf/conf.h" #include "filter/filter.h" @@ -142,16 +142,20 @@ perf_loop(void *data) *((net_addr_ip4 *) &(p->data[i].net)) = random_net_ip4(); if (!p->attrs_per_rte || !(i % p->attrs_per_rte)) { - struct rta a0 = { - .source = RTS_PERF, - .scope = SCOPE_UNIVERSE, - .dest = RTD_UNICAST, - .pref = p->p.main_channel->preference, + struct rta a0 = {}; + + ea_set_attr_u32(&a0.eattrs, &ea_gen_preference, 0, p->p.main_channel->preference); + ea_set_attr_u32(&a0.eattrs, &ea_gen_source, 0, RTS_PERF); + + struct nexthop_adata nhad = { .nh.iface = p->ifa->iface, .nh.gw = gw, .nh.weight = 1, }; + ea_set_attr_data(&a0.eattrs, &ea_gen_nexthop, 0, + &nhad.ad.data, sizeof nhad - sizeof nhad.ad); + p->data[i].a = rta_lookup(&a0); } else @@ -305,7 +309,6 @@ perf_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUS struct protocol proto_perf = { .name = "Perf", .template = "perf%d", - .class = PROTOCOL_PERF, .channel_mask = NB_IP, .proto_size = sizeof(struct perf_proto), .config_size = sizeof(struct perf_config), @@ -314,3 +317,9 @@ struct protocol proto_perf = { .reconfigure = perf_reconfigure, .copy_config = perf_copy_config, }; + +void +perf_build(void) +{ + proto_build(&proto_perf); +} diff --git a/proto/pipe/Makefile b/proto/pipe/Makefile index 5093da98..ba66027f 100644 --- a/proto/pipe/Makefile +++ b/proto/pipe/Makefile @@ -2,5 +2,6 @@ src := pipe.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,pipe_build) -tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/pipe/config.Y b/proto/pipe/config.Y index 1202c169..c869de9f 100644 --- a/proto/pipe/config.Y +++ b/proto/pipe/config.Y @@ -16,7 +16,7 @@ CF_DEFINES CF_DECLS -CF_KEYWORDS(PIPE, PEER, TABLE) +CF_KEYWORDS(PIPE, PEER, TABLE, MAX, GENERATION) CF_GRAMMAR @@ -25,6 +25,7 @@ proto: pipe_proto '}' { this_channel = NULL; } ; pipe_proto_start: proto_start PIPE { this_proto = proto_config_new(&proto_pipe, $1); + PIPE_CFG->max_generation = 16; } proto_name { @@ -41,6 +42,10 @@ pipe_proto: | pipe_proto proto_item ';' | pipe_proto channel_item_ ';' | pipe_proto PEER TABLE rtable ';' { PIPE_CFG->peer = $4; } + | pipe_proto MAX GENERATION expr ';' { + if (($4 < 1) || ($4 > 254)) cf_error("Max generation must be in range 1..254, got %u", $4); + PIPE_CFG->max_generation = $4; + } ; CF_CODE diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c index 3caa85d0..e122d771 100644 --- a/proto/pipe/pipe.c +++ b/proto/pipe/pipe.c @@ -35,7 +35,7 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "conf/conf.h" #include "filter/filter.h" @@ -56,43 +56,46 @@ pipe_rt_notify(struct proto *P, struct channel *src_ch, const net_addr *n, rte * if (!new && !old) return; - if (dst->table->pipe_busy) - { - log(L_ERR "Pipe loop detected when sending %N to table %s", - n, dst->table->name); - return; - } - - src_ch->table->pipe_busy = 1; - if (new) { rta *a = alloca(rta_size(new->attrs)); memcpy(a, new->attrs, rta_size(new->attrs)); a->cached = 0; - a->hostentry = NULL; + ea_unset_attr(&a->eattrs, 0, &ea_gen_hostentry); + rte e0 = { .attrs = a, .src = new->src, + .generation = new->generation + 1, }; rte_update(dst, n, &e0, new->src); } else rte_update(dst, n, NULL, old->src); - - src_ch->table->pipe_busy = 0; } static int pipe_preexport(struct channel *c, rte *e) { - struct proto *pp = e->sender->proto; + struct pipe_proto *p = (void *) c->proto; - if (pp == c->proto) - return -1; /* Avoid local loops automatically */ + /* Avoid direct loopbacks */ + if (e->sender == c->in_req.hook) + return -1; + + /* Indirection check */ + uint max_generation = ((struct pipe_config *) p->p.cf)->max_generation; + if (e->generation >= max_generation) + { + log_rl(&p->rl_gen, L_ERR "Route overpiped (%u hops of %u configured in %s) in table %s: %N %s/%u:%u", + e->generation, max_generation, c->proto->name, + c->table->name, e->net, e->src->proto->name, e->src->private_id, e->src->global_id); + + return -1; + } return 0; } @@ -177,6 +180,8 @@ pipe_init(struct proto_config *CF) P->preexport = pipe_preexport; P->reload_routes = pipe_reload_routes; + p->rl_gen = (struct tbf) TBF_DEFAULT_LOG_LIMITS; + pipe_configure_channels(p, cf); return P; @@ -208,8 +213,18 @@ pipe_get_status(struct proto *P, byte *buf) static void pipe_show_stats(struct pipe_proto *p) { - struct proto_stats *s1 = &p->pri->stats; - struct proto_stats *s2 = &p->sec->stats; + struct channel_import_stats *s1i = &p->pri->import_stats; + struct channel_export_stats *s1e = &p->pri->export_stats; + struct channel_import_stats *s2i = &p->sec->import_stats; + struct channel_export_stats *s2e = &p->sec->export_stats; + + struct rt_import_stats *rs1i = p->pri->in_req.hook ? &p->pri->in_req.hook->stats : NULL; + struct rt_export_stats *rs1e = p->pri->out_req.hook ? &p->pri->out_req.hook->stats : NULL; + struct rt_import_stats *rs2i = p->sec->in_req.hook ? &p->sec->in_req.hook->stats : NULL; + struct rt_export_stats *rs2e = p->sec->out_req.hook ? &p->sec->out_req.hook->stats : NULL; + + u32 pri_routes = p->pri->in_limit.count; + u32 sec_routes = p->sec->in_limit.count; /* * Pipe stats (as anything related to pipes) are a bit tricky. There @@ -233,24 +248,22 @@ pipe_show_stats(struct pipe_proto *p) */ cli_msg(-1006, " Routes: %u imported, %u exported", - s1->imp_routes, s2->imp_routes); + pri_routes, sec_routes); cli_msg(-1006, " Route change stats: received rejected filtered ignored accepted"); cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u", - s2->exp_updates_received, s2->exp_updates_rejected + s1->imp_updates_invalid, - s2->exp_updates_filtered, s1->imp_updates_ignored, s1->imp_updates_accepted); + rs2e->updates_received, s2e->updates_rejected + s1i->updates_invalid, + s2e->updates_filtered, rs1i->updates_ignored, rs1i->updates_accepted); cli_msg(-1006, " Import withdraws: %10u %10u --- %10u %10u", - s2->exp_withdraws_received, s1->imp_withdraws_invalid, - s1->imp_withdraws_ignored, s1->imp_withdraws_accepted); + rs2e->withdraws_received, s1i->withdraws_invalid, + rs1i->withdraws_ignored, rs1i->withdraws_accepted); cli_msg(-1006, " Export updates: %10u %10u %10u %10u %10u", - s1->exp_updates_received, s1->exp_updates_rejected + s2->imp_updates_invalid, - s1->exp_updates_filtered, s2->imp_updates_ignored, s2->imp_updates_accepted); + rs1e->updates_received, s1e->updates_rejected + s2i->updates_invalid, + s1e->updates_filtered, rs2i->updates_ignored, rs2i->updates_accepted); cli_msg(-1006, " Export withdraws: %10u %10u --- %10u %10u", - s1->exp_withdraws_received, s2->imp_withdraws_invalid, - s2->imp_withdraws_ignored, s2->imp_withdraws_accepted); + rs1e->withdraws_received, s2i->withdraws_invalid, + rs2i->withdraws_ignored, rs2i->withdraws_accepted); } -static const char *pipe_feed_state[] = { [ES_DOWN] = "down", [ES_FEEDING] = "feed", [ES_READY] = "up" }; - static void pipe_show_proto_info(struct proto *P) { @@ -259,13 +272,17 @@ pipe_show_proto_info(struct proto *P) cli_msg(-1006, " Channel %s", "main"); cli_msg(-1006, " Table: %s", p->pri->table->name); cli_msg(-1006, " Peer table: %s", p->sec->table->name); - cli_msg(-1006, " Import state: %s", pipe_feed_state[p->sec->export_state]); - cli_msg(-1006, " Export state: %s", pipe_feed_state[p->pri->export_state]); + cli_msg(-1006, " Import state: %s", rt_export_state_name(rt_export_get_state(p->sec->out_req.hook))); + cli_msg(-1006, " Export state: %s", rt_export_state_name(rt_export_get_state(p->pri->out_req.hook))); cli_msg(-1006, " Import filter: %s", filter_name(p->sec->out_filter)); cli_msg(-1006, " Export filter: %s", filter_name(p->pri->out_filter)); - channel_show_limit(&p->pri->in_limit, "Import limit:"); - channel_show_limit(&p->sec->in_limit, "Export limit:"); + + + channel_show_limit(&p->pri->in_limit, "Import limit:", + (p->pri->limit_active & (1 << PLD_IN)), p->pri->limit_actions[PLD_IN]); + channel_show_limit(&p->sec->in_limit, "Export limit:", + (p->sec->limit_active & (1 << PLD_IN)), p->sec->limit_actions[PLD_IN]); if (P->proto_state != PS_DOWN) pipe_show_stats(p); @@ -283,7 +300,6 @@ pipe_update_debug(struct proto *P) struct protocol proto_pipe = { .name = "Pipe", .template = "pipe%d", - .class = PROTOCOL_PIPE, .proto_size = sizeof(struct pipe_proto), .config_size = sizeof(struct pipe_config), .postconfig = pipe_postconfig, @@ -293,3 +309,9 @@ struct protocol proto_pipe = { .get_status = pipe_get_status, .show_proto_info = pipe_show_proto_info }; + +void +pipe_build(void) +{ + proto_build(&proto_pipe); +} diff --git a/proto/pipe/pipe.h b/proto/pipe/pipe.h index 038c6666..60c857eb 100644 --- a/proto/pipe/pipe.h +++ b/proto/pipe/pipe.h @@ -12,12 +12,14 @@ struct pipe_config { struct proto_config c; struct rtable_config *peer; /* Table we're connected to */ + u8 max_generation; }; struct pipe_proto { struct proto p; struct channel *pri; struct channel *sec; + struct tbf rl_gen; }; #endif diff --git a/proto/radv/Makefile b/proto/radv/Makefile index 05317eff..4780bee3 100644 --- a/proto/radv/Makefile +++ b/proto/radv/Makefile @@ -2,5 +2,6 @@ src := packets.c radv.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,radv_build) -tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/radv/config.Y b/proto/radv/config.Y index 8d4a3ab9..fb68d2e5 100644 --- a/proto/radv/config.Y +++ b/proto/radv/config.Y @@ -33,7 +33,7 @@ CF_KEYWORDS(RADV, PREFIX, INTERFACE, MIN, MAX, RA, DELAY, INTERVAL, SOLICITED, RETRANS, TIMER, CURRENT, HOP, LIMIT, DEFAULT, VALID, PREFERRED, MULT, LIFETIME, SKIP, ONLINK, AUTONOMOUS, RDNSS, DNSSL, NS, DOMAIN, LOCAL, TRIGGER, SENSITIVE, PREFERENCE, LOW, MEDIUM, HIGH, PROPAGATE, ROUTE, - ROUTES, RA_PREFERENCE, RA_LIFETIME) + ROUTES) CF_ENUM(T_ENUM_RA_PREFERENCE, RA_PREF_, LOW, MEDIUM, HIGH) @@ -336,9 +336,6 @@ radv_sensitive: | SENSITIVE bool { $$ = $2; } ; -dynamic_attr: RA_PREFERENCE { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_ENUM_RA_PREFERENCE, EA_RA_PREFERENCE); } ; -dynamic_attr: RA_LIFETIME { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_RA_LIFETIME); } ; - CF_CODE CF_END diff --git a/proto/radv/radv.c b/proto/radv/radv.c index fa228c69..1f75b7c2 100644 --- a/proto/radv/radv.c +++ b/proto/radv/radv.c @@ -10,6 +10,7 @@ #include <stdlib.h> #include "radv.h" +#include "lib/macro.h" /** * DOC: Router Advertisements @@ -42,6 +43,8 @@ * RFC 6106 - DNS extensions (RDDNS, DNSSL) */ +static struct ea_class ea_radv_preference, ea_radv_lifetime; + static void radv_prune_prefixes(struct radv_iface *ifa); static void radv_prune_routes(struct radv_proto *p); @@ -444,11 +447,11 @@ radv_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *n, rt { /* Update */ - ea = ea_find(new->attrs->eattrs, EA_RA_PREFERENCE); + ea = ea_find(new->attrs->eattrs, &ea_radv_preference); uint preference = ea ? ea->u.data : RA_PREF_MEDIUM; uint preference_set = !!ea; - ea = ea_find(new->attrs->eattrs, EA_RA_LIFETIME); + ea = ea_find(new->attrs->eattrs, &ea_radv_lifetime); uint lifetime = ea ? ea->u.data : 0; uint lifetime_set = !!ea; @@ -738,27 +741,26 @@ radv_pref_str(u32 pref) } } -/* The buffer has some minimal size */ -static int -radv_get_attr(const eattr *a, byte *buf, int buflen UNUSED) +static void +radv_preference_format(const eattr *a, byte *buf, uint buflen) { - switch (a->id) - { - case EA_RA_PREFERENCE: - bsprintf(buf, "preference: %s", radv_pref_str(a->u.data)); - return GA_FULL; - case EA_RA_LIFETIME: - bsprintf(buf, "lifetime"); - return GA_NAME; - default: - return GA_UNKNOWN; - } + bsnprintf(buf, buflen, "%s", radv_pref_str(a->u.data)); } +static struct ea_class ea_radv_preference = { + .name = "radv_preference", + .type = T_ENUM_RA_PREFERENCE, + .format = radv_preference_format, +}; + +static struct ea_class ea_radv_lifetime = { + .name = "radv_lifetime", + .type = T_INT, +}; + struct protocol proto_radv = { .name = "RAdv", .template = "radv%d", - .class = PROTOCOL_RADV, .channel_mask = NB_IP6, .proto_size = sizeof(struct radv_proto), .config_size = sizeof(struct radv_config), @@ -769,5 +771,15 @@ struct protocol proto_radv = { .reconfigure = radv_reconfigure, .copy_config = radv_copy_config, .get_status = radv_get_status, - .get_attr = radv_get_attr }; + +void +radv_build(void) +{ + proto_build(&proto_radv); + + EA_REGISTER_ALL( + &ea_radv_preference, + &ea_radv_lifetime + ); +} diff --git a/proto/radv/radv.h b/proto/radv/radv.h index 14d40f8a..c9219bda 100644 --- a/proto/radv/radv.h +++ b/proto/radv/radv.h @@ -19,7 +19,7 @@ #include "lib/resource.h" #include "nest/protocol.h" #include "nest/iface.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "nest/locks.h" #include "conf/conf.h" @@ -195,10 +195,6 @@ struct radv_iface #define RA_PREF_HIGH 0x08 #define RA_PREF_MASK 0x18 -/* Attributes */ -#define EA_RA_PREFERENCE EA_CODE(PROTOCOL_RADV, 0) -#define EA_RA_LIFETIME EA_CODE(PROTOCOL_RADV, 1) - #ifdef LOCAL_DEBUG #define RADV_FORCE_DEBUG 1 #else diff --git a/proto/rip/Makefile b/proto/rip/Makefile index 7feabcd8..b9ff62d6 100644 --- a/proto/rip/Makefile +++ b/proto/rip/Makefile @@ -2,5 +2,6 @@ src := packets.c rip.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,rip_build) -tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/rip/config.Y b/proto/rip/config.Y index 28ee9609..3c0973b1 100644 --- a/proto/rip/config.Y +++ b/proto/rip/config.Y @@ -37,7 +37,7 @@ CF_KEYWORDS(RIP, NG, ECMP, LIMIT, WEIGHT, INFINITY, METRIC, UPDATE, TIMEOUT, PASSIVE, VERSION, SPLIT, HORIZON, POISON, REVERSE, CHECK, ZERO, TIME, BFD, AUTHENTICATION, NONE, PLAINTEXT, CRYPTOGRAPHIC, MD5, TTL, SECURITY, RX, TX, BUFFER, LENGTH, PRIORITY, ONLY, LINK, - DEMAND, CIRCUIT, RIP_METRIC, RIP_TAG) + DEMAND, CIRCUIT) %type <i> rip_variant rip_auth @@ -190,9 +190,6 @@ rip_iface: rip_iface_start iface_patt_list_nopx rip_iface_opt_list rip_iface_finish; -dynamic_attr: RIP_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_RIP_METRIC); } ; -dynamic_attr: RIP_TAG { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_RIP_TAG); } ; - CF_CLI_HELP(SHOW RIP, ..., [[Show information about RIP protocol]]); CF_CLI(SHOW RIP INTERFACES, optproto opttext, [<name>] [\"<interface>\"], [[Show information about RIP interfaces]]) diff --git a/proto/rip/rip.c b/proto/rip/rip.c index 0a9844f3..cc8b57eb 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -78,6 +78,7 @@ #include <stdlib.h> #include "rip.h" +#include "lib/macro.h" static inline void rip_lock_neighbor(struct rip_neighbor *n); @@ -88,6 +89,7 @@ static inline void rip_iface_kick_timer(struct rip_iface *ifa); static void rip_iface_timer(timer *timer); static void rip_trigger_update(struct rip_proto *p); +static struct ea_class ea_rip_metric, ea_rip_tag, ea_rip_from; /* * RIP routes @@ -108,14 +110,14 @@ rip_add_rte(struct rip_proto *p, struct rip_rte **rp, struct rip_rte *src) } static inline void -rip_remove_rte(struct rip_proto *p, struct rip_rte **rp) +rip_remove_rte(struct rip_proto *p UNUSED, struct rip_rte **rp) { struct rip_rte *rt = *rp; rip_unlock_neighbor(rt->from); *rp = rt->next; - sl_free(p->rte_slab, rt); + sl_free(rt); } static inline int rip_same_rte(struct rip_rte *a, struct rip_rte *b) @@ -124,6 +126,11 @@ static inline int rip_same_rte(struct rip_rte *a, struct rip_rte *b) static inline int rip_valid_rte(struct rip_rte *rt) { return rt->from->ifa != NULL; } +struct rip_iface_adata { + struct adata ad; + struct iface *iface; +}; + /** * rip_announce_rte - announce route from RIP routing table to the core * @p: RIP instance @@ -144,68 +151,85 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en) if (rt) { /* Update */ - rta a0 = { - .pref = p->p.main_channel->preference, - .source = RTS_RIP, - .scope = SCOPE_UNIVERSE, - .dest = RTD_UNICAST, - }; + rta a0 = {}; + + struct { + ea_list l; + eattr a[3]; + } ea_block = { + .l.count = ARRAY_SIZE(ea_block.a), + .a = { + EA_LITERAL_EMBEDDED(&ea_gen_preference, 0, p->p.main_channel->preference), + EA_LITERAL_EMBEDDED(&ea_gen_source, 0, RTS_RIP), + EA_LITERAL_EMBEDDED(&ea_rip_metric, 0, rt->metric), + }, + }; + a0.eattrs = &ea_block.l; - u8 rt_metric = rt->metric; u16 rt_tag = rt->tag; + struct iface *rt_from = NULL; if (p->ecmp) { /* ECMP route */ - struct nexthop *nhs = NULL; int num = 0; + for (rt = en->routes; rt && (num < p->ecmp); rt = rt->next) + if (rip_valid_rte(rt)) + num++; + + struct nexthop_adata *nhad = (struct nexthop_adata *) tmp_alloc_adata((num+1) * sizeof(struct nexthop)); + struct nexthop *nh = &nhad->nh; + for (rt = en->routes; rt && (num < p->ecmp); rt = rt->next) { if (!rip_valid_rte(rt)) - continue; + continue; - struct nexthop *nh = allocz(sizeof(struct nexthop)); + *nh = (struct nexthop) { + .gw = rt->next_hop, + .iface = rt->from->ifa->iface, + .weight = rt->from->ifa->cf->ecmp_weight, + }; - nh->gw = rt->next_hop; - nh->iface = rt->from->ifa->iface; - nh->weight = rt->from->ifa->cf->ecmp_weight; + if (!rt_from) + rt_from = rt->from->ifa->iface; - nexthop_insert(&nhs, nh); - num++; + nh = NEXTHOP_NEXT(nh); if (rt->tag != rt_tag) rt_tag = 0; } - a0.nh = *nhs; + nhad->ad.length = ((void *) nh - (void *) nhad->ad.data); + + ea_set_attr(&a0.eattrs, + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, + &(nexthop_sort(nhad, tmp_linpool)->ad))); } else { /* Unipath route */ - a0.from = rt->from->nbr->addr; - a0.nh.gw = rt->next_hop; - a0.nh.iface = rt->from->ifa->iface; + rt_from = rt->from->ifa->iface; + + struct nexthop_adata nhad = { + .nh.gw = rt->next_hop, + .nh.iface = rt->from->ifa->iface, + }; + + ea_set_attr_data(&a0.eattrs, &ea_gen_nexthop, 0, + &nhad.ad.data, sizeof nhad - sizeof nhad.ad); + ea_set_attr_data(&a0.eattrs, &ea_gen_from, 0, &rt->from->nbr->addr, sizeof(ip_addr)); } - a0.eattrs = alloca(sizeof(ea_list) + 3*sizeof(eattr)); - memset(a0.eattrs, 0, sizeof(ea_list)); /* Zero-ing only the ea_list header */ - a0.eattrs->count = 3; - a0.eattrs->attrs[0] = (eattr) { - .id = EA_RIP_METRIC, - .type = EAF_TYPE_INT, - .u.data = rt_metric, - }; - a0.eattrs->attrs[1] = (eattr) { - .id = EA_RIP_TAG, - .type = EAF_TYPE_INT, - .u.data = rt_tag, - }; - a0.eattrs->attrs[2] = (eattr) { - .id = EA_RIP_FROM, - .type = EAF_TYPE_PTR, - .u.data = (uintptr_t) a0.nh.iface, + ea_set_attr_u32(&a0.eattrs, &ea_rip_tag, 0, rt_tag); + + struct rip_iface_adata riad = { + .ad = { .length = sizeof(struct rip_iface_adata) - sizeof(struct adata) }, + .iface = rt_from, }; + ea_set_attr(&a0.eattrs, + EA_LITERAL_DIRECT_ADATA(&ea_rip_from, 0, &riad.ad)); rte e0 = { .attrs = &a0, @@ -320,9 +344,10 @@ rip_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *net, s if (new) { /* Update */ - u32 rt_tag = ea_get_int(new->attrs->eattrs, EA_RIP_TAG, 0); - u32 rt_metric = ea_get_int(new->attrs->eattrs, EA_RIP_METRIC, 1); - struct iface *rt_from = (struct iface *) ea_get_int(new->attrs->eattrs, EA_RIP_FROM, 0); + u32 rt_tag = ea_get_int(new->attrs->eattrs, &ea_rip_tag, 0); + u32 rt_metric = ea_get_int(new->attrs->eattrs, &ea_rip_metric, 1); + const eattr *rie = ea_find(new->attrs->eattrs, &ea_rip_from); + struct iface *rt_from = rie ? ((struct rip_iface_adata *) rie->u.ptr)->iface : NULL; if (rt_metric > p->infinity) { @@ -354,8 +379,14 @@ rip_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *net, s en->metric = rt_metric; en->tag = rt_tag; en->from = (new->src->proto == P) ? rt_from : NULL; - en->iface = new->attrs->nh.iface; - en->next_hop = new->attrs->nh.gw; + + eattr *nhea = ea_find(new->attrs->eattrs, &ea_gen_nexthop); + if (nhea) + { + struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr; + en->iface = nhad->nh.iface; + en->next_hop = nhad->nh.gw; + } } else { @@ -1088,16 +1119,16 @@ rip_rte_better(struct rte *new, struct rte *old) ASSERT_DIE(new->src == old->src); struct rip_proto *p = (struct rip_proto *) new->src->proto; - u32 new_metric = ea_get_int(new->attrs->eattrs, EA_RIP_METRIC, p->infinity); - u32 old_metric = ea_get_int(old->attrs->eattrs, EA_RIP_METRIC, p->infinity); + u32 new_metric = ea_get_int(new->attrs->eattrs, &ea_rip_metric, p->infinity); + u32 old_metric = ea_get_int(old->attrs->eattrs, &ea_rip_metric, p->infinity); return new_metric < old_metric; } static u32 -rip_rte_igp_metric(struct rte *rt) +rip_rte_igp_metric(const rte *rt) { - return ea_get_int(rt->attrs->eattrs, EA_RIP_METRIC, IGP_METRIC_UNKNOWN); + return ea_get_int(rt->attrs->eattrs, &ea_rip_metric, IGP_METRIC_UNKNOWN); } static void @@ -1198,33 +1229,38 @@ static void rip_get_route_info(rte *rte, byte *buf) { struct rip_proto *p = (struct rip_proto *) rte->src->proto; - u32 rt_metric = ea_get_int(rte->attrs->eattrs, EA_RIP_METRIC, p->infinity); - u32 rt_tag = ea_get_int(rte->attrs->eattrs, EA_RIP_TAG, 0); + u32 rt_metric = ea_get_int(rte->attrs->eattrs, &ea_rip_metric, p->infinity); + u32 rt_tag = ea_get_int(rte->attrs->eattrs, &ea_rip_tag, 0); - buf += bsprintf(buf, " (%d/%d)", rte->attrs->pref, rt_metric); + buf += bsprintf(buf, " (%d/%d)", rt_get_preference(rte), rt_metric); if (rt_tag) bsprintf(buf, " [%04x]", rt_tag); } -static int -rip_get_attr(const eattr *a, byte *buf, int buflen UNUSED) +static void +rip_tag_format(const eattr *a, byte *buf, uint buflen) { - switch (a->id) - { - case EA_RIP_METRIC: - bsprintf(buf, "metric: %d", a->u.data); - return GA_FULL; - - case EA_RIP_TAG: - bsprintf(buf, "tag: %04x", a->u.data); - return GA_FULL; - - default: - return GA_UNKNOWN; - } + bsnprintf(buf, buflen, "tag: %04x", a->u.data); } +static struct ea_class ea_rip_metric = { + .name = "rip_metric", + .type = T_INT, +}; + +static struct ea_class ea_rip_tag = { + .name = "rip_tag", + .type = T_INT, + .format = rip_tag_format, +}; + +static struct ea_class ea_rip_from = { + .name = "rip_from", + .type = T_IFACE, + .readonly = 1, +}; + void rip_show_interfaces(struct proto *P, const char *iff) { @@ -1327,7 +1363,6 @@ rip_dump(struct proto *P) struct protocol proto_rip = { .name = "RIP", .template = "rip%d", - .class = PROTOCOL_RIP, .preference = DEF_PREF_RIP, .channel_mask = NB_IP, .proto_size = sizeof(struct rip_proto), @@ -1339,5 +1374,16 @@ struct protocol proto_rip = { .shutdown = rip_shutdown, .reconfigure = rip_reconfigure, .get_route_info = rip_get_route_info, - .get_attr = rip_get_attr }; + +void +rip_build(void) +{ + proto_build(&proto_rip); + + EA_REGISTER_ALL( + &ea_rip_metric, + &ea_rip_tag, + &ea_rip_from + ); +} diff --git a/proto/rip/rip.h b/proto/rip/rip.h index f8713c4a..a01f8d3b 100644 --- a/proto/rip/rip.h +++ b/proto/rip/rip.h @@ -16,7 +16,7 @@ #include "nest/cli.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/password.h" #include "nest/locks.h" #include "nest/bfd.h" @@ -195,10 +195,6 @@ struct rip_rte #define RIP_ENTRY_VALID 1 /* Valid outgoing route */ #define RIP_ENTRY_STALE 2 /* Stale outgoing route, waiting for GC */ -#define EA_RIP_METRIC EA_CODE(PROTOCOL_RIP, 0) -#define EA_RIP_TAG EA_CODE(PROTOCOL_RIP, 1) -#define EA_RIP_FROM EA_CODE(PROTOCOL_RIP, 2) - static inline int rip_is_v2(struct rip_proto *p) { return p->rip2; } diff --git a/proto/rpki/Makefile b/proto/rpki/Makefile index eb09b7df..8e3a2761 100644 --- a/proto/rpki/Makefile +++ b/proto/rpki/Makefile @@ -2,5 +2,6 @@ src := rpki.c packets.c tcp_transport.c ssh_transport.c transport.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,rpki_build) -tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/rpki/packets.c b/proto/rpki/packets.c index dd11f997..4a52b54b 100644 --- a/proto/rpki/packets.c +++ b/proto/rpki/packets.c @@ -661,9 +661,9 @@ rpki_handle_cache_response_pdu(struct rpki_cache *cache, const struct pdu_cache_ * a refresh cycle. */ if (cache->p->roa4_channel) - rt_refresh_begin(cache->p->roa4_channel->table, cache->p->roa4_channel); + rt_refresh_begin(cache->p->roa4_channel->table, &cache->p->roa4_channel->in_req); if (cache->p->roa6_channel) - rt_refresh_begin(cache->p->roa6_channel->table, cache->p->roa6_channel); + rt_refresh_begin(cache->p->roa6_channel->table, &cache->p->roa6_channel->in_req); cache->p->refresh_channels = 1; } @@ -737,6 +737,33 @@ rpki_handle_prefix_pdu(struct rpki_cache *cache, const struct pdu_header *pdu) net_addr_union addr = {}; rpki_prefix_pdu_2_net_addr(pdu, &addr); + if (type == IPV4_PREFIX) + { + if ((addr.roa4.pxlen > addr.roa4.max_pxlen) || + (addr.roa4.max_pxlen > IP4_MAX_PREFIX_LENGTH)) + { + RPKI_WARN(cache->p, "Received corrupt packet from RPKI cache server: invalid pxlen or max_pxlen"); + byte tmp[pdu->len]; + const struct pdu_header *hton_pdu = rpki_pdu_back_to_network_byte_order((void *) tmp, (const void *) pdu); + rpki_send_error_pdu(cache, CORRUPT_DATA, pdu->len, hton_pdu, "Corrupted PDU: invalid pxlen or max_pxlen"); + rpki_cache_change_state(cache, RPKI_CS_ERROR_FATAL); + return RPKI_ERROR; + } + } + else + { + if ((addr.roa6.pxlen > addr.roa6.max_pxlen) || + (addr.roa6.max_pxlen > IP6_MAX_PREFIX_LENGTH)) + { + RPKI_WARN(cache->p, "Received corrupt packet from RPKI cache server: invalid pxlen or max_pxlen"); + byte tmp[pdu->len]; + const struct pdu_header *hton_pdu = rpki_pdu_back_to_network_byte_order((void *) tmp, (const void *) pdu); + rpki_send_error_pdu(cache, CORRUPT_DATA, pdu->len, hton_pdu, "Corrupted PDU: invalid pxlen or max_pxlen"); + rpki_cache_change_state(cache, RPKI_CS_ERROR_FATAL); + return RPKI_ERROR; + } + } + if (cf->ignore_max_length) { if (type == IPV4_PREFIX) @@ -819,9 +846,9 @@ rpki_handle_end_of_data_pdu(struct rpki_cache *cache, const struct pdu_end_of_da { cache->p->refresh_channels = 0; if (cache->p->roa4_channel) - rt_refresh_end(cache->p->roa4_channel->table, cache->p->roa4_channel); + rt_refresh_end(cache->p->roa4_channel->table, &cache->p->roa4_channel->in_req); if (cache->p->roa6_channel) - rt_refresh_end(cache->p->roa6_channel->table, cache->p->roa6_channel); + rt_refresh_end(cache->p->roa6_channel->table, &cache->p->roa6_channel->in_req); } cache->last_update = current_time(); @@ -897,6 +924,9 @@ rpki_rx_hook(struct birdsock *sk, uint size) struct rpki_cache *cache = sk->data; struct rpki_proto *p = cache->p; + if ((p->p.proto_state == PS_DOWN) || (p->cache != cache)) + return 0; + byte *pkt_start = sk->rbuf; byte *end = pkt_start + size; @@ -953,6 +983,8 @@ rpki_err_hook(struct birdsock *sk, int error_num) CACHE_TRACE(D_EVENTS, cache, "The other side closed a connection"); } + if (cache->p->cache != cache) + return; rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT); } @@ -972,6 +1004,9 @@ rpki_tx_hook(sock *sk) { struct rpki_cache *cache = sk->data; + if (cache->p->cache != cache) + return; + while (rpki_fire_tx(cache) > 0) ; } @@ -981,6 +1016,9 @@ rpki_connected_hook(sock *sk) { struct rpki_cache *cache = sk->data; + if (cache->p->cache != cache) + return; + CACHE_TRACE(D_EVENTS, cache, "Connected"); proto_notify_state(&cache->p->p, PS_UP); diff --git a/proto/rpki/rpki.c b/proto/rpki/rpki.c index 72fbc967..c8b0ff67 100644 --- a/proto/rpki/rpki.c +++ b/proto/rpki/rpki.c @@ -120,12 +120,10 @@ rpki_table_add_roa(struct rpki_cache *cache, struct channel *channel, const net_ { struct rpki_proto *p = cache->p; - rta a0 = { - .pref = channel->preference, - .source = RTS_RPKI, - .scope = SCOPE_UNIVERSE, - .dest = RTD_NONE, - }; + rta a0 = {}; + + ea_set_attr_u32(&a0.eattrs, &ea_gen_preference, 0, channel->preference); + ea_set_attr_u32(&a0.eattrs, &ea_gen_source, 0, RTS_RPKI); rte e0 = { .attrs = &a0, .src = p->p.main_source, }; @@ -384,6 +382,9 @@ rpki_refresh_hook(timer *tm) { struct rpki_cache *cache = tm->data; + if (cache->p->cache != cache) + return; + CACHE_DBG(cache, "%s", rpki_cache_state_to_str(cache->state)); switch (cache->state) @@ -430,6 +431,9 @@ rpki_retry_hook(timer *tm) { struct rpki_cache *cache = tm->data; + if (cache->p->cache != cache) + return; + CACHE_DBG(cache, "%s", rpki_cache_state_to_str(cache->state)); switch (cache->state) @@ -475,6 +479,9 @@ rpki_expire_hook(timer *tm) { struct rpki_cache *cache = tm->data; + if (cache->p->cache != cache) + return; + if (!cache->last_update) return; @@ -825,16 +832,27 @@ rpki_show_proto_info(struct proto *P) if (cache) { const char *transport_name = "---"; + uint default_port = 0; switch (cf->tr_config.type) { #if HAVE_LIBSSH - case RPKI_TR_SSH: transport_name = "SSHv2"; break; + case RPKI_TR_SSH: + transport_name = "SSHv2"; + default_port = RPKI_SSH_PORT; + break; #endif - case RPKI_TR_TCP: transport_name = "Unprotected over TCP"; break; + case RPKI_TR_TCP: + transport_name = "Unprotected over TCP"; + default_port = RPKI_TCP_PORT; + break; }; cli_msg(-1006, " Cache server: %s", cf->hostname); + + if (cf->port != default_port) + cli_msg(-1006, " Cache port: %u", cf->port); + cli_msg(-1006, " Status: %s", rpki_cache_state_to_str(cache->state)); cli_msg(-1006, " Transport: %s", transport_name); cli_msg(-1006, " Protocol version: %u", cache->version); @@ -932,7 +950,6 @@ rpki_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUS struct protocol proto_rpki = { .name = "RPKI", .template = "rpki%d", - .class = PROTOCOL_RPKI, .preference = DEF_PREF_RPKI, .proto_size = sizeof(struct rpki_proto), .config_size = sizeof(struct rpki_config), @@ -946,3 +963,9 @@ struct protocol proto_rpki = { .reconfigure = rpki_reconfigure, .get_status = rpki_get_status, }; + +void +rpki_build(void) +{ + proto_build(&proto_rpki); +} diff --git a/proto/rpki/rpki.h b/proto/rpki/rpki.h index 8a5c38fd..26fbb46e 100644 --- a/proto/rpki/rpki.h +++ b/proto/rpki/rpki.h @@ -13,7 +13,7 @@ #define _BIRD_RPKI_H_ #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "lib/socket.h" #include "lib/ip.h" diff --git a/proto/rpki/transport.c b/proto/rpki/transport.c index a1ac7587..81bd6dd8 100644 --- a/proto/rpki/transport.c +++ b/proto/rpki/transport.c @@ -85,6 +85,7 @@ rpki_tr_open(struct rpki_tr_sock *tr) sk->rbsize = RPKI_RX_BUFFER_SIZE; sk->tbsize = RPKI_TX_BUFFER_SIZE; sk->tos = IP_PREC_INTERNET_CONTROL; + sk->vrf = cache->p->p.vrf; if (ipa_zero(sk->daddr) && sk->host) { diff --git a/proto/static/Makefile b/proto/static/Makefile index e38f9b74..26aed31f 100644 --- a/proto/static/Makefile +++ b/proto/static/Makefile @@ -2,5 +2,6 @@ src := static.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,static_build) -tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/static/static.c b/proto/static/static.c index 6e258f6f..6369fea5 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -38,7 +38,7 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "conf/conf.h" #include "filter/filter.h" @@ -47,8 +47,6 @@ #include "static.h" -static linpool *static_lp; - static inline struct rte_src * static_get_source(struct static_proto *p, uint i) { return i ? rt_get_source(&p->p, i) : p->p.main_source; } @@ -57,47 +55,60 @@ static_announce_rte(struct static_proto *p, struct static_route *r) { rta *a = allocz(RTA_MAX_SIZE); struct rte_src *src = static_get_source(p, r->index); - a->source = RTS_STATIC; - a->scope = SCOPE_UNIVERSE; - a->dest = r->dest; - a->pref = p->p.main_channel->preference; + ea_set_attr_u32(&a->eattrs, &ea_gen_preference, 0, p->p.main_channel->preference); + ea_set_attr_u32(&a->eattrs, &ea_gen_source, 0, RTS_STATIC); if (r->dest == RTD_UNICAST) { - struct static_route *r2; - struct nexthop *nhs = NULL; + uint sz = 0; + for (struct static_route *r2 = r; r2; r2 = r2->mp_next) + if (r2->active) + sz += NEXTHOP_SIZE_CNT(r2->mls ? r2->mls->length / sizeof(u32) : 0); - for (r2 = r; r2; r2 = r2->mp_next) + if (!sz) + goto withdraw; + + struct nexthop_adata *nhad = allocz(sz + sizeof *nhad); + struct nexthop *nh = &nhad->nh; + + for (struct static_route *r2 = r; r2; r2 = r2->mp_next) { if (!r2->active) continue; - struct nexthop *nh = allocz(NEXTHOP_MAX_SIZE); - nh->gw = r2->via; - nh->iface = r2->neigh->iface; - nh->flags = r2->onlink ? RNF_ONLINK : 0; - nh->weight = r2->weight; + *nh = (struct nexthop) { + .gw = r2->via, + .iface = r2->neigh->iface, + .flags = r2->onlink ? RNF_ONLINK : 0, + .weight = r2->weight, + }; + if (r2->mls) { - nh->labels = r2->mls->len; - memcpy(nh->label, r2->mls->stack, r2->mls->len * sizeof(u32)); + nh->labels = r2->mls->length / sizeof(u32); + memcpy(nh->label, r2->mls->data, r2->mls->length); } - nexthop_insert(&nhs, nh); + nh = NEXTHOP_NEXT(nh); } - if (!nhs) - goto withdraw; - - nexthop_link(a, nhs); + ea_set_attr_data(&a->eattrs, &ea_gen_nexthop, 0, + nhad->ad.data, (void *) nh - (void *) nhad->ad.data); } - if (r->dest == RTDX_RECURSIVE) + else if (r->dest == RTDX_RECURSIVE) { rtable *tab = ipa_is_ip4(r->via) ? p->igp_table_ip4 : p->igp_table_ip6; - rta_set_recursive_next_hop(p->p.main_channel->table, a, tab, r->via, IPA_NONE, r->mls); + u32 *labels = r->mls ? (void *) r->mls->data : NULL; + u32 lnum = r->mls ? r->mls->length / sizeof(u32) : 0; + + ea_set_hostentry(&a->eattrs, p->p.main_channel->table, tab, + r->via, IPA_NONE, lnum, labels); } + else if (r->dest) + ea_set_dest(&a->eattrs, 0, r->dest); + /* Already announced */ if (r->state == SRS_CLEAN) return; @@ -107,14 +118,10 @@ static_announce_rte(struct static_proto *p, struct static_route *r) /* Evaluate the filter */ if (r->cmds) - f_eval_rte(r->cmds, e, static_lp); + f_eval_rte(r->cmds, e); rte_update(p->p.main_channel, r->net, e, src); r->state = SRS_CLEAN; - - if (r->cmds) - lp_flush(static_lp); - return; withdraw: @@ -310,31 +317,17 @@ static_same_dest(struct static_route *x, struct static_route *y) (x->weight != y->weight) || (x->use_bfd != y->use_bfd) || (!x->mls != !y->mls) || - ((x->mls) && (y->mls) && (x->mls->len != y->mls->len))) + ((x->mls) && (y->mls) && adata_same(x->mls, y->mls))) return 0; - - if (!x->mls) - continue; - - for (uint i = 0; i < x->mls->len; i++) - if (x->mls->stack[i] != y->mls->stack[i]) - return 0; } return !x && !y; case RTDX_RECURSIVE: if (!ipa_equal(x->via, y->via) || (!x->mls != !y->mls) || - ((x->mls) && (y->mls) && (x->mls->len != y->mls->len))) + ((x->mls) && (y->mls) && adata_same(x->mls, y->mls))) return 0; - if (!x->mls) - return 1; - - for (uint i = 0; i < x->mls->len; i++) - if (x->mls->stack[i] != y->mls->stack[i]) - return 0; - return 1; default: @@ -403,16 +396,16 @@ static_reload_routes(struct channel *C) static int static_rte_better(rte *new, rte *old) { - u32 n = ea_get_int(new->attrs->eattrs, EA_GEN_IGP_METRIC, IGP_METRIC_UNKNOWN); - u32 o = ea_get_int(old->attrs->eattrs, EA_GEN_IGP_METRIC, IGP_METRIC_UNKNOWN); + u32 n = ea_get_int(new->attrs->eattrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN); + u32 o = ea_get_int(old->attrs->eattrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN); return n < o; } static int static_rte_mergable(rte *pri, rte *sec) { - u32 a = ea_get_int(pri->attrs->eattrs, EA_GEN_IGP_METRIC, IGP_METRIC_UNKNOWN); - u32 b = ea_get_int(sec->attrs->eattrs, EA_GEN_IGP_METRIC, IGP_METRIC_UNKNOWN); + u32 a = ea_get_int(pri->attrs->eattrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN); + u32 b = ea_get_int(sec->attrs->eattrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN); return a == b; } @@ -474,9 +467,6 @@ static_start(struct proto *P) struct static_config *cf = (void *) P->cf; struct static_route *r; - if (!static_lp) - static_lp = lp_new(&root_pool, LP_GOOD_SIZE(1024)); - if (p->igp_table_ip4) rt_lock_table(p->igp_table_ip4); @@ -507,19 +497,13 @@ static_shutdown(struct proto *P) WALK_LIST(r, cf->routes) static_reset_rte(p, r); - return PS_DOWN; -} - -static void -static_cleanup(struct proto *P) -{ - struct static_proto *p = (void *) P; - if (p->igp_table_ip4) rt_unlock_table(p->igp_table_ip4); if (p->igp_table_ip6) rt_unlock_table(p->igp_table_ip6); + + return PS_DOWN; } static void @@ -709,11 +693,12 @@ static_copy_config(struct proto_config *dest, struct proto_config *src) static void static_get_route_info(rte *rte, byte *buf) { - eattr *a = ea_find(rte->attrs->eattrs, EA_GEN_IGP_METRIC); + eattr *a = ea_find(rte->attrs->eattrs, &ea_gen_igp_metric); + u32 pref = rt_get_preference(rte); if (a) - buf += bsprintf(buf, " (%d/%u)", rte->attrs->pref, a->u.data); + buf += bsprintf(buf, " (%d/%u)", pref, a->u.data); else - buf += bsprintf(buf, " (%d)", rte->attrs->pref); + buf += bsprintf(buf, " (%d)", pref); } static void @@ -767,7 +752,6 @@ static_show(struct proto *P) struct protocol proto_static = { .name = "Static", .template = "static%d", - .class = PROTOCOL_STATIC, .preference = DEF_PREF_STATIC, .channel_mask = NB_ANY, .proto_size = sizeof(struct static_proto), @@ -777,8 +761,13 @@ struct protocol proto_static = { .dump = static_dump, .start = static_start, .shutdown = static_shutdown, - .cleanup = static_cleanup, .reconfigure = static_reconfigure, .copy_config = static_copy_config, .get_route_info = static_get_route_info, }; + +void +static_build(void) +{ + proto_build(&proto_static); +} diff --git a/proto/static/static.h b/proto/static/static.h index fc91f71c..ea7ca33b 100644 --- a/proto/static/static.h +++ b/proto/static/static.h @@ -9,7 +9,7 @@ #ifndef _BIRD_STATIC_H_ #define _BIRD_STATIC_H_ -#include "nest/route.h" +#include "nest/rt.h" #include "nest/bfd.h" #include "lib/buffer.h" @@ -49,7 +49,7 @@ struct static_route { byte weight; /* Multipath next hop weight */ byte use_bfd; /* Configured to use BFD */ struct bfd_request *bfd_req; /* BFD request, if BFD is used */ - mpls_label_stack *mls; /* MPLS label stack; may be NULL */ + struct adata *mls; /* MPLS label stack; may be NULL */ }; /* diff --git a/sysdep/bsd/krt-sock.c b/sysdep/bsd/krt-sock.c index 6f788ac2..1c1bd50c 100644 --- a/sysdep/bsd/krt-sock.c +++ b/sysdep/bsd/krt-sock.c @@ -25,7 +25,7 @@ #include "nest/bird.h" #include "nest/iface.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "sysdep/unix/unix.h" @@ -366,6 +366,30 @@ krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old) } } +/** + * krt_assume_onlink - check if routes on interface are considered onlink + * @iface: The interface of the next hop + * @ipv6: Switch to only consider IPv6 or IPv4 addresses. + * + * The BSD kernel does not support an onlink flag. If the interface has only + * host addresses configured, all routes should be considered as onlink and + * the function returns 1. + */ +static int +krt_assume_onlink(struct iface *iface, int ipv6) +{ + const u8 type = ipv6 ? NET_IP6 : NET_IP4; + + struct ifa *ifa; + WALK_LIST(ifa, iface->addrs) + { + if ((ifa->prefix.type == type) && !(ifa->flags & IA_HOST)) + return 0; + } + + return 1; +} + #define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0) static void @@ -494,10 +518,10 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) net = net_get(p->p.main_channel->table, &ndst); rta a = { - .source = RTS_INHERIT, - .scope = SCOPE_UNIVERSE, }; + ea_set_attr_u32(&a->eattrs, &ea_gen_source, 0, RTS_INHERIT); + /* reject/blackhole routes have also set RTF_GATEWAY, we wil check them first. */ @@ -526,15 +550,21 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) a.dest = RTD_UNICAST; if (flags & RTF_GATEWAY) { - neighbor *ng; a.nh.gw = igate; /* Clean up embedded interface ID returned in link-local address */ if (ipa_is_link_local(a.nh.gw)) _I0(a.nh.gw) = 0xfe800000; - ng = neigh_find(&p->p, a.nh.gw, a.nh.iface, 0); - if (!ng || (ng->scope == SCOPE_HOST)) + /* The BSD kernel does not support an onlink flag. We heuristically + set the onlink flag, if the iface has only host addresses. */ + if (krt_assume_onlink(a.nh.iface, ipv6)) + a.nh.flags |= RNF_ONLINK; + + neighbor *nbr; + nbr = neigh_find(&p->p, a.nh.gw, a.nh.iface, + (a.nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0); + if (!nbr || (nbr->scope == SCOPE_HOST)) { /* Ignore routes with next-hop 127.0.0.1, host routes with such next-hop appear on OpenBSD for address aliases. */ @@ -550,15 +580,8 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) done:; rte e0 = { .attrs = &a, .net = net, }; - ea_list *ea = alloca(sizeof(ea_list) + 1 * sizeof(eattr)); - *ea = (ea_list) { .count = 1, .next = e0.attrs->eattrs }; - e0.attrs->eattrs = ea; - - ea->attrs[0] = (eattr) { - .id = EA_KRT_SOURCE, - .type = EAF_TYPE_INT, - .u.data = src2, - }; + ea_set_attr(e0.attrs->eattrs, + EA_LITERAL_EMBEDDED(EA_KRT_SOURCE, T_INT, 0, src2)); if (scan) krt_got_route(p, &e0, src); diff --git a/sysdep/bsd/sysio.h b/sysdep/bsd/sysio.h index c757960a..f1887fb4 100644 --- a/sysdep/bsd/sysio.h +++ b/sysdep/bsd/sysio.h @@ -271,3 +271,9 @@ sk_set_priority(sock *s, int prio UNUSED) { ERR_MSG("Socket priority not supported"); } + +static inline int +sk_set_freebind(sock *s) +{ + ERR_MSG("Freebind is not supported"); +} diff --git a/sysdep/config.h b/sysdep/config.h index 55be90f0..b0531844 100644 --- a/sysdep/config.h +++ b/sysdep/config.h @@ -13,7 +13,7 @@ #ifdef GIT_LABEL #define BIRD_VERSION XSTR1(GIT_LABEL) #else -#define BIRD_VERSION "2.0.8" +#define BIRD_VERSION "2.0.9" #endif /* Include parameters determined by configure script */ diff --git a/sysdep/linux/krt-sys.h b/sysdep/linux/krt-sys.h index a8af4c95..aa90f6e4 100644 --- a/sysdep/linux/krt-sys.h +++ b/sysdep/linux/krt-sys.h @@ -34,41 +34,10 @@ static inline struct ifa * kif_get_primary_ip(struct iface *i UNUSED) { return N #define KRT_ALLOW_MERGE_PATHS 1 -#define EA_KRT_PREFSRC EA_CODE(PROTOCOL_KERNEL, 0x10) -#define EA_KRT_REALM EA_CODE(PROTOCOL_KERNEL, 0x11) -#define EA_KRT_SCOPE EA_CODE(PROTOCOL_KERNEL, 0x12) - - -#define KRT_METRICS_MAX 0x10 /* RTAX_QUICKACK+1 */ -#define KRT_METRICS_OFFSET 0x20 /* Offset of EA_KRT_* vs RTAX_* */ - -#define KRT_FEATURES_MAX 4 - -/* - * Following attributes are parts of RTA_METRICS kernel route attribute, their - * ids must be consistent with their RTAX_* constants (+ KRT_METRICS_OFFSET) - */ -#define EA_KRT_METRICS EA_CODE(PROTOCOL_KERNEL, 0x20) /* Dummy one */ -#define EA_KRT_LOCK EA_CODE(PROTOCOL_KERNEL, 0x21) -#define EA_KRT_MTU EA_CODE(PROTOCOL_KERNEL, 0x22) -#define EA_KRT_WINDOW EA_CODE(PROTOCOL_KERNEL, 0x23) -#define EA_KRT_RTT EA_CODE(PROTOCOL_KERNEL, 0x24) -#define EA_KRT_RTTVAR EA_CODE(PROTOCOL_KERNEL, 0x25) -#define EA_KRT_SSTRESH EA_CODE(PROTOCOL_KERNEL, 0x26) -#define EA_KRT_CWND EA_CODE(PROTOCOL_KERNEL, 0x27) -#define EA_KRT_ADVMSS EA_CODE(PROTOCOL_KERNEL, 0x28) -#define EA_KRT_REORDERING EA_CODE(PROTOCOL_KERNEL, 0x29) -#define EA_KRT_HOPLIMIT EA_CODE(PROTOCOL_KERNEL, 0x2a) -#define EA_KRT_INITCWND EA_CODE(PROTOCOL_KERNEL, 0x2b) -#define EA_KRT_FEATURES EA_CODE(PROTOCOL_KERNEL, 0x2c) -#define EA_KRT_RTO_MIN EA_CODE(PROTOCOL_KERNEL, 0x2d) -#define EA_KRT_INITRWND EA_CODE(PROTOCOL_KERNEL, 0x2e) -#define EA_KRT_QUICKACK EA_CODE(PROTOCOL_KERNEL, 0x2f) - - struct krt_params { u32 table_id; /* Kernel table ID we sync with */ u32 metric; /* Kernel metric used for all routes */ + uint netlink_rx_buffer; /* Rx buffer size for the netlink socket */ }; struct krt_state { diff --git a/sysdep/linux/netlink.Y b/sysdep/linux/netlink.Y index 7097f577..7ba8c7c9 100644 --- a/sysdep/linux/netlink.Y +++ b/sysdep/linux/netlink.Y @@ -10,9 +10,7 @@ CF_HDR CF_DECLS -CF_KEYWORDS(KERNEL, TABLE, METRIC, KRT_PREFSRC, KRT_REALM, KRT_SCOPE, KRT_MTU, KRT_WINDOW, - KRT_RTT, KRT_RTTVAR, KRT_SSTRESH, KRT_CWND, KRT_ADVMSS, KRT_REORDERING, - KRT_HOPLIMIT, KRT_INITCWND, KRT_RTO_MIN, KRT_INITRWND, KRT_QUICKACK, +CF_KEYWORDS(KERNEL, TABLE, METRIC, NETLINK, RX, BUFFER, KRT_LOCK_MTU, KRT_LOCK_WINDOW, KRT_LOCK_RTT, KRT_LOCK_RTTVAR, KRT_LOCK_SSTRESH, KRT_LOCK_CWND, KRT_LOCK_ADVMSS, KRT_LOCK_REORDERING, KRT_LOCK_HOPLIMIT, KRT_LOCK_RTO_MIN, KRT_FEATURE_ECN, KRT_FEATURE_ALLFRAG) @@ -24,41 +22,25 @@ kern_proto: kern_proto kern_sys_item ';' ; kern_sys_item: KERNEL TABLE expr { THIS_KRT->sys.table_id = $3; } | METRIC expr { THIS_KRT->sys.metric = $2; } + | NETLINK RX BUFFER expr { THIS_KRT->sys.netlink_rx_buffer = $4; } ; -dynamic_attr: KRT_PREFSRC { $$ = f_new_dynamic_attr(EAF_TYPE_IP_ADDRESS, T_IP, EA_KRT_PREFSRC); } ; -dynamic_attr: KRT_REALM { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_REALM); } ; -dynamic_attr: KRT_SCOPE { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_SCOPE); } ; - -dynamic_attr: KRT_MTU { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_MTU); } ; -dynamic_attr: KRT_WINDOW { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_WINDOW); } ; -dynamic_attr: KRT_RTT { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_RTT); } ; -dynamic_attr: KRT_RTTVAR { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_RTTVAR); } ; -dynamic_attr: KRT_SSTRESH { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_SSTRESH); } ; -dynamic_attr: KRT_CWND { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_CWND); } ; -dynamic_attr: KRT_ADVMSS { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_ADVMSS); } ; -dynamic_attr: KRT_REORDERING { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_REORDERING); } ; -dynamic_attr: KRT_HOPLIMIT { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_HOPLIMIT); } ; -dynamic_attr: KRT_INITCWND { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_INITCWND); } ; -dynamic_attr: KRT_RTO_MIN { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_RTO_MIN); } ; -dynamic_attr: KRT_INITRWND { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_INITRWND); } ; -dynamic_attr: KRT_QUICKACK { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_QUICKACK); } ; - /* Bits of EA_KRT_LOCK, based on RTAX_* constants */ -dynamic_attr: KRT_LOCK_MTU { $$ = f_new_dynamic_attr_bit(2, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_WINDOW { $$ = f_new_dynamic_attr_bit(3, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_RTT { $$ = f_new_dynamic_attr_bit(4, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_RTTVAR { $$ = f_new_dynamic_attr_bit(5, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_SSTRESH { $$ = f_new_dynamic_attr_bit(6, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_CWND { $$ = f_new_dynamic_attr_bit(7, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_ADVMSS { $$ = f_new_dynamic_attr_bit(8, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_REORDERING { $$ = f_new_dynamic_attr_bit(9, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_HOPLIMIT { $$ = f_new_dynamic_attr_bit(10, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_RTO_MIN { $$ = f_new_dynamic_attr_bit(13, T_BOOL, EA_KRT_LOCK); } ; +attr_bit: KRT_LOCK_MTU { $$ = f_new_dynamic_attr_bit(2, "krt_lock"); } ; +attr_bit: KRT_LOCK_WINDOW { $$ = f_new_dynamic_attr_bit(3, "krt_lock"); } ; +attr_bit: KRT_LOCK_RTT { $$ = f_new_dynamic_attr_bit(4, "krt_lock"); } ; +attr_bit: KRT_LOCK_RTTVAR { $$ = f_new_dynamic_attr_bit(5, "krt_lock"); } ; +attr_bit: KRT_LOCK_SSTRESH { $$ = f_new_dynamic_attr_bit(6, "krt_lock"); } ; +attr_bit: KRT_LOCK_CWND { $$ = f_new_dynamic_attr_bit(7, "krt_lock"); } ; +attr_bit: KRT_LOCK_ADVMSS { $$ = f_new_dynamic_attr_bit(8, "krt_lock"); } ; +attr_bit: KRT_LOCK_REORDERING { $$ = f_new_dynamic_attr_bit(9, "krt_lock"); } ; +attr_bit: KRT_LOCK_HOPLIMIT { $$ = f_new_dynamic_attr_bit(10, "krt_lock"); } ; +attr_bit: KRT_LOCK_RTO_MIN { $$ = f_new_dynamic_attr_bit(13, "krt_lock"); } ; -dynamic_attr: KRT_FEATURE_ECN { $$ = f_new_dynamic_attr_bit(0, T_BOOL, EA_KRT_FEATURES); } ; -dynamic_attr: KRT_FEATURE_ALLFRAG { $$ = f_new_dynamic_attr(3, T_BOOL, EA_KRT_FEATURES); } ; +/* Bits of EA_KRT_FEATURES */ +attr_bit: KRT_FEATURE_ECN { $$ = f_new_dynamic_attr_bit(0, "krt_features"); } ; +attr_bit: KRT_FEATURE_ALLFRAG { $$ = f_new_dynamic_attr_bit(3, "krt_features"); } ; CF_CODE diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c index bff2d579..a4172a6d 100644 --- a/sysdep/linux/netlink.c +++ b/sysdep/linux/netlink.c @@ -17,7 +17,7 @@ #undef LOCAL_DEBUG #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "lib/alloca.h" @@ -26,6 +26,7 @@ #include "lib/socket.h" #include "lib/string.h" #include "lib/hash.h" +#include "lib/macro.h" #include "conf/conf.h" #include <asm/types.h> @@ -69,6 +70,10 @@ #define RTA_ENCAP 22 #endif +#ifndef NETLINK_GET_STRICT_CHK +#define NETLINK_GET_STRICT_CHK 12 +#endif + #define krt_ipv4(p) ((p)->af == AF_INET) #define krt_ecmp6(p) ((p)->af == AF_INET6) @@ -117,6 +122,101 @@ struct nl_parse_state u32 rta_flow; /* Used during parsing */ }; +/* + * Netlink eattr definitions + */ + +#define KRT_METRICS_MAX ARRAY_SIZE(ea_krt_metrics) +#define KRT_FEATURES_MAX 4 + +static void krt_bitfield_format(const eattr *e, byte *buf, uint buflen); + +static struct ea_class + ea_krt_prefsrc = { + .name = "krt_prefsrc", + .type = T_IP, + }, + ea_krt_realm = { + .name = "krt_realm", + .type = T_INT, + }, + ea_krt_scope = { + .name = "krt_scope", + .type = T_INT, + }; + +static struct ea_class ea_krt_metrics[] = { + [RTAX_LOCK] = { + .name = "krt_lock", + .type = T_INT, + .format = krt_bitfield_format, + }, + [RTAX_FEATURES] = { + .name = "krt_features", + .type = T_INT, + .format = krt_bitfield_format, + }, +#define KRT_METRIC_INT(_rtax, _name) [_rtax] = { .name = _name, .type = T_INT } + KRT_METRIC_INT(RTAX_MTU, "krt_mtu"), + KRT_METRIC_INT(RTAX_WINDOW, "krt_window"), + KRT_METRIC_INT(RTAX_RTT, "krt_rtt"), + KRT_METRIC_INT(RTAX_RTTVAR, "krt_rttvar"), + KRT_METRIC_INT(RTAX_SSTHRESH, "krt_sstresh"), + KRT_METRIC_INT(RTAX_CWND, "krt_cwnd"), + KRT_METRIC_INT(RTAX_ADVMSS, "krt_advmss"), + KRT_METRIC_INT(RTAX_REORDERING, "krt_reordering"), + KRT_METRIC_INT(RTAX_HOPLIMIT, "krt_hoplimit"), + KRT_METRIC_INT(RTAX_INITCWND, "krt_initcwnd"), + KRT_METRIC_INT(RTAX_RTO_MIN, "krt_rto_min"), + KRT_METRIC_INT(RTAX_INITRWND, "krt_initrwnd"), + KRT_METRIC_INT(RTAX_QUICKACK, "krt_quickack"), +#undef KRT_METRIC_INT +}; + +static const char *krt_metrics_names[KRT_METRICS_MAX] = { + NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss", + "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack" +}; + +static const char *krt_features_names[KRT_FEATURES_MAX] = { + "ecn", NULL, NULL, "allfrag" +}; + +static void +krt_bitfield_format(const eattr *a, byte *buf, uint buflen) +{ + if (a->id == ea_krt_metrics[RTAX_LOCK].id) + ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX); + else if (a->id == ea_krt_metrics[RTAX_FEATURES].id) + ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX); +} + +static void +nl_ea_register(void) +{ + EA_REGISTER_ALL( + &ea_krt_prefsrc, + &ea_krt_realm, + &ea_krt_scope + ); + + for (uint i = 0; i < KRT_METRICS_MAX; i++) + { + if (!ea_krt_metrics[i].name) + ea_krt_metrics[i] = (struct ea_class) { + .name = mb_sprintf(&root_pool, "krt_metric_%d", i), + .type = T_INT, + }; + + ea_register_init(&ea_krt_metrics[i]); + } + + for (uint i = 1; i < KRT_METRICS_MAX; i++) + ASSERT_DIE(ea_krt_metrics[i].id == ea_krt_metrics[0].id + i); +} + + + /* * Synchronous Netlink interface */ @@ -130,7 +230,7 @@ struct nl_sock uint last_size; }; -#define NL_RX_SIZE 8192 +#define NL_RX_SIZE 32768 #define NL_OP_DELETE 0 #define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL) @@ -157,11 +257,47 @@ nl_open_sock(struct nl_sock *nl) } } +static void +nl_set_strict_dump(struct nl_sock *nl UNUSED, int strict UNUSED) +{ + /* + * Strict checking is not necessary, it improves behavior on newer kernels. + * If it is not available (missing SOL_NETLINK compile-time, or ENOPROTOOPT + * run-time), we can just ignore it. + */ +#ifdef SOL_NETLINK + setsockopt(nl->fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &strict, sizeof(strict)); +#endif +} + +static void +nl_set_rcvbuf(int fd, uint val) +{ + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)) < 0) + log(L_WARN "KRT: Cannot set netlink rx buffer size to %u: %m", val); +} + +static uint +nl_cfg_rx_buffer_size(struct config *cfg) +{ + uint bufsize = 0; + + struct proto_config *pc; + WALK_LIST(pc, cfg->protos) + if ((pc->protocol == &proto_unix_kernel) && !pc->disabled) + bufsize = MAX(bufsize, ((struct krt_config *) pc)->sys.netlink_rx_buffer); + + return bufsize; +} + + static void nl_open(void) { nl_open_sock(&nl_scan); nl_open_sock(&nl_req); + + nl_set_strict_dump(&nl_scan, 1); } static void @@ -180,20 +316,60 @@ nl_send(struct nl_sock *nl, struct nlmsghdr *nh) } static void -nl_request_dump(int af, int cmd) +nl_request_dump_link(void) { struct { struct nlmsghdr nh; - struct rtgenmsg g; + struct ifinfomsg ifi; } req = { - .nh.nlmsg_type = cmd, - .nh.nlmsg_len = sizeof(req), + .nh.nlmsg_type = RTM_GETLINK, + .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP, - .g.rtgen_family = af + .nh.nlmsg_seq = ++(nl_scan.seq), + .ifi.ifi_family = AF_UNSPEC, }; - nl_send(&nl_scan, &req.nh); + + send(nl_scan.fd, &req, sizeof(req), 0); + nl_scan.last_hdr = NULL; } +static void +nl_request_dump_addr(int af) +{ + struct { + struct nlmsghdr nh; + struct ifaddrmsg ifa; + } req = { + .nh.nlmsg_type = RTM_GETADDR, + .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), + .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP, + .nh.nlmsg_seq = ++(nl_scan.seq), + .ifa.ifa_family = af, + }; + + send(nl_scan.fd, &req, sizeof(req), 0); + nl_scan.last_hdr = NULL; +} + +static void +nl_request_dump_route(int af) +{ + struct { + struct nlmsghdr nh; + struct rtmsg rtm; + } req = { + .nh.nlmsg_type = RTM_GETROUTE, + .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)), + .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP, + .nh.nlmsg_seq = ++(nl_scan.seq), + .rtm.rtm_family = af, + }; + + send(nl_scan.fd, &req, sizeof(req), 0); + nl_scan.last_hdr = NULL; +} + + static struct nlmsghdr * nl_get_reply(struct nl_sock *nl) { @@ -651,12 +827,12 @@ nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af UNUS } static void -nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af, ea_list *eattrs) +nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop_adata *nhad, int af, ea_list *eattrs) { struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH); - eattr *flow = ea_find(eattrs, EA_KRT_REALM); + eattr *flow = ea_find(eattrs, &ea_krt_realm); - for (; nh; nh = nh->next) + NEXTHOP_WALK(nh, nhad) { struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize); @@ -680,33 +856,49 @@ nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af, e nl_close_attr(h, a); } -static struct nexthop * -nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr *ra, int af) +static struct nexthop_adata * +nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, const net_addr *n, struct rtattr *ra, int af, int krt_src) { struct rtattr *a[BIRD_RTA_MAX]; - struct rtnexthop *nh = RTA_DATA(ra); - struct nexthop *rv, *first, **last; - unsigned len = RTA_PAYLOAD(ra); + struct rtnexthop *nh, *orig_nh = RTA_DATA(ra); + unsigned len, orig_len = RTA_PAYLOAD(ra); + uint cnt = 0; - first = NULL; - last = &first; - - while (len) + /* First count the nexthops */ + for (len = orig_len, nh = orig_nh; len; len -= NLMSG_ALIGN(nh->rtnh_len), nh = RTNH_NEXT(nh)) { /* Use RTNH_OK(nh,len) ?? */ if ((len < sizeof(*nh)) || (len < nh->rtnh_len)) - return NULL; + goto err; - if (nh->rtnh_flags & RTNH_F_DEAD) - goto next; + if ((nh->rtnh_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD)) + ; + else + cnt++; + } - *last = rv = lp_allocz(s->pool, NEXTHOP_MAX_SIZE); - last = &(rv->next); + struct nexthop_adata *nhad = lp_allocz(s->pool, cnt * NEXTHOP_MAX_SIZE + sizeof *nhad); + struct nexthop *rv = &nhad->nh; + + for (len = orig_len, nh = orig_nh; len; len -= NLMSG_ALIGN(nh->rtnh_len), nh = RTNH_NEXT(nh)) + { + /* Use RTNH_OK(nh,len) ?? */ + if ((len < sizeof(*nh)) || (len < nh->rtnh_len)) + goto err; + + if ((nh->rtnh_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD)) + continue; + + *rv = (struct nexthop) { + .weight = nh->rtnh_hops, + .iface = if_find_by_index(nh->rtnh_ifindex), + }; - rv->weight = nh->rtnh_hops; - rv->iface = if_find_by_index(nh->rtnh_ifindex); if (!rv->iface) - return NULL; + { + log(L_ERR "KRT: Received route %N with unknown ifindex %u", n, nh->rtnh_ifindex); + return NULL; + } /* Nonexistent RTNH_PAYLOAD ?? */ nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0); @@ -714,18 +906,18 @@ nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr { case AF_INET: if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want4, a, sizeof(a))) - return NULL; + goto err; break; case AF_INET6: if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want6, a, sizeof(a))) - return NULL; + goto err; break; #ifdef HAVE_MPLS_KERNEL case AF_MPLS: if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want_mpls, a, sizeof(a))) - return NULL; + goto err; if (a[RTA_NEWDST]) rv->labels = rta_get_mpls(a[RTA_NEWDST], rv->label); @@ -734,7 +926,7 @@ nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr #endif default: - return NULL; + goto err; } if (a[RTA_GATEWAY]) @@ -757,14 +949,19 @@ nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr nbr = neigh_find(&p->p, rv->gw, rv->iface, (rv->flags & RNF_ONLINK) ? NEF_ONLINK : 0); if (!nbr || (nbr->scope == SCOPE_HOST)) - return NULL; + { + log(L_ERR "KRT: Received route %N with strange next-hop %I", n, rv->gw); + return NULL; + } } #ifdef HAVE_MPLS_KERNEL if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE]) { - if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS) { - log(L_WARN "KRT: Unknown encapsulation method %d in multipath", rta_get_u16(a[RTA_ENCAP_TYPE])); + if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS) + { + log(L_WARN "KRT: Received route %N with unknown encapsulation method %d", + n, rta_get_u16(a[RTA_ENCAP_TYPE])); return NULL; } @@ -775,16 +972,18 @@ nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr } #endif - next: - len -= NLMSG_ALIGN(nh->rtnh_len); - nh = RTNH_NEXT(nh); + rv = NEXTHOP_NEXT(rv); } - /* Ensure nexthops are sorted to satisfy nest invariant */ - if (!nexthop_is_sorted(first)) - first = nexthop_sort(first); + /* Store final length */ + nhad->ad.length = (void *) rv - (void *) nhad->ad.data; - return first; + /* Ensure nexthops are sorted to satisfy nest invariant */ + return nexthop_is_sorted(nhad) ? nhad : nexthop_sort(nhad, s->pool); + +err: + log(L_ERR "KRT: Received strange multipath route %N", n); + return NULL; } static void @@ -1139,7 +1338,7 @@ kif_do_scan(struct kif_proto *p UNUSED) if_start_update(); - nl_request_dump(AF_UNSPEC, RTM_GETLINK); + nl_request_dump_link(); while (h = nl_get_scan()) if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK) nl_parse_link(h, 1); @@ -1166,14 +1365,14 @@ kif_do_scan(struct kif_proto *p UNUSED) } } - nl_request_dump(AF_INET, RTM_GETADDR); + nl_request_dump_addr(AF_INET); while (h = nl_get_scan()) if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR) nl_parse_addr(h, 1); else log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type); - nl_request_dump(AF_INET6, RTM_GETADDR); + nl_request_dump_addr(AF_INET6); while (h = nl_get_scan()) if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR) nl_parse_addr(h, 1); @@ -1208,11 +1407,16 @@ HASH_DEFINE_REHASH_FN(RTH, struct krt_proto) int krt_capable(rte *e) { - rta *a = e->attrs; + eattr *ea = ea_find(e->attrs->eattrs, &ea_gen_nexthop); + if (!ea) + return 0; - switch (a->dest) + struct nexthop_adata *nhad = (void *) ea->u.ptr; + if (NEXTHOP_IS_REACHABLE(nhad)) + return 1; + + switch (nhad->dest) { - case RTD_UNICAST: case RTD_BLACKHOLE: case RTD_UNREACHABLE: case RTD_PROHIBIT: @@ -1224,21 +1428,22 @@ krt_capable(rte *e) } static inline int -nh_bufsize(struct nexthop *nh) +nh_bufsize(struct nexthop_adata *nhad) { int rv = 0; - for (; nh != NULL; nh = nh->next) + NEXTHOP_WALK(nh, nhad) rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr))); return rv; } static int -nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nexthop *nh) +nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nexthop_adata *nh) { eattr *ea; rta *a = e->attrs; ea_list *eattrs = a->eattrs; - int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh)); + + int bufsize = 128 + KRT_METRICS_MAX*8 + (nh ? nh_bufsize(nh) : 0); u32 priority = 0; struct { @@ -1306,7 +1511,7 @@ nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nextho priority = 0; else if (KRT_CF->sys.metric) priority = KRT_CF->sys.metric; - else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC))) + else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, &ea_krt_metric))) priority = ea->u.data; if (priority) @@ -1319,15 +1524,15 @@ nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nextho /* Default scope is LINK for device routes, UNIVERSE otherwise */ if (p->af == AF_MPLS) r->r.rtm_scope = RT_SCOPE_UNIVERSE; - else if (ea = ea_find(eattrs, EA_KRT_SCOPE)) + else if (ea = ea_find(eattrs, &ea_krt_scope)) r->r.rtm_scope = ea->u.data; else - r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; + r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->nh.gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; - if (ea = ea_find(eattrs, EA_KRT_PREFSRC)) + if (ea = ea_find(eattrs, &ea_krt_prefsrc)) nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data); - if (ea = ea_find(eattrs, EA_KRT_REALM)) + if (ea = ea_find(eattrs, &ea_krt_realm)) nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data); @@ -1335,9 +1540,9 @@ nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nextho metrics[0] = 0; struct ea_walk_state ews = { .eattrs = eattrs }; - while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX)) + while (ea = ea_walk(&ews, ea_krt_metrics[0].id, KRT_METRICS_MAX)) { - int id = ea->id - EA_KRT_METRICS; + int id = ea->id - ea_krt_metrics[0].id; metrics[0] |= 1 << id; metrics[id] = ea->u.data; } @@ -1351,14 +1556,14 @@ dest: { case RTD_UNICAST: r->r.rtm_type = RTN_UNICAST; - if (nh->next && !krt_ecmp6(p)) + if (!NEXTHOP_ONE(nh) && !krt_ecmp6(p)) nl_add_multipath(&r->h, rsize, nh, p->af, eattrs); else { - nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index); - nl_add_nexthop(&r->h, rsize, nh, p->af); + nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->nh.iface->index); + nl_add_nexthop(&r->h, rsize, &nh->nh, p->af); - if (nh->flags & RNF_ONLINK) + if (nh->nh.flags & RNF_ONLINK) r->r.rtm_flags |= RTNH_F_ONLINK; } break; @@ -1387,21 +1592,36 @@ nl_add_rte(struct krt_proto *p, rte *e) rta *a = e->attrs; int err = 0; - if (krt_ecmp6(p) && a->nh.next) + eattr *nhea = ea_find(a->eattrs, &ea_gen_nexthop); + struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; + + if (krt_ecmp6(p) && nhad && NEXTHOP_IS_REACHABLE(nhad) && !NEXTHOP_ONE(nhad)) { - struct nexthop *nh = &(a->nh); + uint cnt = 0; + NEXTHOP_WALK(nh, nhad) + { + struct { + struct nexthop_adata nhad; + u32 labels[MPLS_MAX_LABEL_STACK]; + } nhx; + memcpy(&nhx.nhad.nh, nh, NEXTHOP_SIZE(nh)); + nhx.nhad.ad.length = (void *) NEXTHOP_NEXT(&nhx.nhad.nh) - (void *) nhx.nhad.ad.data; - err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, nh); - if (err < 0) - return err; - - for (nh = nh->next; nh; nh = nh->next) - err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, nh); + if (!cnt++) + { + err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, &nhx.nhad); + if (err < 0) + return err; + } + else + err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, &nhx.nhad); + } return err; } - return nl_send_route(p, e, NL_OP_ADD, a->dest, &(a->nh)); + return nl_send_route(p, e, NL_OP_ADD, + NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest, nhad); } static inline int @@ -1421,7 +1641,10 @@ static inline int nl_replace_rte(struct krt_proto *p, rte *e) { rta *a = e->attrs; - return nl_send_route(p, e, NL_OP_REPLACE, a->dest, &(a->nh)); + eattr *nhea = ea_find(a->eattrs, &ea_gen_nexthop); + struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; + return nl_send_route(p, e, NL_OP_REPLACE, + NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest, nhad); } @@ -1490,20 +1713,15 @@ nl_announce_route(struct nl_parse_state *s) .net = s->net, }; - ea_list *ea = alloca(sizeof(ea_list) + 2 * sizeof(eattr)); - *ea = (ea_list) { .count = 2, .next = e0.attrs->eattrs }; - e0.attrs->eattrs = ea; + EA_LOCAL_LIST(2) ea = { + .l = { .count = 2, .next = e0.attrs->eattrs }, + .a = { + EA_LITERAL_EMBEDDED(&ea_krt_source, 0, s->krt_proto), + EA_LITERAL_EMBEDDED(&ea_krt_metric, 0, s->krt_metric), + }, + }; - ea->attrs[0] = (eattr) { - .id = EA_KRT_SOURCE, - .type = EAF_TYPE_INT, - .u.data = s->krt_proto, - }; - ea->attrs[1] = (eattr) { - .id = EA_KRT_METRIC, - .type = EAF_TYPE_INT, - .u.data = s->krt_metric, - }; + e0.attrs->eattrs = &ea.l; if (s->scan) krt_got_route(s->proto, &e0, s->krt_src); @@ -1532,7 +1750,8 @@ nl_parse_end(struct nl_parse_state *s) } -#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0) +#define SKIP0(ARG, ...) do { DBG("KRT: Ignoring route - " ARG, ##__VA_ARGS__); return; } while(0) +#define SKIP(ARG, ...) do { DBG("KRT: Ignoring route %N - " ARG, &dst, ##__VA_ARGS__); return; } while(0) static void nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) @@ -1585,10 +1804,10 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) return; if (!a[RTA_DST]) - SKIP("MPLS route without RTA_DST"); + SKIP0("MPLS route without RTA_DST\n"); if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) != 1) - SKIP("MPLS route with multi-label RTA_DST"); + SKIP0("MPLS route with multi-label RTA_DST\n"); net_fill_mpls(&dst, rta_mpls_stack[0]); break; @@ -1606,6 +1825,9 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) else table_id = i->rtm_table; + if (i->rtm_flags & RTM_F_CLONED) + SKIP("cloned\n"); + /* Do we know this table? */ p = HASH_FIND(nl_table_map, RTH, i->rtm_family, table_id); if (!p) @@ -1666,79 +1888,109 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) nl_announce_route(s); rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE); - ra->source = RTS_INHERIT; - ra->scope = SCOPE_UNIVERSE; + ea_set_attr_u32(&ra->eattrs, &ea_gen_source, 0, RTS_INHERIT); if (a[RTA_FLOW]) s->rta_flow = rta_get_u32(a[RTA_FLOW]); else s->rta_flow = 0; + union { + struct { + struct adata ad; + struct nexthop nh; + u32 labels[MPLS_MAX_LABEL_STACK]; + }; + struct nexthop_adata nhad; + } nhad = {}; + switch (i->rtm_type) { case RTN_UNICAST: - ra->dest = RTD_UNICAST; - if (a[RTA_MULTIPATH]) { - struct nexthop *nh = nl_parse_multipath(s, p, a[RTA_MULTIPATH], i->rtm_family); + struct nexthop_adata *nh = nl_parse_multipath(s, p, net, a[RTA_MULTIPATH], i->rtm_family, krt_src); if (!nh) - { - log(L_ERR "KRT: Received strange multipath route %N", net); - return; - } + SKIP("strange RTA_MULTIPATH\n"); - nexthop_link(ra, nh); + ea_set_attr(&ra->eattrs, EA_LITERAL_DIRECT_ADATA( + &ea_gen_nexthop, 0, &nh->ad)); break; } - if (i->rtm_flags & RTNH_F_DEAD) - return; + if ((i->rtm_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD)) + SKIP("ignore RTNH_F_DEAD\n"); - ra->nh.iface = if_find_by_index(oif); - if (!ra->nh.iface) + nhad.nh.iface = if_find_by_index(oif); + if (!nhad.nh.iface) { log(L_ERR "KRT: Received route %N with unknown ifindex %u", net, oif); return; } if (a[RTA_GATEWAY]) - ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]); + nhad.nh.gw = rta_get_ipa(a[RTA_GATEWAY]); #ifdef HAVE_MPLS_KERNEL if (a[RTA_VIA]) - ra->nh.gw = rta_get_via(a[RTA_VIA]); + nhad.nh.gw = rta_get_via(a[RTA_VIA]); #endif - if (ipa_nonzero(ra->nh.gw)) + if (ipa_nonzero(nhad.nh.gw)) { /* Silently skip strange 6to4 routes */ const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96); - if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit)) + if ((i->rtm_family == AF_INET6) && ipa_in_netX(nhad.nh.gw, (net_addr *) &sit)) return; if (i->rtm_flags & RTNH_F_ONLINK) - ra->nh.flags |= RNF_ONLINK; + nhad.nh.flags |= RNF_ONLINK; neighbor *nbr; - nbr = neigh_find(&p->p, ra->nh.gw, ra->nh.iface, - (ra->nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0); + nbr = neigh_find(&p->p, nhad.nh.gw, nhad.nh.iface, + (nhad.nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0); if (!nbr || (nbr->scope == SCOPE_HOST)) { - log(L_ERR "KRT: Received route %N with strange next-hop %I", net, ra->nh.gw); + log(L_ERR "KRT: Received route %N with strange next-hop %I", net, + nhad.nh.gw); return; } } +#ifdef HAVE_MPLS_KERNEL + if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !a[RTA_MULTIPATH]) + nhad.nh.labels = rta_get_mpls(a[RTA_NEWDST], nhad.nh.label); + + if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !a[RTA_MULTIPATH]) + { + switch (rta_get_u16(a[RTA_ENCAP_TYPE])) + { + case LWTUNNEL_ENCAP_MPLS: + { + struct rtattr *enca[BIRD_RTA_MAX]; + nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]); + nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca)); + nhad.nh.labels = rta_get_mpls(enca[RTA_DST], nhad.nh.label); + break; + } + default: + SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE])); + break; + } + } +#endif + + /* Finalize the nexthop */ + nhad.ad.length = (void *) NEXTHOP_NEXT(&nhad.nh) - (void *) nhad.ad.data; break; case RTN_BLACKHOLE: - ra->dest = RTD_BLACKHOLE; + nhad.nhad = NEXTHOP_DEST_LITERAL(RTD_BLACKHOLE); break; case RTN_UNREACHABLE: - ra->dest = RTD_UNREACHABLE; + nhad.nhad = NEXTHOP_DEST_LITERAL(RTD_UNREACHABLE); break; case RTN_PROHIBIT: - ra->dest = RTD_PROHIBIT; + nhad.nhad = NEXTHOP_DEST_LITERAL(RTD_PROHIBIT); break; /* FIXME: What about RTN_THROW? */ default: @@ -1746,105 +1998,36 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) return; } -#ifdef HAVE_MPLS_KERNEL - if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next) - ra->nh.labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label); - - if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next) - { - switch (rta_get_u16(a[RTA_ENCAP_TYPE])) - { - case LWTUNNEL_ENCAP_MPLS: - { - struct rtattr *enca[BIRD_RTA_MAX]; - nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]); - nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca)); - ra->nh.labels = rta_get_mpls(enca[RTA_DST], ra->nh.label); - break; - } - default: - SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE])); - break; - } - } -#endif - if (i->rtm_scope != def_scope) - { - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); - ea->next = ra->eattrs; - ra->eattrs = ea; - ea->flags = EALF_SORTED; - ea->count = 1; - ea->attrs[0].id = EA_KRT_SCOPE; - ea->attrs[0].flags = 0; - ea->attrs[0].type = EAF_TYPE_INT; - ea->attrs[0].u.data = i->rtm_scope; - } + ea_set_attr(&ra->eattrs, + EA_LITERAL_EMBEDDED(&ea_krt_scope, 0, i->rtm_scope)); if (a[RTA_PREFSRC]) - { - ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]); + { + ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]); - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); - ea->next = ra->eattrs; - ra->eattrs = ea; - ea->flags = EALF_SORTED; - ea->count = 1; - ea->attrs[0].id = EA_KRT_PREFSRC; - ea->attrs[0].flags = 0; - ea->attrs[0].type = EAF_TYPE_IP_ADDRESS; - - struct adata *ad = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps)); - ad->length = sizeof(ps); - memcpy(ad->data, &ps, sizeof(ps)); - - ea->attrs[0].u.ptr = ad; - } + ea_set_attr(&ra->eattrs, + EA_LITERAL_STORE_ADATA(&ea_krt_prefsrc, 0, &ps, sizeof(ps))); + } /* Can be set per-route or per-nexthop */ if (s->rta_flow) - { - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); - ea->next = ra->eattrs; - ra->eattrs = ea; - ea->flags = EALF_SORTED; - ea->count = 1; - ea->attrs[0].id = EA_KRT_REALM; - ea->attrs[0].flags = 0; - ea->attrs[0].type = EAF_TYPE_INT; - ea->attrs[0].u.data = s->rta_flow; - } + ea_set_attr(&ra->eattrs, + EA_LITERAL_EMBEDDED(&ea_krt_realm, 0, s->rta_flow)); if (a[RTA_METRICS]) { u32 metrics[KRT_METRICS_MAX]; - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr)); - int t, n = 0; - if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0) { log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net); return; } - for (t = 1; t < KRT_METRICS_MAX; t++) + for (uint t = 1; t < KRT_METRICS_MAX; t++) if (metrics[0] & (1 << t)) - { - ea->attrs[n].id = EA_CODE(PROTOCOL_KERNEL, KRT_METRICS_OFFSET + t); - ea->attrs[n].flags = 0; - ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */ - ea->attrs[n].u.data = metrics[t]; - n++; - } - - if (n > 0) - { - ea->next = ra->eattrs; - ea->flags = EALF_SORTED; - ea->count = n; - ra->eattrs = ea; - } + ea_set_attr(&ra->eattrs, + EA_LITERAL_EMBEDDED(&ea_krt_metrics[t], 0, metrics[t])); } /* @@ -1862,6 +2045,10 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) net_copy(s->net, net); s->attrs = ra; + + ea_set_attr_data(&ra->eattrs, &ea_gen_nexthop, 0, + nhad.ad.data, nhad.ad.length); + s->proto = p; s->new = new; s->krt_src = krt_src; @@ -1872,20 +2059,18 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) else { /* Merge next hops with the stored route */ - rta *oa = s->attrs; + eattr *nhea = ea_find(s->attrs->eattrs, &ea_gen_nexthop); + struct nexthop_adata *nhad_old = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; - struct nexthop *nhs = &oa->nh; - nexthop_insert(&nhs, &ra->nh); - - /* Perhaps new nexthop is inserted at the first position */ - if (nhs == &ra->nh) - { - /* Swap rtas */ - s->attrs = ra; - - /* Keep old eattrs */ - ra->eattrs = oa->eattrs; - } + if (nhad_old) + ea_set_attr(&s->attrs->eattrs, + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, + &(nexthop_merge(nhad_old, &nhad.nhad, + KRT_CF->merge_paths, s->pool)->ad) + )); + else + ea_set_attr_data(&s->attrs->eattrs, &ea_gen_nexthop, 0, + nhad.ad.data, nhad.ad.length); } } @@ -1896,7 +2081,7 @@ krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NUL struct nl_parse_state s; nl_parse_begin(&s, 1); - nl_request_dump(AF_UNSPEC, RTM_GETROUTE); + nl_request_dump_route(AF_UNSPEC); while (h = nl_get_scan()) if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE) nl_parse_route(&s, h); @@ -1911,6 +2096,8 @@ krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NUL static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */ static byte *nl_async_rx_buffer; /* Receive buffer */ +static uint nl_async_bufsize; /* Kernel rx buffer size for the netlink socket */ +static struct config *nl_last_config; /* For tracking changes to nl_async_bufsize */ static void nl_async_msg(struct nlmsghdr *h) @@ -2046,6 +2233,32 @@ nl_open_async(void) bug("Netlink: sk_open failed"); } +static void +nl_update_async_bufsize(void) +{ + /* No async socket */ + if (!nl_async_sk) + return; + + /* Already reconfigured */ + if (nl_last_config == config) + return; + + /* Update netlink buffer size */ + uint bufsize = nl_cfg_rx_buffer_size(config); + if (bufsize && (bufsize != nl_async_bufsize)) + { + /* Log message for reconfigurations only */ + if (nl_last_config) + log(L_INFO "KRT: Changing netlink rx buffer size to %u", bufsize); + + nl_set_rcvbuf(nl_async_sk->fd, bufsize); + nl_async_bufsize = bufsize; + } + + nl_last_config = config; +} + /* * Interface to the UNIX krt module @@ -2056,6 +2269,8 @@ krt_sys_io_init(void) { nl_linpool = lp_new_default(krt_pool); HASH_INIT(nl_table_map, krt_pool, 6); + + nl_ea_register(); } int @@ -2074,6 +2289,7 @@ krt_sys_start(struct krt_proto *p) nl_open(); nl_open_async(); + nl_update_async_bufsize(); return 1; } @@ -2081,12 +2297,16 @@ krt_sys_start(struct krt_proto *p) void krt_sys_shutdown(struct krt_proto *p) { + nl_update_async_bufsize(); + HASH_REMOVE2(nl_table_map, RTH, krt_pool, p); } int krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o) { + nl_update_async_bufsize(); + return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric); } @@ -2104,56 +2324,6 @@ krt_sys_copy_config(struct krt_config *d, struct krt_config *s) d->sys.metric = s->sys.metric; } -static const char *krt_metrics_names[KRT_METRICS_MAX] = { - NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss", - "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack" -}; - -static const char *krt_features_names[KRT_FEATURES_MAX] = { - "ecn", NULL, NULL, "allfrag" -}; - -int -krt_sys_get_attr(const eattr *a, byte *buf, int buflen UNUSED) -{ - switch (a->id) - { - case EA_KRT_PREFSRC: - bsprintf(buf, "prefsrc"); - return GA_NAME; - - case EA_KRT_REALM: - bsprintf(buf, "realm"); - return GA_NAME; - - case EA_KRT_SCOPE: - bsprintf(buf, "scope"); - return GA_NAME; - - case EA_KRT_LOCK: - buf += bsprintf(buf, "lock:"); - ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX); - return GA_FULL; - - case EA_KRT_FEATURES: - buf += bsprintf(buf, "features:"); - ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX); - return GA_FULL; - - default:; - int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET; - if (id > 0 && id < KRT_METRICS_MAX) - { - bsprintf(buf, "%s", krt_metrics_names[id]); - return GA_NAME; - } - - return GA_UNKNOWN; - } -} - - - void kif_sys_start(struct kif_proto *p UNUSED) { diff --git a/sysdep/linux/sysio.h b/sysdep/linux/sysio.h index e21ff487..f13eda7c 100644 --- a/sysdep/linux/sysio.h +++ b/sysdep/linux/sysio.h @@ -10,6 +10,10 @@ #define IPV6_MINHOPCOUNT 73 #endif +#ifndef IPV6_FREEBIND +#define IPV6_FREEBIND 78 +#endif + #ifndef TCP_MD5SIG_EXT #define TCP_MD5SIG_EXT 32 #endif @@ -266,3 +270,18 @@ sk_set_priority(sock *s, int prio) return 0; } +static inline int +sk_set_freebind(sock *s) +{ + int y = 1; + + if (sk_is_ipv4(s)) + if (setsockopt(s->fd, SOL_IP, IP_FREEBIND, &y, sizeof(y)) < 0) + ERR("IP_FREEBIND"); + + if (sk_is_ipv6(s)) + if (setsockopt(s->fd, SOL_IPV6, IPV6_FREEBIND, &y, sizeof(y)) < 0) + ERR("IPV6_FREEBIND"); + + return 0; +} diff --git a/sysdep/unix/Makefile b/sysdep/unix/Makefile index d0d36b5f..51ab98a9 100644 --- a/sysdep/unix/Makefile +++ b/sysdep/unix/Makefile @@ -2,6 +2,8 @@ src := alloc.c io.c krt.c log.c main.c random.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,kif_build) +$(call proto-build,krt_build) $(conf-y-targets): $(s)krt.Y src := $(filter-out main.c, $(src)) diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c index 4c9d5eb5..edad6209 100644 --- a/sysdep/unix/alloc.c +++ b/sysdep/unix/alloc.c @@ -8,7 +8,10 @@ #include "nest/bird.h" #include "lib/resource.h" +#include "lib/lists.h" +#include "lib/event.h" +#include <errno.h> #include <stdlib.h> #include <unistd.h> @@ -17,82 +20,168 @@ #endif long page_size = 0; -_Bool alloc_multipage = 0; #ifdef HAVE_MMAP +#define KEEP_PAGES_MAIN_MAX 256 +#define KEEP_PAGES_MAIN_MIN 8 +#define CLEANUP_PAGES_BULK 256 + +STATIC_ASSERT(KEEP_PAGES_MAIN_MIN * 4 < KEEP_PAGES_MAIN_MAX); + static _Bool use_fake = 0; + +#if DEBUGGING +struct free_page { + node unused[42]; + node n; +}; #else -static _Bool use_fake = 1; +struct free_page { + node n; +}; #endif -void resource_sys_init(void) -{ -#ifdef HAVE_MMAP - if (!(page_size = sysconf(_SC_PAGESIZE))) - die("System page size must be non-zero"); +struct free_pages { + list pages; + u16 min, max; /* Minimal and maximal number of free pages kept */ + uint cnt; /* Number of empty pages */ + event cleanup; +}; - if ((u64_popcount(page_size) > 1) || (page_size > 16384)) - { -#endif - /* Too big or strange page, use the aligned allocator instead */ - page_size = 4096; - use_fake = 1; - } -} +static void global_free_pages_cleanup_event(void *); -void * +static struct free_pages global_free_pages = { + .min = KEEP_PAGES_MAIN_MIN, + .max = KEEP_PAGES_MAIN_MAX, + .cleanup = { .hook = global_free_pages_cleanup_event }, +}; + +uint *pages_kept = &global_free_pages.cnt; + +static void * alloc_sys_page(void) { -#ifdef HAVE_MMAP - if (!use_fake) - { - if (alloc_multipage) - { - void *big = mmap(NULL, page_size * 2, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (big == MAP_FAILED) - bug("mmap(%lu) failed: %m", page_size); + void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - uintptr_t offset = ((uintptr_t) big) % page_size; - if (offset) - { - void *ret = big + page_size - offset; - munmap(big, page_size - offset); - munmap(ret + page_size, offset); - return ret; - } - else - { - munmap(big + page_size, page_size); - return big; - } - } + if (ptr == MAP_FAILED) + bug("mmap(%lu) failed: %m", page_size); - void *ret = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (ret == MAP_FAILED) - bug("mmap(%lu) failed: %m", page_size); + return ptr; +} - return ret; - } - else +extern int shutting_down; /* Shutdown requested. */ + +#else // ! HAVE_MMAP +#define use_fake 1 #endif + +void * +alloc_page(void) +{ + if (use_fake) { - void *ret = aligned_alloc(page_size, page_size); - if (!ret) - bug("aligned_alloc(%lu) failed", page_size); - return ret; + void *ptr = NULL; + int err = posix_memalign(&ptr, page_size, page_size); + + if (err || !ptr) + bug("posix_memalign(%lu) failed", (long unsigned int) page_size); + + return ptr; } + +#ifdef HAVE_MMAP + struct free_pages *fps = &global_free_pages; + + if (fps->cnt) + { + struct free_page *fp = SKIP_BACK(struct free_page, n, HEAD(fps->pages)); + rem_node(&fp->n); + if ((--fps->cnt < fps->min) && !shutting_down) + ev_schedule(&fps->cleanup); + + bzero(fp, page_size); + return fp; + } + + return alloc_sys_page(); +#endif } void -free_sys_page(void *ptr) +free_page(void *ptr) +{ + if (use_fake) + { + free(ptr); + return; + } + +#ifdef HAVE_MMAP + struct free_pages *fps = &global_free_pages; + struct free_page *fp = ptr; + + fp->n = (node) {}; + add_tail(&fps->pages, &fp->n); + + if ((++fps->cnt > fps->max) && !shutting_down) + ev_schedule(&fps->cleanup); +#endif +} + +#ifdef HAVE_MMAP +static void +global_free_pages_cleanup_event(void *data UNUSED) +{ + if (shutting_down) + return; + + struct free_pages *fps = &global_free_pages; + + while (fps->cnt / 2 < fps->min) + { + struct free_page *fp = alloc_sys_page(); + fp->n = (node) {}; + add_tail(&fps->pages, &fp->n); + fps->cnt++; + } + + for (uint seen = 0; (seen < CLEANUP_PAGES_BULK) && (fps->cnt > fps->max / 2); seen++) + { + struct free_page *fp = SKIP_BACK(struct free_page, n, TAIL(fps->pages)); + rem_node(&fp->n); + + if (munmap(fp, page_size) == 0) + fps->cnt--; + else if (errno == ENOMEM) + add_head(&fps->pages, &fp->n); + else + bug("munmap(%p) failed: %m", fp); + } +} +#endif + +void +resource_sys_init(void) { #ifdef HAVE_MMAP - if (!use_fake) + ASSERT_DIE(global_free_pages.cnt == 0); + + if (!(page_size = sysconf(_SC_PAGESIZE))) + die("System page size must be non-zero"); + + if (u64_popcount(page_size) == 1) { - if (munmap(ptr, page_size) < 0) - bug("munmap(%p) failed: %m", ptr); + struct free_pages *fps = &global_free_pages; + + init_list(&fps->pages); + global_free_pages_cleanup_event(NULL); + return; } - else + + /* Too big or strange page, use the aligned allocator instead */ + log(L_WARN "Got strange memory page size (%lu), using the aligned allocator instead", page_size); + use_fake = 1; #endif - free(ptr); + + page_size = 4096; } diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 3d67d0a7..8a116789 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -1436,6 +1436,10 @@ sk_open(sock *s) if (sk_set_high_port(s) < 0) log(L_WARN "Socket error: %s%#m", s->err); + if (s->flags & SKF_FREEBIND) + if (sk_set_freebind(s) < 0) + log(L_WARN "Socket error: %s%#m", s->err); + sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port); if (bind(fd, &sa.sa, SA_LEN(sa)) < 0) ERR2("bind"); @@ -1850,8 +1854,8 @@ sk_read_ssh(sock *s) /* sk_read() and sk_write() are called from BFD's event loop */ -int -sk_read(sock *s, int revents) +static inline int +sk_read_noflush(sock *s, int revents) { switch (s->type) { @@ -1914,7 +1918,15 @@ sk_read(sock *s, int revents) } int -sk_write(sock *s) +sk_read(sock *s, int revents) +{ + int e = sk_read_noflush(s, revents); + tmp_flush(); + return e; +} + +static inline int +sk_write_noflush(sock *s) { switch (s->type) { @@ -1962,6 +1974,14 @@ sk_write(sock *s) } } +int +sk_write(sock *s) +{ + int e = sk_write_noflush(s); + tmp_flush(); + return e; +} + int sk_is_ipv4(sock *s) { return s->af == AF_INET; } @@ -1980,6 +2000,7 @@ sk_err(sock *s, int revents) } s->err_hook(s, se); + tmp_flush(); } void diff --git a/sysdep/unix/krt.Y b/sysdep/unix/krt.Y index 95b54d65..4ce9a328 100644 --- a/sysdep/unix/krt.Y +++ b/sysdep/unix/krt.Y @@ -29,7 +29,7 @@ kif_set_preferred(ip_addr ip) CF_DECLS -CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, KRT_SOURCE, KRT_METRIC, MERGE, PATHS) +CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, MERGE, PATHS) CF_KEYWORDS(INTERFACE, PREFERRED) %type <i> kern_mp_limit @@ -122,9 +122,6 @@ kif_iface: kif_iface_start iface_patt_list_nopx kif_iface_opt_list; -dynamic_attr: KRT_SOURCE { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_SOURCE); } ; -dynamic_attr: KRT_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_METRIC); } ; - CF_CODE CF_END diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 360a2888..84457d37 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -53,7 +53,7 @@ #include "nest/bird.h" #include "nest/iface.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "filter/filter.h" #include "conf/conf.h" @@ -232,7 +232,6 @@ kif_copy_config(struct proto_config *dest, struct proto_config *src) struct protocol proto_unix_iface = { .name = "Device", .template = "device%d", - .class = PROTOCOL_DEVICE, .proto_size = sizeof(struct kif_proto), .config_size = sizeof(struct kif_config), .preconfig = kif_preconfig, @@ -243,6 +242,13 @@ struct protocol proto_unix_iface = { .copy_config = kif_copy_config }; +void +kif_build(void) +{ + proto_build(&proto_unix_iface); +} + + /* * Tracing of routes */ @@ -280,7 +286,7 @@ static struct tbf rl_alien = TBF_DEFAULT_LOG_LIMITS; static inline u32 krt_metric(rte *a) { - eattr *ea = ea_find(a->attrs->eattrs, EA_KRT_METRIC); + eattr *ea = ea_find(a->attrs->eattrs, &ea_krt_metric); return ea ? ea->u.data : 0; } @@ -317,7 +323,7 @@ static struct rte_storage * krt_store_async(struct krt_proto *p, net *n, rte *e) { ASSERT(!e->attrs->cached); - e->attrs->pref = p->p.main_channel->preference; + ea_set_attr_u32(&e->attrs->eattrs, &ea_gen_preference, 0, p->p.main_channel->preference); e->src = p->p.main_source; return rte_store(e, n, p->krt_table); } @@ -338,14 +344,14 @@ krt_learn_scan(struct krt_proto *p, rte *e) if (krt_uptodate(&m->rte, e)) { krt_trace_in_rl(&rl_alien, p, e, "[alien] seen"); - rte_free(ee, p->krt_table); + rte_free(ee); m->rte.pflags |= KRT_REF_SEEN; } else { krt_trace_in(p, e, "[alien] updated"); *mm = m->next; - rte_free(m, p->krt_table); + rte_free(m); m = NULL; } } @@ -392,7 +398,7 @@ again: if (!(e->rte.pflags & KRT_REF_SEEN)) { *ee = e->next; - rte_free(e, p->krt_table); + rte_free(e); continue; } @@ -452,12 +458,12 @@ krt_learn_async(struct krt_proto *p, rte *e, int new) if (krt_uptodate(&g->rte, e)) { krt_trace_in(p, e, "[alien async] same"); - rte_free(ee, p->krt_table); + rte_free(ee); return; } krt_trace_in(p, e, "[alien async] updated"); *gg = g->next; - rte_free(g, p->krt_table); + rte_free(g); } else krt_trace_in(p, e, "[alien async] created"); @@ -468,15 +474,15 @@ krt_learn_async(struct krt_proto *p, rte *e, int new) else if (!g) { krt_trace_in(p, e, "[alien async] delete failed"); - rte_free(ee, p->krt_table); + rte_free(ee); return; } else { krt_trace_in(p, e, "[alien async] removed"); *gg = g->next; - rte_free(ee, p->krt_table); - rte_free(g, p->krt_table); + rte_free(ee); + rte_free(g); } best = n->routes; bestp = &n->routes; @@ -546,21 +552,27 @@ krt_is_installed(struct krt_proto *p, net *n) return n->routes && bmap_test(&p->p.main_channel->export_map, n->routes->rte.id); } -static void -krt_flush_routes(struct krt_proto *p) +static uint +rte_feed_count(net *n) { - struct rtable *t = p->p.main_channel->table; + uint count = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (rte_is_valid(RTE_OR_NULL(e))) + count++; + return count; +} - KRT_TRACE(p, D_EVENTS, "Flushing kernel routes"); - FIB_WALK(&t->fib, net, n) +static void +rte_feed_obtain(net *n, rte **feed, uint count) +{ + uint i = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (rte_is_valid(RTE_OR_NULL(e))) { - if (krt_is_installed(p, n)) - { - /* FIXME: this does not work if gw is changed in export filter */ - krt_replace_rte(p, n->n.addr, NULL, &n->routes->rte); - } + ASSERT_DIE(i < count); + feed[i++] = &e->rte; } - FIB_WALK_END; + ASSERT_DIE(i == count); } static struct rte * @@ -570,7 +582,15 @@ krt_export_net(struct krt_proto *p, net *net) const struct filter *filter = c->out_filter; if (c->ra_mode == RA_MERGED) - return rt_export_merged(c, net, krt_filter_lp, 1); + { + uint count = rte_feed_count(net); + if (!count) + return NULL; + + rte **feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(net, feed, count); + return rt_export_merged(c, feed, count, krt_filter_lp, 1); + } static _Thread_local rte rt; rt = net->routes->rte; @@ -586,7 +606,7 @@ krt_export_net(struct krt_proto *p, net *net) if (filter == FILTER_ACCEPT) goto accept; - if (f_run(filter, &rt, krt_filter_lp, FF_SILENT) > F_ACCEPT) + if (f_run(filter, &rt, FF_SILENT) > F_ACCEPT) goto reject; @@ -602,13 +622,10 @@ krt_same_dest(rte *k, rte *e) { rta *ka = k->attrs, *ea = e->attrs; - if (ka->dest != ea->dest) - return 0; + eattr *nhea_k = ea_find(ka->eattrs, &ea_gen_nexthop); + eattr *nhea_e = ea_find(ea->eattrs, &ea_gen_nexthop); - if (ka->dest == RTD_UNICAST) - return nexthop_same(&(ka->nh), &(ea->nh)); - - return 1; + return (!nhea_k == !nhea_e) && adata_same(nhea_k->u.ptr, nhea_e->u.ptr); } /* @@ -641,6 +658,9 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src) #endif /* The rest is for KRT_SRC_BIRD (or KRT_SRC_UNKNOWN) */ + /* Deleting all routes if flush is requested */ + if (p->flush_routes) + goto delete; /* We wait for the initial feed to have correct installed state */ if (!p->ready) @@ -733,6 +753,17 @@ krt_prune(struct krt_proto *p) p->initialized = 1; } +static void +krt_flush_routes(struct krt_proto *p) +{ + KRT_TRACE(p, D_EVENTS, "Flushing kernel routes"); + p->flush_routes = 1; + krt_init_scan(p); + krt_do_scan(p); + /* No prune! */ + p->flush_routes = 0; +} + void krt_got_route_async(struct krt_proto *p, rte *e, int new, s8 src) { @@ -1111,24 +1142,15 @@ krt_copy_config(struct proto_config *dest, struct proto_config *src) krt_sys_copy_config(d, s); } -static int -krt_get_attr(const eattr *a, byte *buf, int buflen) -{ - switch (a->id) - { - case EA_KRT_SOURCE: - bsprintf(buf, "source"); - return GA_NAME; - - case EA_KRT_METRIC: - bsprintf(buf, "metric"); - return GA_NAME; - - default: - return krt_sys_get_attr(a, buf, buflen); - } -} +struct ea_class ea_krt_source = { + .name = "krt_source", + .type = T_INT, +}; +struct ea_class ea_krt_metric = { + .name = "krt_metric", + .type = T_INT, +}; #ifdef CONFIG_IP6_SADR_KERNEL #define MAYBE_IP6_SADR NB_IP6_SADR @@ -1145,7 +1167,6 @@ krt_get_attr(const eattr *a, byte *buf, int buflen) struct protocol proto_unix_kernel = { .name = "Kernel", .template = "kernel%d", - .class = PROTOCOL_KERNEL, .preference = DEF_PREF_INHERITED, .channel_mask = NB_IP | MAYBE_IP6_SADR | MAYBE_MPLS, .proto_size = sizeof(struct krt_proto), @@ -1157,8 +1178,18 @@ struct protocol proto_unix_kernel = { .shutdown = krt_shutdown, .reconfigure = krt_reconfigure, .copy_config = krt_copy_config, - .get_attr = krt_get_attr, #ifdef KRT_ALLOW_LEARN .dump = krt_dump, #endif }; + +void +krt_build(void) +{ + proto_build(&proto_unix_kernel); + + EA_REGISTER_ALL( + &ea_krt_source, + &ea_krt_metric, + ); +} diff --git a/sysdep/unix/krt.h b/sysdep/unix/krt.h index cd4bd07d..69fc9aa1 100644 --- a/sysdep/unix/krt.h +++ b/sysdep/unix/krt.h @@ -21,8 +21,12 @@ struct kif_proto; #define KRT_DEFAULT_ECMP_LIMIT 16 +#if 0 #define EA_KRT_SOURCE EA_CODE(PROTOCOL_KERNEL, 0) #define EA_KRT_METRIC EA_CODE(PROTOCOL_KERNEL, 1) +#endif + +extern struct ea_class ea_krt_source, ea_krt_metric; #define KRT_REF_SEEN 0x1 /* Seen in table */ #define KRT_REF_BEST 0x2 /* Best in table */ @@ -66,6 +70,7 @@ struct krt_proto { byte ready; /* Initial feed has been finished */ byte initialized; /* First scan has been finished */ byte reload; /* Next scan is doing reload */ + byte flush_routes; /* Scanning to flush */ }; extern pool *krt_pool; diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c index 7e8ea0dc..8fdad4e6 100644 --- a/sysdep/unix/main.c +++ b/sysdep/unix/main.c @@ -30,7 +30,7 @@ #include "lib/event.h" #include "lib/timer.h" #include "lib/string.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "nest/cli.h" @@ -682,7 +682,7 @@ signal_init(void) * Parsing of command-line arguments */ -static char *opt_list = "B:c:dD:ps:P:u:g:flRh"; +static char *opt_list = "bc:dD:ps:P:u:g:flRh"; int parse_and_exit; char *bird_name; static char *use_user; @@ -703,7 +703,6 @@ display_help(void) fprintf(stderr, "\n" "Options: \n" - " -B <block-size> Use 2^this number as memory allocation block size (default: 12)\n" " -c <config-file> Use given configuration file instead of\n" " " PATH_CONFIG_FILE "\n" " -d Enable debug messages and run bird in foreground\n" @@ -790,15 +789,12 @@ get_gid(const char *s) return gr->gr_gid; } -extern _Bool alloc_multipage; - static void parse_args(int argc, char **argv) { int config_changed = 0; int socket_changed = 0; int c; - int bp; bird_name = get_bird_name(argv[0], "bird"); if (argc == 2) @@ -811,29 +807,6 @@ parse_args(int argc, char **argv) while ((c = getopt(argc, argv, opt_list)) >= 0) switch (c) { - case 'B': - bp = atoi(optarg); - if (bp < 1) - { - fprintf(stderr, "Strange block size power %d\n\n", bp); - display_usage(); - exit(1); - } - - if ((1 << bp) < page_size) - { - fprintf(stderr, "Requested block size %ld is lesser than page size %ld\n\n", (1L<<bp), page_size); - display_usage(); - exit(1); - } - - if ((1L << bp) > page_size) - { - alloc_multipage = 1; - page_size = (1L << bp); - } - - break; case 'c': config_name = optarg; config_changed = 1; @@ -888,8 +861,6 @@ parse_args(int argc, char **argv) } } -void resource_sys_init(void); - /* * Hic Est main() */ @@ -902,7 +873,6 @@ main(int argc, char **argv) dmalloc_debug(0x2f03d00); #endif - resource_sys_init(); parse_args(argc, argv); log_switch(1, NULL, NULL); @@ -911,8 +881,8 @@ main(int argc, char **argv) resource_init(); timer_init(); olock_init(); - io_init(); rt_init(); + io_init(); if_init(); // roa_init(); config_init(); @@ -936,8 +906,6 @@ main(int argc, char **argv) open_pid_file(); protos_build(); - proto_build(&proto_unix_kernel); - proto_build(&proto_unix_iface); struct config *conf = read_config(); diff --git a/test/birdtest.c b/test/birdtest.c index c6a09684..ae05d1a5 100644 --- a/test/birdtest.c +++ b/test/birdtest.c @@ -20,6 +20,7 @@ #include "test/birdtest.h" #include "lib/string.h" +#include "lib/event.h" #ifdef HAVE_EXECINFO_H #include <execinfo.h> @@ -58,14 +59,11 @@ u64 bt_random_state[] = { 0x53d9772877c1b647, 0xab8ce3eb466de6c5, 0xad02844c8a8e865f, 0xe8cc78080295065d }; -void resource_sys_init(void); - void bt_init(int argc, char *argv[]) { int c; - resource_sys_init(); initstate(BT_RANDOM_SEED, (char *) bt_random_state, sizeof(bt_random_state)); bt_verbose = 0; @@ -122,6 +120,9 @@ bt_init(int argc, char *argv[]) clock_gettime(CLOCK_MONOTONIC, &bt_begin); bt_suite_case_begin = bt_suite_begin = bt_begin; + resource_init(); + ev_init_list(&global_event_list); + return; usage: @@ -175,6 +176,8 @@ int bt_run_test_fn(int (*fn)(const void *), const void *fn_arg, int timeout) if (!bt_suite_result) result = 0; + tmp_flush(); + return result; } @@ -312,6 +315,12 @@ bt_log_suite_case_result(int result, const char *fmt, ...) } } +void +bt_reset_suite_case_timer(void) +{ + clock_gettime(CLOCK_MONOTONIC, &bt_suite_case_begin); +} + int bt_test_suite_base(int (*fn)(const void *), const char *id, const void *fn_arg, int forked, int timeout, const char *dsc, ...) { @@ -504,6 +513,15 @@ bt_fmt_ipa(char *buf, size_t size, const void *data) bsnprintf(buf, size, "(null)"); } +void +bt_format_net(char *buf, size_t size, const void *data) +{ + if (data) + bsnprintf(buf, size, "%N", (const net_addr *) data); + else + bsnprintf(buf, size, "(null)"); +} + int bt_is_char(byte c) { diff --git a/test/birdtest.h b/test/birdtest.h index caec529b..ad5f8f9c 100644 --- a/test/birdtest.h +++ b/test/birdtest.h @@ -32,6 +32,7 @@ extern const char *bt_test_id; void bt_init(int argc, char *argv[]); int bt_exit_value(void); +void bt_reset_suite_case_timer(void); int bt_test_suite_base(int (*test_fn)(const void *), const char *test_id, const void *test_fn_argument, int forked, int timeout, const char *dsc, ...); static inline u64 bt_random(void) { return ((u64) random() & 0xffffffff) | ((u64) random() << 32); } @@ -165,6 +166,8 @@ struct bt_batch { void bt_fmt_str(char *buf, size_t size, const void *data); void bt_fmt_unsigned(char *buf, size_t size, const void *data); void bt_fmt_ipa(char *buf, size_t size, const void *data); +void bt_format_net(char *buf, size_t size, const void *data); + int bt_assert_batch__(struct bt_batch *opts); int bt_is_char(byte c); diff --git a/test/bt-utils.c b/test/bt-utils.c index cbca3a6b..509b5ed4 100644 --- a/test/bt-utils.c +++ b/test/bt-utils.c @@ -14,7 +14,7 @@ #include "test/bt-utils.h" #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "sysdep/unix/unix.h" @@ -60,24 +60,18 @@ bt_bird_init(void) log_init_debug(""); log_switch(bt_verbose != 0, NULL, NULL); - resource_init(); olock_init(); timer_init(); - io_init(); rt_init(); + io_init(); if_init(); config_init(); protos_build(); - proto_build(&proto_unix_kernel); - proto_build(&proto_unix_iface); } void bt_bird_cleanup(void) { - for (int i = 0; i < PROTOCOL__MAX; i++) - class_to_protocol[i] = NULL; - config = new_config = NULL; } diff --git a/tools/gendist b/tools/gendist index 2ac59030..2dc42ba9 100755 --- a/tools/gendist +++ b/tools/gendist @@ -2,6 +2,7 @@ # # Generate BIRD Distribution Archive # (c) 2000--2004 Martin Mares <mj@ucw.cz> +# (c) 2005--2022 Ondrej Filip <feela@network.cz> # VERSION=`grep 'BIRD_VERSION \"' sysdep/config.h | sed '/BIRD_VERSION/!d;s/^.*"\(.*\)"$/\1/'` @@ -33,8 +34,6 @@ rm -rf `find $T/$REL -name CVS -o -name tmp` $T/$REL/{misc,rfc,doc/slides,doc/sl rm -rf $T/$REL $T/$DREL echo -n "OK? " read OK -echo Uploading to Atrey... -scp $T/$REL.tar.gz $T/$DREL.tar.gz atrey.karlin.mff.cuni.cz:~ftp/pub/bird/ echo Uploading to Trubka... scp $T/$REL.tar.gz $T/$DREL.tar.gz bird.network.cz:~ftp/pub/bird/ echo Done. diff --git a/tools/linuxdoc b/tools/linuxdoc index 51110e79..58f5cbc4 100755 --- a/tools/linuxdoc +++ b/tools/linuxdoc @@ -25,8 +25,13 @@ use FindBin; $prefix = "/usr"; $isoentities_prefix = "/usr"; $DataDir = "$FindBin::Bin/../doc/sbase"; -$AuxBinDir = "/usr/lib/linuxdoc-tools"; - +if (-d "/usr/lib/linuxdoc-tools") +{ + $AuxBinDir = "/usr/lib/linuxdoc-tools"; +} else +{ + $AuxBinDir = "/usr/bin"; +} use lib "$FindBin::Bin/linuxdoc-tools"; # ---------------------------------------------------------------------