From f8e273b5e7a3c721f4a30cf27a0b4fe54602e83f Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Mon, 14 Jun 2021 16:30:59 +0200 Subject: [PATCH 001/149] Nest: Fix export of tmpattrs through pipes In most cases of export there is no need to store back temporary attributes to rte, as receivers (protocols) access eattr list anyway. But pipe copies the original rte with old values, so we should store tmpattrs also during export. Thanks to Paul Donohue for the bugreport. --- nest/rt-table.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nest/rt-table.c b/nest/rt-table.c index 13209dd7..a7e31d85 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -618,6 +618,9 @@ export_filter_(struct channel *c, rte *rt0, rte **rt_free, linpool *pool, int si goto reject; } + /* Needed for pipes */ + rte_store_tmp_attrs(rt, pool, NULL); + accept: if (rt != rt0) *rt_free = rt; From 3ebabab2778d05212cc07ebccf583159d5e0890a Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Mon, 14 Jun 2021 17:58:37 +0200 Subject: [PATCH 002/149] Revert "Nest: Fix export of tmpattrs through pipes" This reverts commit f8e273b5e7a3c721f4a30cf27a0b4fe54602e83f. --- nest/rt-table.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/nest/rt-table.c b/nest/rt-table.c index a7e31d85..13209dd7 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -618,9 +618,6 @@ export_filter_(struct channel *c, rte *rt0, rte **rt_free, linpool *pool, int si goto reject; } - /* Needed for pipes */ - rte_store_tmp_attrs(rt, pool, NULL); - accept: if (rt != rt0) *rt_free = rt; From 1b9bf4e192a252db861acadc7f800d7046435a3f Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Mon, 14 Jun 2021 20:02:50 +0200 Subject: [PATCH 003/149] Nest: Fix export of tmpattrs through pipes Pipes copy the original rte with old values, so they require rte to be exported with stored tmpattrs. Other protocols access stored attributes using eattr list, so they require rte to be exported with expanded tmpattrs. This is temporary hack, we plan to remove whoe tmpattr mechanism. Thanks to Paul Donohue for the bugreport. --- nest/rt-table.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/nest/rt-table.c b/nest/rt-table.c index 13209dd7..390b3277 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -618,6 +618,12 @@ export_filter_(struct channel *c, rte *rt0, rte **rt_free, linpool *pool, int si goto reject; } +#ifdef CONFIG_PIPE + /* Pipes need rte with stored tmpattrs, remaining protocols need expanded tmpattrs */ + if (p->proto == &proto_pipe) + rte_store_tmp_attrs(rt, pool, NULL); +#endif + accept: if (rt != rt0) *rt_free = rt; From f761be6b30633054a54369eee7d08b951a366e5e Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Thu, 17 Jun 2021 16:56:51 +0200 Subject: [PATCH 004/149] Nest: Clean up main channel handling Remove assumption that main channel is the only channel. --- nest/protocol.h | 2 +- proto/ospf/config.Y | 5 ++--- proto/radv/config.Y | 1 + proto/radv/radv.c | 2 +- proto/rip/rip.c | 2 +- proto/rpki/rpki.c | 2 +- proto/static/static.c | 2 +- sysdep/unix/krt.c | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/nest/protocol.h b/nest/protocol.h index 48eb01d2..abcc505d 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -616,7 +616,7 @@ struct channel { struct channel_config *proto_cf_find_channel(struct proto_config *p, uint net_type); static inline struct channel_config *proto_cf_main_channel(struct proto_config *pc) -{ struct channel_config *cc = HEAD(pc->channels); return NODE_VALID(cc) ? cc : NULL; } +{ return proto_cf_find_channel(pc, pc->net_type); } struct channel *proto_find_channel_by_table(struct proto *p, struct rtable *t); struct channel *proto_find_channel_by_name(struct proto *p, const char *n); diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y index fd2cfe8a..4b7d5a36 100644 --- a/proto/ospf/config.Y +++ b/proto/ospf/config.Y @@ -85,7 +85,7 @@ ospf_proto_finish(void) struct ospf_iface_patt *ic; /* Define default channel */ - if (EMPTY_LIST(this_proto->channels)) + if (! proto_cf_main_channel(this_proto)) { uint net_type = this_proto->net_type = ospf_cfg_is_v2() ? NET_IP4 : NET_IP6; channel_config_new(NULL, net_label[net_type], net_type, this_proto); @@ -248,8 +248,7 @@ ospf_channel_start: net_type ospf_af_mc $$ = this_channel = channel_config_get(NULL, net_label[$1], $1, this_proto); /* Save the multicast flag */ - if (this_channel == proto_cf_main_channel(this_proto)) - OSPF_CFG->af_mc = $2; + OSPF_CFG->af_mc = $2; }; ospf_channel: ospf_channel_start channel_opt_list channel_end; diff --git a/proto/radv/config.Y b/proto/radv/config.Y index dda9cfcd..8d4a3ab9 100644 --- a/proto/radv/config.Y +++ b/proto/radv/config.Y @@ -46,6 +46,7 @@ proto: radv_proto ; radv_proto_start: proto_start RADV { this_proto = proto_config_new(&proto_radv, $1); + this_proto->net_type = NET_IP6; init_list(&RADV_CFG->patt_list); init_list(&RADV_CFG->pref_list); diff --git a/proto/radv/radv.c b/proto/radv/radv.c index b4235917..66e8eb4b 100644 --- a/proto/radv/radv.c +++ b/proto/radv/radv.c @@ -564,7 +564,7 @@ radv_postconfig(struct proto_config *CF) // struct radv_config *cf = (void *) CF; /* Define default channel */ - if (EMPTY_LIST(CF->channels)) + if (! proto_cf_main_channel(CF)) channel_config_new(NULL, net_label[NET_IP6], NET_IP6, CF); } diff --git a/proto/rip/rip.c b/proto/rip/rip.c index 8b4719f7..e1a235a0 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -1105,7 +1105,7 @@ rip_postconfig(struct proto_config *CF) // struct rip_config *cf = (void *) CF; /* Define default channel */ - if (EMPTY_LIST(CF->channels)) + if (! proto_cf_main_channel(CF)) channel_config_new(NULL, net_label[CF->net_type], CF->net_type, CF); } diff --git a/proto/rpki/rpki.c b/proto/rpki/rpki.c index 799cb877..ab0837f3 100644 --- a/proto/rpki/rpki.c +++ b/proto/rpki/rpki.c @@ -923,7 +923,7 @@ rpki_postconfig(struct proto_config *CF) { /* Define default channel */ if (EMPTY_LIST(CF->channels)) - channel_config_new(NULL, net_label[CF->net_type], CF->net_type, CF); + cf_error("Channel not specified"); } static void diff --git a/proto/static/static.c b/proto/static/static.c index 661f1aac..2789c1bb 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -434,7 +434,7 @@ static_postconfig(struct proto_config *CF) struct static_config *cf = (void *) CF; struct static_route *r; - if (EMPTY_LIST(CF->channels)) + if (! proto_cf_main_channel(CF)) cf_error("Channel not specified"); struct channel_config *cc = proto_cf_main_channel(CF); diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index ceb88563..7c2614b1 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -1013,7 +1013,7 @@ krt_postconfig(struct proto_config *CF) if (cf->c.class == SYM_TEMPLATE) return; - if (EMPTY_LIST(CF->channels)) + if (! proto_cf_main_channel(CF)) cf_error("Channel not specified"); #ifdef CONFIG_ALL_TABLES_AT_ONCE From 13225f1dbff54619476f2d8f6bc779dbb4983e3e Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Sun, 5 Apr 2020 03:24:46 +0200 Subject: [PATCH 005/149] Filter: Faster prefix sets Use 16-way (4bit) branching in prefix trie instead of basic binary branching. The change makes IPv4 prefix sets almost 3x faster, but with more memory consumption and much more complicated algorithm. Together with a previous filter change, it makes IPv4 prefix sets about ~4.3x faster and slightly smaller (on my test data). --- filter/data.h | 12 +- filter/test.conf | 6 + filter/trie.c | 287 ++++++++++++++++++++++++++++++++++------------- lib/birdlib.h | 3 + lib/ip.h | 17 ++- 5 files changed, 244 insertions(+), 81 deletions(-) diff --git a/filter/data.h b/filter/data.h index d296776d..21967deb 100644 --- a/filter/data.h +++ b/filter/data.h @@ -140,18 +140,22 @@ struct f_tree { void *data; }; +#define TRIE_STEP 4 + struct f_trie_node4 { ip4_addr addr, mask, accept; - uint plen; - struct f_trie_node4 *c[2]; + u16 plen; + u16 local; + struct f_trie_node4 *c[1 << TRIE_STEP]; }; struct f_trie_node6 { ip6_addr addr, mask, accept; - uint plen; - struct f_trie_node6 *c[2]; + u16 plen; + u16 local; + struct f_trie_node6 *c[1 << TRIE_STEP]; }; struct f_trie_node diff --git a/filter/test.conf b/filter/test.conf index 3a8804a1..1edcd64e 100644 --- a/filter/test.conf +++ b/filter/test.conf @@ -558,6 +558,12 @@ prefix set pxs; bt_assert(2000::/29 !~ pxs); bt_assert(1100::/10 !~ pxs); bt_assert(2010::/26 !~ pxs); + + pxs = [ 52E0::/13{13,128} ]; + bt_assert(52E7:BE81:379B:E6FD:541F:B0D0::/93 ~ pxs); + + pxs = [ 41D8:8718::/30{0,30}, 413A:99A8:6C00::/38{38,128} ]; + bt_assert(4180::/9 ~ pxs); } bt_test_suite(t_prefix6_set, "Testing prefix IPv6 sets"); diff --git a/filter/trie.c b/filter/trie.c index 1a4e1ac3..cf805afc 100644 --- a/filter/trie.c +++ b/filter/trie.c @@ -86,7 +86,10 @@ #define ipa_mkmask(x) ip6_mkmask(x) #define ipa_masklen(x) ip6_masklen(&x) #define ipa_pxlen(x,y) ip6_pxlen(x,y) -#define ipa_getbit(x,n) ip6_getbit(x,n) +#define ipa_getbit(a,p) ip6_getbit(a,p) +#define ipa_getbits(a,p,n) ip6_getbits(a,p,n) +#define ipa_setbits(a,p,n) ip6_setbits(a,p,n) +#define trie_local_mask(a,b,c) trie_local_mask6(a,b,c) #define ipt_from_ip4(x) _MI6(_I(x), 0, 0, 0) #define ipt_to_ip4(x) _MI4(_I0(x)) @@ -109,10 +112,11 @@ f_new_trie(linpool *lp, uint data_size) } static inline struct f_trie_node4 * -new_node4(struct f_trie *t, int plen, ip4_addr paddr, ip4_addr pmask, ip4_addr amask) +new_node4(struct f_trie *t, uint plen, uint local, ip4_addr paddr, ip4_addr pmask, ip4_addr amask) { struct f_trie_node4 *n = lp_allocz(t->lp, sizeof(struct f_trie_node4) + t->data_size); n->plen = plen; + n->local = local; n->addr = paddr; n->mask = pmask; n->accept = amask; @@ -120,10 +124,11 @@ new_node4(struct f_trie *t, int plen, ip4_addr paddr, ip4_addr pmask, ip4_addr a } static inline struct f_trie_node6 * -new_node6(struct f_trie *t, int plen, ip6_addr paddr, ip6_addr pmask, ip6_addr amask) +new_node6(struct f_trie *t, uint plen, uint local, ip6_addr paddr, ip6_addr pmask, ip6_addr amask) { struct f_trie_node6 *n = lp_allocz(t->lp, sizeof(struct f_trie_node6) + t->data_size); n->plen = plen; + n->local = local; n->addr = paddr; n->mask = pmask; n->accept = amask; @@ -131,24 +136,24 @@ new_node6(struct f_trie *t, int plen, ip6_addr paddr, ip6_addr pmask, ip6_addr a } static inline struct f_trie_node * -new_node(struct f_trie *t, int plen, ip_addr paddr, ip_addr pmask, ip_addr amask) +new_node(struct f_trie *t, uint plen, uint local, ip_addr paddr, ip_addr pmask, ip_addr amask) { if (t->ipv4) - return (struct f_trie_node *) new_node4(t, plen, ipt_to_ip4(paddr), ipt_to_ip4(pmask), ipt_to_ip4(amask)); + return (struct f_trie_node *) new_node4(t, plen, local, ipt_to_ip4(paddr), ipt_to_ip4(pmask), ipt_to_ip4(amask)); else - return (struct f_trie_node *) new_node6(t, plen, ipa_to_ip6(paddr), ipa_to_ip6(pmask), ipa_to_ip6(amask)); + return (struct f_trie_node *) new_node6(t, plen, local, ipa_to_ip6(paddr), ipa_to_ip6(pmask), ipa_to_ip6(amask)); } static inline void attach_node4(struct f_trie_node4 *parent, struct f_trie_node4 *child) { - parent->c[ip4_getbit(child->addr, parent->plen) ? 1 : 0] = child; + parent->c[ip4_getbits(child->addr, parent->plen, TRIE_STEP)] = child; } static inline void attach_node6(struct f_trie_node6 *parent, struct f_trie_node6 *child) { - parent->c[ip6_getbit(child->addr, parent->plen) ? 1 : 0] = child; + parent->c[ip6_getbits(child->addr, parent->plen, TRIE_STEP)] = child; } static inline void @@ -160,10 +165,139 @@ attach_node(struct f_trie_node *parent, struct f_trie_node *child, int v4) attach_node6(&parent->v6, &child->v6); } + +static inline uint +trie_local_mask4(ip4_addr px, uint plen, uint nlen) +{ + uint step = plen - nlen; + uint pos = (1u << step) + ip4_getbits(px, nlen, step); + return 1u << pos; +} + +static inline uint +trie_local_mask6(ip6_addr px, uint plen, uint nlen) +{ + uint step = plen - nlen; + uint pos = (1u << step) + ip6_getbits(px, nlen, step); + return 1u << pos; +} + +static inline uint +trie_amask_to_local(ip_addr px, ip_addr amask, uint nlen) +{ + uint local = 0; + + for (uint plen = MAX(nlen, 1); plen < (nlen + TRIE_STEP); plen++) + if (ipa_getbit(amask, plen - 1)) + local |= trie_local_mask(px, plen, nlen); + + return local; +} + #define GET_ADDR(N,F,X) ((X) ? ipt_from_ip4((N)->v4.F) : ipa_from_ip6((N)->v6.F)) #define SET_ADDR(N,F,X,V) ({ if (X) (N)->v4.F =ipt_to_ip4(V); else (N)->v6.F =ipa_to_ip6(V); }) +#define ADD_LOCAL(N,X,V) ({ uint v_ = (V); if (X) (N)->v4.local |= v_; else (N)->v6.local |= v_; }) + #define GET_CHILD(N,F,X,I) ((X) ? (struct f_trie_node *) (N)->v4.c[I] : (struct f_trie_node *) (N)->v6.c[I]) + + +static void * +trie_add_node(struct f_trie *t, uint plen, ip_addr px, uint local, uint l, uint h) +{ + uint l_ = l ? (l - 1) : 0; + ip_addr amask = (l_ < h) ? ipa_xor(ipa_mkmask(l_), ipa_mkmask(h)) : IPA_NONE; + ip_addr pmask = ipa_mkmask(plen); + ip_addr paddr = ipa_and(px, pmask); + struct f_trie_node *o = NULL; + struct f_trie_node *n = &t->root; + int v4 = t->ipv4; + + /* Add all bits for each active level (0x0002 0x000c 0x00f0 0xff00) */ + for (uint i = 0; i < TRIE_STEP; i++) + if ((l <= (plen + i)) && ((plen + i) <= h)) + local |= ((1u << (1u << i)) - 1) << (1u << i); + + DBG("Insert node %I/%u (%I %x)\n", paddr, plen, amask, local); + while (n) + { + ip_addr naddr = GET_ADDR(n, addr, v4); + ip_addr nmask = GET_ADDR(n, mask, v4); + ip_addr accept = GET_ADDR(n, accept, v4); + ip_addr cmask = ipa_and(nmask, pmask); + uint nlen = v4 ? n->v4.plen : n->v6.plen; + + DBG("Found node %I/%u (%I %x)\n", + naddr, nlen, accept, v4 ? n->v4.local : n->v6.local); + + if (ipa_compare(ipa_and(paddr, cmask), ipa_and(naddr, cmask))) + { + /* We are out of path - we have to add branching node 'b' + between node 'o' and node 'n', and attach new node 'a' + as the other child of 'b'. */ + int blen = ROUND_DOWN_POW2(ipa_pxlen(paddr, naddr), TRIE_STEP); + ip_addr bmask = ipa_mkmask(blen); + ip_addr baddr = ipa_and(px, bmask); + + /* Merge accept masks from children to get accept mask for node 'b' */ + ip_addr baccm = ipa_and(ipa_or(amask, accept), bmask); + uint bloc = trie_amask_to_local(naddr, accept, blen) | + trie_amask_to_local(paddr, amask, blen); + + struct f_trie_node *a = new_node(t, plen, local, paddr, pmask, amask); + struct f_trie_node *b = new_node(t, blen, bloc, baddr, bmask, baccm); + attach_node(o, b, v4); + attach_node(b, n, v4); + attach_node(b, a, v4); + + DBG("Case 1\n"); + return a; + } + + if (plen < nlen) + { + /* We add new node 'a' between node 'o' and node 'n' */ + amask = ipa_or(amask, ipa_and(accept, pmask)); + local |= trie_amask_to_local(naddr, accept, plen); + struct f_trie_node *a = new_node(t, plen, local, paddr, pmask, amask); + attach_node(o, a, v4); + attach_node(a, n, v4); + + DBG("Case 2\n"); + return a; + } + + if (plen == nlen) + { + /* We already found added node in trie. Just update accept and local mask */ + accept = ipa_or(accept, amask); + SET_ADDR(n, accept, v4, accept); + ADD_LOCAL(n, v4, local); + + DBG("Case 3\n"); + return n; + } + + /* Update accept mask part M2 and go deeper */ + accept = ipa_or(accept, ipa_and(amask, nmask)); + SET_ADDR(n, accept, v4, accept); + ADD_LOCAL(n, v4, trie_amask_to_local(paddr, amask, nlen)); + + DBG("Step %u\n", ipa_getbits(paddr, nlen)); + + /* n->plen < plen and plen <= 32 (128) */ + o = n; + n = GET_CHILD(n, c, v4, ipa_getbits(paddr, nlen, TRIE_STEP)); + } + + /* We add new tail node 'a' after node 'o' */ + struct f_trie_node *a = new_node(t, plen, local, paddr, pmask, amask); + attach_node(o, a, v4); + + DBG("Case 4\n"); + return a; +} + /** * trie_add_prefix * @t: trie to add to @@ -180,7 +314,6 @@ attach_node(struct f_trie_node *parent, struct f_trie_node *child, int v4) * a pointer to the root node is returned. Returns NULL when called with * mismatched IPv4/IPv6 net type. */ - void * trie_add_prefix(struct f_trie *t, const net_addr *net, uint l, uint h) { @@ -203,82 +336,66 @@ trie_add_prefix(struct f_trie *t, const net_addr *net, uint l, uint h) return NULL; } + DBG("\nInsert net %N (%u-%u)\n", net, l, h); + if (l == 0) t->zero = 1; - else - l--; if (h < plen) plen = h; - ip_addr amask = ipa_xor(ipa_mkmask(l), ipa_mkmask(h)); - ip_addr pmask = ipa_mkmask(plen); - ip_addr paddr = ipa_and(px, pmask); - struct f_trie_node *o = NULL; - struct f_trie_node *n = &t->root; + /* Primary node length, plen rounded down */ + uint nlen = ROUND_DOWN_POW2(plen, TRIE_STEP); - while (n) - { - ip_addr naddr = GET_ADDR(n, addr, v4); - ip_addr nmask = GET_ADDR(n, mask, v4); - ip_addr accept = GET_ADDR(n, accept, v4); - ip_addr cmask = ipa_and(nmask, pmask); - uint nlen = v4 ? n->v4.plen : n->v6.plen; + if (plen == nlen) + return trie_add_node(t, nlen, px, 0, l, h); - if (ipa_compare(ipa_and(paddr, cmask), ipa_and(naddr, cmask))) - { - /* We are out of path - we have to add branching node 'b' - between node 'o' and node 'n', and attach new node 'a' - as the other child of 'b'. */ - int blen = ipa_pxlen(paddr, naddr); - ip_addr bmask = ipa_mkmask(blen); - ip_addr baddr = ipa_and(px, bmask); + /* Secondary node length, plen rouned up */ + uint slen = nlen + TRIE_STEP; + void *node = NULL; - /* Merge accept masks from children to get accept mask for node 'b' */ - ip_addr baccm = ipa_and(ipa_or(amask, accept), bmask); + /* + * For unaligned prefix lengths it is more complicated. We need to encode + * matching prefixes of lengths from l to h. There are three cases of lengths: + * + * 1) 0..nlen are encoded by the accept mask of the primary node + * 2) nlen..(slen-1) are encoded by the local mask of the primary node + * 3) slen..max are encoded in secondary nodes + */ - struct f_trie_node *a = new_node(t, plen, paddr, pmask, amask); - struct f_trie_node *b = new_node(t, blen, baddr, bmask, baccm); - attach_node(o, b, v4); - attach_node(b, n, v4); - attach_node(b, a, v4); - return a; - } + if (l < slen) + { + uint local = 0; - if (plen < nlen) - { - /* We add new node 'a' between node 'o' and node 'n' */ - amask = ipa_or(amask, ipa_and(accept, pmask)); - struct f_trie_node *a = new_node(t, plen, paddr, pmask, amask); - attach_node(o, a, v4); - attach_node(a, n, v4); - return a; - } + /* Compute local bits for accepted nlen..(slen-1) prefixes */ + for (uint i = 0; i < TRIE_STEP; i++) + if ((l <= (nlen + i)) && ((nlen + i) <= h)) + { + uint pos = (1u << i) + ipa_getbits(px, nlen, i); + uint len = ((nlen + i) <= plen) ? 1 : (1u << (nlen + i - plen)); - if (plen == nlen) - { - /* We already found added node in trie. Just update accept mask */ - accept = ipa_or(accept, amask); - SET_ADDR(n, accept, v4, accept); - return n; - } + /* We need to fill 'len' bits starting at 'pos' position */ + local |= ((1u << len) - 1) << pos; + } - /* Update accept mask part M2 and go deeper */ - accept = ipa_or(accept, ipa_and(amask, nmask)); - SET_ADDR(n, accept, v4, accept); + /* Add the primary node */ + node = trie_add_node(t, nlen, px, local, l, nlen); + } - /* n->plen < plen and plen <= 32 (128) */ - o = n; - n = GET_CHILD(n, c, v4, ipa_getbit(paddr, nlen) ? 1 : 0); - } + if (slen <= h) + { + uint l2 = MAX(l, slen); + uint max = (1u << (slen - plen)); - /* We add new tail node 'a' after node 'o' */ - struct f_trie_node *a = new_node(t, plen, paddr, pmask, amask); - attach_node(o, a, v4); + /* Add secondary nodes */ + for (uint i = 0; i < max; i++) + node = trie_add_node(t, slen, ipa_setbits(px, slen - 1, i), 0, l2, h); + } - return a; + return node; } + static int trie_match_net4(const struct f_trie *t, ip4_addr px, uint plen) { @@ -289,6 +406,8 @@ trie_match_net4(const struct f_trie *t, ip4_addr px, uint plen) return t->zero; int plentest = plen - 1; + uint nlen = ROUND_DOWN_POW2(plen, TRIE_STEP); + uint local = trie_local_mask4(px, plen, nlen); const struct f_trie_node4 *n = &t->root.v4; while (n) @@ -299,6 +418,10 @@ trie_match_net4(const struct f_trie *t, ip4_addr px, uint plen) if (ip4_compare(ip4_and(paddr, cmask), ip4_and(n->addr, cmask))) return 0; + /* Check local mask */ + if ((n->plen == nlen) && (n->local & local)) + return 1; + /* Check accept mask */ if (ip4_getbit(n->accept, plentest)) return 1; @@ -308,7 +431,7 @@ trie_match_net4(const struct f_trie *t, ip4_addr px, uint plen) return 0; /* Choose children */ - n = n->c[(ip4_getbit(paddr, n->plen)) ? 1 : 0]; + n = n->c[ip4_getbits(paddr, n->plen, TRIE_STEP)]; } return 0; @@ -324,6 +447,8 @@ trie_match_net6(const struct f_trie *t, ip6_addr px, uint plen) return t->zero; int plentest = plen - 1; + uint nlen = ROUND_DOWN_POW2(plen, TRIE_STEP); + uint local = trie_local_mask6(px, plen, nlen); const struct f_trie_node6 *n = &t->root.v6; while (n) @@ -334,6 +459,10 @@ trie_match_net6(const struct f_trie *t, ip6_addr px, uint plen) if (ip6_compare(ip6_and(paddr, cmask), ip6_and(n->addr, cmask))) return 0; + /* Check local mask */ + if ((n->plen == nlen) && (n->local & local)) + return 1; + /* Check accept mask */ if (ip6_getbit(n->accept, plentest)) return 1; @@ -343,7 +472,7 @@ trie_match_net6(const struct f_trie *t, ip6_addr px, uint plen) return 0; /* Choose children */ - n = n->c[(ip6_getbit(paddr, n->plen)) ? 1 : 0]; + n = n->c[ip6_getbits(paddr, n->plen, TRIE_STEP)]; } return 0; @@ -392,7 +521,11 @@ trie_node_same4(const struct f_trie_node4 *t1, const struct f_trie_node4 *t2) (! ip4_equal(t1->accept, t2->accept))) return 0; - return trie_node_same4(t1->c[0], t2->c[0]) && trie_node_same4(t1->c[1], t2->c[1]); + for (uint i = 0; i < (1 << TRIE_STEP); i++) + if (! trie_node_same4(t1->c[i], t2->c[i])) + return 0; + + return 1; } static int @@ -409,7 +542,11 @@ trie_node_same6(const struct f_trie_node6 *t1, const struct f_trie_node6 *t2) (! ip6_equal(t1->accept, t2->accept))) return 0; - return trie_node_same6(t1->c[0], t2->c[0]) && trie_node_same6(t1->c[1], t2->c[1]); + for (uint i = 0; i < (1 << TRIE_STEP); i++) + if (! trie_node_same6(t1->c[i], t2->c[i])) + return 0; + + return 1; } /** @@ -440,8 +577,8 @@ trie_node_format4(const struct f_trie_node4 *t, buffer *buf) if (ip4_nonzero(t->accept)) buffer_print(buf, "%I4/%d{%I4}, ", t->addr, t->plen, t->accept); - trie_node_format4(t->c[0], buf); - trie_node_format4(t->c[1], buf); + for (uint i = 0; i < (1 << TRIE_STEP); i++) + trie_node_format4(t->c[i], buf); } static void @@ -453,8 +590,8 @@ trie_node_format6(const struct f_trie_node6 *t, buffer *buf) if (ip6_nonzero(t->accept)) buffer_print(buf, "%I6/%d{%I6}, ", t->addr, t->plen, t->accept); - trie_node_format6(t->c[0], buf); - trie_node_format6(t->c[1], buf); + for (uint i = 0; i < (1 << TRIE_STEP); i++) + trie_node_format6(t->c[i], buf); } /** diff --git a/lib/birdlib.h b/lib/birdlib.h index 431b7c0d..81d4908a 100644 --- a/lib/birdlib.h +++ b/lib/birdlib.h @@ -32,6 +32,9 @@ struct align_probe { char x; long int y; }; #define MAX(a,b) MAX_(a,b) #endif +#define ROUND_DOWN_POW2(a,b) ((a) & ~((b)-1)) +#define ROUND_UP_POW2(a,b) (((a)+((b)-1)) & ~((b)-1)) + #define U64(c) UINT64_C(c) #define ABS(a) ((a)>=0 ? (a) : -(a)) #define DELTA(a,b) (((a)>=(b))?(a)-(b):(b)-(a)) diff --git a/lib/ip.h b/lib/ip.h index 5b179acb..cc36ce64 100644 --- a/lib/ip.h +++ b/lib/ip.h @@ -280,10 +280,16 @@ static inline uint ip6_pxlen(ip6_addr a, ip6_addr b) } static inline u32 ip4_getbit(ip4_addr a, uint pos) -{ return _I(a) & (0x80000000 >> pos); } +{ return (_I(a) >> (31 - pos)) & 1; } + +static inline u32 ip4_getbits(ip4_addr a, uint pos, uint n) +{ return (_I(a) >> ((32 - n) - pos)) & ((1u << n) - 1); } static inline u32 ip6_getbit(ip6_addr a, uint pos) -{ return a.addr[pos / 32] & (0x80000000 >> (pos % 32)); } +{ return (a.addr[pos / 32] >> (31 - (pos % 32))) & 0x1; } + +static inline u32 ip6_getbits(ip6_addr a, uint pos, uint n) +{ return (a.addr[pos / 32] >> ((32 - n) - (pos % 32))) & ((1u << n) - 1); } static inline u32 ip4_setbit(ip4_addr *a, uint pos) { return _I(*a) |= (0x80000000 >> pos); } @@ -297,6 +303,13 @@ static inline u32 ip4_clrbit(ip4_addr *a, uint pos) static inline u32 ip6_clrbit(ip6_addr *a, uint pos) { return a->addr[pos / 32] &= ~(0x80000000 >> (pos % 32)); } +static inline ip4_addr ip4_setbits(ip4_addr a, uint pos, uint val) +{ _I(a) |= val << (31 - pos); return a; } + +static inline ip6_addr ip6_setbits(ip6_addr a, uint pos, uint val) +{ a.addr[pos / 32] |= val << (31 - pos % 32); return a; } + + static inline ip4_addr ip4_opposite_m1(ip4_addr a) { return _MI4(_I(a) ^ 1); } From 562a2b8c29a50cca5731b0a19e99a87a261ab4ef Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Sun, 5 Apr 2020 03:56:07 +0200 Subject: [PATCH 006/149] Filter: Fix trie test Generated prefixes must be valid. --- filter/trie_test.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/filter/trie_test.c b/filter/trie_test.c index b2b36716..5e931e4e 100644 --- a/filter/trie_test.c +++ b/filter/trie_test.c @@ -58,7 +58,8 @@ get_random_ip6_prefix(void) struct f_prefix p; u8 pxlen = xrandom(120)+8; ip6_addr ip6 = ip6_build(bt_random(),bt_random(),bt_random(),bt_random()); - net_addr_ip6 net6 = NET_ADDR_IP6(ip6, pxlen); + ip6_addr mask = ip6_mkmask(pxlen); + net_addr_ip6 net6 = NET_ADDR_IP6(ip6_and(ip6, mask), pxlen); p.net = *((net_addr*) &net6); @@ -87,7 +88,7 @@ generate_random_ipv6_prefixes(list *prefixes) struct f_prefix_node *px = calloc(1, sizeof(struct f_prefix_node)); px->prefix = f; - bt_debug("ADD\t" PRIip6 "/%d %d-%d\n", ARGip6(net6_prefix(&px->prefix.net)), px->prefix.net.pxlen, px->prefix.lo, px->prefix.hi); + bt_debug("ADD\t" PRIip6 "/%d{%d,%d}\n", ARGip6(net6_prefix(&px->prefix.net)), px->prefix.net.pxlen, px->prefix.lo, px->prefix.hi); add_tail(prefixes, &px->n); } } From dd61278c9db1d4bea29f0a21aa460c7fe931eb32 Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Mon, 6 Apr 2020 14:20:16 +0200 Subject: [PATCH 007/149] Filter: Update trie documentation --- filter/trie.c | 113 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 69 insertions(+), 44 deletions(-) diff --git a/filter/trie.c b/filter/trie.c index cf805afc..dbed5ace 100644 --- a/filter/trie.c +++ b/filter/trie.c @@ -1,7 +1,8 @@ /* * Filters: Trie for prefix sets * - * Copyright 2009 Ondrej Zajicek + * (c) 2009--2020 Ondrej Zajicek + * (c) 2009--2020 CZ.NIC z.s.p.o. * * Can be freely distributed and used under the terms of the GNU GPL. */ @@ -9,53 +10,68 @@ /** * DOC: Trie for prefix sets * - * We use a (compressed) trie to represent prefix sets. Every node - * in the trie represents one prefix (&addr/&plen) and &plen also - * indicates the index of the bit in the address that is used to - * branch at the node. If we need to represent just a set of - * prefixes, it would be simple, but we have to represent a - * set of prefix patterns. Each prefix pattern consists of - * &ppaddr/&pplen and two integers: &low and &high, and a prefix - * &paddr/&plen matches that pattern if the first MIN(&plen, &pplen) - * bits of &paddr and &ppaddr are the same and &low <= &plen <= &high. + * We use a (compressed) trie to represent prefix sets. Every node in the trie + * represents one prefix (&addr/&plen) and &plen also indicates the index of + * bits in the address that are used to branch at the node. Note that such + * prefix is not necessary a member of the prefix set, it is just a canonical + * prefix associated with a node. Prefix lengths of nodes are aligned to + * multiples of &TRIE_STEP (4) and there is 16-way branching in each + * node. Therefore, we say that a node is associated with a range of prefix + * lengths (&plen .. &plen + TRIE_STEP - 1). * - * We use a bitmask (&accept) to represent accepted prefix lengths - * at a node. As there are 33 prefix lengths (0..32 for IPv4), but - * there is just one prefix of zero length in the whole trie so we - * have &zero flag in &f_trie (indicating whether the trie accepts - * prefix 0.0.0.0/0) as a special case, and &accept bitmask + * The prefix set is not just a set of prefixes, it is defined by a set of + * prefix patterns. Each prefix pattern consists of &ppaddr/&pplen and two + * integers: &low and &high. The tested prefix &paddr/&plen matches that pattern + * if the first MIN(&plen, &pplen) bits of &paddr and &ppaddr are the same and + * &low <= &plen <= &high. + * + * There are two ways to represent accepted prefixes for a node. First, there is + * a bitmask &local, which represents independently all 15 prefixes that extend + * the canonical prefix of the node and are within a range of prefix lengths + * associated with the node. E.g., for node 10.0.0.0/8 they are 10.0.0.0/8, + * 10.0.0.0/9, 10.128.0.0/9, .. 10.224.0.0/11. This order (first by length, then + * lexicographically) is used for indexing the bitmask &local, starting at + * position 1. I.e., index is 2^(plen - base) + offset within the same length, + * see function trie_local_mask6() for details. + * + * Second, we use a bitmask &accept to represent accepted prefix lengths at a + * node. The bit is set means that all prefixes of given length that are either + * subprefixes or superprefixes of the canonical prefix are accepted. As there + * are 33 prefix lengths (0..32 for IPv4), but there is just one prefix of zero + * length in the whole trie so we have &zero flag in &f_trie (indicating whether + * the trie accepts prefix 0.0.0.0/0) as a special case, and &accept bitmask * represents accepted prefix lengths from 1 to 32. * - * There are two cases in prefix matching - a match when the length - * of the prefix is smaller that the length of the prefix pattern, - * (&plen < &pplen) and otherwise. The second case is simple - we - * just walk through the trie and look at every visited node - * whether that prefix accepts our prefix length (&plen). The - * first case is tricky - we don't want to examine every descendant - * of a final node, so (when we create the trie) we have to propagate - * that information from nodes to their ascendants. + * One complication is handling of prefix patterns with unaligned prefix length. + * When such pattern is to be added, we add a primary node above (with rounded + * down prefix length &nlen) and a set of secondary nodes below (with rounded up + * prefix lengths &slen). Accepted prefix lengths of the original prefix pattern + * are then represented in different places based on their lengths. For prefixes + * shorter than &nlen, it is &accept bitmask of the primary node, for prefixes + * between &nlen and &slen - 1 it is &local bitmask of the primary node, and for + * prefixes longer of equal &slen it is &accept bitmasks of secondary nodes. * - * Suppose that we have two masks (M1 and M2) for a node. Mask M1 - * represents accepted prefix lengths by just the node and mask M2 - * represents accepted prefix lengths by the node or any of its - * descendants. Therefore M2 is a bitwise or of M1 and children's - * M2 and this is a maintained invariant during trie building. - * Basically, when we want to match a prefix, we walk through the trie, - * check mask M1 for our prefix length and when we came to - * final node, we check mask M2. + * There are two cases in prefix matching - a match when the length of the + * prefix is smaller that the length of the prefix pattern, (&plen < &pplen) and + * otherwise. The second case is simple - we just walk through the trie and look + * at every visited node whether that prefix accepts our prefix length (&plen). + * The first case is tricky - we do not want to examine every descendant of a + * final node, so (when we create the trie) we have to propagate that + * information from nodes to their ascendants. * - * There are two differences in the real implementation. First, - * we use a compressed trie so there is a case that we skip our - * final node (if it is not in the trie) and we came to node that - * is either extension of our prefix, or completely out of path - * In the first case, we also have to check M2. + * There are two kinds of propagations - propagation from child's &accept + * bitmask to parent's &accept bitmask, and propagation from child's &accept + * bitmask to parent's &local bitmask. The first kind is simple - as all + * superprefixes of a parent are also all superprefixes of appropriate length of + * a child, then we can just add (by bitwise or) a child &accept mask masked by + * parent prefix length mask to the parent &accept mask. This handles prefixes + * shorter than node &plen. * - * Second, we really need not to maintain two separate bitmasks. - * Checks for mask M1 are always larger than &applen and we need - * just the first &pplen bits of mask M2 (if trie compression - * hadn't been used it would suffice to know just $applen-th bit), - * so we have to store them together in &accept mask - the first - * &pplen bits of mask M2 and then mask M1. + * The second kind of propagation is necessary to handle superprefixes of a + * child that are represented by parent &local mask - that are in the range of + * prefix lengths associated with the parent. For each accepted (by child + * &accept mask) prefix length from that range, we need to set appropriate bit + * in &local mask. See function trie_amask_to_local() for details. * * There are four cases when we walk through a trie: * @@ -65,8 +81,7 @@ * - we are beyond the end of path (node length > &plen) * - we are still on path and keep walking (node length < &plen) * - * The walking code in trie_match_prefix() is structured according to - * these cases. + * The walking code in trie_match_net() is structured according to these cases. */ #include "nest/bird.h" @@ -166,6 +181,10 @@ attach_node(struct f_trie_node *parent, struct f_trie_node *child, int v4) } +/* + * Compute appropriate mask representing prefix px/plen in local bitmask of node + * with prefix length nlen. Assuming that nlen <= plen < (nlen + TRIE_STEP). + */ static inline uint trie_local_mask4(ip4_addr px, uint plen, uint nlen) { @@ -182,6 +201,12 @@ trie_local_mask6(ip6_addr px, uint plen, uint nlen) return 1u << pos; } +/* + * Compute an appropriate local mask (for a node with prefix length nlen) + * representing prefixes of px that are accepted by amask and fall within the + * range associated with that node. Used for propagation of child accept mask + * to parent local mask. + */ static inline uint trie_amask_to_local(ip_addr px, ip_addr amask, uint nlen) { From e709dc09e61c4604821d10b81604d38616b81a0b Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Tue, 21 Apr 2020 13:49:29 +0200 Subject: [PATCH 008/149] Filter: Improve prefix trie tests Add tests explicitly matching insides and outsides of trie and update tests to do testing of both IPv4 and IPv6 tries. --- filter/trie_test.c | 402 +++++++++++++++++++++++++++++++++++---------- test/birdtest.c | 6 + test/birdtest.h | 2 + 3 files changed, 323 insertions(+), 87 deletions(-) diff --git a/filter/trie_test.c b/filter/trie_test.c index 5e931e4e..6418427e 100644 --- a/filter/trie_test.c +++ b/filter/trie_test.c @@ -14,7 +14,7 @@ #include "conf/conf.h" #define TESTS_NUM 10 -#define PREFIXES_NUM 10 +#define PREFIXES_NUM 32 #define PREFIX_TESTS_NUM 10000 #define BIG_BUFFER_SIZE 10000 @@ -31,145 +31,371 @@ xrandom(u32 max) return (bt_random() % max); } +static inline uint +get_exp_random(void) +{ + uint r, n = 0; + + for (r = bt_random(); r & 1; r = r >> 1) + n++; + + return n; +} + +static inline int +matching_ip4_nets(const net_addr_ip4 *a, const net_addr_ip4 *b) +{ + ip4_addr cmask = ip4_mkmask(MIN(a->pxlen, b->pxlen)); + return ip4_compare(ip4_and(a->prefix, cmask), ip4_and(b->prefix, cmask)) == 0; +} + +static inline int +matching_ip6_nets(const net_addr_ip6 *a, const net_addr_ip6 *b) +{ + ip6_addr cmask = ip6_mkmask(MIN(a->pxlen, b->pxlen)); + return ip6_compare(ip6_and(a->prefix, cmask), ip6_and(b->prefix, cmask)) == 0; +} + +static inline int +matching_nets(const net_addr *a, const net_addr *b) +{ + if (a->type != b->type) + return 0; + + return (a->type == NET_IP4) ? + matching_ip4_nets((const net_addr_ip4 *) a, (const net_addr_ip4 *) b) : + matching_ip6_nets((const net_addr_ip6 *) a, (const net_addr_ip6 *) b); +} + static int -is_prefix_included(list *prefixes, struct f_prefix *needle) +is_prefix_included(list *prefixes, const net_addr *needle) { struct f_prefix_node *n; WALK_LIST(n, *prefixes) - { - ip6_addr cmask = ip6_mkmask(MIN(n->prefix.net.pxlen, needle->net.pxlen)); - - ip6_addr ip = net6_prefix(&n->prefix.net); - ip6_addr needle_ip = net6_prefix(&needle->net); - - if ((ipa_compare(ipa_and(ip, cmask), ipa_and(needle_ip, cmask)) == 0) && - (n->prefix.lo <= needle->net.pxlen) && (needle->net.pxlen <= n->prefix.hi)) + if (matching_nets(&n->prefix.net, needle) && + (n->prefix.lo <= needle->pxlen) && (needle->pxlen <= n->prefix.hi)) { - bt_debug("FOUND\t" PRIip6 "/%d %d-%d\n", ARGip6(net6_prefix(&n->prefix.net)), n->prefix.net.pxlen, n->prefix.lo, n->prefix.hi); + char buf[64]; + bt_format_net(buf, 64, &n->prefix.net); + bt_debug("FOUND %s %d-%d\n", buf, n->prefix.lo, n->prefix.hi); + return 1; /* OK */ } - } + return 0; /* FAIL */ } -static struct f_prefix -get_random_ip6_prefix(void) +static void +get_random_net(net_addr *net, int v6) { - struct f_prefix p; - u8 pxlen = xrandom(120)+8; - ip6_addr ip6 = ip6_build(bt_random(),bt_random(),bt_random(),bt_random()); - ip6_addr mask = ip6_mkmask(pxlen); - net_addr_ip6 net6 = NET_ADDR_IP6(ip6_and(ip6, mask), pxlen); - - p.net = *((net_addr*) &net6); - - if (bt_random() % 2) + if (!v6) { - p.lo = 0; - p.hi = p.net.pxlen; + uint pxlen = xrandom(24)+8; + ip4_addr ip4 = ip4_from_u32((u32) bt_random()); + net_fill_ip4(net, ip4_and(ip4, ip4_mkmask(pxlen)), pxlen); } else { - p.lo = p.net.pxlen; - p.hi = net_max_prefix_length[p.net.type]; + uint pxlen = xrandom(120)+8; + ip6_addr ip6 = ip6_build(bt_random(), bt_random(), bt_random(), bt_random()); + net_fill_ip6(net, ip6_and(ip6, ip6_mkmask(pxlen)), pxlen); } - - return p; } static void -generate_random_ipv6_prefixes(list *prefixes) +get_random_prefix(struct f_prefix *px, int v6) { - int i; - for (i = 0; i < PREFIXES_NUM; i++) + get_random_net(&px->net, v6); + + if (bt_random() % 2) { - struct f_prefix f = get_random_ip6_prefix(); - - struct f_prefix_node *px = calloc(1, sizeof(struct f_prefix_node)); - px->prefix = f; - - bt_debug("ADD\t" PRIip6 "/%d{%d,%d}\n", ARGip6(net6_prefix(&px->prefix.net)), px->prefix.net.pxlen, px->prefix.lo, px->prefix.hi); - add_tail(prefixes, &px->n); + px->lo = 0; + px->hi = px->net.pxlen; + } + else + { + px->lo = px->net.pxlen; + px->hi = net_max_prefix_length[px->net.type]; } } +static void +get_random_ip4_subnet(net_addr_ip4 *net, const net_addr_ip4 *src, int pxlen) +{ + *net = NET_ADDR_IP4(ip4_and(src->prefix, ip4_mkmask(pxlen)), pxlen); + + if (pxlen > src->pxlen) + { + ip4_addr rnd = ip4_from_u32((u32) bt_random()); + ip4_addr mask = ip4_xor(ip4_mkmask(src->pxlen), ip4_mkmask(pxlen)); + net->prefix = ip4_or(net->prefix, ip4_and(rnd, mask)); + } +} + +static void +get_random_ip6_subnet(net_addr_ip6 *net, const net_addr_ip6 *src, int pxlen) +{ + *net = NET_ADDR_IP6(ip6_and(src->prefix, ip6_mkmask(pxlen)), pxlen); + + if (pxlen > src->pxlen) + { + ip6_addr rnd = ip6_build(bt_random(), bt_random(), bt_random(), bt_random()); + ip6_addr mask = ip6_xor(ip6_mkmask(src->pxlen), ip6_mkmask(pxlen)); + net->prefix = ip6_or(net->prefix, ip6_and(rnd, mask)); + } +} + +static void +get_random_subnet(net_addr *net, const net_addr *src, int pxlen) +{ + if (src->type == NET_IP4) + get_random_ip4_subnet((net_addr_ip4 *) net, (const net_addr_ip4 *) src, pxlen); + else + get_random_ip6_subnet((net_addr_ip6 *) net, (const net_addr_ip6 *) src, pxlen); +} + +static void +get_inner_net(net_addr *net, const struct f_prefix *src) +{ + int pxlen, step; + + if (bt_random() % 2) + { + step = get_exp_random(); + step = MIN(step, src->hi - src->lo); + pxlen = (bt_random() % 2) ? (src->lo + step) : (src->hi - step); + } + else + pxlen = src->lo + bt_random() % (src->hi - src->lo + 1); + + get_random_subnet(net, &src->net, pxlen); +} + +static void +swap_random_bits_ip4(net_addr_ip4 *net, int num) +{ + for (int i = 0; i < num; i++) + { + ip4_addr swap = IP4_NONE; + ip4_setbit(&swap, bt_random() % net->pxlen); + net->prefix = ip4_xor(net->prefix, swap); + } +} + +static void +swap_random_bits_ip6(net_addr_ip6 *net, int num) +{ + for (int i = 0; i < num; i++) + { + ip6_addr swap = IP6_NONE; + ip6_setbit(&swap, bt_random() % net->pxlen); + net->prefix = ip6_xor(net->prefix, swap); + } +} + +static void +swap_random_bits(net_addr *net, int num) +{ + if (net->type == NET_IP4) + swap_random_bits_ip4((net_addr_ip4 *) net, num); + else + swap_random_bits_ip6((net_addr_ip6 *) net, num); +} + +static void +get_outer_net(net_addr *net, const struct f_prefix *src) +{ + int pxlen, step; + int inside = 0; + int max = net_max_prefix_length[src->net.type]; + + if ((src->lo > 0) && (bt_random() % 3)) + { + step = 1 + get_exp_random(); + step = MIN(step, src->lo); + pxlen = src->lo - step; + } + else if ((src->hi < max) && (bt_random() % 2)) + { + step = 1 + get_exp_random(); + step = MIN(step, max - src->hi); + pxlen = src->hi + step; + } + else + { + pxlen = src->lo + bt_random() % (src->hi - src->lo + 1); + inside = 1; + } + + get_random_subnet(net, &src->net, pxlen); + + /* Perhaps swap some bits in prefix */ + if ((net->pxlen > 0) && (inside || (bt_random() % 4))) + swap_random_bits(net, 1 + get_exp_random()); +} + +static list * +make_random_prefix_list(linpool *lp, int num, int v6) +{ + list *prefixes = lp_allocz(lp, sizeof(struct f_prefix_node)); + init_list(prefixes); + + for (int i = 0; i < num; i++) + { + struct f_prefix_node *px = lp_allocz(lp, sizeof(struct f_prefix_node)); + get_random_prefix(&px->prefix, v6); + add_tail(prefixes, &px->n); + + char buf[64]; + bt_format_net(buf, 64, &px->prefix.net); + bt_debug("ADD %s{%d,%d}\n", buf, px->prefix.lo, px->prefix.hi); + } + + return prefixes; +} + +static struct f_trie * +make_trie_from_prefix_list(linpool *lp, list *prefixes) +{ + struct f_trie *trie = f_new_trie(lp, 0); + + struct f_prefix_node *n; + WALK_LIST(n, *prefixes) + trie_add_prefix(trie, &n->prefix.net, n->prefix.lo, n->prefix.hi); + + return trie; +} + +static void +test_match_net(list *prefixes, struct f_trie *trie, const net_addr *net) +{ + char buf[64]; + bt_format_net(buf, 64, net); + bt_debug("TEST %s\n", buf); + + int should_be = is_prefix_included(prefixes, net); + int is_there = trie_match_net(trie, net); + + bt_assert_msg(should_be == is_there, "Prefix %s %s match", buf, + (should_be ? "should" : "should not")); +} + static int -t_match_net(void) +t_match_random_net(void) { bt_bird_init(); bt_config_parse(BT_CONFIG_SIMPLE); - uint round; - for (round = 0; round < TESTS_NUM; round++) + int v6 = 0; + linpool *lp = lp_new_default(&root_pool); + for (int round = 0; round < TESTS_NUM; round++) { - list prefixes; /* of structs f_extended_prefix */ - init_list(&prefixes); - struct f_trie *trie = f_new_trie(config->mem, 0); + list *prefixes = make_random_prefix_list(lp, PREFIXES_NUM, v6); + struct f_trie *trie = make_trie_from_prefix_list(lp, prefixes); - generate_random_ipv6_prefixes(&prefixes); - struct f_prefix_node *n; - WALK_LIST(n, prefixes) + for (int i = 0; i < PREFIX_TESTS_NUM; i++) { - trie_add_prefix(trie, &n->prefix.net, n->prefix.lo, n->prefix.hi); + net_addr net; + get_random_net(&net, v6); + test_match_net(prefixes, trie, &net); } - int i; - for (i = 0; i < PREFIX_TESTS_NUM; i++) - { - struct f_prefix f = get_random_ip6_prefix(); - bt_debug("TEST\t" PRIip6 "/%d\n", ARGip6(net6_prefix(&f.net)), f.net.pxlen); - - int should_be = is_prefix_included(&prefixes, &f); - int is_there = trie_match_net(trie, &f.net); - bt_assert_msg(should_be == is_there, "Prefix " PRIip6 "/%d %s", ARGip6(net6_prefix(&f.net)), f.net.pxlen, (should_be ? "should be found in trie" : "should not be found in trie")); - } - - struct f_prefix_node *nxt; - WALK_LIST_DELSAFE(n, nxt, prefixes) - { - free(n); - } + v6 = !v6; + lp_flush(lp); } bt_bird_cleanup(); return 1; } +static int +t_match_inner_net(void) +{ + bt_bird_init(); + bt_config_parse(BT_CONFIG_SIMPLE); + + int v6 = 0; + linpool *lp = lp_new_default(&root_pool); + for (int round = 0; round < TESTS_NUM; round++) + { + list *prefixes = make_random_prefix_list(lp, PREFIXES_NUM, v6); + struct f_trie *trie = make_trie_from_prefix_list(lp, prefixes); + + struct f_prefix_node *n = HEAD(*prefixes); + for (int i = 0; i < PREFIX_TESTS_NUM; i++) + { + net_addr net; + get_inner_net(&net, &n->prefix); + test_match_net(prefixes, trie, &net); + + n = NODE_VALID(NODE_NEXT(n)) ? NODE_NEXT(n) : HEAD(*prefixes); + } + + v6 = !v6; + lp_flush(lp); + } + + bt_bird_cleanup(); + return 1; +} + +static int +t_match_outer_net(void) +{ + bt_bird_init(); + bt_config_parse(BT_CONFIG_SIMPLE); + + int v6 = 0; + linpool *lp = lp_new_default(&root_pool); + for (int round = 0; round < TESTS_NUM; round++) + { + list *prefixes = make_random_prefix_list(lp, PREFIXES_NUM, v6); + struct f_trie *trie = make_trie_from_prefix_list(lp, prefixes); + + struct f_prefix_node *n = HEAD(*prefixes); + for (int i = 0; i < PREFIX_TESTS_NUM; i++) + { + net_addr net; + get_outer_net(&net, &n->prefix); + test_match_net(prefixes, trie, &net); + + n = NODE_VALID(NODE_NEXT(n)) ? NODE_NEXT(n) : HEAD(*prefixes); + } + + v6 = !v6; + lp_flush(lp); + } + + v6 = !v6; + bt_bird_cleanup(); + return 1; +} + static int t_trie_same(void) { bt_bird_init(); bt_config_parse(BT_CONFIG_SIMPLE); - int round; - for (round = 0; round < TESTS_NUM*4; round++) + int v6 = 0; + linpool *lp = lp_new_default(&root_pool); + for (int round = 0; round < TESTS_NUM*4; round++) { - struct f_trie * trie1 = f_new_trie(config->mem, 0); - struct f_trie * trie2 = f_new_trie(config->mem, 0); - - list prefixes; /* a list of f_extended_prefix structures */ - init_list(&prefixes); - int i; - for (i = 0; i < 100; i++) - generate_random_ipv6_prefixes(&prefixes); + list *prefixes = make_random_prefix_list(lp, 100 * PREFIXES_NUM, v6); + struct f_trie *trie1 = f_new_trie(lp, 0); + struct f_trie *trie2 = f_new_trie(lp, 0); struct f_prefix_node *n; - WALK_LIST(n, prefixes) - { + WALK_LIST(n, *prefixes) trie_add_prefix(trie1, &n->prefix.net, n->prefix.lo, n->prefix.hi); - } - WALK_LIST_BACKWARDS(n, prefixes) - { + + WALK_LIST_BACKWARDS(n, *prefixes) trie_add_prefix(trie2, &n->prefix.net, n->prefix.lo, n->prefix.hi); - } bt_assert(trie_same(trie1, trie2)); - struct f_prefix_node *nxt; - WALK_LIST_DELSAFE(n, nxt, prefixes) - { - free(n); - } + v6 = !v6; + lp_flush(lp); } return 1; @@ -180,7 +406,9 @@ main(int argc, char *argv[]) { bt_init(argc, argv); - bt_test_suite(t_match_net, "Testing random prefix matching"); + bt_test_suite(t_match_random_net, "Testing random prefix matching"); + bt_test_suite(t_match_inner_net, "Testing random inner prefix matching"); + bt_test_suite(t_match_outer_net, "Testing random outer prefix matching"); bt_test_suite(t_trie_same, "A trie filled forward should be same with a trie filled backward."); return bt_exit_value(); diff --git a/test/birdtest.c b/test/birdtest.c index a1da078f..d739e78b 100644 --- a/test/birdtest.c +++ b/test/birdtest.c @@ -501,6 +501,12 @@ bt_fmt_ipa(char *buf, size_t size, const void *data) bsnprintf(buf, size, "(null)"); } +void +bt_format_net(char *buf, size_t size, const void *data) +{ + bsnprintf(buf, size, "%N", (const net_addr *) data); +} + int bt_is_char(byte c) { diff --git a/test/birdtest.h b/test/birdtest.h index caec529b..7a0c2fc4 100644 --- a/test/birdtest.h +++ b/test/birdtest.h @@ -165,6 +165,8 @@ struct bt_batch { void bt_fmt_str(char *buf, size_t size, const void *data); void bt_fmt_unsigned(char *buf, size_t size, const void *data); void bt_fmt_ipa(char *buf, size_t size, const void *data); +void bt_format_net(char *buf, size_t size, const void *data); + int bt_assert_batch__(struct bt_batch *opts); int bt_is_char(byte c); From 067f69a56de0e0e61d423ec5aa68095aa28e3124 Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Sat, 25 Sep 2021 16:00:30 +0200 Subject: [PATCH 009/149] Filter: Add prefix trie benchmarks Add trie tests intended as benchmarks that use external datasets instead of generated prefixes. As datasets are not included, they are commented out by default. --- filter/trie_test.c | 235 +++++++++++++++++++++++++++++++++++++++++++++ test/birdtest.c | 6 ++ test/birdtest.h | 1 + 3 files changed, 242 insertions(+) diff --git a/filter/trie_test.c b/filter/trie_test.c index 6418427e..3e8ce84d 100644 --- a/filter/trie_test.c +++ b/filter/trie_test.c @@ -16,7 +16,10 @@ #define TESTS_NUM 10 #define PREFIXES_NUM 32 #define PREFIX_TESTS_NUM 10000 +#define PREFIX_BENCH_NUM 100000000 +#define TRIE_BUFFER_SIZE 1024 +#define TEST_BUFFER_SIZE (1024*1024) #define BIG_BUFFER_SIZE 10000 /* Wrapping structure for storing f_prefixes structures in list */ @@ -266,6 +269,142 @@ make_trie_from_prefix_list(linpool *lp, list *prefixes) return trie; } +/* + * Read sequence of prefixes from file handle and return prefix list. + * Each prefix is on one line, sequence terminated by empty line or eof. + * Arg @plus means prefix should include all longer ones. + */ +static list * +read_prefix_list(linpool *lp, FILE *f, int v6, int plus) +{ + ASSERT(!v6); + + uint a0, a1, a2, a3, pl; + char s[32]; + int n; + + list *pxlist = lp_allocz(lp, sizeof(struct f_prefix_node)); + init_list(pxlist); + + errno = 0; + while (fgets(s, 32, f)) + { + if (s[0] == '\n') + return pxlist; + + n = sscanf(s, "%u.%u.%u.%u/%u", &a0, &a1, &a2, &a3, &pl); + + if (n != 5) + bt_abort_msg("Invalid content of trie_data"); + + struct f_prefix_node *px = lp_allocz(lp, sizeof(struct f_prefix_node)); + net_fill_ip4(&px->prefix.net, ip4_build(a0, a1, a2, a3), pl); + px->prefix.lo = pl; + px->prefix.hi = plus ? IP4_MAX_PREFIX_LENGTH : pl; + add_tail(pxlist, &px->n); + + char buf[64]; + bt_format_net(buf, 64, &px->prefix.net); + bt_debug("ADD %s{%d,%d}\n", buf, px->prefix.lo, px->prefix.hi); + } + + bt_syscall(errno, "fgets()"); + return EMPTY_LIST(*pxlist) ? NULL : pxlist; +} + +/* + * Open file, read multiple sequences of prefixes from it. Fill @data with + * prefix lists and @trie with generated tries. Return number of sequences / + * tries. Use separate linpool @lp0 for prefix lists and @lp1 for tries. + * Arg @plus means prefix should include all longer ones. + */ +static int +read_prefix_file(const char *filename, int plus, + linpool *lp0, linpool *lp1, + list *data[], struct f_trie *trie[]) +{ + FILE *f = fopen(filename, "r"); + bt_syscall(!f, "fopen(%s)", filename); + + int n = 0; + list *pxlist; + while (pxlist = read_prefix_list(lp0, f, 0, plus)) + { + data[n] = pxlist; + trie[n] = make_trie_from_prefix_list(lp1, pxlist); + bt_debug("NEXT\n"); + n++; + } + + fclose(f); + bt_debug("DONE reading %d tries\n", n); + + return n; +} + +/* + * Select random subset of @dn prefixes from prefix list @src of length @sn, + * and store them to buffer @dst (of size @dn). Prefixes may be chosen multiple + * times. Randomize order of prefixes in @dst buffer. + */ +static void +select_random_prefix_subset(list *src[], net_addr dst[], int sn, int dn) +{ + int pn = 0; + + if (!dn) + return; + + /* Compute total prefix number */ + for (int i = 0; i < sn; i++) + pn += list_length(src[i]); + + /* Change of selecting a prefix */ + int rnd = (pn / dn) + 10; + int n = 0; + + /* Iterate indefinitely over src array */ + for (int i = 0; 1; i++, i = (i < sn) ? i : 0) + { + struct f_prefix_node *px; + WALK_LIST(px, *src[i]) + { + if (xrandom(rnd) != 0) + continue; + + net_copy(&dst[n], &px->prefix.net); + n++; + + /* We have enough */ + if (n == dn) + goto done; + } + } + +done: + /* Shuffle networks */ + for (int i = 0; i < dn; i++) + { + int j = xrandom(dn); + + if (i == j) + continue; + + net_addr tmp; + net_copy(&tmp, &dst[i]); + net_copy(&dst[i], &dst[j]); + net_copy(&dst[j], &tmp); + } +} + +/* Fill @dst buffer with @dn randomly generated /32 prefixes */ +static void +make_random_addresses(net_addr dst[], int dn) +{ + for (int i = 0; i < dn; i++) + net_fill_ip4(&dst[i], ip4_from_u32((u32) bt_random()), IP4_MAX_PREFIX_LENGTH); +} + static void test_match_net(list *prefixes, struct f_trie *trie, const net_addr *net) { @@ -371,6 +510,99 @@ t_match_outer_net(void) return 1; } +/* + * Read prefixes from @filename, build set of tries, prepare test data and do + * PREFIX_BENCH_NUM trie lookups. With @plus = 0, use random subset of known + * prefixes as test data, with @plus = 1, use randomly generated /32 prefixes + * as test data. + */ +static int +benchmark_trie_dataset(const char *filename, int plus) +{ + int n = 0; + linpool *lp0 = lp_new_default(&root_pool); + linpool *lp1 = lp_new_default(&root_pool); + list *data[TRIE_BUFFER_SIZE]; + struct f_trie *trie[TRIE_BUFFER_SIZE]; + net_addr *nets; + + bt_reset_suite_case_timer(); + bt_log_suite_case_result(1, "Reading %s", filename, n); + n = read_prefix_file(filename, plus, lp0, lp1, data, trie); + bt_log_suite_case_result(1, "Read prefix data, %d lists, ", n); + + size_t trie_size = rmemsize(lp1) * 1000 / (1024*1024); + bt_log_suite_case_result(1, "Trie size %u.%03u MB", + (uint) (trie_size / 1000), (uint) (trie_size % 1000)); + + int t = PREFIX_BENCH_NUM / n; + int tb = MIN(t, TEST_BUFFER_SIZE); + nets = lp_alloc(lp0, tb * sizeof(net_addr)); + + if (!plus) + select_random_prefix_subset(data, nets, n, tb); + else + make_random_addresses(nets, tb); + + bt_log_suite_case_result(1, "Make test data, %d (%d) tests", t, tb); + bt_reset_suite_case_timer(); + + /* + int match = 0; + for (int i = 0; i < t; i++) + for (int j = 0; j < n; j++) + test_match_net(data[j], trie[j], &nets[i]); + */ + + int match = 0; + for (int i = 0; i < t; i++) + for (int j = 0; j < n; j++) + if (trie_match_net(trie[j], &nets[i % TEST_BUFFER_SIZE])) + match++; + + bt_log_suite_case_result(1, "Matching done, %d / %d matches", match, t * n); + + rfree(lp0); + rfree(lp1); + + return 1; +} + +static int UNUSED +t_bench_trie_datasets_subset(void) +{ + bt_bird_init(); + bt_config_parse(BT_CONFIG_SIMPLE); + + /* Specific datasets, not included */ + benchmark_trie_dataset("trie-data-bgp-1", 0); + benchmark_trie_dataset("trie-data-bgp-10", 0); + benchmark_trie_dataset("trie-data-bgp-100", 0); + benchmark_trie_dataset("trie-data-bgp-1000", 0); + + bt_bird_cleanup(); + + return 1; +} + +static int UNUSED +t_bench_trie_datasets_random(void) +{ + bt_bird_init(); + bt_config_parse(BT_CONFIG_SIMPLE); + + /* Specific datasets, not included */ + benchmark_trie_dataset("trie-data-bgp-1", 1); + benchmark_trie_dataset("trie-data-bgp-10", 1); + benchmark_trie_dataset("trie-data-bgp-100", 1); + benchmark_trie_dataset("trie-data-bgp-1000", 1); + + bt_bird_cleanup(); + + return 1; +} + + static int t_trie_same(void) { @@ -411,5 +643,8 @@ main(int argc, char *argv[]) bt_test_suite(t_match_outer_net, "Testing random outer prefix matching"); bt_test_suite(t_trie_same, "A trie filled forward should be same with a trie filled backward."); + // bt_test_suite(t_bench_trie_datasets_subset, "Benchmark tries from datasets by random subset of nets"); + // bt_test_suite(t_bench_trie_datasets_random, "Benchmark tries from datasets by generated addresses"); + return bt_exit_value(); } diff --git a/test/birdtest.c b/test/birdtest.c index d739e78b..053954e1 100644 --- a/test/birdtest.c +++ b/test/birdtest.c @@ -309,6 +309,12 @@ bt_log_suite_case_result(int result, const char *fmt, ...) } } +void +bt_reset_suite_case_timer(void) +{ + clock_gettime(CLOCK_MONOTONIC, &bt_suite_case_begin); +} + int bt_test_suite_base(int (*fn)(const void *), const char *id, const void *fn_arg, int forked, int timeout, const char *dsc, ...) { diff --git a/test/birdtest.h b/test/birdtest.h index 7a0c2fc4..ad5f8f9c 100644 --- a/test/birdtest.h +++ b/test/birdtest.h @@ -32,6 +32,7 @@ extern const char *bt_test_id; void bt_init(int argc, char *argv[]); int bt_exit_value(void); +void bt_reset_suite_case_timer(void); int bt_test_suite_base(int (*test_fn)(const void *), const char *test_id, const void *test_fn_argument, int forked, int timeout, const char *dsc, ...); static inline u64 bt_random(void) { return ((u64) random() & 0xffffffff) | ((u64) random() << 32); } From 9f24fef5e91fb4df301242ede91ee7ac1b46b8a8 Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Wed, 20 Oct 2021 01:51:28 +0200 Subject: [PATCH 010/149] Conf: Fix crash during shutdown BIRD implements shutdown by reconfiguring to fake empty configuration. Such fake config structure is created from the last running config and shares some data, including symbol table. This allows access to (removed) routing tables and causes crash when 'show route' command is used during shutdown. Clean up symbol table, table list and links to default tables, so removed routing tables cannot be accessed during shutdown. --- conf/conf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/conf/conf.c b/conf/conf.c index 58abcde1..a2b01667 100644 --- a/conf/conf.c +++ b/conf/conf.c @@ -520,6 +520,9 @@ order_shutdown(int gr) memcpy(c, config, sizeof(struct config)); init_list(&c->protos); init_list(&c->tables); + init_list(&c->symbols); + memset(c->def_tables, 0, sizeof(c->def_tables)); + HASH_INIT(c->sym_hash, c->pool, 4); c->shutdown = 1; c->gr_down = gr; From 6d87cf4be7536433d263609828414e687e1d4f08 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Fri, 1 Oct 2021 14:11:13 +0200 Subject: [PATCH 011/149] Kernel routes are flushed on shutdown by kernel scan, not by table scan --- sysdep/unix/krt.c | 31 ++++++++++++++----------------- sysdep/unix/krt.h | 1 + 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index b98e7ec0..32bfe7fc 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -542,23 +542,6 @@ krt_is_installed(struct krt_proto *p, net *n) return n->routes && bmap_test(&p->p.main_channel->export_map, n->routes->rte.id); } -static void -krt_flush_routes(struct krt_proto *p) -{ - struct rtable *t = p->p.main_channel->table; - - KRT_TRACE(p, D_EVENTS, "Flushing kernel routes"); - FIB_WALK(&t->fib, net, n) - { - if (krt_is_installed(p, n)) - { - /* FIXME: this does not work if gw is changed in export filter */ - krt_replace_rte(p, n->n.addr, NULL, &n->routes->rte); - } - } - FIB_WALK_END; -} - static struct rte * krt_export_net(struct krt_proto *p, net *net) { @@ -637,6 +620,9 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src) #endif /* The rest is for KRT_SRC_BIRD (or KRT_SRC_UNKNOWN) */ + /* Deleting all routes if flush is requested */ + if (p->flush_routes) + goto delete; /* We wait for the initial feed to have correct installed state */ if (!p->ready) @@ -729,6 +715,17 @@ krt_prune(struct krt_proto *p) p->initialized = 1; } +static void +krt_flush_routes(struct krt_proto *p) +{ + KRT_TRACE(p, D_EVENTS, "Flushing kernel routes"); + p->flush_routes = 1; + krt_init_scan(p); + krt_do_scan(p); + /* No prune! */ + p->flush_routes = 0; +} + void krt_got_route_async(struct krt_proto *p, rte *e, int new, s8 src) { diff --git a/sysdep/unix/krt.h b/sysdep/unix/krt.h index cd4bd07d..f6ad6fde 100644 --- a/sysdep/unix/krt.h +++ b/sysdep/unix/krt.h @@ -66,6 +66,7 @@ struct krt_proto { byte ready; /* Initial feed has been finished */ byte initialized; /* First scan has been finished */ byte reload; /* Next scan is doing reload */ + byte flush_routes; /* Scanning to flush */ }; extern pool *krt_pool; From 13ebe7717685aaa64bbbe09a2e6cc0c6da2bd6bd Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Sat, 30 Oct 2021 14:56:55 +0000 Subject: [PATCH 012/149] RPKI shouldn't process more packets when being stopped --- proto/rpki/packets.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/proto/rpki/packets.c b/proto/rpki/packets.c index dd11f997..2c37df76 100644 --- a/proto/rpki/packets.c +++ b/proto/rpki/packets.c @@ -897,6 +897,9 @@ rpki_rx_hook(struct birdsock *sk, uint size) struct rpki_cache *cache = sk->data; struct rpki_proto *p = cache->p; + if ((p->p.proto_state == PS_DOWN) || (p->cache != cache)) + return 0; + byte *pkt_start = sk->rbuf; byte *end = pkt_start + size; From 46739f007a8a21c7887a29a465db0c2520fb4a13 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Sat, 30 Oct 2021 15:50:16 +0000 Subject: [PATCH 013/149] RPKI: Do nothing when protocol is stopping --- proto/rpki/packets.c | 8 ++++++++ proto/rpki/rpki.c | 9 +++++++++ 2 files changed, 17 insertions(+) diff --git a/proto/rpki/packets.c b/proto/rpki/packets.c index 2c37df76..a711858c 100644 --- a/proto/rpki/packets.c +++ b/proto/rpki/packets.c @@ -956,6 +956,8 @@ rpki_err_hook(struct birdsock *sk, int error_num) CACHE_TRACE(D_EVENTS, cache, "The other side closed a connection"); } + if (cache->p->cache != cache) + return; rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT); } @@ -975,6 +977,9 @@ rpki_tx_hook(sock *sk) { struct rpki_cache *cache = sk->data; + if (cache->p->cache != cache) + return; + while (rpki_fire_tx(cache) > 0) ; } @@ -984,6 +989,9 @@ rpki_connected_hook(sock *sk) { struct rpki_cache *cache = sk->data; + if (cache->p->cache != cache) + return; + CACHE_TRACE(D_EVENTS, cache, "Connected"); proto_notify_state(&cache->p->p, PS_UP); diff --git a/proto/rpki/rpki.c b/proto/rpki/rpki.c index 72fbc967..95066499 100644 --- a/proto/rpki/rpki.c +++ b/proto/rpki/rpki.c @@ -384,6 +384,9 @@ rpki_refresh_hook(timer *tm) { struct rpki_cache *cache = tm->data; + if (cache->p->cache != cache) + return; + CACHE_DBG(cache, "%s", rpki_cache_state_to_str(cache->state)); switch (cache->state) @@ -430,6 +433,9 @@ rpki_retry_hook(timer *tm) { struct rpki_cache *cache = tm->data; + if (cache->p->cache != cache) + return; + CACHE_DBG(cache, "%s", rpki_cache_state_to_str(cache->state)); switch (cache->state) @@ -475,6 +481,9 @@ rpki_expire_hook(timer *tm) { struct rpki_cache *cache = tm->data; + if (cache->p->cache != cache) + return; + if (!cache->last_update) return; From 1e76f6e954e8043ec08c917c3f53e20f7dd06c99 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Thu, 30 Sep 2021 11:54:11 +0200 Subject: [PATCH 014/149] Suppressed MRT unused static function warning --- proto/mrt/mrt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/proto/mrt/mrt.c b/proto/mrt/mrt.c index 03f0d59e..5da3c7c6 100644 --- a/proto/mrt/mrt.c +++ b/proto/mrt/mrt.c @@ -113,13 +113,13 @@ mrt_buffer_flush(buffer *b) } #define MRT_DEFINE_TYPE(S, T) \ - static inline void mrt_put_##S##_(buffer *b, T x) \ + UNUSED static inline void mrt_put_##S##_(buffer *b, T x) \ { \ put_##S(b->pos, x); \ b->pos += sizeof(T); \ } \ \ - static inline void mrt_put_##S(buffer *b, T x) \ + UNUSED static inline void mrt_put_##S(buffer *b, T x) \ { \ mrt_buffer_need(b, sizeof(T)); \ put_##S(b->pos, x); \ From 1c2f66f2bd9b2996c8cba0604e7ac38738399000 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Fri, 14 May 2021 16:23:18 +0200 Subject: [PATCH 015/149] Refeed is done from export table when appropriate --- nest/route.h | 3 ++- nest/rt-table.c | 44 +++++++++++++++++++++++++++++++++++--------- proto/bgp/packets.c | 2 +- 3 files changed, 38 insertions(+), 11 deletions(-) diff --git a/nest/route.h b/nest/route.h index ade14857..98d605c8 100644 --- a/nest/route.h +++ b/nest/route.h @@ -346,8 +346,9 @@ int rt_feed_channel(struct channel *c); void rt_feed_channel_abort(struct channel *c); int rt_reload_channel(struct channel *c); void rt_reload_channel_abort(struct channel *c); +void rt_refeed_channel(struct channel *c); void rt_prune_sync(rtable *t, int all); -int rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old, struct rte_storage **old_exported, int refeed); +int rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old, struct rte_storage **old_exported); struct rtable_config *rt_new_table(struct symbol *s, uint addr_type); diff --git a/nest/rt-table.c b/nest/rt-table.c index 837e0ab9..b005f6f3 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -451,8 +451,11 @@ do_rt_notify(struct channel *c, const net_addr *net, rte *new, rte *old, int ref struct rte_storage *old_exported = NULL; if (c->out_table) { - if (!rte_update_out(c, net, new, old, &old_exported, refeed)) + if (!rte_update_out(c, net, new, old, &old_exported)) + { + rte_trace_out(D_ROUTES, c, new, "idempotent"); return; + } } if (new) @@ -2406,7 +2409,7 @@ again: */ int -rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, struct rte_storage **old_exported, int refeed) +rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, struct rte_storage **old_exported) { struct rtable *tab = c->out_table; struct rte_src *src; @@ -2423,7 +2426,7 @@ rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, struct src = old0->src; if (!net) - goto drop_withdraw; + goto drop; } /* Find the old rte */ @@ -2433,7 +2436,7 @@ rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, struct if (old = *pos) { if (new && rte_same(&(*pos)->rte, new)) - goto drop_update; + goto drop; /* Remove the old rte */ *pos = old->next; @@ -2444,7 +2447,7 @@ rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, struct if (!new) { if (!old) - goto drop_withdraw; + goto drop; if (!net->routes) fib_delete(&tab->fib, net); @@ -2460,13 +2463,36 @@ rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, struct tab->rt_count++; return 1; -drop_update: - return refeed; - -drop_withdraw: +drop: return 0; } +void +rt_refeed_channel(struct channel *c) +{ + if (!c->out_table) + { + channel_request_feeding(c); + return; + } + + ASSERT_DIE(c->ra_mode != RA_ANY); + + c->proto->feed_begin(c, 0); + + FIB_WALK(&c->out_table->fib, net, n) + { + if (!n->routes) + continue; + + rte e = n->routes->rte; + c->proto->rt_notify(c->proto, c, n->n.addr, &e, NULL); + } + FIB_WALK_END; + + c->proto->feed_end(c); +} + /* * Hostcache diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 647551e5..f1e6d7d2 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -2695,7 +2695,7 @@ bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len) { case BGP_RR_REQUEST: BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH"); - channel_request_feeding(&c->c); + rt_refeed_channel(&c->c); break; case BGP_RR_BEGIN: From c56752e4367733c03a05e65ba62ccd2e54f7aadd Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Mon, 21 Jun 2021 19:11:42 +0200 Subject: [PATCH 016/149] Protocol stats split to import and export --- nest/proto.c | 45 +++++++++++++------------ nest/protocol.h | 46 +++++++++++++------------ nest/rt-table.c | 86 +++++++++++++++++++++++------------------------ proto/pipe/pipe.c | 24 +++++++------ 4 files changed, 105 insertions(+), 96 deletions(-) diff --git a/nest/proto.c b/nest/proto.c index 7cfb1555..631e4b60 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -284,7 +284,7 @@ channel_feed_loop(void *ptr) if (c->refeeding && (l->state == PLS_BLOCKED) && (c->refeed_count <= l->limit) && - (c->stats.exp_routes <= l->limit)) + (c->export_stats.routes <= l->limit)) { log(L_INFO "Protocol %s resets route export limit (%u)", c->proto->name, l->limit); channel_reset_limit(&c->out_limit); @@ -461,7 +461,7 @@ channel_stop_export(struct channel *c) rt_feed_channel_abort(c); c->export_state = ES_DOWN; - c->stats.exp_routes = 0; + c->export_stats.routes = 0; bmap_reset(&c->export_map, 1024); } @@ -551,7 +551,8 @@ channel_do_start(struct channel *c) c->feed_event = ev_new_init(c->proto->pool, channel_feed_loop, c); bmap_init(&c->export_map, c->proto->pool, 1024); - memset(&c->stats, 0, sizeof(struct proto_stats)); + memset(&c->export_stats, 0, sizeof(struct export_stats)); + memset(&c->import_stats, 0, sizeof(struct import_stats)); channel_reset_limit(&c->rx_limit); channel_reset_limit(&c->in_limit); @@ -600,11 +601,12 @@ channel_do_down(struct channel *c) rt_unlock_table(c->table); c->proto->active_channels--; - if ((c->stats.imp_routes + c->stats.filt_routes) != 0) + if ((c->import_stats.routes + c->import_stats.filtered) != 0) log(L_ERR "%s: Channel %s is down but still has some routes", c->proto->name, c->name); // bmap_free(&c->export_map); - memset(&c->stats, 0, sizeof(struct proto_stats)); + memset(&c->import_stats, 0, sizeof(struct import_stats)); + memset(&c->export_stats, 0, sizeof(struct export_stats)); c->in_table = NULL; c->reload_event = NULL; @@ -1842,19 +1844,19 @@ static void channel_verify_limits(struct channel *c) { struct channel_limit *l; - u32 all_routes = c->stats.imp_routes + c->stats.filt_routes; + u32 all_routes = c->import_stats.routes + c->import_stats.filtered; l = &c->rx_limit; if (l->action && (all_routes > l->limit)) channel_notify_limit(c, l, PLD_RX, all_routes); l = &c->in_limit; - if (l->action && (c->stats.imp_routes > l->limit)) - channel_notify_limit(c, l, PLD_IN, c->stats.imp_routes); + if (l->action && (c->import_stats.routes > l->limit)) + channel_notify_limit(c, l, PLD_IN, c->import_stats.routes); l = &c->out_limit; - if (l->action && (c->stats.exp_routes > l->limit)) - channel_notify_limit(c, l, PLD_OUT, c->stats.exp_routes); + if (l->action && (c->export_stats.routes > l->limit)) + channel_notify_limit(c, l, PLD_OUT, c->export_stats.routes); } static inline void @@ -2009,28 +2011,29 @@ proto_state_name(struct proto *p) static void channel_show_stats(struct channel *c) { - struct proto_stats *s = &c->stats; + struct import_stats *is = &c->import_stats; + struct export_stats *es = &c->export_stats; if (c->in_keep_filtered) cli_msg(-1006, " Routes: %u imported, %u filtered, %u exported, %u preferred", - s->imp_routes, s->filt_routes, s->exp_routes, s->pref_routes); + is->routes, is->filtered, es->routes, is->pref); else cli_msg(-1006, " Routes: %u imported, %u exported, %u preferred", - s->imp_routes, s->exp_routes, s->pref_routes); + is->routes, es->routes, is->pref); cli_msg(-1006, " Route change stats: received rejected filtered ignored accepted"); cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u", - s->imp_updates_received, s->imp_updates_invalid, - s->imp_updates_filtered, s->imp_updates_ignored, - s->imp_updates_accepted); + is->updates_received, is->updates_invalid, + is->updates_filtered, is->updates_ignored, + is->updates_accepted); cli_msg(-1006, " Import withdraws: %10u %10u --- %10u %10u", - s->imp_withdraws_received, s->imp_withdraws_invalid, - s->imp_withdraws_ignored, s->imp_withdraws_accepted); + is->withdraws_received, is->withdraws_invalid, + is->withdraws_ignored, is->withdraws_accepted); cli_msg(-1006, " Export updates: %10u %10u %10u --- %10u", - s->exp_updates_received, s->exp_updates_rejected, - s->exp_updates_filtered, s->exp_updates_accepted); + es->updates_received, es->updates_rejected, + es->updates_filtered, es->updates_accepted); cli_msg(-1006, " Export withdraws: %10u --- --- --- %10u", - s->exp_withdraws_received, s->exp_withdraws_accepted); + es->withdraws_received, es->withdraws_accepted); } void diff --git a/nest/protocol.h b/nest/protocol.h index 80b4509b..9be8e531 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -132,29 +132,31 @@ struct proto_config { }; /* Protocol statistics */ -struct proto_stats { +struct import_stats { /* Import - from protocol to core */ - u32 imp_routes; /* Number of routes successfully imported to the (adjacent) routing table */ - u32 filt_routes; /* Number of routes rejected in import filter but kept in the routing table */ - u32 pref_routes; /* Number of routes selected as best in the (adjacent) routing table */ - u32 imp_updates_received; /* Number of route updates received */ - u32 imp_updates_invalid; /* Number of route updates rejected as invalid */ - u32 imp_updates_filtered; /* Number of route updates rejected by filters */ - u32 imp_updates_ignored; /* Number of route updates rejected as already in route table */ - u32 imp_updates_accepted; /* Number of route updates accepted and imported */ - u32 imp_withdraws_received; /* Number of route withdraws received */ - u32 imp_withdraws_invalid; /* Number of route withdraws rejected as invalid */ - u32 imp_withdraws_ignored; /* Number of route withdraws rejected as already not in route table */ - u32 imp_withdraws_accepted; /* Number of route withdraws accepted and processed */ + u32 routes; /* Number of routes successfully imported to the (adjacent) routing table */ + u32 filtered; /* Number of routes rejected in import filter but kept in the routing table */ + u32 pref; /* Number of routes selected as best in the (adjacent) routing table */ + u32 updates_received; /* Number of route updates received */ + u32 updates_invalid; /* Number of route updates rejected as invalid */ + u32 updates_filtered; /* Number of route updates rejected by filters */ + u32 updates_ignored; /* Number of route updates rejected as already in route table */ + u32 updates_accepted; /* Number of route updates accepted and imported */ + u32 withdraws_received; /* Number of route withdraws received */ + u32 withdraws_invalid; /* Number of route withdraws rejected as invalid */ + u32 withdraws_ignored; /* Number of route withdraws rejected as already not in route table */ + u32 withdraws_accepted; /* Number of route withdraws accepted and processed */ +}; +struct export_stats { /* Export - from core to protocol */ - u32 exp_routes; /* Number of routes successfully exported to the protocol */ - u32 exp_updates_received; /* Number of route updates received */ - u32 exp_updates_rejected; /* Number of route updates rejected by protocol */ - u32 exp_updates_filtered; /* Number of route updates rejected by filters */ - u32 exp_updates_accepted; /* Number of route updates accepted and exported */ - u32 exp_withdraws_received; /* Number of route withdraws received */ - u32 exp_withdraws_accepted; /* Number of route withdraws accepted and processed */ + u32 routes; /* Number of routes successfully exported to the protocol */ + u32 updates_received; /* Number of route updates received */ + u32 updates_rejected; /* Number of route updates rejected by protocol */ + u32 updates_filtered; /* Number of route updates rejected by filters */ + u32 updates_accepted; /* Number of route updates accepted and exported */ + u32 withdraws_received; /* Number of route withdraws received */ + u32 withdraws_accepted; /* Number of route withdraws accepted and processed */ }; struct proto { @@ -516,7 +518,9 @@ struct channel { struct event *feed_event; /* Event responsible for feeding */ struct fib_iterator feed_fit; /* Routing table iterator used during feeding */ - struct proto_stats stats; /* Per-channel protocol statistics */ + struct import_stats import_stats; /* Import statistics */ + struct export_stats export_stats; /* Export statistics */ + u32 refeed_count; /* Number of routes exported during refeed regardless of out_limit */ u8 net_type; /* Routing table network type (NET_*), 0 for undefined */ diff --git a/nest/rt-table.c b/nest/rt-table.c index 6851b4bc..0b6351e9 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -375,7 +375,7 @@ export_filter_(struct channel *c, rte *rt, linpool *pool, int silent) { struct proto *p = c->proto; const struct filter *filter = c->out_filter; - struct proto_stats *stats = &c->stats; + struct export_stats *stats = &c->export_stats; int v; v = p->preexport ? p->preexport(c, rt) : 0; @@ -384,7 +384,7 @@ export_filter_(struct channel *c, rte *rt, linpool *pool, int silent) if (silent) goto reject; - stats->exp_updates_rejected++; + stats->updates_rejected++; if (v == RIC_REJECT) rte_trace_out(D_FILTERS, c, rt, "rejected by protocol"); goto reject; @@ -404,7 +404,7 @@ export_filter_(struct channel *c, rte *rt, linpool *pool, int silent) if (silent) goto reject; - stats->exp_updates_filtered++; + stats->updates_filtered++; rte_trace_out(D_FILTERS, c, rt, "filtered out"); goto reject; } @@ -427,7 +427,7 @@ static void do_rt_notify(struct channel *c, const net_addr *net, rte *new, rte *old, int refeed) { struct proto *p = c->proto; - struct proto_stats *stats = &c->stats; + struct export_stats *stats = &c->export_stats; if (refeed && new) c->refeed_count++; @@ -436,12 +436,12 @@ do_rt_notify(struct channel *c, const net_addr *net, rte *new, rte *old, int ref struct channel_limit *l = &c->out_limit; if (l->action && !old && new) { - if (stats->exp_routes >= l->limit) - channel_notify_limit(c, l, PLD_OUT, stats->exp_routes); + if (stats->routes >= l->limit) + channel_notify_limit(c, l, PLD_OUT, stats->routes); if (l->state == PLS_BLOCKED) { - stats->exp_updates_rejected++; + stats->updates_rejected++; rte_trace_out(D_FILTERS, c, new, "rejected [limit]"); return; } @@ -459,20 +459,20 @@ do_rt_notify(struct channel *c, const net_addr *net, rte *new, rte *old, int ref } if (new) - stats->exp_updates_accepted++; + stats->updates_accepted++; else - stats->exp_withdraws_accepted++; + stats->withdraws_accepted++; if (old) { bmap_clear(&c->export_map, old->id); - stats->exp_routes--; + stats->routes--; } if (new) { bmap_set(&c->export_map, new->id); - stats->exp_routes++; + stats->routes++; } if (p->debug & D_ROUTES) @@ -495,9 +495,9 @@ static void rt_notify_basic(struct channel *c, const net_addr *net, rte *new, rte *old, int refeed) { if (new) - c->stats.exp_updates_received++; + c->export_stats.updates_received++; else - c->stats.exp_withdraws_received++; + c->export_stats.withdraws_received++; if (new) new = export_filter(c, new, 0); @@ -537,9 +537,9 @@ rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_chang */ if (net->routes) - c->stats.exp_updates_received++; + c->export_stats.updates_received++; else - c->stats.exp_withdraws_received++; + c->export_stats.withdraws_received++; /* Find old_best - either old_changed, or route for net->routes */ if (old_changed && bmap_test(&c->export_map, old_changed->id)) @@ -655,9 +655,9 @@ rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed return; if (new_best) - c->stats.exp_updates_received++; + c->export_stats.updates_received++; else - c->stats.exp_withdraws_received++; + c->export_stats.withdraws_received++; /* Prepare new merged route */ if (new_best) @@ -735,9 +735,9 @@ rte_announce(rtable *tab, uint type, net *net, struct rte_storage *new, struct r if (new_best != old_best) { if (new_best) - new_best->rte.sender->stats.pref_routes++; + new_best->rte.sender->import_stats.pref++; if (old_best) - old_best->rte.sender->stats.pref_routes--; + old_best->rte.sender->import_stats.pref--; if (tab->hostcache) rt_notify_hostcache(tab, net); @@ -836,7 +836,7 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) { struct proto *p = c->proto; struct rtable *table = c->table; - struct proto_stats *stats = &c->stats; + struct import_stats *stats = &c->import_stats; struct rte_storage *old_best_stored = net->routes, *old_stored = NULL; rte *old_best = old_best_stored ? &old_best_stored->rte : NULL; rte *old = NULL; @@ -873,7 +873,7 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) if (!rte_is_filtered(new)) { - stats->imp_updates_ignored++; + stats->updates_ignored++; rte_trace_in(D_ROUTES, c, new, "ignored"); } @@ -887,7 +887,7 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) if (!old && !new) { - stats->imp_withdraws_ignored++; + stats->withdraws_ignored++; return; } @@ -897,7 +897,7 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) struct channel_limit *l = &c->rx_limit; if (l->action && !old && new && !c->in_table) { - u32 all_routes = stats->imp_routes + stats->filt_routes; + u32 all_routes = stats->routes + stats->filtered; if (all_routes >= l->limit) channel_notify_limit(c, l, PLD_RX, all_routes); @@ -907,7 +907,7 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) /* In receive limit the situation is simple, old is NULL so we just free new and exit like nothing happened */ - stats->imp_updates_ignored++; + stats->updates_ignored++; rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); return; } @@ -916,8 +916,8 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) l = &c->in_limit; if (l->action && !old_ok && new_ok) { - if (stats->imp_routes >= l->limit) - channel_notify_limit(c, l, PLD_IN, stats->imp_routes); + if (stats->routes >= l->limit) + channel_notify_limit(c, l, PLD_IN, stats->routes); if (l->state == PLS_BLOCKED) { @@ -928,7 +928,7 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) if both are NULL as this case is probably assumed to be already handled. */ - stats->imp_updates_ignored++; + stats->updates_ignored++; rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); if (c->in_keep_filtered) @@ -948,11 +948,11 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) } if (new_ok) - stats->imp_updates_accepted++; + stats->updates_accepted++; else if (old_ok) - stats->imp_withdraws_accepted++; + stats->withdraws_accepted++; else - stats->imp_withdraws_ignored++; + stats->withdraws_ignored++; if (old_ok || new_ok) table->last_rt_change = current_time(); @@ -961,9 +961,9 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) struct rte_storage *new_stored = new ? rte_store(new, net, table) : NULL; if (new) - rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++; + rte_is_filtered(new) ? stats->filtered++ : stats->routes++; if (old) - rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--; + rte_is_filtered(old) ? stats->filtered-- : stats->routes--; if (table->config->sorted) { @@ -1128,7 +1128,7 @@ rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) if (c->in_table && !rte_update_in(c, n, new, src)) return; - struct proto_stats *stats = &c->stats; + struct import_stats *stats = &c->import_stats; const struct filter *filter = c->in_filter; net *nn; @@ -1140,17 +1140,17 @@ rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) new->net = n; new->sender = c; - stats->imp_updates_received++; + stats->updates_received++; if (!rte_validate(new)) { rte_trace_in(D_FILTERS, c, new, "invalid"); - stats->imp_updates_invalid++; + stats->updates_invalid++; goto drop; } if (filter == FILTER_REJECT) { - stats->imp_updates_filtered++; + stats->updates_filtered++; rte_trace_in(D_FILTERS, c, new, "filtered out"); if (! c->in_keep_filtered) @@ -1164,7 +1164,7 @@ rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) int fr = f_run(filter, new, rte_update_pool, 0); if (fr > F_ACCEPT) { - stats->imp_updates_filtered++; + stats->updates_filtered++; rte_trace_in(D_FILTERS, c, new, "filtered out"); if (! c->in_keep_filtered) @@ -1180,11 +1180,11 @@ rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) } else { - stats->imp_withdraws_received++; + stats->withdraws_received++; if (!(nn = net_find(c->table, n)) || !src) { - stats->imp_withdraws_ignored++; + stats->withdraws_ignored++; rte_update_unlock(); return; } @@ -2297,8 +2297,8 @@ rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *sr return 1; drop_update: - c->stats.imp_updates_received++; - c->stats.imp_updates_ignored++; + c->import_stats.updates_received++; + c->import_stats.updates_ignored++; if (!net->routes) fib_delete(&tab->fib, net); @@ -2306,8 +2306,8 @@ drop_update: return 0; drop_withdraw: - c->stats.imp_withdraws_received++; - c->stats.imp_withdraws_ignored++; + c->import_stats.withdraws_received++; + c->import_stats.withdraws_ignored++; return 0; } diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c index 7604cc79..55a0b526 100644 --- a/proto/pipe/pipe.c +++ b/proto/pipe/pipe.c @@ -212,8 +212,10 @@ pipe_get_status(struct proto *P, byte *buf) static void pipe_show_stats(struct pipe_proto *p) { - struct proto_stats *s1 = &p->pri->stats; - struct proto_stats *s2 = &p->sec->stats; + struct import_stats *s1i = &p->pri->import_stats; + struct export_stats *s1e = &p->pri->export_stats; + struct import_stats *s2i = &p->sec->import_stats; + struct export_stats *s2e = &p->sec->export_stats; /* * Pipe stats (as anything related to pipes) are a bit tricky. There @@ -237,20 +239,20 @@ pipe_show_stats(struct pipe_proto *p) */ cli_msg(-1006, " Routes: %u imported, %u exported", - s1->imp_routes, s2->imp_routes); + s1i->routes, s2i->routes); cli_msg(-1006, " Route change stats: received rejected filtered ignored accepted"); cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u", - s2->exp_updates_received, s2->exp_updates_rejected + s1->imp_updates_invalid, - s2->exp_updates_filtered, s1->imp_updates_ignored, s1->imp_updates_accepted); + s2e->updates_received, s2e->updates_rejected + s1i->updates_invalid, + s2e->updates_filtered, s1i->updates_ignored, s1i->updates_accepted); cli_msg(-1006, " Import withdraws: %10u %10u --- %10u %10u", - s2->exp_withdraws_received, s1->imp_withdraws_invalid, - s1->imp_withdraws_ignored, s1->imp_withdraws_accepted); + s2e->withdraws_received, s1i->withdraws_invalid, + s1i->withdraws_ignored, s1i->withdraws_accepted); cli_msg(-1006, " Export updates: %10u %10u %10u %10u %10u", - s1->exp_updates_received, s1->exp_updates_rejected + s2->imp_updates_invalid, - s1->exp_updates_filtered, s2->imp_updates_ignored, s2->imp_updates_accepted); + s1e->updates_received, s1e->updates_rejected + s2i->updates_invalid, + s1e->updates_filtered, s2i->updates_ignored, s2i->updates_accepted); cli_msg(-1006, " Export withdraws: %10u %10u --- %10u %10u", - s1->exp_withdraws_received, s2->imp_withdraws_invalid, - s2->imp_withdraws_ignored, s2->imp_withdraws_accepted); + s1e->withdraws_received, s2i->withdraws_invalid, + s2i->withdraws_ignored, s2i->withdraws_accepted); } static const char *pipe_feed_state[] = { [ES_DOWN] = "down", [ES_FEEDING] = "feed", [ES_READY] = "up" }; From 3a8197a9dce6fc5d38b089a291ac79d8d394fea1 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Sat, 6 Nov 2021 20:34:16 +0100 Subject: [PATCH 017/149] Limit containment --- nest/limit.h | 49 ++++++++++ nest/proto.c | 225 +++++++++++++++++++++++++--------------------- nest/protocol.h | 40 ++++++--- nest/rt-table.c | 144 +++++++++++++---------------- proto/pipe/pipe.c | 13 ++- 5 files changed, 269 insertions(+), 202 deletions(-) create mode 100644 nest/limit.h diff --git a/nest/limit.h b/nest/limit.h new file mode 100644 index 00000000..5838ad3b --- /dev/null +++ b/nest/limit.h @@ -0,0 +1,49 @@ +/* + * BIRD Internet Routing Daemon -- Limits + * + * (c) 1998--2000 Martin Mares + * (c) 2021 Maria Matejka + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_LIMIT_H_ +#define _BIRD_LIMIT_H_ + +struct limit { + u32 max; + u32 count; + int (*action)(struct limit *, void *data); +}; + +static inline int limit_do_action(struct limit *l, void *data) +{ + return l->action ? l->action(l, data) : 1; +} + +static inline int limit_push(struct limit *l, void *data) +{ + if ((l->count >= l->max) && limit_do_action(l, data)) + return 1; + + l->count++; + return 0; +} + +static inline void limit_pop(struct limit *l) +{ + --l->count; +} + +static inline void limit_reset(struct limit *l) +{ + l->count = 0; +} + +static inline void limit_update(struct limit *l, void *data, u32 max) +{ + if (l->count > (l->max = max)) + limit_do_action(l, data); +} + +#endif diff --git a/nest/proto.c b/nest/proto.c index c7e25209..2009ff1f 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -52,9 +52,9 @@ static void channel_request_reload(struct channel *c); static void proto_shutdown_loop(timer *); static void proto_rethink_goal(struct proto *p); static char *proto_state_name(struct proto *p); -static void channel_verify_limits(struct channel *c); -static inline void channel_reset_limit(struct channel_limit *l); - +static void channel_init_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf); +static void channel_update_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf); +static void channel_reset_limit(struct channel *c, struct limit *l, int dir); static inline int proto_is_done(struct proto *p) { return (p->proto_state == PS_DOWN) && (p->active_channels == 0); } @@ -168,9 +168,10 @@ proto_add_channel(struct proto *p, struct channel_config *cf) c->in_filter = cf->in_filter; c->out_filter = cf->out_filter; - c->rx_limit = cf->rx_limit; - c->in_limit = cf->in_limit; - c->out_limit = cf->out_limit; + + channel_init_limit(c, &c->rx_limit, PLD_RX, &cf->rx_limit); + channel_init_limit(c, &c->in_limit, PLD_IN, &cf->in_limit); + channel_init_limit(c, &c->out_limit, PLD_OUT, &cf->out_limit); c->net_type = cf->net_type; c->ra_mode = cf->ra_mode; @@ -280,14 +281,12 @@ channel_feed_loop(void *ptr) } /* Reset export limit if the feed ended with acceptable number of exported routes */ - struct channel_limit *l = &c->out_limit; if (c->refeeding && - (l->state == PLS_BLOCKED) && - (c->refeed_count <= l->limit) && - (c->export_stats.routes <= l->limit)) + (c->limit_active & (1 << PLD_OUT)) && + (c->refeed_count <= c->out_limit.max)) { - log(L_INFO "Protocol %s resets route export limit (%u)", c->proto->name, l->limit); - channel_reset_limit(&c->out_limit); + log(L_INFO "Protocol %s resets route export limit (%u)", c->proto->name, c->out_limit.max); + channel_reset_limit(c, &c->out_limit, PLD_OUT); /* Continue in feed - it will process routing table again from beginning */ c->refeed_count = 0; @@ -461,7 +460,8 @@ channel_stop_export(struct channel *c) rt_feed_channel_abort(c); c->export_state = ES_DOWN; - c->export_stats.routes = 0; + + channel_reset_limit(c, &c->out_limit, PLD_OUT); bmap_reset(&c->export_map, 1024); bmap_reset(&c->export_reject_map, 1024); } @@ -556,9 +556,9 @@ channel_do_start(struct channel *c) memset(&c->export_stats, 0, sizeof(struct export_stats)); memset(&c->import_stats, 0, sizeof(struct import_stats)); - channel_reset_limit(&c->rx_limit); - channel_reset_limit(&c->in_limit); - channel_reset_limit(&c->out_limit); + channel_reset_limit(c, &c->rx_limit, PLD_RX); + channel_reset_limit(c, &c->in_limit, PLD_IN); + channel_reset_limit(c, &c->out_limit, PLD_OUT); CALL(c->channel->start, c); } @@ -604,8 +604,8 @@ channel_do_down(struct channel *c) rt_unlock_table(c->table); c->proto->active_channels--; - if ((c->import_stats.routes + c->import_stats.filtered) != 0) - log(L_ERR "%s: Channel %s is down but still has some routes", c->proto->name, c->name); + if (c->in_limit.count || c->rx_limit.count) + bug("%s: Channel %s is down but still has some routes", c->proto->name, c->name); // bmap_free(&c->export_map); memset(&c->import_stats, 0, sizeof(struct import_stats)); @@ -748,8 +748,8 @@ channel_request_reload(struct channel *c) * Should this be done before reload_routes() hook? * Perhaps, but routes are updated asynchronously. */ - channel_reset_limit(&c->rx_limit); - channel_reset_limit(&c->in_limit); + channel_reset_limit(c, &c->rx_limit, PLD_RX); + channel_reset_limit(c, &c->in_limit, PLD_IN); } const struct channel_class channel_basic = { @@ -852,9 +852,10 @@ channel_reconfigure(struct channel *c, struct channel_config *cf) /* Reconfigure channel fields */ c->in_filter = cf->in_filter; c->out_filter = cf->out_filter; - c->rx_limit = cf->rx_limit; - c->in_limit = cf->in_limit; - c->out_limit = cf->out_limit; + + channel_update_limit(c, &c->rx_limit, PLD_RX, &cf->rx_limit); + channel_update_limit(c, &c->in_limit, PLD_IN, &cf->in_limit); + channel_update_limit(c, &c->out_limit, PLD_OUT, &cf->out_limit); // c->ra_mode = cf->ra_mode; c->merge_limit = cf->merge_limit; @@ -863,8 +864,6 @@ channel_reconfigure(struct channel *c, struct channel_config *cf) c->in_keep_filtered = cf->in_keep_filtered; c->rpki_reload = cf->rpki_reload; - channel_verify_limits(c); - /* Execute channel-specific reconfigure hook */ if (c->channel->reconfigure && !c->channel->reconfigure(c, cf, &import_changed, &export_changed)) return 0; @@ -1785,88 +1784,104 @@ proto_set_message(struct proto *p, char *msg, int len) } -static const char * -channel_limit_name(struct channel_limit *l) -{ - const char *actions[] = { - [PLA_WARN] = "warn", - [PLA_BLOCK] = "block", - [PLA_RESTART] = "restart", - [PLA_DISABLE] = "disable", - }; +static const char * channel_limit_name[] = { + [PLA_WARN] = "warn", + [PLA_BLOCK] = "block", + [PLA_RESTART] = "restart", + [PLA_DISABLE] = "disable", +}; - return actions[l->action]; -} -/** - * channel_notify_limit: notify about limit hit and take appropriate action - * @c: channel - * @l: limit being hit - * @dir: limit direction (PLD_*) - * @rt_count: the number of routes - * - * The function is called by the route processing core when limit @l - * is breached. It activates the limit and tooks appropriate action - * according to @l->action. - */ -void -channel_notify_limit(struct channel *c, struct channel_limit *l, int dir, u32 rt_count) +static void +channel_log_limit(struct channel *c, struct limit *l, int dir) { const char *dir_name[PLD_MAX] = { "receive", "import" , "export" }; - const byte dir_down[PLD_MAX] = { PDC_RX_LIMIT_HIT, PDC_IN_LIMIT_HIT, PDC_OUT_LIMIT_HIT }; - struct proto *p = c->proto; - - if (l->state == PLS_BLOCKED) - return; - - /* For warning action, we want the log message every time we hit the limit */ - if (!l->state || ((l->action == PLA_WARN) && (rt_count == l->limit))) - log(L_WARN "Protocol %s hits route %s limit (%d), action: %s", - p->name, dir_name[dir], l->limit, channel_limit_name(l)); - - switch (l->action) - { - case PLA_WARN: - l->state = PLS_ACTIVE; - break; - - case PLA_BLOCK: - l->state = PLS_BLOCKED; - break; - - case PLA_RESTART: - case PLA_DISABLE: - l->state = PLS_BLOCKED; - if (p->proto_state == PS_UP) - proto_schedule_down(p, l->action == PLA_RESTART, dir_down[dir]); - break; - } + log(L_WARN "Channel %s.%s hits route %s limit (%d), action: %s", + c->proto->name, c->name, dir_name[dir], l->max, channel_limit_name[c->limit_actions[dir]]); } static void -channel_verify_limits(struct channel *c) +channel_activate_limit(struct channel *c, struct limit *l, int dir) { - struct channel_limit *l; - u32 all_routes = c->import_stats.routes + c->import_stats.filtered; + if (c->limit_active & (1 << dir)) + return; - l = &c->rx_limit; - if (l->action && (all_routes > l->limit)) - channel_notify_limit(c, l, PLD_RX, all_routes); - - l = &c->in_limit; - if (l->action && (c->import_stats.routes > l->limit)) - channel_notify_limit(c, l, PLD_IN, c->import_stats.routes); - - l = &c->out_limit; - if (l->action && (c->export_stats.routes > l->limit)) - channel_notify_limit(c, l, PLD_OUT, c->export_stats.routes); + c->limit_active |= (1 << dir); + channel_log_limit(c, l, dir); } -static inline void -channel_reset_limit(struct channel_limit *l) +static int +channel_limit_warn(struct limit *l, void *data) { - if (l->action) - l->state = PLS_INITIAL; + struct channel_limit_data *cld = data; + struct channel *c = cld->c; + int dir = cld->dir; + + channel_log_limit(c, l, dir); + + return 0; +} + +static int +channel_limit_block(struct limit *l, void *data) +{ + struct channel_limit_data *cld = data; + struct channel *c = cld->c; + int dir = cld->dir; + + channel_activate_limit(c, l, dir); + + return 1; +} + +static const byte chl_dir_down[PLD_MAX] = { PDC_RX_LIMIT_HIT, PDC_IN_LIMIT_HIT, PDC_OUT_LIMIT_HIT }; + +static int +channel_limit_down(struct limit *l, void *data) +{ + struct channel_limit_data *cld = data; + struct channel *c = cld->c; + struct proto *p = c->proto; + int dir = cld->dir; + + channel_activate_limit(c, l, dir); + + if (p->proto_state == PS_UP) + proto_schedule_down(p, c->limit_actions[dir] == PLA_RESTART, chl_dir_down[dir]); + + return 1; +} + +static int (*channel_limit_action[])(struct limit *, void *) = { + [PLA_NONE] = NULL, + [PLA_WARN] = channel_limit_warn, + [PLA_BLOCK] = channel_limit_block, + [PLA_RESTART] = channel_limit_down, + [PLA_DISABLE] = channel_limit_down, +}; + +static void +channel_update_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf) +{ + l->action = channel_limit_action[cf->action]; + c->limit_actions[dir] = cf->action; + + struct channel_limit_data cld = { .c = c, .dir = dir }; + limit_update(l, &cld, cf->action ? cf->limit : ~((u32) 0)); +} + +static void +channel_init_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf) +{ + channel_reset_limit(c, l, dir); + channel_update_limit(c, l, dir, cf); +} + +static void +channel_reset_limit(struct channel *c, struct limit *l, int dir) +{ + limit_reset(l); + c->limit_active &= ~(1 << dir); } static inline void @@ -2017,12 +2032,16 @@ channel_show_stats(struct channel *c) struct import_stats *is = &c->import_stats; struct export_stats *es = &c->export_stats; + u32 rx_routes = c->rx_limit.count; + u32 in_routes = c->in_limit.count; + u32 out_routes = c->out_limit.count; + if (c->in_keep_filtered) cli_msg(-1006, " Routes: %u imported, %u filtered, %u exported, %u preferred", - is->routes, is->filtered, es->routes, is->pref); + in_routes, (rx_routes - in_routes), out_routes, is->pref); else cli_msg(-1006, " Routes: %u imported, %u exported, %u preferred", - is->routes, es->routes, is->pref); + in_routes, out_routes, is->pref); cli_msg(-1006, " Route change stats: received rejected filtered ignored accepted"); cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u", @@ -2040,13 +2059,13 @@ channel_show_stats(struct channel *c) } void -channel_show_limit(struct channel_limit *l, const char *dsc) +channel_show_limit(struct limit *l, const char *dsc, int active, int action) { if (!l->action) return; - cli_msg(-1006, " %-16s%d%s", dsc, l->limit, l->state ? " [HIT]" : ""); - cli_msg(-1006, " Action: %s", channel_limit_name(l)); + cli_msg(-1006, " %-16s%d%s", dsc, l->max, active ? " [HIT]" : ""); + cli_msg(-1006, " Action: %s", channel_limit_name[action]); } void @@ -2064,9 +2083,9 @@ channel_show_info(struct channel *c) c->gr_lock ? " pending" : "", c->gr_wait ? " waiting" : ""); - channel_show_limit(&c->rx_limit, "Receive limit:"); - channel_show_limit(&c->in_limit, "Import limit:"); - channel_show_limit(&c->out_limit, "Export limit:"); + channel_show_limit(&c->rx_limit, "Receive limit:", c->limit_active & (1 << PLD_RX), c->limit_actions[PLD_RX]); + channel_show_limit(&c->in_limit, "Import limit:", c->limit_active & (1 << PLD_IN), c->limit_actions[PLD_IN]); + channel_show_limit(&c->out_limit, "Export limit:", c->limit_active & (1 << PLD_OUT), c->limit_actions[PLD_OUT]); if (c->channel_state != CS_DOWN) channel_show_stats(c); diff --git a/nest/protocol.h b/nest/protocol.h index 4c87c72d..c1978914 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -13,6 +13,7 @@ #include "lib/resource.h" #include "lib/event.h" #include "nest/route.h" +#include "nest/limit.h" #include "conf/conf.h" struct iface; @@ -134,8 +135,6 @@ struct proto_config { /* Protocol statistics */ struct import_stats { /* Import - from protocol to core */ - u32 routes; /* Number of routes successfully imported to the (adjacent) routing table */ - u32 filtered; /* Number of routes rejected in import filter but kept in the routing table */ u32 pref; /* Number of routes selected as best in the (adjacent) routing table */ u32 updates_received; /* Number of route updates received */ u32 updates_invalid; /* Number of route updates rejected as invalid */ @@ -150,7 +149,6 @@ struct import_stats { struct export_stats { /* Export - from core to protocol */ - u32 routes; /* Number of routes successfully exported to the protocol */ u32 updates_received; /* Number of route updates received */ u32 updates_rejected; /* Number of route updates rejected by protocol */ u32 updates_filtered; /* Number of route updates rejected by filters */ @@ -277,7 +275,7 @@ void channel_graceful_restart_unlock(struct channel *c); #define DEFAULT_GR_WAIT 240 -void channel_show_limit(struct channel_limit *l, const char *dsc); +void channel_show_limit(struct limit *l, const char *dsc, int active, int action); void channel_show_info(struct channel *c); void channel_cmd_debug(struct channel *c, uint mask); @@ -432,18 +430,29 @@ extern struct proto_config *cf_dev_proto; #define PLA_RESTART 4 /* Force protocol restart */ #define PLA_DISABLE 5 /* Shutdown and disable protocol */ -#define PLS_INITIAL 0 /* Initial limit state after protocol start */ -#define PLS_ACTIVE 1 /* Limit was hit */ -#define PLS_BLOCKED 2 /* Limit is active and blocking new routes */ - struct channel_limit { u32 limit; /* Maximum number of prefixes */ u8 action; /* Action to take (PLA_*) */ - u8 state; /* State of limit (PLS_*) */ }; -void channel_notify_limit(struct channel *c, struct channel_limit *l, int dir, u32 rt_count); +struct channel_limit_data { + struct channel *c; + int dir; +}; +#define CLP__RX(_c) (&(_c)->rx_limit) +#define CLP__IN(_c) (&(_c)->in_limit) +#define CLP__OUT(_c) (&(_c)->out_limit) + + +#if 0 +#define CHANNEL_LIMIT_LOG(_c, _dir, _op) log(L_TRACE "%s.%s: %s limit %s %u", (_c)->proto->name, (_c)->name, #_dir, _op, (CLP__##_dir(_c))->count) +#else +#define CHANNEL_LIMIT_LOG(_c, _dir, _op) +#endif + +#define CHANNEL_LIMIT_PUSH(_c, _dir) ({ CHANNEL_LIMIT_LOG(_c, _dir, "push from"); struct channel_limit_data cld = { .c = (_c), .dir = PLD_##_dir }; limit_push(CLP__##_dir(_c), &cld); }) +#define CHANNEL_LIMIT_POP(_c, _dir) ({ limit_pop(CLP__##_dir(_c)); CHANNEL_LIMIT_LOG(_c, _dir, "pop to"); }) /* * Channels @@ -486,6 +495,7 @@ struct channel_config { struct proto_config *parent; /* Where channel is defined (proto or template) */ struct rtable_config *table; /* Table we're attached to */ const struct filter *in_filter, *out_filter; /* Attached filters */ + struct channel_limit rx_limit; /* Limit for receiving routes from protocol (relevant when in_keep_filtered is active) */ struct channel_limit in_limit; /* Limit for importing routes from protocol */ @@ -513,9 +523,13 @@ struct channel { const struct filter *out_filter; /* Output filter */ struct bmap export_map; /* Keeps track which routes were really exported */ struct bmap export_reject_map; /* Keeps track which routes were rejected by export filter */ - struct channel_limit rx_limit; /* Receive limit (for in_keep_filtered) */ - struct channel_limit in_limit; /* Input limit */ - struct channel_limit out_limit; /* Output limit */ + + struct limit rx_limit; /* Receive limit (for in_keep_filtered) */ + struct limit in_limit; /* Input limit */ + struct limit out_limit; /* Output limit */ + + u8 limit_actions[PLD_MAX]; /* Limit actions enum */ + u8 limit_active; /* Flags for active limits */ struct event *feed_event; /* Event responsible for feeding */ struct fib_iterator feed_fit; /* Routing table iterator used during feeding */ diff --git a/nest/rt-table.c b/nest/rt-table.c index 146734f4..66e63acf 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -473,20 +473,16 @@ do_rt_notify(struct channel *c, const net_addr *net, rte *new, rte *old, int ref if (refeed && new) c->refeed_count++; - /* Apply export limit */ - struct channel_limit *l = &c->out_limit; - if (l->action && !old && new) - { - if (stats->routes >= l->limit) - channel_notify_limit(c, l, PLD_OUT, stats->routes); - - if (l->state == PLS_BLOCKED) + if (!old && new) + if (CHANNEL_LIMIT_PUSH(c, OUT)) { stats->updates_rejected++; rte_trace_out(D_FILTERS, c, new, "rejected [limit]"); return; } - } + + if (!new && old) + CHANNEL_LIMIT_POP(c, OUT); /* Apply export table */ struct rte_storage *old_exported = NULL; @@ -505,16 +501,10 @@ do_rt_notify(struct channel *c, const net_addr *net, rte *new, rte *old, int ref stats->withdraws_accepted++; if (old) - { bmap_clear(&c->export_map, old->id); - stats->routes--; - } if (new) - { bmap_set(&c->export_map, new->id); - stats->routes++; - } if (p->debug & D_ROUTES) { @@ -973,58 +963,53 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) int new_ok = rte_is_ok(new); int old_ok = rte_is_ok(old); - struct channel_limit *l = &c->rx_limit; - if (l->action && !old && new && !c->in_table) + if (!c->in_table) + { + if (!old && new) + if (CHANNEL_LIMIT_PUSH(c, RX)) + { + /* In receive limit the situation is simple, old is NULL so + we just free new and exit like nothing happened */ + + stats->updates_ignored++; + rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); + return; + } + + if (old && !new) + CHANNEL_LIMIT_POP(c, RX); + } + + if (!old_ok && new_ok) + if (CHANNEL_LIMIT_PUSH(c, IN)) { - u32 all_routes = stats->routes + stats->filtered; + /* In import limit the situation is more complicated. We + shouldn't just drop the route, we should handle it like + it was filtered. We also have to continue the route + processing if old or new is non-NULL, but we should exit + if both are NULL as this case is probably assumed to be + already handled. */ - if (all_routes >= l->limit) - channel_notify_limit(c, l, PLD_RX, all_routes); + stats->updates_ignored++; + rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - if (l->state == PLS_BLOCKED) - { - /* In receive limit the situation is simple, old is NULL so - we just free new and exit like nothing happened */ + if (c->in_keep_filtered) + new->flags |= REF_FILTERED; + else + new = NULL; - stats->updates_ignored++; - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - return; - } + /* Note that old && !new could be possible when + c->in_keep_filtered changed in the recent past. */ + + if (!old && !new) + return; + + new_ok = 0; + goto skip_stats1; } - l = &c->in_limit; - if (l->action && !old_ok && new_ok) - { - if (stats->routes >= l->limit) - channel_notify_limit(c, l, PLD_IN, stats->routes); - - if (l->state == PLS_BLOCKED) - { - /* In import limit the situation is more complicated. We - shouldn't just drop the route, we should handle it like - it was filtered. We also have to continue the route - processing if old or new is non-NULL, but we should exit - if both are NULL as this case is probably assumed to be - already handled. */ - - stats->updates_ignored++; - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - - if (c->in_keep_filtered) - new->flags |= REF_FILTERED; - else - new = NULL; - - /* Note that old && !new could be possible when - c->in_keep_filtered changed in the recent past. */ - - if (!old && !new) - return; - - new_ok = 0; - goto skip_stats1; - } - } + if (old_ok && !new_ok) + CHANNEL_LIMIT_POP(c, IN); if (new_ok) stats->updates_accepted++; @@ -1039,11 +1024,6 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) skip_stats1:; struct rte_storage *new_stored = new ? rte_store(new, net, table) : NULL; - if (new) - rte_is_filtered(new) ? stats->filtered++ : stats->routes++; - if (old) - rte_is_filtered(old) ? stats->filtered-- : stats->routes--; - if (table->config->sorted) { /* If routes are sorted, just insert new route to appropriate position */ @@ -2332,6 +2312,9 @@ rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *sr goto drop_update; } + if (!new) + CHANNEL_LIMIT_POP(c, RX); + /* Move iterator if needed */ if (*pos == c->reload_next_rte) c->reload_next_rte = (*pos)->next; @@ -2342,7 +2325,18 @@ rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *sr rte_free(del, tab); tab->rt_count--; } - else if (!new) + else if (new) + { + if (CHANNEL_LIMIT_PUSH(c, RX)) + { + /* Required by rte_trace_in() */ + new->net = n; + + rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); + goto drop_update; + } + } + else goto drop_withdraw; if (!new) @@ -2353,22 +2347,6 @@ rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *sr return 1; } - struct channel_limit *l = &c->rx_limit; - if (l->action && !*pos) - { - if (tab->rt_count >= l->limit) - channel_notify_limit(c, l, PLD_RX, tab->rt_count); - - if (l->state == PLS_BLOCKED) - { - /* Required by rte_trace_in() */ - new->net = n; - - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - goto drop_update; - } - } - /* Insert the new rte */ struct rte_storage *e = rte_store(new, net, tab); e->rte.sender = c; diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c index 55a0b526..74d0e518 100644 --- a/proto/pipe/pipe.c +++ b/proto/pipe/pipe.c @@ -217,6 +217,9 @@ pipe_show_stats(struct pipe_proto *p) struct import_stats *s2i = &p->sec->import_stats; struct export_stats *s2e = &p->sec->export_stats; + u32 pri_routes = p->pri->in_limit.count; + u32 sec_routes = p->sec->in_limit.count; + /* * Pipe stats (as anything related to pipes) are a bit tricky. There * are two sets of stats - s1 for ahook to the primary routing and @@ -239,7 +242,7 @@ pipe_show_stats(struct pipe_proto *p) */ cli_msg(-1006, " Routes: %u imported, %u exported", - s1i->routes, s2i->routes); + pri_routes, sec_routes); cli_msg(-1006, " Route change stats: received rejected filtered ignored accepted"); cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u", s2e->updates_received, s2e->updates_rejected + s1i->updates_invalid, @@ -270,8 +273,12 @@ pipe_show_proto_info(struct proto *P) cli_msg(-1006, " Import filter: %s", filter_name(p->sec->out_filter)); cli_msg(-1006, " Export filter: %s", filter_name(p->pri->out_filter)); - channel_show_limit(&p->pri->in_limit, "Import limit:"); - channel_show_limit(&p->sec->in_limit, "Export limit:"); + + + channel_show_limit(&p->pri->in_limit, "Import limit:", + (p->pri->limit_active & (1 << PLD_IN)), p->pri->limit_actions[PLD_IN]); + channel_show_limit(&p->sec->in_limit, "Export limit:", + (p->sec->limit_active & (1 << PLD_IN)), p->sec->limit_actions[PLD_IN]); if (P->proto_state != PS_DOWN) pipe_show_stats(p); From 575da88f7a6cac54d204839d2b2cfc1809811ba3 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Thu, 25 Feb 2021 21:52:49 +0100 Subject: [PATCH 018/149] Recursive route nexthop updates now announced with valid new_best/old_best information --- nest/rt-table.c | 84 +++++++++++++++++++++++++++---------------------- 1 file changed, 46 insertions(+), 38 deletions(-) diff --git a/nest/rt-table.c b/nest/rt-table.c index ee69d7c4..837e0ab9 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -349,7 +349,7 @@ rte_mergable(rte *pri, rte *sec) } static void -rte_trace(struct channel *c, rte *e, int dir, char *msg) +rte_trace(struct channel *c, rte *e, int dir, const char *msg) { log(L_TRACE "%s.%s %c %s %N %uL %uG %s", c->proto->name, c->name ?: "?", dir, msg, e->net, e->src->private_id, e->src->global_id, @@ -357,14 +357,14 @@ rte_trace(struct channel *c, rte *e, int dir, char *msg) } static inline void -rte_trace_in(uint flag, struct channel *c, rte *e, char *msg) +rte_trace_in(uint flag, struct channel *c, rte *e, const char *msg) { if ((c->debug & flag) || (c->proto->debug & flag)) rte_trace(c, e, '>', msg); } static inline void -rte_trace_out(uint flag, struct channel *c, rte *e, char *msg) +rte_trace_out(uint flag, struct channel *c, rte *e, const char *msg) { if ((c->debug & flag) || (c->proto->debug & flag)) rte_trace(c, e, '<', msg); @@ -1870,44 +1870,50 @@ rt_next_hop_update_rte(rtable *tab, net *n, rte *old) static inline int rt_next_hop_update_net(rtable *tab, net *n) { - struct rte_storage **k, *e, *new, *old_best, **new_best; + struct rte_storage *new; int count = 0; - int free_old_best = 0; - old_best = n->routes; + struct rte_storage *old_best = n->routes; if (!old_best) return 0; - for (k = &n->routes; e = *k; k = &e->next) + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) if (rta_next_hop_outdated(e->rte.attrs)) - { - new = rt_next_hop_update_rte(tab, n, &e->rte); - new->next = e->next; - *k = new; - - rte_trace_in(D_ROUTES, new->rte.sender, &new->rte, "updated"); - rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL); - - /* Call a pre-comparison hook */ - /* Not really an efficient way to compute this */ - if (e->rte.src->proto->rte_recalculate) - e->rte.src->proto->rte_recalculate(tab, n, &new->rte, &e->rte, NULL); - - if (e != old_best) - rte_free(e, tab); - else /* Freeing of the old best rte is postponed */ - free_old_best = 1; - - e = new; - count++; - } + count++; if (!count) return 0; + struct rte_multiupdate { + struct rte_storage *old, *new; + } *updates = alloca(sizeof(struct rte_multiupdate) * count); + + int pos = 0; + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) + if (rta_next_hop_outdated(e->rte.attrs)) + { + struct rte_storage *new = rt_next_hop_update_rte(tab, n, &e->rte); + + /* Call a pre-comparison hook */ + /* Not really an efficient way to compute this */ + if (e->rte.src->proto->rte_recalculate) + e->rte.src->proto->rte_recalculate(tab, n, &new->rte, &e->rte, &old_best->rte); + + updates[pos++] = (struct rte_multiupdate) { + .old = e, + .new = new, + }; + + /* Replace the route in the list */ + new->next = e->next; + *k = e = new; + } + + ASSERT_DIE(pos == count); + /* Find the new best route */ - new_best = NULL; - for (k = &n->routes; e = *k; k = &e->next) + struct rte_storage **new_best = NULL; + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) { if (!new_best || rte_better(&e->rte, &(*new_best)->rte)) new_best = k; @@ -1922,15 +1928,17 @@ rt_next_hop_update_net(rtable *tab, net *n) n->routes = new; } - /* Announce the new best route */ - if (new != old_best) - rte_trace_in(D_ROUTES, new->rte.sender, &new->rte, "updated [best]"); + /* Announce the changes */ + for (int i=0; irte.sender, &updates[i].new->rte, best_indicator[nb][ob]); + rte_announce_i(tab, RA_UNDEF, n, updates[i].new, updates[i].old, new, old_best); + } - /* Propagate changes */ - rte_announce_i(tab, RA_UNDEF, n, NULL, NULL, n->routes, old_best); - - if (free_old_best) - rte_free(old_best, tab); + for (int i=0; i Date: Fri, 1 May 2020 22:26:24 +0200 Subject: [PATCH 019/149] Nest: Route generations and explicit tracking route propagion through pipes --- conf/conf.h | 1 + doc/bird.sgml | 8 ++++++++ nest/config.Y | 3 ++- nest/route.h | 5 ++++- nest/rt-table.c | 47 +++++++++++++++++++++++---------------------- proto/pipe/config.Y | 7 ++++++- proto/pipe/pipe.c | 32 ++++++++++++++++-------------- proto/pipe/pipe.h | 2 ++ 8 files changed, 65 insertions(+), 40 deletions(-) diff --git a/conf/conf.h b/conf/conf.h index 55cb9c58..69ef8a10 100644 --- a/conf/conf.h +++ b/conf/conf.h @@ -45,6 +45,7 @@ struct config { int cli_debug; /* Tracing of CLI connections and commands */ int latency_debug; /* I/O loop tracks duration of each event */ + int pipe_debug; /* Track route propagation through pipes */ u32 latency_limit; /* Events with longer duration are logged (us) */ u32 watchdog_warning; /* I/O loop watchdog limit for warning (us) */ u32 watchdog_timeout; /* Watchdog timeout (in seconds, 0 = disabled) */ diff --git a/doc/bird.sgml b/doc/bird.sgml index a2138b55..d1a3b70f 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -4124,6 +4124,14 @@ include standard channel config options; see the example below.