From 14fc24f3a53ebc5525b854ccdc93274aa74a400f Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Fri, 26 Nov 2021 03:26:36 +0100 Subject: [PATCH] Trie: Implement longest-prefix-match queries and walks The prefix trie now supports longest-prefix-match query by function trie_match_longest_ipX() and it can be extended to iteration over all covering prefixes for a given prefix (from longest to shortest) using TRIE_WALK_TO_ROOT_IPx() macro. --- filter/data.h | 54 ++++++++++++ filter/trie.c | 190 ++++++++++++++++++++++++++++++++++++++++++++- filter/trie_test.c | 117 +++++++++++++++++++++++++++ test/birdtest.c | 5 +- 4 files changed, 364 insertions(+), 2 deletions(-) diff --git a/filter/data.h b/filter/data.h index 4a0ee865..28c7a888 100644 --- a/filter/data.h +++ b/filter/data.h @@ -196,11 +196,64 @@ void tree_walk(const struct f_tree *t, void (*hook)(const struct f_tree *, void struct f_trie *f_new_trie(linpool *lp, uint data_size); void *trie_add_prefix(struct f_trie *t, const net_addr *n, uint l, uint h); int trie_match_net(const struct f_trie *t, const net_addr *n); +int trie_match_longest_ip4(const struct f_trie *t, const net_addr_ip4 *net, net_addr_ip4 *dst, ip4_addr *found0); +int trie_match_longest_ip6(const struct f_trie *t, const net_addr_ip6 *net, net_addr_ip6 *dst, ip6_addr *found0); void trie_walk_init(struct f_trie_walk_state *s, const struct f_trie *t, const net_addr *from); int trie_walk_next(struct f_trie_walk_state *s, net_addr *net); int trie_same(const struct f_trie *t1, const struct f_trie *t2); void trie_format(const struct f_trie *t, buffer *buf); +/* Step @n down to the next shorter prefix length whose bit is set in @found; returns 1 if there is one, 0 otherwise */ +static inline int +trie_match_next_longest_ip4(net_addr_ip4 *n, ip4_addr *found) +{ + while (n->pxlen) + { + n->pxlen--; + ip4_clrbit(&n->prefix, n->pxlen); + + if (ip4_getbit(*found, n->pxlen)) + return 1; + } + + return 0; +} + +/* IPv6 counterpart of trie_match_next_longest_ip4() */ +static inline int +trie_match_next_longest_ip6(net_addr_ip6 *n, ip6_addr *found) +{ + while (n->pxlen) + { + n->pxlen--; + ip6_clrbit(&n->prefix, n->pxlen); + + if
(ip6_getbit(*found, n->pxlen)) + return 1; + } + + return 0; +} + + +/* Iterate over prefixes in @trie covering @net, from longest to shortest, each one as @dst; close the loop body with TRIE_WALK_TO_ROOT_END */ +#define TRIE_WALK_TO_ROOT_IP4(trie, net, dst) ({ \ + net_addr_ip4 dst; \ + ip4_addr _found; \ + for (int _n = trie_match_longest_ip4(trie, net, &dst, &_found); \ + _n; \ + _n = trie_match_next_longest_ip4(&dst, &_found)) + +#define TRIE_WALK_TO_ROOT_IP6(trie, net, dst) ({ \ + net_addr_ip6 dst; \ + ip6_addr _found; \ + for (int _n = trie_match_longest_ip6(trie, net, &dst, &_found); \ + _n; \ + _n = trie_match_next_longest_ip6(&dst, &_found)) + +#define TRIE_WALK_TO_ROOT_END }) + + #define TRIE_WALK(trie, net, from) ({ \ net_addr net; \ struct f_trie_walk_state tws_; \ @@ -209,6 +262,7 @@ void trie_format(const struct f_trie *t, buffer *buf); #define TRIE_WALK_END }) + #define F_CMP_ERROR 999 const char *f_type_name(enum f_type t); diff --git a/filter/trie.c b/filter/trie.c index 21b5b5d7..66b56297 100644 --- a/filter/trie.c +++ b/filter/trie.c @@ -85,7 +85,7 @@ * * Iteration over prefixes in a trie can be done using TRIE_WALK() macro, or * directly using trie_walk_init() and trie_walk_next() functions. The second - * approeach allows suspending the iteration and continuing in it later. + * approach allows suspending the iteration and continuing in it later. * Prefixes are enumerated in the usual lexicographic order and may be * restricted to a subset of the trie (all subnets of a specified prefix). * @@ -100,6 +100,13 @@ * path between the current node and its parent node, stored in the bitmap * &accept of the current node) and &local_pos for iteration over intra-node * prefixes (stored in the bitmap &local). + * + * The trie also supports longest-prefix-match query by trie_match_longest_ip4() + * and it can be extended to iteration over all covering prefixes for a given + * prefix (from longest to shortest) using TRIE_WALK_TO_ROOT_IP4() macro. There + * are also IPv6 versions (for practical reasons, these functions and macros are + * separate for IPv4 and IPv6).
The same limitation on enumeration of + * `implicit' prefixes applies as for the TRIE_WALK() macro above. */ #include "nest/bird.h" @@ -541,6 +548,187 @@ trie_match_net(const struct f_trie *t, const net_addr *n) } +/** + * trie_match_longest_ip4 + * @t: trie + * @net: net address + * @dst: return value + * @found0: optional returned bitmask of matching prefix lengths + * + * Perform longest prefix match for the address @net and return the resulting + * prefix in the buffer @dst. The bitmask @found0 is used to report lengths of + * prefixes on the path from the root to the resulting prefix. E.g., if there is + * also a /20 shorter matching prefix, then 20-th bit is set in @found0. This + * can be used to enumerate all matching prefixes for the network @net using + * function trie_match_next_longest_ip4() or macro TRIE_WALK_TO_ROOT_IP4(). + * + * This function assumes IPv4 trie, there is also an IPv6 variant. + * + * Result: 1 if a matching prefix was found, 0 if not (@dst, @found0 untouched). + */ +int +trie_match_longest_ip4(const struct f_trie *t, const net_addr_ip4 *net, net_addr_ip4 *dst, ip4_addr *found0) +{ + ASSERT(t->ipv4); + + const struct f_trie_node4 *n = &t->root.v4; + int len = 0; + + ip4_addr found = IP4_NONE; + int last = -1; + + while (n) + { + /* We are out of path */ + if (!ip4_prefix_equal(net->prefix, n->addr, MIN(net->pxlen, n->plen))) + goto done; + + /* Check accept mask */ + for (; len < n->plen; len++) + { + if (len > net->pxlen) + goto done; + + if (ip4_getbit(n->accept, len - 1)) + { + /* len is always < 32 due to len < n->plen */ + ip4_setbit(&found, len); + last = len; + } + } + + /* Special case for max length, there is only one valid local position; only last is updated, no bit is set in found */ + if (len == IP4_MAX_PREFIX_LENGTH) + { + if (n->local & (1u << 1)) + last = len; + + goto done; + } + + /* Check local mask */ + for (int pos = 1; pos < (1 << TRIE_STEP); pos = 2 * pos + ip4_getbit(net->prefix, len), len++) + { + if (len > net->pxlen) + goto done; + + if (n->local & (1u << pos)) + { + /* len is always
< 32 due to special case above */ + ip4_setbit(&found, len); + last = len; + } + } + + /* Choose child */ + n = n->c[ip4_getbits(net->prefix, n->plen, TRIE_STEP)]; + } + +done: + if (last < 0) + return 0; + + net_copy_ip4(dst, net); + dst->prefix = ip4_and(dst->prefix, ip4_mkmask(last)); + dst->pxlen = last; + + if (found0) + *found0 = found; + + return 1; +} + + +/** + * trie_match_longest_ip6 + * @t: trie + * @net: net address + * @dst: return value + * @found0: optional returned bitmask of matching prefix lengths + * + * Perform longest prefix match for the address @net and return the resulting + * prefix in the buffer @dst. The bitmask @found0 is used to report lengths of + * prefixes on the path from the root to the resulting prefix. E.g., if there is + * also a /20 shorter matching prefix, then 20-th bit is set in @found0. This + * can be used to enumerate all matching prefixes for the network @net using + * function trie_match_next_longest_ip6() or macro TRIE_WALK_TO_ROOT_IP6(). + * + * This function assumes IPv6 trie, there is also an IPv4 variant. + * + * Result: 1 if a matching prefix was found, 0 if not (@dst, @found0 untouched).
+ */ +int +trie_match_longest_ip6(const struct f_trie *t, const net_addr_ip6 *net, net_addr_ip6 *dst, ip6_addr *found0) +{ + ASSERT(!t->ipv4); + + const struct f_trie_node6 *n = &t->root.v6; + int len = 0; + + ip6_addr found = IP6_NONE; + int last = -1; + + while (n) + { + /* We are out of path */ + if (!ip6_prefix_equal(net->prefix, n->addr, MIN(net->pxlen, n->plen))) + goto done; + + /* Check accept mask */ + for (; len < n->plen; len++) + { + if (len > net->pxlen) + goto done; + + if (ip6_getbit(n->accept, len - 1)) + { + /* len is always < 128 due to len < n->plen */ + ip6_setbit(&found, len); + last = len; + } + } + + /* Special case for max length, there is only one valid local position; only last is updated, no bit is set in found */ + if (len == IP6_MAX_PREFIX_LENGTH) + { + if (n->local & (1u << 1)) + last = len; + + goto done; + } + + /* Check local mask */ + for (int pos = 1; pos < (1 << TRIE_STEP); pos = 2 * pos + ip6_getbit(net->prefix, len), len++) + { + if (len > net->pxlen) + goto done; + + if (n->local & (1u << pos)) + { + /* len is always < 128 due to special case above */ + ip6_setbit(&found, len); + last = len; + } + } + + /* Choose child */ + n = n->c[ip6_getbits(net->prefix, n->plen, TRIE_STEP)]; + } + +done: + if (last < 0) + return 0; + + net_copy_ip6(dst, net); + dst->prefix = ip6_and(dst->prefix, ip6_mkmask(last)); + dst->pxlen = last; + + if (found0) + *found0 = found; + + return 1; +} + #define SAME_PREFIX(A,B,X,L) ((X) ? ip4_prefix_equal((A)->v4.addr, net4_prefix(B), (L)) : ip6_prefix_equal((A)->v6.addr, net6_prefix(B), (L))) #define GET_NET_BITS(N,X,A,B) ((X) ?
ip4_getbits(net4_prefix(N), (A), (B)) : ip6_getbits(net6_prefix(N), (A), (B))) diff --git a/filter/trie_test.c b/filter/trie_test.c index bb9a2f26..eee48284 100644 --- a/filter/trie_test.c +++ b/filter/trie_test.c @@ -774,6 +774,122 @@ t_trie_walk(void) return 1; } +/* Reference lookup: store in @found every prefix of the sorted array @prefixes that covers @net, longest first; returns their count */ +static int +find_covering_nets(struct f_prefix *prefixes, int num, const net_addr *net, net_addr *found) +{ + struct f_prefix key; + net_addr *n = &key.net; + int found_num = 0; + + net_copy(n, net); + + while (1) + { + struct f_prefix *px = + bsearch(&key, prefixes, num, sizeof(struct f_prefix), compare_prefixes); + + if (px) + { + net_copy(&found[found_num], n); + found_num++; + } + + if (n->pxlen == 0) + return found_num; + + n->pxlen--; + + if (n->type == NET_IP4) + ip4_clrbit(&((net_addr_ip4 *) n)->prefix, n->pxlen); + else + ip6_clrbit(&((net_addr_ip6 *) n)->prefix, n->pxlen); + } +} + +static int +t_trie_walk_to_root(void) +{ + bt_bird_init(); + bt_config_parse(BT_CONFIG_SIMPLE); + + linpool *lp = lp_new_default(&root_pool); + for (int round = 0; round < TESTS_NUM * 4; round++) + { + int level = round / TESTS_NUM; + int v6 = level % 2; + int num = PREFIXES_NUM * (int[]){32, 512}[level / 2]; + int pos = 0; + int st = 0, sn = 0, sm = 0; + + list *prefixes = make_random_prefix_list(lp, num, v6, 1); + struct f_trie *trie = make_trie_from_prefix_list(lp, prefixes); + struct f_prefix *pxset = malloc((num + 1) * sizeof(struct f_prefix)); + + struct f_prefix_node *pxn; + WALK_LIST(pxn, *prefixes) + pxset[pos++] = pxn->prefix; + memset(&pxset[pos], 0, sizeof (struct f_prefix)); + + qsort(pxset, num, sizeof(struct f_prefix), compare_prefixes); + + int i; + for (i = 0; i < (PREFIX_TESTS_NUM / 10); i++) + { + net_addr from; + get_random_net(&from, v6); + + net_addr found[129]; + int found_num = find_covering_nets(pxset, num, &from, found); + int n = 0; + + if (bt_verbose >= BT_VERBOSE_ABSOLUTELY_ALL) + { + char buf[64]; + bt_format_net(buf, 64, &from); + bt_debug("Lookup for %s (expect %d)\n", buf,
found_num); + } + + /* Walk to root, separate for IPv4 and IPv6 */ + if (!v6) + { + TRIE_WALK_TO_ROOT_IP4(trie, (net_addr_ip4 *) &from, net) + { + log_networks((net_addr *) &net, &found[n]); + bt_assert((n < found_num) && net_equal((net_addr *) &net, &found[n])); + n++; + } + TRIE_WALK_TO_ROOT_END; + } + else + { + TRIE_WALK_TO_ROOT_IP6(trie, (net_addr_ip6 *) &from, net) + { + log_networks((net_addr *) &net, &found[n]); + bt_assert((n < found_num) && net_equal((net_addr *) &net, &found[n])); + n++; + } + TRIE_WALK_TO_ROOT_END; + } + + bt_assert(n == found_num); + + /* Stats */ + st += n; + sn += !!n; + sm = MAX(sm, n); + } + + bt_debug("Success in %d / %d, sum %d, max %d\n", sn, i, st, sm); + + free(pxset); /* malloc()ed per round, not owned by the linpool */ + lp_flush(lp); + } + + bt_bird_cleanup(); + return 1; +} + int main(int argc, char *argv[]) { @@ -784,6 +900,7 @@ main(int argc, char *argv[]) bt_test_suite(t_match_outer_net, "Testing random outer prefix matching"); bt_test_suite(t_trie_same, "A trie filled forward should be same with a trie filled backward."); bt_test_suite(t_trie_walk, "Testing TRIE_WALK() on random tries"); + bt_test_suite(t_trie_walk_to_root, "Testing TRIE_WALK_TO_ROOT() on random tries"); // bt_test_suite(t_bench_trie_datasets_subset, "Benchmark tries from datasets by random subset of nets"); // bt_test_suite(t_bench_trie_datasets_random, "Benchmark tries from datasets by generated addresses"); diff --git a/test/birdtest.c b/test/birdtest.c index 053954e1..6ad743ce 100644 --- a/test/birdtest.c +++ b/test/birdtest.c @@ -510,7 +510,10 @@ bt_fmt_ipa(char *buf, size_t size, const void *data) void bt_format_net(char *buf, size_t size, const void *data) { - bsnprintf(buf, size, "%N", (const net_addr *) data); + if (data) + bsnprintf(buf, size, "%N", (const net_addr *) data); + else + bsnprintf(buf, size, "(null)"); } int