From 4e27fb34a105675b894521171d84643a207137bf Mon Sep 17 00:00:00 2001
From: Maria Matejka
Date: Fri, 5 Apr 2019 15:53:21 +0200
Subject: [PATCH] Trie index: insert and find of same-length data without growing

---
 lib/Makefile      |   4 +-
 lib/tindex.c      | 496 ++++++++++++++++++++++++++++++++++++++++++++++
 lib/tindex.h      |  56 ++++++
 lib/tindex_test.c |  76 ++++++++
 4 files changed, 630 insertions(+), 2 deletions(-)
 create mode 100644 lib/tindex.c
 create mode 100644 lib/tindex.h
 create mode 100644 lib/tindex_test.c

diff --git a/lib/Makefile b/lib/Makefile
index 01f3114d..b612e6f3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -1,7 +1,7 @@
-src := bitops.c checksum.c event.c flowspec.c idm.c ip.c lists.c mac.c md5.c mempool.c net.c patmatch.c printf.c resource.c sha1.c sha256.c sha512.c slab.c slists.c tbf.c timer.c xmalloc.c
+src := bitops.c checksum.c event.c flowspec.c idm.c ip.c lists.c mac.c md5.c mempool.c net.c patmatch.c printf.c resource.c sha1.c sha256.c sha512.c slab.c slists.c tbf.c timer.c tindex.c xmalloc.c
 obj := $(src-o-files)
 $(all-daemon)
 
-tests_src := heap_test.c buffer_test.c event_test.c flowspec_test.c bitops_test.c patmatch_test.c fletcher16_test.c slist_test.c checksum_test.c lists_test.c mac_test.c ip_test.c hash_test.c printf_test.c
+tests_src := heap_test.c buffer_test.c event_test.c flowspec_test.c bitops_test.c patmatch_test.c fletcher16_test.c slist_test.c checksum_test.c lists_test.c mac_test.c ip_test.c hash_test.c printf_test.c tindex_test.c
 tests_targets := $(tests_targets) $(tests-target-files)
 tests_objs := $(tests_objs) $(src-o-files)
diff --git a/lib/tindex.c b/lib/tindex.c
new file mode 100644
index 00000000..934151a8
--- /dev/null
+++ b/lib/tindex.c
@@ -0,0 +1,496 @@
+/*
+ *      Trie index for efficient trie storage
+ *
+ *      (c) 2019 Maria Matejka
+ *      (c) 2019 CZ.NIC z.s.p.o.
+ *
+ *      Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#include "nest/bird.h"
+#include "lib/idm.h"
+#include "lib/tindex.h"
+
+#undef LOCAL_DEBUG
+#define LOCAL_DEBUG
+
+#define TI_MIN_UNIT_SIZE 4
+#define TI_MIN_ADDRESS_SIZE 6
+
+/*
+ * Node storage. The node idx occupies one unit of unit_size bytes and it is
+ * accessed through the member matching the unit size: data4[idx],
+ * data6[idx * 3 ...], data8[idx] or data12[idx * 3 ...].
+ */
+union tindex_data {
+  u32 data4[0];
+  u16 data6[0];
+  u64 data8[0];
+  u32 data12[0];
+};
+
+struct tindex {
+  union tindex_data *index_data;  /* Node storage */
+  pool *p;                        /* Pool used for allocations */
+  struct idm idm;                 /* Allocator of node indices */
+  u8 unit_size;                   /* Size of one node in bytes */
+  u8 address_size;                /* Width of one node link in bits */
+};
+
+/* Create an empty index in the pool @p. The root node always has index 1. */
+struct tindex *
+tindex_new(pool *p)
+{
+  struct tindex *ti = mb_allocz(p, sizeof(struct tindex));
+  ti->p = p;
+  ti->unit_size = TI_MIN_UNIT_SIZE;
+  ti->address_size = TI_MIN_ADDRESS_SIZE;
+  ti->index_data = mb_allocz(p, ti->unit_size * (1 << ti->address_size));
+  idm_init(&(ti->idm), p, (1 << ti->address_size));
+  u32 rootnode = idm_alloc(&(ti->idm));
+  ASSERT(rootnode == 1);
+  return ti;
+}
+
+/*
+ * Allocate an index for a new node. The node storage does not grow yet, so
+ * an index which does not fit into address_size bits can be neither stored
+ * in a link nor backed by index_data. Fail loudly instead of writing there.
+ */
+static inline u64
+tindex_alloc_idx(struct tindex *ti)
+{
+  u64 idx = idm_alloc(&(ti->idm));
+  if (idx >> ti->address_size)
+    bug("Trie index is full, growing is not implemented");
+  return idx;
+}
+
+/* Get the bits stored in the node @idx; their count is stored to @len. */
+static inline u64
+tindex_data(const struct tindex *ti, u64 asize, u64 usize, u64 dsize, u64 dshift, u64 idx, uint *len)
+{
+  u64 data;
+  switch (usize) {
+    case 4:
+      data = ti->index_data->data4[idx] >> dshift;
+      break;
+    case 6:
+      data =
+        ((u64)(ti->index_data->data6[idx * 3] >> asize) << (dshift * 2)) |
+        ((u64)(ti->index_data->data6[idx * 3 + 1] >> asize) << (dshift)) |
+        (u64)(ti->index_data->data6[idx * 3 + 2] >> asize);
+      break;
+    case 8:
+      data = ti->index_data->data8[idx] >> dshift;
+      break;
+    case 12:
+      data =
+        ((u64)(ti->index_data->data12[idx * 3] >> asize) << (dshift * 2)) |
+        ((u64)(ti->index_data->data12[idx * 3 + 1] >> asize) << (dshift)) |
+        (u64)(ti->index_data->data12[idx * 3 + 2] >> asize);
+      break;
+    default:
+      bug("This shall never happen");
+  }
+
+  u64 out = u64_var_decode(data, len);
+
+  /* NOTE(review): 64 presumably marks an empty unit; confirm in u64_var_decode() */
+  if (*len == 64)
+    *len = 0;
+  else
+    *len = dsize - *len;
+
+  return out;
+}
+
+/* Get the index of the left child of the node @idx (0 = none) */
+static inline u64
+tindex_left(const struct tindex *ti, u64 idx, u64 usize, u64 asize, u64 addrmask)
+{
+  switch (usize) {
+    case 4: return (ti->index_data->data4[idx] >> (asize * 2)) & addrmask;
+    case 6: return ti->index_data->data6[idx * 3] & addrmask;
+    case 8: return (ti->index_data->data8[idx] >> (asize * 2)) & addrmask;
+    case 12: return ti->index_data->data12[idx * 3] & addrmask;
+    default: bug("This shall never happen");
+  }
+}
+
+/* Get the index of the right child of the node @idx (0 = none) */
+static inline u64
+tindex_right(const struct tindex *ti, u64 idx, u64 usize, u64 asize, u64 addrmask)
+{
+  switch (usize) {
+    case 4: return (ti->index_data->data4[idx] >> (asize)) & addrmask;
+    case 6: return ti->index_data->data6[idx * 3 + 1] & addrmask;
+    case 8: return (ti->index_data->data8[idx] >> (asize)) & addrmask;
+    case 12: return ti->index_data->data12[idx * 3 + 1] & addrmask;
+    default: bug("This shall never happen");
+  }
+}
+
+/* Get the index of the parent of the node @idx (0 = none) */
+static inline u64
+tindex_up(const struct tindex *ti, u64 idx, u64 usize, u64 addrmask)
+{
+  switch (usize) {
+    case 4: return ti->index_data->data4[idx] & addrmask;
+    case 6: return ti->index_data->data6[idx * 3 + 2] & addrmask;
+    case 8: return ti->index_data->data8[idx] & addrmask;
+    case 12: return ti->index_data->data12[idx * 3 + 2] & addrmask;
+    default: bug("This shall never happen");
+  }
+}
+
+/* Write the whole node @idx: @dlen bits of @data and all three links */
+static inline void
+tindex_put(struct tindex *ti, u64 idx, u64 usize, u64 asize, u64 dsize, u64 dshift, u64 data, uint dlen, u64 left, u64 right, u64 up)
+{
+  const u64 dsmask = (1ULL << dshift) - 1;
+  data = u64_var_encode(data, dsize - dlen);
+
+  switch (usize) {
+    case 4:
+      ti->index_data->data4[idx] = (data << dshift) | (left << (asize * 2)) | (right << asize) | up;
+      return;
+    case 6:
+      ti->index_data->data6[idx * 3    ] = left | ((data >> (2 * dshift)) << asize);
+      ti->index_data->data6[idx * 3 + 1] = right | (((data >> dshift) & dsmask) << asize);
+      ti->index_data->data6[idx * 3 + 2] = up | ((data & dsmask) << asize);
+      return;
+    case 8:
+      ti->index_data->data8[idx] = (data << dshift) | (left << (asize * 2)) | (right << asize) | up;
+      return;
+    case 12:
+      ti->index_data->data12[idx * 3    ] = left | ((data >> (2 * dshift)) << asize);
+      ti->index_data->data12[idx * 3 + 1] = right | (((data >> dshift) & dsmask) << asize);
+      ti->index_data->data12[idx * 3 + 2] = up | ((data & dsmask) << asize);
+      return;
+    default: bug("This shall never happen");
+  }
+}
+
+/* Clear the left child link of the node @idx */
+static inline void
+tindex_left_clear(struct tindex *ti, u64 idx, u64 usize, u64 asize, u64 addrmask)
+{
+  switch (usize) {
+    case 4: ti->index_data->data4[idx] &= ~(addrmask << (asize * 2)); break;
+    case 6: ti->index_data->data6[idx * 3] &= ~addrmask; break;
+    case 8: ti->index_data->data8[idx] &= ~(addrmask << (asize * 2)); break;
+    case 12: ti->index_data->data12[idx * 3] &= ~addrmask; break;
+    default: bug("This shall never happen");
+  }
+}
+
+/* Clear the right child link of the node @idx */
+static inline void
+tindex_right_clear(struct tindex *ti, u64 idx, u64 usize, u64 asize, u64 addrmask)
+{
+  switch (usize) {
+    case 4: ti->index_data->data4[idx] &= ~(addrmask << asize); break;
+    case 6: ti->index_data->data6[idx * 3 + 1] &= ~addrmask; break;
+    case 8: ti->index_data->data8[idx] &= ~(addrmask << asize); break;
+    case 12: ti->index_data->data12[idx * 3 + 1] &= ~addrmask; break;
+    default: bug("This shall never happen");
+  }
+}
+
+/* Set the left child link of the node @idx to @nidx */
+static inline void
+tindex_left_set(struct tindex *ti, u64 idx, u64 usize, u64 asize, u64 nidx)
+{
+  /* The left child must have been zero before */
+  switch (usize) {
+    case 4: ti->index_data->data4[idx] |= nidx << (asize * 2); break;
+    case 6: ti->index_data->data6[idx * 3] |= nidx; break;
+    case 8: ti->index_data->data8[idx] |= nidx << (asize * 2); break;
+    case 12: ti->index_data->data12[idx * 3] |= nidx; break;
+    default: bug("This shall never happen");
+  }
+}
+
+/* Set the right child link of the node @idx to @nidx */
+static inline void
+tindex_right_set(struct tindex *ti, u64 idx, u64 usize, u64 asize, u64 nidx)
+{
+  /* The right child must have been zero before */
+  switch (usize) {
+    case 4: ti->index_data->data4[idx] |= nidx << asize; break;
+    case 6: ti->index_data->data6[idx * 3 + 1] |= nidx; break;
+    case 8: ti->index_data->data8[idx] |= nidx << asize; break;
+    case 12: ti->index_data->data12[idx * 3 + 1] |= nidx; break;
+    default: bug("This shall never happen");
+  }
+}
+
+/* Replace the child @oidx of the node @idx by @nidx */
+static inline void
+tindex_child_update(struct tindex *ti, u64 idx, u64 usize, u64 asize, u64 addrmask, u64 oidx, u64 nidx)
+{
+  if (oidx == tindex_left(ti, idx, usize, asize, addrmask)) {
+    tindex_left_clear(ti, idx, usize, asize, addrmask);
+    tindex_left_set(ti, idx, usize, asize, nidx);
+  } else {
+    ASSERT(oidx == tindex_right(ti, idx, usize, asize, addrmask));
+    tindex_right_clear(ti, idx, usize, asize, addrmask);
+    tindex_right_set(ti, idx, usize, asize, nidx);
+  }
+}
+
+/*
+ * Take at most @dlen (up to 64) bits from the input @bits_in of @blen bits,
+ * starting at the bit position *@bpos; bit 0 is the MSB of bits_in[0].
+ * The bits are stored right-aligned to @bits and *@bpos is advanced.
+ * Returns the number of bits taken, zero on end of input.
+ */
+static inline uint tindex_input_bits(const u64 *bits_in, const uint blen, uint *bpos, const uint dlen, u64 *bits) {
+  uint bmax = blen - *bpos;             /* How much remains in the input */
+  uint ilen = MIN(bmax, dlen);          /* How much we really take */
+
+  if (ilen == 0) {                      /* End of input */
+    *bits = 0;
+    return 0;
+  }
+
+  ASSERT(ilen <= 64);                   /* The limit of output bit count is 64 */
+  uint bend = *bpos + ilen - 1;         /* The last bit, inclusive (!) */
+
+  /* Crop the bits at the end */
+  *bits = (bits_in[bend / 64] >> (63 - (bend % 64)));
+
+  /* Prepend bits from the previous item if the range goes over */
+  if (bend / 64 > *bpos / 64)
+    *bits |= bits_in[*bpos / 64] << (1 + bend % 64);
+  else
+    ASSERT(bend / 64 == *bpos / 64);
+
+  /* Advance the bit pointer */
+  *bpos += ilen;
+
+  /* Return the wanted bits; a full 64-bit take needs no masking */
+  if (ilen < 64)
+    *bits &= (1ULL << ilen) - 1;
+  return ilen;
+}
+
+const char dump_indent[] = "                                ";
+/* Never step before the beginning of dump_indent, however deep we are */
+#define INDENT (dump_indent + sizeof(dump_indent) - 1 - MIN(depth, sizeof(dump_indent) - 1))
+
+/* Dump the subtree of the node @idx; @bit is the branch taken from its parent */
+static void
+_tindex_dump(const struct tindex *ti, u64 idx, uint depth, uint bit)
+{
+  const uint asize = ti->address_size;
+  const uint usize = ti->unit_size;
+  const uint dsize = usize * 8 - asize * 3;
+
+  const uint dshift = (usize % 3) ? (asize * 3) : (dsize / 3);
+  const u64 addrmask = (1ULL << ti->address_size) - 1;
+
+  /* Validate unit size */
+  switch (usize) {
+    case 4:
+    case 6:
+    case 8:
+    case 12: break;
+    default: bug("This shall never happen");
+  }
+
+  uint dlen;
+  u64 data = tindex_data(ti, asize, usize, dsize, dshift, idx, &dlen);
+  /* Show the branch bit taken from the parent as the topmost bit */
+  if (depth && bit)
+    data |= 1ULL << dlen;
+  if (depth)
+    dlen++;
+
+  debug("%s0x%lx/%u (%lu)\n", INDENT, data, dlen, idx);
+  u64 left = tindex_left(ti, idx, usize, asize, addrmask);
+  if (left)
+    _tindex_dump(ti, left, depth+1, 0);
+
+  u64 right = tindex_right(ti, idx, usize, asize, addrmask);
+  if (right)
+    _tindex_dump(ti, right, depth+1, 1);
+}
+
+/* Dump the whole index, starting at the root node */
+void
+tindex_dump(const struct tindex *ti)
+{
+  _tindex_dump(ti, 1, 0, 0);
+}
+
+/*
+ * Find the node representing the first @blen bits of @bits_in; see
+ * lib/tindex.h for the contract.
+ */
+u64
+tindex_find(struct tindex *ti, const u64 *bits_in, const uint blen, const int create)
+{
+  const uint asize = ti->address_size;
+  const uint usize = ti->unit_size;
+  const uint dsize = usize * 8 - asize * 3;
+
+  const uint dshift = (usize % 3) ? (asize * 3) : (dsize / 3);
+  const u64 addrmask = (1ULL << ti->address_size) - 1;
+
+  /* Validate unit size */
+  switch (usize) {
+    case 4:
+    case 6:
+    case 8:
+    case 12: break;
+    default: bug("This shall never happen");
+  }
+
+  u64 idx = 1;  /* The root node is always 1 */
+  u64 uidx = 0; /* Parent node is 0 on beginning */
+
+  uint bpos = 0;
+
+  while (1) {
+    /* Get data from trie */
+    uint dlen;
+    u64 data = tindex_data(ti, asize, usize, dsize, dshift, idx, &dlen);
+
+    /* Get data from input */
+    u64 bits;
+    uint ilen = tindex_input_bits(bits_in, blen, &bpos, dlen, &bits);
+
+    /* Check whether this node matches the data */
+    int match = ((ilen == dlen) && (bits == data));
+
+    /* Doesn't match and we are just traversing */
+    if (!create && !match)
+      return 0;
+
+    /* The bit strings match */
+    if (match) {
+      /* Get one more bit */
+      ilen = tindex_input_bits(bits_in, blen, &bpos, 1, &bits);
+
+      /* No more bits, we're done */
+      if (!ilen)
+        return idx;
+
+      /* Just one bit, to be sure */
+      ASSERT(bits < 2);
+
+      /* Go left or right? */
+      u64 nidx = bits ? tindex_right(ti, idx, usize, asize, addrmask) : tindex_left(ti, idx, usize, asize, addrmask);
+
+      /* There is a path, we'll follow it. */
+      if (nidx) {
+        uidx = idx;
+        idx = nidx;
+        continue;
+      }
+
+      /* There is no path and we shan't create it. */
+      if (!create)
+        return 0;
+
+      /* So there will be a new node on path. */
+      nidx = tindex_alloc_idx(ti);
+
+      /* Left or right? */
+      if (bits)
+        tindex_right_set(ti, idx, usize, asize, nidx);
+      else
+        tindex_left_set(ti, idx, usize, asize, nidx);
+
+      /* Go there. */
+      uidx = idx;
+      idx = nidx;
+
+      /* And now we shall continue by the brand new node. */
+      break;
+    }
+
+    /* Move the bits to same places */
+    u64 shorter = dlen - ilen;
+    bits <<= shorter;
+
+    /* What is the common part? */
+    u64 diflen = u64_log2(bits ^ data) + 1;
+
+    /* To be sure that the split is right. */
+    ASSERT((bits >> diflen) == (data >> diflen));
+    ASSERT(((bits >> (diflen - 1)) ^ (data >> (diflen - 1))) == 1);
+
+    /* Get the common part */
+    u64 common = data >> diflen;
+    u64 comlen = dlen - diflen;
+
+    /* Return the differing part to the input buffer (if there is some) */
+    /* NOTE(review): unsigned arithmetic, this is true whenever ilen != comlen */
+    int split = (ilen - comlen > 0);
+    if (split)
+      bpos -= ilen - comlen - 1;
+
+    /* Split out the first different bit */
+    u64 dataright = !!(data & (1ULL << (diflen - 1)));
+    dlen = diflen - 1;
+    data &= (1ULL << dlen) - 1;
+
+    /* Allocate the splitting index */
+    u64 midx = tindex_alloc_idx(ti);
+
+    /* Allocate the new node if it shall exist */
+    u64 nidx = split ? tindex_alloc_idx(ti) : 0;
+
+    /* Relink idx -> midx in the parent node */
+    if (uidx)
+      tindex_child_update(ti, uidx, usize, asize, addrmask, idx, midx);
+
+    /* Setup the splitting index (midx) */
+    tindex_put(ti, midx, usize, asize, dsize, dshift, common, comlen, dataright ? nidx : idx, dataright ? idx : nidx, uidx);
+
+    /* Update the existing index (idx) */
+    tindex_put(ti, idx, usize, asize, dsize, dshift, data, dlen, tindex_left(ti, idx, usize, asize, addrmask), tindex_right(ti, idx, usize, asize, addrmask), midx);
+
+    /* Go down to the new child; its parent is the splitting node */
+    uidx = midx;
+    idx = nidx;
+
+    /* Grow there a branch if it has to be grown, otherwise return */
+    if (split)
+      break;
+    else
+      return midx;
+  }
+
+  /* Growing a new branch */
+  while (1) {
+    /* Get more data from input */
+    u64 data;
+    uint ilen = tindex_input_bits(bits_in, blen, &bpos, dsize - 1, &data);
+
+    /* For the single bit */
+    u64 dataright = ~0;
+
+    /* End of input data */
+    if ((ilen < dsize - 1) || !tindex_input_bits(bits_in, blen, &bpos, 1, &dataright)) {
+      tindex_put(ti, idx, usize, asize, dsize, dshift, data, ilen, 0, 0, uidx);
+      return idx;
+    }
+
+    /* Just one bit. */
+    ASSERT(dataright < 2);
+
+    /* Create a new node */
+    u64 nidx = tindex_alloc_idx(ti);
+
+    /* Link it into the trie */
+    tindex_put(ti, idx, usize, asize, dsize, dshift, data, ilen, dataright ? 0 : nidx, dataright ? nidx : 0, uidx);
+
+    /* And continue there */
+    uidx = idx;
+    idx = nidx;
+  }
+}
diff --git a/lib/tindex.h b/lib/tindex.h
new file mode 100644
index 00000000..acd65c42
--- /dev/null
+++ b/lib/tindex.h
@@ -0,0 +1,56 @@
+/*
+ *      Trie index for efficient trie storage
+ *
+ *      (c) 2019 Maria Matejka
+ *      (c) 2019 CZ.NIC z.s.p.o.
+ *
+ *      Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#ifndef _BIRD_TINDEX_H_
+#define _BIRD_TINDEX_H_
+
+#include "nest/bird.h"
+
+/**
+ * The tindex_bitcheck() callback type is meant to supply input bits on
+ * demand. The maximal number of bits is given in @len; it shall be replaced
+ * by the actual number of bits returned. The bits shall be returned in LSB
+ * of the return value. If (and only if) no bits are remaining, @len shall
+ * be changed, otherwise the callee must always return the full bit string.
+ *
+ * This is intended to be implemented as a nested function in
+ * a library call using this tree index.
+ *
+ * Note: tindex_find() currently takes the bits as an array and does not
+ * call this callback.
+ **/
+
+typedef u64 (*tindex_bitcheck)(u8 *len);
+
+/**
+ * Allocate a new tr[ei]e index from the given pool
+ * @p: pool to allocate from
+ *
+ * Returns the allocated tindex structure.
+ */
+struct tindex* tindex_new(pool *p);
+
+/**
+ * Find an index of the given bit string.
+ * @ti: the index to look into
+ * @bits_in: the bit string; bit 0 is the MSB of bits_in[0]
+ * @blen: length of the bit string in bits
+ * @create: 0 to find only existing records, 1 to create new
+ * Return value: 0 for not found (create == 0) or retry (create == 1); nonzero = the index
+ */
+
+u64 tindex_find(struct tindex *ti, const u64 *bits_in, const uint blen, const int create);
+
+/**
+ * Dump the index. Useful for debugging.
+ */
+
+void tindex_dump(const struct tindex *ti);
+
+#endif
diff --git a/lib/tindex_test.c b/lib/tindex_test.c
new file mode 100644
index 00000000..e11af414
--- /dev/null
+++ b/lib/tindex_test.c
@@ -0,0 +1,76 @@
+/*
+ *      BIRD Library -- Trie index Tests
+ *
+ *      (c) 2019 Maria Matejka
+ *      (c) 2019 CZ.NIC z.s.p.o.
+ *
+ *      Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#include "test/birdtest.h"
+#include "test/bt-utils.h"
+#include "lib/tindex.h"
+
+/* The trie index together with a per-index counter array */
+struct test_trie {
+  struct tindex *ti;
+  u64 *data;
+  u64 len;
+};
+
+/* Insert @data and count the insertion at the returned index */
+static inline void test_trie_add(struct test_trie *tt, u64 data) {
+  u64 idx = tindex_find(tt->ti, &data, 64, 1);
+
+  /* Grow the counter array until idx is a valid position in it */
+  u64 nlen = tt->len;
+  while (idx >= nlen)
+    nlen *= 2;
+
+  if (nlen > tt->len) {
+    tt->data = mb_realloc(tt->data, nlen * sizeof(u64));
+    memset(&(tt->data[tt->len]), 0, (nlen - tt->len) * sizeof(u64));
+    tt->len = nlen;
+  }
+
+  tt->data[idx]++;
+}
+
+/* Get the insertion count of @data, zero if not found */
+static inline u64 test_trie_get(struct test_trie *tt, u64 data) {
+  u64 idx = tindex_find(tt->ti, &data, 64, 0);
+  if (!idx) return 0;
+  return tt->data[idx];
+}
+
+/* Every value inserted once must be found exactly once */
+static int
+t_simple(void)
+{
+  pool *p = rp_new(&root_pool, "tindex test");
+  struct test_trie tt = {
+    .ti = tindex_new(p),
+    .data = mb_allocz(p, sizeof(u64) * 256),
+    .len = 256,
+  };
+
+  bt_assert(tt.ti);
+  for (u64 i = 0; i < 20; i++) {
+    bt_debug("Trie add: %lu\n", i);
+    test_trie_add(&tt, i);
+    tindex_dump(tt.ti);
+  }
+
+  for (u64 i = 0; i < 20; i++)
+    bt_assert(test_trie_get(&tt, i) == 1);
+
+  return 1;
+}
+
+int main(int argc, char **argv)
+{
+  bt_init(argc, argv);
+  bt_bird_init();
+  bt_test_suite(t_simple, "tindex");
+  return bt_exit_value();
+}