0
0
mirror of https://gitlab.nic.cz/labs/bird.git synced 2024-09-20 12:25:20 +00:00
bird/nest/rt-attr.c

1608 lines
36 KiB
C

/*
* BIRD -- Route Attribute Cache
*
* (c) 1998--2000 Martin Mares <mj@ucw.cz>
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
/**
* DOC: Route attribute cache
*
* Each route entry carries a set of route attributes. Several of them
* vary from route to route, but most attributes are usually common
* for a large number of routes. To conserve memory, we've decided to
* store only the varying ones directly in the &rte and hold the rest
* in a special structure called &rta which is shared among all the
* &rte's with these attributes.
*
* Each &rta contains all the static attributes of the route (i.e.,
* those which are always present) as structure members and a list of
* dynamic attributes represented by a linked list of &ea_list
* structures, each of them consisting of an array of &eattr's containing
* the individual attributes. An attribute can be specified more than once
* in the &ea_list chain and in such case the first occurrence overrides
* the others. This semantics is used especially when someone (for example
* a filter) wishes to alter values of several dynamic attributes, but
* it wants to preserve the original attribute lists maintained by
* another module.
*
* Each &eattr contains an attribute identifier (split to protocol ID and
* per-protocol attribute ID), protocol dependent flags, a type code (consisting
* of several bit fields describing attribute characteristics) and either an
* embedded 32-bit value or a pointer to a &adata structure holding attribute
* contents.
*
* There exist two variants of &rta's -- cached and un-cached ones. Un-cached
* &rta's can have arbitrarily complex structure of &ea_list's and they
* can be modified by any module in the route processing chain. Cached
* &rta's have their attribute lists normalized (that means at most one
* &ea_list is present and its values are sorted in order to speed up
* searching), they are stored in a hash table to make fast lookup possible
* and they are provided with a use count to allow sharing.
*
* Routing tables always contain only cached &rta's.
*/
#include "nest/bird.h"
#include "nest/route.h"
#include "nest/protocol.h"
#include "nest/iface.h"
#include "nest/cli.h"
#include "nest/attrs.h"
#include "lib/alloca.h"
#include "lib/hash.h"
#include "lib/idm.h"
#include "lib/resource.h"
#include "lib/rcu.h"
#include "lib/string.h"
#include <stddef.h>
const adata null_adata; /* adata of length 0 */
const char * const rta_src_names[RTS_MAX] = {
[RTS_STATIC] = "static",
[RTS_INHERIT] = "inherit",
[RTS_DEVICE] = "device",
[RTS_STATIC_DEVICE] = "static-device",
[RTS_REDIRECT] = "redirect",
[RTS_RIP] = "RIP",
[RTS_OSPF] = "OSPF",
[RTS_OSPF_IA] = "OSPF-IA",
[RTS_OSPF_EXT1] = "OSPF-E1",
[RTS_OSPF_EXT2] = "OSPF-E2",
[RTS_BGP] = "BGP",
[RTS_PIPE] = "pipe",
[RTS_BABEL] = "Babel",
[RTS_RPKI] = "RPKI",
};
const char * rta_dest_names[RTD_MAX] = {
[RTD_NONE] = "",
[RTD_UNICAST] = "unicast",
[RTD_BLACKHOLE] = "blackhole",
[RTD_UNREACHABLE] = "unreachable",
[RTD_PROHIBIT] = "prohibited",
};
DEFINE_DOMAIN(attrs);
static DOMAIN(attrs) src_domain;
#define SRC_LOCK LOCK_DOMAIN(attrs, src_domain)
#define SRC_UNLOCK UNLOCK_DOMAIN(attrs, src_domain)
pool *rta_pool;
pool *src_pool;
static slab *rta_slab_[4];
static slab *nexthop_slab_[4];
static slab *rte_src_slab;
static struct idm src_ids;
#define SRC_ID_INIT_SIZE 4
/* rte source hash */
#define RSH_KEY(n) n->private_id
#define RSH_NEXT(n) n->next
#define RSH_EQ(n1,n2) n1 == n2
#define RSH_FN(n) u32_hash(n)
#define RSH_REHASH rte_src_rehash
#define RSH_PARAMS /2, *2, 1, 1, 8, 20
#define RSH_INIT_ORDER 2
static void
rte_src_init(void)
{
src_domain = DOMAIN_NEW(attrs, "Route sources");
src_pool = rp_new(&root_pool, &main_birdloop, "Route sources");
rte_src_slab = sl_new(src_pool, sizeof(struct rte_src));
idm_init(&src_ids, src_pool, SRC_ID_INIT_SIZE);
}
HASH_DEFINE_REHASH_FN(RSH, struct rte_src)
static struct rte_src *
rt_find_source(struct rte_owner *p, u32 id)
{
return HASH_FIND(p->hash, RSH, id);
}
struct rte_src *
rt_get_source_o(struct rte_owner *p, u32 id)
{
if (p->stop)
bug("Stopping route owner asked for another source.");
struct rte_src *src = rt_find_source(p, id);
if (src)
{
UNUSED u64 uc = atomic_fetch_add_explicit(&src->uc, 1, memory_order_acq_rel);
return src;
}
SRC_LOCK;
src = sl_allocz(rte_src_slab);
src->owner = p;
src->private_id = id;
src->global_id = idm_alloc(&src_ids);
atomic_store_explicit(&src->uc, 1, memory_order_release);
p->uc++;
HASH_INSERT2(p->hash, RSH, src_pool, src);
if (config->table_debug)
log(L_TRACE "Allocated new rte_src for %s, ID %uL %uG, have %u sources now",
p->name, src->private_id, src->global_id, p->uc);
SRC_UNLOCK;
return src;
}
static inline void
rt_done_sources(struct rte_owner *o)
{
if (o->stop->list)
ev_send(o->stop->list, o->stop);
else
ev_send(o->list, o->stop);
}
void
rt_prune_sources(void *data)
{
struct rte_owner *o = data;
HASH_WALK_FILTER(o->hash, next, src, sp)
{
u64 uc;
while ((uc = atomic_load_explicit(&src->uc, memory_order_acquire)) >> RTE_SRC_PU_SHIFT)
;
if (uc == 0)
{
o->uc--;
HASH_DO_REMOVE(o->hash, RSH, sp);
SRC_LOCK;
idm_free(&src_ids, src->global_id);
sl_free(rte_src_slab, src);
SRC_UNLOCK;
}
}
HASH_WALK_FILTER_END;
SRC_LOCK;
HASH_MAY_RESIZE_DOWN(o->hash, RSH, src_pool);
if (o->stop && !o->uc)
{
rfree(o->prune);
SRC_UNLOCK;
if (config->table_debug)
log(L_TRACE "All rte_src's for %s pruned, scheduling stop event", o->name);
rt_done_sources(o);
}
else
SRC_UNLOCK;
}
void
rt_init_sources(struct rte_owner *o, const char *name, event_list *list)
{
SRC_LOCK;
HASH_INIT(o->hash, src_pool, RSH_INIT_ORDER);
o->hash_key = random_u32();
o->uc = 0;
o->name = name;
o->prune = ev_new_init(src_pool, rt_prune_sources, o);
o->stop = NULL;
o->list = list;
SRC_UNLOCK;
}
void
rt_destroy_sources(struct rte_owner *o, event *done)
{
o->stop = done;
if (!o->uc)
{
if (config->table_debug)
log(L_TRACE "Source owner %s destroy requested. All rte_src's already pruned, scheduling stop event", o->name);
SRC_LOCK;
rfree(o->prune);
SRC_UNLOCK;
rt_done_sources(o);
}
else
if (config->table_debug)
log(L_TRACE "Source owner %s destroy requested. Remaining %u rte_src's to prune.", o->name, o->uc);
}
/*
* Multipath Next Hop
*/
static inline u32
nexthop_hash(struct nexthop *x)
{
u32 h = 0;
for (; x; x = x->next)
{
h ^= ipa_hash(x->gw) ^ (h << 5) ^ (h >> 9);
for (int i = 0; i < x->labels; i++)
h ^= x->label[i] ^ (h << 6) ^ (h >> 7);
}
return h;
}
int
nexthop__same(struct nexthop *x, struct nexthop *y)
{
for (; x && y; x = x->next, y = y->next)
{
if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) ||
(x->flags != y->flags) || (x->weight != y->weight) ||
(x->labels_orig != y->labels_orig) || (x->labels != y->labels))
return 0;
for (int i = 0; i < x->labels; i++)
if (x->label[i] != y->label[i])
return 0;
}
return x == y;
}
static int
nexthop_compare_node(const struct nexthop *x, const struct nexthop *y)
{
int r;
if (!x)
return 1;
if (!y)
return -1;
/* Should we also compare flags ? */
r = ((int) y->weight) - ((int) x->weight);
if (r)
return r;
r = ipa_compare(x->gw, y->gw);
if (r)
return r;
r = ((int) y->labels) - ((int) x->labels);
if (r)
return r;
for (int i = 0; i < y->labels; i++)
{
r = ((int) y->label[i]) - ((int) x->label[i]);
if (r)
return r;
}
return ((int) x->iface->index) - ((int) y->iface->index);
}
static inline struct nexthop *
nexthop_copy_node(const struct nexthop *src, linpool *lp)
{
struct nexthop *n = lp_alloc(lp, nexthop_size(src));
memcpy(n, src, nexthop_size(src));
n->next = NULL;
return n;
}
/**
* nexthop_merge - merge nexthop lists
* @x: list 1
* @y: list 2
* @rx: reusability of list @x
* @ry: reusability of list @y
* @max: max number of nexthops
* @lp: linpool for allocating nexthops
*
* The nexthop_merge() function takes two nexthop lists @x and @y and merges them,
* eliminating possible duplicates. The input lists must be sorted and the
* result is sorted too. The number of nexthops in result is limited by @max.
* New nodes are allocated from linpool @lp.
*
* The arguments @rx and @ry specify whether corresponding input lists may be
* consumed by the function (i.e. their nodes reused in the resulting list), in
* that case the caller should not access these lists after that. To eliminate
* issues with deallocation of these lists, the caller should use some form of
* bulk deallocation (e.g. stack or linpool) to free these nodes when the
* resulting list is no longer needed. When reusability is not set, the
* corresponding lists are not modified nor linked from the resulting list.
*/
struct nexthop *
nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp)
{
struct nexthop *root = NULL;
struct nexthop **n = &root;
while ((x || y) && max--)
{
int cmp = nexthop_compare_node(x, y);
if (cmp < 0)
{
ASSUME(x);
*n = rx ? x : nexthop_copy_node(x, lp);
x = x->next;
}
else if (cmp > 0)
{
ASSUME(y);
*n = ry ? y : nexthop_copy_node(y, lp);
y = y->next;
}
else
{
ASSUME(x && y);
*n = rx ? x : (ry ? y : nexthop_copy_node(x, lp));
x = x->next;
y = y->next;
}
n = &((*n)->next);
}
*n = NULL;
return root;
}
void
nexthop_insert(struct nexthop **n, struct nexthop *x)
{
for (; *n; n = &((*n)->next))
{
int cmp = nexthop_compare_node(*n, x);
if (cmp < 0)
continue;
else if (cmp > 0)
break;
else
return;
}
x->next = *n;
*n = x;
}
struct nexthop *
nexthop_sort(struct nexthop *x)
{
struct nexthop *s = NULL;
/* Simple insert-sort */
while (x)
{
struct nexthop *n = x;
x = n->next;
n->next = NULL;
nexthop_insert(&s, n);
}
return s;
}
int
nexthop_is_sorted(struct nexthop *x)
{
for (; x && x->next; x = x->next)
if (nexthop_compare_node(x, x->next) >= 0)
return 0;
return 1;
}
static inline slab *
nexthop_slab(struct nexthop *nh)
{
return nexthop_slab_[MIN(nh->labels, 3)];
}
static struct nexthop *
nexthop_copy(struct nexthop *o)
{
struct nexthop *first = NULL;
struct nexthop **last = &first;
for (; o; o = o->next)
{
struct nexthop *n = sl_allocz(nexthop_slab(o));
n->gw = o->gw;
n->iface = o->iface;
n->next = NULL;
n->flags = o->flags;
n->weight = o->weight;
n->labels_orig = o->labels_orig;
n->labels = o->labels;
for (int i=0; i<o->labels; i++)
n->label[i] = o->label[i];
*last = n;
last = &(n->next);
}
return first;
}
static void
nexthop_free(struct nexthop *o)
{
struct nexthop *n;
while (o)
{
n = o->next;
sl_free(nexthop_slab(o), o);
o = n;
}
}
/*
* Extended Attributes
*/
static inline eattr *
ea__find(ea_list *e, unsigned id)
{
eattr *a;
int l, r, m;
while (e)
{
if (e->flags & EALF_BISECT)
{
l = 0;
r = e->count - 1;
while (l <= r)
{
m = (l+r) / 2;
a = &e->attrs[m];
if (a->id == id)
return a;
else if (a->id < id)
l = m+1;
else
r = m-1;
}
}
else
for(m=0; m<e->count; m++)
if (e->attrs[m].id == id)
return &e->attrs[m];
e = e->next;
}
return NULL;
}
/**
* ea_find - find an extended attribute
* @e: attribute list to search in
* @id: attribute ID to search for
*
* Given an extended attribute list, ea_find() searches for a first
* occurrence of an attribute with specified ID, returning either a pointer
* to its &eattr structure or %NULL if no such attribute exists.
*/
eattr *
ea_find(ea_list *e, unsigned id)
{
eattr *a = ea__find(e, id & EA_CODE_MASK);
if (a && (a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF &&
!(id & EA_ALLOW_UNDEF))
return NULL;
return a;
}
/**
* ea_walk - walk through extended attributes
* @s: walk state structure
* @id: start of attribute ID interval
* @max: length of attribute ID interval
*
* Given an extended attribute list, ea_walk() walks through the list looking
* for first occurrences of attributes with ID in specified interval from @id to
* (@id + @max - 1), returning pointers to found &eattr structures, storing its
* walk state in @s for subsequent calls.
*
* The function ea_walk() is supposed to be called in a loop, with initially
* zeroed walk state structure @s with filled the initial extended attribute
* list, returning one found attribute in each call or %NULL when no other
* attribute exists. The extended attribute list or the arguments should not be
* modified between calls. The maximum value of @max is 128.
*/
eattr *
ea_walk(struct ea_walk_state *s, uint id, uint max)
{
ea_list *e = s->eattrs;
eattr *a = s->ea;
eattr *a_max;
max = id + max;
if (a)
goto step;
for (; e; e = e->next)
{
if (e->flags & EALF_BISECT)
{
int l, r, m;
l = 0;
r = e->count - 1;
while (l < r)
{
m = (l+r) / 2;
if (e->attrs[m].id < id)
l = m + 1;
else
r = m;
}
a = e->attrs + l;
}
else
a = e->attrs;
step:
a_max = e->attrs + e->count;
for (; a < a_max; a++)
if ((a->id >= id) && (a->id < max))
{
int n = a->id - id;
if (BIT32_TEST(s->visited, n))
continue;
BIT32_SET(s->visited, n);
if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF)
continue;
s->eattrs = e;
s->ea = a;
return a;
}
else if (e->flags & EALF_BISECT)
break;
}
return NULL;
}
/**
* ea_get_int - fetch an integer attribute
* @e: attribute list
* @id: attribute ID
* @def: default value
*
* This function is a shortcut for retrieving a value of an integer attribute
* by calling ea_find() to find the attribute, extracting its value or returning
* a provided default if no such attribute is present.
*/
uintptr_t
ea_get_int(ea_list *e, unsigned id, uintptr_t def)
{
eattr *a = ea_find(e, id);
if (!a)
return def;
return a->u.data;
}
static inline void
ea_do_sort(ea_list *e)
{
unsigned n = e->count;
eattr *a = e->attrs;
eattr *b = alloca(n * sizeof(eattr));
unsigned s, ss;
/* We need to use a stable sorting algorithm, hence mergesort */
do
{
s = ss = 0;
while (s < n)
{
eattr *p, *q, *lo, *hi;
p = b;
ss = s;
*p++ = a[s++];
while (s < n && p[-1].id <= a[s].id)
*p++ = a[s++];
if (s < n)
{
q = p;
*p++ = a[s++];
while (s < n && p[-1].id <= a[s].id)
*p++ = a[s++];
lo = b;
hi = q;
s = ss;
while (lo < q && hi < p)
if (lo->id <= hi->id)
a[s++] = *lo++;
else
a[s++] = *hi++;
while (lo < q)
a[s++] = *lo++;
while (hi < p)
a[s++] = *hi++;
}
}
}
while (ss);
}
/**
* In place discard duplicates and undefs in sorted ea_list. We use stable sort
* for this reason.
**/
static inline void
ea_do_prune(ea_list *e)
{
eattr *s, *d, *l, *s0;
int i = 0;
s = d = e->attrs; /* Beginning of the list. @s is source, @d is destination. */
l = e->attrs + e->count; /* End of the list */
/* Walk from begin to end. */
while (s < l)
{
s0 = s++;
/* Find a consecutive block of the same attribute */
while (s < l && s->id == s[-1].id)
s++;
/* Now s0 is the most recent version, s[-1] the oldest one */
/* Drop undefs */
if ((s0->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF)
continue;
/* Copy the newest version to destination */
*d = *s0;
/* Preserve info whether it originated locally */
d->type = (d->type & ~(EAF_ORIGINATED|EAF_FRESH)) | (s[-1].type & EAF_ORIGINATED);
/* Next destination */
d++;
i++;
}
e->count = i;
}
/**
* ea_sort - sort an attribute list
* @e: list to be sorted
*
* This function takes a &ea_list chain and sorts the attributes
* within each of its entries.
*
* If an attribute occurs multiple times in a single &ea_list,
* ea_sort() leaves only the first (the only significant) occurrence.
*/
void
ea_sort(ea_list *e)
{
while (e)
{
if (!(e->flags & EALF_SORTED))
{
ea_do_sort(e);
ea_do_prune(e);
e->flags |= EALF_SORTED;
}
if (e->count > 5)
e->flags |= EALF_BISECT;
e = e->next;
}
}
/**
* ea_scan - estimate attribute list size
* @e: attribute list
*
* This function calculates an upper bound of the size of
* a given &ea_list after merging with ea_merge().
*/
unsigned
ea_scan(ea_list *e)
{
unsigned cnt = 0;
while (e)
{
cnt += e->count;
e = e->next;
}
return sizeof(ea_list) + sizeof(eattr)*cnt;
}
/**
* ea_merge - merge segments of an attribute list
* @e: attribute list
* @t: buffer to store the result to
*
* This function takes a possibly multi-segment attribute list
* and merges all of its segments to one.
*
* The primary use of this function is for &ea_list normalization:
* first call ea_scan() to determine how much memory will the result
* take, then allocate a buffer (usually using alloca()), merge the
* segments with ea_merge() and finally sort and prune the result
* by calling ea_sort().
*/
void
ea_merge(ea_list *e, ea_list *t)
{
eattr *d = t->attrs;
t->flags = 0;
t->count = 0;
t->next = NULL;
while (e)
{
memcpy(d, e->attrs, sizeof(eattr)*e->count);
t->count += e->count;
d += e->count;
e = e->next;
}
}
/**
* ea_same - compare two &ea_list's
* @x: attribute list
* @y: attribute list
*
* ea_same() compares two normalized attribute lists @x and @y and returns
* 1 if they contain the same attributes, 0 otherwise.
*/
int
ea_same(ea_list *x, ea_list *y)
{
int c;
if (!x || !y)
return x == y;
ASSERT(!x->next && !y->next);
if (x->count != y->count)
return 0;
for(c=0; c<x->count; c++)
{
eattr *a = &x->attrs[c];
eattr *b = &y->attrs[c];
if (a->id != b->id ||
a->flags != b->flags ||
a->type != b->type ||
((a->type & EAF_EMBEDDED) ? a->u.data != b->u.data : !adata_same(a->u.ptr, b->u.ptr)))
return 0;
}
return 1;
}
static inline ea_list *
ea_list_copy(ea_list *o)
{
ea_list *n;
unsigned i, adpos, elen;
if (!o)
return NULL;
ASSERT(!o->next);
elen = adpos = sizeof(ea_list) + sizeof(eattr) * o->count;
for(i=0; i<o->count; i++)
{
eattr *a = &o->attrs[i];
if (!(a->type & EAF_EMBEDDED))
elen += sizeof(struct adata) + a->u.ptr->length;
}
n = mb_alloc(rta_pool, elen);
memcpy(n, o, adpos);
n->flags |= EALF_CACHED;
for(i=0; i<o->count; i++)
{
eattr *a = &n->attrs[i];
if (!(a->type & EAF_EMBEDDED))
{
unsigned size = sizeof(struct adata) + a->u.ptr->length;
ASSERT_DIE(adpos + size <= elen);
struct adata *d = ((void *) n) + adpos;
memcpy(d, a->u.ptr, size);
a->u.ptr = d;
adpos += size;
}
}
ASSERT_DIE(adpos == elen);
return n;
}
static inline void
ea_free(ea_list *o)
{
if (o)
{
ASSERT(!o->next);
mb_free(o);
}
}
static int
get_generic_attr(const eattr *a, byte **buf, int buflen UNUSED)
{
if (a->id == EA_GEN_IGP_METRIC)
{
*buf += bsprintf(*buf, "igp_metric");
return GA_NAME;
}
return GA_UNKNOWN;
}
void
ea_format_bitfield(const struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max)
{
byte *bound = buf + bufsize - 32;
u32 data = a->u.data;
int i;
for (i = min; i < max; i++)
if ((data & (1u << i)) && names[i])
{
if (buf > bound)
{
strcpy(buf, " ...");
return;
}
buf += bsprintf(buf, " %s", names[i]);
data &= ~(1u << i);
}
if (data)
bsprintf(buf, " %08x", data);
return;
}
static inline void
opaque_format(const struct adata *ad, byte *buf, uint size)
{
byte *bound = buf + size - 10;
uint i;
for(i = 0; i < ad->length; i++)
{
if (buf > bound)
{
strcpy(buf, " ...");
return;
}
if (i)
*buf++ = ' ';
buf += bsprintf(buf, "%02x", ad->data[i]);
}
*buf = 0;
return;
}
static inline void
ea_show_int_set(struct cli *c, const struct adata *ad, int way, byte *pos, byte *buf, byte *end)
{
int i = int_set_format(ad, way, 0, pos, end - pos);
cli_printf(c, -1012, "\t%s", buf);
while (i)
{
i = int_set_format(ad, way, i, buf, end - buf - 1);
cli_printf(c, -1012, "\t\t%s", buf);
}
}
static inline void
ea_show_ec_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte *end)
{
int i = ec_set_format(ad, 0, pos, end - pos);
cli_printf(c, -1012, "\t%s", buf);
while (i)
{
i = ec_set_format(ad, i, buf, end - buf - 1);
cli_printf(c, -1012, "\t\t%s", buf);
}
}
static inline void
ea_show_lc_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte *end)
{
int i = lc_set_format(ad, 0, pos, end - pos);
cli_printf(c, -1012, "\t%s", buf);
while (i)
{
i = lc_set_format(ad, i, buf, end - buf - 1);
cli_printf(c, -1012, "\t\t%s", buf);
}
}
/**
* ea_show - print an &eattr to CLI
* @c: destination CLI
* @e: attribute to be printed
*
* This function takes an extended attribute represented by its &eattr
* structure and prints it to the CLI according to the type information.
*
* If the protocol defining the attribute provides its own
* get_attr() hook, it's consulted first.
*/
void
ea_show(struct cli *c, const eattr *e)
{
struct protocol *p;
int status = GA_UNKNOWN;
const struct adata *ad = (e->type & EAF_EMBEDDED) ? NULL : e->u.ptr;
byte buf[CLI_MSG_SIZE];
byte *pos = buf, *end = buf + sizeof(buf);
if (EA_IS_CUSTOM(e->id))
{
const char *name = ea_custom_name(e->id);
if (name)
{
pos += bsprintf(pos, "%s", name);
status = GA_NAME;
}
else
pos += bsprintf(pos, "%02x.", EA_PROTO(e->id));
}
else if (p = class_to_protocol[EA_PROTO(e->id)])
{
pos += bsprintf(pos, "%s.", p->name);
if (p->get_attr)
status = p->get_attr(e, pos, end - pos);
pos += strlen(pos);
}
else if (EA_PROTO(e->id))
pos += bsprintf(pos, "%02x.", EA_PROTO(e->id));
else
status = get_generic_attr(e, &pos, end - pos);
if (status < GA_NAME)
pos += bsprintf(pos, "%02x", EA_ID(e->id));
if (status < GA_FULL)
{
*pos++ = ':';
*pos++ = ' ';
switch (e->type & EAF_TYPE_MASK)
{
case EAF_TYPE_INT:
bsprintf(pos, "%u", e->u.data);
break;
case EAF_TYPE_OPAQUE:
opaque_format(ad, pos, end - pos);
break;
case EAF_TYPE_IP_ADDRESS:
bsprintf(pos, "%I", *(ip_addr *) ad->data);
break;
case EAF_TYPE_ROUTER_ID:
bsprintf(pos, "%R", e->u.data);
break;
case EAF_TYPE_AS_PATH:
as_path_format(ad, pos, end - pos);
break;
case EAF_TYPE_BITFIELD:
bsprintf(pos, "%08x", e->u.data);
break;
case EAF_TYPE_INT_SET:
ea_show_int_set(c, ad, 1, pos, buf, end);
return;
case EAF_TYPE_EC_SET:
ea_show_ec_set(c, ad, pos, buf, end);
return;
case EAF_TYPE_LC_SET:
ea_show_lc_set(c, ad, pos, buf, end);
return;
case EAF_TYPE_UNDEF:
default:
bsprintf(pos, "<type %02x>", e->type);
}
}
cli_printf(c, -1012, "\t%s", buf);
}
/**
* ea_dump - dump an extended attribute
* @e: attribute to be dumped
*
* ea_dump() dumps contents of the extended attribute given to
* the debug output.
*/
void
ea_dump(ea_list *e)
{
int i;
if (!e)
{
debug("NONE");
return;
}
while (e)
{
debug("[%c%c%c]",
(e->flags & EALF_SORTED) ? 'S' : 's',
(e->flags & EALF_BISECT) ? 'B' : 'b',
(e->flags & EALF_CACHED) ? 'C' : 'c');
for(i=0; i<e->count; i++)
{
eattr *a = &e->attrs[i];
debug(" %02x:%02x.%02x", EA_PROTO(a->id), EA_ID(a->id), a->flags);
debug("=%c", "?iO?I?P???S?????" [a->type & EAF_TYPE_MASK]);
if (a->type & EAF_ORIGINATED)
debug("o");
if (a->type & EAF_EMBEDDED)
debug(":%08x", a->u.data);
else
{
int j, len = a->u.ptr->length;
debug("[%d]:", len);
for(j=0; j<len; j++)
debug("%02x", a->u.ptr->data[j]);
}
}
if (e = e->next)
debug(" | ");
}
}
/**
* ea_hash - calculate an &ea_list hash key
* @e: attribute list
*
* ea_hash() takes an extended attribute list and calculated a hopefully
* uniformly distributed hash value from its contents.
*/
inline uint
ea_hash(ea_list *e)
{
const u64 mul = 0x68576150f3d6847;
u64 h = 0xafcef24eda8b29;
int i;
if (e) /* Assuming chain of length 1 */
{
for(i=0; i<e->count; i++)
{
struct eattr *a = &e->attrs[i];
h ^= a->id; h *= mul;
if (a->type & EAF_EMBEDDED)
h ^= a->u.data;
else
{
const struct adata *d = a->u.ptr;
h ^= mem_hash(d->data, d->length);
}
h *= mul;
}
}
return (h >> 32) ^ (h & 0xffffffff);
}
/**
* ea_append - concatenate &ea_list's
* @to: destination list (can be %NULL)
* @what: list to be appended (can be %NULL)
*
* This function appends the &ea_list @what at the end of
* &ea_list @to and returns a pointer to the resulting list.
*/
ea_list *
ea_append(ea_list *to, ea_list *what)
{
ea_list *res;
if (!to)
return what;
res = to;
while (to->next)
to = to->next;
to->next = what;
return res;
}
/*
* rta's
*/
static DOMAIN(attrs) attrs_domain;
#define RTA_LOCK LOCK_DOMAIN(attrs, attrs_domain)
#define RTA_UNLOCK UNLOCK_DOMAIN(attrs, attrs_domain)
struct rta_cache {
u32 count;
u32 size;
u32 limit;
u32 mask;
rta * _Atomic table[0];
} * _Atomic rta_cache;
// rta_aux, rta_cache = { .size = ATOMIC_VAR_INIT(32), };
static struct rta_cache *
rta_alloc_hash(u32 size)
{
struct rta_cache *c = mb_allocz(rta_pool, sizeof(struct rta_cache) + sizeof(rta * _Atomic) * size);
c->size = size;
c->limit = (size >> 20) ? (~0U) : (size * 2);
c->mask = size - 1;
return c;
}
static inline uint
rta_hash(rta *a)
{
u64 h;
mem_hash_init(&h);
#define MIX(f) mem_hash_mix(&h, &(a->f), sizeof(a->f));
#define BMIX(f) mem_hash_mix_num(&h, a->f);
MIX(hostentry);
MIX(from);
MIX(igp_metric);
BMIX(source);
BMIX(scope);
BMIX(dest);
MIX(pref);
#undef MIX
return mem_hash_value(&h) ^ nexthop_hash(&(a->nh)) ^ ea_hash(a->eattrs);
}
static inline int
rta_same(rta *x, rta *y)
{
return (x->source == y->source &&
x->scope == y->scope &&
x->dest == y->dest &&
x->igp_metric == y->igp_metric &&
ipa_equal(x->from, y->from) &&
x->hostentry == y->hostentry &&
nexthop_same(&(x->nh), &(y->nh)) &&
ea_same(x->eattrs, y->eattrs));
}
static inline slab *
rta_slab(rta *a)
{
return rta_slab_[a->nh.labels > 2 ? 3 : a->nh.labels];
}
static rta *
rta_copy(rta *o)
{
rta *r = sl_alloc(rta_slab(o));
memcpy(r, o, rta_size(o));
r->uc = 1;
r->nh.next = nexthop_copy(o->nh.next);
r->eattrs = ea_list_copy(o->eattrs);
return r;
}
static inline void
rta_insert(rta *r, struct rta_cache *c)
{
uint h = r->hash_key & c->mask;
rta *next = atomic_load_explicit(&c->table[h], memory_order_relaxed);
atomic_store_explicit(&r->next, next, memory_order_relaxed);
r->pprev = &c->table[h];
if (next)
next->pprev = &r->next;
/* This store MUST be the last and MUST have release order for thread-safety */
atomic_store_explicit(&c->table[h], r, memory_order_release);
}
static void
rta_rehash(struct rta_cache *c)
{
u32 os = c->size;
struct rta_cache *nc = rta_alloc_hash(os * 2);
nc->count = c->count;
/* First we simply copy every chain to both new locations */
for (u32 h = 0; h < os; h++)
{
rta *r = atomic_load_explicit(&c->table[h], memory_order_relaxed);
atomic_store_explicit(&nc->table[h], r, memory_order_relaxed);
atomic_store_explicit(&nc->table[h + os], r, memory_order_relaxed);
}
/* Then we exchange the hashes; release semantics forces the previous code to be already done */
atomic_store_explicit(&rta_cache, nc, memory_order_release);
/* And now we pass through both chains and filter them */
for (u32 h = 0; h < c->size; h++)
{
rta * _Atomic * ap = &nc->table[h];
rta * _Atomic * bp = &nc->table[h + os];
rta *r = atomic_load_explicit(ap, memory_order_relaxed);
ASSERT_DIE(r == atomic_load_explicit(bp, memory_order_relaxed));
while (r)
{
if (r->hash_key & os)
{
r->pprev = bp;
atomic_store_explicit(bp, r, memory_order_release);
bp = &r->next;
}
else
{
r->pprev = ap;
atomic_store_explicit(ap, r, memory_order_release);
ap = &r->next;
}
r = atomic_load_explicit(&r->next, memory_order_acquire);
}
atomic_store_explicit(ap, NULL, memory_order_release);
atomic_store_explicit(bp, NULL, memory_order_release);
}
synchronize_rcu();
mb_free(c);
}
static rta *
rta_find(rta *o, u32 h, struct rta_cache *c)
{
rta *r = NULL;
for (r = atomic_load_explicit(&c->table[h & c->mask], memory_order_acquire); r; r = atomic_load_explicit(&r->next, memory_order_acquire))
if (r->hash_key == h && rta_same(r, o))
{
atomic_fetch_add_explicit(&r->uc, 1, memory_order_acq_rel);
return r;
}
return NULL;
}
/**
* rta_lookup - look up a &rta in attribute cache
* @o: a un-cached &rta
*
* rta_lookup() gets an un-cached &rta structure and returns its cached
* counterpart. It starts with examining the attribute cache to see whether
* there exists a matching entry. If such an entry exists, it's returned and
* its use count is incremented, else a new entry is created with use count
* set to 1.
*
* The extended attribute lists attached to the &rta are automatically
* converted to the normalized form.
*/
rta *
rta_lookup(rta *o)
{
rta *r;
uint h;
ASSERT(!o->cached);
if (o->eattrs)
ea_normalize(o->eattrs);
h = rta_hash(o);
/* Lockless lookup */
rcu_read_lock();
r = rta_find(o, h, atomic_load_explicit(&rta_cache, memory_order_acquire));
rcu_read_unlock();
if (r)
return r;
RTA_LOCK;
/* Locked lookup to avoid duplicates if possible */
struct rta_cache *c = atomic_load_explicit(&rta_cache, memory_order_acquire);
r = rta_find(o, h, c);
if (r)
{
RTA_UNLOCK;
return r;
}
/* Store the rta */
r = rta_copy(o);
r->hash_key = h;
r->cached = 1;
rt_lock_hostentry(r->hostentry);
rta_insert(r, c);
if (++c->count > c->limit)
rta_rehash(c);
RTA_UNLOCK;
return r;
}
static void
rta_cleanup(void *data UNUSED)
{
u32 count = 0;
rta *ax[RTA_OBSOLETE_LIMIT];
RTA_LOCK;
struct rta_cache *c = atomic_load_explicit(&rta_cache, memory_order_acquire);
for(u32 h=0; h<c->size; h++)
for(rta *a = atomic_load_explicit(&c->table[h], memory_order_acquire), *next;
a;
a = next)
{
next = atomic_load_explicit(&a->next, memory_order_acquire);
if (atomic_load_explicit(&a->uc, memory_order_acquire) > 0)
continue;
/* Check if the cleanup fits in the buffer */
if (count == RTA_OBSOLETE_LIMIT)
{
ev_send(&global_work_list, &rta_cleanup_event);
goto wait;
}
/* Relink the forward pointer */
atomic_store_explicit(a->pprev, next, memory_order_release);
/* Relink the backwards pointer */
if (next)
next->pprev = a->pprev;
/* Store for freeing and go to the next */
ax[count++] = a;
a = next;
}
wait:
/* Wait until nobody knows about us */
synchronize_rcu();
u32 freed = 0;
for (u32 i=0; i<count; i++)
{
rta *a = ax[i];
/* Acquired inbetween, relink back */
if (atomic_load_explicit(&a->uc, memory_order_acquire))
{
rta_insert(a, c);
continue;
}
/* Cleared to free the memory */
rt_unlock_hostentry(a->hostentry);
if (a->nh.next)
nexthop_free(a->nh.next);
ea_free(a->eattrs);
a->cached = 0;
c->count--;
sl_free(rta_slab(a), a);
freed++;
}
atomic_fetch_sub_explicit(&rta_obsolete_count, freed, memory_order_release);
RTA_UNLOCK;
}
_Atomic u32 rta_obsolete_count;
event rta_cleanup_event = { .hook = rta_cleanup, .list = &global_work_list };
rta *
rta_do_cow(rta *o, linpool *lp)
{
rta *r = lp_alloc(lp, rta_size(o));
memcpy(r, o, rta_size(o));
for (struct nexthop **nhn = &(r->nh.next), *nho = o->nh.next; nho; nho = nho->next)
{
*nhn = lp_alloc(lp, nexthop_size(nho));
memcpy(*nhn, nho, nexthop_size(nho));
nhn = &((*nhn)->next);
}
rta_uncache(r);
return r;
}
/**
* rta_dump - dump route attributes
* @a: attribute structure to dump
*
* This function takes a &rta and dumps its contents to the debug output.
*/
void
rta_dump(const rta *a)
{
static char *rts[] = { "", "RTS_STATIC", "RTS_INHERIT", "RTS_DEVICE",
"RTS_STAT_DEV", "RTS_REDIR", "RTS_RIP",
"RTS_OSPF", "RTS_OSPF_IA", "RTS_OSPF_EXT1",
"RTS_OSPF_EXT2", "RTS_BGP", "RTS_PIPE", "RTS_BABEL" };
static char *rtd[] = { "", " DEV", " HOLE", " UNREACH", " PROHIBIT" };
debug("pref=%d uc=%d %s %s%s h=%04x",
a->pref, a->uc, rts[a->source], ip_scope_text(a->scope),
rtd[a->dest], a->hash_key);
if (!a->cached)
debug(" !CACHED");
debug(" <-%I", a->from);
if (a->dest == RTD_UNICAST)
for (const struct nexthop *nh = &(a->nh); nh; nh = nh->next)
{
if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw);
if (nh->labels) debug(" L %d", nh->label[0]);
for (int i=1; i<nh->labels; i++)
debug("/%d", nh->label[i]);
debug(" [%s]", nh->iface ? nh->iface->name : "???");
}
if (a->eattrs)
{
debug(" EA: ");
ea_dump(a->eattrs);
}
}
/**
* rta_dump_all - dump attribute cache
*
* This function dumps the whole contents of route attribute cache
* to the debug output.
*/
void
rta_dump_all(void)
{
rta *a;
uint h;
RTA_LOCK;
struct rta_cache *c = atomic_load_explicit(&rta_cache, memory_order_acquire);
debug("Route attribute cache (%d entries, rehash at %d):\n", c->count, c->limit);
for(h=0; h<c->size; h++)
for(a = atomic_load_explicit(&c->table[h], memory_order_acquire);
a;
a = atomic_load_explicit(&a->next, memory_order_acquire))
{
debug("%p ", a);
rta_dump(a);
debug("\n");
}
debug("\n");
RTA_UNLOCK;
}
void
rta_show(struct cli *c, const rta *a)
{
cli_printf(c, -1008, "\tType: %s %s", rta_src_names[a->source], ip_scope_text(a->scope));
for(ea_list *eal = a->eattrs; eal; eal=eal->next)
for(int i=0; i<eal->count; i++)
ea_show(c, &eal->attrs[i]);
}
/**
* rta_init - initialize route attribute cache
*
* This function is called during initialization of the routing
* table module to set up the internals of the attribute cache.
*/
void
rta_init(void)
{
attrs_domain = DOMAIN_NEW(attrs, "Attributes");
rta_pool = rp_new(&root_pool, &main_birdloop, "Attributes");
rta_slab_[0] = sl_new(rta_pool, sizeof(rta));
rta_slab_[1] = sl_new(rta_pool, sizeof(rta) + sizeof(u32));
rta_slab_[2] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*2);
rta_slab_[3] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK);
nexthop_slab_[0] = sl_new(rta_pool, sizeof(struct nexthop));
nexthop_slab_[1] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32));
nexthop_slab_[2] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*2);
nexthop_slab_[3] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK);
atomic_store_explicit(&rta_cache, rta_alloc_hash(32), memory_order_relaxed);
rte_src_init();
}
/*
* Documentation for functions declared inline in route.h
*/
#if 0
/**
* rta_clone - clone route attributes
* @r: a &rta to be cloned
*
* rta_clone() takes a cached &rta and returns its identical cached
* copy. Currently it works by just returning the original &rta with
* its use count incremented.
*/
static inline rta *rta_clone(rta *r)
{ DUMMY; }
/**
* rta_free - free route attributes
* @r: a &rta to be freed
*
* If you stop using a &rta (for example when deleting a route which uses
* it), you need to call rta_free() to notify the attribute cache the
* attribute is no longer in use and can be freed if you were the last
* user (which rta_free() tests by inspecting the use count).
*/
static inline void rta_free(rta *r)
{ DUMMY; }
#endif