mirror of https://gitlab.nic.cz/labs/bird.git synced 2024-09-07 22:15:19 +00:00

Merge commit '5d5c431a3c23ad7ed2ed5e769d9857e7f3a1e626' into kk-tbf-config-v2

Maria Matejka 2024-05-07 16:51:49 +02:00
commit ebeaf4e8ca
196 changed files with 25528 additions and 10664 deletions


@ -132,10 +132,6 @@ build-fedora-34-amd64:
<<: *build-linux
image: registry.nic.cz/labs/bird:fedora-33-amd64
#build-centos-7-amd64:
# <<: *build-linux
# image: registry.nic.cz/labs/bird:centos-7-amd64
build-centos-8-amd64:
<<: *build-linux
image: registry.nic.cz/labs/bird:centos-8-amd64
@ -260,13 +256,6 @@ pkg-fedora-34-amd64:
needs: [build-fedora-34-amd64]
image: registry.nic.cz/labs/bird:fedora-34-amd64
#pkg-centos-7-amd64:
# <<: *pkg-rpm-wa
# variables:
# LC_ALL: en_US.UTF-8
# needs: [build-centos-7-amd64]
# image: registry.nic.cz/labs/bird:centos-7-amd64
pkg-centos-8-amd64:
<<: *pkg-rpm-wa
needs: [build-centos-8-amd64]
@ -334,7 +323,7 @@ build-netlab:
script:
- cd $TOOLS_DIR/netlab
- sudo ./stop
- sudo ./runtest -s v2 -m check $TEST_NAME
- sudo ./runtest -s v3 -m check $TEST_NAME
test-ospf-base:
<<: *test-base
@ -446,11 +435,11 @@ test-ibgp-loop:
variables:
TEST_NAME: cf-ibgp-loop
test-ibgp-loop-big:
<<: *test-base
variables:
TEST_NAME: cf-ibgp-loop-big
#test-ibgp-loop-big:
# <<: *test-base
# variables:
# TEST_NAME: cf-ibgp-loop-big
#
test-ibgp-flat:
<<: *test-base
variables:


@ -27,9 +27,9 @@ Requirements
For compiling BIRD you need these programs and libraries:
- GNU C Compiler (or LLVM Clang)
- GNU C Compiler (or LLVM Clang) capable of compiling C11 code
- GNU Make
- GNU Bison
- GNU Bison (at least 3.0)
- GNU M4
- Flex

NEWS

@ -1,3 +1,35 @@
Version 3.0alpha2 (2023-05-11)
o Fixed memory leaks and use-after-free bugs
o Simple thread work balancing
o MRT switched off
o Slow kernel route synchronization to be fixed later
Version 3.0alpha1 (2023-04-18)
o Worker threads for BGP, Pipe, RPKI and BFD
o Configurable number of threads
o Asynchronous route export
o Flat attribute structure
o Inline import tables
o Export tables merged with BGP prefix / attribute buckets
o Fixed ROA check locking inversion in route table dumps
o MRT switched off
Version 3.0-alpha0 (2022-02-07)
o Removal of fixed protocol-specific route attributes
o Asynchronous route export
o Explicit table import / export hooks
o Partially lockless route attribute cache
o Thread-safe resource management
o Thread-safe interface notifications
o Thread-safe protocol API
o Adoption of BFD IO loop for general use
o Parallel Pipe protocol
o Parallel RPKI protocol
o Parallel BGP protocol
o Lots of refactoring
o Bugfixes and improvements as they came along
Version 2.15.1 (2024-03-22)
o OSPF: Fix regression in handling PtP links
o RPKI: Handle connection resets properly

aclocal.m4

@ -1,5 +1,32 @@
dnl ** Additional Autoconf tests for BIRD configure script
dnl ** (c) 1999 Martin Mares <mj@ucw.cz>
dnl ** (c) 2021 Maria Matejka <mq@jmq.cz>
AC_DEFUN([BIRD_CHECK_POINTER_ALIGNMENT],
[
AC_CACHE_CHECK(
[how pointers are aligned],
[bird_cv_pointer_alignment],
AC_COMPILE_IFELSE([
AC_LANG_PROGRAM(
[
_Static_assert(_Alignof(void *) == 8, "bad");
], []
)
],
[bird_cv_pointer_alignment=8],
AC_COMPILE_IFELSE([
AC_LANG_PROGRAM(
[
_Static_assert(_Alignof(void *) == 4, "bad");
], []
)
],
[bird_cv_pointer_alignment=4],
[bird_cv_pointer_alignment=unknown]
))
)
])
AC_DEFUN([BIRD_CHECK_THREAD_LOCAL],
[
@ -9,14 +36,23 @@ AC_DEFUN([BIRD_CHECK_THREAD_LOCAL],
AC_COMPILE_IFELSE([
AC_LANG_PROGRAM(
[
_Thread_local static int x = 42;
static _Thread_local int x = 42;
],
[]
)
],
[bird_cv_thread_local=yes],
[AC_COMPILE_IFELSE([
AC_LANG_PROGRAM(
[
static __thread int x = 42;
],
[]
)
],
[bird_cv_thread_local=__thread],
[bird_cv_thread_local=no]
)
)])
)
])


@ -4,9 +4,10 @@ class BIRDPrinter:
@classmethod
def lookup(cls, val):
if val.type.code != cls.typeCode:
t = val.type.strip_typedefs()
if t.code != cls.typeCode:
return None
if val.type.tag != cls.typeTag:
if t.tag != cls.typeTag:
return None
return cls(val)
@ -25,7 +26,6 @@ class BIRDFValPrinter(BIRDPrinter):
"T_ENUM_RTS": "i",
"T_ENUM_BGP_ORIGIN": "i",
"T_ENUM_SCOPE": "i",
"T_ENUM_RTC": "i",
"T_ENUM_RTD": "i",
"T_ENUM_ROA": "i",
"T_ENUM_NETTYPE": "i",
@ -124,7 +124,7 @@ class BIRDFLinePrinter(BIRDPrinter):
"n": n,
"code": str(self.val['items'][n]['fi_code']),
} if n % 8 == 0 else str(self.val['items'][n]['fi_code']) for n in range(cnt)]))
class BIRDFExecStackPrinter(BIRDPrinter):
"Print BIRD's struct f_exec_stack"
@ -142,6 +142,317 @@ class BIRDFExecStackPrinter(BIRDPrinter):
"n": n
} for n in range(cnt-1, -1, -1) ])
class BIRD:
def skip_back(t, i, v):
if isinstance(t, str):
t = gdb.lookup_type(t)
elif isinstance(t, gdb.Value):
t = gdb.lookup_type(t.string())
elif not isinstance(t, gdb.Type):
raise Exception(f"First argument of skip_back(t, i, v) must be a type, got {type(t)}")
t = t.strip_typedefs()
nullptr = gdb.Value(0).cast(t.pointer())
if isinstance(i, gdb.Value):
i = i.string()
elif not isinstance(i, str):
raise Exception(f"Second argument of skip_back(t, i, v) must be a item name, got {type(i)}")
if not isinstance(v, gdb.Value):
raise Exception(f"Third argument of skip_back(t, i, v) must be a value, got {type(v)}")
if v.type.code != gdb.TYPE_CODE_PTR and v.type.code != gdb.TYPE_CODE_REF:
raise Exception(f"Third argument of skip_back(t, i, v) must be a pointer, is {v.type} ({v.type.code})")
if v.type.target().strip_typedefs() != nullptr[i].type:
raise Exception(f"Third argument of skip_back(t, i, v) points to type {v.type.target().strip_typedefs()}, should be {nullptr[i].type}")
uintptr_t = gdb.lookup_type("uintptr_t")
taddr = v.dereference().address.cast(uintptr_t) - nullptr[i].address.cast(uintptr_t)
return gdb.Value(taddr).cast(t.pointer())
class skip_back_gdb(gdb.Function):
"Given address of a structure item, returns address of the structure, as the SKIP_BACK macro does"
def __init__(self):
gdb.Function.__init__(self, "SKIP_BACK")
def invoke(self, t, i, v):
return BIRD.skip_back(t, i, v)
BIRD.skip_back_gdb()
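# Usage sketch from the gdb prompt (the type, item and variable names below
# are illustrative, not from a real session):
#
#   (gdb) print $SKIP_BACK("struct channel", "n", node_ptr)
#
# Given a pointer to the list node member 'n' embedded in a struct channel,
# this returns the address of the enclosing struct channel, mirroring what
# BIRD's SKIP_BACK() C macro does at compile time.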
class BIRDList:
def __init__(self, val):
ltype = val.type.strip_typedefs()
if ltype.code != gdb.TYPE_CODE_UNION or ltype.tag != "list":
raise Exception(f"Not a list, is type {ltype}")
self.head = val["head"]
self.tail_node = val["tail_node"]
if str(self.head.address) == '0x0':
raise Exception("List head is NULL")
if str(self.tail_node["prev"].address) == '0x0':
raise Exception("List tail is NULL")
def walk(self, do):
cur = self.head
while cur.dereference() != self.tail_node:
do(cur)
cur = cur.dereference()["next"]
class BIRDListLength(gdb.Function):
"""Returns length of the list, as in
print $list_length(routing_tables)"""
def __init__(self):
super(BIRDListLength, self).__init__("list_length")
def count(self, _):
self.cnt += 1
def invoke(self, l):
self.cnt = 0
BIRDList(l).walk(self.count)
return self.cnt
BIRDListLength()
class BIRDListItem(gdb.Function):
"""Returns n-th item of the list."""
def __init__(self):
super(BIRDListItem, self).__init__("list_item")
class BLException(Exception):
def __init__(self, node, msg):
Exception.__init__(self, msg)
self.node = node
def count(self, node):
if self.cnt == self.pos:
raise self.BLException(node, "Node found")
self.cnt += 1
def invoke(self, l, n, t=None, item="n"):
self.cnt = 0
self.pos = n
bl = BIRDList(l)
try:
bl.walk(self.count)
except self.BLException as e:
if t is None:
return e.node
else:
return BIRD.skip_back(t, item, e.node)
raise Exception("List too short")
BIRDListItem()
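# Usage sketch (hypothetical list and types): fetch the third node of a BIRD
# list and convert it to the enclosing structure in one step:
#
#   (gdb) print $list_item(some_proto->channels, 2, "struct channel", "n")
#
# Without the type argument, the raw node pointer is returned instead.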
class BIRDResourceSize():
def __init__(self, netto, overhead, free):
self.netto = netto
self.overhead = overhead
self.free = free
def __str__(self):
ns = str(self.netto)
os = str(self.overhead)
fs = str(self.free)
return "{: >12s} | {: >12s} | {: >12s}".format(ns, os, fs)
def __add__(self, val):
return BIRDResourceSize(self.netto + val.netto, self.overhead + val.overhead, self.free + val.free)
class BIRDResource():
def __init__(self, val):
self.val = val
def __str__(self):
return f"Item {self.val.address} of class \"{self.val['class']['name'].string()}\""
def memsize(self):
if str(self.val["class"]["memsize"]) == '0x0':
size = self.val["class"]["size"]
ressize = gdb.lookup_type("struct resource").sizeof
return BIRDResourceSize(size - ressize, ressize, 0)
else:
raise Exception(f"Resource class {self.val['class']['name']} with defined memsize() not known by Python")
def parse(self):
pass
class BIRDMBResource(BIRDResource):
def __init__(self, val):
self.mbtype = gdb.lookup_type("struct mblock")
self.val = val.cast(self.mbtype)
def memsize(self):
return BIRDResourceSize(self.val["size"], 8 + self.mbtype.sizeof, 0)
def __str__(self):
return f"Standalone memory block {self.val.address} of size {self.val['size']}, data at {self.val['data'].address}"
class BIRDLinPoolResource(BIRDResource):
def __init__(self, val):
self.lptype = gdb.lookup_type("struct linpool")
self.val = val.cast(self.lptype)
self.info = None
def count_chunk(self, which):
cnt = 0
chunk = self.val[which]
while str(chunk) != '0x0':
cnt += 1
chunk = chunk.dereference()["next"]
return cnt
def parse(self):
self.info = {
"std_chunks": self.count_chunk("first"),
"large_chunks": self.count_chunk("first_large"),
}
def memsize(self):
if self.info is None:
self.parse()
overhead = (8 - 8*self.val["use_pages"]) + gdb.lookup_type("struct lp_chunk").sizeof
return BIRDResourceSize(
self.val["total"] + self.val["total_large"],
(self.info["std_chunks"] + self.info["large_chunks"]) * overhead,
0)
def __str__(self):
if self.info is None:
self.parse()
return f"Linpool {self.val.address} with {self.info['std_chunks']} standard chunks of size {self.val['chunk_size']} and {self.info['large_chunks']} large chunks"
class BIRDSlabResource(BIRDResource):
def __init__(self, val):
self.slabtype = gdb.lookup_type("struct slab")
self.val = val.cast(self.slabtype)
self.info = None
def count_heads_item(self, item):
self.hcnt += 1
self.used += item.dereference().cast(self.slheadtype)["num_full"]
def count_heads(self, which):
self.hcnt = 0
self.used = 0
BIRDList(self.val[which + "_heads"]).walk(self.count_heads_item)
self.info[which + "_heads"] = self.hcnt
self.info[which + "_used"] = self.used
return (self.hcnt, self.used)
def parse(self):
self.page_size = gdb.lookup_symbol("page_size")[0].value()
self.slheadtype = gdb.lookup_type("struct sl_head")
self.info = {}
self.count_heads("empty")
self.count_heads("partial")
self.count_heads("full")
def memsize(self):
if self.info is None:
self.parse()
total_used = self.info["empty_used"] + self.info["partial_used"] + self.info["full_used"]
total_heads = self.info["empty_heads"] + self.info["partial_heads"] + self.info["full_heads"]
eff_size = total_used * self.val["obj_size"]
free_size = self.info["empty_heads"] * self.page_size
total_size = total_heads * self.page_size + self.slabtype.sizeof
return BIRDResourceSize( eff_size, total_size - free_size - eff_size, free_size)
def __str__(self):
if self.info is None:
self.parse()
return f"Slab {self.val.address} " + ", ".join([
f"{self.info[x + '_heads']} {x} heads" for x in [ "empty", "partial", "full" ]]) + \
f", {self.val['objs_per_slab']} objects of size {self.val['obj_size']} per head"
class BIRDPoolResource(BIRDResource):
def __init__(self, val):
self.pooltype = gdb.lookup_type("struct pool")
self.resptrtype = gdb.lookup_type("struct resource").pointer()
self.page_size = gdb.lookup_symbol("page_size")[0].value()
self.val = val.cast(self.pooltype)
self.items = None
def parse_inside(self, val):
self.items.append(BIRDNewResource(val.cast(self.resptrtype).dereference()))
def parse(self):
self.items = []
BIRDList(self.val["inside"]).walk(self.parse_inside)
def memsize(self):
if self.items is None:
self.parse()
sum = BIRDResourceSize(0, self.pooltype.sizeof, 0)
# for i in self.items:
# sum += i.memsize()
return sum
def __str__(self):
if self.items is None:
self.parse()
# for i in self.items:
# print(i)
return f"Resource pool {self.val.address} \"{self.val['name'].string()}\" containing {len(self.items)} items"
BIRDResourceMap = {
"mbl_memsize": BIRDMBResource,
"pool_memsize": BIRDPoolResource,
"lp_memsize": BIRDLinPoolResource,
"slab_memsize": BIRDSlabResource,
}
def BIRDNewResource(res):
cms = res["class"].dereference()["memsize"]
for cx in BIRDResourceMap:
if cms == gdb.lookup_symbol(cx)[0].value():
return BIRDResourceMap[cx](res)
return BIRDResource(res)
class BIRDResourcePrinter(BIRDPrinter):
"Print BIRD's resource"
typeCode = gdb.TYPE_CODE_STRUCT
typeTag = "resource"
def __init__(self, val):
super(BIRDResourcePrinter, self).__init__(val)
self.resource = BIRDNewResource(val)
self.resource.parse()
self.resourcetype = gdb.lookup_type("struct resource")
if type(self.resource) == BIRDPoolResource:
self.children = self.pool_children
def pool_children(self):
return iter([ ("\n", i.val.cast(self.resourcetype)) for i in self.resource.items ])
def to_string(self):
return f"[ {str(self.resource.memsize())} ] {str(self.resource)}"
def register_printers(objfile):
objfile.pretty_printers.append(BIRDFInstPrinter.lookup)
objfile.pretty_printers.append(BIRDFValPrinter.lookup)
@ -149,6 +460,7 @@ def register_printers(objfile):
objfile.pretty_printers.append(BIRDFLineItemPrinter.lookup)
objfile.pretty_printers.append(BIRDFLinePrinter.lookup)
objfile.pretty_printers.append(BIRDFExecStackPrinter.lookup)
objfile.pretty_printers.append(BIRDResourcePrinter.lookup)
register_printers(gdb.current_objfile())


@ -73,9 +73,18 @@ static uint cf_hash(const byte *c);
HASH_DEFINE_REHASH_FN(SYM, struct symbol)
struct sym_scope *global_root_scope;
/* Global symbol scopes */
pool *global_root_scope_pool;
linpool *global_root_scope_linpool;
struct sym_scope
global_root_scope = {
},
global_filter_scope = {
.next = &global_root_scope,
};
/* Local symbol scope: TODO this isn't thread-safe */
struct sym_scope *conf_this_scope;
linpool *cfg_mem;
@ -323,7 +332,7 @@ else: {
return DDOT;
}
[={}:;,.()+*/%<>~\[\]?!\|-] {
[={}:;,.()+*/%<>~\[\]?!\|&-] {
return yytext[0];
}
@ -567,7 +576,7 @@ cf_new_symbol(struct sym_scope *scope, pool *p, struct linpool *lp, const byte *
s = lp_alloc(lp, sizeof(struct symbol) + l + 1);
*s = (struct symbol) { .scope = scope, .class = SYM_VOID, };
strcpy(s->name, c);
memcpy(s->name, c, l+1);
if (!scope->hash.data)
HASH_INIT(scope->hash, p, SYM_ORDER);
@ -580,6 +589,31 @@ cf_new_symbol(struct sym_scope *scope, pool *p, struct linpool *lp, const byte *
return s;
}
struct symbol *
cf_root_symbol(const byte *c, struct sym_scope *ss)
{
uint l = strlen(c);
if (l > SYM_MAX_LEN)
bug("Root symbol %s too long", c);
if (!global_root_scope_pool)
{
global_root_scope_pool = rp_new(&root_pool, the_bird_domain.the_bird, "Keywords pool");
global_root_scope_linpool = lp_new(global_root_scope_pool);
}
struct symbol *s = lp_alloc(global_root_scope_linpool, sizeof(struct symbol) + l + 1);
*s = (struct symbol) { .scope = ss, .class = SYM_VOID, };
memcpy(s->name, c, l+1);
if (!ss->hash.data)
HASH_INIT(ss->hash, &root_pool, SYM_ORDER);
HASH_INSERT2(ss->hash, SYM, &root_pool, s);
return s;
}
/**
* cf_find_symbol_scope - find a symbol by name
* @scope: config scope
@ -598,7 +632,8 @@ cf_find_symbol_scope(const struct sym_scope *scope, const byte *c)
/* Find the symbol here or anywhere below */
while (scope)
if (scope->hash.data && (s = HASH_FIND(scope->hash, SYM, c)))
if (((scope != &global_filter_scope) || !new_config || new_config->allow_attributes) &&
scope->hash.data && (s = HASH_FIND(scope->hash, SYM, c)))
return s;
else
scope = scope->next;
@ -691,6 +726,33 @@ cf_lex_symbol(const char *data)
}
}
void
ea_lex_register(struct ea_class *def)
{
def->sym = cf_root_symbol(def->name, &global_filter_scope);
def->sym->class = SYM_ATTRIBUTE;
def->sym->attribute = def;
}
#if 0
/* When we start to support complete protocol removal, we may need this function */
void
ea_lex_unregister(struct ea_class *def)
{
struct symbol *sym = def->sym;
HASH_REMOVE2(global_filter_scope.hash, SYM, &root_pool, sym);
mb_free(sym);
def->sym = NULL;
}
#endif
struct ea_class *
ea_class_find_by_name(const char *name)
{
struct symbol *sym = cf_find_symbol_scope(config ? config->root_scope : &global_filter_scope, name);
return sym && (sym->class == SYM_ATTRIBUTE) ? sym->attribute : NULL;
}
void f_type_methods_register(void);
/**
@ -704,20 +766,17 @@ void f_type_methods_register(void);
void
cf_lex_init(int is_cli, struct config *c)
{
if (!global_root_scope_pool)
if (!global_root_scope.readonly)
{
global_root_scope_pool = rp_new(&root_pool, "Keywords pool");
global_root_scope_linpool = lp_new(global_root_scope_pool);
global_root_scope = lp_allocz(global_root_scope_linpool, sizeof(*global_root_scope));
for (const struct keyword *k = keyword_list; k->name; k++)
{
struct symbol *sym = cf_new_symbol(global_root_scope, global_root_scope_pool, global_root_scope_linpool, k->name);
sym->class = SYM_KEYWORD;
sym->keyword = k;
struct symbol *s = cf_root_symbol(k->name, &global_root_scope);
s->class = SYM_KEYWORD;
s->keyword = k;
}
global_root_scope->readonly = 1;
global_root_scope.readonly = 1;
global_filter_scope.readonly = 1;
f_type_methods_register();
}
@ -739,12 +798,11 @@ cf_lex_init(int is_cli, struct config *c)
BEGIN(INITIAL);
c->root_scope = c->current_scope = cfg_allocz(sizeof(struct sym_scope));
c->root_scope->active = 1;
if (is_cli)
c->current_scope->next = config->root_scope;
else
c->current_scope->next = global_root_scope;
c->current_scope->next = &global_filter_scope;
}
/**
@ -764,7 +822,6 @@ cf_push_scope(struct config *conf, struct symbol *sym)
s->next = conf->current_scope;
conf->current_scope = s;
s->active = 1;
s->name = sym;
s->slots = 0;
}
@ -780,10 +837,7 @@ void
cf_pop_scope(struct config *conf)
{
ASSERT(!conf->current_scope->soft_scopes);
conf->current_scope->active = 0;
conf->current_scope = conf->current_scope->next;
ASSERT(conf->current_scope);
}
@ -833,6 +887,41 @@ cf_swap_soft_scope(struct config *conf)
}
}
/**
* cf_enter_filters - enable filter / route attributes namespace
*/
void
cf_enter_filters(void)
{
ASSERT_DIE(!cf_maybe_enter_filters());
}
int
cf_maybe_enter_filters(void)
{
int o = new_config->allow_attributes;
new_config->allow_attributes = 1;
return o;
}
/**
* cf_exit_filters - disable filter / route attributes namespace
*/
void
cf_exit_filters(void)
{
ASSERT_DIE(cf_maybe_exit_filters());
}
int
cf_maybe_exit_filters(void)
{
int o = new_config->allow_attributes;
new_config->allow_attributes = 0;
return o;
}
/**
* cf_symbol_class_name - get name of a symbol class
* @sym: symbol
@ -863,6 +952,8 @@ cf_symbol_class_name(struct symbol *sym)
return "MPLS domain";
case SYM_MPLS_RANGE:
return "MPLS label range";
case SYM_KEYWORD:
return "symbol";
case SYM_CONSTANT_RANGE:
return "constant";
case SYM_VARIABLE_RANGE:


@ -61,7 +61,8 @@
static jmp_buf conf_jmpbuf;
struct config *config, *new_config;
struct config *config;
_Thread_local struct config *new_config;
pool *config_pool;
static struct config *old_config; /* Old configuration */
@ -71,7 +72,7 @@ static int future_cftype; /* Type of scheduled transition, may also be RECONFIG
/* Note that when future_cftype is RECONFIG_UNDO, then future_config is NULL,
therefore proper check for future scheduled config checks future_cftype */
static event *config_event; /* Event for finalizing reconfiguration */
static void config_done(void *cf);
static timer *config_timer; /* Timer for scheduled configuration rollback */
/* These are public just for cmd_show_status(), should not be accessed elsewhere */
@ -91,7 +92,7 @@ int undo_available; /* Undo was not requested from last reconfiguration */
struct config *
config_alloc(const char *name)
{
pool *p = rp_new(config_pool, "Config");
pool *p = rp_new(config_pool, the_bird_domain.the_bird, "Config");
linpool *l = lp_new_default(p);
struct config *c = lp_allocz(l, sizeof(struct config));
@ -102,7 +103,6 @@ config_alloc(const char *name)
init_list(&c->tests);
init_list(&c->symbols);
c->mrtdump_file = -1; /* Hack, this should be sysdep-specific */
c->pool = p;
c->mem = l;
c->file_name = ndup;
@ -111,6 +111,8 @@ config_alloc(const char *name)
c->tf_base = c->tf_log = TM_ISO_LONG_MS;
c->gr_wait = DEFAULT_GR_WAIT;
c->done_event = (event) { .hook = config_done, .data = c, };
return c;
}
@ -202,7 +204,7 @@ config_free(struct config *c)
ASSERT(!c->obstacle_count);
rfree(c->pool);
rp_free(c->pool);
}
/**
@ -230,29 +232,20 @@ void
config_add_obstacle(struct config *c)
{
DBG("+++ adding obstacle %d\n", c->obstacle_count);
c->obstacle_count++;
atomic_fetch_add_explicit(&c->obstacle_count, 1, memory_order_acq_rel);
}
void
config_del_obstacle(struct config *c)
{
DBG("+++ deleting obstacle %d\n", c->obstacle_count);
c->obstacle_count--;
if (!c->obstacle_count && (c != config))
ev_schedule(config_event);
if (atomic_fetch_sub_explicit(&c->obstacle_count, 1, memory_order_acq_rel) == 1)
ev_send_loop(&main_birdloop, &c->done_event);
}
static int
global_commit(struct config *new, struct config *old)
{
if (!new->hostname)
{
new->hostname = get_hostname(new->mem);
if (!new->hostname)
log(L_WARN "Cannot determine hostname");
}
if (!old)
return 0;
@ -288,6 +281,14 @@ config_do_commit(struct config *c, int type)
old_cftype = type;
config = c;
if (!c->hostname)
{
c->hostname = get_hostname(c->mem);
if (!c->hostname)
log(L_WARN "Cannot determine hostname");
}
configuring = 1;
if (old_config && !config->shutdown)
log(L_INFO "Reconfiguring");
@ -316,8 +317,11 @@ config_do_commit(struct config *c, int type)
}
static void
config_done(void *unused UNUSED)
config_done(void *cf)
{
if (cf == config)
return;
if (config->shutdown)
sysdep_shutdown_done();
@ -518,10 +522,7 @@ config_timeout(timer *t UNUSED)
void
config_init(void)
{
config_pool = rp_new(&root_pool, "Configurations");
config_event = ev_new(config_pool);
config_event->hook = config_done;
config_pool = rp_new(&root_pool, the_bird_domain.the_bird, "Configurations");
config_timer = tm_new(config_pool);
config_timer->hook = config_timeout;


@ -26,9 +26,9 @@ struct config {
list tests; /* Configured unit tests (f_bt_test_suite) */
list symbols; /* Configured symbols in config order */
int mrtdump_file; /* Configured MRTDump file (sysdep, fd in unix) */
struct rfile *mrtdump_file; /* Configured MRTDump file */
const char *syslog_name; /* Name used for syslog (NULL -> no syslog) */
struct rtable_config *def_tables[NET_MAX]; /* Default routing tables for each network */
struct symbol *def_tables[NET_MAX]; /* Default routing tables for each network */
struct iface_patt *router_id_from; /* Configured list of router ID iface patterns */
u32 router_id; /* Our Router ID */
@ -36,6 +36,7 @@ struct config {
u32 proto_default_mrtdump; /* Default protocol mrtdump mask */
u32 channel_default_debug; /* Default channel debug mask */
u32 table_default_debug; /* Default table debug mask */
u16 filter_vstk, filter_estk; /* Filter stack depth */
struct timeformat tf_route; /* Time format for 'show route' */
struct timeformat tf_proto; /* Time format for 'show protocol' */
struct timeformat tf_log; /* Time format for the logfile */
@ -54,10 +55,13 @@ struct config {
char *err_file_name; /* File name containing error */
char *file_name; /* Name of main configuration file */
int file_fd; /* File descriptor of main configuration file */
int thread_count; /* How many worker threads to prefork */
struct sym_scope *root_scope; /* Scope for root symbols */
struct sym_scope *current_scope; /* Current scope where we are actually in while parsing */
int obstacle_count; /* Number of items blocking freeing of this config */
int allow_attributes; /* Allow attributes in the current state of configuration parsing */
_Atomic int obstacle_count; /* Number of items blocking freeing of this config */
event done_event; /* Called when obstacle_count reaches zero */
int shutdown; /* This is a pseudo-config for daemon shutdown */
int gr_down; /* This is a pseudo-config for graceful restart */
btime load_time; /* When we've got this configuration */
@ -65,7 +69,7 @@ struct config {
/* Please don't use these variables in protocols. Use proto_config->global instead. */
extern struct config *config; /* Currently active configuration */
extern struct config *new_config; /* Configuration being parsed */
extern _Thread_local struct config *new_config; /* Configuration being parsed */
struct config *config_alloc(const char *name);
int config_parse(struct config *);
@ -129,7 +133,7 @@ struct symbol {
const struct f_line *function; /* For SYM_FUNCTION */
const struct filter *filter; /* For SYM_FILTER */
struct rtable_config *table; /* For SYM_TABLE */
struct f_dynamic_attr *attribute; /* For SYM_ATTRIBUTE */
struct ea_class *attribute; /* For SYM_ATTRIBUTE */
struct mpls_domain_config *mpls_domain; /* For SYM_MPLS_DOMAIN */
struct mpls_range_config *mpls_range; /* For SYM_MPLS_RANGE */
struct f_val *val; /* For SYM_CONSTANT */
@ -149,12 +153,15 @@ struct sym_scope {
uint slots; /* Variable slots */
byte soft_scopes; /* Number of soft scopes above */
byte active:1; /* Currently entered */
byte block:1; /* No independent stack frame */
byte readonly:1; /* Do not add new symbols */
};
extern struct sym_scope *global_root_scope;
void cf_enter_filters(void);
void cf_exit_filters(void);
int cf_maybe_enter_filters(void);
int cf_maybe_exit_filters(void);
extern pool *global_root_scope_pool;
extern linpool *global_root_scope_linpool;
@ -222,6 +229,7 @@ static inline int cf_symbol_is_local(struct config *conf, struct symbol *sym)
/* internal */
struct symbol *cf_new_symbol(struct sym_scope *scope, pool *p, struct linpool *lp, const byte *c);
struct symbol *cf_root_symbol(const byte *, struct sym_scope *);
/**
* cf_define_symbol - define meaning of a symbol
@ -244,9 +252,6 @@ struct symbol *cf_new_symbol(struct sym_scope *scope, pool *p, struct linpool *l
sym_->var_ = def_; \
sym_; })
#define cf_create_symbol(conf_, name_, type_, var_, def_) \
cf_define_symbol(conf_, cf_get_symbol(conf_, name_), type_, var_, def_)
void cf_push_scope(struct config *, struct symbol *);
void cf_pop_scope(struct config *);
void cf_push_soft_scope(struct config *);


@ -14,6 +14,7 @@ CF_HDR
#include "conf/conf.h"
#include "lib/resource.h"
#include "lib/socket.h"
#include "lib/settle.h"
#include "lib/timer.h"
#include "lib/string.h"
#include "nest/protocol.h"
@ -27,6 +28,8 @@ CF_HDR
CF_DEFINES
static _Bool this_sadr_from_hack_active;
static void
check_u16(uint val)
{
@ -63,7 +66,6 @@ CF_DECLS
net_addr net;
net_addr *net_ptr;
struct symbol *s;
struct keyword *kw;
const char *t;
struct rtable_config *r;
struct channel_config *cc;
@ -74,14 +76,16 @@ CF_DECLS
} xp;
enum filter_return fret;
enum ec_subtype ecs;
struct f_dynamic_attr fda;
struct ea_class *ea_class;
struct f_static_attr fsa;
struct f_attr_bit fab;
struct f_lval flv;
struct f_line *fl;
struct f_arg *fa;
const struct filter *f;
struct f_tree *e;
struct f_trie *trie;
const struct f_trie *const_trie;
struct f_val v;
struct password_item *p;
struct rt_show_data *ra;
@ -98,7 +102,8 @@ CF_DECLS
struct table_spec ts;
struct channel_limit cl;
struct timeformat *tf;
mpls_label_stack *mls;
struct settle_config settle;
struct adata *ad;
const struct adata *bs;
struct aggr_item_node *ai;
struct logging_rate_targets *lrt;
@ -120,14 +125,15 @@ CF_DECLS
%type <i> expr bool pxlen4
%type <time> expr_us time
%type <a> ipa
%type <settle> settle
%type <a> ipa net_ip6_slash
%type <net> net_ip4_ net_ip4 net_ip6_ net_ip6 net_ip_ net_ip net_or_ipa
%type <net_ptr> net_ net_any net_vpn4_ net_vpn6_ net_vpn_ net_roa4_ net_roa6_ net_roa_ net_ip6_sadr_ net_mpls_
%type <mls> label_stack_start label_stack
%type <ad> label_stack_start label_stack
%type <t> text opttext
%type <bs> bytestring
%type <s> symbol symbol_known
%type <s> symbol
%type <v> bytestring_text text_or_ipa
%type <x> bytestring_expr
@ -135,6 +141,7 @@ CF_DECLS
%nonassoc PREFIX_DUMMY
%left AND OR
%nonassoc '=' '<' '>' '~' GEQ LEQ NEQ NMA IMP PO PC
%left '|' '&'
%left '+' '-'
%left '*' '/' '%'
%left '!'
@ -166,8 +173,7 @@ conf: definition ;
definition:
DEFINE symbol '=' term ';' {
struct f_val *val = cfg_allocz(sizeof(struct f_val));
*val = cf_eval($4, T_VOID);
struct f_val *val = cf_eval($4, T_VOID);
cf_define_symbol(new_config, $2, SYM_CONSTANT | val->type, val, val);
}
;
@ -175,7 +181,7 @@ definition:
expr:
NUM
| '(' term ')' { $$ = cf_eval_int($2); }
| symbol_known {
| CF_SYM_KNOWN {
if ($1->class != (SYM_CONSTANT | T_INT)) cf_error("Number constant expected");
$$ = SYM_VAL($1).i; }
;
@ -186,8 +192,7 @@ expr_us:
| expr US { $$ = $1 US_; }
;
symbol: CF_SYM_UNDEFINED | CF_SYM_KNOWN | KEYWORD ;
symbol_known: CF_SYM_KNOWN ;
symbol: CF_SYM_UNDEFINED | CF_SYM_KNOWN ;
/* Switches */
@ -237,12 +242,24 @@ net_ip4_: IP4 pxlen4
n->prefix, n->pxlen, ip4_and(n->prefix, ip4_mkmask(n->pxlen)), n->pxlen);
};
net_ip6_: IP6 '/' NUM
net_ip6_slash: IP6 '/'
{
if ($3 > IP6_MAX_PREFIX_LENGTH)
cf_error("Invalid prefix length %u", $3);
this_sadr_from_hack_active = cf_maybe_exit_filters();
$$ = $1;
}
net_fill_ip6(&($$), $1, $3);
net_ip6_: net_ip6_slash NUM
{
if (this_sadr_from_hack_active)
{
cf_enter_filters();
this_sadr_from_hack_active = 0;
}
if ($2 > IP6_MAX_PREFIX_LENGTH)
cf_error("Invalid prefix length %u", $2);
net_fill_ip6(&($$), $1, $2);
net_addr_ip6 *n = (void *) &($$);
if (!net_validate_ip6(n))
@ -250,16 +267,25 @@ net_ip6_: IP6 '/' NUM
n->prefix, n->pxlen, ip6_and(n->prefix, ip6_mkmask(n->pxlen)), n->pxlen);
};
net_ip6_sadr_: IP6 '/' NUM FROM IP6 '/' NUM
net_ip6_sadr_: net_ip6_slash NUM FROM IP6 '/' NUM
{
if ($3 > IP6_MAX_PREFIX_LENGTH)
cf_error("Invalid prefix length %u", $3);
if (this_sadr_from_hack_active)
{
cf_enter_filters();
this_sadr_from_hack_active = 0;
}
if ($7 > IP6_MAX_PREFIX_LENGTH)
cf_error("Invalid prefix length %u", $7);
if (($3->class != SYM_KEYWORD) || ($3->keyword->value != FROM))
cf_error("Expected FROM after %I6/%d", $1, $2);
if ($2 > IP6_MAX_PREFIX_LENGTH)
cf_error("Invalid prefix length %u", $2);
if ($6 > IP6_MAX_PREFIX_LENGTH)
cf_error("Invalid prefix length %u", $6);
$$ = cfg_alloc(sizeof(net_addr_ip6_sadr));
net_fill_ip6_sadr($$, $1, $3, $5, $7);
net_fill_ip6_sadr($$, $1, $2, $4, $6);
net_addr_ip6_sadr *n = (void *) $$;
if (!net_validate_ip6_sadr(n))
@ -372,17 +398,19 @@ net_or_ipa:
label_stack_start: NUM
{
$$ = cfg_allocz(sizeof(mpls_label_stack));
$$->len = 1;
$$->stack[0] = $1;
$$ = cfg_allocz(ADATA_SIZE(MPLS_MAX_LABEL_STACK * sizeof(u32)));
$$->length = sizeof(u32);
*((u32 *)$$->data) = $1;
};
label_stack:
label_stack_start
| label_stack '/' NUM {
if ($1->len >= MPLS_MAX_LABEL_STACK)
if ($1->length >= MPLS_MAX_LABEL_STACK * sizeof(u32))
cf_error("Too many labels in stack");
$1->stack[$1->len++] = $3;
*((u32 *)($$->data + $1->length)) = $3;
$1->length += sizeof(u32);
$$ = $1;
}
;
@ -395,6 +423,13 @@ time:
}
;
/* Settle timer configuration */
settle: expr_us expr_us {
if ($1 > $2) cf_error("Minimum settle time %t is bigger than maximum settle time %t", $1, $2);
$$.min = $1;
$$.max = $2;
};
text:
TEXT
| CF_SYM_KNOWN {
@ -420,7 +455,7 @@ text_or_ipa:
cf_error("String or IP constant expected");
}
| '(' term ')' {
$$ = cf_eval($2, T_VOID);
$$ = *cf_eval($2, T_VOID);
if (($$.type != T_BYTESTRING) && ($$.type != T_STRING))
cf_error("Bytestring or string value expected");
}
@ -428,21 +463,21 @@ text_or_ipa:
bytestring:
BYTETEXT
| bytestring_expr { $$ = cf_eval($1, T_BYTESTRING).val.bs; }
| bytestring_expr { $$ = cf_eval($1, T_BYTESTRING)->val.bs; }
;
bytestring_text:
BYTETEXT { $$.type = T_BYTESTRING; $$.val.bs = $1; }
| TEXT { $$.type = T_STRING; $$.val.s = $1; }
| bytestring_expr {
$$ = cf_eval($1, T_VOID);
$$ = *cf_eval($1, T_VOID);
if (($$.type != T_BYTESTRING) && ($$.type != T_STRING))
cf_error("Bytestring or string value expected");
}
;
bytestring_expr:
symbol_value
lvalue { $$ = f_lval_getter(&$1); }
| term_bs
| '(' term ')' { $$ = $2; }
;


@ -27,7 +27,6 @@ m4_define(CF_handle_kw, `m4_divert(1){ "m4_translit($1,[[A-Z]],[[a-z]])", $1 },
m4_divert(-1)')
m4_define(CF_keywd, `m4_ifdef([[CF_tok_$1]],,[[m4_define([[CF_tok_$1]],1)CF_handle_kw($1)]])')
m4_define(CF_KEYWORDS, `CF_iterate([[CF_keywd]], [[$@]])DNL')
m4_define(CF_KEYWORDS_EXCLUSIVE, `CF_KEYWORDS($@)')
# CLI commands generate keywords as well
m4_define(CF_CLI, `CF_KEYWORDS(m4_translit($1, [[ ]], [[,]]))
@ -40,7 +39,7 @@ m4_divert(-1)')
m4_define(CF_ENUM, `m4_define([[CF_enum_type]],$1)m4_define([[CF_enum_prefix_ext]],$2)m4_define([[CF_enum_prefix_int]],$2)CF_iterate([[CF_enum]], [[m4_shift(m4_shift($@))]])DNL')
m4_define(CF_ENUM_PX, `m4_define([[CF_enum_type]],$1)m4_define([[CF_enum_prefix_ext]],$2)m4_define([[CF_enum_prefix_int]],$3)CF_iterate([[CF_enum]], [[m4_shift(m4_shift(m4_shift($@)))]])DNL')
# After all configuration templates end, we generate the
# After all configuration templates end, we generate the keyword list
m4_m4wrap(`
m4_divert(0)
static const struct keyword keyword_list[] = {


@ -29,19 +29,13 @@ m4_define(CF_END, `m4_divert(-1)')
m4_define(CF_itera, `m4_ifelse($#, 1, [[CF_iter($1)]], [[CF_iter($1)[[]]CF_itera(m4_shift($@))]])')
m4_define(CF_iterate, `m4_define([[CF_iter]], m4_defn([[$1]]))CF_itera($2)')
m4_define(CF_append, `m4_define([[$1]], m4_ifdef([[$1]], m4_defn([[$1]])[[$3]])[[$2]])')
# Keywords act as %token<s>
m4_define(CF_keywd, `m4_ifdef([[CF_tok_$1]],,[[m4_define([[CF_tok_$1]],1)CF_append([[CF_kw_rule]],$1,[[ | ]])m4_define([[CF_toks]],CF_toks $1)]])')
# Keywords act as untyped %token
m4_define(CF_keywd, `m4_ifdef([[CF_tok_$1]],,[[m4_define([[CF_tok_$1]],1)m4_define([[CF_toks]],CF_toks $1)]])')
m4_define(CF_KEYWORDS, `m4_define([[CF_toks]],[[]])CF_iterate([[CF_keywd]], [[$@]])m4_ifelse(CF_toks,,,%token<s>[[]]CF_toks
)DNL')
m4_define(CF_METHODS, `m4_define([[CF_toks]],[[]])CF_iterate([[CF_keywd]], [[$@]])m4_ifelse(CF_toks,,,%token<s>[[]]CF_toks
)DNL')
m4_define(CF_keywd2, `m4_ifdef([[CF_tok_$1]],,[[m4_define([[CF_tok_$1]],1)m4_define([[CF_toks]],CF_toks $1)]])')
m4_define(CF_KEYWORDS_EXCLUSIVE, `m4_define([[CF_toks]],[[]])CF_iterate([[CF_keywd2]], [[$@]])m4_ifelse(CF_toks,,,%token<s>[[]]CF_toks
)DNL')
# CLI commands
m4_define(CF_CLI, `m4_define([[CF_cmd]], cmd_[[]]m4_translit($1, [[ ]], _))DNL
m4_divert(2)CF_KEYWORDS(m4_translit($1, [[ ]], [[,]]))
@ -64,11 +58,7 @@ m4_undivert(1)DNL
m4_undivert(2)DNL
%type <s> KEYWORD
%%
KEYWORD: CF_kw_rule;
m4_undivert(3)DNL
%%


@ -42,12 +42,6 @@ AC_ARG_ENABLE([compact-tries],
[enable_compact_tries=no]
)
AC_ARG_ENABLE([pthreads],
[AS_HELP_STRING([--enable-pthreads], [enable POSIX threads support @<:@try@:>@])],
[],
[enable_pthreads=try]
)
AC_ARG_ENABLE([libssh],
[AS_HELP_STRING([--enable-libssh], [enable LibSSH support in RPKI @<:@try@:>@])],
[],
@ -131,25 +125,19 @@ if test -z "$GCC" ; then
fi
BIRD_CHECK_THREAD_LOCAL
if test "$bird_cv_thread_local" = yes ; then
AC_DEFINE([HAVE_THREAD_LOCAL], [1], [Define to 1 if _Thread_local is available])
if test "$bird_cv_thread_local" = no ; then
AC_MSG_ERROR([This program requires thread local storage.])
elif test "$bird_cv_thread_local" != yes ; then
AC_DEFINE_UNQUOTED([_Thread_local], [$bird_cv_thread_local], [Legacy _Thread_local])
fi
if test "$enable_pthreads" != no ; then
BIRD_CHECK_PTHREADS
BIRD_CHECK_PTHREADS
if test "$bird_cv_lib_pthreads" = yes ; then
AC_DEFINE([USE_PTHREADS], [1], [Define to 1 if pthreads are enabled])
CFLAGS="$CFLAGS -pthread"
LDFLAGS="$LDFLAGS -pthread"
proto_bfd=bfd
elif test "$enable_pthreads" = yes ; then
AC_MSG_ERROR([POSIX threads not available.])
fi
if test "$enable_pthreads" = try ; then
enable_pthreads="$bird_cv_lib_pthreads"
fi
if test "$bird_cv_lib_pthreads" = yes ; then
CFLAGS="$CFLAGS -pthread"
LDFLAGS="$LDFLAGS -pthread"
else
AC_MSG_ERROR([POSIX threads not available.])
fi
# This is assumed to be necessary for proper BIRD build
@ -159,6 +147,7 @@ if test "$bird_cflags_default" = yes ; then
BIRD_CHECK_GCC_OPTION([bird_cv_c_option_wno_pointer_sign], [-Wno-pointer-sign], [-Wall])
BIRD_CHECK_GCC_OPTION([bird_cv_c_option_wno_missing_init], [-Wno-missing-field-initializers], [-Wall -Wextra])
if test "$enable_debug" = no; then
BIRD_CHECK_LTO
fi
@ -296,6 +285,12 @@ if test "$enable_libssh" != no ; then
enable_libssh=no
fi
fi
AC_CHECK_LIB([ssh], [ssh_session_is_known_server], [ssh_old_server_validation_api=no], [ssh_old_server_validation_api=yes])
if test "$ssh_old_server_validation_api" = yes; then
AC_DEFINE([HAVE_SSH_OLD_SERVER_VALIDATION_API], [1], [Define to 1 if ssh_session_is_known_server isn't defined])
fi
fi
if test "$enable_mpls_kernel" != no ; then
@ -312,8 +307,8 @@ if test "$enable_mpls_kernel" != no ; then
fi
fi
all_protocols="aggregator $proto_bfd babel bgp l3vpn mrt ospf perf pipe radv rip rpki static"
# temporarily removed "mrt" from all_protocols to speed up 3.0-alpha1 release
all_protocols="aggregator bfd babel bgp l3vpn ospf perf pipe radv rip rpki static"
all_protocols=`echo $all_protocols | sed 's/ /,/g'`
if test "$with_protocols" = all ; then
@ -361,16 +356,29 @@ case $sysdesc in
;;
esac
AC_CHECK_HEADERS_ONCE([alloca.h syslog.h])
AC_CHECK_HEADER([sys/mman.h], [AC_DEFINE([HAVE_MMAP], [1], [Define to 1 if mmap() is available.])])
AC_CHECK_HEADERS_ONCE([alloca.h syslog.h stdatomic.h])
AC_CHECK_HEADER([sys/mman.h], [AC_DEFINE([HAVE_MMAP], [1], [Define to 1 if mmap() is available.])], have_mman=no)
AC_CHECK_FUNC([aligned_alloc], [AC_DEFINE([HAVE_ALIGNED_ALLOC], [1], [Define to 1 if aligned_alloc() is available.])], have_aligned_alloc=no)
AC_CHECK_MEMBERS([struct sockaddr.sa_len], [], [], [#include <sys/socket.h>])
if test "$have_aligned_alloc" = "no" && test "$have_mman" = "no" ; then
AC_MSG_ERROR([No means of aligned alloc found. Need mmap() or aligned_alloc().])
fi
AC_C_BIGENDIAN(
[AC_DEFINE([CPU_BIG_ENDIAN], [1], [Define to 1 if cpu is big endian])],
[AC_DEFINE([CPU_LITTLE_ENDIAN], [1], [Define to 1 if cpu is little endian])],
[AC_MSG_ERROR([Cannot determine CPU endianity.])]
)
BIRD_CHECK_POINTER_ALIGNMENT
if test "$bird_cv_pointer_alignment" = "unknown" ; then
AC_MSG_ERROR([Couldn't determine pointer alignment])
else
AC_DEFINE_UNQUOTED([CPU_POINTER_ALIGNMENT], [$bird_cv_pointer_alignment], [Pointer alignment for macro usage])
fi
BIRD_CHECK_ANDROID_GLOB
if test "$bird_cv_lib_glob" = no ; then
AC_MSG_ERROR([glob.h not found.])
@ -482,7 +490,6 @@ AC_MSG_RESULT([ Iproute2 directory: $iproutedir])
AC_MSG_RESULT([ System configuration: $sysdesc])
AC_MSG_RESULT([ Debugging: $enable_debug])
AC_MSG_RESULT([ Compact tries: $enable_compact_tries])
AC_MSG_RESULT([ POSIX threads: $enable_pthreads])
AC_MSG_RESULT([ Routing protocols: $protocols])
AC_MSG_RESULT([ LibSSH support in RPKI: $enable_libssh])
AC_MSG_RESULT([ Kernel MPLS support: $enable_mpls_kernel])


@ -1,7 +1,7 @@
<!doctype birddoc system>
<!--
BIRD 2.0 documentation
BIRD 3.0 documentation
This documentation can have 4 forms: sgml (this is master copy), html, ASCII
text and dvi/postscript (generated from sgml using sgmltools). You should always
@ -20,7 +20,7 @@ configuration - something in config which is not keyword.
<book>
<title>BIRD 2.0 User's Guide
<title>BIRD 3.0 User's Guide
<author>
Ondrej Filip <it/&lt;feela@network.cz&gt;/,
Martin Mares <it/&lt;mj@ucw.cz&gt;/,
@ -543,7 +543,7 @@ ipv6 table
include "tablename.conf";;
</code>
<tag><label id="opt-log">log "<m/filename/" [<m/limit/ "<m/backup/"] | syslog [name <m/name/] | stderr | udp <m/address/ [port <m/port/] all|{ <m/list of classes/ }</tag>
<tag><label id="opt-log">log "<m/filename/" [<m/limit/ "<m/backup/"] | fixed "<m/filename/" <m/size/ | syslog [name <m/name/] | stderr | udp <m/address/ [port <m/port/] all|{ <m/list of classes/ }</tag>
Set logging of messages having the given class (either <cf/all/ or <cf>{
error|trace [, <m/.../] }</cf> etc.) into selected destination - a file
specified as a filename string (with optional log rotation information),
@ -561,7 +561,8 @@ include "tablename.conf";;
Logging directly to file supports basic log rotation -- there is an
optional log file limit and a backup filename, when log file reaches the
limit, the current log file is renamed to the backup filename and a new
log file is created.
log file is created. It's also possible to log to a single file behaving
as a ring buffer with a fixed size.
You may specify more than one <cf/log/ line to establish logging to
multiple destinations. Default: log everything to the system log, or
@ -580,7 +581,7 @@ include "tablename.conf";;
<tag><label id="opt-debug-tables">debug tables all|off|{ states|routes|filters|events [, <m/.../] }</tag>
Set global defaults of table debugging options.
See <ref id="table-debug" name="debug"> in the table section.
See <ref id="rtable-debug" name="debug"> in the table section.
Default: off.
<tag><label id="opt-debug-commands">debug commands <m/number/</tag>
@ -605,6 +606,12 @@ include "tablename.conf";;
killed by abort signal. The timeout has effective granularity of
seconds, zero means disabled. Default: disabled (0).
<tag><label id="opt-threads">threads <m/number/</tag>
Set how many worker threads BIRD should spawn. Tests show that every
thread can utilize one complete CPU core, so you probably want to
keep at least one core free. The maximum feasible thread count heavily
depends on the actual workload and must be determined by testing or estimation. Default: 1.
<tag><label id="opt-mrtdump">mrtdump "<m/filename/"</tag>
Set MRTdump file name. This option must be specified to allow MRTdump
feature. Default: no dump file.
@ -724,12 +731,6 @@ that implicit tables (<cf/master4/ and <cf/master6/) can be redefined in order
to set options.
<descrip>
<tag><label id="table-debug">debug all|off|{ states|routes|filters [, <m/.../] }</tag>
Set table debugging options. Like in <ref id="proto-debug"
name="protocol debugging">, tables are capable of writing trace
messages about its work to the log (with category <cf/trace/).
For now, this does nothing, but in version 3, it is used. Default: off.
<tag><label id="rtable-sorted">sorted <m/switch/</tag>
Usually, a routing table just chooses the selected (best) route from a
list of routes for each network, while keeping remaining routes unsorted.
@ -760,21 +761,6 @@ to set options.
disadvantage is that trie-enabled routing tables require more memory,
which may be an issue especially in multi-table setups. Default: off.
<tag><label id="rtable-min-settle-time">min settle time <m/time/</tag>
Specify a minimum value of the settle time. When a ROA table changes,
automatic <ref id="proto-rpki-reload" name="RPKI reload"> may be
triggered, after a short settle time. Minimum settle time is a delay
from the last ROA table change to wait for more updates. Default: 1 s.
<tag><label id="rtable-max-settle-time">max settle time <m/time/</tag>
Specify a maximum value of the settle time. When a ROA table changes,
automatic <ref id="proto-rpki-reload" name="RPKI reload"> may be
triggered, after a short settle time. Maximum settle time is an upper
limit to the settle time from the initial ROA table change even if
there are consecutive updates gradually renewing the settle time.
Default: 20 s.
<tag><label id="rtable-gc-threshold">gc threshold <m/number/</tag>
Specify a minimum amount of removed networks that triggers a garbage
collection (GC) cycle. Default: 1000.
@ -789,6 +775,40 @@ to set options.
periods. Default: adaptive, based on number of routing tables in the
configuration. From 10 s (with <= 25 routing tables) up to 600 s (with
>= 1500 routing tables).
<tag><label id="rtable-cork-threshold">cork threshold <m/number/ <m/number/</tag>
Too many pending exports may lead to memory bloating. In such cases,
BIRD tries to relieve the memory pressure by pausing some routines until
the queue sizes get low enough. This option allows the user to set the
thresholds; first value is the low threshold (when to resume), the
second one is the high threshold (when to pause). The higher the
threshold, the more memory can be used. In most cases, the defaults
should work for you. Default: 1024 8192.
<tag><label id="rtable-export-settle-time">export settle time <m/time/ <m/time/</tag>
Minimum and maximum settle times, respectively, for export announcements.
When multiple routes are changing, this mechanism waits for the changes
to settle before waking up sleeping export threads, but if the changes keep
coming steadily, BIRD does not wait forever; it waits at most the maximum time.
Default values: <cf/1 ms 100 ms/. You always have to provide both values.
<tag><label id="rtable-route-refresh-export-settle-time">route refresh export settle time <m/time/ <m/time/</tag>
Minimum and maximum settle times, respectively, for export announcements
(the same as above), valid when any channel is currently doing a route refresh.
This serves the purpose of even more aggressive change bundling, knowing that
some active process is generating changes at a fast pace. If you don't want
this feature, set this to the same values as <ref id="rtable-export-settle-time" name="export settle time">.
Default values: <cf/100 ms 3 s/.
<tag><label id="rtable-debug">debug all|off|{ states|routes|events [, <m/.../] }</tag>
Set table debugging options. Each table can write some trace messages
into the log with category <cf/trace/. You can request <cf/all/ trace messages
or select some types: <cf/states/ for table state changes and auxiliary
processes, <cf/routes/ for auxiliary route notifications (next hop update,
flowspec revalidation) and <cf/events/ for more detailed auxiliary routine
debug. See also <ref id="channel-debug" name="channel debugging option">.
Default: off.
</descrip>
@ -1017,10 +1037,12 @@ inherited from templates can be updated by new definitions.
<cf/none/ is for dropping all routes. Default: <cf/all/ (except for
EBGP).
<tag><label id="proto-export">export <m/filter/</tag>
<tag><label id="proto-export">export [ in <m/prefix/ ] <m/filter/</tag>
This is similar to the <cf>import</cf> keyword, except that it works in
the direction from the routing table to the protocol. Default: <cf/none/
(except for EBGP and L3VPN).
the direction from the routing table to the protocol. If the <cf/in/ keyword is used,
only routes inside the given prefix are exported. Other routes are completely
ignored (e.g. no logging and no statistics).
Default: <cf/none/ (except for EBGP and L3VPN).
<tag><label id="proto-import-keep-filtered">import keep filtered <m/switch/</tag>
Usually, if an import filter rejects a route, the route is forgotten.
@ -1034,7 +1056,8 @@ inherited from templates can be updated by new definitions.
<cf/roa_check()/ operator). In contrast to to other filter operators,
this status for the same route may change as the content of ROA tables
changes. When this option is active, BIRD activates automatic reload of
affected channels whenever ROA tables are updated (after a short settle
the appropriate subset of prefixes imported or exported by the channels
whenever ROA tables are updated (after a short settle
time). When disabled, route reloads have to be requested manually. The
option is ignored if <cf/roa_check()/ is not used in channel filters.
Note that for BGP channels, automatic reload requires
@ -1042,6 +1065,16 @@ inherited from templates can be updated by new definitions.
<ref id="bgp-export-table" name="export table"> (for respective
direction). Default: on.
<tag><label id="rtable-min-settle-time">roa settle time <m/time/ <m/time/</tag>
Minimum and maximum settle times, respectively, for ROA table changes.
The automatic reload is triggered after the minimum time has passed since the
last ROA table change, but not later than the maximum time after the
first unprocessed ROA table change. With the default values, the automatic
reload therefore happens 1 second after the ROA table stops updating, yet if that
would be later than 20 seconds after the ROA table started updating,
the reload is triggered anyway. Default values: <cf/1 s 20 s/.
You always have to provide both values.
<tag><label id="proto-import-limit">import limit [<m/number/ | off ] [action warn | block | restart | disable]</tag>
Specify an import route limit (a maximum number of routes imported from
the protocol) and optionally the action to be taken when the limit is
@ -1070,6 +1103,13 @@ inherited from templates can be updated by new definitions.
counter ignores route blocking and block action also blocks route
updates of already accepted routes -- and these details will probably
change in the future. Default: <cf/off/.
<tag><label id="proto-export-block">export block <m/number/</tag>
Set the minimum number of routes exported at once when feeding or
when <cf/merge paths/ or <cf/secondary/ is selected. This affects overall latency:
when your export filters are very expensive, processing
a whole block of routes may take too much time. In such cases, you may need to
shrink this value to improve responsiveness. Default: <cf/16384/.
</descrip>
<p>This is a trivial example of RIP configured for IPv6 on all interfaces:
@ -1396,7 +1436,7 @@ This argument can be omitted if there exists only a single instance.
Enable, disable or restart a given protocol instance, instances matching
the <cf><m/pattern/</cf> or <cf/all/ instances.
<tag><label id="cli-reload">reload [in|out] <m/name/|"<m/pattern/"|all</tag>
<tag><label id="cli-reload">reload [in|out] (<m/name/|"<m/pattern/"|all) [partial prefix] </tag>
Reload a given protocol instance, that means re-import routes from the
protocol instance and re-export preferred routes to the instance. If
<cf/in/ or <cf/out/ options are used, the command is restricted to one
@ -1407,6 +1447,9 @@ This argument can be omitted if there exists only a single instance.
propagates the old set of routes. For example when <cf/configure soft/
command was used to change filters.
If the <cf/partial prefix/ option is used, only the corresponding routes are reloaded.
The BGP protocol does a partial reload only if it has an import table enabled; otherwise, the partial reload is refused.
Re-export always succeeds, but re-import is protocol-dependent and might
fail (for example, if BGP neighbor does not support route-refresh
extension). In that case, re-export is also skipped. Note that for the
@ -1973,17 +2016,8 @@ Common route attributes are:
primary key of the routing table. Read-only. (See the <ref id="routes"
name="chapter about routes">.)
<tag><label id="rta-scope"><m/enum/ scope</tag>
The scope of the route. Possible values: <cf/SCOPE_HOST/ for routes
local to this host, <cf/SCOPE_LINK/ for those specific for a physical
link, <cf/SCOPE_SITE/ and <cf/SCOPE_ORGANIZATION/ for private routes and
<cf/SCOPE_UNIVERSE/ for globally visible routes. This attribute is not
interpreted by BIRD and can be used to mark routes in filters. The
default value for new routes is <cf/SCOPE_UNIVERSE/.
<tag><label id="rta-preference"><m/int/ preference</tag>
Preference of the route. Valid values are 0-65535. (See the chapter
about routing tables.)
Preference of the route.
<tag><label id="rta-from"><m/ip/ from</tag>
The router which the route has originated from.
@ -2024,14 +2058,6 @@ Common route attributes are:
creation/removal. Zero is returned for routes with undefined outgoing
interfaces. Read-only.
<tag><label id="rta-onlink"><m/bool/ onlink</tag>
Onlink flag means that the specified nexthop is accessible on the
interface regardless of IP prefixes configured on the interface.
The attribute can be used to configure such next hops by first setting
<cf/onlink = true/ and <cf/ifname/, and then setting <cf/gw/. Possible
use case for setting this flag is to automatically build overlay IP-IP
networks on linux.
<tag><label id="rta-weight"><m/int/ weight</tag>
Multipath weight of route next hops. Valid values are 1-256. Reading
returns the weight of the first next hop, setting it sets weights of all
@ -3405,13 +3431,6 @@ be used in explicit configuration.
be examined later by <cf/show route/, and can be used to reconfigure
import filters without full route refresh. Default: off.
Note that currently the import table breaks routes with recursive
nexthops (e.g. ones from IBGP, see <ref id="bgp-gateway" name="gateway
recursive">), they are not properly updated after next hop change. For
the same reason, it also breaks re-evaluation of flowspec routes with
<ref id="bgp-validate" name="flowspec validation"> option enabled on
flowspec channels.
<tag><label id="bgp-export-table">export table <m/switch/</tag>
A BGP export table contains all routes sent to given BGP neighbor, after
application of export filters. It is also called <em/Adj-RIB-Out/ in BGP
@ -4941,6 +4960,14 @@ include standard channel config options; see the example below.
<tag><label id="pipe-peer-table">peer table <m/table/</tag>
Defines secondary routing table to connect to. The primary one is
selected by the <cf/table/ keyword.
<tag><label id="pipe-max-generation">max generation <m/expr/</tag>
Sets the maximum generation of routes that may pass through this pipe.
The generation value is increased by one by each pipe on its path.
A route not meeting this requirement causes an error message complaining about
an overpiped route. If you have long chains of pipes, you probably want
to raise this value; anyway, the default of 16 should be enough for even
the strangest uses. Maximum is 254.
</descrip>
<sect1>Attributes


@ -62,6 +62,7 @@ Reply codes of BIRD command-line interface
1024 Show Babel neighbors
1025 Show Babel entries
1026 Show MPLS ranges
1027 Show threads
8000 Reply too long
8001 Route not found

doc/threads/.gitignore

@ -0,0 +1,2 @@
*.html
*.pdf

Binary file not shown (new image, 300 KiB).


@ -0,0 +1,114 @@
# BIRD Journey to Threads. Chapter 0: The Reason Why.
BIRD is a fast, robust and memory-efficient routing daemon designed and
implemented at the end of the 20th century. Its concept of multiple routing
tables with pipes between them, as well as a procedural filtering language,
has been unique for a long time and is still one of the main reasons why people
use BIRD for heavy loads of routing data.
## IPv4 / IPv6 duality: Solved
The original design of BIRD also has some drawbacks. One of these was the idea
of two separate daemons: one BIRD for IPv4 and another for IPv6, built from the same
codebase, cleverly using `#ifdef IPV6` constructions to implement the
common parts of BIRD's algorithms and data structures only once.
If IPv6 adoption had gone forward as people expected at that time,
this would have worked; after the worldwide transition to IPv6 was finished,
people could simply stop building BIRD for IPv4 and drop the `#ifdef`-ed code.
History went the other way, however. The BIRD developers therefore decided to *integrate*
the two versions into one daemon capable of handling any address family, allowing for
not only IPv6 but virtually anything. This rework brought quite a lot of
backward-incompatible changes, so we decided to release it as version 2.0.
This work was mostly finished in 2018, and as of March 2021, we have already
switched the 1.6.x branch to bugfix-only mode.
## BIRD is single-threaded now
The second drawback is the single-threaded design. Looking back to 1998, this was
a good idea: a common PC had one single core and BIRD was targeting exactly
this segment. As the years went by, manufacturers launched multicore x86 chips
(AMD Opteron in 2004, Intel Pentium D in 2005). This ultimately led to a world
where, as of March 2021, virtually no new PC is sold with a single-core CPU.
Alongside these changes, the speed of a single core has not been growing as fast
as the Internet. BIRD is still capable of handling a full BGP table
(868k IPv4 routes in March 2021) with one core, but when BIRD starts, it may take
long minutes to converge.
## Intermezzo: Filters
In 2018, we took some data we had from large internet exchanges and simulated
a cold start of BIRD as a route server. We used `linux-perf` to find the most time-critical
parts of BIRD, and it pointed very clearly to the filtering code. It also showed that the
IPv4 version of BIRD 1.6.x was substantially faster than the *integrated* version, while
the IPv6 version was about as fast as the *integrated* one.
Here we should explain a little more about how the filters really work. Let's use
a simple filter as an example:
```
filter foo {
if net ~ [10.0.0.0/8+] then reject;
preference = 2 * preference - 41;
accept;
}
```
This filter gets translated to an infix internal structure.
![Example of filter internal representation](00_filter_structure.png)
When executing, the filter interpreter simply walks the filter's internal structure
recursively in the right order, executes the instructions, collects their results
and finishes by either accepting or rejecting the route.
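To make the recursion concrete, here is a rough sketch of such a tree-walking
interpreter (the types and names are invented for illustration; this is not
BIRD's actual code):

```
/* Illustrative only: recursive evaluation of an infix instruction tree. */
struct f_inst_sketch {
  enum { FI_NUM, FI_ADD, FI_MUL } code;
  const struct f_inst_sketch *a1, *a2;  /* operand subtrees */
  int value;                            /* used by FI_NUM */
};

static int
interpret_sketch(const struct f_inst_sketch *i)
{
  switch (i->code) {
    case FI_NUM: return i->value;
    case FI_ADD: return interpret_sketch(i->a1) + interpret_sketch(i->a2);
    case FI_MUL: return interpret_sketch(i->a1) * interpret_sketch(i->a2);
  }
  return 0;
}
```

Every node of the tree costs a full function call, including all of its
prologue and epilogue stack traffic.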
## Filter rework
Further analysis of the filter code revealed an absurd-looking result. The
most executed parts of the interpreter function were the `push` CPU
instructions at its very beginning and the `pop` CPU instructions at its very
end. This came from the fact that the interpreter function was quite long, yet
most of the filter instructions took an extremely short path through it:
all the stack manipulation at the beginning, a branch on the filter instruction
type, then just several CPU instructions, before popping everything back off
the stack and returning.
After some thought on how to minimize stack manipulation when all you need
is to take two numbers and multiply them, we decided to preprocess the filter
internal structure into another structure which is much easier to execute. The
interpreter now uses a data stack and behaves generally like a
postfix-ordered language. We also considered Lua, which turned out to require
quite a lot of glue code while achieving about the same performance.
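To illustrate the principle (this is a toy model, not BIRD's actual interpreter), a postfix program for the `preference` expression above may be executed over a data stack like this:
```
/* Toy model of a postfix filter interpreter; not BIRD's actual code. */
#include <stdio.h>

enum op { PUSH, MUL, SUB };
struct insn { enum op op; int arg; };

static int run(const struct insn *prog, int len)
{
  int stack[16], sp = 0;        /* the data stack */
  for (int i = 0; i < len; i++)
    switch (prog[i].op) {
      case PUSH: stack[sp++] = prog[i].arg; break;
      case MUL:  sp--; stack[sp-1] *= stack[sp]; break;
      case SUB:  sp--; stack[sp-1] -= stack[sp]; break;
    }
  return stack[0];
}

int main(void)
{
  /* preference = 2 * preference - 41, with preference = 100 */
  const struct insn prog[] = {
    { PUSH, 2 }, { PUSH, 100 }, { MUL, 0 }, { PUSH, 41 }, { SUB, 0 },
  };
  printf("%d\n", run(prog, 5));   /* prints 159 */
  return 0;
}
```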
After these changes, we managed to reduce the filter execution time by 10–40%,
depending on how complex the filter is.
Still, even this reduction is too little when one CPU core
runs for several minutes while the others are sleeping.
## We need more threads
As a side effect of the rework, the new filter interpreter is also completely
thread-safe. Running the filters in parallel while keeping everything else
single-threaded seemed to be the way to go. The main problem of this
solution is the too fine granularity of parallel jobs: we would spend lots of
time on synchronization overhead.
Parallel filter execution alone was also too one-sided, useful only for
configurations with complex filters. In other cases, the major bottleneck is best
route recalculation, OSPF recalculation or kernel synchronization.
It also turned out to be quite dirty from the code cleanliness point of view.
Therefore we chose to make BIRD completely multithreaded. We designed a way
to gradually enable parallel computation and the best usage of all available CPU
cores. We have three goals:
* We want to keep current functionality. Parallel computation should never drop
a useful feature.
* We want to take small steps. No big reworks, even though even the smallest
possible step may need quite a lot of refactoring first.
* We want to be backwards compatible as much as possible.
*It's still a long road to version 2.1. This series of texts should document
what needs to be changed, why we're doing it and how. In the next chapter, we're
going to describe the structures for routes and their attributes. Stay tuned!*

View File

@ -0,0 +1,159 @@
# BIRD Journey to Threads. Chapter 1: The Route and its Attributes
BIRD is a fast, robust and memory-efficient routing daemon designed and
implemented at the end of 20th century. We're doing a significant amount of
BIRD's internal structure changes to make it possible to run in multiple
threads in parallel. This chapter covers necessary changes of data structures
which store every single routing data.
*If you want to see the changes in code, look (basically) into the
`route-storage-updates` branch. Not all of them are implemented yet, but
most of them are pretty much finished as of the end of March 2021.*
## How routes are stored
A BIRD routing table is just a hierarchical noSQL database. On the top level, the
routes are keyed by their destination, called *net*. For historic reasons,
the *net* is not only an *IPv4 prefix*, *IPv6 prefix*, *IPv4 VPN prefix* etc.,
but also *MPLS label*, *ROA information* or *BGP Flowspec record*. As there may
be several routes for each *net*, an obligatory part of the key is *src* aka.
*route source*. The route source is a tuple of the originating protocol
instance and a 32-bit unsigned integer. If a protocol wants to withdraw a route,
the *net* and *src* are both necessary and sufficient to identify which route
is to be withdrawn.
The route itself consists of (basically) a list of key-value records, with
value types ranging from a 16-bit unsigned integer for preference to a complex
BGP path structure. The keys are pre-defined by protocols (e.g. BGP path or
OSPF metrics), or by BIRD core itself (preference, route gateway).
Finally, the user can declare their own attribute keys using the keyword
`attribute` in the config.
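For illustration, a custom attribute may be declared and set like this (a sketch in BIRD's configuration language; the attribute name is made up):
```
attribute int my_metric;

filter set_my_metric {
	my_metric = 42;
	accept;
}
```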
## Attribute list implementation
Currently, there are three layers of route attributes. We call them *route*
(*rte*), *attributes* (*rta*) and *extended attributes* (*ea*, *eattr*).
The first layer, *rte*, contains the *net* pointer, several fixed-size route
attributes (mostly preference and protocol-specific metrics), flags, lastmod
time and a pointer to *rta*.
The second layer, *rta*, contains the *src* (a pointer to a singleton instance),
a route gateway, several other fixed-size route attributes and a pointer to
*ea* list.
The third layer, *ea* list, is a variable-length list of key-value attributes,
containing all the remaining route attributes.
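A heavily simplified sketch of the three layers (the real definitions in `nest/route.h` contain many more fields and differ in details):
```
struct rte {                   /* Layer 1: the route */
  struct rte *next;            /* Next route for the same net */
  net *net;                    /* Destination (the key) */
  struct rta *attrs;           /* Layer 2 */
  byte flags;
  btime lastmod;               /* Last modification time */
};

struct rta {                   /* Layer 2: hashed attribute set */
  struct rte_src *src;         /* Route source */
  ip_addr gw;                  /* Next hop */
  struct ea_list *eattrs;      /* Layer 3 */
  /* ... several other fixed-size attributes ... */
};

struct ea_list {               /* Layer 3: key-value attribute list */
  struct ea_list *next;
  byte flags;
  word count;                  /* Number of attributes */
  eattr attrs[0];              /* Each one: numeric key ID, type, value */
};
```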
The distribution of route attributes between the layers is somewhat
arbitrary. Mostly, the first and second layers hold attributes that
were thought to be accessed frequently (e.g. in best route selection) and
filled in for most routes, while the third layer is for infrequently used
and/or infrequently accessed route attributes.
## Attribute list deduplication
When protocols originate routes, there are commonly many routes with the
same attribute list. BIRD could ignore this fact, but if you have several
tables connected with pipes, it is much more memory-efficient to store each
attribute list only once.
Therefore, the two lower layers (*rta* and *ea*) are hashed and stored in a
BIRD-global database. Routes (*rte*) contain a pointer to *rta* in this
database, maintaining a use-count of each *rta*. Attributes (*rta*) contain
a pointer to normalized (sorted by numerical key ID) *ea*.
## Attribute list rework
The first thing to change is the distribution of route attributes between
attribute list layers. We decided to make the first layer (*rte*) hold only the key
and other per-record internal technical information. Therefore we move *src* to
*rte* and preference to *rta* (among other things). *This is already done.*
We also found out that the nexthop (gateway), originally one single IP address
and an interface, has evolved to a complex attribute with several sub-attributes;
not only considering multipath routing but also MPLS stacks and other per-route
attributes. This has led to an overly complex data structure holding the nexthop set.
We decided finally to squash *rta* and *ea* to one type of data structure,
allowing for completely dynamic route attribute lists. This is also supported
by adding other *net* types (BGP FlowSpec or ROA) where lots of the fields make
no sense at all, yet we still want to use the same data structures and implementation,
as we don't like duplicating code. *Multithreading doesn't depend on this change,
yet it is going to happen soon anyway.*
## Route storage
The process of route import from protocol into a table can be divided into several phases:
1. (In protocol code.) Create the route itself (typically from
protocol-internal data) and choose the right channel to use.
2. (In protocol code.) Create the *rta* and *ea* and obtain an appropriate
hashed pointer. Allocate the *rte* structure and fill it in.
3. (Optionally.) Store the route to the *import table*.
4. Run filters. If reject, free everything.
5. Check whether this is a real change (it may be idempotent). If not, free everything and do nothing more.
6. Run the best route selection algorithm.
7. Execute exports if needed.
We found out that the *rte* structure allocation is done too early. BIRD uses
global optimized allocators for fixed-size blocks (which *rte* is) to reduce
its memory footprint, therefore the allocation of the *rte* structure would be a
synchronization point in a multithreaded environment.
The common code is also much more complicated when we have to track whether the
current *rte* has to be freed or not. This is more a problem in export than in
import as the export filter can also change the route (and therefore allocate
another *rte*). The changed route must therefore be freed after use. All the
route changing code must also track whether this route is writable or
read-only.
We therefore introduce a variant of *rte* called *rte_storage*. Both of these
hold the same layer-1 route information (destination, author, cached
attribute pointer, flags etc.), but *rte* is always local while *rte_storage*
is intended to be put into global data structures.
This change allows us to remove lots of the code which only tracks whether any
*rte* is to be freed as *rte*'s are almost always allocated on-stack, naturally
limiting their lifetime. If not on-stack, it's the responsibility of the owner
to free the *rte* after import is done.
This change also removes the need for *rte* allocation in protocol code and
also *rta* can be safely allocated on-stack. As a result, protocols can simply
allocate all the data on stack, call the update routine and the common code in
BIRD's *nest* does all the storage for them.
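The protocol side of an import may then look roughly like this (a sketch with illustrative names; the real function signatures differ):
```
/* Sketch only; function names and fields are illustrative. */
static void my_proto_announce(struct channel *ch, const net_addr *n, ip_addr gw)
{
  rta a = {
    .source = RTS_STATIC,
    .dest = RTD_UNICAST,
    .nh.gw = gw,
  };
  rte e = { .attrs = &a };

  /* The nest stores everything itself; after rte_update() returns,
   * both the on-stack rta and rte may be forgotten. */
  rte_update(ch, n, &e);
}
```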
Allocating *rta* on-stack is however not required. BGP and OSPF use this to
import several routes with the same attribute list. In BGP, this is due to the
format of BGP update messages containing first the attributes and then the
destinations (BGP NLRI's). In OSPF, in addition to *rta* deduplication, it is
also presumed that no import filter (or at most some trivial changes) is applied
as OSPF would typically not work well when filtered.
*This change is already done.*
## Route cleanup and table maintenance
In some cases, the route update is not originated by protocol/channel code.
When a channel shuts down, all routes originated by that channel are simply
cleaned up. Routes with recursive nexthops may also get changed without an
import, simply by a change of the underlying IGP route.
This is currently done by a `rt_event` (see `nest/rt-table.c` for source code)
which is to be converted to a parallel thread, running when nobody imports any
route. *This change is freshly done in branch `guernsey`.*
## Parallel protocol execution
The long-term goal of these reworks is to allow for completely independent
execution of all the protocols. Typically, there is no direct interaction
between protocols; everything is done through BIRD's *nest*. Protocols should
therefore run in parallel in the future and wait/lock only when something
needs to be done externally.
We also aim for a clean and documented protocol API.
*It's still a long road to version 2.1. This series of texts should document
what needs to be changed, why we're doing it and how. In the next chapter, we're
going to describe how the route is exported from table to protocols and how this
process is changing. Stay tuned!*

View File

@ -0,0 +1,463 @@
# BIRD Journey to Threads. Chapter 2: Asynchronous route export
Route export is a core algorithm of BIRD. This chapter covers how we are making
this procedure multithreaded. Desired outcomes are mostly lower latency of
route import, flap dampening and also faster route processing in large
configurations with lots of export from one table.
BIRD is a fast, robust and memory-efficient routing daemon designed and
implemented at the end of 20th century. We're doing a significant amount of
BIRD's internal structure changes to make it possible to run in multiple
threads in parallel.
## How routes are propagated through BIRD
In the [previous chapter](https://en.blog.nic.cz/2021/03/23/bird-journey-to-threads-chapter-1-the-route-and-its-attributes/), you could learn how the route import works. We should
now extend that process by the route export.
1. (In protocol code.) Create the route itself and propagate it through the
right channel by calling `rte_update`.
2. The channel runs its import filter.
3. New best route is selected.
4. For each channel:
1. The channel runs its preexport hook and export filter.
2. (Optionally.) The channel merges the nexthops to create an ECMP route.
3. The channel calls the protocol's `rt_notify` hook.
5. After all exports are finished, the `rte_update` call finally returns and
the source protocol may do anything else.
Let's imagine that all the protocols are running in parallel. There are two
protocols with a route prepared to import. One of those wins the table lock,
does the import and then the export touches the other protocol which must
either:
* store the route export until it finishes its own imports, or
* have independent import and export parts.
Both of these options are infeasible for common use. Implementing them would
make protocols much more complicated, with lots of new code to test and release
at once and quite a lot of corner cases. The risk of deadlocks is also worth
mentioning.
## Asynchronous route export
We decided to make it easier for protocols and decouple the import and export
this way:
1. The import is done.
2. Best route is selected.
3. Resulting changes are stored.
Then, after the importing protocol returns, the exports are processed for each
exporting channel in parallel: some protocols
may process the export directly after it is stored, other protocols wait
until they finish their current job.
This eliminates the risk of deadlocks and all protocols' `rt_notify` hooks can
rely on their independence. There is only one question. How to store the changes?
## Route export modes
To find a good data structure for route export storage, we shall first know the
readers. The exporters may request different modes of route export.
### Export everything
This is the simplest route export mode. The exporter wants to know about all
the routes as they're changing. We therefore simply store the old route until
the change is fully exported and then we free the old stored route.
To manage this, we can simply queue the changes one after another and postpone
the old route cleanup until all channels have exported the change. The queue member
would look like this:
```
struct {
struct rte_storage *new;
struct rte_storage *old;
};
```
### Export best
This is another simple route export mode. We check whether the best route has
changed; if not, no export happens. Otherwise, the export is propagated as the
old best route changing to the new best route.
To manage this, we could use the queue from the previous point by adding new
best and old best pointers. It is guaranteed that both the old best and new
best pointers are always valid at the time of export, as any change to them
must be recorded in later pending exports which this channel has not yet
processed, and therefore nothing they point to has been freed yet.
```
struct {
struct rte_storage *new;
struct rte_storage *new_best;
struct rte_storage *old;
struct rte_storage *old_best;
};
```
Anyway, we're getting to the complicated export modes where this simple
structure is no longer enough.
### Export merged
Here we're getting into real problems. The exporting channel requests not
only the best route but also all routes that are good enough to be considered
ECMP-eligible (we call these routes *mergable*). The export is then just one
route with just the nexthops merged. Export filters are executed before
merging and if the best route is rejected, nothing is exported at all.
To achieve this, we have to re-evaluate export filters any time the best route
or any mergable route changes. Until now, the export could just do what it wanted
as there was only one thread working. To change this, we need to access the
whole route list and process it.
### Export first accepted
In this mode, the channel runs export filters on a sorted list of routes, best first.
If the best route gets rejected, it asks for the next one until it finds an
acceptable route or exhausts the list. This export mode requires a sorted table.
BIRD users may know this export mode as `secondary` in BGP.
For now, BIRD stores two bits per route for each channel. The *export bit* is set
if the route has been really exported to that channel. The *reject bit* is set
if the route was rejected by the export filter.
When processing a route change in this mode, the algorithm first checks the
export bit of the old route. If this bit is set, the old route is the one
currently exported, so we have to find the right route to export now. Therefore the sorted route
list is walked best to worst to find a new route to export, using the reject
bit to evaluate only routes which weren't rejected in previous runs of this
algorithm.
If the export bit of the old route is not set, the algorithm walks the sorted route list best
to worst, checking the position of the new route with respect to the currently exported route.
If the new route is worse, nothing happens; otherwise the new route is sent to
the filters and finally exported if it passes.
### Export by feed
To resolve the problems arising from the previous two export modes (merged and first accepted),
we introduce a way to process a whole route list without keeping the table locked
while export filters are running. To achieve this, we follow this algorithm:
1. The exporting channel sees a pending export.
2. *The table is locked.*
3. All routes (pointers) for the given destination are dumped to a local array.
4. Also first and last pending exports for the given destination are stored.
5. *The table is unlocked.*
6. The channel processes the local array of route pointers.
7. All pending exports between the first and last stored (incl.) are marked as processed to allow for cleanup.
After unlocking the table, the pointed-to routes are implicitly guarded by the
pending exports: the cleanup routine frees resources only after the corresponding
pending exports have been processed by all channels.
The pending export range must be stored together with the feed. While
processing export filters for the feed, another export may come in. We
must process the export once again as the feed is now outdated; therefore we
must mark as processed only those exports that were pending for this destination when the
feed was stored. We also can't mark them before actually processing them,
as they could get freed in the meantime.
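In code, the algorithm may look roughly like this (a sketch; names and signatures are illustrative):
```
/* Sketch of export-by-feed; names are illustrative. */
static void channel_export_by_feed(struct channel *c, net *n)
{
  rte *feed[MAX_FEED_SIZE];
  struct rt_pending_export *first, *last;

  rt_lock(c->table);
  uint count = rte_feed_collect(n, feed, MAX_FEED_SIZE);  /* dump route pointers */
  first = n->first_pending_export;
  last = n->last_pending_export;
  rt_unlock(c->table);

  run_export_filters_and_notify(c, feed, count);  /* no table lock held here */

  /* Mark as processed only the exports pending when the feed was taken;
   * anything newer must trigger another feed. */
  for (struct rt_pending_export *rpe = first; rpe; rpe = rpe_next(rpe)) {
    rpe_mark_seen(c, rpe);
    if (rpe == last)
      break;
  }
}
```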
## Pending export data structure
As the two complicated export modes use the export-by-feed algorithm, the
pending export data structure may be quite minimalistic.
```
struct rt_pending_export {
struct rt_pending_export * _Atomic next; /* Next export for the same destination */
struct rte_storage *new; /* New route */
struct rte_storage *new_best; /* New best route in unsorted table */
struct rte_storage *old; /* Old route */
struct rte_storage *old_best; /* Old best route in unsorted table */
_Atomic u64 seq; /* Sequential ID (table-local) of the pending export */
};
```
To allow for squashing outdated pending exports (e.g. for flap dampening
purposes), there is a `next` pointer to the next export for the same
destination. This is also needed for the export-by-feed algorithm to traverse
the list of pending exports.
We should also add several items into `struct channel`.
```
struct coroutine *export_coro; /* Exporter and feeder coroutine */
struct bsem *export_sem; /* Exporter and feeder semaphore */
struct rt_pending_export * _Atomic last_export; /* Last export processed */
struct bmap export_seen_map; /* Keeps track which exports were already processed */
u64 flush_seq; /* Table export seq when the channel announced flushing */
```
To run the exports in parallel, `export_coro` is run and `export_sem` is
used for signalling new exports to it. The exporter coroutine also marks all
seen sequential IDs in its `export_seen_map` to make it possible to skip over
them if seen again. The exporter coroutine is started when export is requested
and stopped when export is stopped.
There is also a table cleaner routine
(see [previous chapter](https://en.blog.nic.cz/2021/03/23/bird-journey-to-threads-chapter-1-the-route-and-its-attributes/))
which must also clean up the pending exports after all the channels are finished with them.
To signal that, there is `last_export` working as a release point: the channel
guarantees that it doesn't touch the pointed-to pending export (or any older), nor any data
from it.
The last tricky point here is channel flushing. When any channel stops, all its
routes are automatically freed and withdrawals are exported if appropriate.
Until now, the routes could be flushed synchronously, but now the flush has
several phases, tracked by the `flush_active` channel variable:
1. Flush started.
2. Withdrawals for all the channel's routes are issued.
(Here the channel stores the `seq` of the current last pending export to `flush_seq`.)
3. When the table's cleanup routine cleans up the withdrawal with `flush_seq`,
the channel may safely stop and free its structures as all `sender` pointers in routes are now gone.
Finally, some additional information has to be stored in tables:
```
_Atomic byte export_used; /* Export journal cleanup scheduled */
struct rt_pending_export * _Atomic first_export; /* First export to announce */
byte export_scheduled; /* Export is scheduled */
list pending_exports; /* List of packed struct rt_pending_export */
struct fib export_fib; /* Auxiliary fib for storing pending exports */
u64 next_export_seq; /* The next export will have this ID */
```
The exports are:
1. Assigned the `next_export_seq` sequential ID, incrementing this item by one.
2. Put into `pending_exports` and `export_fib` for both sequential and by-destination access.
3. Signalled by setting `export_scheduled` and `first_export`.
After processing several exports, `export_used` is set and route table maintenance
coroutine is woken up to possibly do cleanup.
The `struct rt_pending_export` structures seem to be best allocated by requesting a whole
memory page, containing a common list node, a simple header and all the
structures packed in the rest of the page. This may save a significant amount of memory.
In case of congestion, there will be lots of exports and every spare kilobyte
counts. If BIRD is almost idle, the optimization has no effect on the overall performance.
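Such a page-packed block may be sketched like this (illustrative; the actual structure in BIRD differs):
```
/* One page-sized block of pending exports; names are illustrative. */
struct rt_export_block {
  node n;                              /* Node in the table's pending_exports list */
  uint end;                            /* Number of used slots in this block */
  struct rt_pending_export export[];   /* Packed to fill the rest of the page */
};
/* A 4096-byte page then holds roughly
 * (4096 - sizeof(struct rt_export_block)) / sizeof(struct rt_pending_export)
 * exports, amortizing the allocation overhead under congestion. */
```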
## Export algorithm
As we have explained at the beginning, the current export algorithm is
synchronous and table-driven. The table walks the channel list and propagates the update.
The new export algorithm is channel-driven. The table just indicates that it
has something new in export queue and the channel decides what to do with that and when.
### Pushing an export
When a table has something to export, it enqueues an instance of
`struct rt_pending_export` and updates the `last` pointer (and
possibly also `first`) for this destination's pending exports.
Then it pings its maintenance coroutine (`rt_event`) to notify the exporting
channels about a new route. Before the maintenance coroutine acquires the table
lock, the importing protocol may e.g. prepare the next route inbetween.
The maintenance coroutine, when it wakes up, walks the list of channels and
wakes their export coroutines.
These two levels of asynchronicity are here for efficiency reasons:
1. In case of low table load, the export is announced just after the import happens.
2. In case of table congestion, the export notification locks the table as well
as all route importers, effectively reducing the number of channel list traversals.
### Processing an export
After these two pings, the channel finally knows that there is an export pending.
1. The channel waits for a semaphore. This semaphore is posted by the table
maintenance coroutine.
2. The channel checks whether there is a `last_export` stored.
1. If yes, it proceeds with the next one.
2. Otherwise it takes `first_export` from the table. This special
pointer is atomic and can be accessed without locking and also without clashing
with the export cleanup routine.
3. The channel checks its `export_seen_map` to see whether this export has
already been processed. If so, it goes back to 1. to get the next export. No
action is needed with this one.
4. As the export is now clearly new, the export chain (single-linked list) is
scanned for the current first and last export. This is done by following the
`next` pointer in the exports.
5. If all-routes mode is used, the exports are processed one-by-one. In future
versions, we may employ some simple flap-dampening by checking the pending
export list for the same route src. *No table locking happens.*
6. If best-only mode is employed, just the first and last exports are
considered to find the old and new best routes. The inbetween exports do nothing. *No table locking happens.*
7. If export-by-feed is used, the current state of routes in the table is fetched and processed
as described above in the "Export by feed" section.
8. All processed exports are marked as seen.
9. The channel stores the first processed export to `last_export` and returns
to the beginning to wait for the next exports. The later exports are then skipped by
step 3 when the export coroutine gets to them.
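Put together, the exporter coroutine's main loop may be sketched like this (illustrative names; the real code in `nest/rt-table.c` handles more corner cases):
```
/* Sketch of the exporter coroutine main loop; names are illustrative. */
static void channel_export_main(struct channel *c)
{
  while (1) {
    bsem_wait(c->export_sem);          /* posted by the table maintenance coroutine */

    struct rt_pending_export *rpe = c->last_export
      ? rpe_next_in_seq(c->last_export)
      : atomic_load(&c->table->first_export);

    for (; rpe; rpe = rpe_next_in_seq(rpe)) {
      if (bmap_test(&c->export_seen_map, rpe->seq))
        continue;                      /* already covered, e.g. by a feed */

      do_one_export(c, rpe);           /* one of the four export modes */
      bmap_set(&c->export_seen_map, rpe->seq);
      atomic_store(&c->last_export, rpe);
    }
  }
}
```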
## The full life-cycle of routes
Until now, we've been assuming that the channels *just exist*. In real life,
any channel may go up or down and we must handle it, flushing the routes
appropriately and freeing all the memory just in time to avoid both
use-after-free bugs and memory leaks. BIRD is written in C, which has no garbage
collector or similar modern features, so memory management is a thing.
### Protocols and channels as viewed from a route
BIRD consists effectively of protocols and tables. **Protocols** are active parts,
kind-of subprocesses manipulating routes and other data. **Tables** are passive,
serving as a database of routes. To connect a protocol to a table, a
**channel** is created.
Every route has its `sender` storing the channel which has put the route into
the current table. Therefore we know which routes to flush when a channel goes down.
Every route also has its `src`, a route source allocated by the protocol which
originated it first. This is kept when a route is passed through a *pipe*. The
route source is always bound to a protocol; it is possible that a protocol
announces routes via several channels using the same src.
Both `src` and `sender` must point to active protocols and channels as inactive
protocols and channels may be deleted any time.
### Protocol and channel lifecycle
In the beginning, all channels and protocols are down. Until they fully start,
no route from them is allowed into any table. When the protocol and its channels are up,
they may originate and receive routes freely. However, the transitions are worth mentioning.
### Channel startup and feed
When protocols and channels start, they need to get the current state of the
appropriate table. Therefore, after a protocol and channel start, the
export-feed coroutine is also initiated.
Tables can contain millions of routes, and feeding a channel in one step could
cause long import latency. The table structure is (at least for now) too
complicated to be implemented as lockless, thus even read access needs locking.
To mitigate this, the feeds are split into chunks to allow for regular route propagation
with a reasonable latency.
When the exports were synchronous, we simply didn't care and just announced the
exports to the channels from the time they started feeding. When making exports
asynchronous, it is crucial to avoid (hopefully) all the possible race conditions
which could arise from simultaneous feed and export. As the feeder routines had
to be rewritten anyway, it was a good opportunity to make this precise.
Therefore, when a channel goes up, it also starts exports:
1. Start the feed-export coroutine.
2. *Lock the table.*
3. Store the last export in queue.
4. Read a limited number of routes to local memory together with their pending exports.
5. If there are some routes to process:
1. *Unlock the table.*
2. Process the loaded routes.
3. Set the appropriate pending exports as seen.
4. *Lock the table.*
5. Go to 4. to continue feeding.
6. If there was a last export stored, load the next one to be processed. Otherwise take the table's `first_export`.
7. *Unlock the table.*
8. Run the exporter loop.
*Note: There are some nuances not mentioned here how to do things in right
order to avoid missing some events while changing state. For specifics, look
into the code in `nest/rt-table.c` in branch `alderney`.*
When the feeder loop finishes, it continues smoothly to process all the exports
that have been queued while the feed was running. Step 5.3 ensures that already
seen exports are skipped, steps 3 and 6 ensure that no export is missed.
### Channel flush
Protocols and channels need to stop for a handful of reasons. All of these
cases follow the same routine.
1. (Maybe.) The protocol requests to go down or restart.
2. The channel requests to go down or restart.
3. The channel requests to stop export.
4. In the feed-export coroutine:
1. At a designated cancellation point, check cancellation.
2. Clean up local data.
3. *Lock the main BIRD context.*
4. If shutdown requested, switch the channel to *flushing* state and request table maintenance.
5. *Stop the coroutine and unlock main BIRD context.*
5. In the table maintenance coroutine:
1. Walk across all channels and check them for *flushing* state, setting `flush_active` to 1.
2. Walk across the table (split to allow for low latency updates) and
generate a withdrawal for each route sent by the flushing channels.
3. When the whole table is traversed, the flushing channels' `flush_active` is set to 2 and
`flush_seq` is set to the current last export seq.
4. Wait until all the withdrawals are processed by checking the `flush_seq`.
5. Mark the flushing channels as *down* and eventually proceed to the protocol shutdown or restart.
There is also a separate routine that handles bulk cleanup of `src`'s which
contain a pointer to the originating protocol. This routine may get reworked in
future; for now it is good enough.
### Route export cleanup
Last but not least is the export cleanup routine. Until now, the withdrawn
routes were exported synchronously and freed directly after the import was
done. This is not possible anymore. The export is stored and the import returns
to let the importing protocol continue its work. We therefore need a routine to
clean up the withdrawn routes and also the processed exports.
First of all, this routine refuses to run when any export is feeding or
shutting down. In the future, cleanup while feeding should be possible; for
now, we aren't sure about the possible race conditions.
Anyway, when all the exports are in a steady state, the routine works as follows:
1. Walk the active exports and find the minimum (oldest export) among their `last_export` values.
2. If there is nothing to clear between the actual oldest export and channels' oldest export, do nothing.
3. Find the table's new `first_export` and set it. Now there is nobody pointing to the old exports.
4. Free the withdrawn routes.
5. Free the old exports, removing them also from the first-last list of exports for the same destination.
## Results of these changes
This is the first major step forward. This version alone may be
still as slow as the single-threaded version, at least if your export filters are trivial.
Anyway, the main purpose of this step is not an immediate speedup. It is more
of a base for the next steps:
* Unlocking of pipes should enable parallel execution of all the filters on
pipes, limited solely by the principle *one thread for every direction of
pipe*.
* Conversion of CLI's `show route` to the new feed-export coroutines should
enable faster table queries. Moreover, this approach will allow for
better splitting of model and view in CLI with a good opportunity to
implement more output formats, e.g. JSON.
* Unlocking of kernel route synchronization should fix latency issues induced
by long-lasting kernel queries.
* Partial unlocking of BGP packet processing should allow for parallel
execution in almost all phases of BGP route propagation.
* Partial unlocking of OSPF route recalculation should raise the useful
maximums of topology size.
The development is now being done mostly in the branch `alderney`. If you asked
why such strange branch names like `jersey`, `guernsey` and `alderney`, here is
a kind-of reason. Yes, these branches could have been named `mq-async-export`,
`mq-async-export-new`, `mq-async-export-new-new`, `mq-another-async-export` and
so on. That's ugly, isn't it? Let's be creative instead. *Jersey* is an island where the
same-named knit fabric was first produced, and knits are made of *threads*. Then you
just look at a map and find the nearby islands.
Also, why so many branches? The development process is quite messy. BIRD's code
heavily depends on the single-threaded approach. This is (in this case)
exceptionally good for performance, as long as you have only one thread. On the
other hand, lots of these assumptions are not documented, so in many cases one
desired change yields a chain of other unforeseen changes which must precede it.
This brings lots of backtracking, branch rebasing and other Git magic. There is
always a can of worms somewhere in the code.
*It's still a long road to version 2.1. This series of texts should document
what needs to be changed, why we're doing it and how. The
[previous chapter](https://en.blog.nic.cz/2021/03/23/bird-journey-to-threads-chapter-1-the-route-and-its-attributes/)
showed the necessary changes in route storage. In the next chapter, we're going
to describe how the coroutines are implemented and what kind of locking system
we are employing to prevent deadlocks. Stay tuned!*

View File

@ -0,0 +1,235 @@
# BIRD Journey to Threads. Chapter 3: Parallel execution and message passing.
Parallel execution in BIRD uses an underlying mechanism of dedicated IO loops
and hierarchical locks. The original event scheduling module has been converted
to do message passing in a multithreaded environment. These mechanisms are
crucial for understanding what happens inside BIRD and how its internal API changes.
BIRD is a fast, robust and memory-efficient routing daemon designed and
implemented at the end of 20th century. We're doing a significant amount of
BIRD's internal structure changes to make it run in multiple threads in parallel.
## Locking and deadlock prevention
Most of BIRD's data structures and algorithms are thread-unsafe and not even
reentrant. Checking and possibly updating all of these would take an
unreasonable amount of time, thus the multithreaded version uses standard mutexes
to lock all the parts which have not been checked and updated yet.
The authors of the original BIRD concepts wisely chose a highly modular structure
which allows us to create a hierarchy of locks. The main chokepoint was between
protocols and tables and it has been removed by implementing asynchronous exports
as described in the [previous chapter](https://en.blog.nic.cz/2021/06/14/bird-journey-to-threads-chapter-2-asynchronous-route-export/).
Locks in BIRD (called domains, as they always lock some defined part of BIRD)
are partially ordered. Every *domain* has its *type* and all threads are
strictly required to lock the domains in the order of their respective types.
The full order is defined in `lib/locking.h`. It's forbidden to lock more than
one domain of a type (these domains are uncomparable) and recursive locking is
forbidden as well.
The locking hierarchy is (roughly; as of February 2022) like this:
1. The BIRD Lock (for everything not yet checked and/or updated)
2. Protocols (as of February 2022, it is BFD, RPKI, Pipe and BGP)
3. Routing tables
4. Global route attribute cache
5. Message passing
6. Internals and memory management
There are heavy checks to ensure proper locking and to help debug any
problem when code violates the hierarchy rules. This impedes performance
depending on how contended the domain is, and in some cases I have already
implemented lockless (or partially lockless) data structures to overcome this.
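A minimal sketch of such an ordering check (the real code lives in `lib/locking.h` and is more thorough; the domain names and the `ASSERT_DIE`-style assertion are illustrative):
```
/* Minimal sketch of ordered locking; the real code is more thorough. */
#include <pthread.h>

enum dom_type { THE_BIRD, PROTO, RTABLE, ATTRS, EVENT, RESOURCE, DOM_MAX };

struct domain { enum dom_type type; pthread_mutex_t mutex; };

static _Thread_local unsigned locks_held;   /* one bit per domain type */

void domain_lock(struct domain *d)
{
  /* Everything already held must be of a strictly earlier type; this also
   * forbids recursive locking and two locks of the same (uncomparable) type. */
  ASSERT_DIE(!(locks_held >> d->type));
  pthread_mutex_lock(&d->mutex);
  locks_held |= 1u << d->type;
}

void domain_unlock(struct domain *d)
{
  locks_held &= ~(1u << d->type);
  pthread_mutex_unlock(&d->mutex);
}
```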
You may ask, why are these heavy checks then employed in production builds?
Risks arising from dropping some locking checks include:
* deadlocks; these are deadly in BIRD anyway so it should just fail with a meaningful message, or
* data corruption; it either kills BIRD right away, or it results in a slow and vicious death,
leaving undebuggable corefiles behind.
To be honest, I believe in principles like *"every nontrivial piece of software has at least one bug"*
and I also don't trust my future self or anybody else to always write bugless code when
it comes to proper locking. I also believe that if a lock becomes a bottleneck,
we should think about what is locked inside and how to optimize it,
possibly implementing a lockless or waitless data structure instead of dropping
thorough consistency checks, especially in a multithreaded environment.
### Choosing the right locking order
When considering the locking order of protocols and route tables, the answer
was quite easy. We had to make either import or export asynchronous (or both).
Major reasons for asynchronous export have been stated in the previous chapter,
therefore it makes little sense to allow entering protocol context from table code.
As I write further in this text, even accessing table context from protocol
code leads to contention on table locks, yet for now, it is good enough and the
lock order features routing tables after protocols to make the multithreading
goal easier to achieve.
The major lock level is still The BIRD Lock, containing not only the
not-yet-converted protocols (like Babel, OSPF or RIP) but also processing CLI
commands and reconfiguration. This involves an awful lot of direct access into
other contexts which would be unnecessarily complicated to implement by message
passing. Therefore, this lock is simply *"the director"*, sitting on the top
with its own category.
The lower lock levels under routing tables are mostly for shared global data
structures accessed from everywhere. We'll address some of these later.
## IO Loop
There has been a protocol, BFD, running in its own thread since 2013. This
separation has a good reason; it needs low latency and the main BIRD loop just
walks round-robin around all the available sockets and one round-trip may take
a long time (even more than a minute with large configurations). BFD had its
own IO loop implementation and simple message passing routines. This code could
be easily updated for general use so I did it.
To understand the internal principles, we should say that in the `master`
branch, there is a big loop centered around a `poll()` call, dispatching and
executing everything as needed. In the `sark` branch, there are multiple loops
of this kind. BIRD has several means how to get something dispatched from a
loop.
1. Requesting to read from a **socket** makes the main loop call your hook when there is some data received.
The same happens when a socket refuses to write data. Then the data is buffered and you are called when
the buffer is free to continue writing. There is also a third callback, an error hook, for obvious reasons.
2. Requesting to be called back after a given amount of time. This is called **timer**.
As is common with all timers, they aren't precise and the callback may be
delayed significantly. This was also the reason to have BFD loop separate
since the very beginning, yet now the abundance of threads may lead to
problems with BFD latency in large-scale configurations. We haven't tested
this yet.
3. Requesting to be called back from a clean context when possible. This is
useful to run anything not reentrant which might mess with the caller's
data, e.g. when a protocol decides to shutdown due to some inconsistency
in received data. This is called **event**.
4. Requesting to do some work when possible. These are also events; the only
difference is where the event is enqueued. In the main loop, there is a
special *work queue* with an execution limit, allowing sockets and timers to be
handled with a reasonable latency while still doing all the work needed.
Other loops don't have designated work queues (we may add them later).
All these, sockets, timers and events, are tightly bound to some domain.
Sockets typically belong to a protocol, timers and events to a protocol or table.
With the modular structure of BIRD, the easy and convenient approach to multithreading
is to get more IO loops, each bound to a specific domain, running their events, timers and
socket hooks in their threads.
## Message passing and loop entering
To request some work in another module, the standard way is to pass a message.
For this purpose, events have been modified to be sent to a given loop without
locking that loop's domain. In fact, every event queue has its own lock with a
low priority, allowing messages to be passed from almost any part of BIRD, and also
an assigned loop which executes the enqueued events. When a message is passed
to a queue executed by another loop, the target loop must be woken up, so we
must know which loop to wake to avoid unnecessary delays. The target
loop then opens its mailbox and processes the task in its context.
The other way is direct access to another domain. This approach blocks the
appropriate loop from doing anything else and we call it *entering a birdloop* to
remember that the task must be fast and *leave the birdloop* as soon as possible.
Route import is done via direct access from protocols to tables; in large
setups with fast filters, this is a major point of contention (after filters
have been parallelized) and will be addressed in future optimization efforts.
Reconfiguration and interface updates also use direct access; more on that later.
In general, this approach should be avoided unless there are good reasons to use it.
Even though direct access is bad, sending lots of messages may be even worse.
Imagine one thousand post(wo)men, coming one by one every minute, ringing your
doorbell and delivering one letter each to you. Horrible! Asynchronous message
passing works exactly this way. After queuing the message, the source sends a
byte to a pipe to wake up the target loop to process the task. We could also
periodically poll for messages instead of waking up the targets, yet it would
add quite a lot of latency which we also don't like.
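A sketch of the sending side (illustrative names; the queue lock, the idempotence check and the wakeup ping are the important parts):
```
/* Sketch of cross-loop message passing; names are illustrative. */
void ev_send(struct event_list *q, event *e)
{
  LOCK_DOMAIN(event, q->lock);       /* low-priority queue lock */
  if (!enlisted(&e->n))              /* enqueuing twice is a no-op */
    add_tail(&q->events, &e->n);
  UNLOCK_DOMAIN(event, q->lock);

  birdloop_ping(q->loop);            /* one byte into the target loop's wakeup pipe */
}
```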
Messages in BIRD don't typically suffer from this amount problem, and the
overhead is negligible compared to the overall CPU consumption, with one notable
exception: route import/export.
### Route export message passing
If we had to send a ping for every route we import to every exporting channel,
we'd spend more time pinging than doing anything else. Been there, seen
those unbelievable 80%-like figures in Perf output. Never more.
Route update is quite a complicated process. BIRD must handle large-scale
configurations with lots of importers and exporters. Therefore, a
triple-indirect delayed route announcement is employed:
1. First, when a channel imports a route by entering a loop, it sends an event
to its own loop (no ping needed in such case). This operation is idempotent,
thus for several routes in a row, only one event is enqueued. This reduces
several route import announcements (even hundreds in case of massive BGP
withdrawals) to one single event.
2. When the channel is done importing (or at least takes a coffee break and
checks its mailbox), the scheduled event in its own loop is run, sending
another event to the table's loop, saying basically *"Hey, table, I've just
imported something."*. This event is also idempotent and further reduces
route import announcements from multiple sources to one single event.
3. The table's announcement event is then executed from its loop, enqueuing export
events for all connected channels, finally initiating route exports. As we
already know, imports are done by direct access, therefore if protocols keep
importing, export announcements are slowed down.
4. The actual data on what has been updated is stored in a table journal. This
peculiar technique is used only for informing the exporting channels that
*"there is something to do"*.
This may seem overly complicated, yet it should work and it seems to work. In
case of low load, all these notifications just come through smoothly. In case
of high load, it's common that multiple updates come for the same destination.
Delaying the exports allows for the updates to settle down and export just the
final result, reducing CPU load and export traffic.
## Cork
Route propagation is involved in yet another problem which has to be addressed.
In the old versions with synchronous route propagation, all the buffering
happened after exporting routes to BGP. When a packet arrived, all the work was
done in the BGP receive hook: parsing, importing into a table, running all the
filters and possibly sending updates to the peers. No more routes were processed until the previous
work was done. This self-regulating mechanism doesn't work any more.
Route table import now returns immediately after inserting the route into a
table, creating a buffer there. These buffers have to be processed by other protocols'
export events. In large-scale configurations, one route import has to be
processed by hundreds, even thousands of exports. Unlimited imports are a major
cause of buffer bloating. This is even worse in configurations with pipes,
as these multiply the exports by propagating them all the way down to other
tables, eventually eating about twice as much memory as the single-threaded version.
There is therefore a cork to make this stop. Every table checks how many
exports it has pending, and when adding a new export to the queue, it may request
a cork, saying simply "please stop the flow for a while". When the export buffer
size drops low enough, the table uncorks.
On the other side, there are events and sockets with a cork assigned. When
trying to enqueue an event and the cork is applied, the event is instead put
into the cork's queue and released only when the cork is released. In case of
sockets, when read is indicated or when `poll` arguments are recalculated,
the corked socket is simply not checked for received packets, effectively
keeping them in the TCP queue and slowing down the flow until the cork is released.
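The principle can be sketched like this (names and thresholds are illustrative):
```
/* Sketch of the cork principle; names and thresholds are illustrative. */
static void rt_check_cork(rtable *tab)
{
  if (tab->pending_export_count > CORK_HIGH_WATERMARK)
    cork_apply(&tab->cork);      /* "please stop the flow for a while":
                                    corked events and sockets are held back */
  else if (tab->pending_export_count < CORK_LOW_WATERMARK)
    cork_release(&tab->cork);    /* re-enqueue delayed events, poll sockets again */
}
```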
The cork implementation is quite crude, rough and fragile. It may get some
rework while stabilizing the multi-threaded version of BIRD, or we may even
completely drop it for some better mechanism. One of the candidates is this
kind of API:
* (table to protocol) please do not import
* (table to protocol) you may resume imports
* (protocol to table) not processing any exports
* (protocol to table) resuming export processing
Anyway, the cork works as intended in most cases, at least for now.
*It's a long road to version 2.1. This series of texts should document what
is changing, why we do it and how. The
[previous chapter](https://en.blog.nic.cz/2021/06/14/bird-journey-to-threads-chapter-2-asynchronous-route-export/)
shows how the route export had to change to allow parallel execution. In the next chapter, some memory management
details are to be explained together with the reasons why memory management matters. Stay tuned!*

View File

@ -0,0 +1,153 @@
# BIRD Journey to Threads. Chapter 3½: Route server performance
All the work on multithreading shall be justified by performance improvements.
This chapter tries to compare the times reached by versions 3.0-alpha0 and 2.0.8,
showing some data and thinking about them.
BIRD is a fast, robust and memory-efficient routing daemon designed and
implemented at the end of 20th century. We're doing a significant amount of
BIRD's internal structure changes to make it run in multiple threads in parallel.
## Testing setup
There are two machines in one rack. One of these simulates the peers of
a route server, the other runs BIRD in a route server configuration. First, the
peers are launched, then the route server is started and one of the peers
measures the convergence time until routes are fully propagated. Other peers
drop all incoming routes.
There are four configurations: *Single*, where all BGPs are directly
connected to the main table; *Multi*, where every BGP has its own table and
filters are done on pipes between them; and finally *Imex* and *Mulimex*, which are
effectively *Single* and *Multi* where all BGPs also have their auxiliary
import and export tables enabled.
All of these use the same short dummy filter for route import to provide a
consistent load. This filter includes no meaningful logic; it's just some dummy
operations to load the CPU with no memory contention. Real filters also do not suffer from
memory contention, with the exception of ROA checks. Optimization of ROA is a
task for another day.
There is also other stuff in BIRD waiting for performance assessment. As the
(by far) most demanding setup of BIRD is a route server in an IXP, we chose to
optimize and measure BGP and filters first.
Hardware used for testing is Intel(R) Xeon(R) CPU E5-2630 v3 @ 2.40GHz with 8
physical cores, two hyperthreads on each. Memory is 32 GB RAM.
## Test parameters and statistics
A BIRD setup may scale on two major axes: the number of peers and the number of routes /
destinations. *(There are more axes, e.g. the complexity of filters, the routes /
destinations ratio, or the topology size in IGP.)*
Scaling the test on route count is easy, just by adding more routes to the
testing peers. Currently, the largest test data I feed BIRD with is about 2M
routes for around 800K destinations, due to memory limitations. The routes /
destinations ratio is around 2.5 in this testing setup, trying to get close to
real-world routing servers.[^1]
[^1]: BIRD can handle much more in real life, the actual software limit is currently
a 32-bit unsigned route counter in the table structure. Hardware capabilities
are already there and checking how BIRD handles more than 4G routes is
certainly going to be a real thing soon.
Scaling the test on peer count is easy, until you get to higher numbers. When I
was setting up the test, I configured one Linux network namespace for each peer,
connecting them by virtual links to a bridge and by a GRE tunnel to the other
machine. This works well for 10 peers but setting up and removing 1000 network
namespaces takes more than 15 minutes in total. (Note to myself: try this with
a newer Linux kernel than 4.9.)
Another problem of test scaling is bandwidth. With 10 peers, everything is OK.
With 1000 peers, version 3.0-alpha0 generates more than 600 Mbps of traffic at peak,
which is just about the bandwidth of the whole setup. I'm planning to design a
better test setup with fewer chokepoints in the future.
## Hypothesis
There are two versions subjected to the test. One of these is `2.0.8` as an
initial testpoint. The other is version 3.0-alpha0, named `bgp` as parallel BGP
is implemented there.
The major problem of large-scale BIRD setups is convergence time on startup. We
assume that a multithreaded version should reduce the overall convergence time,
at most by a factor equal to the number of cores involved. Here we have 16
hyperthreads, so in theory we should reduce the times up to 16-fold, yet this is
almost impossible as a non-negligible amount of time is spent in serial
code like best route selection or some cleanup routines, which becomes the
bottleneck once the other parts run in parallel.
## Data
Four charts are included here, one for each setup. All axes have a
logarithmic scale. The X axis shows the total route count in the tested BIRD,
different color shades belong to different versions and peer counts,
and time is plotted on the Y axis.
Raw data is available in Git, as well as the chart generator. Strange results
caused by testbed bugs are already omitted.
There is also a line drawn at the 2-second mark. Convergence is checked by
periodically requesting `birdc show route count` on one of the peers, and the BGP
peers also have a 1-second connect delay time (the default is 5 seconds). All
measured times shorter than 2 seconds are highly unreliable.
![Plotted data for Single](03b_stats_2d_single.png)
[Plotted data for Single in PDF](03b_stats_2d_single.pdf)
The single-table setup has times reduced to about 1/8 when comparing 3.0-alpha0 to
2.0.8. The speedup for the 10-peer setup is slightly worse than expected and there is
still some room for improvement, yet an 8-fold speedup on 8 physical cores and 16
hyperthreads is good enough for me now.
The most demanding case with 2M routes and 1k peers failed. On 2.0.8, my
configuration converges after almost two hours, with the speed of
route processing steadily decreasing until only several routes per second are
processed. Version 3.0-alpha0 bloats memory for some non-obvious reason and
couldn't fit into 32 GB of RAM. There is definitely some work ahead to stabilize
BIRD's behavior in extreme setups.
![Plotted data for Multi](03b_stats_2d_multi.png)
[Plotted data for Multi in PDF](03b_stats_2d_multi.pdf)
The multi-table setup got the same speedup as the single-table setup, no big
surprise. The largest cases were not tested at all as they don't fit into 32 GB of
RAM even with 2.0.8.
![Plotted data for Imex](03b_stats_2d_imex.png)
[Plotted data for Imex in PDF](03b_stats_2d_imex.pdf)
![Plotted data for Mulimex](03b_stats_2d_mulimex.png)
[Plotted data for Mulimex in PDF](03b_stats_2d_mulimex.pdf)
Setups with import / export tables are also sped up, by a factor
of about 6-8. Data on the largest setups (2M routes) show some strangely
inefficient behaviour. Considering that both the single-table and multi-table
setups yield similar performance data, there is probably some unwanted
inefficiency in the auxiliary table code.
## Conclusion
BIRD 3.0-alpha0 is a good version for preliminary testing in IXPs. There is
some speedup in every testcase and code stability is enough to handle typical
use cases. Some test scenarios ran out of available memory and there is
definitely a lot of work to stabilize this, yet for now it makes no sense to
postpone this alpha version any longer.
We don't recommend upgrading a production machine to this version
yet; if you have a test setup, however, running version 3.0-alpha0 there and
reporting bugs is very welcome.
Notice: Multithreaded BIRD, at least in version 3.0-alpha0, doesn't limit its number of
threads. It will spawn at least one thread per every BGP, RPKI and Pipe
protocol, one thread per every routing table (including auxiliary tables) and
possibly several more. It's up to the machine administrator to set up a limit on
CPU core usage by BIRD. When running with many threads and protocols, you may
also need to raise the file descriptor limit: BIRD uses 2 file descriptors per
thread for internal messaging.
*It's a long road to version 3. By releasing this alpha version, we'd like
to encourage every user to try this preview. If you want to know more about
what is being done and why, you may also check the full
[blogpost series about multithreaded BIRD](https://en.blog.nic.cz/2021/03/15/bird-journey-to-threads-chapter-0-the-reason-why/). Thank you for your ongoing support!*

Binary file not shown.

Binary file not shown.


Binary file not shown.

Binary file not shown.


Binary file not shown.

Binary file not shown.


Binary file not shown.

Binary file not shown.


View File

@ -0,0 +1,223 @@
# BIRD Journey to Threads. Chapter 4: Memory and other resource management.
BIRD is mostly a large specialized database engine, storing mega/gigabytes of
Internet routing data in memory. To keep accounts of every byte of allocated data,
BIRD has its own resource management system which must be adapted to the
multithreaded environment. The resource system has not changed much, yet it
deserves a short chapter.
BIRD is a fast, robust and memory-efficient routing daemon designed and
implemented at the end of 20th century. We're doing a significant amount of
BIRD's internal structure changes to make it run in multiple threads in parallel.
## Resources
Inside BIRD, (almost) every piece of allocated memory is a resource. To achieve this,
every such memory block includes a generic `struct resource` header. The node
is enlisted inside a linked list of a *resource pool* (see below); the class
pointer defines the basic operations done on the resource.
```
typedef struct resource {
node n; /* Inside resource pool */
struct resclass *class; /* Resource class */
} resource;
struct resclass {
char *name; /* Resource class name */
unsigned size; /* Standard size of single resource */
void (*free)(resource *); /* Freeing function */
void (*dump)(resource *); /* Dump to debug output */
resource *(*lookup)(resource *, unsigned long); /* Look up address (only for debugging) */
struct resmem (*memsize)(resource *); /* Return size of memory used by the resource, may be NULL */
};
void *ralloc(pool *, struct resclass *);
```
The resource life cycle begins with the allocation of a resource. To do that, you call `ralloc()`,
passing the parent pool and the appropriate resource class as arguments. BIRD
allocates a memory block of the size given by the class member `size`.
The beginning of the block is reserved for `struct resource` itself and initialized
by the given arguments. Therefore, you may sometimes see the idiom where a structure
has `struct resource r;` as its first member, indicating that this item should be
allocated as a resource.
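For illustration, a custom resource class may be defined like this (a sketch; the names are made up):
```
/* Sketch of a custom resource class; names are made up. */
struct my_sock {
  resource r;                    /* Must be the first member */
  int fd;
};

static void my_sock_free(resource *r)
{
  close(((struct my_sock *) r)->fd);
}

static struct resclass my_sock_class = {
  .name = "My socket",
  .size = sizeof(struct my_sock),
  .free = my_sock_free,
};

static void my_sock_open(pool *p)
{
  struct my_sock *s = ralloc(p, &my_sock_class);
  s->fd = socket(AF_INET, SOCK_DGRAM, 0);
  /* The fd is closed automatically when p (or s itself) is freed. */
}
```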
The counterpart is resource freeing. This may be implicit (by resource pool
freeing) or explicit (by `rfree()`). In both cases, the `free()` function of
the appropriate class is called to clean up the resource before the block
itself is freed.
The `dump` and `memsize` hooks back the CLI commands `dump resources` and
`show memory`, which dump all resources or show memory usage as perceived by
BIRD, respectively.
The last hook, `lookup`, is a quite obsolete way to identify a specific pointer
from a debug interface. You may call `rlookup(pointer)` and BIRD should dump
that resource to the debug output. This mechanism is probably incomplete, as no
developer uses it actively for debugging.
Resources can also be moved between pools by `rmove()` when needed.
## Resource pools
The first internal resource class is a recursive resource: a resource pool. In
the singlethreaded version, this is just a simple structure:
```
struct pool {
resource r;
list inside;
struct birdloop *loop; /* In multithreaded version only */
const char *name;
};
```
Resource pools are used for grouping resources together. There are pools everywhere
and it is a common idiom inside BIRD to just `rfree` the appropriate pool when
e.g. a protocol or table is going down. Everything left there is cleaned up.
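
A sketch of this idiom, assuming the BIRD 2 signatures of `rp_new()` and
`lp_new()` (both have changed slightly across versions):

```
pool *p = rp_new(&root_pool, "My subsystem");

void *cfg = mb_alloc(p, 1024);		/* enlisted in p */
linpool *lp = lp_new(p);		/* enlisted in p as well */

/* ... the subsystem does its work ... */

rfree(p);	/* frees cfg, lp and everything else left inside */
```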
There are nevertheless several classes which must be freed with care. In the
singlethreaded version, the *slab* allocator (see below) must be empty before
it may be freed; this requirement is kept in the multithreaded version, while
other restrictions have been added.
There is also a global pool, `root_pool`, containing every single resource BIRD
knows about, either directly or via another resource pool.
### Thread safety in resource pools
In the multithreaded version, every resource pool is bound to a specific IO
loop and therefore includes an IO loop pointer. This is important for allocations,
as the resource list inside the pool is thread-unsafe. All pool operations
therefore require the IO loop to be entered, where possible.
(In the case of `rfree()`, the pool data structure is not accessed at all, so no
assert is possible. We're currently relying on the caller to ensure proper locking.
In the future, this may change.)
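
In code, the discipline looks roughly like this (a sketch; `birdloop_enter()`
and `birdloop_leave()` are the lock calls from the previous chapter, while
`birdloop_pool()` is a hypothetical accessor for the loop base pool):

```
birdloop_enter(loop);			/* take the loop lock */

pool *p = birdloop_pool(loop);		/* hypothetical accessor */
void *buf = mb_alloc(p, 256);		/* safe: we hold the loop lock */

birdloop_leave(loop);
```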
Each IO loop also has its base resource pool for its allocations. All pools
inside the IO loop pool must belong to the same loop or to a loop with a
subordinate lock (see the previous chapter for lock ordering). If there is a
need for multiple IO loops to access one shared data structure, it must be
locked by another lock and allocated in a way that is independent of these
accessor loops.
The pool structure should follow the locking order. Any pool should either
belong to the same loop as its parent, or its loop lock should come after its
parent's loop lock in the locking order. This is not enforced explicitly, yet it is
virtually impossible to write working code that violates this recommendation.
### Resource pools in the wilderness
The root pool contains (among others):
* route attributes and sources
* routing tables
* protocols
* interfaces
* configuration data
Each table has its IO loop and uses the loop base pool for allocations.
The same holds for protocols. Each protocol has its pool; it is either its IO
loop base pool or an ordinary pool bound to the main loop.
## Memory allocators
BIRD stores data in memory blocks allocated by several allocators. There are 3
of them: simple memory blocks, linear pools and slabs.
### Simple memory block
When just a chunk of memory is needed, `mb_alloc()` or `mb_allocz()` is used
to get it. The first has `malloc()` semantics; the other also zeroes the block.
There is also `mb_realloc()` available, `mb_free()` to explicitly free such
memory and `mb_move()` to move that memory to another pool.
Simple memory blocks consume a fixed amount of overhead memory (32 bytes on
systems with 64-bit pointers) so they are suitable mostly for big chunks,
taking advantage of the default *stdlib* allocator which is used by this
allocation strategy. There are nevertheless some parts of BIRD (in all versions)
where this allocator is used for little blocks. This will be fixed some day.
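
A short usage sketch of this interface:

```
u32 *tab = mb_allocz(p, 16 * sizeof(u32));	/* zeroed, enlisted in p */
tab = mb_realloc(tab, 32 * sizeof(u32));	/* stays in the same pool */
mb_free(tab);			/* or leave it for rfree(p) to clean up */
```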
### Linear pools
Sometimes, memory is allocated temporarily. When the data may just sit on the
stack, we put it there. However, many tasks need more structured execution where
stack allocation is inconvenient or even impossible (e.g. when callbacks from
parsers are involved). For such a case, a *linpool* is the best choice.
This data structure allocates memory blocks of the requested size with negligible
overhead via the functions `lp_alloc()` (uninitialized) or `lp_allocz()` (zeroed).
There is, however, no `realloc` and no `free` call; to get a larger chunk, you
need to allocate another block. All this memory is freed at once by `lp_flush()`
when it is no longer needed.
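
A typical linpool lifetime, sketched (assuming the variant of `lp_new()`
taking just the parent pool; `struct parser_state` is a made-up example):

```
linpool *lp = lp_new(p);

struct parser_state *s = lp_allocz(lp, sizeof(struct parser_state));
char *copy = lp_strdup(lp, input);	/* string helpers allocate here too */

/* ... parsing runs, allocating freely ... */

lp_flush(lp);		/* everything allocated above is gone at once */
```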
You may see linpools in parsers (BGP, Linux netlink, config) or in filters.
In the multithreaded version, linpools have received an update, allocating
memory pages directly by `mmap()` instead of calling `malloc()`. More on memory
pages below.
### Slabs
To allocate lots of same-sized objects, a [slab allocator](https://en.wikipedia.org/wiki/Slab_allocation)
is an ideal choice. In versions until 2.0.8, our slab allocator used blocks
allocated by `malloc()`; every object included a *slab head* pointer and free objects
were linked into a singly-linked list. This led to memory inefficiency and to
counter-intuitive behavior where a use-after-free bug could do lots of damage
before finally crashing.
Versions from 2.0.9 on, and also all the multithreaded versions, come with
slabs using directly allocated memory pages and usage bitmaps instead of
singly-linking the free objects. This approach, however, relies on the fact that
pointers returned by `mmap()` are always aligned to the page size. Freeing a
slab object then involves zeroing its pointer's least significant bits (12 bits
for the typical 4 kB pages) to get the page pointer where the slab head resides.
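
The masking itself is plain bit arithmetic; a sketch assuming 4 kB pages
(with `struct sl_head` as the in-page slab header):

```
#define PAGE_SIZE 4096

static inline struct sl_head *sl_get_head(void *obj)
{
  /* Zero the 12 least significant bits to get the page start */
  return (struct sl_head *) ((uintptr_t) obj & ~((uintptr_t) (PAGE_SIZE - 1)));
}
```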
This update reduces memory consumption by about 5% compared to previous
versions; exact numbers depend on the usage pattern.
## Raw memory pages
Until 2.0.8 (inclusive), BIRD allocated all memory by `malloc()`. This method is
suitable for lots of use cases, yet when gigabytes of memory are to be
allocated in little pieces, BIRD uses its internal allocators to keep track
of everything. This brings some inefficiency, as the stdlib allocator has its
own overhead and doesn't allocate aligned memory unless asked to.
Slabs and linear pools are backed by blocks of memory of kilobyte sizes. As the
typical memory page size is 4 kB, it is a logical step to drop stdlib
allocation from these allocators and to use `mmap()` directly. This, however, has
some drawbacks, most notably the need for a syscall on every memory mapping and
unmapping. For allocations, this is not much of a problem, as the syscall time is
typically negligible compared to the computation time. Freeing memory is much
worse, as BIRD sometimes frees gigabytes of data in the blink of an eye.
To minimize the needed number of syscalls, there is a per-thread page cache,
keeping pages for future use:
* When a new page is requested, first the page cache is tried.
* When a page is freed, the per-thread page cache keeps it without telling the kernel.
* When the number of pages in any per-thread page cache leaves a predefined range,
  a cleanup routine is scheduled to free excess pages or request more in advance.
This method gives multithreaded BIRD not only faster memory management than
ever before but also almost immediate shutdown times, as the cleanup routine is
not scheduled on shutdown at all.
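
A pseudo-C sketch of this policy follows; the names and the threshold are
illustrative, not the actual BIRD symbols, and the real implementation also
handles the low watermark and error checking:

```
#define PAGE_SIZE 4096
#define KEEP_PAGES_MAX 256

struct free_page { struct free_page *next; };

static _Thread_local struct free_page *page_stack;
static _Thread_local unsigned page_cnt;

static void schedule_cleanup(void);	/* stand-in for the real async cleanup hook */

void *alloc_page(void)
{
  if (page_stack)			/* first try the local cache */
  {
    struct free_page *fp = page_stack;
    page_stack = fp->next;
    page_cnt--;
    return fp;
  }

  return mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
	      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
}

void free_page(void *ptr)
{
  struct free_page *fp = ptr;		/* keep the page, don't munmap() */
  fp->next = page_stack;
  page_stack = fp;

  if (++page_cnt > KEEP_PAGES_MAX)
    schedule_cleanup();			/* munmap() the excess off the hot path */
}
```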
## Other resources
Some objects are not just pieces of memory; notable examples are sockets, owning
the underlying I/O mechanism, and *object locks*, owning *the right to use a
specific I/O*. This ensures that collisions on e.g. TCP port numbers and
addresses are resolved in a predictable way.
All these resources should be used with the same locking principles as the
memory blocks. There aren't many checks inside the BIRD code to ensure that yet;
nevertheless, violating this recommendation may lead to multiple-access issues.
*It's still a long road to version 2.1. This series of texts should document
what needs to be changed, why we do it and how. The
[previous chapter](TODO)
showed the locking system and how parallel execution is done.
The next chapter will cover in more detail route sources and route attributes
and how lockless data structures are employed there. Stay tuned!*

29
doc/threads/Makefile Normal file
View File

@ -0,0 +1,29 @@
SUFFICES := .pdf -wordpress.html
CHAPTERS := 00_the_name_of_the_game 01_the_route_and_its_attributes 02_asynchronous_export 03_coroutines 03b_performance
all: $(foreach ch,$(CHAPTERS),$(addprefix $(ch),$(SUFFICES)))
00_the_name_of_the_game.pdf: 00_filter_structure.png
%.pdf: %.md
pandoc -f markdown -t latex -o $@ $<
%.html: %.md
pandoc -f markdown -t html5 -o $@ $<
%-wordpress.html: %.html Makefile
sed -r 's#</p>#\n#g; s#<p>##g; s#<(/?)code>#<\1tt>#g; s#<pre><tt>#<code>#g; s#</tt></pre>#</code>#g; s#</?figure>##g; s#<figcaption>#<p style="text-align: center">#; s#</figcaption>#</p>#; ' $< > $@
stats-%.csv: stats.csv stats-filter.pl
perl stats-filter.pl $< $* > $@
STATS_VARIANTS := multi imex mulimex single
stats-all: $(patsubst %,stats-%.csv,$(STATS_VARIANTS))
stats-2d-%.pdf: stats.csv stats-filter-2d.pl
perl stats-filter-2d.pl $< $* $@
stats-2d-%.png: stats-2d-%.pdf
gs -dBATCH -dNOPAUSE -sDEVICE=pngalpha -sOutputFile=$@ -r300 $<
stats-all-2d: $(foreach suf,pdf png,$(patsubst %,stats-2d-%.$(suf),$(STATS_VARIANTS)))

View File

@ -0,0 +1,41 @@
set datafile columnheaders
set datafile separator ";"
#set isosample 15
set dgrid3d 8,8
set logscale
set view 80,15,1,1
set autoscale xy
#set pm3d
set term pdfcairo size 20cm,15cm
set xlabel "TOTAL ROUTES" offset 0,-1.5
set xrange [10000:320000]
set xtics offset 0,-0.5
set xtics (10000,15000,30000,50000,100000,150000,300000)
set ylabel "PEERS"
#set yrange [10:320]
#set ytics (10,15,30,50,100,150,300)
set yrange [10:320]
set ytics (10,15,30,50,100,150,300)
set zrange [1:2000]
set xyplane at 1
set border 895
#set grid ztics lt 20
set output ARG1 . "-" . ARG4 . ".pdf"
splot \
ARG1 . ".csv" \
using "TOTAL_ROUTES":"PEERS":ARG2."/".ARG4 \
with lines \
title ARG2."/".ARG4, \
"" \
using "TOTAL_ROUTES":"PEERS":ARG3."/".ARG4 \
with lines \
title ARG3."/".ARG4

View File

@ -0,0 +1,156 @@
#!/usr/bin/perl
use common::sense;
use Data::Dump;
use List::Util;
my @GROUP_BY = qw/VERSION PEERS TOTAL_ROUTES/;
my @VALUES = qw/TIMEDIF/;
my ($FILE, $TYPE, $OUTPUT) = @ARGV;
### Load data ###
my %data;
open F, "<", $FILE or die $!;
my @header = split /;/, <F>;
chomp @header;
my $line = undef;
while ($line = <F>)
{
chomp $line;
$line =~ s/;;(.*);;/;;\1;/;
$line =~ s/v2\.0\.8-1[89][^;]+/bgp/;
$line =~ s/v2\.0\.8-[^;]+/sark/ and next;
$line =~ s/master;/v2.0.8;/;
my %row;
@row{@header} = split /;/, $line;
push @{$data{join ";", @row{@GROUP_BY}}}, { %row } if $row{TYPE} eq $TYPE;
}
### Do statistics ###
sub avg {
return List::Util::sum(@_) / @_;
}
sub getinbetween {
my $index = shift;
my @list = @_;
return $list[int $index] if $index == int $index;
my $lower = $list[int $index];
my $upper = $list[1 + int $index];
my $frac = $index - int $index;
return ($lower * (1 - $frac) + $upper * $frac);
}
sub stats {
my $avg = shift;
return [0, 0, 0, 0, 0] if @_ <= 1;
# my $stdev = sqrt(List::Util::sum(map { ($avg - $_)**2 } @_) / (@_-1));
my @sorted = sort { $a <=> $b } @_;
my $count = scalar @sorted;
return [
getinbetween(($count-1) * 0.25, @sorted),
$sorted[0],
$sorted[$count-1],
getinbetween(($count-1) * 0.75, @sorted),
];
}
my %output;
my %vers;
my %peers;
my %stplot;
STATS:
foreach my $k (keys %data)
{
my %cols = map { my $vk = $_; $vk => [ map { $_->{$vk} } @{$data{$k}} ]; } @VALUES;
my %avg = map { $_ => avg(@{$cols{$_}})} @VALUES;
my %stloc = map { $_ => stats($avg{$_}, @{$cols{$_}})} @VALUES;
$vers{$data{$k}[0]{VERSION}}++;
$peers{$data{$k}[0]{PEERS}}++;
$output{$data{$k}[0]{VERSION}}{$data{$k}[0]{PEERS}}{$data{$k}[0]{TOTAL_ROUTES}} = { %avg };
$stplot{$data{$k}[0]{VERSION}}{$data{$k}[0]{PEERS}}{$data{$k}[0]{TOTAL_ROUTES}} = { %stloc };
}
#(3 == scalar %vers) and $vers{sark} and $vers{bgp} and $vers{"v2.0.8"} or die "vers size is " . (scalar %vers) . ", items ", join ", ", keys %vers;
(2 == scalar %vers) and $vers{bgp} and $vers{"v2.0.8"} or die "vers size is " . (scalar %vers) . ", items ", join ", ", keys %vers;
### Export the data ###
open PLOT, "|-", "gnuplot" or die $!;
say PLOT <<EOF;
set logscale
set term pdfcairo size 20cm,15cm
set xlabel "Total number of routes" offset 0,-1.5
set xrange [10000:3000000]
set xtics offset 0,-0.5
#set xtics (10000,15000,30000,50000,100000,150000,300000,500000,1000000)
set ylabel "Time to converge (s)"
set yrange [0.5:10800]
set grid
set key left top
set output "$OUTPUT"
EOF
my @colors = (
[ 1, 0.9, 0.3 ],
[ 0.7, 0, 0 ],
# [ 0.6, 1, 0.3 ],
# [ 0, 0.7, 0 ],
[ 0, 0.7, 1 ],
[ 0.3, 0.3, 1 ],
);
my $steps = (scalar %peers) - 1;
my @plot_data;
foreach my $v (sort keys %vers) {
my $color = shift @colors;
my $endcolor = shift @colors;
my $stepcolor = [ map +( ($endcolor->[$_] - $color->[$_]) / $steps ), (0, 1, 2) ];
foreach my $p (sort { int $a <=> int $b } keys %peers) {
my $vnodot = $v; $vnodot =~ s/\.//g;
say PLOT "\$data_${vnodot}_${p} << EOD";
foreach my $tr (sort { int $a <=> int $b } keys %{$output{$v}{$p}}) {
say PLOT "$tr $output{$v}{$p}{$tr}{TIMEDIF}";
}
say PLOT "EOD";
say PLOT "\$data_${vnodot}_${p}_stats << EOD";
foreach my $tr (sort { int $a <=> int $b } keys %{$output{$v}{$p}}) {
say PLOT join " ", ( $tr, @{$stplot{$v}{$p}{$tr}{TIMEDIF}} );
}
say PLOT "EOD";
my $colorstr = sprintf "linecolor rgbcolor \"#%02x%02x%02x\"", map +( int($color->[$_] * 255 + 0.5)), (0, 1, 2);
push @plot_data, "\$data_${vnodot}_${p} using 1:2 with lines $colorstr linewidth 2 title \"$v, $p peers\"";
push @plot_data, "\$data_${vnodot}_${p}_stats with candlesticks $colorstr linewidth 2 notitle \"\"";
$color = [ map +( $color->[$_] + $stepcolor->[$_] ), (0, 1, 2) ];
}
}
push @plot_data, "2 with lines lt 1 dashtype 2 title \"Measurement instability\"";
say PLOT "plot ", join ", ", @plot_data;
close PLOT;

View File

@ -0,0 +1,84 @@
#!/usr/bin/perl
use common::sense;
use Data::Dump;
use List::Util;
my @GROUP_BY = qw/VERSION PEERS TOTAL_ROUTES/;
my @VALUES = qw/RSS SZ VSZ TIMEDIF/;
my ($FILE, $TYPE) = @ARGV;
### Load data ###
my %data;
open F, "<", $FILE or die $!;
my @header = split /;/, <F>;
chomp @header;
my $line = undef;
while ($line = <F>)
{
chomp $line;
my %row;
@row{@header} = split /;/, $line;
push @{$data{join ";", @row{@GROUP_BY}}}, { %row } if $row{TYPE} eq $TYPE;
}
### Do statistics ###
sub avg {
return List::Util::sum(@_) / @_;
}
sub stdev {
my $avg = shift;
return 0 if @_ <= 1;
return sqrt(List::Util::sum(map { ($avg - $_)**2 } @_) / (@_-1));
}
my %output;
my %vers;
STATS:
foreach my $k (keys %data)
{
my %cols = map { my $vk = $_; $vk => [ map { $_->{$vk} } @{$data{$k}} ]; } @VALUES;
my %avg = map { $_ => avg(@{$cols{$_}})} @VALUES;
my %stdev = map { $_ => stdev($avg{$_}, @{$cols{$_}})} @VALUES;
foreach my $v (@VALUES) {
next if $stdev{$v} / $avg{$v} < 0.035;
for (my $i=0; $i<@{$cols{$v}}; $i++)
{
my $dif = $cols{$v}[$i] - $avg{$v};
next if $dif < $stdev{$v} * 2 and $dif > $stdev{$v} * (-2);
=cut
printf "Removing an outlier for %s/%s: avg=%f, stdev=%f, variance=%.1f%%, val=%f, valratio=%.1f%%\n",
$k, $v, $avg{$v}, $stdev{$v}, (100 * $stdev{$v} / $avg{$v}), $cols{$v}[$i], (100 * $dif / $stdev{$v});
=cut
splice @{$data{$k}}, $i, 1, ();
redo STATS;
}
}
$vers{$data{$k}[0]{VERSION}}++;
$output{"$data{$k}[0]{PEERS};$data{$k}[0]{TOTAL_ROUTES}"}{$data{$k}[0]{VERSION}} = { %avg };
}
### Export the data ###
say "PEERS;TOTAL_ROUTES;" . join ";", ( map { my $vk = $_; map { "$_/$vk" } keys %vers; } @VALUES );
sub keysort {
my ($pa, $ta) = split /;/, $_[0];
my ($pb, $tb) = split /;/, $_[1];
return (int $ta) <=> (int $tb) if $pa eq $pb;
return (int $pa) <=> (int $pb);
}
foreach my $k (sort { keysort($a, $b); } keys %output)
{
say "$k;" . join ";", ( map { my $vk = $_; map { $output{$k}{$_}{$vk}; } keys %vers; } @VALUES );
}

File diff suppressed because it is too large.

2086
doc/threads/stats.csv Normal file

File diff suppressed because it is too large.

View File

@ -39,16 +39,17 @@ static inline void f_method_call_start(struct f_inst *object)
cf_error("Too many nested method calls");
struct sym_scope *scope = f_type_method_scope(object->type);
if (!scope)
if (!scope->hash.count && !scope->next)
cf_error("No methods defined for type %s", f_type_name(object->type));
/* Replacing the current symbol scope with the appropriate method scope
for the given type. */
FM = (struct f_method_scope) {
.object = object,
.main = new_config->current_scope,
.scope = {
.next = global_root_scope,
.next = scope->next,
.hash = scope->hash,
.active = 1,
.block = 1,
.readonly = 1,
},
@ -58,22 +59,16 @@ static inline void f_method_call_start(struct f_inst *object)
static inline void f_method_call_args(void)
{
ASSERT_DIE(FM.scope.active);
FM.scope.active = 0;
/* For argument parsing, we need to revert back to the standard symbol scope. */
new_config->current_scope = FM.main;
}
static inline void f_method_call_end(void)
{
ASSERT_DIE(f_method_scope_pos >= 0);
if (FM.scope.active) {
ASSERT_DIE(&FM.scope == new_config->current_scope);
if (&FM.scope == new_config->current_scope)
new_config->current_scope = FM.main;
FM.scope.active = 0;
}
f_method_scope_pos--;
}
@ -107,6 +102,10 @@ f_new_var(struct sym_scope *s)
return offset;
}
/* Macro for top-level pre-defined variables. */
#define f_predefined_variable(conf_, name_, type_) \
cf_define_symbol(conf_, cf_get_symbol(conf_, name_), SYM_VARIABLE | type_, offset, f_new_var(conf_->current_scope))
/*
* Sets and their items are during parsing handled as lists, linked
* through left ptr. The first item in a list also contains a pointer
@ -245,24 +244,6 @@ f_new_lc_item(u32 f1, u32 t1, u32 f2, u32 t2, u32 f3, u32 t3)
return t;
}
static inline struct f_inst *
f_const_empty(enum f_type t)
{
switch (t) {
case T_PATH:
case T_CLIST:
case T_ECLIST:
case T_LCLIST:
return f_new_inst(FI_CONSTANT, (struct f_val) {
.type = t,
.val.ad = &null_adata,
});
case T_ROUTE:
return f_new_inst(FI_CONSTANT, (struct f_val) { .type = T_ROUTE });
default:
return f_new_inst(FI_CONSTANT, (struct f_val) {});
}
}
/*
* Remove all new lines and doubled whitespaces
@ -322,9 +303,15 @@ static struct f_inst *
f_lval_getter(struct f_lval *lval)
{
switch (lval->type) {
case F_LVAL_CONSTANT: return f_new_inst(FI_CONSTANT, *(lval->sym->val));
case F_LVAL_VARIABLE: return f_new_inst(FI_VAR_GET, lval->sym);
case F_LVAL_SA: return f_new_inst(FI_RTA_GET, lval->rte, lval->sa);
case F_LVAL_EA: return f_new_inst(FI_EA_GET, lval->rte, lval->da);
case F_LVAL_ATTR_BIT:
{
struct f_inst *c = f_new_inst(FI_CONSTANT, (struct f_val) { .type = T_INT, .val.i = (1U << lval->fab.bit)});
return f_new_inst(FI_EQ, c, f_new_inst(FI_BITAND, f_new_inst(FI_EA_GET, lval->rte, lval->fab.class), c));
}
default: bug("Unknown lval type");
}
}
@ -333,9 +320,28 @@ static struct f_inst *
f_lval_setter(struct f_lval *lval, struct f_inst *expr)
{
switch (lval->type) {
case F_LVAL_CONSTANT: cf_error("Constant %s is read-only", lval->sym->name);
case F_LVAL_VARIABLE: return f_new_inst(FI_VAR_SET, expr, lval->sym);
case F_LVAL_SA: return f_new_inst(FI_RTA_SET, expr, lval->sa);
case F_LVAL_SA:
if (lval->sa.readonly)
cf_error( "This static attribute is read-only.");
return f_new_inst(FI_RTA_SET, expr, lval->sa);
case F_LVAL_EA: return f_new_inst(FI_EA_SET, expr, lval->da);
case F_LVAL_ATTR_BIT: return f_new_inst(FI_CONDITION, expr,
f_new_inst(FI_EA_SET,
f_new_inst(FI_BITOR,
f_new_inst(FI_CONSTANT, (struct f_val) { .type = T_INT, .val.i = (1U << lval->fab.bit)}),
f_new_inst(FI_EA_GET, lval->rte, lval->fab.class)
),
lval->fab.class),
f_new_inst(FI_EA_SET,
f_new_inst(FI_BITAND,
f_new_inst(FI_CONSTANT, (struct f_val) { .type = T_INT, .val.i = ~(1U << lval->fab.bit)}),
f_new_inst(FI_EA_GET, lval->rte, lval->fab.class)
),
lval->fab.class)
);
default: bug("Unknown lval type");
}
}
@ -354,16 +360,14 @@ assert_assign(struct f_lval *lval, struct f_inst *expr, const char *start, const
CF_DECLS
CF_KEYWORDS_EXCLUSIVE(IN)
CF_KEYWORDS(FUNCTION, PRINT, PRINTN, UNSET, RETURN,
ACCEPT, REJECT, ERROR,
INT, BOOL, IP, PREFIX, RD, PAIR, QUAD, EC, LC,
SET, STRING, BYTESTRING, BGPMASK, BGPPATH, CLIST, ECLIST, LCLIST,
IF, THEN, ELSE, CASE,
FOR, DO,
FOR, IN, DO,
TRUE, FALSE, RT, RO, UNKNOWN, GENERIC,
FROM, GW, NET, PROTO, SOURCE, SCOPE, DEST, IFNAME, IFINDEX, WEIGHT, GW_MPLS, GW_MPLS_STACK, ONLINK,
PREFERENCE,
FROM, GW, NET, PROTO, SCOPE, DEST, IFNAME, IFINDEX, WEIGHT, GW_MPLS,
ROA_CHECK,
DEFINED,
ADD, DELETE, RESET,
@ -371,14 +375,13 @@ CF_KEYWORDS(FUNCTION, PRINT, PRINTN, UNSET, RETURN,
EMPTY,
FILTER, WHERE, EVAL, ATTRIBUTE,
FROM_HEX,
BT_ASSERT, BT_TEST_SUITE, BT_CHECK_ASSIGN, BT_TEST_SAME, FORMAT)
BT_ASSERT, BT_TEST_SUITE, BT_CHECK_ASSIGN, BT_TEST_SAME, FORMAT, STACKS)
%nonassoc THEN
%nonassoc ELSE
%type <xp> cmds_int cmd_prep
%type <x> term term_bs cmd cmd_var cmds cmds_scoped constant constructor var var_list var_list_r function_call symbol_value bgp_path_expr bgp_path bgp_path_tail term_dot_method method_name_cont
%type <fda> dynamic_attr
%type <x> term term_bs cmd cmd_var cmds cmds_scoped constant constructor var var_list var_list_r function_call bgp_path_expr bgp_path bgp_path_tail term_dot_method method_name_cont
%type <fsa> static_attr
%type <f> filter where_filter
%type <fl> filter_body function_body
@ -397,10 +400,17 @@ CF_KEYWORDS(FUNCTION, PRINT, PRINTN, UNSET, RETURN,
CF_GRAMMAR
conf: FILTER STACKS expr expr ';' {
new_config->filter_vstk = $3;
new_config->filter_estk = $4;
}
;
conf: filter_def ;
filter_def:
FILTER symbol {
$2 = cf_define_symbol(new_config, $2, SYM_FILTER, filter, NULL);
cf_enter_filters();
cf_push_scope( new_config, $2 );
this_function = NULL;
} filter_body {
@ -409,6 +419,7 @@ filter_def:
$2->filter = f;
cf_pop_scope(new_config);
cf_exit_filters();
}
;
@ -419,12 +430,23 @@ filter_eval:
conf: custom_attr ;
custom_attr: ATTRIBUTE type symbol ';' {
cf_define_symbol(new_config, $3, SYM_ATTRIBUTE, attribute, ca_lookup(new_config->pool, $3->name, $2)->fda);
cf_enter_filters();
struct ea_class *ac = ea_class_find_by_name($3->name);
cf_exit_filters();
if (ac && (ac->type == $2))
ea_ref_class(new_config->pool, ac);
else
ac = ea_register_alloc(new_config->pool, (struct ea_class) {
.name = $3->name,
.type = $2,
})->class;
cf_define_symbol(new_config, $3, SYM_ATTRIBUTE, attribute, ac);
};
conf: bt_test_suite ;
bt_test_suite:
BT_TEST_SUITE '(' symbol_known ',' text ')' {
BT_TEST_SUITE '(' CF_SYM_KNOWN ',' text ')' {
cf_assert_symbol($3, SYM_FUNCTION);
struct f_bt_test_suite *t = cfg_allocz(sizeof(struct f_bt_test_suite));
t->fn = $3->function;
@ -437,7 +459,7 @@ bt_test_suite:
conf: bt_test_same ;
bt_test_same:
BT_TEST_SAME '(' symbol_known ',' symbol_known ',' NUM ')' {
BT_TEST_SAME '(' CF_SYM_KNOWN ',' CF_SYM_KNOWN ',' NUM ')' {
cf_assert_symbol($3, SYM_FUNCTION);
cf_assert_symbol($5, SYM_FUNCTION);
struct f_bt_test_suite *t = cfg_allocz(sizeof(struct f_bt_test_suite));
@ -525,11 +547,12 @@ function_type:
filter_body: function_body ;
filter:
symbol_known {
CF_SYM_KNOWN {
cf_assert_symbol($1, SYM_FILTER);
$$ = $1->filter;
}
| {
cf_enter_filters();
cf_push_scope(new_config, NULL);
this_function = NULL;
} filter_body {
@ -538,13 +561,17 @@ filter:
$$ = f;
cf_pop_scope(new_config);
cf_exit_filters();
}
;
where_filter:
WHERE term {
WHERE {
cf_enter_filters();
} term {
/* Construct 'IF term THEN { ACCEPT; } ELSE { REJECT; }' */
$$ = f_new_where($2);
$$ = f_new_where($3);
cf_exit_filters();
}
;
@ -561,6 +588,7 @@ function_def:
FUNCTION symbol {
DBG( "Beginning of function %s\n", $2->name );
this_function = cf_define_symbol(new_config, $2, SYM_FUNCTION, function, NULL);
cf_enter_filters();
cf_push_scope(new_config, this_function);
} function_args function_type {
/* Make dummy f_line for storing function prototype */
@ -584,6 +612,7 @@ function_def:
$7->return_type = this_function->function->return_type;
$2->function = $7;
cf_pop_scope(new_config);
cf_exit_filters();
}
;
@ -642,10 +671,10 @@ set_atom:
| VPN_RD { $$.type = T_RD; $$.val.ec = $1; }
| ENUM { $$.type = pair_a($1); $$.val.i = pair_b($1); }
| '(' term ')' {
$$ = cf_eval($2, T_VOID);
$$ = cf_eval_tmp($2, T_VOID);
if (!f_valid_set_type($$.type)) cf_error("Set-incompatible type");
}
| symbol_known {
| CF_SYM_KNOWN {
cf_assert_symbol($1, SYM_CONSTANT);
if (!f_valid_set_type(SYM_TYPE($1))) cf_error("%s: set-incompatible type", $1->name);
$$ = *$1->val;
@ -654,7 +683,7 @@ set_atom:
switch_atom:
NUM { $$.type = T_INT; $$.val.i = $1; }
| '(' term ')' { $$ = cf_eval($2, T_INT); }
| '(' term ')' { $$ = cf_eval_tmp($2, T_INT); }
| fipa { $$ = $1; }
| ENUM { $$.type = pair_a($1); $$.val.i = pair_b($1); }
;
@ -766,7 +795,7 @@ switch_body: /* EMPTY */ { $$ = NULL; }
;
bgp_path_expr:
symbol_value { $$ = $1; }
lvalue { $$ = f_lval_getter(&$1); }
| '(' term ')' { $$ = $2; }
;
@ -839,7 +868,7 @@ var_list: var_list_r
;
function_call:
symbol_known '(' var_list ')'
CF_SYM_KNOWN '(' var_list ')'
{
if ($1->class != SYM_FUNCTION)
cf_error("You can't call something which is not a function. Really.");
@ -848,39 +877,16 @@ function_call:
}
;
symbol_value: symbol_known
{
switch ($1->class) {
case SYM_CONSTANT_RANGE:
$$ = f_new_inst(FI_CONSTANT, *($1->val));
break;
case SYM_VARIABLE_RANGE:
$$ = f_new_inst(FI_VAR_GET, $1);
break;
case SYM_ATTRIBUTE:
$$ = f_new_inst(FI_EA_GET, f_new_inst(FI_CONSTANT, (struct f_val) { .type = T_ROUTE, .val.rte = NULL }), *$1->attribute);
break;
default:
cf_error("Can't get value of symbol %s", $1->name);
}
}
;
static_attr:
FROM { $$ = f_new_static_attr(T_IP, SA_FROM, 0); }
| GW { $$ = f_new_static_attr(T_IP, SA_GW, 0); }
GW { $$ = f_new_static_attr(T_IP, SA_GW, 0); }
| NET { $$ = f_new_static_attr(T_NET, SA_NET, 1); }
| PROTO { $$ = f_new_static_attr(T_STRING, SA_PROTO, 1); }
| SOURCE { $$ = f_new_static_attr(T_ENUM_RTS, SA_SOURCE, 1); }
| SCOPE { $$ = f_new_static_attr(T_ENUM_SCOPE, SA_SCOPE, 0); }
| DEST { $$ = f_new_static_attr(T_ENUM_RTD, SA_DEST, 0); }
| IFNAME { $$ = f_new_static_attr(T_STRING, SA_IFNAME, 0); }
| IFINDEX { $$ = f_new_static_attr(T_INT, SA_IFINDEX, 1); }
| WEIGHT { $$ = f_new_static_attr(T_INT, SA_WEIGHT, 0); }
| PREFERENCE { $$ = f_new_static_attr(T_INT, SA_PREF, 0); }
| GW_MPLS { $$ = f_new_static_attr(T_INT, SA_GW_MPLS, 0); }
| GW_MPLS_STACK { $$ = f_new_static_attr(T_CLIST, SA_GW_MPLS_STACK, 0); }
| ONLINK { $$ = f_new_static_attr(T_BOOL, SA_ONLINK, 0); }
;
term_dot_method: term '.' { f_method_call_start($1); } method_name_cont { f_method_call_end(); $$ = $4; };
@ -898,10 +904,12 @@ method_name_cont:
cf_error("Getting a route attribute from %s, need a route", f_type_name(FM.object->type));
$$ = f_new_inst(FI_RTA_GET, FM.object, $1);
}
| dynamic_attr {
| CF_SYM_KNOWN {
if ($1->class != SYM_ATTRIBUTE)
cf_error("Not a method of %s: %s", f_type_name(FM.object->type), $1->name);
if (FM.object->type != T_ROUTE)
cf_error("Getting a route attribute from %s, need a route", f_type_name(FM.object->type));
$$ = f_new_inst(FI_EA_GET, FM.object, $1);
$$ = f_new_inst(FI_EA_GET, FM.object, $1->attribute);
}
;
@ -911,6 +919,8 @@ term:
| term '-' term { $$ = f_new_inst(FI_SUBTRACT, $1, $3); }
| term '*' term { $$ = f_new_inst(FI_MULTIPLY, $1, $3); }
| term '/' term { $$ = f_new_inst(FI_DIVIDE, $1, $3); }
| term '&' term { $$ = f_new_inst(FI_BITAND, $1, $3); }
| term '|' term { $$ = f_new_inst(FI_BITOR, $1, $3); }
| term AND term { $$ = f_new_inst(FI_AND, $1, $3); }
| term OR term { $$ = f_new_inst(FI_OR, $1, $3); }
| term '=' term { $$ = f_new_inst(FI_EQ, $1, $3); }
@ -924,28 +934,25 @@ term:
| '!' term { $$ = f_new_inst(FI_NOT, $2); }
| DEFINED '(' term ')' { $$ = f_new_inst(FI_DEFINED, $3); }
| symbol_value { $$ = $1; }
| constant { $$ = $1; }
| constructor { $$ = $1; }
| static_attr { $$ = f_new_inst(FI_RTA_GET, f_new_inst(FI_CONSTANT, (struct f_val) { .type = T_ROUTE, .val.rte = NULL }), $1); }
| dynamic_attr { $$ = f_new_inst(FI_EA_GET, f_new_inst(FI_CONSTANT, (struct f_val) { .type = T_ROUTE, .val.rte = NULL }), $1); }
| lvalue { $$ = f_lval_getter(&$1); }
| term_dot_method
| '+' EMPTY '+' { $$ = f_new_inst(FI_CONSTANT, val_empty(T_PATH)); }
| '-' EMPTY '-' { $$ = f_new_inst(FI_CONSTANT, val_empty(T_CLIST)); }
| '-' '-' EMPTY '-' '-' { $$ = f_new_inst(FI_CONSTANT, val_empty(T_ECLIST)); }
| '-' '-' '-' EMPTY '-' '-' '-' { $$ = f_new_inst(FI_CONSTANT, val_empty(T_LCLIST)); }
| '+' EMPTY '+' { $$ = f_new_inst(FI_CONSTANT, f_get_empty(T_PATH)); }
| '-' EMPTY '-' { $$ = f_new_inst(FI_CONSTANT, f_get_empty(T_CLIST)); }
| '-' '-' EMPTY '-' '-' { $$ = f_new_inst(FI_CONSTANT, f_get_empty(T_ECLIST)); }
| '-' '-' '-' EMPTY '-' '-' '-' { $$ = f_new_inst(FI_CONSTANT, f_get_empty(T_LCLIST)); }
| PREPEND '(' term ',' term ')' { $$ = f_dispatch_method_x("prepend", $3->type, $3, $5); }
| ADD '(' term ',' term ')' { $$ = f_dispatch_method_x("add", $3->type, $3, $5); }
| DELETE '(' term ',' term ')' { $$ = f_dispatch_method_x("delete", $3->type, $3, $5); }
| FILTER '(' term ',' term ')' { $$ = f_dispatch_method_x("filter", $3->type, $3, $5); }
| ROA_CHECK '(' rtable ')' { $$ = f_new_inst(FI_ROA_CHECK_IMPLICIT, $3); }
| ROA_CHECK '(' rtable ',' term ',' term ')' { $$ = f_new_inst(FI_ROA_CHECK_EXPLICIT, $5, $7, $3); }
| ROA_CHECK '(' rtable ')' { $$ = f_implicit_roa_check($3); }
| ROA_CHECK '(' rtable ',' term ',' term ')' { $$ = f_new_inst(FI_ROA_CHECK, $5, $7, $3); }
| FORMAT '(' term ')' { $$ = f_new_inst(FI_FORMAT, $3); }
@ -1002,17 +1009,8 @@ cmd:
cf_pop_block_scope(new_config);
$$ = f_for_cycle($3, $6, $9);
}
| symbol_known '=' term ';' {
switch ($1->class) {
case SYM_VARIABLE_RANGE:
$$ = f_new_inst(FI_VAR_SET, $3, $1);
break;
case SYM_ATTRIBUTE:
$$ = f_new_inst(FI_EA_SET, $3, *$1->attribute);
break;
default:
cf_error("Can't assign to symbol %s", $1->name);
}
| lvalue '=' term ';' {
$$ = f_lval_setter(&$1, $3);
}
| RETURN term ';' {
DBG( "Ook, we'll return the value\n" );
@ -1030,26 +1028,13 @@ cmd:
$$ = f_new_inst(FI_RETURN, $2);
}
| dynamic_attr '=' term ';' {
$$ = f_new_inst(FI_EA_SET, $3, $1);
| UNSET '(' CF_SYM_KNOWN ')' ';' {
if ($3->class != SYM_ATTRIBUTE)
cf_error("Can't unset %s", $3->name);
if ($3->attribute->readonly)
cf_error("Attribute %s is read-only", $3->attribute->name);
$$ = f_new_inst(FI_EA_UNSET, $3->attribute);
}
| static_attr '=' term ';' {
if ($1.readonly)
cf_error( "This static attribute is read-only.");
$$ = f_new_inst(FI_RTA_SET, $3, $1);
}
| UNSET '(' dynamic_attr ')' ';' {
$$ = f_new_inst(FI_EA_UNSET, $3);
}
| UNSET '(' symbol_known ')' ';' {
switch ($3->class) {
case SYM_ATTRIBUTE:
$$ = f_new_inst(FI_EA_UNSET, *$3->attribute);
break;
default:
cf_error("Can't unset symbol %s", $3->name);
}
}
| break_command var_list_r ';' {
$$ = f_print($2, !!$2, $1);
}
@ -1082,18 +1067,20 @@ lvalue:
CF_SYM_KNOWN {
switch ($1->class)
{
case SYM_CONSTANT_RANGE:
$$ = (struct f_lval) { .type = F_LVAL_CONSTANT, .sym = $1, };
break;
case SYM_VARIABLE_RANGE:
$$ = (struct f_lval) { .type = F_LVAL_VARIABLE, .sym = $1, .rte = f_const_empty(T_ROUTE) };
$$ = (struct f_lval) { .type = F_LVAL_VARIABLE, .sym = $1, };
break;
case SYM_ATTRIBUTE:
$$ = (struct f_lval) { .type = F_LVAL_EA, .da = *($1->attribute), .rte = f_const_empty(T_ROUTE) };
$$ = (struct f_lval) { .type = F_LVAL_EA, .da = $1->attribute, .rte = f_new_inst(FI_CURRENT_ROUTE), };
break;
default:
cf_error("Variable name or custom attribute name required");
cf_error("Variable name or attribute name required");
}
}
| static_attr { $$ = (struct f_lval) { .type = F_LVAL_SA, .sa = $1, .rte = f_const_empty(T_ROUTE) }; }
| dynamic_attr { $$ = (struct f_lval) { .type = F_LVAL_EA, .da = $1, .rte = f_const_empty(T_ROUTE) }; }
| static_attr { $$ = (struct f_lval) { .type = F_LVAL_SA, .sa = $1, .rte = f_new_inst(FI_CURRENT_ROUTE), }; }
;
CF_END

View File

@ -16,10 +16,11 @@
#include "lib/unaligned.h"
#include "lib/net.h"
#include "lib/ip.h"
#include "lib/hash.h"
#include "nest/route.h"
#include "nest/protocol.h"
#include "nest/iface.h"
#include "nest/attrs.h"
#include "lib/attrs.h"
#include "conf/conf.h"
#include "filter/filter.h"
#include "filter/f-inst.h"
@ -29,6 +30,9 @@ static const char * const f_type_str[] = {
[T_VOID] = "void",
[T_NONE] = "none",
[T_OPAQUE] = "opaque byte string",
[T_IFACE] = "interface",
[T_INT] = "int",
[T_BOOL] = "bool",
[T_PAIR] = "pair",
@ -37,7 +41,6 @@ static const char * const f_type_str[] = {
[T_ENUM_RTS] = "enum rts",
[T_ENUM_BGP_ORIGIN] = "enum bgp_origin",
[T_ENUM_SCOPE] = "enum scope",
[T_ENUM_RTC] = "enum rtc",
[T_ENUM_RTD] = "enum rtd",
[T_ENUM_ROA] = "enum roa",
[T_ENUM_NETTYPE] = "enum nettype",
@ -49,6 +52,7 @@ static const char * const f_type_str[] = {
[T_STRING] = "string",
[T_BYTESTRING] = "bytestring",
[T_PATH_MASK] = "bgpmask",
[T_PATH_MASK_ITEM] = "bgpmask item",
[T_PATH] = "bgppath",
[T_CLIST] = "clist",
[T_EC] = "ec",
@ -57,24 +61,23 @@ static const char * const f_type_str[] = {
[T_LCLIST] = "lclist",
[T_RD] = "rd",
[T_ROUTE] = "route",
[T_ROUTE] = "route",
[T_ROUTES_BLOCK] = "block of routes",
[T_SET] = "set",
[T_PREFIX_SET] = "prefix set",
};
STATIC_ASSERT((1 << (8 * sizeof(btype))) == ARRAY_SIZE(f_type_str));
const char *
f_type_name(enum f_type t)
f_type_name(btype t)
{
if (t < ARRAY_SIZE(f_type_str))
return f_type_str[t] ?: "?";
if ((t == T_SET) || (t == T_PREFIX_SET))
return "set";
return "?";
return f_type_str[t] ?: "?";
}
enum f_type
f_type_element_type(enum f_type t)
btype
f_type_element_type(btype t)
{
switch(t) {
case T_PATH: return T_INT;
@ -92,14 +95,6 @@ const struct f_val f_const_empty_prefix_set = {
.val.ti = &f_const_empty_trie,
};
static struct adata *
adata_empty(struct linpool *pool, int l)
{
struct adata *res = lp_alloc(pool, sizeof(struct adata) + l);
res->length = l;
return res;
}
static void
pm_format(const struct f_path_mask *p, buffer *buf)
{
@ -213,7 +208,6 @@ val_compare(const struct f_val *v1, const struct f_val *v2)
case T_PATH:
return as_path_compare(v1->val.ad, v2->val.ad);
case T_ROUTE:
/* Fall through */
case T_ROUTES_BLOCK:
default:
return F_CMP_ERROR;
@ -306,9 +300,14 @@ val_same(const struct f_val *v1, const struct f_val *v2)
case T_PREFIX_SET:
return trie_same(v1->val.ti, v2->val.ti);
case T_ROUTE:
return v1->val.rte == v2->val.rte;
return rte_same(v1->val.rte, v2->val.rte);
case T_ROUTES_BLOCK:
return v1->val.ad == v2->val.ad;
if (v1->val.rte_block.len != v2->val.rte_block.len)
return 0;
for (uint i=0; i < v1->val.rte_block.len; i++)
if (!rte_same(v1->val.rte_block.rte[i], v2->val.rte_block.rte[i]))
return 0;
return 1;
default:
bug("Invalid type in val_same(): %x", v1->type);
}
@ -445,7 +444,7 @@ clist_filter(struct linpool *pool, const struct adata *list, const struct f_val
if (nl == list->length)
return list;
struct adata *res = adata_empty(pool, nl);
struct adata *res = lp_alloc_adata(pool, nl);
memcpy(res->data, tmp, nl);
return res;
}
@ -479,7 +478,7 @@ eclist_filter(struct linpool *pool, const struct adata *list, const struct f_val
if (nl == list->length)
return list;
struct adata *res = adata_empty(pool, nl);
struct adata *res = lp_alloc_adata(pool, nl);
memcpy(res->data, tmp, nl);
return res;
}
@ -511,7 +510,7 @@ lclist_filter(struct linpool *pool, const struct adata *list, const struct f_val
if (nl == list->length)
return list;
struct adata *res = adata_empty(pool, nl);
struct adata *res = lp_alloc_adata(pool, nl);
memcpy(res->data, tmp, nl);
return res;
}
@ -582,6 +581,79 @@ val_in_range(const struct f_val *v1, const struct f_val *v2)
return F_CMP_ERROR;
}
uint
val_hash(struct f_val *v)
{
u64 haux;
mem_hash_init(&haux);
mem_hash_mix_f_val(&haux, v);
return mem_hash_value(&haux);
}
void
mem_hash_mix_f_val(u64 *h, struct f_val *v)
{
mem_hash_mix_num(h, v->type);
#define MX(k) mem_hash_mix(h, &IT(k), sizeof IT(k));
#define IT(k) v->val.k
switch (v->type)
{
case T_VOID:
break;
case T_INT:
case T_BOOL:
case T_PAIR:
case T_QUAD:
case T_ENUM:
MX(i);
break;
case T_EC:
case T_RD:
MX(ec);
break;
case T_LC:
MX(lc);
break;
case T_IP:
MX(ip);
break;
case T_NET:
mem_hash_mix_num(h, net_hash(IT(net)));
break;
case T_STRING:
mem_hash_mix_str(h, IT(s));
break;
case T_PATH_MASK:
mem_hash_mix(h, IT(path_mask), sizeof(*IT(path_mask)) + IT(path_mask)->len * sizeof (IT(path_mask)->item));
break;
case T_PATH:
case T_CLIST:
case T_ECLIST:
case T_LCLIST:
case T_BYTESTRING:
mem_hash_mix(h, IT(ad)->data, IT(ad)->length);
break;
case T_SET:
MX(t);
break;
case T_PREFIX_SET:
MX(ti);
break;
case T_NONE:
case T_PATH_MASK_ITEM:
case T_ROUTE:
case T_ROUTES_BLOCK:
case T_OPAQUE:
case T_NEXTHOP_LIST:
case T_HOSTENTRY:
case T_IFACE:
bug("Invalid type %s in f_val hashing", f_type_name(v->type));
}
}
/*
* rte_format - format route information
*/
@ -589,26 +661,23 @@ static void
rte_format(const struct rte *rte, buffer *buf)
{
if (rte)
buffer_print(buf, "Route [%d] to %N from %s.%s via %s",
rte->src->global_id, rte->net->n.addr,
rte->sender->proto->name, rte->sender->name,
rte->src->proto->name);
buffer_print(buf, "Route [%d] to %N from %s via %s",
rte->src->global_id, rte->net,
rte->sender->req->name,
rte->src->owner->name);
else
buffer_puts(buf, "[No route]");
}
static void
rte_block_format(const struct rte *rte, buffer *buf)
rte_block_format(const struct rte_block *block, buffer *buf)
{
buffer_print(buf, "Block of routes:");
int i = 0;
while (rte)
for (uint i = 0; i < block->len; i++)
{
buffer_print(buf, "%s%d: ", i ? "; " : " ", i);
rte_format(rte, buf);
rte = rte->next;
i++;
rte_format(block->rte[i], buf);
}
}
@ -642,7 +711,7 @@ val_format(const struct f_val *v, buffer *buf)
case T_LCLIST: lc_set_format(v->val.ad, -1, buf2, 1000); buffer_print(buf, "(lclist %s)", buf2); return;
case T_PATH_MASK: pm_format(v->val.path_mask, buf); return;
case T_ROUTE: rte_format(v->val.rte, buf); return;
case T_ROUTES_BLOCK: rte_block_format(v->val.rte, buf); return;
case T_ROUTES_BLOCK: rte_block_format(&v->val.rte_block, buf); return;
default: buffer_print(buf, "[unknown type %x]", v->type); return;
}
}
@ -650,7 +719,7 @@ val_format(const struct f_val *v, buffer *buf)
char *
val_format_str(struct linpool *lp, const struct f_val *v) {
buffer b;
LOG_BUFFER_INIT(b);
STACK_BUFFER_INIT(b, 1024);
val_format(v, &b);
return lp_strdup(lp, b.start);
}
@ -667,3 +736,75 @@ val_dump(const struct f_val *v) {
val_format(v, &b);
return val_dump_buffer;
}
struct f_val *
lp_val_copy(struct linpool *lp, const struct f_val *v)
{
switch (v->type)
{
case T_VOID:
case T_BOOL:
case T_INT:
case T_IP:
case T_PAIR:
case T_QUAD:
case T_EC:
case T_LC:
case T_RD:
case T_ENUM:
case T_PATH_MASK_ITEM:
/* These aren't embedded but there is no need to copy them */
case T_SET:
case T_PREFIX_SET:
case T_PATH_MASK:
case T_IFACE:
{
struct f_val *out = lp_alloc(lp, sizeof(*out));
*out = *v;
return out;
}
case T_NET:
{
struct {
struct f_val val;
net_addr net[0];
} *out = lp_alloc(lp, sizeof(*out) + v->val.net->length);
out->val = *v;
out->val.val.net = out->net;
net_copy(out->net, v->val.net);
return &out->val;
}
case T_STRING:
{
uint len = strlen(v->val.s);
struct {
struct f_val val;
char buf[0];
} *out = lp_alloc(lp, sizeof(*out) + len + 1);
out->val = *v;
out->val.val.s = out->buf;
memcpy(out->buf, v->val.s, len+1);
return &out->val;
}
case T_PATH:
case T_CLIST:
case T_ECLIST:
case T_LCLIST:
{
struct {
struct f_val val;
struct adata ad;
} *out = lp_alloc(lp, sizeof(*out) + v->val.ad->length);
out->val = *v;
out->val.val.ad = &out->ad;
memcpy(&out->ad, v->val.ad, v->val.ad->length);
return &out->val;
}
default:
bug("Unknown type in value copy: %d", v->type);
}
}

View File

@ -11,132 +11,57 @@
#define _BIRD_FILTER_DATA_H_
#include "nest/bird.h"
#include "nest/route.h"
/* Type numbers must be in 0..0xff range */
#define T_MASK 0xff
/* Internal types */
enum f_type {
/* Nothing. Simply nothing. */
T_VOID = 0,
T_NONE = 1, /* Special hack to represent missing arguments */
/* User visible types, which fit in int */
T_INT = 0x10,
T_BOOL = 0x11,
T_PAIR = 0x12, /* Notice that pair is stored as integer: first << 16 | second */
T_QUAD = 0x13,
/* Put enumerational types in 0x30..0x3f range */
T_ENUM_LO = 0x30,
T_ENUM_HI = 0x3f,
T_ENUM_RTS = 0x30,
T_ENUM_BGP_ORIGIN = 0x31,
T_ENUM_SCOPE = 0x32,
T_ENUM_RTC = 0x33,
T_ENUM_RTD = 0x34,
T_ENUM_ROA = 0x35,
T_ENUM_NETTYPE = 0x36,
T_ENUM_RA_PREFERENCE = 0x37,
T_ENUM_AF = 0x38,
T_ENUM_MPLS_POLICY = 0x39,
/* new enums go here */
T_ENUM_EMPTY = 0x3f, /* Special hack for atomic_aggr */
#define T_ENUM T_ENUM_LO ... T_ENUM_HI
/* Bigger ones */
T_IP = 0x20,
T_NET = 0x21,
T_STRING = 0x22,
T_PATH_MASK = 0x23, /* mask for BGP path */
T_PATH = 0x24, /* BGP path */
T_CLIST = 0x25, /* Community list */
T_EC = 0x26, /* Extended community value, u64 */
T_ECLIST = 0x27, /* Extended community list */
T_LC = 0x28, /* Large community value, lcomm */
T_LCLIST = 0x29, /* Large community list */
T_RD = 0x2a, /* Route distinguisher for VPN addresses */
T_PATH_MASK_ITEM = 0x2b, /* Path mask item for path mask constructors */
T_BYTESTRING = 0x2c,
T_ROUTE = 0x78,
T_ROUTES_BLOCK = 0x79,
T_SET = 0x80,
T_PREFIX_SET = 0x81,
} PACKED;
#include "lib/type.h"
#include "nest/iface.h"
struct f_method {
struct symbol *sym;
struct f_inst *(*new_inst)(struct f_inst *obj, struct f_inst *args);
const struct f_method *next;
uint arg_num;
enum f_type args_type[];
enum btype args_type[];
};
/* Filter value; size of this affects filter memory consumption */
struct f_val {
enum f_type type; /* T_* */
union {
uint i;
u64 ec;
lcomm lc;
ip_addr ip;
const net_addr *net;
const char *s;
const struct adata *bs;
const struct f_tree *t;
const struct f_trie *ti;
const struct adata *ad;
const struct f_path_mask *path_mask;
struct f_path_mask_item pmi;
struct rte *rte;
} val;
btype type; /* T_* */
union bval_long val;
};
/* Dynamic attribute definition (eattrs) */
struct f_dynamic_attr {
u8 type; /* EA type (EAF_*) */
u8 bit; /* For bitfield accessors */
enum f_type f_type; /* Filter type */
uint ea_code; /* EA code */
uint flags;
};
#define fputip(a) ({ ip_addr *ax = falloc(sizeof(*ax)); *ax = (a); ax; })
enum f_sa_code {
SA_FROM = 1,
SA_GW,
SA_GW = 1,
SA_NET,
SA_PROTO,
SA_SOURCE,
SA_SCOPE,
SA_DEST,
SA_IFNAME,
SA_IFINDEX,
SA_WEIGHT,
SA_PREF,
SA_GW_MPLS,
SA_GW_MPLS_STACK,
SA_ONLINK,
} PACKED;
/* Static attribute definition (members of struct rta) */
struct f_static_attr {
enum f_type f_type; /* Filter type */
btype type; /* Data type */
enum f_sa_code sa_code; /* Static attribute id */
int readonly:1; /* Don't allow writing */
int readonly:1; /* Don't allow writing */
};
struct f_attr_bit {
const struct ea_class *class;
uint bit;
};
#define f_new_dynamic_attr_bit(_bit, _name) ((struct f_attr_bit) { .bit = _bit, .class = ea_class_find(_name) })
/* Filter l-value type */
enum f_lval_type {
F_LVAL_CONSTANT,
F_LVAL_VARIABLE,
F_LVAL_PREFERENCE,
F_LVAL_SA,
F_LVAL_EA,
F_LVAL_ATTR_BIT,
};
/* Filter l-value */
@ -145,8 +70,9 @@ struct f_lval {
struct f_inst *rte;
union {
struct symbol *sym;
struct f_dynamic_attr da;
const struct ea_class *da;
struct f_static_attr sa;
struct f_attr_bit fab;
};
};
@ -294,9 +220,9 @@ trie_match_next_longest_ip6(net_addr_ip6 *n, ip6_addr *found)
#define F_CMP_ERROR 999
const char *f_type_name(enum f_type t);
enum f_type f_type_element_type(enum f_type t);
struct sym_scope *f_type_method_scope(enum f_type t);
const char *f_type_name(btype t);
enum btype f_type_element_type(btype t);
struct sym_scope *f_type_method_scope(btype t);
int val_same(const struct f_val *v1, const struct f_val *v2);
int val_compare(const struct f_val *v1, const struct f_val *v2);
@ -304,6 +230,11 @@ void val_format(const struct f_val *v, buffer *buf);
char *val_format_str(struct linpool *lp, const struct f_val *v);
const char *val_dump(const struct f_val *v);
uint val_hash(struct f_val *);
void mem_hash_mix_f_val(u64 *, struct f_val *);
struct f_val *lp_val_copy(struct linpool *lp, const struct f_val *v);
static inline int val_is_ip4(const struct f_val *v)
{ return (v->type == T_IP) && ipa_is_ip4(v->val.ip); }
int val_in_range(const struct f_val *v1, const struct f_val *v2);
@ -335,25 +266,23 @@ val_is_undefined(struct f_val v)
(v.val.ad == &null_adata);
}
static inline struct f_val
val_empty(enum f_type t)
extern const struct f_val f_const_empty_prefix_set;
static inline struct f_val f_get_empty(btype t)
{
switch (t)
{
case T_PATH:
case T_CLIST:
case T_ECLIST:
case T_LCLIST:
return (struct f_val) { .type = t, .val.ad = &null_adata };
default:
return (struct f_val) { };
switch (t) {
case T_PATH:
case T_CLIST:
case T_ECLIST:
case T_LCLIST:
return (struct f_val) {
.type = t,
.val.ad = &null_adata,
};
default:
return (struct f_val) { .type = T_VOID };
}
}
extern const struct f_val f_const_empty_prefix_set;
enum filter_return f_eval(const struct f_line *expr, struct linpool *tmp_pool, struct f_val *pres);
enum filter_return f_eval(const struct f_line *expr, struct f_val *pres);
#endif

View File

@ -100,7 +100,7 @@ FID_DUMP_BODY()m4_dnl
debug("%s" $4 "\n", INDENT, $5);
]])
FID_INTERPRET_EXEC()m4_dnl
const $1 $2 = whati->$2
$1 $2 = whati->$2
FID_INTERPRET_BODY')
# Instruction arguments are needed only until linearization is done.
@ -216,6 +216,7 @@ FID_INTERPRET_BODY()')
# that was needed in the former implementation.
m4_define(LINEX, `FID_INTERPRET_EXEC()LINEX_($1)FID_INTERPRET_NEW()return $1 FID_INTERPRET_BODY()')
m4_define(LINEX_, `do if ($1) {
if (fstk->ecnt + 1 >= fstk->elen) runtime("Filter execution stack overflow");
fstk->estk[fstk->ecnt].pos = 0;
fstk->estk[fstk->ecnt].line = $1;
fstk->estk[fstk->ecnt].ventry = fstk->vcnt;
@ -257,7 +258,7 @@ FID_INTERPRET_BODY()')
# state the result and put it to the right place.
m4_define(RESULT, `RESULT_TYPE([[$1]]) RESULT_([[$1]],[[$2]],[[$3]])')
m4_define(RESULT_, `RESULT_VAL([[ (struct f_val) { .type = $1, .val.$2 = $3 } ]])')
m4_define(RESULT_VAL, `FID_HIC(, [[do { res = $1; fstk->vcnt++; } while (0)]],
m4_define(RESULT_VAL, `FID_HIC(, [[do { res = $1; f_vcnt_check_overflow(1); fstk->vcnt++; } while (0)]],
[[return fi_constant(what, $1)]])')
m4_define(RESULT_VOID, `RESULT_VAL([[ (struct f_val) { .type = T_VOID } ]])')
@ -284,7 +285,7 @@ FID_INTERPRET_BODY()')
m4_define(SYMBOL, `FID_MEMBER(struct symbol *, sym, [[strcmp(f1->sym->name, f2->sym->name) || (f1->sym->class != f2->sym->class)]], "symbol %s", item->sym->name)')
m4_define(RTC, `FID_MEMBER(struct rtable_config *, rtc, [[strcmp(f1->rtc->name, f2->rtc->name)]], "route table %s", item->rtc->name)')
m4_define(STATIC_ATTR, `FID_MEMBER(struct f_static_attr, sa, f1->sa.sa_code != f2->sa.sa_code,,)')
m4_define(DYNAMIC_ATTR, `FID_MEMBER(struct f_dynamic_attr, da, f1->da.ea_code != f2->da.ea_code,,)')
m4_define(DYNAMIC_ATTR, `FID_MEMBER(const struct ea_class *, da, f1->da != f2->da,,)')
m4_define(ACCESS_RTE, `FID_HIC(,[[do { if (!fs->rte) runtime("No route to access"); } while (0)]],NEVER_CONSTANT())')
# Method constructor block
@ -425,7 +426,7 @@ m4_undivert(112)
FID_METHOD_SCOPE_INIT()m4_dnl
[INST_METHOD_OBJECT_TYPE] = {},
FID_METHOD_REGISTER()m4_dnl
method = lp_allocz(global_root_scope_linpool, sizeof(struct f_method) + INST_METHOD_NUM_ARGS * sizeof(enum f_type));
method = lp_allocz(global_root_scope_linpool, sizeof(struct f_method) + INST_METHOD_NUM_ARGS * sizeof(enum btype));
method->new_inst = f_new_method_]]INST_NAME()[[;
method->arg_num = INST_METHOD_NUM_ARGS;
m4_undivert(113)
@ -574,7 +575,7 @@ fi_constant(struct f_inst *what, struct f_val val)
}
int
f_const_promotion_(struct f_inst *arg, enum f_type want, int update)
f_const_promotion_(struct f_inst *arg, btype want, int update)
{
if (arg->fi_code != FI_CONSTANT)
return 0;
@ -621,6 +622,8 @@ FID_WR_PUT(11)
#pragma GCC diagnostic ignored "-Woverride-init"
#endif
#pragma clang diagnostic ignored "-Winitializer-overrides"
static struct sym_scope f_type_method_scopes[] = {
FID_WR_PUT(12)
};
@ -629,20 +632,20 @@ FID_WR_PUT(12)
#pragma GCC diagnostic pop
#endif
struct sym_scope *f_type_method_scope(enum f_type t)
struct sym_scope *f_type_method_scope(enum btype t)
{
return (t < ARRAY_SIZE(f_type_method_scopes)) ? &f_type_method_scopes[t] : NULL;
}
static void
f_register_method(enum f_type t, const byte *name, struct f_method *dsc)
f_register_method(enum btype t, const byte *name, struct f_method *dsc)
{
struct sym_scope *scope = &f_type_method_scopes[t];
struct symbol *sym = cf_find_symbol_scope(scope, name);
if (!sym)
{
sym = cf_new_symbol(scope, global_root_scope_pool, global_root_scope_linpool, name);
sym = cf_root_symbol(name, scope);
sym->class = SYM_METHOD;
}
@ -651,6 +654,8 @@ f_register_method(enum f_type t, const byte *name, struct f_method *dsc)
sym->method = dsc;
}
extern struct sym_scope global_filter_scope;
void f_type_methods_register(void)
{
struct f_method *method;
@ -659,6 +664,8 @@ FID_WR_PUT(13)
for (uint i = 0; i < ARRAY_SIZE(f_type_method_scopes); i++)
f_type_method_scopes[i].readonly = 1;
f_type_method_scopes[T_ROUTE].next = &global_filter_scope;
}
/* Line dumpers */
@ -782,7 +789,7 @@ struct f_inst {
struct f_inst *next; /* Next instruction */
enum f_instruction_code fi_code; /* Instruction code */
enum f_instruction_flags flags; /* Flags, instruction-specific */
enum f_type type; /* Type of returned value, if known */
btype type; /* Type of returned value, if known */
int size; /* How many instructions are underneath */
int lineno; /* Line number */
union {

View File

@ -70,7 +70,6 @@
* m4_dnl DYNAMIC_ATTR; dynamic attribute definition
* m4_dnl RTC; route table config
* m4_dnl ACCESS_RTE; this instruction needs route
* m4_dnl ACCESS_EATTRS; this instruction needs extended attributes
*
* m4_dnl METHOD_CONSTRUCTOR(name); this instruction is in fact a method of the first argument's type; register it with the given name for that type
*
@ -244,8 +243,6 @@
* m4_dnl fpool -> the current linpool
* m4_dnl NEVER_CONSTANT-> don't generate pre-interpretation code at all
* m4_dnl ACCESS_RTE -> check that route is available, also NEVER_CONSTANT
* m4_dnl ACCESS_EATTRS -> pre-cache the eattrs; use only with ACCESS_RTE
* m4_dnl f_rta_cow(fs) -> function to call before any change to route should be done
*
* m4_dnl If you are stymied, see FI_CALL or FI_CONSTANT or just search for
* m4_dnl the mentioned macros in this file to see what is happening there in wild.
@ -304,6 +301,16 @@
if (v2.val.i == 0) runtime( "Mother told me not to divide by 0" );
RESULT(T_INT, i, v1.val.i / v2.val.i);
}
INST(FI_BITOR, 2, 1) {
ARG(1,T_INT);
ARG(2,T_INT);
RESULT(T_INT, i, v1.val.i | v2.val.i);
}
INST(FI_BITAND, 2, 1) {
ARG(1,T_INT);
ARG(2,T_INT);
RESULT(T_INT, i, v1.val.i & v2.val.i);
}
INST(FI_AND, 1, 1) {
ARG(1,T_BOOL);
ARG_TYPE_STATIC(2,T_BOOL);
@ -519,7 +526,7 @@
/* New variable is always the last on stack */
uint pos = curline.vbase + sym->offset;
fstk->vstk[pos] = val_empty(sym->class & 0xff);
fstk->vstk[pos] = f_get_empty(sym->class & 0xff);
fstk->vcnt = pos + 1;
}
@ -620,13 +627,10 @@
INST(FI_ROUTES_BLOCK_FOR_NEXT, 3, 0) {
NEVER_CONSTANT;
ARG(1, T_ROUTES_BLOCK);
if (!v2.type)
v2 = v1;
if (v2.val.rte)
if (v2.val.i < v1.val.rte_block.len)
{
v3.val.rte = v2.val.rte;
v2.val.rte = v2.val.rte->next;
v3.val.rte = v1.val.rte_block.rte[v2.val.i++];
LINE(2,0);
}
@ -646,14 +650,19 @@
ARG_ANY(1);
if (!(fs->flags & FF_SILENT))
val_format(&v1, &fs->buf);
{
if (!fs->buf.class)
log_prepare(&fs->buf, *L_INFO);
val_format(&v1, &fs->buf.buf);
}
}
INST(FI_FLUSH, 0, 0) {
NEVER_CONSTANT;
if (!(fs->flags & FF_SILENT))
/* After log_commit, the buffer is reset */
log_commit(*L_INFO, &fs->buf);
log_commit(&fs->buf);
}
INST(FI_DIE, 0, 0) {
@ -670,42 +679,56 @@
}
}
INST(FI_CURRENT_ROUTE, 0, 1) {
NEVER_CONSTANT;
ACCESS_RTE;
RESULT_TYPE(T_ROUTE);
RESULT_VAL([[(struct f_val) { .type = T_ROUTE, .val.rte = fs->rte, }]]);
}
INST(FI_RTA_GET, 1, 1) {
{
ACCESS_RTE;
ARG(1, T_ROUTE);
STATIC_ATTR;
struct rta *rta = v1.val.rte ? v1.val.rte->attrs : (*fs->rte)->attrs;
struct rte *rte = v1.val.rte;
switch (sa.sa_code)
{
case SA_FROM: RESULT(sa.f_type, ip, rta->from); break;
case SA_GW: RESULT(sa.f_type, ip, rta->nh.gw); break;
case SA_NET: RESULT(sa.f_type, net, (*fs->rte)->net->n.addr); break;
case SA_PROTO: RESULT(sa.f_type, s, (*fs->rte)->src->proto->name); break;
case SA_SOURCE: RESULT(sa.f_type, i, rta->source); break;
case SA_SCOPE: RESULT(sa.f_type, i, rta->scope); break;
case SA_DEST: RESULT(sa.f_type, i, rta->dest); break;
case SA_IFNAME: RESULT(sa.f_type, s, rta->nh.iface ? rta->nh.iface->name : ""); break;
case SA_IFINDEX: RESULT(sa.f_type, i, rta->nh.iface ? rta->nh.iface->index : 0); break;
case SA_WEIGHT: RESULT(sa.f_type, i, rta->nh.weight + 1); break;
case SA_PREF: RESULT(sa.f_type, i, rta->pref); break;
case SA_GW_MPLS: RESULT(sa.f_type, i, rta->nh.labels ? rta->nh.label[0] : MPLS_NULL); break;
case SA_GW_MPLS_STACK:
{
uint len = rta->nh.labels * sizeof(u32);
struct adata *list = falloc(sizeof(struct adata) + len);
list->length = len;
memcpy(list->data, rta->nh.label, len);
RESULT(sa.f_type, ad, list);
break;
}
case SA_ONLINK: RESULT(sa.f_type, i, rta->nh.flags & RNF_ONLINK ? 1 : 0); break;
case SA_NET: RESULT(sa.type, net, rte->net); break;
case SA_PROTO: RESULT(sa.type, s, rte->src->owner->name); break;
default:
bug("Invalid static attribute access (%u/%u)", sa.f_type, sa.sa_code);
{
struct eattr *nhea = ea_find(rte->attrs, &ea_gen_nexthop);
struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL;
struct nexthop *nh = nhad ? &nhad->nh : NULL;
switch (sa.sa_code)
{
case SA_DEST:
RESULT(sa.type, i, nhad ?
(NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest)
: RTD_NONE);
break;
case SA_GW:
RESULT(sa.type, ip, nh ? nh->gw : IPA_NONE);
break;
case SA_IFNAME:
RESULT(sa.type, s, (nh && nh->iface) ? nh->iface->name : "");
break;
case SA_IFINDEX:
RESULT(sa.type, i, (nh && nh->iface) ? nh->iface->index : 0);
break;
case SA_WEIGHT:
RESULT(sa.type, i, (nh ? nh->weight : 0) + 1);
break;
case SA_GW_MPLS:
RESULT(sa.type, i, (nh && nh->labels) ? nh->label[0] : MPLS_NULL);
break;
default:
bug("Invalid static attribute access (%u/%u)", sa.type, sa.sa_code);
}
}
}
}
}
@ -714,51 +737,52 @@
ACCESS_RTE;
ARG_ANY(1);
STATIC_ATTR;
ARG_TYPE(1, sa.f_type);
f_rta_cow(fs);
ARG_TYPE(1, sa.type);
{
struct rta *rta = (*fs->rte)->attrs;
union {
struct nexthop_adata nha;
struct {
struct adata ad;
struct nexthop nh;
u32 label;
};
} nha;
nha.ad = (struct adata) {
.length = sizeof (struct nexthop_adata) - sizeof (struct adata),
};
eattr *a = NULL;
switch (sa.sa_code)
{
case SA_FROM:
rta->from = v1.val.ip;
break;
case SA_GW:
{
ip_addr ip = v1.val.ip;
struct iface *ifa = ipa_is_link_local(ip) || (rta->nh.flags & RNF_ONLINK) ? rta->nh.iface : NULL;
neighbor *n = neigh_find((*fs->rte)->src->proto, ip, ifa, (rta->nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0);
if (!n || (n->scope == SCOPE_HOST))
runtime( "Invalid gw address" );
rta->dest = RTD_UNICAST;
rta->nh.gw = ip;
rta->nh.iface = n->iface;
rta->nh.next = NULL;
rta->hostentry = NULL;
rta->nh.labels = 0;
}
break;
case SA_SCOPE:
rta->scope = v1.val.i;
break;
case SA_DEST:
{
int i = v1.val.i;
if ((i != RTD_BLACKHOLE) && (i != RTD_UNREACHABLE) && (i != RTD_PROHIBIT))
runtime( "Destination can be changed only to blackhole, unreachable or prohibit" );
rta->dest = i;
rta->nh.gw = IPA_NONE;
rta->nh.iface = NULL;
rta->nh.next = NULL;
rta->hostentry = NULL;
rta->nh.labels = 0;
nha.nha.dest = i;
nha.ad.length = NEXTHOP_DEST_SIZE;
break;
}
case SA_GW:
{
struct eattr *nh_ea = ea_find(fs->rte->attrs, &ea_gen_nexthop);
ip_addr ip = v1.val.ip;
struct iface *ifa = (ipa_is_link_local(ip) && nh_ea) ?
((struct nexthop_adata *) nh_ea->u.ptr)->nh.iface : NULL;
/* XXX this code supposes that every owner is a protocol XXX */
neighbor *n = neigh_find(SKIP_BACK(struct proto, sources, fs->rte->src->owner), ip, ifa, 0);
if (!n || (n->scope == SCOPE_HOST))
runtime( "Invalid gw address" );
nha.nh = (struct nexthop) {
.gw = ip,
.iface = n->iface,
};
}
break;
@ -768,12 +792,9 @@
if (!ifa)
runtime( "Invalid iface name" );
rta->dest = RTD_UNICAST;
rta->nh.gw = IPA_NONE;
rta->nh.iface = ifa;
rta->nh.next = NULL;
rta->hostentry = NULL;
rta->nh.labels = 0;
nha.nh = (struct nexthop) {
.iface = ifa,
};
}
break;
@ -782,43 +803,20 @@
if (v1.val.i >= 0x100000)
runtime( "Invalid MPLS label" );
struct eattr *nh_ea = ea_find(fs->rte->attrs, &ea_gen_nexthop);
if (!nh_ea)
runtime( "No nexthop to add a MPLS label to" );
nha.nh = ((struct nexthop_adata *) nh_ea->u.ptr)->nh;
if (v1.val.i != MPLS_NULL)
{
rta->nh.label[0] = v1.val.i;
rta->nh.labels = 1;
nha.nh.label[0] = v1.val.i;
nha.nh.labels = 1;
nha.ad.length = sizeof nha - sizeof (struct adata);
}
else
rta->nh.labels = 0;
rta->nh.labels_orig = rta->hostentry ? rta->nh.labels : 0;
}
break;
case SA_GW_MPLS_STACK:
{
int len = int_set_get_size(v1.val.ad);
u32 *l = int_set_get_data(v1.val.ad);
if (len > MPLS_MAX_LABEL_STACK)
runtime("Too many MPLS labels in stack (%d)", len);
int i;
for (i = 0; i < len; i++)
{
u32 label = l[i];
if (label >= 0x100000)
runtime("Invalid MPLS label (%u)", label);
/* Ignore rest of label stack if implicit-NULL label (3) is set */
if (label == MPLS_NULL)
break;
rta->nh.label[i] = label;
}
rta->nh.labels = i;
rta->nh.labels_orig = rta->hostentry ? i : 0;
nha.nh.labels = 0;
}
break;
@ -827,172 +825,136 @@
int i = v1.val.i;
if (i < 1 || i > 256)
runtime( "Setting weight value out of bounds" );
if (rta->dest != RTD_UNICAST)
struct eattr *nh_ea = ea_find(fs->rte->attrs, &ea_gen_nexthop);
if (!nh_ea)
runtime( "No nexthop to set weight on" );
struct nexthop_adata *nhad = (struct nexthop_adata *) nh_ea->u.ptr;
if (!NEXTHOP_IS_REACHABLE(nhad))
runtime( "Setting weight needs regular nexthop " );
struct nexthop_adata *nhax = (struct nexthop_adata *) tmp_copy_adata(&nhad->ad);
/* Set weight on all next hops */
for (struct nexthop *nh = &rta->nh; nh; nh = nh->next)
NEXTHOP_WALK(nh, nhax)
nh->weight = i - 1;
a = ea_set_attr(&fs->rte->attrs,
EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, &nhax->ad));
}
break;
case SA_PREF:
rta->pref = v1.val.i;
break;
case SA_ONLINK:
{
if (v1.val.i)
rta->nh.flags |= RNF_ONLINK;
else
rta->nh.flags &= ~RNF_ONLINK;
}
break;
default:
bug("Invalid static attribute access (%u/%u)", sa.f_type, sa.sa_code);
bug("Invalid static attribute access (%u/%u)", sa.type, sa.sa_code);
}
if (!a)
a = ea_set_attr(&fs->rte->attrs,
EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, tmp_copy_adata(&nha.ad)));
a->originated = 1;
a->fresh = 1;
}
}
INST(FI_EA_GET, 1, 1) { /* Access to extended attributes */
ACCESS_RTE;
ACCESS_EATTRS;
ARG(1, T_ROUTE);
DYNAMIC_ATTR;
RESULT_TYPE(da.f_type);
RESULT_TYPE(da->type);
{
struct ea_list *eal = v1.val.rte ? v1.val.rte->attrs->eattrs : *fs->eattrs;
eattr *e = ea_find(eal, da.ea_code);
struct f_val empty;
const eattr *e = ea_find(v1.val.rte->attrs, da->id);
if (!e) {
RESULT_VAL(val_empty(da.f_type));
break;
}
if (e)
{
ASSERT_DIE(e->type == da->type);
switch (e->type & EAF_TYPE_MASK) {
case EAF_TYPE_INT:
RESULT_(da.f_type, i, e->u.data);
break;
case EAF_TYPE_ROUTER_ID:
RESULT_(T_QUAD, i, e->u.data);
break;
case EAF_TYPE_OPAQUE:
if (da.f_type == T_ENUM_EMPTY)
RESULT_(T_ENUM_EMPTY, i, 0);
else
RESULT_(T_BYTESTRING, ad, e->u.ptr);
break;
case EAF_TYPE_IP_ADDRESS:
RESULT_(T_IP, ip, *((ip_addr *) e->u.ptr->data));
break;
case EAF_TYPE_AS_PATH:
RESULT_(T_PATH, ad, e->u.ptr);
break;
case EAF_TYPE_BITFIELD:
RESULT_(T_BOOL, i, !!(e->u.data & (1u << da.bit)));
break;
case EAF_TYPE_INT_SET:
RESULT_(T_CLIST, ad, e->u.ptr);
break;
case EAF_TYPE_EC_SET:
RESULT_(T_ECLIST, ad, e->u.ptr);
break;
case EAF_TYPE_LC_SET:
RESULT_(T_LCLIST, ad, e->u.ptr);
break;
case EAF_TYPE_STRING:
RESULT_(T_STRING, s, (const char *) e->u.ptr->data);
break;
default:
bug("Unknown dynamic attribute type");
switch (e->type) {
case T_IP:
RESULT_(T_IP, ip, *((const ip_addr *) e->u.ptr->data));
break;
case T_STRING:
RESULT_(T_STRING, s, (const char *) e->u.ptr->data);
break;
default:
RESULT_VAL([[(struct f_val) {
.type = e->type,
.val.bval = e->u,
}]]);
}
}
else if ((empty = f_get_empty(da->type)).type != T_VOID)
RESULT_VAL(empty);
else
RESULT_VOID;
}
}
INST(FI_EA_SET, 1, 0) {
ACCESS_RTE;
ACCESS_EATTRS;
ARG_ANY(1);
DYNAMIC_ATTR;
ARG_TYPE(1, da.f_type);
ARG_TYPE(1, da->type);
FID_NEW_BODY;
if (da.f_type == T_ENUM_EMPTY)
if (da->type == T_OPAQUE)
cf_error("Setting opaque attribute is not allowed");
FID_INTERPRET_BODY;
{
struct ea_list *l = lp_alloc(fs->pool, sizeof(struct ea_list) + sizeof(eattr));
struct eattr *a;
l->next = NULL;
l->flags = EALF_SORTED;
l->count = 1;
l->attrs[0].id = da.ea_code;
l->attrs[0].flags = da.flags;
l->attrs[0].type = da.type;
l->attrs[0].originated = 1;
l->attrs[0].fresh = 1;
l->attrs[0].undef = 0;
if (da->type >= EAF_TYPE__MAX)
bug("Unsupported attribute type");
switch (da.type) {
case EAF_TYPE_INT:
case EAF_TYPE_ROUTER_ID:
l->attrs[0].u.data = v1.val.i;
switch (da->type) {
case T_IFACE:
case T_OPAQUE:
runtime( "Setting opaque attribute is not allowed" );
break;
case EAF_TYPE_IP_ADDRESS:;
int len = sizeof(ip_addr);
struct adata *ad = lp_alloc(fs->pool, sizeof(struct adata) + len);
ad->length = len;
(* (ip_addr *) ad->data) = v1.val.ip;
l->attrs[0].u.ptr = ad;
case T_IP:
a = ea_set_attr(&fs->rte->attrs,
EA_LITERAL_STORE_ADATA(da, da->flags, &v1.val.ip, sizeof(ip_addr)));
break;
case EAF_TYPE_OPAQUE:
case EAF_TYPE_AS_PATH:
case EAF_TYPE_INT_SET:
case EAF_TYPE_EC_SET:
case EAF_TYPE_LC_SET:
l->attrs[0].u.ptr = v1.val.ad;
break;
case EAF_TYPE_STRING:;
struct adata *d = lp_alloc_adata(fs->pool, strlen(v1.val.s) + 1);
memcpy(d->data, v1.val.s, d->length);
l->attrs[0].u.ptr = d;
break;
case EAF_TYPE_BITFIELD:
{
/* First, we have to find the old value */
eattr *e = ea_find(*fs->eattrs, da.ea_code);
u32 data = e ? e->u.data : 0;
if (v1.val.i)
l->attrs[0].u.data = data | (1u << da.bit);
else
l->attrs[0].u.data = data & ~(1u << da.bit);
}
case T_STRING:
a = ea_set_attr(&fs->rte->attrs,
EA_LITERAL_STORE_ADATA(da, da->flags, v1.val.s, strlen(v1.val.s) + 1));
break;
default:
bug("Unknown dynamic attribute type");
a = ea_set_attr(&fs->rte->attrs,
EA_LITERAL_GENERIC(da->id, da->type, da->flags, .u = v1.val.bval));
break;
}
f_rta_cow(fs);
l->next = *fs->eattrs;
*fs->eattrs = l;
a->originated = 1;
a->fresh = 1;
}
}
INST(FI_EA_UNSET, 0, 0) {
DYNAMIC_ATTR;
ACCESS_RTE;
ACCESS_EATTRS;
f_rta_cow(fs);
ea_unset_attr(fs->eattrs, fs->pool, 1, da.ea_code);
ea_unset_attr(&fs->rte->attrs, 1, da);
}
INST(FI_DEFAULT, 2, 1) {
ARG_ANY(1);
ARG_ANY(2);
RESULT_TYPE(f_type_element_type(v2.type));
log(L_INFO "Type of arg 1 is: %d", v1.type);
if (v1.type == T_VOID)
RESULT_VAL(v2);
else
RESULT_VAL(v1);
}
/* Get length of */
@ -1191,7 +1153,7 @@
struct f_arg *b = sym->function->arg_list;
for (uint i = 1; a && b; a = a->next, b = b->next, i++)
{
enum f_type b_type = b->arg->class & 0xff;
enum btype b_type = b->arg->class & 0xff;
if (a->type && (a->type != b_type) && !f_const_promotion(a, b_type))
cf_error("Argument %u of '%s' must be %s, got %s",
@ -1228,6 +1190,7 @@
fstk->vcnt += sym->function->args;
/* Storage for local variables */
f_vcnt_check_overflow(sym->function->vars);
memset(&(fstk->vstk[fstk->vcnt]), 0, sizeof(struct f_val) * sym->function->vars);
fstk->vcnt += sym->function->vars;
}
@ -1302,14 +1265,6 @@
RESULT(T_CLIST, ad, [[ int_set_add(fpool, v1.val.ad, v2.val.i) ]]);
}
/* Hack for gw_mpls_list */
INST(FI_CLIST_ADD_INT, 2, 1) {
ARG(1, T_CLIST);
ARG(2, T_INT);
METHOD_CONSTRUCTOR("add");
RESULT(T_CLIST, ad, [[ int_set_add(fpool, v1.val.ad, v2.val.i) ]]);
}
INST(FI_CLIST_ADD_IP, 2, 1) {
ARG(1, T_CLIST);
ARG(2, T_IP);
@ -1392,14 +1347,6 @@
RESULT(T_CLIST, ad, [[ int_set_del(fpool, v1.val.ad, v2.val.i) ]]);
}
/* Hack for gw_mpls_list */
INST(FI_CLIST_DELETE_INT, 2, 1) {
ARG(1, T_CLIST);
ARG(2, T_INT);
METHOD_CONSTRUCTOR("delete");
RESULT(T_CLIST, ad, [[ int_set_del(fpool, v1.val.ad, v2.val.i) ]]);
}
INST(FI_CLIST_DELETE_IP, 2, 1) {
ARG(1, T_CLIST);
ARG(2, T_IP);
@ -1554,42 +1501,12 @@
RESULT(T_LCLIST, ad, [[ lclist_filter(fpool, v1.val.ad, &v2, 1) ]]);
}
INST(FI_ROA_CHECK_IMPLICIT, 0, 1) { /* ROA Check */
NEVER_CONSTANT;
RTC(1);
struct rtable *table = rtc->table;
ACCESS_RTE;
ACCESS_EATTRS;
const net_addr *net = (*fs->rte)->net->n.addr;
/* We ignore temporary attributes, probably not a problem here */
/* 0x02 is a value of BA_AS_PATH, we don't want to include BGP headers */
eattr *e = ea_find(*fs->eattrs, EA_CODE(PROTOCOL_BGP, 0x02));
if (!e || ((e->type & EAF_TYPE_MASK) != EAF_TYPE_AS_PATH))
runtime("Missing AS_PATH attribute");
u32 as = 0;
as_path_get_last(e->u.ptr, &as);
if (!table)
runtime("Missing ROA table");
if (table->addr_type != NET_ROA4 && table->addr_type != NET_ROA6)
runtime("Table type must be either ROA4 or ROA6");
if (table->addr_type != (net->type == NET_IP4 ? NET_ROA4 : NET_ROA6))
RESULT(T_ENUM_ROA, i, ROA_UNKNOWN); /* Prefix and table type mismatch */
else
RESULT(T_ENUM_ROA, i, [[ net_roa_check(table, net, as) ]]);
}
INST(FI_ROA_CHECK_EXPLICIT, 2, 1) { /* ROA Check */
INST(FI_ROA_CHECK, 2, 1) { /* ROA Check */
NEVER_CONSTANT;
ARG(1, T_NET);
ARG(2, T_INT);
RTC(3);
struct rtable *table = rtc->table;
rtable *table = rtc->table;
u32 as = v2.val.i;


@ -37,12 +37,12 @@ static inline const char *f_instruction_name(enum f_instruction_code fi)
{ return f_instruction_name_(fi) + 3; }
int f_const_promotion_(struct f_inst *arg, enum f_type want, int update);
int f_const_promotion_(struct f_inst *arg, enum btype want, int update);
static inline int f_const_promotion(struct f_inst *arg, enum f_type want)
static inline int f_const_promotion(struct f_inst *arg, enum btype want)
{ return f_const_promotion_(arg, want, 1); }
static inline int f_try_const_promotion(struct f_inst *arg, enum f_type want)
static inline int f_try_const_promotion(struct f_inst *arg, enum btype want)
{ return f_const_promotion_(arg, want, 0); }
@ -79,10 +79,10 @@ struct filter_iterator {
void f_add_lines(const struct f_line_item *what, struct filter_iterator *fit);
#define FILTER_ITERATE_INIT(fit, filter, pool) \
#define FILTER_ITERATE_INIT(fit, line, pool) \
({ \
BUFFER_INIT((fit)->lines, (pool), 32); \
BUFFER_PUSH((fit)->lines) = (filter)->root; \
BUFFER_PUSH((fit)->lines) = (line); \
})
#define FILTER_ITERATE(fit, fi) ({ \
@ -107,31 +107,15 @@ void f_add_lines(const struct f_line_item *what, struct filter_iterator *fit);
struct filter *f_new_where(struct f_inst *);
struct f_inst *f_dispatch_method(struct symbol *sym, struct f_inst *obj, struct f_inst *args, int skip);
struct f_inst *f_dispatch_method_x(const char *name, enum f_type t, struct f_inst *obj, struct f_inst *args);
struct f_inst *f_dispatch_method_x(const char *name, enum btype t, struct f_inst *obj, struct f_inst *args);
struct f_inst *f_for_cycle(struct symbol *var, struct f_inst *term, struct f_inst *block);
struct f_inst *f_implicit_roa_check(struct rtable_config *tab);
struct f_inst *f_print(struct f_inst *vars, int flush, enum filter_return fret);
static inline struct f_dynamic_attr f_new_dynamic_attr(u8 type, enum f_type f_type, uint code) /* Type as core knows it, type as filters know it, and code of dynamic attribute */
{ return (struct f_dynamic_attr) { .type = type, .f_type = f_type, .ea_code = code }; } /* f_type currently unused; will be handy for static type checking */
static inline struct f_dynamic_attr f_new_dynamic_attr_bit(u8 bit, enum f_type f_type, uint code) /* Type as core knows it, type as filters know it, and code of dynamic attribute */
{ return (struct f_dynamic_attr) { .type = EAF_TYPE_BITFIELD, .bit = bit, .f_type = f_type, .ea_code = code }; } /* f_type currently unused; will be handy for static type checking */
static inline struct f_static_attr f_new_static_attr(int f_type, int code, int readonly)
{ return (struct f_static_attr) { .f_type = f_type, .sa_code = code, .readonly = readonly }; }
static inline struct f_static_attr f_new_static_attr(btype type, int code, int readonly)
{ return (struct f_static_attr) { .type = type, .sa_code = code, .readonly = readonly }; }
struct f_inst *f_generate_roa_check(struct rtable_config *table, struct f_inst *prefix, struct f_inst *asn);
static inline int f_type_attr(int f_type) {
switch (f_type) {
case T_INT: return EAF_TYPE_INT;
case T_IP: return EAF_TYPE_IP_ADDRESS;
case T_QUAD: return EAF_TYPE_ROUTER_ID;
case T_PATH: return EAF_TYPE_AS_PATH;
case T_CLIST: return EAF_TYPE_INT_SET;
case T_ECLIST: return EAF_TYPE_EC_SET;
case T_LCLIST: return EAF_TYPE_LC_SET;
case T_BYTESTRING: return EAF_TYPE_OPAQUE;
default:
cf_error("Custom route attribute of unsupported type");
}
}
/* Hook for call bt_assert() function in configuration */
extern void (*bt_assert_hook)(int result, const struct f_line_item *assert);


@ -2,7 +2,7 @@
* Filters: utility functions
*
* Copyright 1998 Pavel Machek <pavel@ucw.cz>
* 2017 Jan Maria Matejka <mq@ucw.cz>
* 2017 Maria Matejka <mq@ucw.cz>
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
@ -142,7 +142,7 @@ f_dispatch_method(struct symbol *sym, struct f_inst *obj, struct f_inst *args, i
}
struct f_inst *
f_dispatch_method_x(const char *name, enum f_type t, struct f_inst *obj, struct f_inst *args)
f_dispatch_method_x(const char *name, enum btype t, struct f_inst *obj, struct f_inst *args)
{
struct sym_scope *scope = f_type_method_scope(t);
struct symbol *sym = cf_find_symbol_scope(scope, name);
@ -164,7 +164,7 @@ f_for_cycle(struct symbol *var, struct f_inst *term, struct f_inst *block)
if (term->type == T_VOID)
cf_error("Cannot infer type of FOR expression, please assign it to a variable");
enum f_type el_type = f_type_element_type(term->type);
enum btype el_type = f_type_element_type(term->type);
struct sym_scope *scope = el_type ? f_type_method_scope(term->type) : NULL;
struct symbol *ms = scope ? cf_find_symbol_scope(scope, "!for_next") : NULL;
@ -188,6 +188,28 @@ f_for_cycle(struct symbol *var, struct f_inst *term, struct f_inst *block)
return ms->method->new_inst(term, loop_start);
}
struct f_inst *
f_implicit_roa_check(struct rtable_config *tab)
{
const struct ea_class *def = ea_class_find("bgp_path");
if (!def)
bug("Couldn't find BGP AS Path attribute definition.");
struct f_inst *path_getter = f_new_inst(FI_EA_GET, f_new_inst(FI_CURRENT_ROUTE), def);
struct sym_scope *scope = f_type_method_scope(path_getter->type);
struct symbol *ms = scope ? cf_find_symbol_scope(scope, "last") : NULL;
if (!ms)
bug("Couldn't find the \"last\" method for AS Path.");
struct f_static_attr fsa = f_new_static_attr(T_NET, SA_NET, 1);
return f_new_inst(FI_ROA_CHECK,
f_new_inst(FI_RTA_GET, f_new_inst(FI_CURRENT_ROUTE), fsa),
ms->method->new_inst(path_getter, NULL),
tab);
}
struct f_inst *
f_print(struct f_inst *vars, int flush, enum filter_return fret)
{
@ -211,151 +233,3 @@ f_print(struct f_inst *vars, int flush, enum filter_return fret)
return output;
#undef AX
}
#define CA_KEY(n) n->name, n->fda.type
#define CA_NEXT(n) n->next
#define CA_EQ(na,ta,nb,tb) (!strcmp(na,nb) && (ta == tb))
#define CA_FN(n,t) (mem_hash(n, strlen(n)) ^ (t*0xaae99453U))
#define CA_ORDER 8 /* Fixed */
struct ca_storage {
struct ca_storage *next;
struct f_dynamic_attr fda;
u32 uc;
char name[0];
};
HASH(struct ca_storage) ca_hash;
static struct idm ca_idm;
static struct ca_storage **ca_storage;
static uint ca_storage_max;
static void
ca_free(resource *r)
{
struct custom_attribute *ca = (void *) r;
struct ca_storage *cas = HASH_FIND(ca_hash, CA, ca->name, ca->fda->type);
ASSERT(cas);
ca->name = NULL;
ca->fda = NULL;
if (!--cas->uc) {
uint id = EA_CUSTOM_ID(cas->fda.ea_code);
idm_free(&ca_idm, id);
HASH_REMOVE(ca_hash, CA, cas);
ca_storage[id] = NULL;
mb_free(cas);
}
}
static void
ca_dump(resource *r)
{
struct custom_attribute *ca = (void *) r;
debug("name \"%s\" id 0x%04x ea_type 0x%02x f_type 0x%02x\n",
ca->name, ca->fda->ea_code, ca->fda->type, ca->fda->f_type);
}
static struct resclass ca_class = {
.name = "Custom attribute",
.size = sizeof(struct custom_attribute),
.free = ca_free,
.dump = ca_dump,
.lookup = NULL,
.memsize = NULL,
};
struct custom_attribute *
ca_lookup(pool *p, const char *name, int f_type)
{
int ea_type;
switch (f_type) {
case T_INT:
ea_type = EAF_TYPE_INT;
break;
case T_IP:
ea_type = EAF_TYPE_IP_ADDRESS;
break;
case T_QUAD:
ea_type = EAF_TYPE_ROUTER_ID;
break;
case T_PATH:
ea_type = EAF_TYPE_AS_PATH;
break;
case T_CLIST:
ea_type = EAF_TYPE_INT_SET;
break;
case T_ECLIST:
ea_type = EAF_TYPE_EC_SET;
break;
case T_LCLIST:
ea_type = EAF_TYPE_LC_SET;
break;
case T_STRING:
ea_type = EAF_TYPE_STRING;
break;
case T_BYTESTRING:
ea_type = EAF_TYPE_OPAQUE;
break;
default:
cf_error("Custom route attribute of unsupported type");
}
static int inited = 0;
if (!inited) {
idm_init(&ca_idm, config_pool, 8);
HASH_INIT(ca_hash, config_pool, CA_ORDER);
ca_storage_max = 256;
ca_storage = mb_allocz(config_pool, sizeof(struct ca_storage *) * ca_storage_max);
inited++;
}
struct ca_storage *cas = HASH_FIND(ca_hash, CA, name, ea_type);
if (cas) {
cas->uc++;
} else {
uint id = idm_alloc(&ca_idm);
if (id >= EA_CUSTOM_BIT)
cf_error("Too many custom attributes.");
if (id >= ca_storage_max) {
ca_storage_max *= 2;
ca_storage = mb_realloc(ca_storage, sizeof(struct ca_storage *) * ca_storage_max * 2);
}
cas = mb_allocz(config_pool, sizeof(struct ca_storage) + strlen(name) + 1);
cas->fda = f_new_dynamic_attr(ea_type, f_type, EA_CUSTOM(id));
cas->uc = 1;
strcpy(cas->name, name);
ca_storage[id] = cas;
HASH_INSERT(ca_hash, CA, cas);
}
struct custom_attribute *ca = ralloc(p, &ca_class);
ca->fda = &(cas->fda);
ca->name = cas->name;
return ca;
}
const char *
ea_custom_name(uint ea)
{
uint id = EA_CUSTOM_ID(ea);
if (id >= ca_storage_max)
return NULL;
if (!ca_storage[id])
return NULL;
return ca_storage[id]->name;
}


@ -38,7 +38,7 @@
#include "nest/route.h"
#include "nest/protocol.h"
#include "nest/iface.h"
#include "nest/attrs.h"
#include "lib/attrs.h"
#include "conf/conf.h"
#include "filter/filter.h"
#include "filter/f-inst.h"
@ -50,97 +50,49 @@ enum f_exception {
FE_RETURN = 0x1,
};
struct filter_stack {
/* Value stack for execution */
#define F_VAL_STACK_MAX 4096
uint vcnt; /* Current value stack size; 0 for empty */
uint ecnt; /* Current execute stack size; 0 for empty */
struct f_val vstk[F_VAL_STACK_MAX]; /* The stack itself */
/* Instruction stack for execution */
#define F_EXEC_STACK_MAX 4096
struct {
const struct f_line *line; /* The line that is being executed */
uint pos; /* Instruction index in the line */
uint ventry; /* Value stack depth on entry */
uint vbase; /* Where to index variable positions from */
enum f_exception emask; /* Exception mask */
} estk[F_EXEC_STACK_MAX];
struct filter_exec_stack {
const struct f_line *line; /* The line that is being executed */
uint pos; /* Instruction index in the line */
uint ventry; /* Value stack depth on entry */
uint vbase; /* Where to index variable positions from */
enum f_exception emask; /* Exception mask */
};
/* Internal filter state, to be allocated on stack when executing filters */
struct filter_state {
/* Stacks needed for execution */
struct filter_stack *stack;
struct filter_stack {
/* Current filter stack depth */
/* Value stack */
uint vcnt, vlen;
struct f_val *vstk;
/* Instruction stack for execution */
uint ecnt, elen;
struct filter_exec_stack *estk;
} stack;
/* The route we are processing. This may be NULL to indicate no route available. */
struct rte **rte;
struct rte *rte;
/* The old rta to be freed after filters are done. */
struct rta *old_rta;
/* Cached pointer to ea_list */
struct ea_list **eattrs;
/* Linpool for adata allocation */
struct linpool *pool;
/* Additional external values provided to the filter */
const struct f_val *val;
/* Buffer for log output */
struct buffer buf;
/* Pointers to routes we are aggregating */
const struct f_val *val;
log_buffer buf;
/* Filter execution flags */
int flags;
};
_Thread_local static struct filter_state filter_state;
_Thread_local static struct filter_stack filter_stack;
void (*bt_assert_hook)(int result, const struct f_line_item *assert);
static inline void f_cache_eattrs(struct filter_state *fs)
{
fs->eattrs = &((*fs->rte)->attrs->eattrs);
}
#define _f_stack_init(fs, px, def) ((fs).stack.px##stk = alloca(sizeof(*(fs).stack.px##stk) * ((fs).stack.px##len = (config && config->filter_##px##stk) ? config->filter_##px##stk : (def))))
static inline void f_rte_cow(struct filter_state *fs)
{
if (!((*fs->rte)->flags & REF_COW))
return;
*fs->rte = rte_cow(*fs->rte);
}
/*
* rta_cow - prepare rta for modification by filter
*/
static void
f_rta_cow(struct filter_state *fs)
{
if (!rta_is_cached((*fs->rte)->attrs))
return;
/* Prepare to modify rte */
f_rte_cow(fs);
/* Store old rta to free it later, it stores reference from rte_cow() */
fs->old_rta = (*fs->rte)->attrs;
/*
* Get shallow copy of rta. Fields eattrs and nexthops of rta are shared
* with fs->old_rta (they will be copied when the cached rta will be obtained
* at the end of f_run()), also the lock of hostentry is inherited (we
* suppose hostentry is not changed by filters).
*/
(*fs->rte)->attrs = rta_do_cow((*fs->rte)->attrs, fs->pool);
/* Re-cache the ea_list */
f_cache_eattrs(fs);
}
#define f_stack_init(fs) ( _f_stack_init(fs, v, 128), _f_stack_init(fs, e, 128) )
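/* For illustration: _f_stack_init(fs, v, 128) expands roughly to
 *   fs.stack.vstk = alloca(sizeof(*fs.stack.vstk)
 *     * (fs.stack.vlen = (config && config->filter_vstk) ? config->filter_vstk : 128));
 * i.e. both stacks now live on the C stack, sized from the config when available. */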
static struct tbf rl_runtime_err = TBF_DEFAULT_LOG_LIMITS;
@ -160,23 +112,25 @@ static struct tbf rl_runtime_err = TBF_DEFAULT_LOG_LIMITS;
* TWOARGS macro to get both of them evaluated.
*/
static enum filter_return
interpret(struct filter_state *fs, const struct f_line *line, uint argc, const struct f_val *argv, struct f_val *val)
interpret(struct filter_state *fs, const struct f_line *line, uint argc, const struct f_val *argv, uint resc, struct f_val *resv)
{
/* No arguments allowed */
ASSERT_DIE(line->args == argc);
/* Check of appropriate number of arguments */
ASSERT(line->args == argc);
/* Initialize the filter stack */
struct filter_stack *fstk = fs->stack;
struct filter_stack *fstk = &fs->stack;
/* Set the arguments and top-level variables */
fstk->vcnt = line->vars + line->args;
memcpy(fstk->vstk, argv, sizeof(struct f_val) * line->args);
memset(fstk->vstk + line->args, 0, sizeof(struct f_val) * line->vars);
memset(fstk->vstk + argc, 0, sizeof(struct f_val) * line->vars);
/* The same as with the value stack. Not resetting the stack completely for performance reasons. */
fstk->ecnt = 1;
fstk->estk[0].line = line;
fstk->estk[0].pos = 0;
fstk->estk[0] = (struct filter_exec_stack) {
.line = line,
.pos = 0,
};
#define curline fstk->estk[fstk->ecnt-1]
#define prevline fstk->estk[fstk->ecnt-2]
@ -197,16 +151,16 @@ interpret(struct filter_state *fs, const struct f_line *line, uint argc, const s
#define v2 vv(1)
#define v3 vv(2)
#define f_vcnt_check_overflow(n) do { if (fstk->vcnt + n >= fstk->vlen) runtime("Filter execution stack overflow"); } while (0)
#define runtime(fmt, ...) do { \
if (!(fs->flags & FF_SILENT)) \
log_rl(&rl_runtime_err, L_ERR "filters, line %d: " fmt, what->lineno, ##__VA_ARGS__); \
return F_ERROR; \
} while(0)
#define falloc(size) lp_alloc(fs->pool, size)
#define fpool fs->pool
#define ACCESS_EATTRS do { if (!fs->eattrs) f_cache_eattrs(fs); } while (0)
#define falloc(size) tmp_alloc(size)
#define fpool tmp_linpool
#include "filter/inst-interpret.c"
#undef res
@ -216,7 +170,6 @@ interpret(struct filter_state *fs, const struct f_line *line, uint argc, const s
#undef runtime
#undef falloc
#undef fpool
#undef ACCESS_EATTRS
}
}
@ -225,49 +178,29 @@ interpret(struct filter_state *fs, const struct f_line *line, uint argc, const s
fstk->ecnt--;
}
if (fstk->vcnt == 0) {
if (val) {
log_rl(&rl_runtime_err, L_ERR "filters: No value left on stack");
return F_ERROR;
}
return F_NOP;
if (fstk->vcnt != resc)
{
log_rl(&rl_runtime_err, L_ERR "Filter expected to leave %d values on stack but %d left instead", resc, fstk->vcnt);
return F_ERROR;
}
if (val && (fstk->vcnt == 1)) {
*val = fstk->vstk[0];
return F_NOP;
}
log_rl(&rl_runtime_err, L_ERR "Too many items left on stack: %u", fstk->vcnt);
return F_ERROR;
memcpy(resv, fstk->vstk, sizeof(struct f_val) * resc);
return F_NOP;
}
/**
* f_run - run a filter for a route
* @filter: filter to run
* @rte: route being filtered, may be modified
* @rte: route being filtered, must be write-able
* @tmp_pool: all filter allocations go from this pool
* @flags: flags
*
* If filter needs to modify the route, there are several
* possibilities. @rte might be read-only (with REF_COW flag), in that
* case rw copy is obtained by rte_cow() and @rte is replaced. If
* @rte is originally rw, it may be directly modified (and it is never
* copied).
*
* The returned rte may reuse the (possibly cached, cloned) rta, or
* (if rta was modified) contains a modified uncached rta, which
* uses parts allocated from @tmp_pool and parts shared from original
* rta. There is one exception - if @rte is rw but contains a cached
* rta and that is modified, rta in returned rte is also cached.
*
* Ownership of cached rtas is consistent with rte, i.e.
* if a new rte is returned, it has its own clone of cached rta
* (and cached rta of read-only source rte is intact), if rte is
* modified in place, old cached rta is possibly freed.
* If @rte->attrs is cached, the returned rte allocates a new rta on
* tmp_pool, otherwise the filters may modify it.
*/
enum filter_return
f_run(const struct filter *filter, struct rte **rte, struct linpool *tmp_pool, int flags)
f_run(const struct filter *filter, struct rte *rte, int flags)
{
if (filter == FILTER_ACCEPT)
return F_ACCEPT;
@ -275,53 +208,24 @@ f_run(const struct filter *filter, struct rte **rte, struct linpool *tmp_pool, i
if (filter == FILTER_REJECT)
return F_REJECT;
return f_run_args(filter, rte, tmp_pool, 0, NULL, flags);
return f_run_args(filter, rte, 0, NULL, flags);
}
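A minimal sketch of the new calling convention (demo function is ours, not part of the diff): the linpool argument is gone, so a caller only needs a writable rte.

static int demo_run_import(const struct filter *f, struct rte *e)
{
  /* Temporary allocations now come from the implicit tmp_linpool */
  return f_run(f, e, 0) == F_ACCEPT;
}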
enum filter_return
f_run_args(const struct filter *filter, struct rte **rte, struct linpool *tmp_pool, uint argc, const struct f_val *argv, int flags)
f_run_args(const struct filter *filter, struct rte *rte, uint argc, const struct f_val *argv, int flags)
{
int rte_cow = ((*rte)->flags & REF_COW);
DBG( "Running filter `%s'...", filter->name );
/* Initialize the filter state */
filter_state = (struct filter_state) {
.stack = &filter_stack,
.rte = rte,
.pool = tmp_pool,
.flags = flags,
};
LOG_BUFFER_INIT(filter_state.buf);
f_stack_init(filter_state);
/* Run the interpreter itself */
enum filter_return fret = interpret(&filter_state, filter->root, argc, argv, NULL);
if (filter_state.old_rta) {
/*
* Cached rta was modified and filter_state->rte contains now an uncached one,
* sharing some part with the cached one. The cached rta should
* be freed (if rte was originally COW, filter_state->old_rta is a clone
* obtained during rte_cow()).
*
* This also implements the exception mentioned in f_run()
* description. The reason for this is that rta reuses parts of
* filter_state->old_rta, and these may be freed during rta_free(filter_state->old_rta).
* This is not the problem if rte was COW, because original rte
* also holds the same rta.
*/
if (!rte_cow) {
/* Cache the new attrs */
(*filter_state.rte)->attrs = rta_lookup((*filter_state.rte)->attrs);
/* Drop cached ea_list pointer */
filter_state.eattrs = NULL;
}
/* Uncache the old attrs and drop the pointer as it is invalid now. */
rta_free(filter_state.old_rta);
filter_state.old_rta = NULL;
}
enum filter_return fret = interpret(&filter_state, filter->root, argc, argv, 0, NULL);
/* Process the filter output, log it and return */
if (fret < F_ACCEPT) {
@ -347,50 +251,43 @@ f_run_args(const struct filter *filter, struct rte **rte, struct linpool *tmp_po
*/
enum filter_return
f_eval_rte(const struct f_line *expr, struct rte **rte, struct linpool *tmp_pool, uint argc, const struct f_val *argv, struct f_val *pres)
f_eval_rte(const struct f_line *expr, struct rte *rte, uint argc, const struct f_val *argv, uint resc, struct f_val *resv)
{
filter_state = (struct filter_state) {
.stack = &filter_stack,
.rte = rte,
.pool = tmp_pool,
};
LOG_BUFFER_INIT(filter_state.buf);
f_stack_init(filter_state);
return interpret(&filter_state, expr, argc, argv, pres);
return interpret(&filter_state, expr, argc, argv, resc, resv);
}
/*
* f_eval - get a value of a term
* @expr: filter line containing the term
* @tmp_pool: long data may get allocated from this pool
* @pres: here the output will be stored
* @pres: here the output will be stored if requested
*/
enum filter_return
f_eval(const struct f_line *expr, struct linpool *tmp_pool, struct f_val *pres)
f_eval(const struct f_line *expr, struct f_val *pres)
{
filter_state = (struct filter_state) {
.stack = &filter_stack,
.pool = tmp_pool,
};
filter_state = (struct filter_state) {};
LOG_BUFFER_INIT(filter_state.buf);
f_stack_init(filter_state);
enum filter_return fret = interpret(&filter_state, expr, 0, NULL, pres);
enum filter_return fret = interpret(&filter_state, expr, 0, NULL, !!pres, pres);
return fret;
}
/*
* cf_eval - evaluate a value of a term and check its type
* Called internally from the config parser, uses its internal memory pool
* for allocations. Do not call in other cases.
* cf_eval_tmp - evaluate a value of a term and check its type
*/
struct f_val
cf_eval(const struct f_inst *inst, int type)
cf_eval_tmp(const struct f_inst *inst, int type)
{
struct f_val val;
if (f_eval(f_linearize(inst, 1), cfg_mem, &val) > F_RETURN)
if (f_eval(f_linearize(inst, 1), &val) > F_RETURN)
cf_error("Runtime error while evaluating expression; see log for details");
if (type != T_VOID && val.type != type)
@ -399,14 +296,15 @@ cf_eval(const struct f_inst *inst, int type)
return val;
}
/*
* f_eval_buf - get a value of a term and print it to the supplied buffer
*/
enum filter_return
f_eval_buf(const struct f_line *expr, struct linpool *tmp_pool, buffer *buf)
f_eval_buf(const struct f_line *expr, buffer *buf)
{
struct f_val val;
enum filter_return fret = f_eval(expr, tmp_pool, &val);
enum filter_return fret = f_eval(expr, &val);
if (fret <= F_RETURN)
val_format(&val, buf);
return fret;
@ -474,6 +372,23 @@ filter_commit(struct config *new, struct config *old)
}
}
void channel_filter_dump(const struct filter *f)
{
if (f == FILTER_ACCEPT)
debug(" ALL");
else if (f == FILTER_REJECT)
debug(" NONE");
else if (f == FILTER_UNDEF)
debug(" UNDEF");
else if (f->sym) {
ASSERT(f->sym->filter == f);
debug(" named filter %s", f->sym->name);
} else {
debug("\n");
f_dump_line(f->root, 2);
}
}
void filters_dump_all(void)
{
struct symbol *sym;
@ -493,19 +408,10 @@ void filters_dump_all(void)
struct channel *c;
WALK_LIST(c, sym->proto->proto->channels) {
debug(" Channel %s (%s) IMPORT", c->name, net_label[c->net_type]);
if (c->in_filter == FILTER_ACCEPT)
debug(" ALL\n");
else if (c->in_filter == FILTER_REJECT)
debug(" NONE\n");
else if (c->in_filter == FILTER_UNDEF)
debug(" UNDEF\n");
else if (c->in_filter->sym) {
ASSERT(c->in_filter->sym->filter == c->in_filter);
debug(" named filter %s\n", c->in_filter->sym->name);
} else {
debug("\n");
f_dump_line(c->in_filter->root, 2);
}
channel_filter_dump(c->in_filter);
debug(" EXPORT", c->name, net_label[c->net_type]);
channel_filter_dump(c->out_filter);
debug("\n");
}
}
}


@ -14,8 +14,9 @@
#include "lib/ip.h"
#include "lib/macro.h"
#include "nest/route.h"
#include "nest/attrs.h"
#include "lib/attrs.h"
#include "filter/data.h"
#include "conf/conf.h"
/* Possible return values of filter execution */
enum filter_return {
@ -51,13 +52,19 @@ struct filter {
struct rte;
enum filter_return f_run(const struct filter *filter, struct rte **rte, struct linpool *tmp_pool, int flags);
enum filter_return f_run_args(const struct filter *filter, struct rte **rte, struct linpool *tmp_pool, uint argc, const struct f_val *argv, int flags);
enum filter_return f_eval_rte(const struct f_line *expr, struct rte **rte, struct linpool *tmp_pool, uint argc, const struct f_val *argv, struct f_val *pres);
enum filter_return f_eval_buf(const struct f_line *expr, struct linpool *tmp_pool, buffer *buf);
enum filter_return f_run(const struct filter *filter, struct rte *rte, int flags);
enum filter_return f_run_args(const struct filter *filter, struct rte *rte, uint argc, const struct f_val *argv, int flags);
enum filter_return f_eval_rte(const struct f_line *expr, struct rte *rte, uint argc, const struct f_val *argv, uint resc, struct f_val *resv);
enum filter_return f_eval_buf(const struct f_line *expr, buffer *buf);
struct f_val cf_eval(const struct f_inst *inst, int type);
static inline uint cf_eval_int(const struct f_inst *inst) { return cf_eval(inst, T_INT).val.i; };
struct f_val cf_eval_tmp(const struct f_inst *inst, int type);
static inline struct f_val *cf_eval(const struct f_inst *inst, int type)
{
struct f_val val = cf_eval_tmp(inst, type);
return lp_val_copy(cfg_mem, &val);
}
static inline uint cf_eval_int(const struct f_inst *inst) { return cf_eval_tmp(inst, T_INT).val.i; };
const char *filter_name(const struct filter *filter);
int filter_same(const struct filter *new, const struct filter *old);
@ -73,13 +80,4 @@ void filters_dump_all(void);
#define FF_SILENT 2 /* Silent filter execution */
/* Custom route attributes */
struct custom_attribute {
resource r;
struct f_dynamic_attr *fda;
const char *name;
};
struct custom_attribute *ca_lookup(pool *p, const char *name, int ea_type);
#endif


@ -46,7 +46,7 @@ run_function(const void *arg)
if (t->cmp)
return t->result == f_same(t->fn, t->cmp);
enum filter_return fret = f_eval(t->fn, tmp_linpool, NULL);
enum filter_return fret = f_eval(t->fn, NULL);
return (fret < F_REJECT);
}
@ -70,6 +70,7 @@ int
main(int argc, char *argv[])
{
bt_init(argc, argv);
bt_bird_init();
bt_assert_hook = bt_assert_filter;
@ -78,14 +79,13 @@ main(int argc, char *argv[])
if (!bt_config_file_parse(BT_CONFIG_FILE))
abort();
bt_test_suite_arg(t_reconfig, BT_CONFIG_FILE ".overlay", "Testing reconfiguration to overlay");
bt_test_suite_arg(t_reconfig, BT_CONFIG_FILE, "Testing reconfiguration back");
bt_test_suite_arg(t_reconfig, BT_CONFIG_FILE, "Testing reconfiguration to the same file");
bt_test_suite_arg_extra(t_reconfig, BT_CONFIG_FILE ".overlay", 0, BT_TIMEOUT, "Testing reconfiguration to overlay");
bt_test_suite_arg_extra(t_reconfig, BT_CONFIG_FILE, 0, BT_TIMEOUT, "Testing reconfiguration back");
bt_test_suite_arg_extra(t_reconfig, BT_CONFIG_FILE, 0, BT_TIMEOUT, "Testing reconfiguration to the same file");
struct f_bt_test_suite *t;
WALK_LIST(t, config->tests)
bt_test_suite_base(run_function, t->fn_name, t, BT_FORKING, BT_TIMEOUT, "%s", t->dsc);
bt_test_suite_base(run_function, t->fn_name, t, 0, BT_TIMEOUT, "%s", t->dsc);
bt_bird_cleanup();
return bt_exit_value();
}


@ -12,6 +12,109 @@ protocol device { }
attribute bgppath mypath;
attribute lclist mylclist;
/* Setting some custom attributes, enough to force BIRD to reallocate the attribute idmap */
attribute int test_ca_int1;
attribute int test_ca_int2;
attribute int test_ca_int3;
attribute int test_ca_int4;
attribute int test_ca_int5;
attribute int test_ca_int6;
attribute int test_ca_int7;
attribute int test_ca_int8;
attribute int test_ca_int9;
attribute int test_ca_int10;
attribute ip test_ca_ip1;
attribute ip test_ca_ip2;
attribute ip test_ca_ip3;
attribute ip test_ca_ip4;
attribute ip test_ca_ip5;
attribute ip test_ca_ip6;
attribute ip test_ca_ip7;
attribute ip test_ca_ip8;
attribute ip test_ca_ip9;
attribute ip test_ca_ip10;
attribute quad test_ca_quad1;
attribute quad test_ca_quad2;
attribute quad test_ca_quad3;
attribute quad test_ca_quad4;
attribute quad test_ca_quad5;
attribute quad test_ca_quad6;
attribute quad test_ca_quad7;
attribute quad test_ca_quad8;
attribute quad test_ca_quad9;
attribute quad test_ca_quad10;
attribute bgppath test_ca_bgppath1;
attribute bgppath test_ca_bgppath2;
attribute bgppath test_ca_bgppath3;
attribute bgppath test_ca_bgppath4;
attribute bgppath test_ca_bgppath5;
attribute bgppath test_ca_bgppath6;
attribute bgppath test_ca_bgppath7;
attribute bgppath test_ca_bgppath8;
attribute bgppath test_ca_bgppath9;
attribute bgppath test_ca_bgppath10;
attribute clist test_ca_clist1;
attribute clist test_ca_clist2;
attribute clist test_ca_clist3;
attribute clist test_ca_clist4;
attribute clist test_ca_clist5;
attribute clist test_ca_clist6;
attribute clist test_ca_clist7;
attribute clist test_ca_clist8;
attribute clist test_ca_clist9;
attribute clist test_ca_clist10;
attribute eclist test_ca_eclist1;
attribute eclist test_ca_eclist2;
attribute eclist test_ca_eclist3;
attribute eclist test_ca_eclist4;
attribute eclist test_ca_eclist5;
attribute eclist test_ca_eclist6;
attribute eclist test_ca_eclist7;
attribute eclist test_ca_eclist8;
attribute eclist test_ca_eclist9;
attribute eclist test_ca_eclist10;
attribute lclist test_ca_lclist1;
attribute lclist test_ca_lclist2;
attribute lclist test_ca_lclist3;
attribute lclist test_ca_lclist4;
attribute lclist test_ca_lclist5;
attribute lclist test_ca_lclist6;
attribute lclist test_ca_lclist7;
attribute lclist test_ca_lclist8;
attribute lclist test_ca_lclist9;
attribute lclist test_ca_lclist10;
attribute lclist test_ca_lclist_max1;
attribute lclist test_ca_lclist_max2;
attribute lclist test_ca_lclist_max3;
attribute lclist test_ca_lclist_max4;
attribute lclist test_ca_lclist_max5;
attribute lclist test_ca_lclist_max6;
attribute lclist test_ca_lclist_max7;
attribute lclist test_ca_lclist_max8;
attribute lclist test_ca_lclist_max9;
attribute lclist test_ca_lclist_max10;
attribute lclist test_ca_lclist_max11;
attribute lclist test_ca_lclist_max12;
attribute lclist test_ca_lclist_max13;
attribute lclist test_ca_lclist_max14;
attribute lclist test_ca_lclist_max15;
attribute lclist test_ca_lclist_max16;
attribute lclist test_ca_lclist_max17;
attribute lclist test_ca_lclist_max18;
attribute lclist test_ca_lclist_max19;
attribute lclist test_ca_lclist_max20;
attribute lclist test_ca_lclist_max21;
/* Uncomment this to get an error */
#attribute int bgp_path;
/*
* Common definitions and functions
@ -132,6 +235,14 @@ function t_int()
bt_assert(1 <= 1);
bt_assert(!(1234 < 1234));
bt_assert(10 - 5 = 5);
bt_assert(4294967295 + 1 = 0);
bt_assert(6*9=54);
bt_assert(984/41 = 24);
bt_assert(123/45 = 2);
bt_assert(0xfee1a | 0xbeef = 0xffeff);
bt_assert(0xfee1a & 0xbeef = 0xae0a);
case i {
4200000000: bt_assert(true);
else: bt_assert(false);
@ -471,9 +582,9 @@ bt_test_suite(t_ip_set, "Testing sets of ip address");
function t_enum()
{
bt_assert(format(RTS_STATIC) = "(enum 30)1");
bt_assert(format(NET_IP4) = "(enum 36)1");
bt_assert(format(NET_VPN6) = "(enum 36)4");
bt_assert(format(RTS_STATIC) = "(enum 31)1");
bt_assert(format(NET_IP4) = "(enum 3b)1");
bt_assert(format(NET_VPN6) = "(enum 3b)4");
bt_assert(RTS_STATIC ~ [RTS_STATIC, RTS_DEVICE]);
bt_assert(RTS_BGP !~ [RTS_STATIC, RTS_DEVICE]);
@ -2133,6 +2244,7 @@ function __test2()
filter testf
int j;
bool t;
{
print "Heya, filtering route to ", net.ip, " prefixlen ", net.len, " source ", source;
print "This route was from ", from;
@ -2144,6 +2256,52 @@ int j;
rip_metric = 14;
unset(rip_metric);
preference = 1234;
test_ca_int1 = 42;
test_ca_ip2 = 1.3.5.7;
test_ca_quad3 = 2.4.6.8;
test_ca_bgppath4 = +empty+;
test_ca_clist5 = -empty-;
test_ca_eclist6 = --empty--;
test_ca_lclist7 = ---empty---;
igp_metric = 53;
babel_metric = 64;
t = defined(babel_router_id);
bgp_origin = ORIGIN_IGP;
bgp_path = +empty+;
bgp_next_hop = 3456:789a:bcde:f012::3456:789a;
bgp_med = 71;
bgp_local_pref = 942;
t = defined(bgp_atomic_aggr);
t = defined(bgp_aggregator);
bgp_community = -empty-;
bgp_originator_id = 9.7.5.3;
bgp_cluster_list = -empty-;
bgp_ext_community = --empty--;
t = defined(bgp_aigp);
bgp_large_community = ---empty---;
t = defined(bgp_mpls_label_stack);
ospf_metric1 = 64;
ospf_metric2 = 111;
ospf_tag = 654432;
radv_preference = RA_PREF_LOW;
radv_lifetime = 28;
rip_metric = 2;
rip_tag = 4;
krt_source = 17;
krt_metric = 19;
# krt_lock_mtu = false;
# krt_lock_window = true;
# krt_lock_rtt = krt_lock_rttvar && krt_lock_sstresh || krt_lock_cwnd;
accept "ok I take that";
}


@ -38,12 +38,6 @@ protocol static {
print from;
from = 1.2.3.4;
print from;
print scope;
scope = SCOPE_HOST;
print scope;
if !(scope ~ [ SCOPE_HOST, SCOPE_SITE ]) then {
print "Failed in test";
}
preference = 15;
print preference;


@ -15,13 +15,6 @@
#define MAX_TREE_HEIGHT 13
static void
start_conf_env(void)
{
bt_bird_init();
cfg_mem = tmp_linpool;
}
static struct f_tree *
new_tree(uint id)
{
@ -153,8 +146,6 @@ get_balanced_tree_with_ranged_values(uint nodes_count)
static int
t_balancing(void)
{
start_conf_env();
uint height;
for (height = 1; height < MAX_TREE_HEIGHT; height++)
{
@ -170,6 +161,8 @@ t_balancing(void)
show_tree(balanced_tree_from_simple);
bt_assert(same_tree(balanced_tree_from_simple, expected_balanced_tree));
tmp_flush();
}
return 1;
@ -179,8 +172,6 @@ t_balancing(void)
static int
t_balancing_random(void)
{
start_conf_env();
uint height;
for (height = 1; height < MAX_TREE_HEIGHT; height++)
{
@ -191,6 +182,8 @@ t_balancing_random(void)
uint i;
for(i = 0; i < 10; i++)
{
struct lp_state *lps = lp_save(tmp_linpool);
struct f_tree *random_degenerated_tree = get_random_degenerated_left_tree(nodes_count);
show_tree(random_degenerated_tree);
@ -200,7 +193,11 @@ t_balancing_random(void)
show_tree(balanced_tree_from_random);
bt_assert(same_tree(balanced_tree_from_random, expected_balanced_tree));
lp_restore(tmp_linpool, lps);
}
tmp_flush();
}
return 1;
@ -209,8 +206,6 @@ t_balancing_random(void)
static int
t_find(void)
{
start_conf_env();
uint height;
for (height = 1; height < MAX_TREE_HEIGHT; height++)
{
@ -227,6 +222,8 @@ t_find(void)
const struct f_tree *found_tree = find_tree(tree, &looking_up_value);
bt_assert((val_compare(&looking_up_value, &(found_tree->from)) == 0) && (val_compare(&looking_up_value, &(found_tree->to)) == 0));
}
tmp_flush();
}
return 1;
@ -255,8 +252,6 @@ get_max_value_in_unbalanced_tree(struct f_tree *node, uint max)
static int
t_find_ranges(void)
{
start_conf_env();
uint height;
for (height = 1; height < MAX_TREE_HEIGHT; height++)
{
@ -283,6 +278,8 @@ t_find_ranges(void)
((val_compare(&needle, &(found_tree->from)) == 1) && (val_compare(&needle, &(found_tree->to)) == -1))
);
}
tmp_flush();
}
return 1;
@ -292,6 +289,8 @@ int
main(int argc, char *argv[])
{
bt_init(argc, argv);
bt_bird_init();
cfg_mem = tmp_linpool;
bt_test_suite(t_balancing, "Balancing strong unbalanced trees");
bt_test_suite(t_balancing_random, "Balancing random unbalanced trees");


@ -228,12 +228,12 @@ get_outer_net(net_addr *net, const struct f_prefix *src)
static list *
make_random_prefix_list(int num, int v6, int tight)
{
list *prefixes = lp_allocz(tmp_linpool, sizeof(struct f_prefix_node));
list *prefixes = tmp_allocz(sizeof(struct f_prefix_node));
init_list(prefixes);
for (int i = 0; i < num; i++)
{
struct f_prefix_node *px = lp_allocz(tmp_linpool, sizeof(struct f_prefix_node));
struct f_prefix_node *px = tmp_allocz(sizeof(struct f_prefix_node));
get_random_prefix(&px->prefix, v6, tight);
add_tail(prefixes, &px->n);
@ -271,7 +271,7 @@ read_prefix_list(FILE *f, int v6, int plus)
char s[32];
int n;
list *pxlist = lp_allocz(tmp_linpool, sizeof(struct f_prefix_node));
list *pxlist = tmp_allocz(sizeof(struct f_prefix_node));
init_list(pxlist);
errno = 0;
@ -285,7 +285,7 @@ read_prefix_list(FILE *f, int v6, int plus)
if (n != 5)
bt_abort_msg("Invalid content of trie_data");
struct f_prefix_node *px = lp_allocz(tmp_linpool, sizeof(struct f_prefix_node));
struct f_prefix_node *px = tmp_allocz(sizeof(struct f_prefix_node));
net_fill_ip4(&px->prefix.net, ip4_build(a0, a1, a2, a3), pl);
px->prefix.lo = pl;
px->prefix.hi = plus ? IP4_MAX_PREFIX_LENGTH : pl;
@ -409,9 +409,6 @@ test_match_net(list *prefixes, struct f_trie *trie, const net_addr *net)
static int
t_match_random_net(void)
{
bt_bird_init();
bt_config_parse(BT_CONFIG_SIMPLE);
int v6 = 0;
for (int round = 0; round < TESTS_NUM; round++)
{
@ -429,16 +426,12 @@ t_match_random_net(void)
tmp_flush();
}
bt_bird_cleanup();
return 1;
}
static int
t_match_inner_net(void)
{
bt_bird_init();
bt_config_parse(BT_CONFIG_SIMPLE);
int v6 = 0;
for (int round = 0; round < TESTS_NUM; round++)
{
@ -459,16 +452,12 @@ t_match_inner_net(void)
tmp_flush();
}
bt_bird_cleanup();
return 1;
}
static int
t_match_outer_net(void)
{
bt_bird_init();
bt_config_parse(BT_CONFIG_SIMPLE);
int v6 = 0;
for (int round = 0; round < TESTS_NUM; round++)
{
@ -490,7 +479,6 @@ t_match_outer_net(void)
}
v6 = !v6;
bt_bird_cleanup();
return 1;
}
@ -551,34 +539,24 @@ benchmark_trie_dataset(const char *filename, int plus)
static int UNUSED
t_bench_trie_datasets_subset(void)
{
bt_bird_init();
bt_config_parse(BT_CONFIG_SIMPLE);
/* Specific datasets, not included */
benchmark_trie_dataset("trie-data-bgp-1", 0);
benchmark_trie_dataset("trie-data-bgp-10", 0);
benchmark_trie_dataset("trie-data-bgp-100", 0);
benchmark_trie_dataset("trie-data-bgp-1000", 0);
bt_bird_cleanup();
return 1;
}
static int UNUSED
t_bench_trie_datasets_random(void)
{
bt_bird_init();
bt_config_parse(BT_CONFIG_SIMPLE);
/* Specific datasets, not included */
benchmark_trie_dataset("trie-data-bgp-1", 1);
benchmark_trie_dataset("trie-data-bgp-10", 1);
benchmark_trie_dataset("trie-data-bgp-100", 1);
benchmark_trie_dataset("trie-data-bgp-1000", 1);
bt_bird_cleanup();
return 1;
}
@ -586,9 +564,6 @@ t_bench_trie_datasets_random(void)
static int
t_trie_same(void)
{
bt_bird_init();
bt_config_parse(BT_CONFIG_SIMPLE);
int v6 = 0;
for (int round = 0; round < TESTS_NUM*4; round++)
{
@ -609,7 +584,6 @@ t_trie_same(void)
tmp_flush();
}
bt_bird_cleanup();
return 1;
}
@ -629,9 +603,6 @@ log_networks(const net_addr *a, const net_addr *b)
static int
t_trie_walk(void)
{
bt_bird_init();
bt_config_parse(BT_CONFIG_SIMPLE);
for (int round = 0; round < TESTS_NUM*8; round++)
{
int level = round / TESTS_NUM;
@ -740,7 +711,6 @@ t_trie_walk(void)
tmp_flush();
}
bt_bird_cleanup();
return 1;
}
@ -779,9 +749,6 @@ find_covering_nets(struct f_prefix *prefixes, int num, const net_addr *net, net_
static int
t_trie_walk_to_root(void)
{
bt_bird_init();
bt_config_parse(BT_CONFIG_SIMPLE);
for (int round = 0; round < TESTS_NUM * 4; round++)
{
int level = round / TESTS_NUM;
@ -853,7 +820,6 @@ t_trie_walk_to_root(void)
tmp_flush();
}
bt_bird_cleanup();
return 1;
}
@ -861,6 +827,8 @@ int
main(int argc, char *argv[])
{
bt_init(argc, argv);
bt_bird_init();
bt_config_parse(BT_CONFIG_SIMPLE);
bt_test_suite(t_match_random_net, "Testing random prefix matching");
bt_test_suite(t_match_inner_net, "Testing random inner prefix matching");


@ -1,7 +1,7 @@
src := bitmap.c bitops.c blake2s.c blake2b.c checksum.c event.c flowspec.c idm.c ip.c lists.c mac.c md5.c mempool.c net.c patmatch.c printf.c resource.c sha1.c sha256.c sha512.c slab.c slists.c strtoul.c tbf.c timer.c xmalloc.c
src := a-path.c a-set.c bitmap.c bitops.c blake2s.c blake2b.c checksum.c event.c flowspec.c idm.c ip.c lists.c mac.c md5.c mempool.c net.c netindex.c patmatch.c printf.c rcu.c resource.c sha1.c sha256.c sha512.c slab.c slists.c strtoul.c tbf.c timer.c xmalloc.c
obj := $(src-o-files)
$(all-daemon)
tests_src := bitmap_test.c heap_test.c buffer_test.c event_test.c flowspec_test.c bitops_test.c patmatch_test.c fletcher16_test.c slist_test.c checksum_test.c lists_test.c mac_test.c ip_test.c hash_test.c printf_test.c slab_test.c
tests_src := a-set_test.c a-path_test.c attribute_cleanup_test.c bitmap_test.c heap_test.c buffer_test.c event_test.c flowspec_test.c bitops_test.c patmatch_test.c fletcher16_test.c slist_test.c checksum_test.c lists_test.c locking_test.c mac_test.c ip_test.c hash_test.c printf_test.c slab_test.c tlists_test.c type_test.c
tests_targets := $(tests_targets) $(tests-target-files)
tests_objs := $(tests_objs) $(src-o-files)


@ -9,7 +9,7 @@
#include "nest/bird.h"
#include "nest/route.h"
#include "nest/attrs.h"
#include "lib/attrs.h"
#include "lib/resource.h"
#include "lib/unaligned.h"
#include "lib/string.h"


@ -10,7 +10,7 @@
#include "test/bt-utils.h"
#include "nest/route.h"
#include "nest/attrs.h"
#include "lib/attrs.h"
#include "lib/resource.h"
#include "filter/data.h"
@ -78,13 +78,13 @@ t_path_format(void)
bt_debug("Prepending ASN: %10u \n", i);
}
#define BUFFER_SIZE 120
byte buf[BUFFER_SIZE] = {};
#define T_BUFFER_SIZE 120
byte buf[T_BUFFER_SIZE] = {};
as_path_format(&empty_as_path, buf, BUFFER_SIZE);
as_path_format(&empty_as_path, buf, T_BUFFER_SIZE);
bt_assert_msg(strcmp(buf, "") == 0, "Buffer(%zu): '%s'", strlen(buf), buf);
as_path_format(as_path, buf, BUFFER_SIZE);
as_path_format(as_path, buf, T_BUFFER_SIZE);
bt_assert_msg(strcmp(buf, "4294967294 4294967293 4294967292 4294967291 4294967290 4294967289 4294967288 4294967287 4294967286 4294967285") == 0, "Buffer(%zu): '%s'", strlen(buf), buf);
#define SMALL_BUFFER_SIZE 25


@ -11,7 +11,7 @@
#include "nest/bird.h"
#include "nest/route.h"
#include "nest/attrs.h"
#include "lib/attrs.h"
#include "lib/resource.h"
#include "lib/string.h"


@ -11,7 +11,7 @@
#include "lib/net.h"
#include "nest/route.h"
#include "nest/attrs.h"
#include "lib/attrs.h"
#include "lib/resource.h"
#define SET_SIZE 10
@ -20,8 +20,8 @@ static const struct adata *set_sequence_same; /* <0; SET_SIZE) */
static const struct adata *set_sequence_higher; /* <SET_SIZE; 2*SET_SIZE) */
static const struct adata *set_random;
#define BUFFER_SIZE 1000
static byte buf[BUFFER_SIZE] = {};
#define T_BUFFER_SIZE 1000
static byte buf[T_BUFFER_SIZE] = {};
#define SET_SIZE_FOR_FORMAT_OUTPUT 10
@ -92,11 +92,11 @@ t_set_int_union(void)
const struct adata *set_union;
set_union = int_set_union(tmp_linpool, set_sequence, set_sequence_same);
bt_assert(int_set_get_size(set_union) == SET_SIZE);
bt_assert(int_set_format(set_union, 0, 2, buf, BUFFER_SIZE) == 0);
bt_assert(int_set_format(set_union, 0, 2, buf, T_BUFFER_SIZE) == 0);
set_union = int_set_union(tmp_linpool, set_sequence, set_sequence_higher);
bt_assert_msg(int_set_get_size(set_union) == SET_SIZE*2, "int_set_get_size(set_union) %d, SET_SIZE*2 %d", int_set_get_size(set_union), SET_SIZE*2);
bt_assert(int_set_format(set_union, 0, 2, buf, BUFFER_SIZE) == 0);
bt_assert(int_set_format(set_union, 0, 2, buf, T_BUFFER_SIZE) == 0);
return 1;
}
@ -106,15 +106,15 @@ t_set_int_format(void)
{
generate_set_sequence(SET_TYPE_INT, SET_SIZE_FOR_FORMAT_OUTPUT);
bt_assert(int_set_format(set_sequence, 0, 0, buf, BUFFER_SIZE) == 0);
bt_assert(int_set_format(set_sequence, 0, 0, buf, T_BUFFER_SIZE) == 0);
bt_assert(strcmp(buf, "0.0.0.0 0.0.0.1 0.0.0.2 0.0.0.3 0.0.0.4 0.0.0.5 0.0.0.6 0.0.0.7 0.0.0.8 0.0.0.9") == 0);
bzero(buf, BUFFER_SIZE);
bt_assert(int_set_format(set_sequence, 0, 2, buf, BUFFER_SIZE) == 0);
bzero(buf, T_BUFFER_SIZE);
bt_assert(int_set_format(set_sequence, 0, 2, buf, T_BUFFER_SIZE) == 0);
bt_assert(strcmp(buf, "0.0.0.2 0.0.0.3 0.0.0.4 0.0.0.5 0.0.0.6 0.0.0.7 0.0.0.8 0.0.0.9") == 0);
bzero(buf, BUFFER_SIZE);
bt_assert(int_set_format(set_sequence, 1, 0, buf, BUFFER_SIZE) == 0);
bzero(buf, T_BUFFER_SIZE);
bt_assert(int_set_format(set_sequence, 1, 0, buf, T_BUFFER_SIZE) == 0);
bt_assert(strcmp(buf, "(0,0) (0,1) (0,2) (0,3) (0,4) (0,5) (0,6) (0,7) (0,8) (0,9)") == 0);
return 1;
@ -174,11 +174,11 @@ t_set_ec_union(void)
const struct adata *set_union;
set_union = ec_set_union(tmp_linpool, set_sequence, set_sequence_same);
bt_assert(ec_set_get_size(set_union) == SET_SIZE);
bt_assert(ec_set_format(set_union, 0, buf, BUFFER_SIZE) == 0);
bt_assert(ec_set_format(set_union, 0, buf, T_BUFFER_SIZE) == 0);
set_union = ec_set_union(tmp_linpool, set_sequence, set_sequence_higher);
bt_assert_msg(ec_set_get_size(set_union) == SET_SIZE*2, "ec_set_get_size(set_union) %d, SET_SIZE*2 %d", ec_set_get_size(set_union), SET_SIZE*2);
bt_assert(ec_set_format(set_union, 0, buf, BUFFER_SIZE) == 0);
bt_assert(ec_set_format(set_union, 0, buf, T_BUFFER_SIZE) == 0);
return 1;
}
@ -194,7 +194,7 @@ t_set_ec_format(void)
for (i = 1; i < SET_SIZE_FOR_FORMAT_OUTPUT; i++)
set_sequence = ec_set_add(tmp_linpool, set_sequence, i + ((i%2) ? ((u64)EC_RO << 48) : ((u64)EC_RT << 48)));
bt_assert(ec_set_format(set_sequence, 0, buf, BUFFER_SIZE) == 0);
bt_assert(ec_set_format(set_sequence, 0, buf, T_BUFFER_SIZE) == 0);
bt_assert_msg(strcmp(buf, "(unknown 0x0, 0, 0) (ro, 0, 1) (rt, 0, 2) (ro, 0, 3) (rt, 0, 4) (ro, 0, 5) (rt, 0, 6) (ro, 0, 7) (rt, 0, 8) (ro, 0, 9)") == 0,
"ec_set_format() returns '%s'", buf);
@ -221,6 +221,7 @@ t_set_ec_delete(void)
return 1;
}
int
main(int argc, char *argv[])
{


@ -0,0 +1,82 @@
/*
* BIRD Library -- Auto storage attribute cleanup test
*
* (c) 2023 Maria Matejka <mq@jmq.cz>
* (c) 2023 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
#include "test/birdtest.h"
static int order_pos;
#define CHECK(n) bt_assert(order_pos++ == (n))
static void
tacd_cleanup(int *val)
{
CHECK(*val);
}
static void
tacd_aux(int pos)
{
CHECK(pos + 0);
UNUSED CLEANUP(tacd_cleanup) int upmost = pos + 18;
if (order_pos > 0)
{
CHECK(pos + 1);
UNUSED CLEANUP(tacd_cleanup) int inner_if = pos + 3;
CHECK(pos + 2);
}
for (int i=0; i<3; i++)
{
CHECK(pos + 4 + 3*i);
UNUSED CLEANUP(tacd_cleanup) int inner_for = pos + 6 + 3*i;
CHECK(pos + 5 + 3*i);
}
for (
CLEANUP(tacd_cleanup) int i = pos + 15;
i < pos + 16; i++)
{
CHECK(pos + 13);
UNUSED CLEANUP(tacd_cleanup) int inner_for = pos + 15;
CHECK(pos + 14);
}
CHECK(pos + 17);
}
#define CHECKCNT 19
static int
t_attribute_cleanup(void)
{
order_pos = 0;
CHECK(0);
for (int i=0; i<3; i++)
{
CHECK(i*(CHECKCNT+3) + 1);
UNUSED CLEANUP(tacd_cleanup) int inner_for = (i+1) * (CHECKCNT+3);
tacd_aux(i*(CHECKCNT+3) + 2);
CHECK((i+1) * (CHECKCNT+3) - 1);
}
CHECK(3 * (CHECKCNT+3) + 1);
return 1;
}
int main(int argc, char **argv)
{
bt_init(argc, argv);
bt_test_suite(t_attribute_cleanup, "Basic usability of the cleanup attribute");
return bt_exit_value();
}


@ -11,7 +11,39 @@
#include <stdint.h>
#include "lib/unaligned.h"
#include "nest/route.h"
typedef struct adata {
uint length; /* Length of data */
byte data[0];
} adata;
#define ADATA_SIZE(s) BIRD_CPU_ALIGN(sizeof(struct adata) + s)
extern const adata null_adata; /* adata of length 0 */
static inline struct adata *
lp_alloc_adata(struct linpool *pool, uint len)
{
struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len);
ad->length = len;
return ad;
}
static inline struct adata *
lp_store_adata(struct linpool *pool, const void *buf, uint len)
{
struct adata *ad = lp_alloc_adata(pool, len);
memcpy(ad->data, buf, len);
return ad;
}
#define tmp_alloc_adata(len) lp_alloc_adata(tmp_linpool, len)
#define tmp_store_adata(buf, len) lp_store_adata(tmp_linpool, buf, len)
#define tmp_copy_adata(ad) tmp_store_adata((ad)->data, (ad)->length)
static inline int adata_same(const struct adata *a, const struct adata *b)
{ return (!a && !b) || (a && b && (a->length == b->length) && !memcmp(a->data, b->data, a->length)); }
/* a-path.c */
@ -239,7 +271,6 @@ int lc_set_max(const struct adata *list, lcomm *val);
int int_set_walk(const struct adata *list, uint *pos, u32 *val);
int ec_set_walk(const struct adata *list, uint *pos, u64 *val);
int lc_set_walk(const struct adata *list, uint *pos, lcomm *val);
int rte_set_walk(const struct adata *list, u32 *pos, struct rte **val);
void ec_set_sort_x(struct adata *set); /* Sort in place */


@ -9,16 +9,44 @@
#ifndef _BIRD_BIRDLIB_H_
#define _BIRD_BIRDLIB_H_
#include <stddef.h>
#include "sysdep/config.h"
#include "lib/alloca.h"
#include "lib/macro.h"
/* Ugly structure offset handling macros */
struct align_probe { char x; long int y; };
#define SAME_TYPE(a, b) ({ int _ = ((a) != (b)); !_; })
#define TYPE_CAST(from, to, what) ( SAME_TYPE(((from) NULL), (what)), ((to) (what)))
#ifdef offsetof
#define OFFSETOF offsetof
#else
#define OFFSETOF(s, i) ((size_t) &((s *)0)->i)
#define SKIP_BACK(s, i, p) ((s *)((char *)p - OFFSETOF(s, i)))
#endif
#define SKIP_BACK(s, i, p) ({ \
typeof(p) _orig = p; \
s *_ptr = ((s *)((char *)_orig - OFFSETOF(s, i))); \
SAME_TYPE(&_ptr->i, _orig); \
_ptr; })
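Illustrative only (demo types are ours): the new SKIP_BACK() is a type-checked container_of(), so passing a pointer to the wrong member now fails to compile via SAME_TYPE().

struct demo_node { int x; };
struct demo_item { int key; struct demo_node n; };

static inline struct demo_item *demo_item_of(struct demo_node *np)
{ return SKIP_BACK(struct demo_item, n, np); }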
#define BIRD_ALIGN(s, a) (((s)+a-1)&~(a-1))
#define CPU_STRUCT_ALIGN (sizeof(struct align_probe))
#define CPU_STRUCT_ALIGN (MAX_(_Alignof(void*), _Alignof(u64)))
#define BIRD_CPU_ALIGN(s) BIRD_ALIGN((s), CPU_STRUCT_ALIGN)
/* Structure item alignment macros */
#define PADDING_NAME(id) _padding_##id
#define PADDING_(id, sz) u8 PADDING_NAME(id)[sz]
#if CPU_POINTER_ALIGNMENT == 4
#define PADDING(id, n32, n64) PADDING_(id, n32)
#elif CPU_POINTER_ALIGNMENT == 8
#define PADDING(id, n32, n64) PADDING_(id, n64)
#else
#error "Strange CPU pointer alignment: " CPU_POINTER_ALIGNMENT
#endif
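A sketch of the intended use (demo struct is ours): the macro spells out the padding the compiler would insert anyway, so the 32- and 64-bit layouts stay explicit and comparable.

struct demo_padded {
  u32 id;               /* offset 0 on both targets */
  PADDING(demo, 0, 4);  /* nothing on 32-bit; 4 bytes on 64-bit */
  void *ptr;            /* offset 4 on 32-bit, 8 on 64-bit */
};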
/* Utility macros */
@ -66,6 +94,10 @@ static inline int u64_cmp(u64 i1, u64 i2)
#define BIT32R_CLR(b,p) ((b)[(p)/32] &= ~BIT32R_VAL(p))
#define BIT32R_ZERO(b,l) memset((b), 0, (l)/8)
/* Short Bitmask Constructor */
#define BIT32_ALL_HELPER(x) (1 << (x)) |
#define BIT32_ALL(...) (MACRO_FOREACH(BIT32_ALL_HELPER, __VA_ARGS__) 0)
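/* e.g. BIT32_ALL(0, 2, 5) expands to (1 << (0)) | (1 << (2)) | (1 << (5)) | 0 == 0x25 */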
#ifndef NULL
#define NULL ((void *) 0)
#endif
@ -73,17 +105,15 @@ static inline int u64_cmp(u64 i1, u64 i2)
/* Macros for gcc attributes */
#define NORET __attribute__((noreturn))
#define USE_RESULT __attribute__((warn_unused_result))
#define UNUSED __attribute__((unused))
#define PACKED __attribute__((packed))
#define NONNULL(...) __attribute__((nonnull((__VA_ARGS__))))
#define CLEANUP(fun) __attribute__((cleanup(fun)))
#define STATIC_ASSERT(EXP) _Static_assert(EXP, #EXP)
#define STATIC_ASSERT_MSG(EXP,MSG) _Static_assert(EXP, MSG)
#ifndef HAVE_THREAD_LOCAL
#define _Thread_local
#endif
/* Microsecond time */
typedef s64 btime;
@ -95,6 +125,7 @@ typedef s64 btime;
#define TO_S /1000000
#define TO_MS /1000
#define TO_US /1
#define TO_NS * (btime) 1000
#ifndef PARSER
#define S S_
@ -160,6 +191,25 @@ typedef struct buffer {
byte *end;
} buffer;
#define LOG_BUFFER_SIZE 2560
enum log_buffer_pos {
LBP_TIMESTAMP = 0,
LBP_UDP_HEADER,
LBP_THREAD_ID,
LBP_CLASS,
LBP_MSG,
LBP__MAX,
LBPP_TERMINAL,
};
typedef struct log_buffer {
struct buffer buf;
byte *pos[LBP__MAX+1];
int class;
char block[LOG_BUFFER_SIZE];
} log_buffer;
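/* Our reading of the pos[] markers (an assumption, not stated in the diff):
 * the line is formatted once into block[] and each sink starts emitting at
 * the segment it needs, e.g. a UDP syslog sink from pos[LBP_UDP_HEADER],
 * a terminal from pos[LBP_TIMESTAMP] or pos[LBP_MSG]. */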
#define STACK_BUFFER_INIT(buf,size) \
do { \
buf.start = alloca(size); \
@ -167,13 +217,9 @@ typedef struct buffer {
buf.end = buf.start + size; \
} while(0)
#define LOG_BUFFER_INIT(buf) \
STACK_BUFFER_INIT(buf, LOG_BUFFER_SIZE)
#define LOG_BUFFER_SIZE 1024
#define log log_msg
void log_commit(int class, buffer *buf);
void log_prepare(log_buffer *buf, int class);
void log_commit(log_buffer *buf);
void log_msg(const char *msg, ...);
void log_rl(struct tbf *rl, const char *msg, ...);
void die(const char *msg, ...) NORET;
@ -192,13 +238,23 @@ void bug(const char *msg, ...) NORET;
void debug(const char *msg, ...); /* Printf to debug output */
void debug_safe(const char *msg); /* Printf to debug output, async-safe */
/* Internal thread ID, useful for logging */
extern _Atomic uint max_thread_id;
extern _Thread_local uint this_thread_id;
#define THIS_THREAD_ID (this_thread_id ?: (this_thread_id = atomic_fetch_add_explicit(&max_thread_id, 1, memory_order_acq_rel)))
/* Debugging */
#if defined(LOCAL_DEBUG) || defined(GLOBAL_DEBUG)
#define DBG(x, y...) debug(x, ##y)
#define DBGL(x, y...) debug(x "\n", ##y)
#elif defined(DEBUG_TO_LOG)
#define DBG(...) do { } while (0)
#define DBGL(...) log(L_DEBUG __VA_ARGS__)
#else
#define DBG(x, y...) do { } while(0)
#define DBG(...) do { } while(0)
#define DBGL(...) do { } while (0)
#endif
#define ASSERT_DIE(x) do { if (!(x)) bug("Assertion '%s' failed at %s:%d", #x, __FILE__, __LINE__); } while(0)

View File

@ -36,6 +36,8 @@ bmap_reset(struct bmap *b, uint size)
void
bmap_grow(struct bmap *b, uint need)
{
ASSERT_DIE(b->size);
uint size = b->size * 2;
while (size < need)
size *= 2;

View File

@ -10,6 +10,8 @@
#ifndef _BIRD_BITMAP_H_
#define _BIRD_BITMAP_H_
#include "lib/resource.h"
struct bmap
{
u32 size;

View File

@ -19,31 +19,163 @@
* events in them and explicitly ask to run them.
*/
#undef LOCAL_DEBUG
#include "nest/bird.h"
#include "lib/event.h"
#include "lib/io-loop.h"
event_list global_event_list;
event_list global_work_list;
//#ifdef DEBUGGING
#if 0
#define EDL_MAX 16384
enum edl_caller {
EDL_REMOVE_FROM = 1,
EDL_POSTPONE = 2,
EDL_RUN = 3,
EDL_SEND = 4,
EDL_RUN_LIST = 5,
} caller;
static struct event_debug_log {
event_list *target_list;
event *event;
event *receiver;
uint pos;
uint prev_edl_pos;
uint thread;
enum edl_caller caller;
} edl[EDL_MAX];
static _Atomic uint edl_cnt;
_Thread_local static uint edl_thread;
_Thread_local static uint prev_edl_pos = ~0;
static inline void edlog(event_list *list, event *e, event *receiver, uint pos, enum edl_caller caller)
{
uint edl_pos = atomic_fetch_add_explicit(&edl_cnt, 1, memory_order_acq_rel);
if (!edl_thread)
edl_thread = edl_pos;
edl[edl_pos % EDL_MAX] = (struct event_debug_log) {
.target_list = list,
.event = e,
.receiver = receiver,
.pos = pos,
.prev_edl_pos = prev_edl_pos,
.thread = edl_thread,
.caller = caller,
};
prev_edl_pos = edl_pos;
}
#else
#define edlog(...)
#endif
void
ev_init_list(event_list *el, struct birdloop *loop, const char *name)
{
el->name = name;
el->loop = loop;
atomic_store_explicit(&el->receiver, NULL, memory_order_release);
atomic_store_explicit(&el->_executor, NULL, memory_order_release);
}
/*
* The event list should work as a message passing point. Sending a message
* must be a fairly fast process with no locks and low waiting times. OTOH,
* processing messages always involves running the assigned code and the
* receiver is always a single thread with no concurrency at all. There is
* also a postponing requirement to synchronously remove an event from a queue,
* yet we allow this only when the caller has its receiver event loop locked.
* It still means that the event may get postponed by another event in the same
* list, therefore we have to be careful.
*/
static inline int
ev_remove_from(event *e, event * _Atomic * head)
{
/* The head pointer stores where cur is pointed to from */
event * _Atomic *prev = head;
/* The current event in queue to check */
event *cur = atomic_load_explicit(prev, memory_order_acquire);
/* This part of queue is empty! */
if (!cur)
return 0;
edlog(NULL, e, cur, 1, EDL_REMOVE_FROM);
while (cur)
{
/* Pre-loaded next pointer */
event *next = atomic_load_explicit(&cur->next, memory_order_acquire);
if (e == cur)
{
edlog(NULL, e, next, 3, EDL_REMOVE_FROM);
/* Check whether we have collided with somebody else
* adding an item to the queue. */
if (!atomic_compare_exchange_strong_explicit(
prev, &cur, next,
memory_order_acq_rel, memory_order_acquire))
{
/* This may happen only on list head */
ASSERT_DIE(prev == head);
/* Restart. The collision should never happen again. */
return ev_remove_from(e, head);
}
/* Successfully removed from the list; inactivate this event. */
atomic_store_explicit(&cur->next, NULL, memory_order_release);
return 1;
}
edlog(NULL, e, next, 2, EDL_REMOVE_FROM);
/* Go to the next event. */
prev = &cur->next;
cur = next;
}
edlog(NULL, e, cur, 4, EDL_REMOVE_FROM);
return 0;
}
inline void
ev_postpone(event *e)
{
if (ev_active(e))
{
rem_node(&e->n);
e->n.next = NULL;
}
/* Find the list to remove the event from */
event_list *sl = ev_get_list(e);
edlog(sl, e, NULL, 1, EDL_POSTPONE);
if (!sl)
return;
/* Postponing allowed only from the target loop */
ASSERT_DIE(birdloop_inside(sl->loop));
/* Remove from one of these lists. */
ASSERT(ev_remove_from(e, &sl->_executor) || ev_remove_from(e, &sl->receiver));
/* Mark as inactive */
ASSERT_DIE(sl == atomic_exchange_explicit(&e->list, NULL, memory_order_acq_rel));
edlog(sl, e, NULL, 2, EDL_POSTPONE);
}
static void
ev_dump(resource *r)
ev_dump(resource *r, unsigned indent UNUSED)
{
event *e = (event *) r;
debug("(code %p, data %p, %s)\n",
e->hook,
e->data,
e->n.next ? "scheduled" : "inactive");
atomic_load_explicit(&e->next, memory_order_relaxed) ? "scheduled" : "inactive");
}
static struct resclass ev_class = {
@ -82,8 +214,10 @@ ev_new(pool *p)
inline void
ev_run(event *e)
{
edlog(NULL, e, NULL, 1, EDL_RUN);
ev_postpone(e);
e->hook(e->data);
edlog(NULL, e, NULL, 2, EDL_RUN);
}
/**
@ -95,40 +229,39 @@ ev_run(event *e)
* list @l which can be run by calling ev_run_list().
*/
inline void
ev_enqueue(event_list *l, event *e)
ev_send(event_list *l, event *e)
{
ev_postpone(e);
add_tail(l, &e->n);
}
edlog(l, e, NULL, 1, EDL_SEND);
/* Set the target list */
event_list *ol = NULL;
if (!atomic_compare_exchange_strong_explicit(
&e->list, &ol, l,
memory_order_acq_rel, memory_order_acquire))
if (ol == l)
return;
else
bug("Queuing an already queued event to another queue is not supported.");
/**
* ev_schedule - schedule an event
* @e: an event
*
* This function schedules an event by enqueueing it to a system-wide
* event list which is run by the platform dependent code whenever
* appropriate.
*/
void
ev_schedule(event *e)
{
ev_enqueue(&global_event_list, e);
}
/* Here should be no concurrent senders */
event *next = atomic_load_explicit(&l->receiver, memory_order_acquire);
edlog(l, e, next, 2, EDL_SEND);
event *old_next = NULL;
do
if (!atomic_compare_exchange_strong_explicit(
&e->next, &old_next, next,
memory_order_acq_rel, memory_order_acquire))
bug("Event %p in inconsistent state");
else
{
old_next = next;
edlog(l, old_next, next, 3, EDL_SEND);
}
while (!atomic_compare_exchange_strong_explicit(
&l->receiver, &next, e,
memory_order_acq_rel, memory_order_acquire));
/**
* ev_schedule_work - schedule a work-event.
* @e: an event
*
* This function schedules an event by enqueueing it to a system-wide work-event
* list which is run by the platform dependent code whenever appropriate. This
* list is designated for work-events instead of regular events. They are executed
* less often so as not to clog the I/O loop.
*/
void
ev_schedule_work(event *e)
{
if (!ev_active(e))
add_tail(&global_work_list, &e->n);
edlog(l, e, next, 4, EDL_SEND);
if (l->loop) birdloop_ping(l->loop);
}
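A minimal sender-side sketch (hook and function names are hypothetical; ev_new_init() and ev_send_loop() come from lib/event.h below):
static void
my_hook(void *data UNUSED)
{
  /* runs later, in the thread serving the target loop */
}
static void
send_example(struct birdloop *loop, pool *p, void *data)
{
  event *e = ev_new_init(p, my_hook, data);
  ev_send(birdloop_event_list(loop), e);  /* same as ev_send_loop(loop, e) */
}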
void io_log_event(void *hook, void *data);
@ -139,63 +272,67 @@ void io_log_event(void *hook, void *data);
*
* This function calls ev_run() for all events enqueued in the list @l.
*/
int
ev_run_list(event_list *l)
{
node *n;
list tmp_list;
init_list(&tmp_list);
add_tail_list(&tmp_list, l);
init_list(l);
WALK_LIST_FIRST(n, tmp_list)
{
event *e = SKIP_BACK(event, n, n);
/* This is ugly hack, we want to log just events executed from the main I/O loop */
if ((l == &global_event_list) || (l == &global_work_list))
io_log_event(e->hook, e->data);
ev_run(e);
tmp_flush();
}
return !EMPTY_LIST(*l);
}
int
ev_run_list_limited(event_list *l, uint limit)
{
node *n;
list tmp_list;
event * _Atomic *ep = &l->_executor;
edlog(l, NULL, NULL, 1, EDL_RUN_LIST);
init_list(&tmp_list);
add_tail_list(&tmp_list, l);
init_list(l);
/* No pending events, refill the queue. */
if (!atomic_load_explicit(ep, memory_order_acquire))
{
/* Move the current event list aside and create a new one. */
event *received = atomic_exchange_explicit(&l->receiver, NULL, memory_order_acq_rel);
edlog(l, NULL, received, 2, EDL_RUN_LIST);
WALK_LIST_FIRST(n, tmp_list)
/* No event to run. */
if (!received)
return 0;
/* Setup the executor queue */
event *head = NULL;
/* Flip the order of the events by relinking them one by one (push-pop) */
while (received)
{
event *e = SKIP_BACK(event, n, n);
event *cur = received;
received = atomic_exchange_explicit(&cur->next, head, memory_order_acq_rel);
edlog(l, head, received, 3, EDL_RUN_LIST);
head = cur;
}
if (!limit)
break;
/* Store the executor queue to its designated place */
ASSERT_DIE(atomic_exchange_explicit(ep, head, memory_order_acq_rel) == NULL);
edlog(l, NULL, head, 4, EDL_RUN_LIST);
}
/* Run the events in order. */
event *e;
while (e = atomic_load_explicit(ep, memory_order_acquire))
{
edlog(l, e, NULL, 5, EDL_RUN_LIST);
/* Check limit */
if (!--limit)
return 1;
/* This is ugly hack, we want to log just events executed from the main I/O loop */
if ((l == &global_event_list) || (l == &global_work_list))
io_log_event(e->hook, e->data);
ev_run(e);
edlog(l, e, NULL, 6, EDL_RUN_LIST);
/* Inactivate the event */
event *next = atomic_load_explicit(&e->next, memory_order_relaxed);
ASSERT_DIE(e == atomic_exchange_explicit(ep, next, memory_order_acq_rel));
ASSERT_DIE(next == atomic_exchange_explicit(&e->next, NULL, memory_order_acq_rel));
ASSERT_DIE(l == atomic_exchange_explicit(&e->list, NULL, memory_order_acq_rel));
edlog(l, e, next, 7, EDL_RUN_LIST);
/* Run the event */
e->hook(e->data);
tmp_flush();
limit--;
edlog(l, e, next, 8, EDL_RUN_LIST);
}
if (!EMPTY_LIST(tmp_list))
{
/* Attach new items after the unprocessed old items */
add_tail_list(&tmp_list, l);
init_list(l);
add_tail_list(l, &tmp_list);
}
return !EMPTY_LIST(*l);
return !!atomic_load_explicit(&l->receiver, memory_order_acquire);
}
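On the receiver side, a sketch of the single consumer draining its list in bounded batches (the batch size is arbitrary):
static void
drain_events(event_list *l)
{
  /* ev_run_list_limited() returns nonzero while events are still pending */
  while (ev_run_list_limited(l, 64))
    ;  /* a real loop would poll sockets and timers between batches */
}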

View File

@ -10,33 +10,57 @@
#define _BIRD_EVENT_H_
#include "lib/resource.h"
#include "lib/locking.h"
#include <stdatomic.h>
struct birdloop;
typedef struct event {
resource r;
void (*hook)(void *);
void *data;
node n; /* Internal link */
struct event * _Atomic next;
struct event_list * _Atomic list;
} event;
typedef list event_list;
typedef struct event_list {
event * _Atomic receiver; /* Event receive list */
event * _Atomic _executor; /* Event execute list */
const char *name;
struct birdloop *loop; /* The executor loop */
} event_list;
extern event_list global_event_list;
extern event_list global_work_list;
event *ev_new(pool *);
void ev_run(event *);
#define ev_init_list(el) init_list(el)
void ev_init_list(event_list *, struct birdloop *loop, const char *name);
void ev_enqueue(event_list *, event *);
void ev_schedule(event *);
void ev_schedule_work(event *);
#define ev_send ev_enqueue
#define ev_send_loop(l, e) ev_send(birdloop_event_list((l)), (e))
#define ev_schedule(e) ({ ASSERT_THE_BIRD_LOCKED; if (!ev_active((e))) ev_send(&global_event_list, (e)); })
#define ev_schedule_work(e) ({ ASSERT_THE_BIRD_LOCKED; if (!ev_active((e))) ev_send(&global_work_list, (e)); })
void ev_postpone(event *);
int ev_run_list(event_list *);
int ev_run_list_limited(event_list *, uint);
#define ev_run_list(l) ev_run_list_limited((l), ~0)
#define ev_list_empty(l) !ev_run_list_limited((l), 0)
#define LEGACY_EVENT_LIST(l) (((l) == &global_event_list) || ((l) == &global_work_list))
static inline int
ev_active(event *e)
{
return e->n.next != NULL;
return atomic_load_explicit(&e->list, memory_order_acquire) != NULL;
}
static inline event_list *
ev_get_list(event *e)
{
return atomic_load_explicit(&e->list, memory_order_acquire);
}
static inline event*
@ -48,5 +72,8 @@ ev_new_init(pool *p, void (*hook)(void *), void *data)
return e;
}
#define ev_new_send(loop, pool, hook, data) \
ev_send_loop((loop), ev_new_init((pool), (hook), (data)))
#endif

View File

@ -54,9 +54,8 @@ t_ev_run_list(void)
int i;
olock_init();
timer_init();
io_init();
rt_init();
io_init();
if_init();
// roa_init();
config_init();

118
lib/fib.h Normal file
View File

@ -0,0 +1,118 @@
/*
* BIRD Internet Routing Daemon -- Network prefix storage
*
* (c) 1998--2000 Martin Mares <mj@ucw.cz>
* (c) 2022 Maria Matejka <mq@jmq.cz>
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
#ifndef _BIRD_LIB_FIB_H_
#define _BIRD_LIB_FIB_H_
/*
* BIRD FIBs are generic data structure for storing network prefixes.
* Also used for the master routing table. Currently implemented as
* a hash table.
*
* Available operations:
* - insertion of new entry
* - deletion of entry
* - searching for entry by network prefix
* - asynchronous retrieval of fib contents
*/
struct fib;
struct fib_node {
struct fib_node *next; /* Next in hash chain */
struct fib_iterator *readers; /* List of readers of this node */
net_addr addr[0];
};
struct fib_iterator { /* See lib/slists.h for an explanation */
struct fib_iterator *prev, *next; /* Must be synced with struct fib_node! */
byte efef; /* 0xff to distinguish between iterator and node */
byte pad[3];
struct fib_node *node; /* Or NULL if freshly merged */
uint hash;
};
typedef void (*fib_init_fn)(struct fib *, void *);
struct fib {
pool *fib_pool; /* Pool holding all our data */
slab *fib_slab; /* Slab holding all fib nodes */
struct fib_node **hash_table; /* Node hash table */
uint hash_size; /* Number of hash table entries (a power of two) */
uint hash_order; /* Binary logarithm of hash_size */
uint hash_shift; /* 32 - hash_order */
uint addr_type; /* Type of address data stored in fib (NET_*) */
uint node_size; /* FIB node size, 0 for nonuniform */
uint node_offset; /* Offset of fib_node struct inside of user data */
uint entries; /* Number of entries */
uint entries_min, entries_max; /* Entry count limits (else start rehashing) */
fib_init_fn init; /* Constructor */
};
static inline void * fib_node_to_user(struct fib *f, struct fib_node *e)
{ return e ? (void *) ((char *) e - f->node_offset) : NULL; }
static inline struct fib_node * fib_user_to_node(struct fib *f, void *e)
{ return e ? (void *) ((char *) e + f->node_offset) : NULL; }
void fib_init(struct fib *f, pool *p, uint addr_type, uint node_size, uint node_offset, uint hash_order, fib_init_fn init);
void *fib_find(struct fib *, const net_addr *); /* Find or return NULL if doesn't exist */
void *fib_get_chain(struct fib *f, const net_addr *a); /* Find first node in linked list from hash table */
void *fib_get(struct fib *, const net_addr *); /* Find or create new if nonexistent */
void *fib_route(struct fib *, const net_addr *); /* Longest-match routing lookup */
void fib_delete(struct fib *, void *); /* Remove fib entry */
void fib_free(struct fib *); /* Destroy the fib */
void fib_check(struct fib *); /* Consistency check for debugging */
void fit_init(struct fib_iterator *, struct fib *); /* Internal functions, don't call */
struct fib_node *fit_get(struct fib *, struct fib_iterator *);
void fit_put(struct fib_iterator *, struct fib_node *);
void fit_put_next(struct fib *f, struct fib_iterator *i, struct fib_node *n, uint hpos);
void fit_put_end(struct fib_iterator *i);
void fit_copy(struct fib *f, struct fib_iterator *dst, struct fib_iterator *src);
#define FIB_WALK(fib, type, z) do { \
struct fib_node *fn_, **ff_ = (fib)->hash_table; \
uint count_ = (fib)->hash_size; \
type *z; \
while (count_--) \
for (fn_ = *ff_++; z = fib_node_to_user(fib, fn_); fn_=fn_->next)
#define FIB_WALK_END } while (0)
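A sketch of the synchronous walk, assuming a hypothetical entry type that embeds struct fib_node at the offset passed to fib_init():
struct my_entry {
  int data;
  struct fib_node n;
};
static void
bump_all(struct fib *f)
{
  FIB_WALK(f, struct my_entry, e)
  {
    e->data++;   /* e is the user structure of each stored prefix */
  }
  FIB_WALK_END;
}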
#define FIB_ITERATE_INIT(it, fib) fit_init(it, fib)
#define FIB_ITERATE_START(fib, it, type, z) do { \
struct fib_node *fn_ = fit_get(fib, it); \
uint count_ = (fib)->hash_size; \
uint hpos_ = (it)->hash; \
type *z; \
for(;;fn_ = fn_->next) { \
while (!fn_ && ++hpos_ < count_) \
{ \
fn_ = (fib)->hash_table[hpos_]; \
} \
if (hpos_ >= count_) \
break; \
z = fib_node_to_user(fib, fn_);
#define FIB_ITERATE_END } } while(0)
#define FIB_ITERATE_PUT(it) fit_put(it, fn_)
#define FIB_ITERATE_PUT_NEXT(it, fib) fit_put_next(fib, it, fn_, hpos_)
#define FIB_ITERATE_PUT_END(it) fit_put_end(it)
#define FIB_ITERATE_UNLINK(it, fib) fit_get(fib, it)
#define FIB_ITERATE_COPY(dst, src, fib) fit_copy(fib, dst, src)
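And a sketch of the asynchronous variant: the walk is parked with FIB_ITERATE_PUT() and later resumed, surviving table changes in between (the iterator must first be set up with FIB_ITERATE_INIT(); the limit is illustrative):
static void
walk_step(struct fib *f, struct fib_iterator *fit)
{
  uint done = 0;
  FIB_ITERATE_START(f, fit, struct my_entry, e)
  {
    if (done++ >= 128)
    {
      FIB_ITERATE_PUT(fit);   /* park here; call walk_step() again to resume */
      return;
    }
    /* ... process e ... */
  }
  FIB_ITERATE_END;
}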
#endif

View File

@ -10,7 +10,7 @@
#ifndef _BIRD_HASH_H_
#define _BIRD_HASH_H_
#define HASH(type) struct { type **data; uint count, order; }
#define HASH(type) struct { type **data; uint count; u16 iterators; u8 order; u8 down_requested:1; }
#define HASH_TYPE(v) typeof(** (v).data)
#define HASH_SIZE(v) (1U << (v).order)
@ -31,10 +31,15 @@
(v) = (typeof(v)){ }; \
})
#define HASH_FIND(v,id,key...) \
#define HASH_FIND_CHAIN(v,id,key...) \
({ \
u32 _h = HASH_FN(v, id, key); \
HASH_TYPE(v) *_n = (v).data[_h]; \
(v).data[_h]; \
})
#define HASH_FIND(v,id,key...) \
({ \
HASH_TYPE(v) *_n = HASH_FIND_CHAIN(v, id, key); \
while (_n && !HASH_EQ(v, id, id##_KEY(_n), key)) \
_n = id##_NEXT(_n); \
_n; \
@ -125,20 +130,26 @@
#define HASH_MAY_STEP_DOWN_(v,pool,rehash_fn,args) \
({ \
if (((v).count < (HASH_SIZE(v) REHASH_LO_MARK(args))) && \
((v).order > (REHASH_LO_BOUND(args)))) \
if ((v).iterators) \
(v).down_requested = 1; \
else if (((v).count < (HASH_SIZE(v) REHASH_LO_MARK(args))) && \
((v).order > (REHASH_LO_BOUND(args)))) \
rehash_fn(&(v), pool, -(REHASH_LO_STEP(args))); \
})
#define HASH_MAY_RESIZE_DOWN_(v,pool,rehash_fn,args) \
({ \
uint _o = (v).order; \
while (((v).count < ((1U << _o) REHASH_LO_MARK(args))) && \
(_o > (REHASH_LO_BOUND(args)))) \
_o -= (REHASH_LO_STEP(args)); \
if (_o < (v).order) \
rehash_fn(&(v), pool, _o - (v).order); \
})
if ((v).iterators) \
(v).down_requested = 1; \
else { \
uint _o = (v).order; \
while (((v).count < ((1U << _o) REHASH_LO_MARK(args))) && \
(_o > (REHASH_LO_BOUND(args)))) \
_o -= (REHASH_LO_STEP(args)); \
if (_o < (v).order) \
rehash_fn(&(v), pool, _o - (v).order); \
} \
})
#define HASH_INSERT2(v,id,pool,node) \
@ -195,6 +206,20 @@
#define HASH_WALK_FILTER_END } while (0)
#define HASH_WALK_ITER(v, id, n, iter) \
do { \
uint _hash_walk_iter_put = 0; \
uint _shift = 32 - (v).order; \
for ( ; !_hash_walk_iter_put; (iter) += (1U << _shift)) { \
_hash_walk_iter_put = ((iter) + (1U << _shift) == 0); \
for (HASH_TYPE(v) *n = (v).data[(iter) >> _shift]; n; n = id##_NEXT((n)))\
if (HASH_FN(v, id, id##_KEY(n)) >= ((iter) >> _shift))
#define HASH_WALK_ITER_PUT (_hash_walk_iter_put = 1)
#define HASH_WALK_ITER_END } } while (0)
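A sketch of a resumable walk modeled on the t_walk_iter test below; the 32-bit iterator encodes the position, so the caller can stop and continue later (hash descriptor and callback are hypothetical):
static void
walk_chunk(u32 *pos)
{
  uint budget = 128;
  HASH_WALK_ITER(my_hash, MY, n, *pos)
  {
    visit(n);                 /* hypothetical per-node work */
    if (!--budget)
      HASH_WALK_ITER_PUT;     /* finish this bucket and stop; *pos is kept */
  }
  HASH_WALK_ITER_END;
}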
static inline void
mem_hash_init(u64 *h)
{
@ -245,7 +270,7 @@ mem_hash(const void *p, uint s)
}
static inline uint
ptr_hash(const void *ptr)
ptr_hash(void *ptr)
{
uintptr_t p = (uintptr_t) ptr;
return p ^ (p << 8) ^ (p >> 16);

View File

@ -61,7 +61,7 @@ dump_nodes(void)
static void
init_hash_(uint order)
{
my_pool = rp_new(&root_pool, "Test pool");
my_pool = rp_new(&root_pool, the_bird_domain.the_bird, "Test pool");
HASH_INIT(hash, my_pool, order);
@ -285,6 +285,46 @@ t_walk_filter(void)
return 1;
}
static int
t_walk_iter(void)
{
init_hash();
fill_hash();
u32 hit = 0;
u32 prev_hash = ~0;
for (uint cnt = 0; cnt < MAX_NUM; )
{
u32 last_hash = ~0;
// printf("PUT!\n");
HASH_WALK_ITER(hash, TEST, n, hit)
{
cnt++;
u32 cur_hash = HASH_FN(hash, TEST, n->key);
/*
printf("C%08x L%08x P%08x K%08x H%08x N%p S%d I%ld\n",
cur_hash, last_hash, prev_hash, n->key, hit, n, _shift, n - &nodes[0]);
*/
if (last_hash == ~0U)
{
if (prev_hash != ~0U)
bt_assert(prev_hash < cur_hash);
last_hash = prev_hash = cur_hash;
}
else
bt_assert(last_hash == cur_hash);
if (cnt < MAX_NUM)
HASH_WALK_ITER_PUT;
}
HASH_WALK_ITER_END;
}
return 1;
}
int
main(int argc, char *argv[])
{
@ -299,6 +339,7 @@ main(int argc, char *argv[])
bt_test_suite(t_walk_delsafe_remove, "HASH_WALK_DELSAFE and HASH_REMOVE");
bt_test_suite(t_walk_delsafe_remove2, "HASH_WALK_DELSAFE and HASH_REMOVE2. HASH_REMOVE2 is HASH_REMOVE and smart auto-resize function");
bt_test_suite(t_walk_filter, "HASH_WALK_FILTER");
bt_test_suite(t_walk_iter, "HASH_WALK_ITER");
return bt_exit_value();
}

View File

@ -22,6 +22,7 @@ idm_init(struct idm *m, pool *p, uint size)
m->used = 1;
m->size = size;
m->data = mb_allocz(p, m->size * sizeof(u32));
m->pool = p;
/* ID 0 is reserved */
m->data[0] = 1;
@ -34,6 +35,8 @@ idm_alloc(struct idm *m)
{
uint i, j;
ASSERT_DIE(DG_IS_LOCKED(m->pool->domain));
for (i = m->pos; i < m->size; i++)
if (m->data[i] != 0xffffffff)
goto found;
@ -67,6 +70,8 @@ found:
void
idm_free(struct idm *m, u32 id)
{
ASSERT_DIE(DG_IS_LOCKED(m->pool->domain));
uint i = id / 32;
uint j = id % 32;

View File

@ -12,6 +12,7 @@
struct idm
{
pool *pool;
u32 *data;
u32 pos;
u32 used;

70
lib/io-loop.h Normal file
View File

@ -0,0 +1,70 @@
/*
* BIRD -- I/O and event loop
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
#ifndef _BIRD_IO_LOOP_H_
#define _BIRD_IO_LOOP_H_
#include "nest/bird.h"
#include "lib/lists.h"
#include "lib/locking.h"
#include "lib/resource.h"
#include "lib/event.h"
#include "lib/socket.h"
extern struct birdloop main_birdloop;
/* Start a new birdloop owned by given pool and domain */
struct birdloop *birdloop_new(pool *p, uint order, btime max_latency, const char *fmt, ...);
/* Stop the loop. At the end, the @stopped callback is called unlocked in tail
* position to finish cleanup. Run birdloop_free() from that callback to free
* the loop itself. */
void birdloop_stop(struct birdloop *loop, void (*stopped)(void *data), void *data);
void birdloop_stop_self(struct birdloop *loop, void (*stopped)(void *data), void *data);
void birdloop_free(struct birdloop *loop);
/* Get birdloop's event list */
event_list *birdloop_event_list(struct birdloop *loop);
/* Get birdloop's time heap */
struct timeloop *birdloop_time_loop(struct birdloop *loop);
#define birdloop_domain(l) (birdloop_time_loop((l))->domain)
/* Get birdloop's pool */
pool *birdloop_pool(struct birdloop *loop);
/* Enter and exit the birdloop */
void birdloop_enter(struct birdloop *loop);
void birdloop_leave(struct birdloop *loop);
_Bool birdloop_inside(struct birdloop *loop);
void birdloop_mask_wakeups(struct birdloop *loop);
void birdloop_unmask_wakeups(struct birdloop *loop);
void birdloop_link(struct birdloop *loop);
void birdloop_unlink(struct birdloop *loop);
void birdloop_ping(struct birdloop *loop);
struct birdloop_flag_handler {
void (*hook)(struct birdloop_flag_handler *, u32 flags);
void *data;
};
void birdloop_flag(struct birdloop *loop, u32 flag);
void birdloop_flag_set_handler(struct birdloop *, struct birdloop_flag_handler *);
/* Setup sockets */
void birdloop_add_socket(struct birdloop *, struct birdsock *);
void birdloop_remove_socket(struct birdloop *, struct birdsock *);
void birdloop_init(void);
/* Yield for a little while. Use only in special cases. */
void birdloop_yield(void);
#endif /* _BIRD_IO_LOOP_H_ */

View File

@ -26,7 +26,7 @@
#define _BIRD_LISTS_C_
#include "nest/bird.h"
#include "lib/birdlib.h"
#include "lib/lists.h"
LIST_INLINE int
@ -37,9 +37,10 @@ check_list(list *l, node *n)
ASSERT_DIE(n);
ASSERT_DIE(n->prev);
do { n = n->prev; } while (n->prev);
node *nn = n;
do { nn = nn->prev; } while (nn->prev);
l = SKIP_BACK(list, head_node, n);
l = SKIP_BACK(list, head_node, nn);
}
int seen = 0;
@ -120,7 +121,7 @@ add_head(list *l, node *n)
LIST_INLINE void
insert_node(node *n, node *after)
{
EXPENSIVE_CHECK(check_list(l, after));
EXPENSIVE_CHECK((after->prev == NULL) || check_list(NULL, after));
ASSUME(n->prev == NULL);
ASSUME(n->next == NULL);
@ -141,7 +142,7 @@ insert_node(node *n, node *after)
LIST_INLINE void
rem_node(node *n)
{
EXPENSIVE_CHECK(check_list(NULL, n));
EXPENSIVE_CHECK((n->prev == n) && (n->next == n) || check_list(NULL, n));
node *z = n->prev;
node *x = n->next;

View File

@ -69,6 +69,18 @@ typedef union list { /* In fact two overlayed nodes */
#define EMPTY_LIST(list) (!(list).head->next)
static inline _Bool
enlisted(node *n)
{
switch ((!!n->next) + (!!n->prev))
{
case 0: return 0;
case 2: return 1;
case 1: bug("Garbled event list node");
}
bug("Maths is broken. And you should see a new heaven and a new earth: for the first heaven and the first earth had been passed away.");
}
#ifndef _BIRD_LISTS_C_
#define LIST_INLINE static inline

150
lib/lockfree.h Normal file
View File

@ -0,0 +1,150 @@
/*
* BIRD Library -- Generic lock-free structures
*
* (c) 2023 Maria Matejka <mq@jmq.cz>
* (c) 2023 CZ.NIC, z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
#ifndef _BIRD_LOCKFREE_H_
#define _BIRD_LOCKFREE_H_
#include "lib/event.h"
#include "lib/rcu.h"
#include <stdatomic.h>
/**
* Lock-free usecounts.
*/
struct lfuc {
_Atomic u64 uc;
};
#define LFUC_PU_SHIFT 44
#define LFUC_IN_PROGRESS (1ULL << LFUC_PU_SHIFT)
/**
* lfuc_lock - increase an atomic usecount
* @c: the usecount structure
*/
static inline u64 lfuc_lock(struct lfuc *c)
{
/* Locking is trivial; somebody already holds the underlying data structure
* so we just increase the use count. Nothing can be freed underneath our hands. */
u64 uc = atomic_fetch_add_explicit(&c->uc, 1, memory_order_acq_rel);
ASSERT_DIE(uc > 0);
return uc & (LFUC_IN_PROGRESS - 1);
}
/**
* lfuc_lock_revive - increase an atomic usecount even if it's zero
* @c: the usecount structure
*
* If the caller is sure that they can't collide with the prune routine,
* they can call this even on structures with already zeroed usecount.
* Handy for situations with flapping routes. Use only from the same
* loop as the one that runs the prune routine.
*/
static inline u64 lfuc_lock_revive(struct lfuc *c)
{
u64 uc = atomic_fetch_add_explicit(&c->uc, 1, memory_order_acq_rel);
return uc & (LFUC_IN_PROGRESS - 1);
}
/**
* lfuc_unlock - decrease an atomic usecount
* @c: the usecount structure
* @el: prune event list
* @ev: prune event itself
*
* If the usecount reaches zero, a prune event is run to possibly free the object.
* The prune event MUST use lfuc_finished() to check the object state.
*/
static inline u64 lfuc_unlock(struct lfuc *c, event_list *el, event *ev)
{
/* Unlocking is tricky. We do it lockless so at the same time, the prune
* event may be running, therefore if the unlock gets us to zero, it must be
* the last thing in this routine, otherwise the prune routine may find the
* source's usecount zeroed, freeing it prematurely.
*
* The usecount is split into two parts:
* the top 20 bits are an in-progress indicator
* the bottom 44 bits keep the actual usecount.
*
* Therefore at most 1 million of writers can simultaneously unlock the same
* structure, while at most ~17T different places can reference it. Both limits
* are insanely high from the 2022 point of view. Let's suppose that when 17T
* routes or 1M peers/tables get real, we will also get 128-bit atomic variables
* in the C standard. */
/* First, we push the in-progress indicator */
u64 uc = atomic_fetch_add_explicit(&c->uc, LFUC_IN_PROGRESS, memory_order_acq_rel);
/* Then we split the indicator to its parts. Remember, we got the value
* before the operation happened so we're re-doing the operation locally
* to get a view how the indicator _would_ look if nobody else was interacting.
*/
u64 pending = (uc >> LFUC_PU_SHIFT) + 1;
uc &= LFUC_IN_PROGRESS - 1;
/* We use the RCU critical section indicator to make the prune event wait
* until we finish here in the rare case we get preempted. */
rcu_read_lock();
/* Obviously, there can't be more pending unlocks than the usecount itself */
if (uc == pending)
/* If we're the last unlocker (every owner is already unlocking), schedule
* the owner's prune event */
ev_send(el, ev);
else
ASSERT_DIE(uc > pending);
/* And now, finally, simultaneously pop the in-progress indicator and the
* usecount, possibly allowing the pruning routine to free this structure */
uc = atomic_fetch_sub_explicit(&c->uc, LFUC_IN_PROGRESS + 1, memory_order_acq_rel);
/* ... and to reduce the load a bit, the pruning routine will better wait for
* RCU synchronization instead of a busy loop. */
rcu_read_unlock();
return uc - LFUC_IN_PROGRESS - 1;
}
/**
* lfuc_finished - auxiliary routine for prune event
* @c: usecount structure
*
* This routine simply waits until all unlockers finish their job and leave
* the critical section of lfuc_unlock(). Then we decide whether the usecount
* is indeed zero or not, and therefore whether the structure is free to be freed.
*/
static inline _Bool
lfuc_finished(struct lfuc *c)
{
u64 uc;
/* Wait until all unlockers finish */
while ((uc = atomic_load_explicit(&c->uc, memory_order_acquire)) >> LFUC_PU_SHIFT)
synchronize_rcu();
/* All of them are now done and if the usecount is now zero, then we're
* the last place to reference the object and we can call it finished. */
return (uc == 0);
}
/**
* lfuc_init - auxiliary routine for usecount initialization
* @c: usecount structure
*
* Called on object initialization, sets the usecount to an initial one to make
* sure that the prune routine doesn't free it before somebody else references it.
*/
static inline void
lfuc_init(struct lfuc *c)
{
atomic_store_explicit(&c->uc, 1, memory_order_release);
}
#endif
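Taken together, a sketch of the intended lifecycle, with a hypothetical owner structure and prune machinery:
struct cached_obj {
  struct lfuc uc;             /* initialized once with lfuc_init() */
  /* ... payload ... */
};
static void
use_obj(struct cached_obj *o, event_list *prune_list, event *prune_ev)
{
  lfuc_lock(&o->uc);          /* pin the object */
  /* ... read the payload safely ... */
  lfuc_unlock(&o->uc, prune_list, prune_ev);  /* may schedule pruning */
}
static void
prune_hook(void *data)        /* the hook of prune_ev */
{
  struct cached_obj *o = data;
  if (lfuc_finished(&o->uc))  /* waits out in-flight unlockers */
    mb_free(o);               /* hypothetical deallocation */
}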

292
lib/locking.h Normal file
View File

@ -0,0 +1,292 @@
/*
* BIRD Library -- Locking
*
* (c) 2020--2021 Maria Matejka <mq@jmq.cz>
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
#ifndef _BIRD_LOCKING_H_
#define _BIRD_LOCKING_H_
#include "lib/macro.h"
struct domain_generic;
struct pool;
/* Here define the global lock order; first to last. */
#define LOCK_ORDER \
the_bird, \
meta, \
control, \
proto, \
service, \
rtable, \
attrs, \
logging, \
resource,
struct lock_order {
#define LOCK_ORDER_EXPAND(p) struct domain_generic *p;
MACRO_FOREACH(LOCK_ORDER_EXPAND, LOCK_ORDER)
#undef LOCK_ORDER_EXPAND
};
#define LOCK_ORDER_EXPAND(p) struct domain__##p { struct domain_generic *p; };
MACRO_FOREACH(LOCK_ORDER_EXPAND, LOCK_ORDER)
#undef LOCK_ORDER_EXPAND
extern _Thread_local struct lock_order locking_stack;
extern _Thread_local struct domain_generic **last_locked;
#define DOMAIN(type) struct domain__##type
#define DOMAIN_ORDER(type) OFFSETOF(struct lock_order, type)
#define DOMAIN_NEW(type) (DOMAIN(type)) { .type = domain_new(DOMAIN_ORDER(type)) }
struct domain_generic *domain_new(uint order);
#define DOMAIN_FREE(type, d) domain_free((d).type)
void domain_free(struct domain_generic *);
#define DOMAIN_NAME(type, d) domain_name((d).type)
const char *domain_name(struct domain_generic *);
#define DOMAIN_SETUP(type, d, n, p) domain_setup((d).type, n, p)
void domain_setup(struct domain_generic *, const char *name, struct pool *);
#define DOMAIN_NULL(type) (DOMAIN(type)) {}
#define LOCK_DOMAIN(type, d) do_lock(((d).type), &(locking_stack.type))
#define UNLOCK_DOMAIN(type, d) do_unlock(((d).type), &(locking_stack.type))
#define DOMAIN_IS_LOCKED(type, d) (((d).type) == (locking_stack.type))
#define DG_IS_LOCKED(d) ((d) == *(DG_LSP(d)))
/* Internal for locking */
void do_lock(struct domain_generic *dg, struct domain_generic **lsp);
void do_unlock(struct domain_generic *dg, struct domain_generic **lsp);
uint dg_order(struct domain_generic *dg);
#define DG_LSP(d) ((struct domain_generic **) (((void *) &locking_stack) + dg_order(d)))
#define DG_LOCK(d) do_lock(d, DG_LSP(d))
#define DG_UNLOCK(d) do_unlock(d, DG_LSP(d))
/* Use with care. To be removed in near future. */
extern DOMAIN(the_bird) the_bird_domain;
#define the_bird_lock() LOCK_DOMAIN(the_bird, the_bird_domain)
#define the_bird_unlock() UNLOCK_DOMAIN(the_bird, the_bird_domain)
#define the_bird_locked() DOMAIN_IS_LOCKED(the_bird, the_bird_domain)
#define ASSERT_THE_BIRD_LOCKED ({ if (!the_bird_locked()) bug("The BIRD lock must be locked here: %s:%d", __FILE__, __LINE__); })
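A minimal sketch of creating and taking a lock domain, using the attrs level as an arbitrary example:
static DOMAIN(attrs) my_domain;
static void
domain_example(void)
{
  my_domain = DOMAIN_NEW(attrs);
  DOMAIN_SETUP(attrs, my_domain, "Example domain", NULL);
  LOCK_DOMAIN(attrs, my_domain);
  ASSERT_DIE(DOMAIN_IS_LOCKED(attrs, my_domain));
  /* ... touch data guarded by this domain ... */
  UNLOCK_DOMAIN(attrs, my_domain);
}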
/**
* Objects bound with domains
*
* First, we need some object to have its locked and unlocked part.
* This is accomplished typically by the following pattern:
*
* struct foo_public {
* ... // Public fields
* DOMAIN(bar) lock; // The assigned domain
* };
*
* struct foo_private {
* struct foo_public; // Importing public fields
* struct foo_private **locked_at; // Auxiliary field for locking routines
* ... // Private fields
* };
*
* typedef union foo {
* struct foo_public;
* struct foo_private priv;
* } foo;
*
* All persistently stored object pointers MUST point to the public parts.
* If accessing the locked object from embedded objects, great care must
* be applied to always SKIP_BACK to the public object version, not the
* private one.
*
* To access the private object parts, either the private object pointer
* is explicitly given to us (implying the domain has been locked somewhere
* else), or we have to lock the domain ourselves. To do that,
* there are some handy macros.
*/
#define LOBJ_LOCK_SIMPLE(_obj, _level) \
({ LOCK_DOMAIN(_level, (_obj)->lock); &(_obj)->priv; })
#define LOBJ_UNLOCK_SIMPLE(_obj, _level) \
UNLOCK_DOMAIN(_level, (_obj)->lock)
/*
* These macros can be used to define specific macros for given class.
*
* #define FOO_LOCK_SIMPLE(foo) LOBJ_LOCK_SIMPLE(foo, bar)
* #define FOO_UNLOCK_SIMPLE(foo) LOBJ_UNLOCK_SIMPLE(foo, bar)
*
* Then these can be used like this:
*
* void foo_frobnicate(foo *f)
* {
* // Unlocked context
* ...
* struct foo_private *fp = FOO_LOCK_SIMPLE(f);
* // Locked context
* ...
* FOO_UNLOCK_SIMPLE(f);
* // Unlocked context
* ...
* }
*
* These simple calls have two major drawbacks. First, if you return
* from locked context, you don't unlock, which may lock you dead.
* And second, the foo_private pointer is still syntactically valid
* even after unlocking.
*
* To fight this, we need more magic and the switch should stay in that
* position.
*
* First, we need an auxiliary _function_ for unlocking. This function
* is intended to be called in a local variable cleanup context.
*/
#define LOBJ_UNLOCK_CLEANUP_NAME(_stem) _lobj__##_stem##_unlock_cleanup
#define LOBJ_UNLOCK_CLEANUP(_stem, _level) \
static inline void LOBJ_UNLOCK_CLEANUP_NAME(_stem)(struct _stem##_private **obj) { \
if (!*obj) return; \
ASSERT_DIE(LOBJ_IS_LOCKED((*obj), _level)); \
ASSERT_DIE((*obj)->locked_at == obj); \
(*obj)->locked_at = NULL; \
UNLOCK_DOMAIN(_level, (*obj)->lock); \
}
#define LOBJ_LOCK(_obj, _pobj, _stem, _level) \
CLEANUP(LOBJ_UNLOCK_CLEANUP_NAME(_stem)) struct _stem##_private *_pobj = LOBJ_LOCK_SIMPLE(_obj, _level); _pobj->locked_at = &_pobj;
/*
* And now the usage of these macros. You first need to declare the auxiliary
* cleanup function.
*
* LOBJ_UNLOCK_CLEANUP(foo, bar);
*
* And then declare the lock-local macro:
*
* #define FOO_LOCK(foo, fpp) LOBJ_LOCK(foo, fpp, foo, bar)
*
* This construction then allows you to lock much more safely:
*
* void foo_frobnicate_safer(foo *f)
* {
* // Unlocked context
* ...
* do {
* FOO_LOCK(foo, fpp);
* // Locked context, fpp is valid here
*
* if (something) return; // This implicitly unlocks
* if (whatever) break; // This unlocks too
*
* // Finishing context with no unlock at all
* } while (0);
*
* // Here is fpp invalid and the object is back unlocked.
* ...
* }
*
* There is no explicit unlock statement. To unlock, simply leave the block
* with locked context.
*
* This may be made even nicer to use by employing a for-cycle.
*/
#define LOBJ_LOCKED(_obj, _pobj, _stem, _level) \
for (CLEANUP(LOBJ_UNLOCK_CLEANUP_NAME(_stem)) struct _stem##_private *_pobj = LOBJ_LOCK_SIMPLE(_obj, _level); \
_pobj ? (_pobj->locked_at = &_pobj) : NULL; \
LOBJ_UNLOCK_CLEANUP_NAME(_stem)(&_pobj), _pobj = NULL)
/*
* This for-cycle employs heavy magic to hide as much of the boilerplate
* from the user as possibly needed. Here is how it works.
*
* First, the for-1 clause is executed, setting up _pobj, to the private
* object pointer. It has a cleanup hook set.
*
* Then, the for-2 clause is checked. As _pobj is non-NULL, _pobj->locked_at
* is initialized to the _pobj address to ensure that the cleanup hook unlocks
* the right object.
*
* Now the user block is executed. If it ends by break or return, the cleanup
* hook fires for _pobj, triggering object unlock.
*
* If the user block executed completely, the for-3 clause is run, executing
* the cleanup hook directly and then deactivating it by setting _pobj to NULL.
*
* Finally, the for-2 clause is checked again but now with _pobj being NULL,
* causing the loop to end. As the object has already been unlocked, nothing
* happens after leaving the context.
*
* #define FOO_LOCKED(foo, fpp) LOBJ_LOCKED(foo, fpp, foo, bar)
*
* Then the previous code can be modified like this:
*
* void foo_frobnicate_safer(foo *f)
* {
* // Unlocked context
* ...
* FOO_LOCKED(foo, fpp)
* {
* // Locked context, fpp is valid here
*
* if (something) return; // This implicitly unlocks
* if (whatever) break; // This unlocks too
*
* // Finishing context with no unlock at all
* }
*
* // Unlocked context
* ...
*
* // Locking once again without an explicit block
* FOO_LOCKED(foo, fpp)
* do_something(fpp);
*
* // Here is fpp invalid and the object is back unlocked.
* ...
* }
*
*
* For many reasons, a lock-check macro is handy.
*
* #define FOO_IS_LOCKED(foo) LOBJ_IS_LOCKED(foo, bar)
*/
#define LOBJ_IS_LOCKED(_obj, _level) DOMAIN_IS_LOCKED(_level, (_obj)->lock)
/*
* An example implementation is available in lib/locking_test.c
*/
/*
* Please don't use this macro unless you at least try to prove that
* it's completely safe. It's a can of worms.
*
* NEVER RETURN OR BREAK FROM THIS MACRO, it will crash.
*/
#define LOBJ_UNLOCKED_TEMPORARILY(_obj, _pobj, _stem, _level) \
for (union _stem *_obj = SKIP_BACK(union _stem, priv, _pobj), **_lataux = (union _stem **) _pobj->locked_at; \
_obj ? (_pobj->locked_at = NULL, LOBJ_UNLOCK_SIMPLE(_obj, _level), _obj) : NULL; \
LOBJ_LOCK_SIMPLE(_obj, _level), _pobj->locked_at = (struct _stem##_private **) _lataux, _obj = NULL)
/*
* Get the locked object when the lock is already taken
*/
#define LOBJ_PRIV(_obj, _level) \
({ ASSERT_DIE(DOMAIN_IS_LOCKED(_level, (_obj)->lock)); &(_obj)->priv; })
#endif

89
lib/locking_test.c Normal file
View File

@ -0,0 +1,89 @@
#include "test/birdtest.h"
#include "test/bt-utils.h"
#include "lib/locking.h"
#include <stdatomic.h>
#include <pthread.h>
#define FOO_PUBLIC \
const char *name; \
_Atomic uint counter; \
DOMAIN(proto) lock;
struct foo_private {
struct { FOO_PUBLIC; };
struct foo_private **locked_at;
uint private_counter;
};
typedef union foo {
struct { FOO_PUBLIC; };
struct foo_private priv;
} foo;
LOBJ_UNLOCK_CLEANUP(foo, proto);
#define FOO_LOCK(_foo, _fpp) LOBJ_LOCK(_foo, _fpp, foo, proto)
#define FOO_LOCKED(_foo, _fpp) LOBJ_LOCKED(_foo, _fpp, foo, proto)
#define FOO_IS_LOCKED(_foo) LOBJ_IS_LOCKED(_foo, proto)
static uint
inc_public(foo *f)
{
return atomic_fetch_add_explicit(&f->counter, 1, memory_order_relaxed) + 1;
}
static uint
inc_private(foo *f)
{
FOO_LOCKED(f, fp) return ++fp->private_counter;
bug("Returning always");
}
#define BLOCKCOUNT 4096
#define THREADS 16
#define REPEATS 128
static void *
thread_run(void *_foo)
{
foo *f = _foo;
for (int i=0; i<REPEATS; i++)
if (i % 2)
for (int j=0; j<BLOCKCOUNT; j++)
inc_public(f);
else
for (int j=0; j<BLOCKCOUNT; j++)
inc_private(f);
return NULL;
}
static int
t_locking(void)
{
pthread_t thr[THREADS];
foo f = { .lock = DOMAIN_NEW(proto), };
for (int i=0; i<THREADS; i++)
bt_assert(pthread_create(&thr[i], NULL, thread_run, &f) == 0);
for (int i=0; i<THREADS; i++)
bt_assert(pthread_join(thr[i], NULL) == 0);
bt_assert(f.priv.private_counter == atomic_load_explicit(&f.counter, memory_order_relaxed));
bt_assert(f.priv.private_counter == THREADS * BLOCKCOUNT * REPEATS / 2);
return 1;
}
int
main(int argc, char **argv)
{
bt_init(argc, argv);
bt_bird_init();
bt_test_suite(t_locking, "Testing locks");
return bt_exit_value();
}

View File

@ -26,6 +26,8 @@
#define MACRO_DROP(...)
#define MACRO_UNPAREN(...) __VA_ARGS__
#define MACRO_SEP(a, b, sep) a sep b
#define MACRO_STR(a) #a
#define MACRO_STR_AFTER(a) MACRO_STR(a)
/* Aliases for some special chars */
#define MACRO_COMMA ,

View File

@ -27,6 +27,7 @@
struct lp_chunk {
struct lp_chunk *next;
struct linpool *lp;
uintptr_t data_align[0];
byte data[0];
};
@ -38,13 +39,12 @@ struct linpool {
byte *ptr, *end;
struct lp_chunk *first, *current; /* Normal (reusable) chunks */
struct lp_chunk *first_large; /* Large chunks */
struct lp_state *initial; /* Initial state to restore to */
uint total, total_large;
};
_Thread_local linpool *tmp_linpool;
static void lp_free(resource *);
static void lp_dump(resource *);
static void lp_dump(resource *, unsigned);
static resource *lp_lookup(resource *, unsigned long);
static struct resmem lp_memsize(resource *r);
@ -67,7 +67,9 @@ static struct resclass lp_class = {
linpool
*lp_new(pool *p)
{
return ralloc(p, &lp_class);
linpool *m = ralloc(p, &lp_class);
m->initial = lp_save(m);
return m;
}
/**
@ -87,6 +89,7 @@ linpool
void *
lp_alloc(linpool *m, uint size)
{
ASSERT_DIE(DG_IS_LOCKED(resource_parent(&m->r)->domain));
byte *a = (byte *) BIRD_ALIGN((unsigned long) m->ptr, CPU_STRUCT_ALIGN);
byte *e = a + size;
@ -102,30 +105,29 @@ lp_alloc(linpool *m, uint size)
{
/* Too large => allocate large chunk */
c = xmalloc(sizeof(struct lp_chunk) + size);
m->total_large += size;
c->lp = m;
c->next = m->first_large;
m->total_large += size;
m->first_large = c;
}
else
{
if (m->current && m->current->next)
{
/* Still have free chunks from previous incarnation (before lp_flush()) */
c = m->current->next;
}
if (m->current)
ASSERT_DIE(!m->current->next);
/* Need to allocate a new chunk */
c = alloc_page();
m->total += LP_DATA_SIZE;
c->next = NULL;
c->lp = m;
if (m->current)
m->current->next = c;
else
{
/* Need to allocate a new chunk */
c = alloc_page();
m->first = c;
m->total += LP_DATA_SIZE;
c->next = NULL;
if (m->current)
m->current->next = c;
else
m->first = c;
}
m->current = c;
m->ptr = c->data + size;
m->end = c->data + LP_DATA_SIZE;
@ -147,6 +149,7 @@ lp_alloc(linpool *m, uint size)
void *
lp_allocu(linpool *m, uint size)
{
ASSERT_DIE(DG_IS_LOCKED(resource_parent(&m->r)->domain));
byte *a = m->ptr;
byte *e = a + size;
@ -185,26 +188,8 @@ lp_allocz(linpool *m, uint size)
void
lp_flush(linpool *m)
{
struct lp_chunk *c;
/* Move ptr to the first chunk and free all other chunks */
m->current = c = m->first;
m->ptr = c ? c->data : NULL;
m->end = c ? c->data + LP_DATA_SIZE : NULL;
while (c && c->next)
{
struct lp_chunk *d = c->next;
c->next = d->next;
free_page(d);
}
while (c = m->first_large)
{
m->first_large = c->next;
xfree(c);
}
m->total_large = 0;
lp_restore(m, m->initial);
m->initial = lp_save(m);
}
/**
@ -215,13 +200,19 @@ lp_flush(linpool *m)
* This function saves the state of a linear memory pool. Saved state can be
* used later to restore the pool (to free memory allocated since).
*/
void
lp_save(linpool *m, lp_state *p)
struct lp_state *
lp_save(linpool *m)
{
p->current = m->current;
p->large = m->first_large;
p->total_large = m->total_large;
p->ptr = m->ptr;
ASSERT_DIE(DG_IS_LOCKED(resource_parent(&m->r)->domain));
struct lp_state *p = lp_alloc(m, sizeof(struct lp_state));
ASSERT_DIE(m->current);
*p = (struct lp_state) {
.current = m->current,
.large = m->first_large,
.total_large = m->total_large,
};
return p;
}
/**
@ -238,11 +229,13 @@ void
lp_restore(linpool *m, lp_state *p)
{
struct lp_chunk *c;
ASSERT_DIE(DG_IS_LOCKED(resource_parent(&m->r)->domain));
/* Move ptr to the saved pos and free all newer large chunks */
m->current = c = p->current ?: m->first;
m->ptr = p->ptr ?: (c ? c->data : NULL);
m->end = c ? (c->data + LP_DATA_SIZE) : NULL;
ASSERT_DIE(p->current);
m->current = c = p->current;
m->ptr = (byte *) p;
m->end = c->data + LP_DATA_SIZE;
m->total_large = p->total_large;
while ((c = m->first_large) && (c != p->large))
@ -250,6 +243,12 @@ lp_restore(linpool *m, lp_state *p)
m->first_large = c->next;
xfree(c);
}
while (c = m->current->next)
{
m->current->next = c->next;
free_page(c);
}
}
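A sketch of the save/restore pattern the rewritten lp_flush() builds on; the caller is assumed to hold the pool's domain, as the assertions above require:
static void
scratch_work(linpool *lp)
{
  struct lp_state *s = lp_save(lp);   /* the state lives inside the pool */
  void *tmp = lp_alloc(lp, 4096);
  memset(tmp, 0, 4096);               /* ... use the scratch space ... */
  lp_restore(lp, s);   /* frees tmp and everything allocated after lp_save() */
}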
static void
@ -271,11 +270,12 @@ lp_free(resource *r)
}
static void
lp_dump(resource *r)
lp_dump(resource *r, unsigned indent)
{
linpool *m = (linpool *) r;
struct lp_chunk *c;
int cnt, cntl;
char x[32];
for(cnt=0, c=m->first; c; c=c->next, cnt++)
;
@ -286,6 +286,14 @@ lp_dump(resource *r)
cntl,
m->total,
m->total_large);
bsprintf(x, "%%%dschunk %%p\n", indent + 2);
for (c=m->first; c; c=c->next)
debug(x, "", c);
bsprintf(x, "%%%dslarge %%p\n", indent + 2);
for (c=m->first_large; c; c=c->next)
debug(x, "", c);
}
static struct resmem

222
lib/netindex.c Normal file
View File

@ -0,0 +1,222 @@
/*
* BIRD Internet Routing Daemon -- Semi-global index of nets
*
* (c) 2023 Maria Matejka <mq@jmq.cz>
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
#include "lib/birdlib.h"
#include "lib/netindex_private.h"
#define NETINDEX_KEY(n) (n)->hash, (n)->addr
#define NETINDEX_NEXT(n) (n)->next
#define NETINDEX_EQ(h,n,i,o) ((h == i) && net_equal(n,o))
#define NETINDEX_FN(h,n) (h)
#define NETINDEX_ORDER 4 /* Initial */
#define NETINDEX_REHASH netindex_rehash
#define NETINDEX_PARAMS /8, *1, 2, 2, 4, 28
HASH_DEFINE_REHASH_FN(NETINDEX, struct netindex);
static void netindex_hash_cleanup(void *netindex_hash);
/*
* Handle for persistent or semipersistent usage
*/
struct netindex_handle {
resource r;
struct netindex *index;
netindex_hash *h;
};
static void
net_unlock_index_persistent(resource *r)
{
struct netindex_handle *nh = SKIP_BACK(struct netindex_handle, r, r);
net_unlock_index(nh->h, nh->index);
}
static void
netindex_handle_dump(resource *r, unsigned indent UNUSED)
{
struct netindex_handle *nh = SKIP_BACK(struct netindex_handle, r, r);
debug("index=%u, net=%N", nh->index->index, nh->index->addr);
}
static struct resclass netindex_handle_class = {
.name = "Netindex handle",
.size = sizeof(struct netindex_handle),
.free = net_unlock_index_persistent,
.dump = netindex_handle_dump,
};
static struct netindex *
net_lock_index_persistent(struct netindex_hash_private *hp, struct netindex *ni, pool *p)
{
if (!ni)
return NULL;
struct netindex_handle *nh = ralloc(p, &netindex_handle_class);
// log(L_TRACE "Revive index %p", ni);
lfuc_lock_revive(&ni->uc);
nh->index = ni;
nh->h = SKIP_BACK(netindex_hash, priv, hp);
return ni;
}
/*
* Index initialization
*/
netindex_hash *
netindex_hash_new(pool *sp)
{
DOMAIN(attrs) dom = DOMAIN_NEW(attrs);
LOCK_DOMAIN(attrs, dom);
pool *p = rp_new(sp, dom.attrs, "Network index");
struct netindex_hash_private *nh = mb_allocz(p, sizeof *nh);
nh->lock = dom;
nh->pool = p;
nh->cleanup_list = &global_event_list;
nh->cleanup_event = (event) { .hook = netindex_hash_cleanup, .data = nh };
UNLOCK_DOMAIN(attrs, dom);
return SKIP_BACK(netindex_hash, priv, nh);
}
static void
netindex_hash_cleanup(void *_nh)
{
NH_LOCK((netindex_hash *) _nh, nh);
for (uint t = 0; t < NET_MAX; t++)
{
if (!nh->net[t].hash.data)
continue;
HASH_WALK_FILTER(nh->net[t].hash, next, i, ii)
if (lfuc_finished(&i->uc))
{
HASH_DO_REMOVE(nh->net[t].hash, NETINDEX, ii);
hmap_clear(&nh->net[t].id_map, i->index);
if (nh->net[t].slab)
sl_free(i);
else
mb_free(i);
}
HASH_WALK_FILTER_END;
}
}
static void
netindex_hash_init(struct netindex_hash_private *hp, u8 type)
{
ASSERT_DIE(hp->net[type].block == NULL);
hp->net[type].slab = net_addr_length[type] ? sl_new(hp->pool, sizeof (struct netindex) + net_addr_length[type]) : NULL;
HASH_INIT(hp->net[type].hash, hp->pool, NETINDEX_ORDER);
hp->net[type].block_size = 128;
hp->net[type].block = mb_allocz(hp->pool, hp->net[type].block_size * sizeof (struct netindex *));
hmap_init(&hp->net[type].id_map, hp->pool, 128);
};
/*
* Private index manipulation
*/
struct netindex *
net_find_index_fragile_chain(struct netindex_hash_private *hp, const net_addr *n)
{
ASSERT_DIE(n->type < NET_MAX);
if (!hp->net[n->type].block)
return NULL;
u32 h = net_hash(n);
return HASH_FIND_CHAIN(hp->net[n->type].hash, NETINDEX, h, n);
}
struct netindex *
net_find_index_fragile(struct netindex_hash_private *hp, const net_addr *n)
{
ASSERT_DIE(n->type < NET_MAX);
if (!hp->net[n->type].block)
return NULL;
u32 h = net_hash(n);
return HASH_FIND(hp->net[n->type].hash, NETINDEX, h, n);
}
static struct netindex *
net_find_index_locked(struct netindex_hash_private *hp, const net_addr *n, pool *p)
{
struct netindex *ni = net_find_index_fragile(hp, n);
return ni ? net_lock_index_persistent(hp, ni, p) : NULL;
}
static struct netindex *
net_new_index_locked(struct netindex_hash_private *hp, const net_addr *n, pool *p)
{
if (!hp->net[n->type].block)
netindex_hash_init(hp, n->type);
u32 i = hmap_first_zero(&hp->net[n->type].id_map);
hmap_set(&hp->net[n->type].id_map, i);
struct netindex *ni = hp->net[n->type].slab ?
sl_alloc(hp->net[n->type].slab) :
mb_alloc(hp->pool, n->length + sizeof *ni);
*ni = (struct netindex) {
.hash = net_hash(n),
.index = i,
};
net_copy(ni->addr, n);
HASH_INSERT2(hp->net[n->type].hash, NETINDEX, hp->pool, ni);
return net_lock_index_persistent(hp, ni, p);
}
/*
* Public entry points
*/
void net_lock_index(netindex_hash *h UNUSED, struct netindex *i)
{
// log(L_TRACE "Lock index %p", i);
lfuc_lock(&i->uc);
}
void net_unlock_index(netindex_hash *h, struct netindex *i)
{
// log(L_TRACE "Unlock index %p", i);
lfuc_unlock(&i->uc, h->cleanup_list, &h->cleanup_event);
}
struct netindex *
net_find_index_persistent(netindex_hash *h, const net_addr *n, pool *p)
{
NH_LOCK(h, hp);
return net_find_index_locked(hp, n, p);
}
struct netindex *
net_get_index_persistent(netindex_hash *h, const net_addr *n, pool *p)
{
NH_LOCK(h, hp);
return
net_find_index_locked(hp, n, p) ?:
net_new_index_locked(hp, n, p);
}
struct netindex *
net_resolve_index_persistent(netindex_hash *h, u8 net_type, u32 i, pool *p)
{
NH_LOCK(h, hp);
return net_lock_index_persistent(hp, hp->net[net_type].block_size > i ? hp->net[net_type].block[i] : NULL, p);
}
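A sketch of the round trip through the index, using the task-scoped wrappers declared in lib/netindex.h below (the usecount is held by tmp_res.pool until the current task ends):
static void
index_example(netindex_hash *h, const net_addr *n)
{
  struct netindex *ni = net_get_index(h, n);   /* allocates if missing */
  u32 id = ni->index;                          /* stable numeric handle */
  struct netindex *same = net_resolve_index(h, n->type, id);
  ASSERT_DIE(same == ni);
}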

55
lib/netindex.h Normal file
View File

@ -0,0 +1,55 @@
/*
* BIRD Internet Routing Daemon -- Semi-global index of nets
*
* (c) 2023 Maria Matejka <mq@jmq.cz>
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
#ifndef _BIRD_LIB_NETINDEX_H_
#define _BIRD_LIB_NETINDEX_H_
#include "lib/bitmap.h"
#include "lib/hash.h"
#include "lib/lockfree.h"
#include "lib/net.h"
#include "lib/resource.h"
/* Index object */
struct netindex {
struct netindex *next; /* Next in hash chain */
u32 hash; /* Cached hash value */
u32 index; /* Assigned index */
struct lfuc uc; /* Atomic usecount */
net_addr addr[0]; /* The net itself (one) */
};
/* Index hash: data structure completely opaque, use handlers */
typedef union netindex_hash netindex_hash;
/* Initialization */
netindex_hash *netindex_hash_new(pool *);
/* Find/get/resolve index and allocate its usecount to the given pool */
struct netindex *net_find_index_persistent(netindex_hash *, const net_addr *, pool *);
struct netindex *net_get_index_persistent(netindex_hash *, const net_addr *, pool *);
struct netindex *net_resolve_index_persistent(netindex_hash *, u8, u32, pool *);
/* Find/get/resolve index; pointer valid until end of task */
static inline struct netindex *net_find_index(netindex_hash *h, const net_addr *n)
{ return net_find_index_persistent(h, n, tmp_res.pool); }
static inline struct netindex *net_get_index(netindex_hash *h, const net_addr *n)
{ return net_get_index_persistent(h, n, tmp_res.pool); }
static inline struct netindex *net_resolve_index(netindex_hash *h, u8 net_type, u32 index)
{ return net_resolve_index_persistent(h, net_type, index, tmp_res.pool); }
/* Update use-count without allocating a handle. Take same care
* to ensure that your locks and unlocks are always balanced. */
void net_lock_index(netindex_hash *h, struct netindex *i);
void net_unlock_index(netindex_hash *h, struct netindex *i);
/* Retrieve the index from its addr pointer */
#define NET_TO_INDEX(a) \
SKIP_BACK(struct netindex, addr, TYPE_CAST(net_addr *, net_addr (*)[0], a))
#endif //_BIRD_LIB_NETINDEX_H_

47
lib/netindex_private.h Normal file
View File

@ -0,0 +1,47 @@
/*
* BIRD Internet Routing Daemon -- Semi-global index of nets
*
* (c) 2023 Maria Matejka <mq@jmq.cz>
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
#ifndef _BIRD_LIB_NETINDEX_PRIVATE_H_
#define _BIRD_LIB_NETINDEX_PRIVATE_H_
#include "lib/netindex.h"
#define NETINDEX_HASH_PUBLIC \
DOMAIN(attrs) lock; /* Assigned lock */ \
event_list *cleanup_list; /* Cleanup event list */ \
event cleanup_event; /* Cleanup event */
struct netindex_hash_private {
struct { NETINDEX_HASH_PUBLIC; };
struct netindex_hash_private **locked_at;
pool *pool;
struct {
slab *slab;
HASH(struct netindex) hash;
uint block_size;
struct netindex **block;
struct hmap id_map;
} net[NET_MAX];
};
typedef union netindex_hash {
struct { NETINDEX_HASH_PUBLIC; };
struct netindex_hash_private priv;
} netindex_hash;
LOBJ_UNLOCK_CLEANUP(netindex_hash, attrs);
#define NH_LOCK(h, hp) LOBJ_LOCK(h, hp, netindex_hash, attrs)
/* Find indices in a locked context with no usecounting */
struct netindex *net_find_index_fragile(struct netindex_hash_private *hp, const net_addr *n);
/* The same but instead of returning the exact match,
* return the first item in hash chain */
struct netindex *net_find_index_fragile_chain(struct netindex_hash_private *hp, const net_addr *n);
#endif

79
lib/rcu.c Normal file
View File

@ -0,0 +1,79 @@
/*
* BIRD Library -- Read-Copy-Update Basic Operations
*
* (c) 2021 Maria Matejka <mq@jmq.cz>
* (c) 2021 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
* Note: all the relevant patents shall be expired.
*
* Using the Supplementary Material for User-Level Implementations of Read-Copy-Update
* by Matthieu Desnoyers, Paul E. McKenney, Alan S. Stern, Michel R. Dagenais and Jonathan Walpole
* obtained from https://www.efficios.com/pub/rcu/urcu-supp-accepted.pdf
*/
#include "lib/rcu.h"
#include "lib/io-loop.h"
#include "lib/locking.h"
_Atomic uint rcu_gp_ctl = RCU_NEST_CNT;
_Thread_local struct rcu_thread *this_rcu_thread = NULL;
static list rcu_thread_list;
static struct rcu_thread main_rcu_thread;
static DOMAIN(resource) rcu_domain;
static int
rcu_gp_ongoing(_Atomic uint *ctl)
{
uint val = atomic_load(ctl);
return (val & RCU_NEST_CNT) && ((val ^ rcu_gp_ctl) & RCU_GP_PHASE);
}
static void
update_counter_and_wait(void)
{
atomic_fetch_xor(&rcu_gp_ctl, RCU_GP_PHASE);
struct rcu_thread *rc;
WALK_LIST(rc, rcu_thread_list)
while (rcu_gp_ongoing(&rc->ctl))
birdloop_yield();
}
void
synchronize_rcu(void)
{
LOCK_DOMAIN(resource, rcu_domain);
update_counter_and_wait();
update_counter_and_wait();
UNLOCK_DOMAIN(resource, rcu_domain);
}
void
rcu_thread_start(struct rcu_thread *rc)
{
LOCK_DOMAIN(resource, rcu_domain);
add_tail(&rcu_thread_list, &rc->n);
this_rcu_thread = rc;
UNLOCK_DOMAIN(resource, rcu_domain);
}
void
rcu_thread_stop(struct rcu_thread *rc)
{
LOCK_DOMAIN(resource, rcu_domain);
this_rcu_thread = NULL;
rem_node(&rc->n);
UNLOCK_DOMAIN(resource, rcu_domain);
}
void
rcu_init(void)
{
rcu_domain = DOMAIN_NEW(resource);
DOMAIN_SETUP(resource, rcu_domain, "Read-Copy-Update", NULL);
init_list(&rcu_thread_list);
rcu_thread_start(&main_rcu_thread);
}

55
lib/rcu.h Normal file
View File

@ -0,0 +1,55 @@
/*
* BIRD Library -- Read-Copy-Update Basic Operations
*
* (c) 2021 Maria Matejka <mq@jmq.cz>
* (c) 2021 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
* Note: all the relevant patents shall be expired.
*/
#ifndef _BIRD_RCU_H_
#define _BIRD_RCU_H_
#include "lib/birdlib.h"
#include "lib/lists.h"
#include <stdatomic.h>
#define RCU_GP_PHASE 0x100000
#define RCU_NEST_MASK 0x0fffff
#define RCU_NEST_CNT 0x000001
extern _Atomic uint rcu_gp_ctl;
struct rcu_thread {
node n;
_Atomic uint ctl;
};
extern _Thread_local struct rcu_thread *this_rcu_thread;
static inline void rcu_read_lock(void)
{
uint cmp = atomic_load_explicit(&this_rcu_thread->ctl, memory_order_acquire);
if (cmp & RCU_NEST_MASK)
atomic_store_explicit(&this_rcu_thread->ctl, cmp + RCU_NEST_CNT, memory_order_relaxed);
else
atomic_store(&this_rcu_thread->ctl, atomic_load_explicit(&rcu_gp_ctl, memory_order_acquire));
}
static inline void rcu_read_unlock(void)
{
atomic_fetch_sub(&this_rcu_thread->ctl, RCU_NEST_CNT);
}
void synchronize_rcu(void);
/* Registering and unregistering a birdloop. To be called from birdloop implementation */
void rcu_thread_start(struct rcu_thread *);
void rcu_thread_stop(struct rcu_thread *);
/* Run this from resource init */
void rcu_init(void);
#endif
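A sketch of the classic reader/writer split these primitives enable, with a hypothetical shared configuration pointer:
struct my_cfg { uint version; /* ... */ };
static struct my_cfg * _Atomic current_cfg;
static uint
read_version(void)
{
  rcu_read_lock();
  struct my_cfg *c = atomic_load_explicit(&current_cfg, memory_order_acquire);
  uint v = c ? c->version : 0;   /* c stays valid inside the critical section */
  rcu_read_unlock();
  return v;
}
static void
replace_cfg(struct my_cfg *new_cfg)
{
  struct my_cfg *old = atomic_exchange_explicit(&current_cfg, new_cfg,
						memory_order_acq_rel);
  synchronize_rcu();   /* wait until all readers of the old pointer leave */
  mb_free(old);        /* hypothetical deallocation, now safe */
}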

View File

@ -14,6 +14,7 @@
#include "nest/bird.h"
#include "lib/resource.h"
#include "lib/string.h"
#include "lib/rcu.h"
/**
* DOC: Resource pools
@ -29,13 +30,7 @@
* is freed upon shutdown of the module.
*/
struct pool {
resource r;
list inside;
const char *name;
};
static void pool_dump(resource *);
static void pool_dump(resource *, unsigned);
static void pool_free(resource *);
static resource *pool_lookup(resource *, unsigned long);
static struct resmem pool_memsize(resource *P);
@ -51,7 +46,18 @@ static struct resclass pool_class = {
pool root_pool;
static int indent;
static void
rp_init(pool *z, struct domain_generic *dom, const char *name)
{
ASSERT_DIE(DG_IS_LOCKED(dom));
if (name && !domain_name(dom))
domain_setup(dom, name, z);
z->name = name;
z->domain = dom;
z->inside = (TLIST_LIST(resource)) {};
}
/**
* rp_new - create a resource pool
@ -62,73 +68,108 @@ static int indent;
* parent pool.
*/
pool *
rp_new(pool *p, const char *name)
rp_new(pool *p, struct domain_generic *dom, const char *name)
{
pool *z = ralloc(p, &pool_class);
z->name = name;
init_list(&z->inside);
if (dg_order(p->domain) > dg_order(dom))
bug("Requested reverse order pool creation: %s (%s, order %d) can't be a parent of %s (%s, order %d)",
p->name, domain_name(p->domain), dg_order(p->domain),
name, domain_name(dom), dg_order(dom));
if ((dg_order(p->domain) == dg_order(dom)) && (p->domain != dom))
bug("Requested incomparable order pool creation: %s (%s, order %d) can't be a parent of %s (%s, order %d)",
p->name, domain_name(p->domain), dg_order(p->domain),
name, domain_name(dom), dg_order(dom));
rp_init(z, dom, name);
return z;
}
pool *
rp_newf(pool *p, const char *fmt, ...)
rp_vnewf(pool *p, struct domain_generic *dom, const char *fmt, va_list args)
{
pool *z = rp_new(p, NULL);
pool *z = rp_new(p, dom, NULL);
z->name = mb_vsprintf(z, fmt, args);
if (!domain_name(dom))
domain_setup(dom, z->name, z);
return z;
}
pool *
rp_newf(pool *p, struct domain_generic *dom, const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
z->name = mb_vsprintf(p, fmt, args);
pool *z = rp_vnewf(p, dom, fmt, args);
va_end(args);
return z;
}
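A caller-side sketch of the reworked pool API (hypothetical pool name, not part of the commit; assumes the caller already holds the_bird lock, whose domain the root pool lives in):

static void pool_example(void)
{
  pool *p = rp_new(&root_pool, the_bird_domain.the_bird, "Example pool");
  void *block = mb_alloc(p, 128);   /* asserts the pool's domain is locked */
  /* ... use block ... */
  rp_free(p);                       /* frees block together with the pool */
}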
#define POOL_LOCK \
struct domain_generic *dom = p->domain; \
int locking = !DG_IS_LOCKED(dom); \
if (locking) \
DG_LOCK(dom);
#define POOL_UNLOCK if (locking) DG_UNLOCK(dom);
void rp_free(pool *p)
{
ASSERT_DIE(DG_IS_LOCKED(p->domain));
rfree(p);
}
static void
pool_free(resource *P)
{
pool *p = (pool *) P;
resource *r, *rr;
r = HEAD(p->inside);
while (rr = (resource *) r->n.next)
POOL_LOCK;
WALK_TLIST_DELSAFE(resource, r, &p->inside)
{
r->class->free(r);
xfree(r);
r = rr;
}
POOL_UNLOCK;
}
static void
pool_dump(resource *P)
pool_dump(resource *P, unsigned indent)
{
pool *p = (pool *) P;
resource *r;
POOL_LOCK;
debug("%s\n", p->name);
indent += 3;
WALK_LIST(r, p->inside)
rdump(r);
indent -= 3;
WALK_TLIST_DELSAFE(resource, r, &p->inside)
rdump(r, indent + 3);
POOL_UNLOCK;
}
static struct resmem
pool_memsize(resource *P)
{
pool *p = (pool *) P;
resource *r;
struct resmem sum = {
.effective = 0,
.overhead = sizeof(pool) + ALLOC_OVERHEAD,
};
WALK_LIST(r, p->inside)
POOL_LOCK;
WALK_TLIST(resource, r, &p->inside)
{
struct resmem add = rmemsize(r);
sum.effective += add.effective;
sum.overhead += add.overhead;
}
POOL_UNLOCK;
return sum;
}
@ -136,12 +177,16 @@ static resource *
pool_lookup(resource *P, unsigned long a)
{
pool *p = (pool *) P;
resource *r, *q;
resource *q = NULL;
WALK_LIST(r, p->inside)
POOL_LOCK;
WALK_TLIST(resource, r, &p->inside)
if (r->class->lookup && (q = r->class->lookup(r, a)))
return q;
return NULL;
break;
POOL_UNLOCK;
return q;
}
/**
@ -155,13 +200,13 @@ pool_lookup(resource *P, unsigned long a)
void rmove(void *res, pool *p)
{
resource *r = res;
pool *orig = resource_parent(r);
if (r)
{
if (r->n.next)
rem_node(&r->n);
add_tail(&p->inside, &r->n);
}
ASSERT_DIE(DG_IS_LOCKED(orig->domain));
ASSERT_DIE(DG_IS_LOCKED(p->domain));
resource_rem_node(&orig->inside, r);
resource_add_tail(&p->inside, r);
}
/**
@ -182,8 +227,10 @@ rfree(void *res)
if (!r)
return;
if (r->n.next)
rem_node(&r->n);
pool *orig = resource_parent(r);
ASSERT_DIE(DG_IS_LOCKED(orig->domain));
resource_rem_node(&orig->inside, r);
r->class->free(r);
r->class = NULL;
xfree(r);
@ -199,7 +246,7 @@ rfree(void *res)
* It works by calling a class-specific dump function.
*/
void
rdump(void *res)
rdump(void *res, unsigned indent)
{
char x[16];
resource *r = res;
@ -209,7 +256,7 @@ rdump(void *res)
if (r)
{
debug("%s ", r->class->name);
r->class->dump(r);
r->class->dump(r, indent);
}
else
debug("NULL\n");
@ -242,12 +289,14 @@ rmemsize(void *res)
void *
ralloc(pool *p, struct resclass *c)
{
ASSERT_DIE(DG_IS_LOCKED(p->domain));
resource *r = xmalloc(c->size);
bzero(r, c->size);
r->class = c;
if (p)
add_tail(&p->inside, &r->n);
resource_add_tail(&p->inside, r);
return r;
}
@ -269,7 +318,7 @@ rlookup(unsigned long a)
debug("Looking up %08lx\n", a);
if (r = pool_lookup(&root_pool.r, a))
rdump(r);
rdump(r, 3);
else
debug("Not found.\n");
}
@ -284,14 +333,36 @@ rlookup(unsigned long a)
void
resource_init(void)
{
rcu_init();
resource_sys_init();
root_pool.r.class = &pool_class;
root_pool.name = "Root";
init_list(&root_pool.inside);
tmp_init(&root_pool);
rp_init(&root_pool, the_bird_domain.the_bird, "Root");
tmp_init(&root_pool, the_bird_domain.the_bird);
}
_Thread_local struct tmp_resources tmp_res;
void
tmp_init(pool *p, struct domain_generic *dom)
{
tmp_res.lp = lp_new_default(p);
tmp_res.parent = p;
tmp_res.pool = rp_new(p, dom, "TMP");
tmp_res.domain = dom;
}
void
tmp_flush(void)
{
ASSERT_DIE(DG_IS_LOCKED(tmp_res.domain));
lp_flush(tmp_linpool);
rp_free(tmp_res.pool);
tmp_res.pool = rp_new(tmp_res.parent, tmp_res.domain, "TMP");
}
/**
* DOC: Memory blocks
*
@ -316,7 +387,7 @@ static void mbl_free(resource *r UNUSED)
{
}
static void mbl_debug(resource *r)
static void mbl_debug(resource *r, unsigned indent UNUSED)
{
struct mblock *m = (struct mblock *) r;
@ -368,11 +439,13 @@ static struct resclass mb_class = {
void *
mb_alloc(pool *p, unsigned size)
{
ASSERT_DIE(DG_IS_LOCKED(p->domain));
struct mblock *b = xmalloc(sizeof(struct mblock) + size);
b->r.class = &mb_class;
b->r.n = (node) {};
add_tail(&p->inside, &b->r.n);
b->r.n = (struct resource_node) {};
resource_add_tail(&p->inside, &b->r);
b->size = size;
return b->data;
}
@ -417,10 +490,14 @@ void *
mb_realloc(void *m, unsigned size)
{
struct mblock *b = SKIP_BACK(struct mblock, data, m);
struct pool *p = resource_parent(&b->r);
ASSERT_DIE(DG_IS_LOCKED(p->domain));
b = xrealloc(b, sizeof(struct mblock) + size);
update_node(&b->r.n);
b->size = size;
resource_update_node(&p->inside, &b->r);
return b->data;
}
@ -438,7 +515,7 @@ mb_free(void *m)
return;
struct mblock *b = SKIP_BACK(struct mblock, data, m);
rfree(b);
rfree(&b->r);
}
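Taken together, the block allocator is now used roughly like this (a sketch; assumes the caller holds the pool's domain lock):

void *m = mb_alloc(p, 64);    /* enlisted in pool p */
m = mb_realloc(m, 128);       /* content kept, node relinked in the same pool */
mb_free(m);                   /* unlinks from the pool and frees the memory */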


@ -10,7 +10,10 @@
#ifndef _BIRD_RESOURCE_H_
#define _BIRD_RESOURCE_H_
#include "lib/lists.h"
#include "lib/locking.h"
#include "lib/tlists.h"
#include <stdarg.h>
struct resmem {
size_t effective; /* Memory actually used for data storage */
@ -19,18 +22,27 @@ struct resmem {
/* Resource */
#define TLIST_PREFIX resource
#define TLIST_TYPE struct resource
#define TLIST_ITEM n
#define TLIST_WANT_WALK
#define TLIST_WANT_ADD_TAIL
#define TLIST_WANT_UPDATE_NODE
typedef struct resource {
node n; /* Inside resource pool */
struct resclass *class; /* Resource class */
TLIST_DEFAULT_NODE; /* Inside resource pool */
const struct resclass *class; /* Resource class */
} resource;
#include "lib/tlists.h"
/* Resource class */
struct resclass {
char *name; /* Resource class name */
unsigned size; /* Standard size of single resource */
void (*free)(resource *); /* Freeing function */
void (*dump)(resource *); /* Dump to debug output */
void (*dump)(resource *, unsigned indent); /* Dump to debug output */
resource *(*lookup)(resource *, unsigned long); /* Look up address (only for debugging) */
struct resmem (*memsize)(resource *); /* Return size of memory used by the resource, may be NULL */
};
@ -40,21 +52,33 @@ struct resclass {
/* Generic resource manipulation */
typedef struct pool pool;
typedef struct pool {
resource r;
TLIST_LIST(resource) inside;
struct domain_generic *domain;
const char *name;
} pool;
void resource_init(void);
pool *rp_new(pool *, const char *); /* Create new pool */
pool *rp_newf(pool *, const char *, ...); /* Create a new pool with a formatted string as its name */
void rfree(void *); /* Free single resource */
void rdump(void *); /* Dump to debug output */
void rdump(void *, unsigned indent); /* Dump to debug output */
struct resmem rmemsize(void *res); /* Return size of memory used by the resource */
void rlookup(unsigned long); /* Look up address (only for debugging) */
void rmove(void *, pool *); /* Move to a different pool */
void *ralloc(pool *, struct resclass *);
pool *rp_new(pool *, struct domain_generic *, const char *); /* Create a new pool */
pool *rp_newf(pool *, struct domain_generic *, const char *, ...); /* Create a new pool with a formatted string as its name */
pool *rp_vnewf(pool *, struct domain_generic *, const char *, va_list); /* Create a new pool with a formatted string as its name */
void rp_free(pool *p); /* Free the whole pool */
extern pool root_pool;
static inline pool *resource_parent(resource *r)
{ return SKIP_BACK(pool, inside, resource_enlisted(r)); }
/* Normal memory blocks */
void *mb_alloc(pool *, unsigned size);
@ -68,7 +92,6 @@ typedef struct linpool linpool;
typedef struct lp_state {
void *current, *large;
byte *ptr;
uint total_large;
} lp_state;
@ -77,17 +100,28 @@ void *lp_alloc(linpool *, unsigned size); /* Aligned */
void *lp_allocu(linpool *, unsigned size); /* Unaligned */
void *lp_allocz(linpool *, unsigned size); /* With clear */
void lp_flush(linpool *); /* Free everything, but leave linpool */
void lp_save(linpool *m, lp_state *p); /* Save state */
lp_state *lp_save(linpool *m); /* Save state */
void lp_restore(linpool *m, lp_state *p); /* Restore state */
extern _Thread_local linpool *tmp_linpool; /* Temporary linpool autoflushed regularily */
#define LP_SAVED(m) for (struct lp_state *_lp_state = lp_save(m); _lp_state; lp_restore(m, _lp_state), _lp_state = NULL)
#define TMP_SAVED LP_SAVED(tmp_linpool)
struct tmp_resources {
pool *pool, *parent;
linpool *lp;
struct domain_generic *domain;
};
extern _Thread_local struct tmp_resources tmp_res;
#define tmp_linpool tmp_res.lp
#define tmp_alloc(sz) lp_alloc(tmp_linpool, sz)
#define tmp_allocu(sz) lp_allocu(tmp_linpool, sz)
#define tmp_allocz(sz) lp_allocz(tmp_linpool, sz)
#define tmp_init(p) tmp_linpool = lp_new_default(p)
#define tmp_flush() lp_flush(tmp_linpool)
void tmp_init(pool *p, struct domain_generic *dg);
void tmp_flush(void);
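A sketch of how the thread-local temporary allocator is meant to be used after this change (hypothetical snippet): everything allocated inside a TMP_SAVED block is rolled back when the block exits.

TMP_SAVED
{
  char *scratch = tmp_alloc(512);   /* valid only inside this block */
  /* ... fill and use scratch ... */
}                                   /* lp_restore() runs here, scratch is gone */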
#define lp_new_default lp_new
@ -99,6 +133,7 @@ slab *sl_new(pool *, unsigned size);
void *sl_alloc(slab *);
void *sl_allocz(slab *);
void sl_free(void *);
void sl_delete(slab *);
/*
* Low-level memory allocation functions, please don't use
@ -108,9 +143,13 @@ void sl_free(void *);
void buffer_realloc(void **buf, unsigned *size, unsigned need, unsigned item_size);
/* Allocator of whole pages; for use in slabs and other high-level allocators. */
#define PAGE_HEAD(x) ((void *) (((uintptr_t) (x)) & ~(page_size-1)))
extern long page_size;
extern _Atomic int pages_kept;
extern _Atomic int pages_kept_locally;
void *alloc_page(void);
void free_page(void *);
void flush_local_pages(void);
void resource_sys_init(void);

553
lib/route.h Normal file

@ -0,0 +1,553 @@
/*
* BIRD Internet Routing Daemon -- Routing data structures
*
* (c) 1998--2000 Martin Mares <mj@ucw.cz>
* (c) 2022 Maria Matejka <mq@jmq.cz>
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
#ifndef _BIRD_LIB_ROUTE_H_
#define _BIRD_LIB_ROUTE_H_
#undef RT_SOURCE_DEBUG
#include "lib/type.h"
#include "lib/rcu.h"
#include "lib/hash.h"
#include "lib/event.h"
#include "lib/lockfree.h"
struct network;
struct proto;
struct cli;
struct rtable_private;
struct rte_storage;
#define RTE_IN_TABLE_WRITABLE \
byte pflags; /* Protocol-specific flags; may change in-table (!) */ \
u8 stale_cycle; /* Auxiliary value for route refresh; may change in-table (!) */
typedef struct rte {
RTE_IN_TABLE_WRITABLE;
byte flags; /* Table-specific flags */
u8 generation; /* If this route import is based on another previously exported route,
this value should be 1 + MAX(generation of the parent routes).
Otherwise the route is independent and this value is zero. */
u32 id; /* Table specific route id */
struct ea_list *attrs; /* Attributes of this route */
const net_addr *net; /* Network this RTE belongs to */
struct rte_src *src; /* Route source that created the route */
struct rt_import_hook *sender; /* Import hook used to send the route to the routing table */
btime lastmod; /* Last modified (set by table) */
} rte;
#define REF_FILTERED 2 /* Route is rejected by import filter */
#define REF_PENDING 32 /* Route has not propagated completely yet */
/* Route is valid for propagation (may depend on other flags in the future), accepts NULL */
static inline int rte_is_valid(const rte *r) { return r && !(r->flags & REF_FILTERED); }
/* Route just has REF_FILTERED flag */
static inline int rte_is_filtered(const rte *r) { return !!(r->flags & REF_FILTERED); }
/* Strip the route of the table-specific values */
static inline rte rte_init_from(const rte *r)
{
return (rte) {
.attrs = r->attrs,
.net = r->net,
.src = r->src,
};
}
int rte_same(const rte *, const rte *);
struct rte_src {
struct rte_src *next; /* Hash chain */
struct rte_owner *owner; /* Route source owner */
u64 private_id; /* Private ID, assigned by the protocol */
u32 global_id; /* Globally unique ID of the source */
struct lfuc uc; /* Use count */
};
struct rte_owner_class {
void (*get_route_info)(const rte *, byte *buf); /* Get route information (for `show route' command) */
int (*rte_better)(const rte *, const rte *);
int (*rte_mergable)(const rte *, const rte *);
u32 (*rte_igp_metric)(const rte *);
};
struct rte_owner {
struct rte_owner_class *class;
int (*rte_recalculate)(struct rtable_private *, struct network *, struct rte_storage *new, struct rte_storage *, struct rte_storage *);
HASH(struct rte_src) hash;
const char *name;
u32 hash_key;
u32 uc;
u32 debug;
event_list *list;
event *prune;
event *stop;
};
extern DOMAIN(attrs) attrs_domain;
#define RTA_LOCK LOCK_DOMAIN(attrs, attrs_domain)
#define RTA_UNLOCK UNLOCK_DOMAIN(attrs, attrs_domain)
#define RTE_SRC_PU_SHIFT 44
#define RTE_SRC_IN_PROGRESS (1ULL << RTE_SRC_PU_SHIFT)
/* Get a route source. This also locks the source, therefore the caller has to
* unlock the source after the route has been propagated. */
struct rte_src *rt_get_source_o(struct rte_owner *o, u32 id);
#define rt_get_source(p, id) rt_get_source_o(&(p)->sources, (id))
struct rte_src *rt_find_source_global(u32 id);
#ifdef RT_SOURCE_DEBUG
#define rt_lock_source _rt_lock_source_internal
#define rt_unlock_source _rt_unlock_source_internal
#endif
static inline void rt_lock_source(struct rte_src *src)
{
lfuc_lock(&src->uc);
}
static inline void rt_unlock_source(struct rte_src *src)
{
lfuc_unlock(&src->uc, src->owner->list, src->owner->prune);
}
#ifdef RT_SOURCE_DEBUG
#undef rt_lock_source
#undef rt_unlock_source
#define rt_lock_source(x) ( log(L_INFO "Lock source %uG at %s:%d", (x)->global_id, __FILE__, __LINE__), _rt_lock_source_internal(x) )
#define rt_unlock_source(x) ( log(L_INFO "Unlock source %uG at %s:%d", (x)->global_id, __FILE__, __LINE__), _rt_unlock_source_internal(x) )
#endif
void rt_init_sources(struct rte_owner *, const char *name, event_list *list);
void rt_destroy_sources(struct rte_owner *, event *);
void rt_dump_sources(struct rte_owner *);
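A usage sketch of the route source API (hypothetical protocol code; the rt_get_source() macro assumes p has a struct rte_owner member named sources): the source comes back already locked, so the caller drops the reference once the route has been propagated.

struct rte_src *src = rt_get_source(p, private_id);   /* returned locked */
rte e = (rte) { .src = src, /* .net, .attrs, ... */ };
/* ... announce e via the import hook ... */
rt_unlock_source(src);    /* may schedule the owner's prune event */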
/*
* Route Attributes
*
* Beware: All standard BGP attributes must be represented here instead
* of making them local to the route. This is needed to ensure proper
* construction of BGP route attribute lists.
*/
/* Nexthop structure */
struct nexthop {
ip_addr gw; /* Next hop */
struct iface *iface; /* Outgoing interface */
byte flags;
byte weight;
byte labels; /* Number of all labels */
u32 label[0];
};
/* For packing one into eattrs */
struct nexthop_adata {
struct adata ad;
/* There is either a set of nexthops or a special destination (RTD_*) */
union {
struct nexthop nh;
uint dest;
};
};
/* For MPLS label stack generation */
struct nexthop_adata_mpls {
struct nexthop_adata nhad;
u32 label_space[MPLS_MAX_LABEL_STACK];
};
#define NEXTHOP_DEST_SIZE (OFFSETOF(struct nexthop_adata, dest) + sizeof(uint) - OFFSETOF(struct adata, data))
#define NEXTHOP_DEST_LITERAL(x) ((struct nexthop_adata) { \
.ad.length = NEXTHOP_DEST_SIZE, .dest = (x), })
#define RNF_ONLINK 0x1 /* Gateway is onlink regardless of IP ranges */
#define RTS_STATIC 1 /* Normal static route */
#define RTS_INHERIT 2 /* Route inherited from kernel */
#define RTS_DEVICE 3 /* Device route */
#define RTS_STATIC_DEVICE 4 /* Static device route */
#define RTS_REDIRECT 5 /* Learned via redirect */
#define RTS_RIP 6 /* RIP route */
#define RTS_OSPF 7 /* OSPF route */
#define RTS_OSPF_IA 8 /* OSPF inter-area route */
#define RTS_OSPF_EXT1 9 /* OSPF external route type 1 */
#define RTS_OSPF_EXT2 10 /* OSPF external route type 2 */
#define RTS_BGP 11 /* BGP route */
#define RTS_PIPE 12 /* Inter-table wormhole */
#define RTS_BABEL 13 /* Babel route */
#define RTS_RPKI 14 /* Route Origin Authorization */
#define RTS_PERF 15 /* Perf checker */
#define RTS_L3VPN 16 /* MPLS L3VPN */
#define RTS_AGGREGATED 17 /* Aggregated route */
#define RTS_MAX 18
#define RTD_NONE 0 /* Undefined next hop */
#define RTD_UNICAST 1 /* A standard next hop */
#define RTD_BLACKHOLE 2 /* Silently drop packets */
#define RTD_UNREACHABLE 3 /* Reject as unreachable */
#define RTD_PROHIBIT 4 /* Administratively prohibited */
#define RTD_MAX 5
extern const char * rta_dest_names[RTD_MAX];
static inline const char *rta_dest_name(uint n)
{ return (n < RTD_MAX) ? rta_dest_names[n] : "???"; }
/*
* Extended Route Attributes
*/
typedef struct eattr {
word id; /* EA_CODE(PROTOCOL_..., protocol-dependent ID) */
byte flags; /* Protocol-dependent flags */
byte type; /* Attribute type */
byte rfu:5;
byte originated:1; /* The attribute has originated locally */
byte fresh:1; /* An uncached attribute (e.g. modified in export filter) */
byte undef:1; /* Explicitly undefined */
PADDING(unused, 3, 3);
union bval u;
} eattr;
#define EA_CODE_MASK 0xffff
#define EA_ALLOW_UNDEF 0x10000 /* ea_find: allow EAF_TYPE_UNDEF */
#define EA_BIT(n) ((n) << 24) /* Used in bitfield accessors */
#define EA_BIT_GET(ea) ((ea) >> 24)
typedef struct ea_list {
struct ea_list *next; /* In case we have an override list */
byte flags; /* Flags: EALF_... */
byte rfu;
word count; /* Number of attributes */
eattr attrs[0]; /* Attribute definitions themselves */
} ea_list;
struct ea_storage {
struct ea_storage *next_hash; /* Next in hash chain */
struct ea_storage **pprev_hash; /* Previous in hash chain */
_Atomic u32 uc; /* Use count */
u32 hash_key; /* List hash */
ea_list l[0]; /* The list itself */
};
#define EALF_SORTED 1 /* Attributes are sorted by code */
#define EALF_BISECT 2 /* Use interval bisection for searching */
#define EALF_CACHED 4 /* List is cached */
#define EALF_HUGE 8 /* List is too big to fit into slab */
struct ea_class {
#define EA_CLASS_INSIDE \
const char *name; /* Name (both print and filter) */ \
struct symbol *sym; /* Symbol to export to configs */ \
uint id; /* Autoassigned attribute ID */ \
uint uc; /* Reference count */ \
btype type; /* Data type ID */ \
u16 flags; /* Protocol-dependent flags */ \
uint readonly:1; /* This attribute can't be changed by filters */ \
uint conf:1; /* Requested by config */ \
uint hidden:1; /* Technical attribute, do not show, do not expose to filters */ \
void (*format)(const eattr *ea, byte *buf, uint size); \
void (*stored)(const eattr *ea); /* When stored into global hash */ \
void (*freed)(const eattr *ea); /* When released from global hash */
EA_CLASS_INSIDE;
};
struct ea_class_ref {
resource r;
struct ea_class *class;
};
void ea_register_init(struct ea_class *);
struct ea_class_ref *ea_register_alloc(pool *, struct ea_class);
struct ea_class_ref *ea_ref_class(pool *, struct ea_class *); /* Reference for an attribute alias */
#define EA_REGISTER_ALL_HELPER(x) ea_register_init(x);
#define EA_REGISTER_ALL(...) MACRO_FOREACH(EA_REGISTER_ALL_HELPER, __VA_ARGS__)
struct ea_class *ea_class_find_by_id(uint id);
struct ea_class *ea_class_find_by_name(const char *name);
static inline struct ea_class *ea_class_self(struct ea_class *self) { return self; }
#define ea_class_find(_arg) _Generic((_arg), \
uint: ea_class_find_by_id, \
word: ea_class_find_by_id, \
char *: ea_class_find_by_name, \
const char *: ea_class_find_by_name, \
struct ea_class *: ea_class_self)(_arg)
struct ea_walk_state {
ea_list *eattrs; /* Current ea_list, initially set by caller */
eattr *ea; /* Current eattr, initially NULL */
u32 visited[4]; /* Bitfield, limiting max to 128 */
};
#define ea_find(_l, _arg) _Generic((_arg), uint: ea_find_by_id, struct ea_class *: ea_find_by_class, char *: ea_find_by_name)(_l, _arg)
eattr *ea_find_by_id(ea_list *, unsigned ea);
static inline eattr *ea_find_by_class(ea_list *l, const struct ea_class *def)
{ return ea_find_by_id(l, def->id); }
static inline eattr *ea_find_by_name(ea_list *l, const char *name)
{
const struct ea_class *def = ea_class_find_by_name(name);
return def ? ea_find_by_class(l, def) : NULL;
}
#define ea_get_int(_l, _ident, _def) ({ \
struct ea_class *cls = ea_class_find((_ident)); \
ASSERT_DIE(cls->type & EAF_EMBEDDED); \
const eattr *ea = ea_find((_l), cls->id); \
(ea ? ea->u.data : (_def)); \
})
#define ea_get_ip(_l, _ident, _def) ({ \
struct ea_class *cls = ea_class_find((_ident)); \
ASSERT_DIE(cls->type == T_IP); \
const eattr *ea = ea_find((_l), cls->id); \
(ea ? *((const ip_addr *) ea->u.ptr->data) : (_def)); \
})
#define ea_get_adata(_l, _ident) ({ \
struct ea_class *cls = ea_class_find((_ident)); \
ASSERT_DIE(!(cls->type & EAF_EMBEDDED)); \
const eattr *ea = ea_find((_l), cls->id); \
(ea ? ea->u.ptr : &null_adata); \
})
eattr *ea_walk(struct ea_walk_state *s, uint id, uint max);
void ea_dump(ea_list *);
int ea_same(ea_list *x, ea_list *y); /* Test whether two ea_lists are identical */
uint ea_hash(ea_list *e); /* Calculate 16-bit hash value */
ea_list *ea_append(ea_list *to, ea_list *what);
void ea_format_bitfield(const struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max);
/* Normalize ea_list; allocates the result from tmp_linpool */
ea_list *ea_normalize(ea_list *e, int overlay);
uint ea_list_size(ea_list *);
void ea_list_copy(ea_list *dest, ea_list *src, uint size);
#define EA_LOCAL_LIST(N) struct { ea_list l; eattr a[N]; }
#define EA_LITERAL_EMBEDDED(_class, _flags, _val) ({ \
btype _type = (_class)->type; \
ASSERT_DIE(_type & EAF_EMBEDDED); \
EA_LITERAL_GENERIC((_class)->id, _type, _flags, .u.i = _val); \
})
#define EA_LITERAL_STORE_ADATA(_class, _flags, _buf, _len) ({ \
btype _type = (_class)->type; \
ASSERT_DIE(!(_type & EAF_EMBEDDED)); \
EA_LITERAL_GENERIC((_class)->id, _type, _flags, .u.ad = tmp_store_adata((_buf), (_len))); \
})
#define EA_LITERAL_DIRECT_ADATA(_class, _flags, _adata) ({ \
btype _type = (_class)->type; \
ASSERT_DIE(!(_type & EAF_EMBEDDED)); \
EA_LITERAL_GENERIC((_class)->id, _type, _flags, .u.ad = _adata); \
})
#define EA_LITERAL_GENERIC(_id, _type, _flags, ...) \
((eattr) { .id = _id, .type = _type, .flags = _flags, __VA_ARGS__ })
static inline eattr *
ea_set_attr(ea_list **to, eattr a)
{
EA_LOCAL_LIST(1) *ea = tmp_alloc(sizeof(*ea));
*ea = (typeof(*ea)) {
.l.flags = EALF_SORTED,
.l.count = 1,
.l.next = *to,
.a[0] = a,
};
*to = &ea->l;
return &ea->a[0];
}
static inline void
ea_unset_attr(ea_list **to, _Bool local, const struct ea_class *def)
{
ea_set_attr(to, EA_LITERAL_GENERIC(def->id, 0, 0,
.fresh = local, .originated = local, .undef = 1));
}
static inline void
ea_set_attr_u32(ea_list **to, const struct ea_class *def, uint flags, u64 data)
{ ea_set_attr(to, EA_LITERAL_EMBEDDED(def, flags, data)); }
static inline void
ea_set_attr_data(ea_list **to, const struct ea_class *def, uint flags, const void *data, uint len)
{ ea_set_attr(to, EA_LITERAL_STORE_ADATA(def, flags, data, len)); }
static inline void
ea_copy_attr(ea_list **to, ea_list *from, const struct ea_class *def)
{
eattr *e = ea_find_by_class(from, def);
if (e)
if (e->type & EAF_EMBEDDED)
ea_set_attr_u32(to, def, e->flags, e->u.data);
else
ea_set_attr_data(to, def, e->flags, e->u.ptr->data, e->u.ptr->length);
else
ea_unset_attr(to, 0, def);
}
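A sketch of composing and reading back an attribute with these helpers (hypothetical values; uses the generic preference class declared below, and note that ea_set_attr() allocates the new list head from tmp_linpool, so a temporary linpool must be set up):

ea_list *eal = NULL;
ea_set_attr_u32(&eal, &ea_gen_preference, 0, 100);   /* prepend preference */
u32 pref = ea_get_int(eal, &ea_gen_preference, 0);   /* reads back 100 */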
/*
* Common route attributes
*/
/* Preference: first-order comparison */
extern struct ea_class ea_gen_preference;
static inline u32 rt_get_preference(const rte *rt)
{ return ea_get_int(rt->attrs, &ea_gen_preference, 0); }
/* IGP metric: second-order comparison */
extern struct ea_class ea_gen_igp_metric;
u32 rt_get_igp_metric(const rte *rt);
#define IGP_METRIC_UNKNOWN 0x80000000 /* Default igp_metric used when no other
protocol-specific metric is available */
/* From: Advertising router */
extern struct ea_class ea_gen_from;
/* MPLS Label, Policy and Class */
extern struct ea_class ea_gen_mpls_label,
ea_gen_mpls_policy, ea_gen_mpls_class;
/* Source: An old method to determine the route source protocol and kind.
 * To be superseded in the near future by something more informative. */
extern struct ea_class ea_gen_source;
static inline u32 rt_get_source_attr(const rte *rt)
{ return ea_get_int(rt->attrs, &ea_gen_source, 0); }
/* Flowspec validation result */
enum flowspec_valid {
FLOWSPEC_UNKNOWN = 0,
FLOWSPEC_VALID = 1,
FLOWSPEC_INVALID = 2,
FLOWSPEC__MAX,
};
extern const char * flowspec_valid_names[FLOWSPEC__MAX];
static inline const char *flowspec_valid_name(enum flowspec_valid v)
{ return (v < FLOWSPEC__MAX) ? flowspec_valid_names[v] : "???"; }
extern struct ea_class ea_gen_flowspec_valid;
static inline enum flowspec_valid rt_get_flowspec_valid(const rte *rt)
{ return ea_get_int(rt->attrs, &ea_gen_flowspec_valid, FLOWSPEC_UNKNOWN); }
/* Next hop: For now, stored as adata */
extern struct ea_class ea_gen_nexthop;
static inline void ea_set_dest(struct ea_list **to, uint flags, uint dest)
{
struct nexthop_adata nhad = NEXTHOP_DEST_LITERAL(dest);
ea_set_attr_data(to, &ea_gen_nexthop, flags, &nhad.ad.data, nhad.ad.length);
}
/* Next hop structures */
#define NEXTHOP_ALIGNMENT (_Alignof(struct nexthop))
#define NEXTHOP_MAX_SIZE (sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK)
#define NEXTHOP_SIZE(_nh) NEXTHOP_SIZE_CNT(((_nh)->labels))
#define NEXTHOP_SIZE_CNT(cnt) BIRD_ALIGN((sizeof(struct nexthop) + sizeof(u32) * (cnt)), NEXTHOP_ALIGNMENT)
#define nexthop_size(nh) NEXTHOP_SIZE((nh))
#define NEXTHOP_NEXT(_nh) ((void *) (_nh) + NEXTHOP_SIZE(_nh))
#define NEXTHOP_END(_nhad) ((_nhad)->ad.data + (_nhad)->ad.length)
#define NEXTHOP_VALID(_nh, _nhad) ((void *) (_nh) < (void *) NEXTHOP_END(_nhad))
#define NEXTHOP_ONE(_nhad) (NEXTHOP_NEXT(&(_nhad)->nh) == NEXTHOP_END(_nhad))
#define NEXTHOP_WALK(_iter, _nhad) for ( \
struct nexthop *_iter = &(_nhad)->nh; \
(void *) _iter < (void *) NEXTHOP_END(_nhad); \
_iter = NEXTHOP_NEXT(_iter))
static inline int nexthop_same(struct nexthop_adata *x, struct nexthop_adata *y)
{ return adata_same(&x->ad, &y->ad); }
struct nexthop_adata *nexthop_merge(struct nexthop_adata *x, struct nexthop_adata *y, int max, linpool *lp);
struct nexthop_adata *nexthop_sort(struct nexthop_adata *x, linpool *lp);
int nexthop_is_sorted(struct nexthop_adata *x);
#define NEXTHOP_IS_REACHABLE(nhad) ((nhad)->ad.length > NEXTHOP_DEST_SIZE)
static inline struct nexthop_adata *
rte_get_nexthops(rte *r)
{
eattr *nhea = ea_find(r->attrs, &ea_gen_nexthop);
return nhea ? SKIP_BACK(struct nexthop_adata, ad, nhea->u.ptr) : NULL;
}
/* Route has a regular, reachable nexthop (i.e. not RTD_UNREACHABLE and the like) */
static inline int rte_is_reachable(rte *r)
{
struct nexthop_adata *nhad = rte_get_nexthops(r);
return nhad && NEXTHOP_IS_REACHABLE(nhad);
}
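A sketch of iterating the next hops of a route with these macros (hypothetical logging; %I is BIRD's ip_addr format directive):

struct nexthop_adata *nhad = rte_get_nexthops(r);
if (nhad && NEXTHOP_IS_REACHABLE(nhad))
  NEXTHOP_WALK(nh, nhad)
    log(L_INFO "via %I dev %s weight %d",
        nh->gw, nh->iface ? nh->iface->name : "?", nh->weight + 1);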
static inline int nhea_dest(eattr *nhea)
{
if (!nhea)
return RTD_NONE;
struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL;
if (NEXTHOP_IS_REACHABLE(nhad))
return RTD_UNICAST;
else
return nhad->dest;
}
static inline int rte_dest(const rte *r)
{
return nhea_dest(ea_find(r->attrs, &ea_gen_nexthop));
}
void rta_init(void);
ea_list *ea_lookup(ea_list *, int overlay); /* Get a cached (and normalized) variant of this attribute list */
static inline int ea_is_cached(const ea_list *r) { return r->flags & EALF_CACHED; }
static inline struct ea_storage *ea_get_storage(ea_list *r)
{
ASSERT_DIE(ea_is_cached(r));
return SKIP_BACK(struct ea_storage, l[0], r);
}
static inline ea_list *ea_clone(ea_list *r) {
ASSERT_DIE(0 < atomic_fetch_add_explicit(&ea_get_storage(r)->uc, 1, memory_order_acq_rel));
return r;
}
void ea__free(struct ea_storage *r);
static inline void ea_free(ea_list *l) {
if (!l) return;
struct ea_storage *r = ea_get_storage(l);
if (1 == atomic_fetch_sub_explicit(&r->uc, 1, memory_order_acq_rel)) ea__free(r);
}
void ea_dump(ea_list *);
void ea_dump_all(void);
void ea_show_list(struct cli *, ea_list *);
#define rta_lookup ea_lookup
#define rta_is_cached ea_is_cached
#define rta_clone ea_clone
#define rta_free ea_free
#endif

64
lib/settle.h Normal file

@ -0,0 +1,64 @@
/*
* BIRD -- Settle timer
*
* (c) 2022 Maria Matejka <mq@jmq.cz>
* (c) 2022 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
#ifndef _BIRD_SETTLE_H_
#define _BIRD_SETTLE_H_
#include "lib/birdlib.h"
#include "lib/timer.h"
struct settle_config {
btime min, max;
};
struct settle {
union {
/* Timer hook polymorphism. */
struct {
resource _r;
void (*hook)(struct settle *);
};
timer tm;
};
struct settle_config cf;
btime started;
};
STATIC_ASSERT(OFFSETOF(struct settle, hook) == OFFSETOF(struct settle, tm) + OFFSETOF(timer, hook));
#define SETTLE_INIT(_cfp, _hook, _data) \
  (struct settle) { \
    .tm = { \
      .data = (_data), \
      .hook = TYPE_CAST(void (*)(struct settle *), void (*)(struct timer *), (_hook)), \
    }, \
    .cf = ({ ASSERT_DIE((_cfp)->min <= (_cfp)->max); *(_cfp); }), \
  }
static inline void settle_init(struct settle *s, struct settle_config *cf, void (*hook)(struct settle *), void *data)
{
*s = SETTLE_INIT(cf, hook, data);
}
#define settle_active(s) tm_active(&(s)->tm)
static inline void settle_kick(struct settle *s, struct birdloop *loop)
{
if (!tm_active(&s->tm))
{
s->started = current_time();
tm_set_in(&s->tm, s->started + s->cf.min, loop);
}
else
{
btime now = current_time();
tm_set_in(&s->tm, MIN_(now + s->cf.min, s->started + s->cf.max), loop);
}
}
static inline void settle_cancel(struct settle *s)
{
tm_stop(&s->tm);
}
#endif
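A usage sketch (hypothetical hook and config; the btime literals assume BIRD's S/MS unit macros): every incoming event kicks the settle timer, and the hook runs once events stay quiet for cf.min, but no later than cf.max after the first kick of the burst.

static void flush_hook(struct settle *s)
{
  /* ... process everything batched since the first kick ... */
}

/* somewhere in setup, with cf.min <= cf.max: */
struct settle_config scf = { .min = 100 MS, .max = 2 S };
struct settle st;
settle_init(&st, &scf, flush_hook, NULL);

/* on every incoming event, inside the owning loop: */
settle_kick(&st, loop);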


@ -41,7 +41,7 @@
#endif
static void slab_free(resource *r);
static void slab_dump(resource *r);
static void slab_dump(resource *r, unsigned indent);
static resource *slab_lookup(resource *r, unsigned long addr);
static struct resmem slab_memsize(resource *r);
@ -118,7 +118,7 @@ slab_free(resource *r)
}
static void
slab_dump(resource *r)
slab_dump(resource *r, unsigned indent UNUSED)
{
slab *s = (slab *) r;
int cnt = 0;
@ -197,7 +197,7 @@ static struct resclass sl_class = {
slab_memsize
};
#define SL_GET_HEAD(x) ((struct sl_head *) (((uintptr_t) (x)) & ~(page_size-1)))
#define SL_GET_HEAD(x) PAGE_HEAD(x)
#define SL_HEAD_CHANGE_STATE(_s, _h, _from, _to) ({ \
ASSERT_DIE(_h->state == slh_##_from); \
@ -236,7 +236,7 @@ sl_new(pool *p, uint size)
+ sizeof(u32) * s->head_bitfield_len
+ align - 1)
/ align * align;
} while (s->objs_per_slab * size + s->head_size > page_size);
} while (s->objs_per_slab * size + s->head_size > (size_t) page_size);
if (!s->objs_per_slab)
bug("Slab: object too large");
@ -256,6 +256,7 @@ void *
sl_alloc(slab *s)
{
struct sl_head *h;
ASSERT_DIE(DG_IS_LOCKED(resource_parent(&s->r)->domain));
redo:
if (!(h = s->partial_heads.first))
@ -331,6 +332,7 @@ sl_free(void *oo)
{
struct sl_head *h = SL_GET_HEAD(oo);
struct slab *s = h->slab;
ASSERT_DIE(DG_IS_LOCKED(resource_parent(&s->r)->domain));
#ifdef POISON
memset(oo, 0xdb, s->data_size);
@ -378,7 +380,7 @@ slab_free(resource *r)
}
static void
slab_dump(resource *r)
slab_dump(resource *r, unsigned indent UNUSED)
{
slab *s = (slab *) r;
int ec=0, pc=0, fc=0;
@ -390,6 +392,15 @@ slab_dump(resource *r)
WALK_TLIST(sl_head, h, &s->full_heads)
fc++;
debug("(%de+%dp+%df blocks per %d objs per %d bytes)\n", ec, pc, fc, s->objs_per_slab, s->obj_size);
char x[16];
bsprintf(x, "%%%ds%%s %%p\n", indent + 2);
WALK_TLIST(sl_head, h, &s->full_heads)
debug(x, "", "full", h);
WALK_TLIST(sl_head, h, &s->partial_heads)
debug(x, "", "partial", h);
WALK_TLIST(sl_head, h, &s->empty_heads)
debug(x, "", "empty", h);
}
static struct resmem


@ -12,6 +12,7 @@
#include <errno.h>
#include "lib/resource.h"
#include "lib/event.h"
#ifdef HAVE_LIBSSH
#define LIBSSH_LEGACY_0_4
#include <libssh/libssh.h>
@ -79,16 +80,23 @@ typedef struct birdsock {
const char *password; /* Password for MD5 authentication */
const char *err; /* Error message */
struct ssh_sock *ssh; /* Used in SK_SSH */
struct birdloop *loop; /* BIRDLoop owning this socket */
} sock;
sock *sock_new(pool *); /* Allocate new socket */
#define sk_new(X) sock_new(X) /* Wrapper to avoid name collision with OpenSSL */
int sk_open(sock *); /* Open socket */
int sk_open(sock *, struct birdloop *); /* Open socket */
void sk_reloop(sock *, struct birdloop *); /* Move socket to another loop. Both loops must be locked. */
static inline void sk_close(sock *s) { rfree(&s->r); } /* Explicitly close socket */
int sk_rx_ready(sock *s);
_Bool sk_tx_pending(sock *s);
int sk_send(sock *, uint len); /* Send data, <0=err, >0=ok, 0=sleep */
int sk_send_to(sock *, uint len, ip_addr to, uint port); /* sk_send to given destination */
void sk_reallocate(sock *); /* Free and allocate tbuf & rbuf */
void sk_pause_rx(struct birdloop *loop, sock *s);
void sk_resume_rx(struct birdloop *loop, sock *s, int (*hook)(sock *, uint));
void sk_set_rbsize(sock *s, uint val); /* Resize RX buffer */
void sk_set_tbsize(sock *s, uint val); /* Resize TX buffer, keeping content */
void sk_set_tbuf(sock *s, void *tbuf); /* Switch TX buffer, NULL-> return to internal */
@ -112,6 +120,7 @@ int sk_set_icmp6_filter(sock *s, int p1, int p2);
void sk_log_error(sock *s, const char *p);
byte * sk_rx_buffer(sock *s, int *len); /* Temporary */
sock *sk_next(sock *s);
extern int sk_priority_control; /* Suggested priority for control traffic, should be sysdep define */
@ -126,7 +135,6 @@ extern int sk_priority_control; /* Suggested priority for control traffic, shou
#define SKF_FREEBIND 0x40 /* Allow socket to bind to a nonlocal address */
#define SKF_CONNECT 0x80 /* Connect datagram socket to given dst address/port */
#define SKF_THREAD 0x100 /* Socket used in thread, do not add to main loop */
#define SKF_TRUNCATED 0x200 /* Received packet was truncated, set by IO layer */
#define SKF_HDRINCL 0x400 /* Used internally */
#define SKF_PKTINFO 0x800 /* Used internally */


@ -36,57 +36,13 @@
#include "lib/resource.h"
#include "lib/timer.h"
struct timeloop main_timeloop;
#ifdef USE_PTHREADS
#include <pthread.h>
/* Data accessed and modified from proto/bfd/io.c */
pthread_key_t current_time_key;
static inline struct timeloop *
timeloop_current(void)
{
return pthread_getspecific(current_time_key);
}
static inline void
timeloop_init_current(void)
{
pthread_key_create(&current_time_key, NULL);
pthread_setspecific(current_time_key, &main_timeloop);
}
_Atomic btime last_time;
_Atomic btime real_time;
void wakeup_kick_current(void);
#else
/* Just use main timelooop */
static inline struct timeloop * timeloop_current(void) { return &main_timeloop; }
static inline void timeloop_init_current(void) { }
#endif
btime
current_time(void)
{
return timeloop_current()->last_time;
}
btime
current_real_time(void)
{
struct timeloop *loop = timeloop_current();
if (!loop->real_time)
times_update_real_time(loop);
return loop->real_time;
}
#define TIMER_LESS(a,b) ((a)->expires < (b)->expires)
#define TIMER_SWAP(heap,a,b,t) (t = heap[a], heap[a] = heap[b], heap[b] = t, \
@ -102,7 +58,7 @@ tm_free(resource *r)
}
static void
tm_dump(resource *r)
tm_dump(resource *r, unsigned indent UNUSED)
{
timer *t = (void *) r;
@ -112,7 +68,7 @@ tm_dump(resource *r)
if (t->recurrent)
debug("recur %d, ", t->recurrent);
if (t->expires)
debug("expires in %d ms)\n", (t->expires - current_time()) TO_MS);
debug("in loop %p expires in %d ms)\n", t->loop, (t->expires - current_time()) TO_MS);
else
debug("inactive)\n");
}
@ -135,41 +91,40 @@ tm_new(pool *p)
return t;
}
void
tm_set(timer *t, btime when)
static void
tm_set_in_tl(timer *t, btime when, struct timeloop *local_timeloop)
{
struct timeloop *loop = timeloop_current();
uint tc = timers_count(loop);
uint tc = timers_count(local_timeloop);
if (!t->expires)
{
t->index = ++tc;
t->expires = when;
BUFFER_PUSH(loop->timers) = t;
HEAP_INSERT(loop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP);
BUFFER_PUSH(local_timeloop->timers) = t;
HEAP_INSERT(local_timeloop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP);
}
else if (t->expires < when)
{
t->expires = when;
HEAP_INCREASE(loop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP, t->index);
HEAP_INCREASE(local_timeloop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP, t->index);
}
else if (t->expires > when)
{
t->expires = when;
HEAP_DECREASE(loop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP, t->index);
HEAP_DECREASE(local_timeloop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP, t->index);
}
#ifdef CONFIG_BFD
/* Hack to notify BFD loops */
if ((loop != &main_timeloop) && (t->index == 1))
wakeup_kick_current();
#endif
t->loop = local_timeloop;
if (t->index == 1)
birdloop_ping(local_timeloop->loop);
}
void
tm_start(timer *t, btime after)
tm_set_in(timer *t, btime when, struct birdloop *loop)
{
tm_set(t, current_time() + MAX(after, 0));
ASSERT_DIE(birdloop_inside(loop));
tm_set_in_tl(t, when, birdloop_time_loop(loop));
}
void
@ -178,20 +133,22 @@ tm_stop(timer *t)
if (!t->expires)
return;
struct timeloop *loop = timeloop_current();
uint tc = timers_count(loop);
TLOCK_TIMER_ASSERT(t->loop);
HEAP_DELETE(loop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP, t->index);
BUFFER_POP(loop->timers);
uint tc = timers_count(t->loop);
HEAP_DELETE(t->loop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP, t->index);
BUFFER_POP(t->loop->timers);
t->index = -1;
t->expires = 0;
t->loop = NULL;
}
void
timers_init(struct timeloop *loop, pool *p)
{
times_init(loop);
TLOCK_TIMER_ASSERT(loop);
BUFFER_INIT(loop->timers, p, 4);
BUFFER_PUSH(loop->timers) = NULL;
@ -200,13 +157,15 @@ timers_init(struct timeloop *loop, pool *p)
void io_log_event(void *hook, void *data);
void
timers_fire(struct timeloop *loop)
timers_fire(struct timeloop *loop, int io_log)
{
TLOCK_TIMER_ASSERT(loop);
btime base_time;
timer *t;
times_update(loop);
base_time = loop->last_time;
times_update();
base_time = current_time();
while (t = timers_first(loop))
{
@ -217,19 +176,19 @@ timers_fire(struct timeloop *loop)
{
btime when = t->expires + t->recurrent;
if (when <= loop->last_time)
when = loop->last_time + t->recurrent;
if (when <= base_time)
when = base_time + t->recurrent;
if (t->randomize)
when += random() % (t->randomize + 1);
tm_set(t, when);
tm_set_in_tl(t, when, loop);
}
else
tm_stop(t);
/* This is an ugly hack: we want to log just the timers executed from the main I/O loop */
if (loop == &main_timeloop)
if (io_log)
io_log_event(t->hook, t->data);
t->hook(t);
@ -237,13 +196,6 @@ timers_fire(struct timeloop *loop)
}
}
void
timer_init(void)
{
timers_init(&main_timeloop, &root_pool);
timeloop_init_current();
}
/**
* tm_parse_time - parse a date and time
@ -371,8 +323,5 @@ tm_format_real_time(char *x, size_t max, const char *fmt, btime t)
if (!strfusec(tbuf, tbuf_size, fmt, t2))
return 0;
if (!strftime(x, max, tbuf, &tm))
return 0;
return 1;
return strftime(x, max, tbuf, &tm);
}


@ -12,8 +12,14 @@
#include "nest/bird.h"
#include "lib/buffer.h"
#include "lib/io-loop.h"
#include "lib/locking.h"
#include "lib/resource.h"
#include <stdatomic.h>
extern _Atomic btime last_time;
extern _Atomic btime real_time;
typedef struct timer
{
@ -25,26 +31,29 @@ typedef struct timer
uint randomize; /* Amount of randomization */
uint recurrent; /* Timer recurrence */
struct timeloop *loop; /* Loop where the timer is active */
int index;
} timer;
struct timeloop
{
BUFFER_(timer *) timers;
btime last_time;
btime real_time;
struct domain_generic *domain;
struct birdloop *loop;
};
#define TLOCK_TIMER_ASSERT(loop) ASSERT_DIE((loop)->domain && DG_IS_LOCKED((loop)->domain))
#define TLOCK_LOCAL_ASSERT(loop) ASSERT_DIE(!(loop)->domain || DG_IS_LOCKED((loop)->domain))
static inline uint timers_count(struct timeloop *loop)
{ return loop->timers.used - 1; }
{ TLOCK_TIMER_ASSERT(loop); return loop->timers.used - 1; }
static inline timer *timers_first(struct timeloop *loop)
{ return (loop->timers.used > 1) ? loop->timers.data[1] : NULL; }
{ TLOCK_TIMER_ASSERT(loop); return (loop->timers.used > 1) ? loop->timers.data[1] : NULL; }
extern struct timeloop main_timeloop;
btime current_time(void);
btime current_real_time(void);
#define current_time() atomic_load_explicit(&last_time, memory_order_acquire)
#define current_real_time() atomic_load_explicit(&real_time, memory_order_acquire)
/* In sysdep code */
btime current_time_now(void);
@ -54,10 +63,13 @@ btime current_time_now(void);
extern btime boot_time;
timer *tm_new(pool *p);
void tm_set(timer *t, btime when);
void tm_start(timer *t, btime after);
#define tm_set(t, when) tm_set_in((t), (when), &main_birdloop)
#define tm_start(t, after) tm_start_in((t), (after), &main_birdloop)
void tm_stop(timer *t);
void tm_set_in(timer *t, btime when, struct birdloop *loop);
#define tm_start_in(t, after, loop) tm_set_in((t), (current_time() + MAX_((after), 0)), loop)
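A sketch of the loop-aware timer API (hypothetical hook; assumes the caller runs inside the target birdloop and that the S unit macro is in scope):

static void my_hook(timer *t)
{
  /* ... periodic work ... */
}

timer *t = tm_new(p);          /* allocated from pool p */
t->hook = my_hook;
t->recurrent = 10 S;           /* refire every 10 seconds */
tm_start_in(t, 10 S, loop);    /* first expiry in 10 s, in this birdloop */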
static inline int
tm_active(timer *t)
{
@ -90,22 +102,20 @@ tm_set_max(timer *t, btime when)
}
static inline void
tm_start_max(timer *t, btime after)
tm_start_max_in(timer *t, btime after, struct birdloop *loop)
{
btime rem = tm_remains(t);
tm_start(t, MAX_(rem, after));
tm_start_in(t, MAX_(rem, after), loop);
}
#define tm_start_max(t, after) tm_start_max_in(t, after, &main_birdloop)
/* In sysdep code */
void times_init(struct timeloop *loop);
void times_update(struct timeloop *loop);
void times_update_real_time(struct timeloop *loop);
void times_update(void);
/* For I/O loop */
void timers_init(struct timeloop *loop, pool *p);
void timers_fire(struct timeloop *loop);
void timer_init(void);
void timers_fire(struct timeloop *loop, int io_log);
struct timeformat {


@ -79,19 +79,27 @@ typedef struct TLIST_LIST_STRUCT {
TLIST_TYPE *last;
} TLIST_LIST_STRUCT;
static inline struct TLIST_LIST_STRUCT * TLIST_NAME(enlisted)(TLIST_TYPE *node)
{
return node->TLIST_ITEM.list;
}
#ifdef TLIST_WANT_WALK
static inline struct TLIST_NAME(node) * TLIST_NAME(node_get)(TLIST_TYPE *node)
{ return &(node->TLIST_ITEM); }
#endif
#ifdef TLIST_WANT_ADD_HEAD
#if defined(TLIST_WANT_ADD_HEAD) || defined(TLIST_WANT_ADD_AFTER)
static inline void TLIST_NAME(add_head)(TLIST_LIST_STRUCT *list, TLIST_TYPE *node)
{
ASSERT_DIE(!node->TLIST_ITEM.prev && !node->TLIST_ITEM.next);
ASSERT_DIE(!TLIST_NAME(enlisted)(node));
node->TLIST_ITEM.list = list;
if (node->TLIST_ITEM.next = list->first)
list->first->TLIST_ITEM.prev = node;
else
list->last = node;
list->first = node;
}
#endif
@ -99,17 +107,65 @@ static inline void TLIST_NAME(add_head)(TLIST_LIST_STRUCT *list, TLIST_TYPE *nod
#ifdef TLIST_WANT_ADD_TAIL
static inline void TLIST_NAME(add_tail)(TLIST_LIST_STRUCT *list, TLIST_TYPE *node)
{
ASSERT_DIE(!node->TLIST_ITEM.prev && !node->TLIST_ITEM.next);
ASSERT_DIE(!TLIST_NAME(enlisted)(node));
node->TLIST_ITEM.list = list;
if (node->TLIST_ITEM.prev = list->last)
list->last->TLIST_ITEM.next = node;
else
list->first = node;
list->last = node;
}
#endif
#ifdef TLIST_WANT_UPDATE_NODE
static inline void TLIST_NAME(update_node)(TLIST_LIST_STRUCT *list, TLIST_TYPE *node)
{
ASSERT_DIE(TLIST_NAME(enlisted)(node) == list);
if (node->TLIST_ITEM.prev)
node->TLIST_ITEM.prev->TLIST_ITEM.next = node;
else
list->first = node;
if (node->TLIST_ITEM.next)
node->TLIST_ITEM.next->TLIST_ITEM.prev = node;
else
list->last = node;
}
#endif
#ifdef TLIST_WANT_ADD_AFTER
static inline void TLIST_NAME(add_after)(TLIST_LIST_STRUCT *list, TLIST_TYPE *node, TLIST_TYPE *after)
{
ASSERT_DIE(!TLIST_NAME(enlisted)(node));
/* Adding to beginning */
if (!(node->TLIST_ITEM.prev = after))
return TLIST_NAME(add_head)(list, node);
/* OK, Adding after a real node */
node->TLIST_ITEM.list = list;
/* There is another node after the anchor */
if (node->TLIST_ITEM.next = after->TLIST_ITEM.next)
/* Link back */
node->TLIST_ITEM.next->TLIST_ITEM.prev = node;
else
/* Or we are adding the last node */
list->last = node;
/* Link forward from "after" */
after->TLIST_ITEM.next = node;
}
#endif
static inline void TLIST_NAME(rem_node)(TLIST_LIST_STRUCT *list, TLIST_TYPE *node)
{
ASSERT_DIE(TLIST_NAME(enlisted)(node) == list);
if (node->TLIST_ITEM.prev)
node->TLIST_ITEM.prev->TLIST_ITEM.next = node->TLIST_ITEM.next;
else
@ -127,6 +183,7 @@ static inline void TLIST_NAME(rem_node)(TLIST_LIST_STRUCT *list, TLIST_TYPE *nod
}
node->TLIST_ITEM.next = node->TLIST_ITEM.prev = NULL;
node->TLIST_ITEM.list = NULL;
}
#undef TLIST_PREFIX
@ -136,6 +193,7 @@ static inline void TLIST_NAME(rem_node)(TLIST_LIST_STRUCT *list, TLIST_TYPE *nod
#undef TLIST_ITEM
#undef TLIST_WANT_ADD_HEAD
#undef TLIST_WANT_ADD_TAIL
#undef TLIST_WANT_UPDATE_NODE
# endif
#else
@ -147,8 +205,13 @@ static inline void TLIST_NAME(rem_node)(TLIST_LIST_STRUCT *list, TLIST_TYPE *nod
#error "You should first include lib/tlists.h without requesting a TLIST"
#endif
#define TLIST_NODE(_name, _type) struct _name##_node { _type *next; _type *prev; }
#define TLIST_LIST(_name) struct _name##_list
#define TLIST_LIST(_name) struct _name##_list
#define TLIST_NODE_IN(_name, _type) { _type *next; _type *prev; TLIST_LIST(_name) *list; }
#define TLIST_NODE(_name, _type) struct _name##_node TLIST_NODE_IN(_name, _type)
#define TLIST_DEFAULT_NODE struct MACRO_CONCAT_AFTER(TLIST_PREFIX,_node) \
TLIST_NODE_IN(TLIST_PREFIX,TLIST_TYPE) TLIST_ITEM
/* Use ->first and ->last to access HEAD and TAIL */
#define THEAD(_name, _list) (_list)->first

314
lib/tlists_test.c Normal file

@ -0,0 +1,314 @@
/*
* BIRD Library -- Linked Lists Tests
*
* (c) 2015 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
#include "test/birdtest.h"
#include "lib/tlists.h"
#define TLIST_PREFIX tp
#define TLIST_TYPE struct test_node
#define TLIST_ITEM n
#define TLIST_WANT_ADD_HEAD
#define TLIST_WANT_ADD_TAIL
#define TLIST_WANT_ADD_AFTER
#define TLIST_WANT_UPDATE_NODE
struct test_node {
TLIST_DEFAULT_NODE;
};
#include "lib/tlists.h"
#define MAX_NUM 1000
static struct test_node nodes[MAX_NUM];
static TLIST_LIST(tp) l;
static void
show_list(void)
{
bt_debug("\n");
bt_debug("list.first points to %p\n", l.first);
bt_debug("list.last points to %p\n", l.last);
int i;
for (i = 0; i < MAX_NUM; i++)
{
bt_debug("n[%3i] is at %p\n", i, &nodes[i]);
bt_debug(" prev is at %p and point to %p\n", &(nodes[i].n.prev), nodes[i].n.prev);
bt_debug(" next is at %p and point to %p\n", &(nodes[i].n.next), nodes[i].n.next);
}
}
static int
is_filled_list_well_linked(void)
{
int i;
bt_assert(l.first == &nodes[0]);
bt_assert(l.last == &nodes[MAX_NUM-1]);
bt_assert(!nodes[0].n.prev);
bt_assert(!nodes[MAX_NUM-1].n.next);
for (i = 0; i < MAX_NUM; i++)
{
bt_assert(nodes[i].n.list == &l);
if (i < (MAX_NUM-1))
bt_assert(nodes[i].n.next == &nodes[i+1]);
if (i > 0)
bt_assert(nodes[i].n.prev == &nodes[i-1]);
}
return 1;
}
static int
is_empty_list_well_unlinked(void)
{
int i;
bt_assert(!l.first);
bt_assert(!l.last);
bt_assert(EMPTY_TLIST(tp, &l));
for (i = 0; i < MAX_NUM; i++)
{
bt_assert(nodes[i].n.next == NULL);
bt_assert(nodes[i].n.prev == NULL);
bt_assert(nodes[i].n.list == NULL);
}
return 1;
}
static void
init_list__(TLIST_LIST(tp) *l, struct test_node nodes[])
{
*l = (TLIST_LIST(tp)) {};
int i;
for (i = 0; i < MAX_NUM; i++)
{
nodes[i].n.next = NULL;
nodes[i].n.prev = NULL;
nodes[i].n.list = NULL;
}
}
static void
init_list_(void)
{
init_list__(&l, nodes);
}
static int
t_add_tail(void)
{
int i;
init_list_();
for (i = 0; i < MAX_NUM; i++)
{
tp_add_tail(&l, &nodes[i]);
bt_debug(".");
bt_assert(l.last == &nodes[i]);
bt_assert(l.first == &nodes[0]);
bt_assert(nodes[i].n.list == &l);
bt_assert(!nodes[i].n.next);
if (i > 0)
{
bt_assert(nodes[i-1].n.next == &nodes[i]);
bt_assert(nodes[i].n.prev == &nodes[i-1]);
}
}
show_list();
bt_assert(is_filled_list_well_linked());
return 1;
}
static int
t_add_head(void)
{
int i;
init_list_();
for (i = MAX_NUM-1; i >= 0; i--)
{
tp_add_head(&l, &nodes[i]);
bt_debug(".");
bt_assert(l.first == &nodes[i]);
bt_assert(l.last == &nodes[MAX_NUM-1]);
if (i < MAX_NUM-1)
{
bt_assert(nodes[i+1].n.prev == &nodes[i]);
bt_assert(nodes[i].n.next == &nodes[i+1]);
}
}
show_list();
bt_assert(is_filled_list_well_linked());
return 1;
}
static void
insert_node_(TLIST_LIST(tp) *l, struct test_node *n, struct test_node *after)
{
tp_add_after(l, n, after);
bt_debug(".");
}
static int
t_insert_node(void)
{
int i;
init_list_();
// add first node
insert_node_(&l, &nodes[0], NULL);
// add odd nodes
for (i = 2; i < MAX_NUM; i+=2)
insert_node_(&l, &nodes[i], &nodes[i-2]);
// add even nodes
for (i = 1; i < MAX_NUM; i+=2)
insert_node_(&l, &nodes[i], &nodes[i-1]);
bt_debug("\n");
bt_assert(is_filled_list_well_linked());
return 1;
}
static void
fill_list2(TLIST_LIST(tp) *l, struct test_node nodes[])
{
int i;
for (i = 0; i < MAX_NUM; i++)
tp_add_tail(l, &nodes[i]);
}
static void
fill_list(void)
{
fill_list2(&l, nodes);
}
static int
t_remove_node(void)
{
int i;
init_list_();
/* Fill & Remove & Check */
fill_list();
for (i = 0; i < MAX_NUM; i++)
tp_rem_node(&l, &nodes[i]);
bt_assert(is_empty_list_well_unlinked());
/* Fill & Remove half of the nodes & Check & Remove the rest of the nodes & Check */
fill_list();
for (i = 0; i < MAX_NUM; i+=2)
tp_rem_node(&l, &nodes[i]);
int tail_node_index = (MAX_NUM % 2) ? MAX_NUM - 2 : MAX_NUM - 1;
bt_assert(l.first == &nodes[1]);
bt_assert(l.last == &nodes[tail_node_index]);
bt_assert(!nodes[tail_node_index].n.next);
for (i = 1; i < MAX_NUM; i+=2)
{
if (i > 1)
bt_assert(nodes[i].n.prev == &nodes[i-2]);
if (i < tail_node_index)
bt_assert(nodes[i].n.next == &nodes[i+2]);
}
for (i = 1; i < MAX_NUM; i+=2)
tp_rem_node(&l, &nodes[i]);
bt_assert(is_empty_list_well_unlinked());
return 1;
}
static int
t_update_node(void)
{
struct test_node head, inside, tail;
init_list_();
fill_list();
head = nodes[0];
tp_update_node(&l, &head);
bt_assert(l.first == &head);
bt_assert(head.n.prev == NULL);
bt_assert(head.n.next == &nodes[1]);
bt_assert(nodes[1].n.prev == &head);
inside = nodes[MAX_NUM/2];
tp_update_node(&l, &inside);
bt_assert(nodes[MAX_NUM/2-1].n.next == &inside);
bt_assert(nodes[MAX_NUM/2+1].n.prev == &inside);
bt_assert(inside.n.prev == &nodes[MAX_NUM/2-1]);
bt_assert(inside.n.next == &nodes[MAX_NUM/2+1]);
tail = nodes[MAX_NUM-1];
tp_update_node(&l, &tail);
bt_assert(l.last == &tail);
bt_assert(tail.n.prev == &nodes[MAX_NUM-2]);
bt_assert(tail.n.next == NULL);
bt_assert(nodes[MAX_NUM-2].n.next == &tail);
return 1;
}
#if 0
static int
t_add_tail_list(void)
{
node nodes2[MAX_NUM];
list l2;
init_list__(&l, (node *) nodes);
fill_list2(&l, (node *) nodes);
init_list__(&l2, (node *) nodes2);
fill_list2(&l2, (node *) nodes2);
add_tail_list(&l, &l2);
bt_assert(nodes[MAX_NUM-1].next == &nodes2[0]);
bt_assert(nodes2[0].prev == &nodes[MAX_NUM-1]);
bt_assert(l.tail == &nodes2[MAX_NUM-1]);
return 1;
}
#endif
int
main(int argc, char *argv[])
{
bt_init(argc, argv);
bt_test_suite(t_add_tail, "Adding nodes to tail of list");
bt_test_suite(t_add_head, "Adding nodes to head of list");
bt_test_suite(t_insert_node, "Inserting nodes to list");
bt_test_suite(t_remove_node, "Removing nodes from list");
bt_test_suite(t_update_node, "Updating nodes in list");
#if 0
bt_test_suite(t_add_tail_list, "Adding another list at the tail of a list");
#endif
return bt_exit_value();
}

123
lib/type.h Normal file

@ -0,0 +1,123 @@
/*
* BIRD Internet Routing Daemon -- Internal Data Types
*
* (c) 2022 Maria Matejka <mq@jmq.cz>
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
#ifndef _BIRD_TYPE_H_
#define _BIRD_TYPE_H_
#include "lib/birdlib.h"
#include "lib/attrs.h"
union bval {
#define BVAL_ITEMS \
struct { \
u32 data; /* Integer type inherited from eattrs */ \
PADDING(data, 0, 4); /* Must be padded on 64-bits */ \
}; \
struct { \
u32 i; /* Integer type inherited from filters */ \
PADDING(i, 0, 4); /* Must be padded on 64-bits */ \
}; \
const struct adata *ptr; /* Generic attribute data inherited from eattrs */ \
const struct adata *ad; /* Generic attribute data inherited from filters */ \
BVAL_ITEMS;
};
union bval_long {
union bval bval; /* For direct assignments */
BVAL_ITEMS; /* For item-wise access */
u64 ec;
lcomm lc;
ip_addr ip;
const net_addr *net;
const char *s;
const struct adata *bs;
const struct f_tree *t;
const struct f_trie *ti;
const struct f_path_mask *path_mask;
struct f_path_mask_item pmi;
struct rte *rte;
struct rte_block {
struct rte **rte;
uint len;
} rte_block;
};
/* Internal types */
enum btype {
/* Nothing. Simply nothing. */
T_VOID = 0,
T_NONE = 0xff,
/* Something but inaccessible. */
T_OPAQUE = 0x02, /* Opaque byte string (not filterable) */
T_IFACE = 0x0c, /* Pointer to an interface (inside adata) */
T_ROUTES_BLOCK = 0x68, /* Block of route pointers */
T_ROUTE = 0x6a, /* One route pointer */
T_NEXTHOP_LIST = 0x6c, /* The whole nexthop block */
T_HOSTENTRY = 0x6e, /* Hostentry with possible MPLS labels */
/* Types shared with eattrs */
T_INT = 0x01, /* 32-bit unsigned integer number */
T_IP = 0x04, /* IP address */
T_QUAD = 0x05, /* Router ID (IPv4 address) */
T_PATH = 0x06, /* BGP AS path (encoding per RFC 1771:4.3) */
T_CLIST = 0x0a, /* Set of u32's (e.g., a community list) */
T_ECLIST = 0x0e, /* Set of pairs of u32's - ext. community list */
T_LCLIST = 0x08, /* Set of triplets of u32's - large community list */
T_STRING = 0x10,
T_ENUM_BGP_ORIGIN = 0x13, /* BGP Origin enum */
T_ENUM_RA_PREFERENCE = 0x15, /* RA Preference enum */
T_ENUM_FLOWSPEC_VALID = 0x17, /* Flowspec validation result */
#define EAF_TYPE__MAX 0x1f
#define EAF_EMBEDDED 0x01 /* Data stored in eattr.u.data (part of type spec) */
/* Otherwise, attribute data is adata */
/* Other user visible types which fit in int */
T_BOOL = 0xa0,
T_PAIR = 0xa4, /* Notice that pair is stored as integer: first << 16 | second */
/* Put enumerational types in the 0x12..0x3f range */
T_ENUM_LO = 0x12,
T_ENUM_HI = 0x3f,
T_ENUM_RTS = 0x31,
T_ENUM_SCOPE = 0x33,
T_ENUM_MPLS_POLICY = 0x35,
T_ENUM_RTD = 0x37,
T_ENUM_ROA = 0x39,
T_ENUM_NETTYPE = 0x3b,
T_ENUM_AF = 0x3d,
/* new enums go here */
#define T_ENUM T_ENUM_LO ... T_ENUM_HI
/* Bigger ones */
T_NET = 0xb0,
T_PATH_MASK = 0xb8, /* mask for BGP path */
T_EC = 0xbc, /* Extended community value, u64 */
T_LC = 0xc0, /* Large community value, lcomm */
T_RD = 0xc4, /* Route distinguisher for VPN addresses */
T_PATH_MASK_ITEM = 0xc8, /* Path mask item for path mask constructors */
T_BYTESTRING = 0xcc,
T_SET = 0x80,
T_PREFIX_SET = 0x84,
} PACKED;
typedef enum btype btype;
STATIC_ASSERT(sizeof(btype) == sizeof(byte));
#endif

79
lib/type_test.c Normal file

@ -0,0 +1,79 @@
/*
* BIRD Library -- Data Type Alignment Tests
*
* (c) 2022 Maria Matejka <mq@jmq.cz>
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
#include "test/birdtest.h"
#include "lib/type.h"
#include "lib/route.h"
#define CHECK_ONE(val) \
for (uint i=0; i<sizeof(val); i++) \
bt_assert(((const u8 *) &val)[i] == (u8) ~0);
#define SET_PADDING(val, name) \
for (uint i=0; i<sizeof(val.PADDING_NAME(name)); i++) \
val.PADDING_NAME(name)[i] = ~0;
static int
t_bval(void)
{
union bval v;
memset(&v, 0, sizeof(v));
v.data = ~0;
SET_PADDING(v, data);
CHECK_ONE(v);
memset(&v, 0, sizeof(v));
v.i = ~0;
SET_PADDING(v, i);
CHECK_ONE(v);
memset(&v, 0, sizeof(v));
v.ptr = (void *) ~0;
CHECK_ONE(v);
memset(&v, 0, sizeof(v));
v.ad = (void *) ~0;
CHECK_ONE(v);
return 1;
}
static int
t_eattr(void)
{
struct eattr e;
memset(&e, 0, sizeof(e));
e.id = ~0;
e.flags = ~0;
e.type = ~0;
e.rfu = ~0;
e.originated = ~0;
e.fresh = ~0;
e.undef = ~0;
memset(&e.u, ~0, sizeof(e.u)); /* Assumes t_bval passed */
SET_PADDING(e, unused);
CHECK_ONE(e);
return 1;
}
int main(int argc, char *argv[])
{
bt_init(argc, argv);
bt_test_suite(t_bval, "Structure alignment test: bval");
bt_test_suite(t_eattr, "Structure alignment test: eattr");
return bt_exit_value();
}


@ -1,6 +1,6 @@
Summary: BIRD Internet Routing Daemon
Name: bird
Version: 2.15.1
Version: 3.0alpha2
Release: 1
Copyright: GPL
Group: Networking/Daemons


@ -1,4 +1,4 @@
src := a-path.c a-set.c cli.c cmds.c iface.c locks.c mpls.c neighbor.c password.c proto.c proto-build.c rt-attr.c rt-dev.c rt-fib.c rt-show.c rt-table.c
src := cli.c cmds.c iface.c locks.c mpls.c neighbor.c password.c proto.c proto-build.c rt-attr.c rt-dev.c rt-fib.c rt-show.c rt-table.c
obj := $(src-o-files)
$(all-daemon)
$(cf-local)
@ -6,10 +6,14 @@ $(conf-y-targets): $(s)mpls.Y
$(o)proto-build.c: Makefile $(lastword $(MAKEFILE_LIST)) $(objdir)/.dir-stamp
$(E)echo GEN $@
$(Q)echo "$(patsubst %,void %_build(void); ,$(PROTO_BUILD)) void protos_build_gen(void) { $(patsubst %, %_build(); ,$(PROTO_BUILD))}" > $@
$(Q)echo "#include \"lib/birdlib.h\"" > $@
$(Q)$(patsubst %,echo 'void %_build(void);' >> $@;,$(PROTO_BUILD))
$(Q)echo "void protos_build_gen(void) {" >> $@
$(Q)$(patsubst %,echo ' %_build();'>>$@;,$(PROTO_BUILD))
$(Q)echo "}" >> $@
prepare: $(o)proto-build.c
tests_src := a-set_test.c a-path_test.c rt-fib_test.c
tests_src := rt-fib_test.c
tests_targets := $(tests_targets) $(tests-target-files)
tests_objs := $(tests_objs) $(src-o-files)
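
For illustration, with PROTO_BUILD set to, say, `bgp ospf` (an invented value), the rewritten recipe above emits a proper compilation unit along these lines, instead of the former single-line blob:

#include "lib/birdlib.h"
void bgp_build(void);
void ospf_build(void);
void protos_build_gen(void) {
 bgp_build();
 ospf_build();
}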

View File

@ -35,6 +35,7 @@ struct bfd_request {
void (*hook)(struct bfd_request *);
void *data;
struct birdloop *target;
struct bfd_session *session;
@ -57,14 +58,14 @@ static inline struct bfd_options * bfd_new_options(void)
#ifdef CONFIG_BFD
struct bfd_request * bfd_request_session(pool *p, ip_addr addr, ip_addr local, struct iface *iface, struct iface *vrf, void (*hook)(struct bfd_request *), void *data, const struct bfd_options *opts);
struct bfd_request * bfd_request_session(pool *p, ip_addr addr, ip_addr local, struct iface *iface, struct iface *vrf, void (*hook)(struct bfd_request *), void *data, struct birdloop *target, const struct bfd_options *opts);
void bfd_update_request(struct bfd_request *req, const struct bfd_options *opts);
static inline void cf_check_bfd(int use UNUSED) { }
#else
static inline struct bfd_request * bfd_request_session(pool *p UNUSED, ip_addr addr UNUSED, ip_addr local UNUSED, struct iface *iface UNUSED, struct iface *vrf UNUSED, void (*hook)(struct bfd_request *) UNUSED, void *data UNUSED, const struct bfd_options *opts UNUSED) { return NULL; }
static inline struct bfd_request * bfd_request_session(pool *p UNUSED, ip_addr addr UNUSED, ip_addr local UNUSED, struct iface *iface UNUSED, struct iface *vrf UNUSED, void (*hook)(struct bfd_request *) UNUSED, void *data UNUSED, struct birdloop *target UNUSED, const struct bfd_options *opts UNUSED) { return NULL; }
static inline void bfd_update_request(struct bfd_request *req UNUSED, const struct bfd_options *opts UNUSED) { };
static inline void cf_check_bfd(int use) { if (use) cf_error("BFD not available"); }
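
The #ifdef CONFIG_BFD split above keeps callers compiling when BFD is built out: real declarations on one side, inline no-op stubs plus a loud config-time check on the other. A self-contained sketch of the same pattern — CONFIG_FOO and all names are invented:

#include <stdio.h>

/* Compile with -DCONFIG_FOO for the real variant. */
#ifdef CONFIG_FOO
static int foo_request(int x) { printf("real foo(%d)\n", x); return x; }
static void cf_check_foo(int use) { (void) use; }
#else
/* Inline stubs keep callers compiling; only the config check complains. */
static int foo_request(int x) { (void) x; return -1; }
static void cf_check_foo(int use) { if (use) printf("foo not available\n"); }
#endif

int main(void)
{
  cf_check_foo(1);
  printf("request -> %d\n", foo_request(7));
  return 0;
}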

View File

@ -9,7 +9,6 @@
#ifndef _BIRD_BIRD_H_
#define _BIRD_BIRD_H_
#include "sysdep/config.h"
#include "lib/birdlib.h"
#include "lib/ip.h"
#include "lib/net.h"

View File

@ -160,43 +160,6 @@ cli_printf(cli *c, int code, char *msg, ...)
memcpy(cli_alloc_out(c, size), buf, size);
}
static void
cli_copy_message(cli *c)
{
byte *p, *q;
uint cnt = 2;
if (c->ring_overflow)
{
byte buf[64];
int n = bsprintf(buf, "<%d messages lost>\n", c->ring_overflow);
c->ring_overflow = 0;
memcpy(cli_alloc_out(c, n), buf, n);
}
p = c->ring_read;
while (*p)
{
cnt++;
p++;
if (p == c->ring_end)
p = c->ring_buf;
ASSERT(p != c->ring_write);
}
c->async_msg_size += cnt;
q = cli_alloc_out(c, cnt);
*q++ = '+';
p = c->ring_read;
do
{
*q = *p++;
if (p == c->ring_end)
p = c->ring_buf;
}
while (*q++);
c->ring_read = p;
q[-1] = '\n';
}
static void
cli_hello(cli *c)
{
@ -262,7 +225,7 @@ cli_command(struct cli *c)
log(L_TRACE "CLI: %s", c->rx_buf);
bzero(&f, sizeof(f));
f.mem = c->parser_pool;
f.pool = rp_new(c->pool, "Config");
f.pool = rp_new(c->pool, the_bird_domain.the_bird, "Config");
init_list(&f.symbols);
cf_read_hook = cli_cmd_read_hook;
cli_rh_pos = c->rx_buf;
@ -283,10 +246,6 @@ cli_event(void *data)
cli *c = data;
int err;
while (c->ring_read != c->ring_write &&
c->async_msg_size < CLI_MAX_ASYNC_QUEUE)
cli_copy_message(c);
if (c->tx_pos)
;
else if (c->cont)
@ -302,24 +261,24 @@ cli_event(void *data)
cli_command(c);
}
cli_write_trigger(c);
if (c->tx_pos)
cli_write_trigger(c);
}
cli *
cli_new(void *priv)
cli_new(struct birdsock *sock)
{
pool *p = rp_new(cli_pool, "CLI");
pool *p = rp_new(cli_pool, the_bird_domain.the_bird, "CLI");
cli *c = mb_alloc(p, sizeof(cli));
bzero(c, sizeof(cli));
c->pool = p;
c->priv = priv;
c->sock = sock;
c->event = ev_new(p);
c->event->hook = cli_event;
c->event->data = c;
c->cont = cli_hello;
c->parser_pool = lp_new_default(c->pool);
c->show_pool = lp_new_default(c->pool);
c->rx_buf = mb_alloc(c->pool, CLI_RX_BUF_SIZE);
ev_schedule(c->event);
return c;
@ -335,85 +294,25 @@ cli_kick(cli *c)
static list cli_log_hooks;
static int cli_log_inited;
void
cli_set_log_echo(cli *c, uint mask, uint size)
{
if (c->ring_buf)
{
mb_free(c->ring_buf);
c->ring_buf = c->ring_end = c->ring_read = c->ring_write = NULL;
rem_node(&c->n);
}
c->log_mask = mask;
if (mask && size)
{
c->ring_buf = mb_alloc(c->pool, size);
c->ring_end = c->ring_buf + size;
c->ring_read = c->ring_write = c->ring_buf;
add_tail(&cli_log_hooks, &c->n);
c->log_threshold = size / 8;
}
c->ring_overflow = 0;
}
void
cli_echo(uint class, byte *msg)
{
unsigned len, free, i, l;
cli *c;
byte *m;
if (!cli_log_inited || EMPTY_LIST(cli_log_hooks))
return;
len = strlen(msg) + 1;
WALK_LIST(c, cli_log_hooks)
{
if (!(c->log_mask & (1 << class)))
continue;
if (c->ring_read <= c->ring_write)
free = (c->ring_end - c->ring_buf) - (c->ring_write - c->ring_read + 1);
else
free = c->ring_read - c->ring_write - 1;
if ((len > free) ||
(free < c->log_threshold && class < (unsigned) L_INFO[0]))
{
c->ring_overflow++;
continue;
}
if (c->ring_read == c->ring_write)
ev_schedule(c->event);
m = msg;
l = len;
while (l)
{
if (c->ring_read <= c->ring_write)
i = c->ring_end - c->ring_write;
else
i = c->ring_read - c->ring_write;
if (i > l)
i = l;
memcpy(c->ring_write, m, i);
m += i;
l -= i;
c->ring_write += i;
if (c->ring_write == c->ring_end)
c->ring_write = c->ring_buf;
}
}
}
/* Hack for scheduled undo notification */
extern cli *cmd_reconfig_stored_cli;
void
cli_free(cli *c)
{
cli_set_log_echo(c, 0, 0);
int defer = 0;
if (c->cleanup)
c->cleanup(c);
defer = c->cleanup(c);
if (c == cmd_reconfig_stored_cli)
cmd_reconfig_stored_cli = NULL;
rfree(c->pool);
if (defer)
{
sk_close(c->sock);
c->sock = NULL;
}
else
rp_free(c->pool);
}
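
The new cleanup contract visible here (and documented in cli.h below) lets the hook defer destruction: return 0 to free the pool at once, nonzero to keep it alive until the socket drains. A toy standalone sketch of that contract, with all names invented:

#include <stdio.h>
#include <stdlib.h>

/* Toy model: cleanup returns 0 if the object may be freed at once,
 * nonzero if the caller must defer the free. */
struct conn {
  int tx_pending;                 /* unfinished writes on the socket */
  int (*cleanup)(struct conn *);
};

static int conn_cleanup(struct conn *c)
{
  return c->tx_pending;           /* defer while data is in flight */
}

static void conn_free(struct conn *c)
{
  if (c->cleanup(c))
    printf("deferred; freed later, once the last write completes\n");
  else
  {
    free(c);
    printf("freed immediately\n");
  }
}

int main(void)
{
  struct conn *c = malloc(sizeof(*c));
  c->tx_pending = 0;
  c->cleanup = conn_cleanup;
  conn_free(c);
  return 0;
}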
/**
@ -425,7 +324,7 @@ cli_free(cli *c)
void
cli_init(void)
{
cli_pool = rp_new(&root_pool, "CLI");
cli_pool = rp_new(&root_pool, the_bird_domain.the_bird, "CLI");
init_list(&cli_log_hooks);
cli_log_inited = 1;
}
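
Throughout this file, rp_new() now takes a lock domain argument (the_bird_domain.the_bird), tying each resource pool to the domain that must be held when the pool is used. A toy sketch of what such domain tagging buys, assuming the real implementation asserts domain ownership on access — all names here are invented:

#include <assert.h>
#include <stdlib.h>

/* Toy model: the pool remembers its lock domain and allocation
 * asserts that the domain is currently held. */
struct domain { int locked; const char *name; };
struct pool { struct domain *dom; const char *name; };

static struct pool *rp_new_sketch(struct domain *dom, const char *name)
{
  struct pool *p = malloc(sizeof(*p));
  p->dom = dom;
  p->name = name;
  return p;
}

static void *pool_alloc(struct pool *p, size_t sz)
{
  assert(p->dom->locked);         /* catch unlocked access in debug builds */
  return malloc(sz);
}

int main(void)
{
  struct domain the_bird = { .locked = 1, .name = "the_bird" };
  struct pool *cli_pool = rp_new_sketch(&the_bird, "CLI");
  void *buf = pool_alloc(cli_pool, 64);
  free(buf);
  free(cli_pool);
  return 0;
}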

View File

@ -10,6 +10,7 @@
#define _BIRD_CLI_H_
#include "lib/resource.h"
#include "lib/lists.h"
#include "lib/event.h"
#define CLI_RX_BUF_SIZE 4096
@ -28,20 +29,17 @@ struct cli_out {
typedef struct cli {
node n; /* Node in list of all log hooks */
pool *pool;
void *priv; /* Private to sysdep layer */
struct birdsock *sock; /* Underlying socket */
byte *rx_buf, *rx_pos; /* sysdep */
struct cli_out *tx_buf, *tx_pos, *tx_write;
event *event;
void (*cont)(struct cli *c);
void (*cleanup)(struct cli *c);
int (*cleanup)(struct cli *c); /* Return 0 if finished and cli may be freed immediately.
Otherwise return 1 and call rfree(c->pool) when appropriate. */
void *rover; /* Private to continuation routine */
int last_reply;
int restricted; /* CLI is restricted to read-only commands */
struct linpool *parser_pool; /* Pool used during parsing */
struct linpool *show_pool; /* Pool used during route show */
byte *ring_buf; /* Ring buffer for asynchronous messages */
byte *ring_end, *ring_read, *ring_write; /* Pointers to the ring buffer */
uint ring_overflow; /* Counter of ring overflows */
uint log_mask; /* Mask of allowed message levels */
uint log_threshold; /* When free < log_threshold, store only important messages */
uint async_msg_size; /* Total size of async messages queued in tx_buf */
@ -56,19 +54,17 @@ extern struct cli *this_cli; /* Used during parsing */
void cli_printf(cli *, int, char *, ...);
#define cli_msg(x...) cli_printf(this_cli, x)
void cli_set_log_echo(cli *, uint mask, uint size);
static inline void cli_separator(cli *c)
{ if (c->last_reply) cli_printf(c, -c->last_reply, ""); };
/* Functions provided to sysdep layer */
cli *cli_new(void *);
cli *cli_new(struct birdsock *);
void cli_init(void);
void cli_free(cli *);
void cli_kick(cli *);
void cli_written(cli *);
void cli_echo(uint class, byte *msg);
static inline int cli_access_restricted(void)
{

View File

@ -54,9 +54,6 @@ cmd_show_symbols(struct sym_show_data *sd)
for (const struct sym_scope *scope = config->root_scope; scope; scope = scope->next)
HASH_WALK(scope->hash, next, sym)
{
if (!sym->scope->active)
continue;
if (sd->type && (sym->class != sd->type))
continue;
@ -109,7 +106,6 @@ print_size(char *dsc, struct resmem vals)
extern pool *rt_table_pool;
extern pool *rta_pool;
extern uint *pages_kept;
void
cmd_show_memory(void)
@ -122,8 +118,10 @@ cmd_show_memory(void)
print_size("Current config:", rmemsize(config_pool));
struct resmem total = rmemsize(&root_pool);
#ifdef HAVE_MMAP
print_size("Standby memory:", (struct resmem) { .overhead = page_size * *pages_kept });
total.overhead += page_size * *pages_kept;
int pk = atomic_load_explicit(&pages_kept, memory_order_relaxed)
+ atomic_load_explicit(&pages_kept_locally, memory_order_relaxed);
print_size("Standby memory:", (struct resmem) { .overhead = page_size * pk });
total.overhead += page_size * pk;
#endif
print_size("Total:", total);
cli_msg(0, "");
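
pages_kept and pages_kept_locally are now read with atomic_load_explicit and memory_order_relaxed: the counters are bumped from multiple threads, but the display only needs a statistical snapshot, so no ordering guarantees are required. A minimal sketch of the pattern, with invented names:

#include <stdatomic.h>
#include <stdio.h>

/* Writers on any thread bump the counter; the reader only wants a
 * snapshot for display, so relaxed ordering is enough. */
static _Atomic unsigned pages_kept_sk;

static void keep_page(void)
{
  atomic_fetch_add_explicit(&pages_kept_sk, 1, memory_order_relaxed);
}

int main(void)
{
  keep_page();
  keep_page();
  unsigned pk = atomic_load_explicit(&pages_kept_sk, memory_order_relaxed);
  printf("standby pages: %u\n", pk);
  return 0;
}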
@ -133,9 +131,9 @@ void
cmd_eval(const struct f_line *expr)
{
buffer buf;
LOG_BUFFER_INIT(buf);
STACK_BUFFER_INIT(buf, CLI_MSG_SIZE);
if (f_eval_buf(expr, this_cli->parser_pool, &buf) > F_RETURN)
if (f_eval_buf(expr, &buf) > F_RETURN)
{
cli_msg(8008, "runtime error");
return;

Some files were not shown because too many files have changed in this diff.