0
0
mirror of https://gitlab.nic.cz/labs/bird.git synced 2024-11-18 17:18:42 +00:00

Merge commit 'a32cee78' into HEAD

This commit is contained in:
Maria Matejka 2022-10-04 15:59:15 +02:00
commit 0eba27c69f
16 changed files with 458 additions and 221 deletions

View File

@ -2379,6 +2379,7 @@ avoid routing loops.
<item> <rfc id="8203"> - BGP Administrative Shutdown Communication
<item> <rfc id="8212"> - Default EBGP Route Propagation Behavior without Policies
<item> <rfc id="9117"> - Revised Validation Procedure for BGP Flow Specifications
<item> <rfc id="9234"> - Route Leak Prevention and Detection Using Roles
</itemize>
<sect1>Route selection rules
@ -2819,6 +2820,29 @@ using the following configuration parameters:
protocol itself (for example, if a route is received through eBGP and
therefore does not have such attribute). Default: 100 (0 in pre-1.2.0
versions of BIRD).
<tag><label id="bgp-local-role">local role <m/role-name/</tag>
BGP roles are a mechanism for route leak prevention and automatic route
filtering based on common BGP topology relationships. They are defined
in <rfc id="9234">. Instead of manually configuring filters and
communities, automatic filtering is done with the help of the OTC
attribute - a flag for routes that should be sent only to customers.
The same attribute is also used to automatically detect and filter route
leaks created by third parties.
This option is valid for EBGP sessions, but it is not recommended to be
used within AS confederations (which would require manual filtering of
<cf/bgp_otc/ attribute on confederation boundaries).
Possible <cf><m/role-name/</cf> values are: <cf/provider/,
<cf/rs_server/, <cf/rs_client/, <cf/customer/ and <cf/peer/.
Default: No local role assigned.
<tag><label id="bgp-require-roles">require roles <m/switch/</tag>
If this option is set, the BGP roles must be defined on both sides,
otherwise the session will not be established. This behavior is defined
in <rfc id="9234"> as "strict mode" and is used to enforce corresponding
configuration at your counterpart side. Default: disabled.
</descrip>
<sect1>Channel configuration
@ -3126,6 +3150,11 @@ some of them (marked with `<tt/O/') are optional.
This attribute contains accumulated IGP metric, which is a total
distance to the destination through multiple autonomous systems.
Currently, the attribute is not accessible from filters.
<tag><label id="bgp-otc">int bgp_otc [O]</tag>
This attribute is defined in <rfc id="9234">. OTC is a flag that marks
routes that should be sent only to customers. If <ref id="bgp-local-role"
name="local role"> is configured, it is set automatically.
</descrip>
<sect1>Example

View File

@ -10,6 +10,8 @@
#ifndef _BIRD_LIB_ROUTE_H_
#define _BIRD_LIB_ROUTE_H_
#undef RT_SOURCE_DEBUG
#include "lib/type.h"
#include "lib/rcu.h"
#include "lib/hash.h"
@ -87,6 +89,11 @@ struct rte_src *rt_get_source_o(struct rte_owner *o, u32 id);
struct rte_src *rt_find_source_global(u32 id);
#ifdef RT_SOURCE_DEBUG
#define rt_lock_source _rt_lock_source_internal
#define rt_unlock_source _rt_unlock_source_internal
#endif
static inline void rt_lock_source(struct rte_src *src)
{
/* Locking a source is trivial; somebody already holds it so we just increase
@ -139,6 +146,14 @@ static inline void rt_unlock_source(struct rte_src *src)
rcu_read_unlock();
}
#ifdef RT_SOURCE_DEBUG
#undef rt_lock_source
#undef rt_unlock_source
#define rt_lock_source(x) ( log(L_INFO "Lock source %uG at %s:%d", (x)->global_id, __FILE__, __LINE__), _rt_lock_source_internal(x) )
#define rt_unlock_source(x) ( log(L_INFO "Unlock source %uG at %s:%d", (x)->global_id, __FILE__, __LINE__), _rt_unlock_source_internal(x) )
#endif
void rt_init_sources(struct rte_owner *, const char *name, event_list *list);
void rt_destroy_sources(struct rte_owner *, event *);
@ -242,7 +257,7 @@ typedef struct ea_list {
struct ea_storage {
struct ea_storage *next_hash; /* Next in hash chain */
struct ea_storage **pprev_hash; /* Previous in hash chain */
u32 uc; /* Use count */
_Atomic u32 uc; /* Use count */
u32 hash_key; /* List hash */
ea_list l[0]; /* The list itself */
};
@ -511,12 +526,15 @@ static inline struct ea_storage *ea_get_storage(ea_list *r)
return SKIP_BACK(struct ea_storage, l[0], r);
}
static inline ea_list *ea_clone(ea_list *r) { ea_get_storage(r)->uc++; return r; }
static inline ea_list *ea_clone(ea_list *r) {
ASSERT_DIE(0 < atomic_fetch_add_explicit(&ea_get_storage(r)->uc, 1, memory_order_acq_rel));
return r;
}
void ea__free(struct ea_storage *r);
static inline void ea_free(ea_list *l) {
if (!l) return;
struct ea_storage *r = ea_get_storage(l);
if (!--r->uc) ea__free(r);
if (1 == atomic_fetch_sub_explicit(&r->uc, 1, memory_order_acq_rel)) ea__free(r);
}
void ea_dump(ea_list *);

View File

@ -313,9 +313,9 @@ proto_remove_channels(struct proto *p)
}
static void
channel_roa_in_changed(struct rt_subscription *s)
channel_roa_in_changed(void *_data)
{
struct channel *c = s->data;
struct channel *c = _data;
int active = !!c->reload_req.hook;
CD(c, "Reload triggered by RPKI change%s", active ? " - already active" : "");
@ -327,9 +327,9 @@ channel_roa_in_changed(struct rt_subscription *s)
}
static void
channel_roa_out_changed(struct rt_subscription *s)
channel_roa_out_changed(void *_data)
{
struct channel *c = s->data;
struct channel *c = _data;
CD(c, "Feeding triggered by RPKI change");
c->refeed_pending = 1;
@ -347,14 +347,14 @@ struct roa_subscription {
static int
channel_roa_is_subscribed(struct channel *c, rtable *tab, int dir)
{
void (*hook)(struct rt_subscription *) =
void (*hook)(void *) =
dir ? channel_roa_in_changed : channel_roa_out_changed;
struct roa_subscription *s;
node *n;
WALK_LIST2(s, n, c->roa_subscriptions, roa_node)
if ((s->s.tab == tab) && (s->s.hook == hook))
if ((s->s.tab == tab) && (s->s.event->hook == hook))
return 1;
return 0;
@ -368,9 +368,9 @@ channel_roa_subscribe(struct channel *c, rtable *tab, int dir)
return;
struct roa_subscription *s = mb_allocz(c->proto->pool, sizeof(struct roa_subscription));
s->s.event = ev_new_init(c->proto->pool, dir ? channel_roa_in_changed : channel_roa_out_changed, c);
s->s.list = proto_work_list(c->proto);
s->s.hook = dir ? channel_roa_in_changed : channel_roa_out_changed;
s->s.data = c;
rt_subscribe(tab, &s->s);
add_tail(&c->roa_subscriptions, &s->roa_node);
@ -381,6 +381,7 @@ channel_roa_unsubscribe(struct roa_subscription *s)
{
rt_unsubscribe(&s->s);
rem_node(&s->roa_node);
rfree(s->s.event);
mb_free(s);
}

View File

@ -1043,6 +1043,8 @@ ea_list_ref(ea_list *l)
}
}
static void ea_free_nested(ea_list *l);
static void
ea_list_unref(ea_list *l)
{
@ -1063,7 +1065,7 @@ ea_list_unref(ea_list *l)
}
if (l->next)
ea_free(l->next);
ea_free_nested(l->next);
}
void
@ -1472,9 +1474,15 @@ ea_lookup(ea_list *o, int overlay)
o = ea_normalize(o, overlay);
h = ea_hash(o);
RTA_LOCK;
for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next_hash)
if (r->hash_key == h && ea_same(r->l, o))
return ea_clone(r->l);
{
atomic_fetch_add_explicit(&r->uc, 1, memory_order_acq_rel);
RTA_UNLOCK;
return r->l;
}
uint elen = ea_list_size(o);
r = mb_alloc(rta_pool, elen + sizeof(struct ea_storage));
@ -1490,12 +1498,17 @@ ea_lookup(ea_list *o, int overlay)
if (++rta_cache_count > rta_cache_limit)
rta_rehash();
RTA_UNLOCK;
return r->l;
}
void
ea__free(struct ea_storage *a)
static void
ea_free_locked(struct ea_storage *a)
{
/* Somebody has cloned this rta inbetween. This sometimes happens. */
if (atomic_load_explicit(&a->uc, memory_order_acquire))
return;
ASSERT(rta_cache_count);
rta_cache_count--;
*a->pprev_hash = a->next_hash;
@ -1506,6 +1519,22 @@ ea__free(struct ea_storage *a)
mb_free(a);
}
/*
 * Drop one reference to the cached storage that backs attribute list @l.
 *
 * Unlike ea__free(), this calls ea_free_locked() directly without taking
 * RTA_LOCK itself.
 * NOTE(review): presumably the caller already holds the attribute cache
 * lock -- confirm at the call site in ea_list_unref().
 */
static void
ea_free_nested(struct ea_list *l)
{
  struct ea_storage *storage = ea_get_storage(l);

  /* fetch_sub yields the previous count; anything but 1 means references remain */
  if (atomic_fetch_sub_explicit(&storage->uc, 1, memory_order_acq_rel) != 1)
    return;

  ea_free_locked(storage);
}
/*
 * ea__free - release an attribute storage with the cache lock taken
 * @a: storage to release
 *
 * Wraps ea_free_locked() in an RTA_LOCK / RTA_UNLOCK pair. The inline
 * ea_free() calls this once its atomic decrement has consumed the last
 * reference; ea_free_locked() re-reads the use count and returns early
 * when it is non-zero again.
 */
void
ea__free(struct ea_storage *a)
{
RTA_LOCK;
ea_free_locked(a);
RTA_UNLOCK;
}
/**
* rta_dump_all - dump attribute cache
*
@ -1515,6 +1544,8 @@ ea__free(struct ea_storage *a)
void
ea_dump_all(void)
{
RTA_LOCK;
debug("Route attribute cache (%d entries, rehash at %d):\n", rta_cache_count, rta_cache_limit);
for (uint h=0; h < rta_cache_size; h++)
for (struct ea_storage *a = rta_hash_table[h]; a; a = a->next_hash)
@ -1524,6 +1555,8 @@ ea_dump_all(void)
debug("\n");
}
debug("\n");
RTA_UNLOCK;
}
void

View File

@ -114,8 +114,6 @@
pool *rt_table_pool;
static linpool *rte_update_pool;
list routing_tables;
list deleted_routing_tables;
@ -153,9 +151,6 @@ static void rt_cork_release_hook(void *);
static inline void rt_export_used(struct rt_exporter *);
static void rt_export_cleanup(rtable *tab);
static inline void rte_update_lock(void);
static inline void rte_update_unlock(void);
static int rte_same(rte *x, rte *y);
const char *rt_import_state_name_array[TIS_MAX] = {
@ -966,13 +961,10 @@ done:
}
/* Nothing to export */
if (!new_best && !old_best)
{
if (new_best || old_best)
do_rt_notify(c, n, new_best, old_best);
else
DBG("rt_notify_accepted: nothing to export\n");
return;
}
do_rt_notify(c, n, new_best, old_best);
}
rte *
@ -1079,7 +1071,7 @@ rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pen
}
/* Prepare new merged route */
rte *new_merged = count ? rt_export_merged(c, feed, count, rte_update_pool, 0) : NULL;
rte *new_merged = count ? rt_export_merged(c, feed, count, tmp_linpool, 0) : NULL;
if (new_merged || old_best)
do_rt_notify(c, n, new_merged, old_best);
@ -1089,7 +1081,6 @@ void
rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe)
{
struct channel *c = SKIP_BACK(struct channel, out_req, req);
rte *o = RTE_VALID_OR_NULL(rpe->old_best);
struct rte_storage *new_best = rpe->new_best;
@ -1482,14 +1473,10 @@ rt_export_hook(void *_data)
/* Process the export */
for (uint i=0; i<RT_EXPORT_BULK; i++)
{
rte_update_lock();
rte_export(c, c->rpe_next);
if (!c->rpe_next)
break;
rte_update_unlock();
}
rt_send_export_event(c);
@ -1775,21 +1762,6 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr
}
static int rte_update_nest_cnt; /* Nesting counter to allow recursive updates */
static inline void
rte_update_lock(void)
{
rte_update_nest_cnt++;
}
static inline void
rte_update_unlock(void)
{
if (!--rte_update_nest_cnt)
lp_flush(rte_update_pool);
}
int
channel_preimport(struct rt_import_request *req, rte *new, rte *old)
{
@ -1839,7 +1811,6 @@ rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
const struct filter *filter = c->in_filter;
struct channel_import_stats *stats = &c->import_stats;
rte_update_lock();
if (new)
{
new->net = n;
@ -1890,7 +1861,6 @@ rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
ea_free(a);
}
rte_update_unlock();
}
void
@ -1921,25 +1891,6 @@ rte_import(struct rt_import_request *req, const net_addr *n, rte *new, struct rt
rte_recalculate(hook, nn, new, src);
}
/* Independent call to rte_announce(), used from next hop
recalculation, outside of rte_update(). new must be non-NULL */
static inline void
rte_announce_i(rtable *tab, net *net, struct rte_storage *new, struct rte_storage *old,
struct rte_storage *new_best, struct rte_storage *old_best)
{
rte_update_lock();
rte_announce(tab, net, new, old, new_best, old_best);
rte_update_unlock();
}
static inline void
rte_discard(net *net, rte *old) /* Non-filtered route deletion, used during garbage collection */
{
rte_update_lock();
rte_recalculate(old->sender, net, NULL, old->src);
rte_update_unlock();
}
/* Check rtable for best route to given net whether it would be exported do p */
int
rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter)
@ -1951,15 +1902,11 @@ rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filte
rte rt = n->routes->rte;
rte_update_lock();
/* Rest is stripped down export_filter() */
int v = c->proto->preexport ? c->proto->preexport(c, &rt) : 0;
if (v == RIC_PROCESS)
v = (f_run(filter, &rt, FF_SILENT) <= F_ACCEPT);
rte_update_unlock();
return v > 0;
}
@ -2489,7 +2436,7 @@ rt_settle_timer(timer *t)
struct rt_subscription *s;
WALK_LIST(s, tab->subscribers)
s->hook(s);
ev_send(s->list, s->event);
}
static void
@ -2731,7 +2678,6 @@ rt_init(void)
{
rta_init();
rt_table_pool = rp_new(&root_pool, "Routing tables");
rte_update_pool = lp_new_default(rt_table_pool);
init_list(&routing_tables);
init_list(&deleted_routing_tables);
ev_init_list(&rt_cork.queue, &main_birdloop, "Route cork release");
@ -2816,7 +2762,7 @@ again:
(e->rte.stale_cycle < s->stale_valid) ||
(e->rte.stale_cycle > s->stale_set))
{
rte_discard(n, &e->rte);
rte_recalculate(e->rte.sender, n, NULL, e->rte.src);
limit--;
goto rescan;
@ -3590,7 +3536,7 @@ rt_next_hop_update_net(rtable *tab, net *n)
{ "autoupdated [+best]", "autoupdated [best]" }
};
rt_rte_trace_in(D_ROUTES, updates[i].new->rte.sender->req, &updates[i].new->rte, best_indicator[nb][ob]);
rte_announce_i(tab, n, updates[i].new, updates[i].old, new, old_best);
rte_announce(tab, n, updates[i].new, updates[i].old, new, old_best);
}
return count;
@ -3940,20 +3886,16 @@ rt_feed_net(struct rt_export_hook *c, net *n)
count = rte_feed_count(n);
if (count)
{
rte_update_lock();
rte **feed = alloca(count * sizeof(rte *));
rte_feed_obtain(n, feed, count);
c->req->export_bulk(c->req, n->n.addr, NULL, feed, count);
rte_update_unlock();
}
}
else if (n->routes)
{
rte_update_lock();
struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes };
c->req->export_one(c->req, n->n.addr, &rpe);
rte_update_unlock();
count = 1;
}

View File

@ -139,8 +139,8 @@ typedef struct rtable {
struct rt_subscription {
node n;
rtable *tab;
void (*hook)(struct rt_subscription *b);
void *data;
event *event;
event_list *list;
};
struct rt_flowspec_link {

View File

@ -939,6 +939,18 @@ bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint fla
bgp_set_attr_ptr(to, BA_LARGE_COMMUNITY, flags, ad);
}
/*
 * Decode the Only-To-Customer (OTC) path attribute, RFC 9234.
 *
 * @flags: attribute flags as received; stored together with the value
 * @data:  attribute body
 * @len:   length of the attribute body in bytes
 * @to:    attribute list that receives BA_ONLY_TO_CUSTOMER
 *
 * The body must be exactly 4 bytes long; any other length is reported through
 * WITHDRAW() with BAD_LENGTH. Otherwise the 32-bit value is read by get_u32()
 * and stored as BA_ONLY_TO_CUSTOMER.
 *
 * Note that @data is read here, so it must not carry the UNUSED annotation.
 */
static void
bgp_decode_otc(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
  /* NOTE(review): WITHDRAW() is expected to leave the function -- confirm */
  if (len != 4)
    WITHDRAW(BAD_LENGTH, "OTC", len);

  u32 val = get_u32(data);
  bgp_set_attr_u32(to, BA_ONLY_TO_CUSTOMER, flags, val);
}
static void
bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a)
{
@ -1175,6 +1187,13 @@ static union bgp_attr_desc bgp_attr_table[BGP_ATTR_MAX] = {
.encode = bgp_encode_u32s,
.decode = bgp_decode_large_community,
},
[BA_ONLY_TO_CUSTOMER] = {
.name = "otc",
.type = T_INT,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.encode = bgp_encode_u32,
.decode = bgp_decode_otc,
},
[BA_MPLS_LABEL_STACK] = {
.name = "bgp_mpls_label_stack",
.type = T_CLIST,
@ -1504,6 +1523,29 @@ bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to)
REPORT("Discarding AIGP attribute received on non-AIGP session");
bgp_unset_attr(to, BA_AIGP);
}
/* Handle OTC ingress procedure, RFC 9234 */
if (bgp_channel_is_role_applicable(s->channel))
{
struct bgp_proto *p = s->proto;
eattr *e = bgp_find_attr(*to, BA_ONLY_TO_CUSTOMER);
/* Reject routes from downstream if they are leaked */
if (e && (p->cf->local_role == BGP_ROLE_PROVIDER ||
p->cf->local_role == BGP_ROLE_RS_SERVER))
WITHDRAW("Route leak detected - OTC attribute from downstream");
/* Reject routes from peers if they are leaked */
if (e && (p->cf->local_role == BGP_ROLE_PEER) && (e->u.data != p->cf->remote_as))
WITHDRAW("Route leak detected - OTC attribute with mismatched ASN (%u)",
(uint) e->u.data);
/* Mark routes from upstream if this has not been done before */
if (!e && (p->cf->local_role == BGP_ROLE_CUSTOMER ||
p->cf->local_role == BGP_ROLE_PEER ||
p->cf->local_role == BGP_ROLE_RS_CLIENT))
bgp_set_attr_u32(to, BA_ONLY_TO_CUSTOMER, 0, p->cf->remote_as);
}
}
@ -1522,8 +1564,8 @@ bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to)
HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket)
void
bgp_init_bucket_table(struct bgp_channel *c)
static void
bgp_init_bucket_table(struct bgp_pending_tx *c)
{
HASH_INIT(c->bucket_hash, c->pool, 8);
@ -1531,24 +1573,8 @@ bgp_init_bucket_table(struct bgp_channel *c)
c->withdraw_bucket = NULL;
}
void
bgp_free_bucket_table(struct bgp_channel *c)
{
HASH_FREE(c->bucket_hash);
struct bgp_bucket *b;
WALK_LIST_FIRST(b, c->bucket_queue)
{
rem_node(&b->send_node);
mb_free(b);
}
mb_free(c->withdraw_bucket);
c->withdraw_bucket = NULL;
}
static struct bgp_bucket *
bgp_get_bucket(struct bgp_channel *c, ea_list *new)
bgp_get_bucket(struct bgp_pending_tx *c, ea_list *new)
{
/* Hash and lookup */
u32 hash = ea_hash(new);
@ -1577,7 +1603,7 @@ bgp_get_bucket(struct bgp_channel *c, ea_list *new)
}
static struct bgp_bucket *
bgp_get_withdraw_bucket(struct bgp_channel *c)
bgp_get_withdraw_bucket(struct bgp_pending_tx *c)
{
if (!c->withdraw_bucket)
{
@ -1589,15 +1615,17 @@ bgp_get_withdraw_bucket(struct bgp_channel *c)
}
static void
bgp_free_bucket_xx(struct bgp_channel *c, struct bgp_bucket *b)
bgp_free_bucket(struct bgp_pending_tx *c, struct bgp_bucket *b)
{
HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b);
mb_free(b);
}
int
bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b)
bgp_done_bucket(struct bgp_channel *bc, struct bgp_bucket *b)
{
struct bgp_pending_tx *c = bc->ptx;
/* Won't free the withdraw bucket */
if (b == c->withdraw_bucket)
return 0;
@ -1608,21 +1636,23 @@ bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b)
if (b->px_uc || !EMPTY_LIST(b->prefixes))
return 0;
bgp_free_bucket_xx(c, b);
bgp_free_bucket(c, b);
return 1;
}
void
bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b)
bgp_defer_bucket(struct bgp_channel *bc, struct bgp_bucket *b)
{
struct bgp_pending_tx *c = bc->ptx;
rem_node(&b->send_node);
add_tail(&c->bucket_queue, &b->send_node);
}
void
bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
bgp_withdraw_bucket(struct bgp_channel *bc, struct bgp_bucket *b)
{
struct bgp_proto *p = (void *) c->c.proto;
struct bgp_proto *p = (void *) bc->c.proto;
struct bgp_pending_tx *c = bc->ptx;
struct bgp_bucket *wb = bgp_get_withdraw_bucket(c);
log(L_ERR "%s: Attribute list too long", p->p.name);
@ -1643,7 +1673,7 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
#define PXH_KEY(px) px->net, px->path_id, px->hash
#define PXH_NEXT(px) px->next
#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && (c->add_path_tx ? (i1 == i2) : 1) && net_equal(n1, n2)
#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && (add_path_tx ? (i1 == i2) : 1) && net_equal(n1, n2)
#define PXH_FN(n,i,h) h
#define PXH_REHASH bgp_pxh_rehash
@ -1652,29 +1682,21 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
void
bgp_init_prefix_table(struct bgp_channel *c)
static void
bgp_init_prefix_table(struct bgp_channel *bc)
{
struct bgp_pending_tx *c = bc->ptx;
HASH_INIT(c->prefix_hash, c->pool, 8);
uint alen = net_addr_length[c->c.net_type];
uint alen = net_addr_length[bc->c.net_type];
c->prefix_slab = alen ? sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL;
}
void
bgp_free_prefix_table(struct bgp_channel *c)
{
HASH_FREE(c->prefix_hash);
rfree(c->prefix_slab);
c->prefix_slab = NULL;
}
static struct bgp_prefix *
bgp_get_prefix(struct bgp_channel *c, const net_addr *net, struct rte_src *src)
bgp_get_prefix(struct bgp_pending_tx *c, const net_addr *net, struct rte_src *src, int add_path_tx)
{
u32 path_id = src->global_id;
u32 path_id_hash = c->add_path_tx ? path_id : 0;
u32 path_id_hash = add_path_tx ? path_id : 0;
/* We must use a different hash function than the rtable */
u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id_hash));
struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id_hash, hash);
@ -1698,15 +1720,16 @@ bgp_get_prefix(struct bgp_channel *c, const net_addr *net, struct rte_src *src)
return px;
}
static void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px);
static void bgp_free_prefix(struct bgp_pending_tx *c, struct bgp_prefix *px);
static inline int
bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *b)
{
#define IS_WITHDRAW_BUCKET(b) ((b) == c->ptx->withdraw_bucket)
#define BPX_TRACE(what) do { \
if (c->c.debug & D_ROUTES) log(L_TRACE "%s.%s < %s %N %uG %s", \
c->c.proto->name, c->c.name, what, \
px->net, px->path_id, (b == c->withdraw_bucket) ? "withdraw" : "update"); } while (0)
px->net, px->path_id, IS_WITHDRAW_BUCKET(b) ? "withdraw" : "update"); } while (0)
px->lastmod = current_time();
/* Already queued for the same bucket */
@ -1724,7 +1747,7 @@ bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucke
}
/* The new bucket is the same as we sent before */
if ((px->last == b) || c->c.out_table && !px->last && (b == c->withdraw_bucket))
if ((px->last == b) || c->c.out_table && !px->last && IS_WITHDRAW_BUCKET(b))
{
if (px->cur)
BPX_TRACE("reverted");
@ -1733,15 +1756,15 @@ bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucke
/* Well, we haven't sent anything yet */
if (!px->last)
bgp_free_prefix(c, px);
bgp_free_prefix(c->ptx, px);
px->cur = NULL;
return 0;
}
/* Enqueue the bucket if it has been empty */
if ((b != c->withdraw_bucket) && EMPTY_LIST(b->prefixes))
add_tail(&c->bucket_queue, &b->send_node);
if (!IS_WITHDRAW_BUCKET(b) && EMPTY_LIST(b->prefixes))
add_tail(&c->ptx->bucket_queue, &b->send_node);
/* Enqueue to the new bucket and indicate the change */
add_tail(&b->prefixes, &px->buck_node_xx);
@ -1754,7 +1777,7 @@ bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucke
}
static void
bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px)
bgp_free_prefix(struct bgp_pending_tx *c, struct bgp_prefix *px)
{
HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px);
@ -1784,7 +1807,7 @@ bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket
px->last->px_uc--;
/* Ref the current sent version */
if (buck != c->withdraw_bucket)
if (!IS_WITHDRAW_BUCKET(buck))
{
px->last = buck;
px->last->px_uc++;
@ -1794,7 +1817,49 @@ bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket
/* Prefixes belonging to the withdraw bucket are freed always */
}
bgp_free_prefix(c, px);
bgp_free_prefix(c->ptx, px);
}
/*
 * Free hook of the "BGP Pending TX" resource class.
 *
 * Recovers the enclosing struct bgp_pending_tx from the resource header,
 * then walks every entry of prefix_hash and unlocks the route source found
 * by the entry's path_id via rt_find_source_global().
 * NOTE(review): presumably this balances a source lock taken when the
 * prefix was created -- confirm in bgp_get_prefix().
 */
static void
bgp_pending_tx_rfree(resource *r)
{
struct bgp_pending_tx *ptx = SKIP_BACK(struct bgp_pending_tx, r, r);
HASH_WALK(ptx->prefix_hash, next, n)
rt_unlock_source(rt_find_source_global(n->path_id));
HASH_WALK_END;
}
/* Dump hook of the "BGP Pending TX" resource class; emits only a newline. */
static void
bgp_pending_tx_dump(resource *r UNUSED)
{
  debug("\n");
}
/*
 * Resource class describing struct bgp_pending_tx. It is passed to ralloc()
 * in bgp_init_pending_tx(), and its .free hook unlocks the route sources
 * referenced from the prefix hash.
 */
static struct resclass bgp_pending_tx_class = {
.name = "BGP Pending TX",
.size = sizeof(struct bgp_pending_tx),
.free = bgp_pending_tx_rfree,
.dump = bgp_pending_tx_dump,
};
void
bgp_init_pending_tx(struct bgp_channel *c)
{
ASSERT_DIE(!c->ptx);
pool *p = rp_new(c->pool, "BGP Pending TX");
c->ptx = ralloc(p, &bgp_pending_tx_class);
c->ptx->pool = p;
bgp_init_bucket_table(c->ptx);
bgp_init_prefix_table(c);
}
void
bgp_free_pending_tx(struct bgp_channel *c)
{
ASSERT_DIE(c->ptx);
ASSERT_DIE(c->ptx->pool);
rfree(c->ptx->pool);
c->ptx = NULL;
}
@ -1806,7 +1871,8 @@ static void
bgp_out_table_feed(void *data)
{
struct rt_export_hook *hook = data;
struct bgp_channel *c = SKIP_BACK(struct bgp_channel, prefix_exporter, hook->table);
struct bgp_channel *bc = SKIP_BACK(struct bgp_channel, prefix_exporter, hook->table);
struct bgp_pending_tx *c = bc->ptx;
int max = 512;
@ -1901,8 +1967,8 @@ bgp_out_table_feed(void *data)
static struct rt_export_hook *
bgp_out_table_export_start(struct rt_exporter *re, struct rt_export_request *req UNUSED)
{
struct bgp_channel *c = SKIP_BACK(struct bgp_channel, prefix_exporter, re);
pool *p = rp_new(c->c.proto->pool, "Export hook");
struct bgp_channel *bc = SKIP_BACK(struct bgp_channel, prefix_exporter, re);
pool *p = rp_new(bc->c.proto->pool, "Export hook");
struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook));
hook->pool = p;
hook->event = ev_new_init(p, bgp_out_table_feed, hook);
@ -1937,6 +2003,7 @@ bgp_preexport(struct channel *C, rte *e)
{
struct bgp_proto *p = (struct bgp_proto *) C->proto;
struct bgp_proto *src = bgp_rte_proto(e);
struct bgp_channel *c = (struct bgp_channel *) C;
/* Reject our routes */
if (src == p)
@ -1976,11 +2043,11 @@ bgp_preexport(struct channel *C, rte *e)
}
/* Handle well-known communities, RFC 1997 */
struct eattr *com;
struct eattr *a;
if (p->cf->interpret_communities &&
(com = ea_find(e->attrs, BGP_EA_ID(BA_COMMUNITY))))
(a = bgp_find_attr(e->attrs, BA_COMMUNITY)))
{
const struct adata *d = com->u.ptr;
const struct adata *d = a->u.ptr;
/* Do not export anywhere */
if (int_set_contains(d, BGP_COMM_NO_ADVERTISE))
@ -1999,6 +2066,16 @@ bgp_preexport(struct channel *C, rte *e)
return -1;
}
/* Do not export routes marked with OTC to upstream, RFC 9234 */
if (bgp_channel_is_role_applicable(c))
{
a = bgp_find_attr(e->attrs, BA_ONLY_TO_CUSTOMER);
if (a && (p->cf->local_role==BGP_ROLE_CUSTOMER ||
p->cf->local_role==BGP_ROLE_PEER ||
p->cf->local_role==BGP_ROLE_RS_CLIENT))
return -1;
}
return 0;
}
@ -2107,6 +2184,16 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
}
}
/* Mark routes for downstream with OTC, RFC 9234 */
if (bgp_channel_is_role_applicable(c))
{
a = bgp_find_attr(attrs, BA_ONLY_TO_CUSTOMER);
if (!a && (p->cf->local_role == BGP_ROLE_PROVIDER ||
p->cf->local_role == BGP_ROLE_PEER ||
p->cf->local_role == BGP_ROLE_RS_SERVER))
bgp_set_attr_u32(&attrs, BA_ONLY_TO_CUSTOMER, 0, p->public_as);
}
/*
* Presence of mandatory attributes ORIGIN and AS_PATH is ensured by above
* conditions. Presence and validity of quasi-mandatory NEXT_HOP attribute
@ -2134,16 +2221,16 @@ bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, c
log(L_ERR "%s: Invalid route %N withdrawn", p->p.name, n);
/* If attributes are invalid, we fail back to withdraw */
buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c);
buck = attrs ? bgp_get_bucket(c->ptx, attrs) : bgp_get_withdraw_bucket(c->ptx);
path = new->src;
}
else
{
buck = bgp_get_withdraw_bucket(c);
buck = bgp_get_withdraw_bucket(c->ptx);
path = old->src;
}
if (bgp_update_prefix(c, bgp_get_prefix(c, n, path), buck))
if (bgp_update_prefix(c, bgp_get_prefix(c->ptx, n, path, c->add_path_tx), buck))
bgp_schedule_packet(p->conn, c, PKT_UPDATE);
}

View File

@ -102,6 +102,7 @@
* RFC 8212 - Default EBGP Route Propagation Behavior without Policies
* RFC 8654 - Extended Message Support for BGP
* RFC 9117 - Revised Validation Procedure for BGP Flow Specifications
* RFC 9234 - Route Leak Prevention and Detection Using Roles
* draft-ietf-idr-ext-opt-param-07
* draft-uttaro-idr-bgp-persistence-04
* draft-walton-bgp-hostname-capability-02
@ -518,6 +519,12 @@ bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len)
p->uncork_ev->data = NULL;
bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len);
bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len);
struct bgp_channel *c;
WALK_LIST(c, p->p.channels)
if (c->ptx)
bgp_free_pending_tx(c);
ev_schedule(p->event);
}
@ -787,10 +794,8 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
}
/* Reset bucket and prefix tables */
bgp_free_bucket_table(c);
bgp_free_prefix_table(c);
bgp_init_bucket_table(c);
bgp_init_prefix_table(c);
bgp_free_pending_tx(c);
bgp_init_pending_tx(c);
c->packets_to_send = 0;
}
@ -1806,8 +1811,7 @@ bgp_channel_start(struct channel *C)
if (c->cf->export_table)
bgp_setup_out_table(c);
bgp_init_bucket_table(c);
bgp_init_prefix_table(c);
bgp_init_pending_tx(c);
c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0);
@ -2017,6 +2021,15 @@ bgp_postconfig(struct proto_config *CF)
if (internal && cf->rs_client)
cf_error("Only external neighbor can be RS client");
if (internal && (cf->local_role != BGP_ROLE_UNDEFINED))
cf_error("Local role cannot be set on IBGP sessions");
if (interior && (cf->local_role != BGP_ROLE_UNDEFINED))
log(L_WARN "BGP roles are not recommended to be used within AS confederations");
if (cf->require_roles && (cf->local_role == BGP_ROLE_UNDEFINED))
cf_error("Local role must be set if roles are required");
if (!cf->confederation && cf->confederation_member)
cf_error("Confederation ID must be set for member sessions");
@ -2379,6 +2392,15 @@ bgp_show_afis(int code, char *s, u32 *afis, uint count)
cli_msg(code, b.start);
}
/*
 * Translate a BGP role code into its printable name.
 *
 * Returns "undefined" for BGP_ROLE_UNDEFINED, the table entry for codes that
 * index into the table (the entries follow the numbering of the BGP_ROLE_*
 * constants, 0 = provider ... 4 = peer), and "?" for anything else.
 */
static const char *
bgp_format_role_name(u8 role)
{
  static const char *bgp_role_names[] = { "provider", "rs_server", "rs_client", "customer", "peer" };

  if (role == BGP_ROLE_UNDEFINED)
    return "undefined";

  return (role < ARRAY_SIZE(bgp_role_names)) ? bgp_role_names[role] : "?";
}
static void
bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps)
{
@ -2507,6 +2529,9 @@ bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps)
if (caps->hostname)
cli_msg(-1006, " Hostname: %s", caps->hostname);
if (caps->role != BGP_ROLE_UNDEFINED)
cli_msg(-1006, " Role: %s", bgp_format_role_name(caps->role));
}
static void

View File

@ -113,6 +113,8 @@ struct bgp_config {
int gr_mode; /* Graceful restart mode (BGP_GR_*) */
int llgr_mode; /* Long-lived graceful restart mode (BGP_LLGR_*) */
int setkey; /* Set MD5 password to system SA/SP database */
u8 local_role; /* Set peering role with neighbor [RFC 9234] */
int require_roles; /* Require configured roles on both sides */
/* Times below are in seconds */
unsigned gr_time; /* Graceful restart timeout */
unsigned llgr_time; /* Long-lived graceful restart stale time */
@ -166,6 +168,13 @@ struct bgp_channel_config {
#define BGP_PT_INTERNAL 1
#define BGP_PT_EXTERNAL 2
#define BGP_ROLE_UNDEFINED 255
#define BGP_ROLE_PROVIDER 0
#define BGP_ROLE_RS_SERVER 1
#define BGP_ROLE_RS_CLIENT 2
#define BGP_ROLE_CUSTOMER 3
#define BGP_ROLE_PEER 4
#define NH_NO 0
#define NH_ALL 1
#define NH_IBGP 2
@ -226,6 +235,7 @@ struct bgp_caps {
u8 ext_messages; /* Extended message length, RFC draft */
u8 route_refresh; /* Route refresh capability, RFC 2918 */
u8 enhanced_refresh; /* Enhanced route refresh, RFC 7313 */
u8 role; /* BGP role capability, RFC 9234 */
u8 gr_aware; /* Graceful restart capability, RFC 4724 */
u8 gr_flags; /* Graceful restart flags */
@ -351,14 +361,8 @@ struct bgp_channel {
/* Rest are zeroed when down */
pool *pool;
HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */
struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */
HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */
slab *prefix_slab; /* Slab holding prefix nodes */
struct rt_exporter prefix_exporter; /* Table-like exporter for prefix_hash */
struct bgp_pending_tx *ptx; /* Routes waiting to be sent */
struct rt_exporter prefix_exporter; /* Table-like exporter for ptx */
ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */
ip_addr link_addr; /* Link-local version of next_hop_addr */
@ -401,6 +405,18 @@ struct bgp_bucket {
ea_list eattrs[0]; /* Per-bucket extended attributes */
};
struct bgp_pending_tx {
resource r;
pool *pool;
HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */
struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */
HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */
slab *prefix_slab; /* Slab holding prefix nodes */
};
struct bgp_export_state {
struct bgp_proto *proto;
struct bgp_channel *channel;
@ -494,6 +510,12 @@ static inline int bgp_cc_is_ipv4(struct bgp_channel_config *c)
static inline int bgp_cc_is_ipv6(struct bgp_channel_config *c)
{ return BGP_AFI(c->afi) == BGP_AFI_IPV6; }
/* Roles (RFC 9234) are applied only on channels whose AFI/SAFI is BGP_AF_IPV4 or BGP_AF_IPV6 */
static inline int
bgp_channel_is_role_applicable(struct bgp_channel *c)
{
  if (c->afi == BGP_AF_IPV4)
    return 1;

  return c->afi == BGP_AF_IPV6;
}
/* Config-time counterpart: true when the channel config's AFI/SAFI is BGP_AF_IPV4 or BGP_AF_IPV6 */
static inline int
bgp_cc_is_role_applicable(struct bgp_channel_config *c)
{
  if (c->afi == BGP_AF_IPV4)
    return 1;

  return c->afi == BGP_AF_IPV6;
}
static inline uint bgp_max_packet_length(struct bgp_conn *conn)
{ return conn->ext_messages ? BGP_MAX_EXT_MSG_LENGTH : BGP_MAX_MESSAGE_LENGTH; }
@ -567,13 +589,12 @@ void bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to);
void bgp_setup_out_table(struct bgp_channel *c);
void bgp_init_bucket_table(struct bgp_channel *c);
void bgp_free_bucket_table(struct bgp_channel *c);
void bgp_init_pending_tx(struct bgp_channel *c);
void bgp_free_pending_tx(struct bgp_channel *c);
void bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b);
int bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b);
void bgp_init_prefix_table(struct bgp_channel *c);
void bgp_free_prefix_table(struct bgp_channel *c);
void bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck);
int bgp_rte_better(struct rte *, struct rte *);
@ -662,6 +683,7 @@ enum bgp_attr_id {
BA_AS4_AGGREGATOR = 0x12, /* RFC 6793 */
BA_AIGP = 0x1a, /* RFC 7311 */
BA_LARGE_COMMUNITY = 0x20, /* RFC 8092 */
#define BA_ONLY_TO_CUSTOMER 0x23 /* RFC 9234 */
/* Bird's private internal BGP attributes */
BA_MPLS_LABEL_STACK = 0x100, /* MPLS label stack transfer attribute */

View File

@ -30,7 +30,8 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE,
STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6, LONG,
LIVED, STALE, IMPORT, IBGP, EBGP, MANDATORY, INTERNAL, EXTERNAL, SETS,
DYNAMIC, RANGE, NAME, DIGITS, AIGP, ORIGINATE, COST, ENFORCE,
FIRST, FREE, VALIDATE, BASE)
FIRST, FREE, VALIDATE, BASE, ROLE, ROLES, PEER, PROVIDER, CUSTOMER,
RS_SERVER, RS_CLIENT, REQUIRE, BGP_OTC)
%type <i> bgp_nh
%type <i32> bgp_afi
@ -39,7 +40,7 @@ CF_KEYWORDS(CEASE, PREFIX, LIMIT, HIT, ADMINISTRATIVE, SHUTDOWN, RESET, PEER,
CONFIGURATION, CHANGE, DECONFIGURED, CONNECTION, REJECTED, COLLISION,
OUT, OF, RESOURCES)
%type<i> bgp_cease_mask bgp_cease_list bgp_cease_flag
%type<i> bgp_cease_mask bgp_cease_list bgp_cease_flag bgp_role_name
CF_GRAMMAR
@ -73,6 +74,7 @@ bgp_proto_start: proto_start BGP {
BGP_CFG->llgr_mode = -1;
BGP_CFG->llgr_time = 3600;
BGP_CFG->setkey = 1;
BGP_CFG->local_role = BGP_ROLE_UNDEFINED;
BGP_CFG->dynamic_name = "dynbgp";
BGP_CFG->check_link = -1;
}
@ -115,6 +117,14 @@ bgp_cease_flag:
| OUT OF RESOURCES { $$ = 1 << 8; }
;
/* Role keyword of the LOCAL ROLE statement; yields the matching BGP_ROLE_* value */
bgp_role_name:
PEER { $$ = BGP_ROLE_PEER; }
| PROVIDER { $$ = BGP_ROLE_PROVIDER; }
| CUSTOMER { $$ = BGP_ROLE_CUSTOMER; }
| RS_SERVER { $$ = BGP_ROLE_RS_SERVER; }
| RS_CLIENT { $$ = BGP_ROLE_RS_CLIENT; }
;
bgp_proto:
bgp_proto_start proto_name '{'
| bgp_proto proto_item ';'
@ -198,6 +208,8 @@ bgp_proto:
| bgp_proto BFD GRACEFUL ';' { init_bfd_opts(&BGP_CFG->bfd); BGP_CFG->bfd->mode = BGP_BFD_GRACEFUL; }
| bgp_proto BFD { open_bfd_opts(&BGP_CFG->bfd); } bfd_opts { close_bfd_opts(); } ';'
| bgp_proto ENFORCE FIRST AS bool ';' { BGP_CFG->enforce_first_as = $5; }
| bgp_proto LOCAL ROLE bgp_role_name ';' { BGP_CFG->local_role = $4; }
| bgp_proto REQUIRE ROLES bool ';' { BGP_CFG->require_roles = $4; }
;
bgp_afi:

View File

@ -238,6 +238,7 @@ bgp_prepare_capabilities(struct bgp_conn *conn)
caps->ext_messages = p->cf->enable_extended_messages;
caps->route_refresh = p->cf->enable_refresh;
caps->enhanced_refresh = p->cf->enable_refresh;
caps->role = p->cf->local_role;
if (caps->as4_support)
caps->as4_number = p->public_as;
@ -350,6 +351,13 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
*buf++ = 0; /* Capability data length */
}
if (caps->role != BGP_ROLE_UNDEFINED)
{
*buf++ = 9; /* Capability 9: Announce chosen BGP role */
*buf++ = 1; /* Capability data length */
*buf++ = caps->role;
}
if (caps->gr_aware)
{
*buf++ = 64; /* Capability 64: Support for graceful restart */
@ -449,11 +457,15 @@ bgp_read_capabilities(struct bgp_conn *conn, byte *pos, int len)
struct bgp_proto *p = conn->bgp;
struct bgp_caps *caps;
struct bgp_af_caps *ac;
uint err_subcode = 0;
int i, cl;
u32 af;
if (!conn->remote_caps)
{
caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + sizeof(struct bgp_af_caps));
caps->role = BGP_ROLE_UNDEFINED;
}
else
{
caps = conn->remote_caps;
@ -513,6 +525,21 @@ bgp_read_capabilities(struct bgp_conn *conn, byte *pos, int len)
caps->ext_messages = 1;
break;
case 9: /* BGP role capability, RFC 9234 */
if (cl != 1)
goto err;
/* Reserved value */
if (pos[2] == BGP_ROLE_UNDEFINED)
{ err_subcode = 11; goto err; }
/* Multiple inconsistent values */
if ((caps->role != BGP_ROLE_UNDEFINED) && (caps->role != pos[2]))
{ err_subcode = 11; goto err; }
caps->role = pos[2];
break;
case 64: /* Graceful restart capability, RFC 4724 */
if (cl % 4 != 2)
goto err;
@ -638,7 +665,7 @@ bgp_read_capabilities(struct bgp_conn *conn, byte *pos, int len)
err:
mb_free(caps);
bgp_error(conn, 2, 0, NULL, 0);
bgp_error(conn, 2, err_subcode, NULL, 0);
return -1;
}
@ -854,6 +881,22 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
conn->received_as = asn;
}
/* RFC 9234 4.2 - check role agreement */
u8 local_role = p->cf->local_role;
u8 neigh_role = caps->role;
if ((local_role != BGP_ROLE_UNDEFINED) &&
(neigh_role != BGP_ROLE_UNDEFINED) &&
!((local_role == BGP_ROLE_PEER && neigh_role == BGP_ROLE_PEER) ||
(local_role == BGP_ROLE_CUSTOMER && neigh_role == BGP_ROLE_PROVIDER) ||
(local_role == BGP_ROLE_PROVIDER && neigh_role == BGP_ROLE_CUSTOMER) ||
(local_role == BGP_ROLE_RS_CLIENT && neigh_role == BGP_ROLE_RS_SERVER) ||
(local_role == BGP_ROLE_RS_SERVER && neigh_role == BGP_ROLE_RS_CLIENT)))
{ bgp_error(conn, 2, 11, NULL, 0); return; }
if ((p->cf->require_roles) && (neigh_role == BGP_ROLE_UNDEFINED))
{ bgp_error(conn, 2, 11, NULL, 0); return; }
/* Check the other connection */
other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
switch (other->state)
@ -2169,7 +2212,7 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
* var IPv4 Network Layer Reachability Information
*/
ASSERT_DIE(s->channel->withdraw_bucket != buck);
ASSERT_DIE(s->channel->ptx->withdraw_bucket != buck);
int lr, la;
@ -2192,7 +2235,7 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
static byte *
bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
{
ASSERT_DIE(s->channel->withdraw_bucket != buck);
ASSERT_DIE(s->channel->ptx->withdraw_bucket != buck);
/*
* 2 B IPv4 Withdrawn Routes Length (zero)
@ -2332,7 +2375,7 @@ again: ;
};
/* Try unreachable bucket */
if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
if ((buck = c->ptx->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
{
res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ?
bgp_create_ip_unreach(&s, buck, buf, end):
@ -2342,9 +2385,9 @@ again: ;
}
/* Try reachable buckets */
if (!EMPTY_LIST(c->bucket_queue))
if (!EMPTY_LIST(c->ptx->bucket_queue))
{
buck = HEAD(c->bucket_queue);
buck = HEAD(c->ptx->bucket_queue);
/* Cleanup empty buckets */
if (bgp_done_bucket(c, buck))
@ -2977,6 +3020,7 @@ static struct {
{ 2, 6, "Unacceptable hold time" },
{ 2, 7, "Required capability missing" }, /* [RFC5492] */
{ 2, 8, "No supported AFI/SAFI" }, /* This error msg is nonstandard */
{ 2,11, "Role mismatch" }, /* From Open Policy, RFC 9234 */
{ 3, 0, "Invalid UPDATE message" },
{ 3, 1, "Malformed attribute list" },
{ 3, 2, "Unrecognized well-known attribute" },

View File

@ -4,7 +4,6 @@ Available configuration variables:
CONFIG_AUTO_ROUTES Device routes are added automagically by the kernel
CONFIG_SELF_CONSCIOUS We're able to recognize whether route was installed by us
CONFIG_MULTIPLE_TABLES The kernel supports multiple routing tables
CONFIG_ALL_TABLES_AT_ONCE Kernel scanner wants to process all tables at once
CONFIG_SINGLE_ROUTE There is only one route per network
CONFIG_MC_PROPER_SRC Multicast packets have source address according to socket saddr field

View File

@ -9,7 +9,6 @@
#define CONFIG_AUTO_ROUTES
#define CONFIG_SELF_CONSCIOUS
#define CONFIG_MULTIPLE_TABLES
#define CONFIG_ALL_TABLES_AT_ONCE
#define CONFIG_IP6_SADR_KERNEL
#define CONFIG_MC_PROPER_SRC

View File

@ -257,16 +257,13 @@ nl_open_sock(struct nl_sock *nl)
}
}
static void
static int
nl_set_strict_dump(struct nl_sock *nl UNUSED, int strict UNUSED)
{
/*
* Strict checking is not necessary, it improves behavior on newer kernels.
* If it is not available (missing SOL_NETLINK compile-time, or ENOPROTOOPT
* run-time), we can just ignore it.
*/
#ifdef SOL_NETLINK
setsockopt(nl->fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &strict, sizeof(strict));
return setsockopt(nl->fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &strict, sizeof(strict));
#else
return -1;
#endif
}
@ -294,10 +291,17 @@ nl_cfg_rx_buffer_size(struct config *cfg)
static void
nl_open(void)
{
if ((nl_scan.fd >= 0) && (nl_req.fd >= 0))
return;
nl_open_sock(&nl_scan);
nl_open_sock(&nl_req);
nl_set_strict_dump(&nl_scan, 1);
if (nl_set_strict_dump(&nl_scan, 1) < 0)
{
log(L_WARN "KRT: Netlink strict checking failed, will scan all tables at once");
krt_use_shared_scan();
}
}
static void
@ -352,11 +356,13 @@ nl_request_dump_addr(int af)
}
static void
nl_request_dump_route(int af)
nl_request_dump_route(int af, int table_id)
{
struct {
struct nlmsghdr nh;
struct rtmsg rtm;
struct rtattr rta;
u32 table_id;
} req = {
.nh.nlmsg_type = RTM_GETROUTE,
.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)),
@ -365,7 +371,17 @@ nl_request_dump_route(int af)
.rtm.rtm_family = af,
};
send(nl_scan.fd, &req, sizeof(req), 0);
if (table_id < 256)
req.rtm.rtm_table = table_id;
else
{
req.rta.rta_type = RTA_TABLE;
req.rta.rta_len = RTA_LENGTH(4);
req.table_id = table_id;
req.nh.nlmsg_len = NLMSG_ALIGN(req.nh.nlmsg_len) + req.rta.rta_len;
}
send(nl_scan.fd, &req, req.nh.nlmsg_len, 0);
nl_scan.last_hdr = NULL;
}
@ -2072,18 +2088,27 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
}
void
krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
krt_do_scan(struct krt_proto *p)
{
struct nlmsghdr *h;
struct nl_parse_state s;
nl_parse_begin(&s, 1);
nl_request_dump_route(AF_UNSPEC);
/* Table-specific scan or shared scan */
if (p)
nl_request_dump_route(p->af, krt_table_id(p));
else
nl_request_dump_route(AF_UNSPEC, 0);
while (h = nl_get_scan())
{
if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
nl_parse_route(&s, h);
else
log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
}
nl_parse_end(&s);
}

View File

@ -578,18 +578,17 @@ krt_got_route_async(struct krt_proto *p, rte *e, int new, s8 src)
}
}
/*
* Periodic scanning
*/
#ifdef CONFIG_ALL_TABLES_AT_ONCE
static timer *krt_scan_timer;
static int krt_scan_count;
static timer *krt_scan_all_timer;
static int krt_scan_all_count;
static _Bool krt_scan_all_tables;
static void
krt_scan(timer *t UNUSED)
krt_scan_all(timer *t UNUSED)
{
struct krt_proto *p;
node *n;
@ -610,35 +609,42 @@ krt_scan(timer *t UNUSED)
}
static void
krt_scan_timer_start(struct krt_proto *p)
krt_scan_all_timer_start(struct krt_proto *p)
{
if (!krt_scan_count)
krt_scan_timer = tm_new_init(krt_pool, krt_scan, NULL, KRT_CF->scan_time, 0);
if (!krt_scan_all_count)
krt_scan_all_timer = tm_new_init(krt_pool, krt_scan_all, NULL, KRT_CF->scan_time, 0);
krt_scan_count++;
krt_scan_all_count++;
tm_start(krt_scan_timer, 1 S);
tm_start(krt_scan_all_timer, 1 S);
}
static void
krt_scan_timer_stop(struct krt_proto *p UNUSED)
krt_scan_all_timer_stop(void)
{
krt_scan_count--;
ASSERT(krt_scan_all_count > 0);
if (!krt_scan_count)
krt_scan_all_count--;
if (!krt_scan_all_count)
{
rfree(krt_scan_timer);
krt_scan_timer = NULL;
rfree(krt_scan_all_timer);
krt_scan_all_timer = NULL;
}
}
static void
krt_scan_timer_kick(struct krt_proto *p UNUSED)
krt_scan_all_timer_kick(void)
{
tm_start(krt_scan_timer, 0);
tm_start(krt_scan_all_timer, 0);
}
/*
 * Switch kernel route scanning to shared mode by setting the
 * krt_scan_all_tables flag. krt_scan_timer_start(), krt_scan_timer_stop()
 * and krt_scan_timer_kick() consult this flag and then use the shared
 * krt_scan_all timer instead of the per-protocol scan timer.
 */
void
krt_use_shared_scan(void)
{
krt_scan_all_tables = 1;
}
#else
static void
krt_scan(timer *t)
@ -656,26 +662,33 @@ krt_scan(timer *t)
static void
krt_scan_timer_start(struct krt_proto *p)
{
p->scan_timer = tm_new_init(p->p.pool, krt_scan, p, KRT_CF->scan_time, 0);
tm_start(p->scan_timer, 1 S);
if (krt_scan_all_tables)
krt_scan_all_timer_start(p);
else
{
p->scan_timer = tm_new_init(p->p.pool, krt_scan, p, KRT_CF->scan_time, 0);
tm_start(p->scan_timer, 1 S);
}
}
static void
krt_scan_timer_stop(struct krt_proto *p)
{
tm_stop(p->scan_timer);
if (krt_scan_all_tables)
krt_scan_all_timer_stop();
else
tm_stop(p->scan_timer);
}
static void
krt_scan_timer_kick(struct krt_proto *p)
{
tm_start(p->scan_timer, 0);
if (krt_scan_all_tables)
krt_scan_all_timer_kick();
else
tm_start(p->scan_timer, 0);
}
#endif
/*
* Updates
@ -792,11 +805,6 @@ krt_postconfig(struct proto_config *CF)
if (! proto_cf_main_channel(CF))
cf_error("Channel not specified");
#ifdef CONFIG_ALL_TABLES_AT_ONCE
if (krt_cf->scan_time != cf->scan_time)
cf_error("All kernel syncers must use the same table scan interval");
#endif
struct channel_config *cc = proto_cf_main_channel(CF);
struct rtable_config *tab = cc->table;
if (tab->krt_attached)

View File

@ -21,11 +21,6 @@ struct kif_proto;
#define KRT_DEFAULT_ECMP_LIMIT 16
#if 0
#define EA_KRT_SOURCE EA_CODE(PROTOCOL_KERNEL, 0)
#define EA_KRT_METRIC EA_CODE(PROTOCOL_KERNEL, 1)
#endif
extern struct ea_class ea_krt_source, ea_krt_metric;
#define KRT_REF_SEEN 0x1 /* Seen in table */
@ -55,10 +50,7 @@ struct krt_proto {
struct proto p;
struct krt_state sys; /* Sysdep state */
#ifndef CONFIG_ALL_TABLES_AT_ONCE
timer *scan_timer;
#endif
struct bmap sync_map; /* Keeps track which exported routes were successfully written to kernel */
struct bmap seen_map; /* Routes seen during last periodic scan */
node krt_node; /* Node in krt_proto_list */
@ -80,6 +72,7 @@ extern pool *krt_pool;
struct proto_config * kif_init_config(int class);
void kif_request_scan(void);
void krt_use_shared_scan(void);
void krt_got_route(struct krt_proto *p, struct rte *e, s8 src);
void krt_got_route_async(struct krt_proto *p, struct rte *e, int new, s8 src);