diff --git a/README b/README index daeb18bd..5d4bd9b2 100644 --- a/README +++ b/README @@ -64,6 +64,7 @@ What do we support: o Static routes o Inter-table protocol o IPv6 router advertisements + o Bidirectional Forwarding Detection (BFD) o Command-line interface (using the `birdc' client; to get some help, just press `?') o Soft reconfiguration -- no online commands for changing the diff --git a/doc/bird.sgml b/doc/bird.sgml index 0cc5cab9..237da8d4 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -1610,7 +1610,7 @@ RFC 4271 It also supports the community attributes (RFC 1997), capability negotiation -(RFC 3392), +(RFC 5492), MD5 password authentication (RFC 2385), extended communities @@ -1746,7 +1746,11 @@ using the following configuration parameters: scan time - Time in seconds between two scans of the network interface list. On systems where we are notified about interface status changes asynchronously (such as newer versions of Linux), we need to scan the @@ -2266,6 +2269,18 @@ limitations can be overcome using another routing table and the pipe protocol. a graceful restart recovery is active, the Kernel protocol will defer synchronization of routing tables until the end of the recovery. Note that import of kernel routes to BIRD is not affected. + + merge paths switch [limit number] + Usually, only best routes are exported to the kernel protocol. With path + merging enabled, both best routes and equivalent non-best routes are + merged during export to generate one ECMP (equal-cost multipath) route + for each network. This is useful e.g. for BGP multipath. Note that best + routes are still pivotal for route export (responsible for most + properties of resulting ECMP routes), while exported non-best routes are + responsible just for additional multipath next hops. This option also + allows specifying a limit on the maximal number of next hops in one route. By + default, multipath merging is disabled. If enabled, the default value of the + limit is 16.
Attributes @@ -2293,6 +2308,20 @@ these attributes: The realm of the route. Can be used for traffic classification. +

In Linux, there are also plenty of obscure route attributes, mostly focused +on tuning TCP performance of local connections. BIRD supports most of these +attributes; see the Linux or iproute2 documentation for their meaning. Attributes +Example

A simple configuration can look this way: @@ -3421,7 +3450,9 @@ of the protocol contains mainly a list of static routes: route - Static route through a neighboring router. + Static route through a neighboring router. For link-local next hops, + interface can be specified as a part of the address (e.g., + route Static multipath route. Contains several nexthops (gateways), possibly diff --git a/filter/filter.c b/filter/filter.c index 3b14fc0c..55062aca 100644 --- a/filter/filter.c +++ b/filter/filter.c @@ -471,26 +471,22 @@ static inline void f_rte_cow(void) static void f_rta_cow(void) { - if ((*f_rte)->attrs->aflags & RTAF_CACHED) { + if (!rta_is_cached((*f_rte)->attrs)) + return; - /* Prepare to modify rte */ - f_rte_cow(); + /* Prepare to modify rte */ + f_rte_cow(); - /* Store old rta to free it later */ - f_old_rta = (*f_rte)->attrs; + /* Store old rta to free it later, it stores reference from rte_cow() */ + f_old_rta = (*f_rte)->attrs; - /* - * Alloc new rta, do shallow copy and update rte. Fields eattrs - * and nexthops of rta are shared with f_old_rta (they will be - * copied when the cached rta will be obtained at the end of - * f_run()), also the lock of hostentry is inherited (we suppose - * hostentry is not changed by filters). - */ - rta *ra = lp_alloc(f_pool, sizeof(rta)); - memcpy(ra, f_old_rta, sizeof(rta)); - ra->aflags = 0; - (*f_rte)->attrs = ra; - } + /* + * Get shallow copy of rta. Fields eattrs and nexthops of rta are shared + * with f_old_rta (they will be copied when the cached rta will be obtained + * at the end of f_run()), also the lock of hostentry is inherited (we + * suppose hostentry is not changed by filters). 
+ */ + (*f_rte)->attrs = rta_do_cow((*f_rte)->attrs, f_pool); } static struct tbf rl_runtime_err = TBF_DEFAULT_LOG_LIMITS; @@ -1531,6 +1527,30 @@ f_run(struct filter *filter, struct rte **rte, struct ea_list **tmp_attrs, struc return res.val.i; } +/* TODO: perhaps we could integrate f_eval(), f_eval_rte() and f_run() */ + +struct f_val +f_eval_rte(struct f_inst *expr, struct rte **rte, struct linpool *tmp_pool) +{ + struct ea_list *tmp_attrs = NULL; + + f_rte = rte; + f_old_rta = NULL; + f_tmp_attrs = &tmp_attrs; + f_pool = tmp_pool; + f_flags = 0; + + LOG_BUFFER_INIT(f_buf); + + /* Note that in this function we assume that rte->attrs is private / uncached */ + struct f_val res = interpret(expr); + + /* Hack to include EAF_TEMP attributes to the main list */ + (*rte)->attrs->eattrs = ea_append(tmp_attrs, (*rte)->attrs->eattrs); + + return res; +} + struct f_val f_eval(struct f_inst *expr, struct linpool *tmp_pool) { diff --git a/filter/filter.h b/filter/filter.h index 2b2d23c2..e59c8226 100644 --- a/filter/filter.h +++ b/filter/filter.h @@ -107,6 +107,7 @@ struct ea_list; struct rte; int f_run(struct filter *filter, struct rte **rte, struct ea_list **tmp_attrs, struct linpool *tmp_pool, int flags); +struct f_val f_eval_rte(struct f_inst *expr, struct rte **rte, struct linpool *tmp_pool); struct f_val f_eval(struct f_inst *expr, struct linpool *tmp_pool); uint f_eval_int(struct f_inst *expr); u32 f_eval_asn(struct f_inst *expr); diff --git a/lib/birdlib.h b/lib/birdlib.h index 77d03a8b..24e769db 100644 --- a/lib/birdlib.h +++ b/lib/birdlib.h @@ -33,6 +33,7 @@ #endif #define ABS(a) ((a)>=0 ? 
(a) : -(a)) +#define DELTA(a,b) (((a)>=(b))?(a)-(b):(b)-(a)) #define ARRAY_SIZE(a) (sizeof(a)/sizeof(*(a))) diff --git a/nest/bfd.h b/nest/bfd.h index 79c3c921..f1e95cb2 100644 --- a/nest/bfd.h +++ b/nest/bfd.h @@ -32,6 +32,12 @@ struct bfd_request { }; +#define BFD_STATE_ADMIN_DOWN 0 +#define BFD_STATE_DOWN 1 +#define BFD_STATE_INIT 2 +#define BFD_STATE_UP 3 + + #ifdef CONFIG_BFD struct bfd_request * bfd_request_session(pool *p, ip_addr addr, ip_addr local, struct iface *iface, void (*hook)(struct bfd_request *), void *data); diff --git a/nest/config.Y b/nest/config.Y index 4172f3b3..93eedd64 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -96,6 +96,7 @@ rtrid: idval: NUM { $$ = $1; } + | '(' term ')' { $$ = f_eval_int($2); } | RTRID | IPA { #ifndef IPV6 @@ -104,6 +105,16 @@ idval: cf_error("Router IDs must be entered as hexadecimal numbers or IPv4 addresses in IPv6 version"); #endif } + | SYM { /* Named constant as ID value: int/quad taken as-is; ip constant converted to u32 only when IPV6 is not defined */ + if ($1->class == (SYM_CONSTANT | T_INT) || $1->class == (SYM_CONSTANT | T_QUAD)) + $$ = SYM_VAL($1).i; +#ifndef IPV6 + else if ($1->class == (SYM_CONSTANT | T_IP)) + $$ = ipa_to_u32(SYM_VAL($1).px.ip); +#endif + else + cf_error("Number or IPv4 address constant expected"); + } ; @@ -185,16 +196,18 @@ proto_name: } | FROM SYM { struct symbol *s = cf_default_name(this_proto->protocol->template, &this_proto->protocol->name_counter); + s->class = this_proto->class; + s->def = this_proto; this_proto->name = s->name; + if (($2->class != SYM_TEMPLATE) && ($2->class != SYM_PROTO)) cf_error("Template or protocol name expected"); proto_copy_config(this_proto, $2->def); } | SYM FROM SYM { - if (($3->class != SYM_TEMPLATE) && ($3->class != SYM_PROTO)) cf_error("Template or protocol name expected"); - cf_define_symbol($1, this_proto->class, this_proto); this_proto->name = $1->name; + if (($3->class != SYM_TEMPLATE) && ($3->class != SYM_PROTO)) cf_error("Template or protocol name expected"); proto_copy_config(this_proto, $3->def); } ; diff --git a/nest/protocol.h b/nest/protocol.h
index a51e9afd..8c49154f 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -158,6 +158,7 @@ struct proto { byte gr_wait; /* Route export to protocol is postponed until graceful restart */ byte down_sched; /* Shutdown is scheduled for later (PDS_*) */ byte down_code; /* Reason for shutdown (PDC_* codes) */ + byte merge_limit; /* Maximal number of nexthops for RA_MERGED */ u32 hash_key; /* Random key used for hashing of neighbors */ bird_clock_t last_state_change; /* Time of last state transition */ char *last_state_name_announced; /* Last state name we've announced to the user */ @@ -200,6 +201,7 @@ struct proto { * rte_recalculate Called at the beginning of the best route selection * rte_better Compare two rte's and decide which one is better (1=first, 0=second). * rte_same Compare two rte's and decide whether they are identical (1=yes, 0=no). + * rte_mergable Compare two rte's and decide whether they could be merged (1=yes, 0=no). * rte_insert Called whenever a rte is inserted to a routing table. * rte_remove Called whenever a rte is removed from the routing table. 
*/ @@ -207,6 +209,7 @@ struct proto { int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *); int (*rte_better)(struct rte *, struct rte *); int (*rte_same)(struct rte *, struct rte *); + int (*rte_mergable)(struct rte *, struct rte *); void (*rte_insert)(struct network *, struct rte *); void (*rte_remove)(struct network *, struct rte *); diff --git a/nest/route.h b/nest/route.h index 095a2474..0d3a85f8 100644 --- a/nest/route.h +++ b/nest/route.h @@ -241,6 +241,7 @@ static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED); #define RA_OPTIMAL 1 /* Announcement of optimal route change */ #define RA_ACCEPTED 2 /* Announcement of first accepted route */ #define RA_ANY 3 /* Announcement of any route change */ +#define RA_MERGED 4 /* Announcement of optimal route merged with next ones */ /* Return value of import_control() callback */ #define RIC_ACCEPT 1 /* Accepted by protocol */ @@ -264,12 +265,14 @@ void rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *s static inline void rte_update(struct proto *p, net *net, rte *new) { rte_update2(p->main_ahook, net, new, p->main_source); } void rte_discard(rtable *tab, rte *old); int rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter); +rte *rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, struct ea_list **tmpa, int silent); void rt_refresh_begin(rtable *t, struct announce_hook *ah); void rt_refresh_end(rtable *t, struct announce_hook *ah); void rte_dump(rte *); void rte_free(rte *); rte *rte_do_cow(rte *); static inline rte * rte_cow(rte *r) { return (r->flags & REF_COW) ? 
rte_do_cow(r) : r; } +rte *rte_cow_rta(rte *r, linpool *lp); void rt_dump(rtable *); void rt_dump_all(void); int rt_feed_baby(struct proto *p); @@ -389,6 +392,12 @@ typedef struct rta { #define IGP_METRIC_UNKNOWN 0x80000000 /* Default igp_metric used when no other protocol-specific metric is availabe */ + +/* Route has regular, reachable nexthop (i.e. not RTD_UNREACHABLE and like) */ +static inline int rte_is_reachable(rte *r) +{ uint d = r->attrs->dest; return (d == RTD_ROUTER) || (d == RTD_DEVICE) || (d == RTD_MULTIPATH); } + + /* * Extended Route Attributes */ @@ -483,6 +492,7 @@ void ea_format_bitfield(struct eattr *a, byte *buf, int bufsize, const char **na int mpnh__same(struct mpnh *x, struct mpnh *y); /* Compare multipath nexthops */ static inline int mpnh_same(struct mpnh *x, struct mpnh *y) { return (x == y) || mpnh__same(x, y); } +struct mpnh *mpnh_merge(struct mpnh *x, struct mpnh *y, int rx, int ry, int max, linpool *lp); void rta_init(void); rta *rta_lookup(rta *); /* Get rta equivalent to this one, uc++ */ @@ -490,6 +500,8 @@ static inline int rta_is_cached(rta *r) { return r->aflags & RTAF_CACHED; } static inline rta *rta_clone(rta *r) { r->uc++; return r; } void rta__free(rta *r); static inline void rta_free(rta *r) { if (r && !--r->uc) rta__free(r); } +rta *rta_do_cow(rta *o, linpool *lp); +static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? 
rta_do_cow(r, lp) : r; } void rta_dump(rta *); void rta_dump_all(void); void rta_show(struct cli *, rta *, ea_list *); diff --git a/nest/rt-attr.c b/nest/rt-attr.c index 85a192c8..7fa05d6d 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -167,7 +167,7 @@ rt_get_source(struct proto *p, u32 id) src->private_id = id; src->global_id = rte_src_alloc_id(); src->uc = 0; - + HASH_INSERT2(src_hash, RSH, rta_pool, src); return src; @@ -215,6 +215,94 @@ mpnh__same(struct mpnh *x, struct mpnh *y) return x == y; } +static int +mpnh_compare_node(struct mpnh *x, struct mpnh *y) +{ + int r; + + if (!x) + return 1; + + if (!y) + return -1; + + r = ((int) y->weight) - ((int) x->weight); + if (r) + return r; + + r = ipa_compare(x->gw, y->gw); + if (r) + return r; + + return ((int) x->iface->index) - ((int) y->iface->index); +} + +static inline struct mpnh * +mpnh_copy_node(const struct mpnh *src, linpool *lp) +{ + struct mpnh *n = lp_alloc(lp, sizeof(struct mpnh)); + n->gw = src->gw; + n->iface = src->iface; + n->next = NULL; + n->weight = src->weight; + return n; +} + +/** + * mpnh_merge - merge nexthop lists + * @x: list 1 + * @y: list 2 + * @rx: reusability of list @x + * @ry: reusability of list @y + * @max: max number of nexthops + * @lp: linpool for allocating nexthops + * + * The mpnh_merge() function takes two nexthop lists @x and @y and merges them, + * eliminating possible duplicates. The input lists must be sorted and the + * result is sorted too. The number of nexthops in result is limited by @max. + * New nodes are allocated from linpool @lp. + * + * The arguments @rx and @ry specify whether corresponding input lists may be + * consumed by the function (i.e. their nodes reused in the resulting list), in + * that case the caller should not access these lists after that. To eliminate + * issues with deallocation of these lists, the caller should use some form of + * bulk deallocation (e.g. 
stack or linpool) to free these nodes when the + * resulting list is no longer needed. When reusability is not set, the + * corresponding lists are not modified nor linked from the resulting list. + */ +struct mpnh * +mpnh_merge(struct mpnh *x, struct mpnh *y, int rx, int ry, int max, linpool *lp) +{ + struct mpnh *root = NULL; + struct mpnh **n = &root; + + while ((x || y) && max--) + { + int cmp = mpnh_compare_node(x, y); + if (cmp < 0) + { + *n = rx ? x : mpnh_copy_node(x, lp); + x = x->next; + } + else if (cmp > 0) + { + *n = ry ? y : mpnh_copy_node(y, lp); + y = y->next; + } + else + { + *n = rx ? x : (ry ? y : mpnh_copy_node(x, lp)); + x = x->next; + y = y->next; + } + n = &((*n)->next); + } + *n = NULL; + + return root; +} + + static struct mpnh * mpnh_copy(struct mpnh *o) { @@ -635,7 +723,7 @@ get_generic_attr(eattr *a, byte **buf, int buflen UNUSED) *buf += bsprintf(*buf, "igp_metric"); return GA_NAME; } - + return GA_UNKNOWN; } @@ -741,7 +829,7 @@ ea_show(struct cli *c, eattr *e) } else if (EA_PROTO(e->id)) pos += bsprintf(pos, "%02x.", EA_PROTO(e->id)); - else + else status = get_generic_attr(e, &pos, end - pos); if (status < GA_NAME) @@ -1050,6 +1138,16 @@ rta__free(rta *a) sl_free(rta_slab, a); } +rta * +rta_do_cow(rta *o, linpool *lp) +{ + rta *r = lp_alloc(lp, sizeof(rta)); + memcpy(r, o, sizeof(rta)); + r->aflags = 0; + r->uc = 0; + return r; +} + /** * rta_dump - dump route attributes * @a: attribute structure to dump diff --git a/nest/rt-dev.c b/nest/rt-dev.c index 87ffc5ec..f6bc1432 100644 --- a/nest/rt-dev.c +++ b/nest/rt-dev.c @@ -51,7 +51,10 @@ dev_ifa_notify(struct proto *p, unsigned c, struct ifa *ad) DBG("dev_if_notify: device shutdown: prefix not found\n"); return; } - rte_update(p, n, NULL); + + /* Use iface ID as local source ID */ + struct rte_src *src = rt_get_source(p, ad->iface->index); + rte_update2(p->main_ahook, n, NULL, src); } else if (c & IF_CHANGE_UP) { @@ -61,8 +64,11 @@ dev_ifa_notify(struct proto *p, unsigned c, struct ifa 
*ad) DBG("dev_if_notify: %s:%I going up\n", ad->iface->name, ad->ip); + /* Use iface ID as local source ID */ + struct rte_src *src = rt_get_source(p, ad->iface->index); + rta a0 = { - .src = p->main_source, + .src = src, .source = RTS_DEVICE, .scope = SCOPE_UNIVERSE, .cast = RTC_UNICAST, @@ -75,7 +81,7 @@ dev_ifa_notify(struct proto *p, unsigned c, struct ifa *ad) e = rte_get_temp(a); e->net = n; e->pflags = 0; - rte_update(p, n, e); + rte_update2(p->main_ahook, n, e, src); } } diff --git a/nest/rt-table.c b/nest/rt-table.c index 884e2827..e38c3ddb 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -153,6 +153,38 @@ rte_do_cow(rte *r) return e; } +/** + * rte_cow_rta - get a private writable copy of &rte with writable &rta + * @r: a route entry to be copied + * @lp: a linpool from which to allocate &rta + * + * rte_cow_rta() takes a &rte and prepares it and associated &rta for + * modification. There are three possibilities: First, both &rte and &rta are + * private copies, in that case they are returned unchanged. Second, &rte is + * private copy, but &rta is cached, in that case &rta is duplicated using + * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case + * both structures are duplicated by rte_do_cow() and rta_do_cow(). + * + * Note that in the second case, cached &rta loses one reference, while private + * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs, + * nexthops, ...) with it. To work properly, original shared &rta should have + * another reference during the life of created private copy. + * + * Result: a pointer to the new writable &rte with writable &rta. 
+ */ +rte * +rte_cow_rta(rte *r, linpool *lp) +{ + if (!rta_is_cached(r->attrs)) + return r; + + rte *e = rte_cow(r); + rta *a = rta_do_cow(r->attrs, lp); + rta_free(e->attrs); + e->attrs = a; + return e; +} + static int /* Actually better or at least as good as */ rte_better(rte *new, rte *old) { @@ -181,6 +213,26 @@ rte_better(rte *new, rte *old) return 0; } +static int +rte_mergable(rte *pri, rte *sec) +{ + int (*mergable)(rte *, rte *); + + if (!rte_is_valid(pri) || !rte_is_valid(sec)) + return 0; + + if (pri->pref != sec->pref) + return 0; + + if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto) + return 0; + + if (mergable = pri->attrs->src->proto->rte_mergable) + return mergable(pri, sec); + + return 0; +} + static void rte_trace(struct proto *p, rte *e, int dir, char *msg) { @@ -217,12 +269,10 @@ export_filter(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa, rt = rt0; *rt_free = NULL; - /* If called does not care for eattrs, we prepare one internally */ if (!tmpa) - { - tmpb = make_tmp_attrs(rt, rte_update_pool); - tmpa = &tmpb; - } + tmpa = &tmpb; + + *tmpa = make_tmp_attrs(rt, rte_update_pool); v = p->import_control ? 
p->import_control(p, &rt, tmpa, rte_update_pool) : 0; if (v < 0) @@ -356,7 +406,7 @@ do_rt_notify(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tm } static void -rt_notify_basic(struct announce_hook *ah, net *net, rte *new0, rte *old0, ea_list *tmpa, int refeed) +rt_notify_basic(struct announce_hook *ah, net *net, rte *new0, rte *old0, int refeed) { struct proto *p = ah->proto; struct proto_stats *stats = ah->stats; @@ -365,6 +415,7 @@ rt_notify_basic(struct announce_hook *ah, net *net, rte *new0, rte *old0, ea_lis rte *old = old0; rte *new_free = NULL; rte *old_free = NULL; + ea_list *tmpa = NULL; if (new) stats->exp_updates_received++; @@ -428,17 +479,17 @@ rt_notify_basic(struct announce_hook *ah, net *net, rte *new0, rte *old0, ea_lis } static void -rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed, rte *before_old, - ea_list *tmpa, int feed) +rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed) { // struct proto *p = ah->proto; struct proto_stats *stats = ah->stats; + rte *r; rte *new_best = NULL; rte *old_best = NULL; rte *new_free = NULL; rte *old_free = NULL; - rte *r; + ea_list *tmpa = NULL; /* Used to track whether we met old_changed position. If before_old is NULL old_changed was the first and we met it implicitly before current best route. */ @@ -545,6 +596,129 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol rte_free(old_free); } + +static struct mpnh * +mpnh_merge_rta(struct mpnh *nhs, rta *a, int max) +{ + struct mpnh nh = { .gw = a->gw, .iface = a->iface }; + struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? 
a->nexthops : &nh; + return mpnh_merge(nhs, nh2, 1, 0, max, rte_update_pool); +} + +rte * +rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tmpa, int silent) +{ + // struct proto *p = ah->proto; + struct mpnh *nhs = NULL; + rte *best0, *best, *rt0, *rt, *tmp; + + best0 = net->routes; + *rt_free = NULL; + + if (!rte_is_valid(best0)) + return NULL; + + best = export_filter(ah, best0, rt_free, tmpa, silent); + + if (!best || !rte_is_reachable(best)) + return best; + + for (rt0 = best0->next; rt0; rt0 = rt0->next) + { + if (!rte_mergable(best0, rt0)) + continue; + + rt = export_filter(ah, rt0, &tmp, NULL, 1); + + if (!rt) + continue; + + if (rte_is_reachable(rt)) + nhs = mpnh_merge_rta(nhs, rt->attrs, ah->proto->merge_limit); + + if (tmp) + rte_free(tmp); + } + + if (nhs) + { + nhs = mpnh_merge_rta(nhs, best->attrs, ah->proto->merge_limit); + + if (nhs->next) + { + best = rte_cow_rta(best, rte_update_pool); + best->attrs->dest = RTD_MULTIPATH; + best->attrs->nexthops = nhs; + } + } + + if (best != best0) + *rt_free = best; + + return best; +} + + +static void +rt_notify_merged(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed, + rte *new_best, rte*old_best, int refeed) +{ + // struct proto *p = ah->proto; + + rte *new_best_free = NULL; + rte *old_best_free = NULL; + rte *new_changed_free = NULL; + rte *old_changed_free = NULL; + ea_list *tmpa = NULL; + + /* We assume that all rte arguments are either NULL or rte_is_valid() */ + + /* This check should be done by the caller */ + if (!new_best && !old_best) + return; + + /* Check whether the change is relevant to the merged route */ + if ((new_best == old_best) && !refeed) + { + new_changed = rte_mergable(new_best, new_changed) ? + export_filter(ah, new_changed, &new_changed_free, NULL, 1) : NULL; + + old_changed = rte_mergable(old_best, old_changed) ? 
+ export_filter(ah, old_changed, &old_changed_free, NULL, 1) : NULL; + + if (!new_changed && !old_changed) + return; + } + + if (new_best) + ah->stats->exp_updates_received++; + else + ah->stats->exp_withdraws_received++; + + /* Prepare new merged route */ + if (new_best) + new_best = rt_export_merged(ah, net, &new_best_free, &tmpa, 0); + + /* Prepare old merged route (without proper merged next hops) */ + /* There are some issues with running filter on old route - see rt_notify_basic() */ + if (old_best && !refeed) + old_best = export_filter(ah, old_best, &old_best_free, NULL, 1); + + if (new_best || old_best) + do_rt_notify(ah, net, new_best, old_best, tmpa, refeed); + + /* Discard temporary rte's */ + if (new_best_free) + rte_free(new_best_free); + if (old_best_free) + rte_free(old_best_free); + if (new_changed_free) + rte_free(new_changed_free); + if (old_changed_free) + rte_free(old_changed_free); +} + + /** * rte_announce - announce a routing table change * @tab: table the route has been added to @@ -552,7 +726,6 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol * @net: network in question * @new: the new route to be announced * @old: the previous route for the same network - * @tmpa: a list of temporary attributes belonging to the new route * * This function gets a routing table update and announces it * to all protocols that acccepts given type of route announcement @@ -575,13 +748,20 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol * the protocol gets called. 
*/ static void -rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *before_old, ea_list *tmpa) +rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, + rte *new_best, rte *old_best, rte *before_old) { + if (!rte_is_valid(new)) + new = NULL; + if (!rte_is_valid(old)) old = before_old = NULL; - if (!rte_is_valid(new)) - new = NULL; + if (!rte_is_valid(new_best)) + new_best = NULL; + + if (!rte_is_valid(old_best)) + old_best = NULL; if (!old && !new) return; @@ -603,9 +783,11 @@ rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *befo ASSERT(a->proto->export_state != ES_DOWN); if (a->proto->accept_ra_types == type) if (type == RA_ACCEPTED) - rt_notify_accepted(a, net, new, old, before_old, tmpa, 0); + rt_notify_accepted(a, net, new, old, before_old, 0); + else if (type == RA_MERGED) + rt_notify_merged(a, net, new, old, new_best, old_best, 0); else - rt_notify_basic(a, net, new, old, tmpa, 0); + rt_notify_basic(a, net, new, old, 0); } } @@ -668,7 +850,7 @@ rte_same(rte *x, rte *y) static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); } static void -rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, struct rte_src *src) +rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *src) { struct proto *p = ah->proto; struct rtable *table = ah->table; @@ -909,11 +1091,12 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str } /* Propagate the route change */ - rte_announce(table, RA_ANY, net, new, old, NULL, tmpa); + rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL); if (net->routes != old_best) - rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, tmpa); + rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL); if (table->config->sorted) - rte_announce(table, RA_ACCEPTED, net, new, old, before_old, tmpa); + rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, 
before_old); + rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL); if (!net->routes && (table->gc_counter++ >= table->config->gc_max_ops) && @@ -1078,7 +1261,7 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src) recalc: rte_hide_dummy_routes(net, &dummy); - rte_recalculate(ah, net, new, tmpa, src); + rte_recalculate(ah, net, new, src); rte_unhide_dummy_routes(net, &dummy); rte_update_unlock(); return; @@ -1086,20 +1269,17 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src) drop: rte_free(new); new = NULL; - tmpa = NULL; goto recalc; } /* Independent call to rte_announce(), used from next hop recalculation, outside of rte_update(). new must be non-NULL */ static inline void -rte_announce_i(rtable *tab, unsigned type, net *n, rte *new, rte *old) +rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old, + rte *new_best, rte *old_best) { - ea_list *tmpa; - rte_update_lock(); - tmpa = make_tmp_attrs(new, rte_update_pool); - rte_announce(tab, type, n, new, old, NULL, tmpa); + rte_announce(tab, type, net, new, old, new_best, old_best, NULL); rte_update_unlock(); } @@ -1107,7 +1287,7 @@ void rte_discard(rtable *t, rte *old) /* Non-filtered route deletion, used during garbage collection */ { rte_update_lock(); - rte_recalculate(old->sender, old->net, NULL, NULL, old->attrs->src); + rte_recalculate(old->sender, old->net, NULL, old->attrs->src); rte_update_unlock(); } @@ -1565,7 +1745,7 @@ rt_next_hop_update_net(rtable *tab, net *n) new = rt_next_hop_update_rte(tab, e); *k = new; - rte_announce_i(tab, RA_ANY, n, new, e); + rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL); rte_trace_in(D_ROUTES, new->sender->proto, new, "updated"); /* Call a pre-comparison hook */ @@ -1605,10 +1785,13 @@ rt_next_hop_update_net(rtable *tab, net *n) /* Announce the new best route */ if (new != old_best) { - rte_announce_i(tab, RA_OPTIMAL, n, new, old_best); + rte_announce_i(tab, RA_OPTIMAL, n, 
new, old_best, NULL, NULL); rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]"); } + /* FIXME: Better announcement of merged routes */ + rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best); + if (free_old_best) rte_free_quick(old_best); @@ -1777,14 +1960,13 @@ rt_commit(struct config *new, struct config *old) static inline void do_feed_baby(struct proto *p, int type, struct announce_hook *h, net *n, rte *e) { - ea_list *tmpa; - rte_update_lock(); - tmpa = make_tmp_attrs(e, rte_update_pool); if (type == RA_ACCEPTED) - rt_notify_accepted(h, n, e, NULL, NULL, tmpa, p->refeeding ? 2 : 1); + rt_notify_accepted(h, n, e, NULL, NULL, p->refeeding ? 2 : 1); + else if (type == RA_MERGED) + rt_notify_merged(h, n, NULL, NULL, e, p->refeeding ? e : NULL, p->refeeding); else - rt_notify_basic(h, n, e, p->refeeding ? e : NULL, tmpa, p->refeeding); + rt_notify_basic(h, n, e, p->refeeding ? e : NULL, p->refeeding); rte_update_unlock(); } @@ -1830,20 +2012,26 @@ again: /* XXXX perhaps we should change feed for RA_ACCEPTED to not use 'new' */ if ((p->accept_ra_types == RA_OPTIMAL) || - (p->accept_ra_types == RA_ACCEPTED)) + (p->accept_ra_types == RA_ACCEPTED) || + (p->accept_ra_types == RA_MERGED)) if (rte_is_valid(e)) { if (p->export_state != ES_FEEDING) return 1; /* In the meantime, the protocol fell down. */ + do_feed_baby(p, p->accept_ra_types, h, n, e); max_feed--; } if (p->accept_ra_types == RA_ANY) - for(e = n->routes; rte_is_valid(e); e = e->next) + for(e = n->routes; e; e = e->next) { if (p->export_state != ES_FEEDING) return 1; /* In the meantime, the protocol fell down. 
*/ + + if (!rte_is_valid(e)) + continue; + do_feed_baby(p, RA_ANY, h, n, e); max_feed--; } @@ -2290,12 +2478,22 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) rte_update_lock(); /* We use the update buffer for filtering */ tmpa = make_tmp_attrs(e, rte_update_pool); - if (d->export_mode) + /* Special case for merged export */ + if ((d->export_mode == RSEM_EXPORT) && (d->export_protocol->accept_ra_types == RA_MERGED)) + { + rte *rt_free; + e = rt_export_merged(a, n, &rt_free, &tmpa, 1); + pass = 1; + + if (!e) + { e = ee; goto skip; } + } + else if (d->export_mode) { struct proto *ep = d->export_protocol; int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0; - if (ep->accept_ra_types == RA_OPTIMAL) + if (ep->accept_ra_types == RA_OPTIMAL || ep->accept_ra_types == RA_MERGED) pass = 1; if (ic < 0) diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c index 5f089846..7a085791 100644 --- a/proto/bfd/bfd.c +++ b/proto/bfd/bfd.c @@ -43,7 +43,7 @@ * the needs of BFD sessions. When a new session is created, it requests a * proper BFD interface by function bfd_get_iface(), which either finds an * existing one in &iface_list (from &bfd_proto) or allocates a new one. When a - * session is removed, an associated iface is dicharged by bfd_free_iface(). + * session is removed, an associated iface is discharged by bfd_free_iface(). * * BFD requests are the external API for the other protocols. When a protocol * wants a BFD session, it calls bfd_request_session(), which creates a @@ -62,7 +62,7 @@ * configuration (like static routes in the static protocol). BFD neighbors are * handled by BFD protocol like it is a BFD client -- when a BFD neighbor is * ready, the protocol just creates a BFD request like any other protocol. - * + * * The protocol uses a new generic event loop (structure &birdloop) from |io.c|, * which supports sockets, timers and events like the main loop. 
Timers * (structure &timer2) are new microsecond based timers, while sockets and @@ -129,11 +129,11 @@ static inline void bfd_notify_kick(struct bfd_proto *p); * BFD sessions */ -static void +static void bfd_session_update_state(struct bfd_session *s, uint state, uint diag) { struct bfd_proto *p = s->ifa->bfd; - uint old_state = s->loc_state; + uint old_state = s->loc_state; int notify; if (state == old_state) @@ -201,8 +201,8 @@ bfd_session_control_tx_timer(struct bfd_session *s, int reset) if (s->passive && (s->rem_id == 0)) goto stop; - if (s->rem_demand_mode && - !s->poll_active && + if (s->rem_demand_mode && + !s->poll_active && (s->loc_state == BFD_STATE_UP) && (s->rem_state == BFD_STATE_UP)) goto stop; @@ -303,7 +303,7 @@ bfd_session_process_ctl(struct bfd_session *s, u8 flags, u32 old_tx_int, u32 old bfd_send_ctl(s->ifa->bfd, s, 1); } -static void +static void bfd_session_timeout(struct bfd_session *s) { struct bfd_proto *p = s->ifa->bfd; @@ -353,7 +353,7 @@ bfd_session_set_min_rx(struct bfd_session *s, u32 val) if (val == s->req_min_rx_new) return; - s->req_min_rx_new = val; + s->req_min_rx_new = val; /* Postpone timer update if req_min_rx_int decreases and the session is up */ if ((s->loc_state != BFD_STATE_UP) || (val > s->req_min_rx_int)) @@ -575,9 +575,13 @@ bfd_free_iface(struct bfd_iface *ifa) if (!ifa || --ifa->uc) return; + if (ifa->sk) + { + sk_stop(ifa->sk); + rfree(ifa->sk); + } + rem_node(&ifa->n); - sk_stop(ifa->sk); - rfree(ifa->sk); mb_free(ifa); } @@ -873,7 +877,7 @@ bfd_notify_hook(sock *sk, int len) diag = s->loc_diag; bfd_unlock_sessions(p); - /* FIXME: convert to btime and move to bfd_session_update_state() */ + /* FIXME: convert to btime and move to bfd_session_update_state() */ s->last_state_change = now; s->notify_running = 1; @@ -1092,7 +1096,7 @@ bfd_show_sessions(struct proto *P) /* FIXME: this is thread-unsafe, but perhaps harmless */ state = s->loc_state; diag = s->loc_diag; - ifname = (s->ifa && s->ifa->sk->iface) ? 
s->ifa->sk->iface->name : "---"; + ifname = (s->ifa && s->ifa->iface) ? s->ifa->iface->name : "---"; tx_int = s->last_tx ? (MAX(s->des_min_tx_int, s->rem_min_rx_int) TO_MS) : 0; timeout = (MAX(s->req_min_rx_int, s->rem_min_tx_int) TO_MS) * s->rem_detect_mult; diff --git a/proto/bfd/packets.c b/proto/bfd/packets.c index b5fd6782..cb40bcda 100644 --- a/proto/bfd/packets.c +++ b/proto/bfd/packets.c @@ -63,9 +63,13 @@ void bfd_send_ctl(struct bfd_proto *p, struct bfd_session *s, int final) { sock *sk = s->ifa->sk; - struct bfd_ctl_packet *pkt = (struct bfd_ctl_packet *) sk->tbuf; + struct bfd_ctl_packet *pkt; char fb[8]; + if (!sk) + return; + + pkt = (struct bfd_ctl_packet *) sk->tbuf; pkt->vdiag = bfd_pack_vdiag(1, s->loc_diag); pkt->flags = bfd_pack_flags(s->loc_state, 0); pkt->detect_mult = s->detect_mult; @@ -139,7 +143,7 @@ bfd_rx_hook(sock *sk, int len) u8 ps = bfd_pkt_get_state(pkt); if (ps > BFD_STATE_DOWN) DROP("invalid init state", ps); - + s = bfd_find_session_by_addr(p, sk->faddr); /* FIXME: better session matching and message */ diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index d56c017d..d85afa8f 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -1312,6 +1312,82 @@ bgp_rte_better(rte *new, rte *old) } +int +bgp_rte_mergable(rte *pri, rte *sec) +{ + struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->attrs->src->proto; + struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->attrs->src->proto; + eattr *x, *y; + u32 p, s; + + /* Skip suppressed routes (see bgp_rte_recalculate()) */ + if (pri->u.bgp.suppressed != sec->u.bgp.suppressed) + return 0; + + /* RFC 4271 9.1.2.1. Route resolvability test */ + if (!rte_resolvable(sec)) + return 0; + + /* Start with local preferences */ + x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF)); + y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF)); + p = x ? x->u.data : pri_bgp->cf->default_local_pref; + s = y ? 
y->u.data : sec_bgp->cf->default_local_pref; + if (p != s) + return 0; + + /* RFC 4271 9.1.2.2. a) Use AS path lengths */ + if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths) + { + x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; + s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; + + if (p != s) + return 0; + +// if (DELTA(p, s) > pri_bgp->cf->relax_multipath) +// return 0; + } + + /* RFC 4271 9.1.2.2. b) Use origins */ + x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN)); + y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN)); + p = x ? x->u.data : ORIGIN_INCOMPLETE; + s = y ? y->u.data : ORIGIN_INCOMPLETE; + if (p != s) + return 0; + + /* RFC 4271 9.1.2.2. c) Compare MED's */ + if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric || + (bgp_get_neighbor(pri) == bgp_get_neighbor(sec))) + { + x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); + y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); + p = x ? x->u.data : pri_bgp->cf->default_med; + s = y ? y->u.data : sec_bgp->cf->default_med; + if (p != s) + return 0; + } + + /* RFC 4271 9.1.2.2. d) Prefer external peers */ + if (pri_bgp->is_internal != sec_bgp->is_internal) + return 0; + + /* RFC 4271 9.1.2.2. e) Compare IGP metrics */ + p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0; + s = sec_bgp->cf->igp_metric ? 
sec->attrs->igp_metric : 0; + if (p != s) + return 0; + + /* Remaining criteria are ignored */ + + return 1; +} + + + static inline int same_group(rte *r, u32 lpref, u32 lasn) { diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 2eb2ff17..5723f8f3 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -571,6 +571,7 @@ bgp_send_open(struct bgp_conn *conn) conn->peer_gr_time = 0; conn->peer_gr_flags = 0; conn->peer_gr_aflags = 0; + conn->peer_ext_messages_support = 0; DBG("BGP: Sending open\n"); conn->sk->rx_hook = bgp_rx; @@ -735,8 +736,8 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c s->dport = p->cf->remote_port; s->iface = p->neigh ? p->neigh->iface : NULL; s->ttl = p->cf->ttl_security ? 255 : hops; - s->rbsize = BGP_RX_BUFFER_SIZE; - s->tbsize = BGP_TX_BUFFER_SIZE; + s->rbsize = p->cf->enable_extended_messages ? BGP_RX_BUFFER_EXT_SIZE : BGP_RX_BUFFER_SIZE; + s->tbsize = p->cf->enable_extended_messages ? BGP_TX_BUFFER_EXT_SIZE : BGP_TX_BUFFER_SIZE; s->tos = IP_PREC_INTERNET_CONTROL; s->password = p->cf->password; s->tx_hook = bgp_connected; @@ -845,6 +846,13 @@ bgp_incoming_connection(sock *sk, int dummy UNUSED) if (sk_set_min_ttl(sk, 256 - hops) < 0) goto err; + if (p->cf->enable_extended_messages) + { + sk->rbsize = BGP_RX_BUFFER_EXT_SIZE; + sk->tbsize = BGP_TX_BUFFER_EXT_SIZE; + sk_reallocate(sk); + } + bgp_setup_conn(p, &p->incoming_conn); bgp_setup_sk(&p->incoming_conn, sk); bgp_send_open(&p->incoming_conn); @@ -1245,6 +1253,7 @@ bgp_init(struct proto_config *C) P->feed_begin = bgp_feed_begin; P->feed_end = bgp_feed_end; P->rte_better = bgp_rte_better; + P->rte_mergable = bgp_rte_mergable; P->rte_recalculate = c->deterministic_med ? 
bgp_rte_recalculate : NULL; p->cf = c; @@ -1520,21 +1529,23 @@ bgp_show_proto_info(struct proto *P) else if (P->proto_state == PS_UP) { cli_msg(-1006, " Neighbor ID: %R", p->remote_id); - cli_msg(-1006, " Neighbor caps: %s%s%s%s%s%s", + cli_msg(-1006, " Neighbor caps: %s%s%s%s%s%s%s", c->peer_refresh_support ? " refresh" : "", c->peer_enhanced_refresh_support ? " enhanced-refresh" : "", c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""), c->peer_as4_support ? " AS4" : "", (c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "", - (c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : ""); - cli_msg(-1006, " Session: %s%s%s%s%s%s%s", + (c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "", + c->peer_ext_messages_support ? " ext-messages" : ""); + cli_msg(-1006, " Session: %s%s%s%s%s%s%s%s", p->is_internal ? "internal" : "external", p->cf->multihop ? " multihop" : "", p->rr_client ? " route-reflector" : "", p->rs_client ? " route-server" : "", p->as4_session ? " AS4" : "", p->add_path_rx ? " add-path-rx" : "", - p->add_path_tx ? " add-path-tx" : ""); + p->add_path_tx ? " add-path-tx" : "", + p->ext_messages ? 
" ext-messages" : ""); cli_msg(-1006, " Source address: %I", p->source_addr); if (P->cf->in_limit) cli_msg(-1006, " Route limit: %d/%d", diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index b40ee9b9..f64d8a15 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -40,6 +40,7 @@ struct bgp_config { int capabilities; /* Enable capability handshake [RFC3392] */ int enable_refresh; /* Enable local support for route refresh [RFC2918] */ int enable_as4; /* Enable local support for 4B AS numbers [RFC4893] */ + int enable_extended_messages; /* Enable local support for extended messages [draft] */ u32 rr_cluster_id; /* Route reflector cluster ID, if different from local ID */ int rr_client; /* Whether neighbor is RR client of me */ int rs_client; /* Whether neighbor is RS client of me */ @@ -109,6 +110,7 @@ struct bgp_conn { u16 peer_gr_time; u8 peer_gr_flags; u8 peer_gr_aflags; + u8 peer_ext_messages_support; /* Peer supports extended message length [draft] */ unsigned hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */ }; @@ -121,6 +123,7 @@ struct bgp_proto { u8 as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */ u8 add_path_rx; /* Session expects receive of ADD-PATH extended NLRI */ u8 add_path_tx; /* Session expects transmit of ADD-PATH extended NLRI */ + u8 ext_messages; /* Session allows to use extended messages (both sides support it) */ u32 local_id; /* BGP identifier of this router */ u32 remote_id; /* BGP identifier of the neighbor */ u32 rr_cluster_id; /* Route reflector cluster ID */ @@ -181,11 +184,17 @@ struct bgp_bucket { #define BGP_PORT 179 #define BGP_VERSION 4 #define BGP_HEADER_LENGTH 19 -#define BGP_MAX_PACKET_LENGTH 4096 +#define BGP_MAX_MESSAGE_LENGTH 4096 +#define BGP_MAX_EXT_MSG_LENGTH 65535 #define BGP_RX_BUFFER_SIZE 4096 -#define BGP_TX_BUFFER_SIZE BGP_MAX_PACKET_LENGTH +#define BGP_TX_BUFFER_SIZE 4096 +#define BGP_RX_BUFFER_EXT_SIZE 65535 +#define BGP_TX_BUFFER_EXT_SIZE 65535 #define 
BGP_ATTR_BUFFER_SIZE 2048 /* Default buffer size for encoded bgp attributes */ +static inline int bgp_max_packet_length(struct bgp_proto *p) +{ return p->ext_messages ? BGP_MAX_EXT_MSG_LENGTH : BGP_MAX_MESSAGE_LENGTH; } + extern struct linpool *bgp_linpool; @@ -240,6 +249,7 @@ byte *bgp_attach_attr_wa(struct ea_list **to, struct linpool *pool, unsigned att struct rta *bgp_decode_attrs(struct bgp_conn *conn, byte *a, uint len, struct linpool *pool, int mandatory); int bgp_get_attr(struct eattr *e, byte *buf, int buflen); int bgp_rte_better(struct rte *, struct rte *); +int bgp_rte_mergable(rte *pri, rte *sec); int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best); void bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs); int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *); diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 49afe5ae..85b93a6b 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -27,7 +27,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, BGP_CLUSTER_LIST, IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX, GRACEFUL, RESTART, AWARE, - CHECK, LINK, PORT) + CHECK, LINK, PORT, EXTENDED, MESSAGES) CF_GRAMMAR @@ -108,6 +108,7 @@ bgp_proto: | bgp_proto DISABLE AFTER ERROR bool ';' { BGP_CFG->disable_after_error = $5; } | bgp_proto ENABLE ROUTE REFRESH bool ';' { BGP_CFG->enable_refresh = $5; } | bgp_proto ENABLE AS4 bool ';' { BGP_CFG->enable_as4 = $4; } + | bgp_proto ENABLE EXTENDED MESSAGES bool ';' { BGP_CFG->enable_extended_messages = $5; } | bgp_proto CAPABILITIES bool ';' { BGP_CFG->capabilities = $3; } | bgp_proto ADVERTISE IPV4 bool ';' { BGP_CFG->advertise_ipv4 = $4; } | bgp_proto PASSWORD text ';' { BGP_CFG->password = $3; } diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 
74fb637c..a117895c 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -84,8 +84,9 @@ mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4) static void mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, unsigned len) { - byte buf[BGP_MAX_PACKET_LENGTH + 128]; + byte *buf = alloca(128+len); /* 128 is enough for MRT headers */ byte *bp = buf + MRT_HDR_LENGTH; + int as4 = conn->bgp->as4_session; bp = mrt_put_bgp4_hdr(bp, conn, as4); @@ -222,6 +223,14 @@ bgp_put_cap_err(struct bgp_proto *p UNUSED, byte *buf) return buf; } +static byte * +bgp_put_cap_ext_msg(struct bgp_proto *p UNUSED, byte *buf) +{ + *buf++ = 230; /* Capability TBD: Support for extended messages */ + *buf++ = 0; /* Capability data length */ + return buf; +} + static byte * bgp_create_open(struct bgp_conn *conn, byte *buf) @@ -273,6 +282,9 @@ bgp_create_open(struct bgp_conn *conn, byte *buf) if (p->cf->enable_refresh) cap = bgp_put_cap_err(p, cap); + if (p->cf->enable_extended_messages) + cap = bgp_put_cap_ext_msg(p, cap); + cap_len = cap - buf - 12; if (cap_len > 0) { @@ -341,7 +353,7 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) { struct bgp_proto *p = conn->bgp; struct bgp_bucket *buck; - int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4; + int remains = bgp_max_packet_length(p) - BGP_HEADER_LENGTH - 4; byte *w; int wd_size = 0; int r_size = 0; @@ -427,7 +439,7 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) struct bgp_proto *p = conn->bgp; struct bgp_bucket *buck; int size, second, rem_stored; - int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4; + int remains = bgp_max_packet_length(p) - BGP_HEADER_LENGTH - 4; byte *w, *w_stored, *tmp, *tstart; ip_addr *ipp, ip, ip_ll; ea_list *ea; @@ -855,6 +867,12 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len) conn->peer_enhanced_refresh_support = 1; break; + case 230: /* Extended message length capability, draft, cap number TBD */ + if (cl != 0) + goto err; + 
conn->peer_ext_messages_support = 1; + break; + /* We can safely ignore all other capabilities */ } len -= 2 + cl; @@ -1018,6 +1036,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) p->add_path_rx = (p->cf->add_path & ADD_PATH_RX) && (conn->peer_add_path & ADD_PATH_TX); p->add_path_tx = (p->cf->add_path & ADD_PATH_TX) && (conn->peer_add_path & ADD_PATH_RX); p->gr_ready = p->cf->gr_mode && conn->peer_gr_able; + p->ext_messages = p->cf->enable_extended_messages && conn->peer_ext_messages_support; if (p->add_path_tx) p->p.accept_ra_types = RA_ANY; @@ -1417,7 +1436,7 @@ static struct { { 2, 4, "Unsupported optional parameter" }, { 2, 5, "Authentication failure" }, { 2, 6, "Unacceptable hold time" }, - { 2, 7, "Required capability missing" }, /* [RFC3392] */ + { 2, 7, "Required capability missing" }, /* [RFC5492] */ { 2, 8, "No supported AFI/SAFI" }, /* This error msg is nonstandard */ { 3, 0, "Invalid UPDATE message" }, { 3, 1, "Malformed attribute list" }, @@ -1665,6 +1684,7 @@ int bgp_rx(sock *sk, int size) { struct bgp_conn *conn = sk->data; + struct bgp_proto *p = conn->bgp; byte *pkt_start = sk->rbuf; byte *end = pkt_start + size; unsigned i, len; @@ -1681,7 +1701,7 @@ bgp_rx(sock *sk, int size) break; } len = get_u16(pkt_start+16); - if (len < BGP_HEADER_LENGTH || len > BGP_MAX_PACKET_LENGTH) + if (len < BGP_HEADER_LENGTH || len > bgp_max_packet_length(p)) { bgp_error(conn, 1, 2, pkt_start+16, 2); break; diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c index 656184c6..9b0f7797 100644 --- a/proto/ospf/iface.c +++ b/proto/ospf/iface.c @@ -1344,9 +1344,9 @@ ospf_iface_info(struct ospf_iface *ifa) cli_msg(-1015, "\tRetransmit timer: %u", ifa->rxmtint); if ((ifa->type == OSPF_IT_BCAST) || (ifa->type == OSPF_IT_NBMA)) { - cli_msg(-1015, "\tDesigned router (ID): %R", ifa->drid); - cli_msg(-1015, "\tDesigned router (IP): %I", ifa->drip); - cli_msg(-1015, "\tBackup designed router (ID): %R", ifa->bdrid); - cli_msg(-1015, "\tBackup designed router (IP): %I", 
ifa->bdrip); + cli_msg(-1015, "\tDesignated router (ID): %R", ifa->drid); + cli_msg(-1015, "\tDesignated router (IP): %I", ifa->drip); + cli_msg(-1015, "\tBackup designated router (ID): %R", ifa->bdrid); + cli_msg(-1015, "\tBackup designated router (IP): %I", ifa->bdrip); } } diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c index 1bc4e077..d5d5d354 100644 --- a/proto/ospf/ospf.c +++ b/proto/ospf/ospf.c @@ -450,10 +450,21 @@ ospf_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool if (oa_is_stub(oa)) return -1; /* Do not export routes to stub areas */ - eattr *ea = ea_find(e->attrs->eattrs, EA_GEN_IGP_METRIC); - u32 m1 = (ea && (ea->u.data < LSINFINITY)) ? ea->u.data : LSINFINITY; + ea_list *ea = e->attrs->eattrs; + u32 m0 = ea_get_int(ea, EA_GEN_IGP_METRIC, LSINFINITY); + u32 m1 = MIN(m0, LSINFINITY); + u32 m2 = 10000; + u32 tag = 0; - *attrs = ospf_build_attrs(*attrs, pool, m1, 10000, 0, 0); + /* Hack for setting attributes directly in static protocol */ + if (e->attrs->source == RTS_STATIC) + { + m1 = ea_get_int(ea, EA_OSPF_METRIC1, m1); + m2 = ea_get_int(ea, EA_OSPF_METRIC2, 10000); + tag = ea_get_int(ea, EA_OSPF_TAG, 0); + } + + *attrs = ospf_build_attrs(*attrs, pool, m1, m2, tag, 0); return 0; /* Leave decision to the filters */ } diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c index e68ee0f4..fc52f631 100644 --- a/proto/ospf/rt.c +++ b/proto/ospf/rt.c @@ -53,88 +53,6 @@ new_nexthop(struct ospf_proto *p, ip_addr gw, struct iface *iface, byte weight) return nh; } -static inline struct mpnh * -copy_nexthop(struct ospf_proto *p, const struct mpnh *src) -{ - struct mpnh *nh = lp_alloc(p->nhpool, sizeof(struct mpnh)); - nh->gw = src->gw; - nh->iface = src->iface; - nh->next = NULL; - nh->weight = src->weight; - return nh; -} - -/* Compare nexthops during merge. 
- We need to maintain nhs sorted to eliminate duplicities */ -static int -cmp_nhs(struct mpnh *s1, struct mpnh *s2) -{ - int r; - - if (!s1) - return 1; - - if (!s2) - return -1; - - r = ((int) s2->weight) - ((int) s1->weight); - if (r) - return r; - - r = ipa_compare(s1->gw, s2->gw); - if (r) - return r; - - return ((int) s1->iface->index) - ((int) s2->iface->index); -} - -static struct mpnh * -merge_nexthops(struct ospf_proto *p, struct mpnh *s1, struct mpnh *s2, int r1, int r2) -{ - struct mpnh *root = NULL; - struct mpnh **n = &root; - int count = p->ecmp; - - ASSERT(p->ecmp); - - /* - * r1, r2 signalize whether we can reuse nexthops from s1, s2. - * New nexthops (s2, new) can be reused if they are not inherited - * from the parent (i.e. it is allocated in calc_next_hop()). - * Current nexthops (s1, en->nhs) can be reused if they weren't - * inherited in previous steps (that is stored in nhs_reuse, - * i.e. created by merging or allocated in calc_next_hop()). - * - * Generally, a node first inherits shared nexthops from its - * parent and later possibly gets reusable copy during merging. - */ - - while ((s1 || s2) && count--) - { - int cmp = cmp_nhs(s1, s2); - if (cmp < 0) - { - *n = r1 ? s1 : copy_nexthop(p, s1); - s1 = s1->next; - } - else if (cmp > 0) - { - *n = r2 ? s2 : copy_nexthop(p, s2); - s2 = s2->next; - } - else - { - *n = r1 ? s1 : (r2 ? 
s2 : copy_nexthop(p, s1)); - s1 = s1->next; - s2 = s2->next; - } - n = &((*n)->next); - } - *n = NULL; - - return root; -} - /* Returns true if there are device nexthops in n */ static inline int has_device_nexthops(const struct mpnh *n) @@ -178,7 +96,7 @@ fix_device_nexthops(struct ospf_proto *p, const struct mpnh *n, ip_addr gw) } } - return merge_nexthops(p, root1, root2, 1, 1); + return mpnh_merge(root1, root2, 1, 1, p->ecmp, p->nhpool); } @@ -374,7 +292,8 @@ ort_merge(struct ospf_proto *p, ort *o, const orta *new) if (old->nhs != new->nhs) { - old->nhs = merge_nexthops(p, old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse); + old->nhs = mpnh_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse, + p->ecmp, p->nhpool); old->nhs_reuse = 1; } @@ -389,7 +308,8 @@ ort_merge_ext(struct ospf_proto *p, ort *o, const orta *new) if (old->nhs != new->nhs) { - old->nhs = merge_nexthops(p, old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse); + old->nhs = mpnh_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse, + p->ecmp, p->nhpool); old->nhs_reuse = 1; } @@ -1885,8 +1805,7 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par, return; } - /* We know that en->color == CANDIDATE and en->nhs is defined. */ - + /* If en->dist > 0, we know that en->color == CANDIDATE and en->nhs is defined. */ if ((dist == en->dist) && !nh_is_vlink(en->nhs)) { /* @@ -1900,7 +1819,14 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par, * allocated in calc_next_hop()). * * Generally, a node first inherits shared nexthops from its parent and - * later possibly gets reusable copy during merging. + * later possibly gets reusable (private) copy during merging. This is more + * or less same for both top_hash_entry nodes and orta nodes. + * + * Note that when a child inherits a private nexthop from its parent, it + * should make the nexthop shared for both parent and child, while we only + * update nhs_reuse for the child node. 
This makes nhs_reuse field for the + * parent technically incorrect, but it is not a problem as parent's nhs + * will not be modified (and nhs_reuse examined) afterwards. */ /* Keep old ones */ @@ -1909,7 +1835,7 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par, /* Merge old and new */ int new_reuse = (par->nhs != nhs); - en->nhs = merge_nexthops(p, en->nhs, nhs, en->nhs_reuse, new_reuse); + en->nhs = mpnh_merge(en->nhs, nhs, en->nhs_reuse, new_reuse, p->ecmp, p->nhpool); en->nhs_reuse = 1; return; } diff --git a/proto/ospf/rt.h b/proto/ospf/rt.h index 61936f3c..30332f3b 100644 --- a/proto/ospf/rt.h +++ b/proto/ospf/rt.h @@ -18,7 +18,8 @@ typedef struct orta { u8 type; /* RTS_OSPF_* */ - u8 nhs_reuse; /* Whether nhs nodes can be reused during merging */ + u8 nhs_reuse; /* Whether nhs nodes can be reused during merging. + See a note in rt.c:add_cand() */ u32 options; /* * For ORT_ROUTER routes, options field are router-LSA style diff --git a/proto/ospf/topology.h b/proto/ospf/topology.h index e2d6c773..5652ced0 100644 --- a/proto/ospf/topology.h +++ b/proto/ospf/topology.h @@ -39,7 +39,7 @@ struct top_hash_entry #define INSPF 2 u8 mode; /* LSA generated during RT calculation (LSA_RTCALC or LSA_STALE)*/ u8 nhs_reuse; /* Whether nhs nodes can be reused during merging. 
- See a note in rt.c:merge_nexthops() */ + See a note in rt.c:add_cand() */ }; diff --git a/proto/static/config.Y b/proto/static/config.Y index a8bfa36f..182721b3 100644 --- a/proto/static/config.Y +++ b/proto/static/config.Y @@ -14,11 +14,24 @@ CF_DEFINES #define STATIC_CFG ((struct static_config *) this_proto) static struct static_route *this_srt, *this_srt_nh, *last_srt_nh; +static struct f_inst **this_srt_last_cmd; + +static void +static_route_finish(void) +{ + struct static_route *r; + + /* Update undefined use_bfd entries in multipath nexthops */ + if (this_srt->dest == RTD_MULTIPATH) + for (r = this_srt->mp_next; r; r = r->mp_next) + if (r->use_bfd < 0) + r->use_bfd = this_srt->use_bfd; +} CF_DECLS CF_KEYWORDS(STATIC, ROUTE, VIA, DROP, REJECT, PROHIBIT, PREFERENCE, CHECK, LINK) -CF_KEYWORDS(MULTIPATH, WEIGHT, RECURSIVE, IGP, TABLE, BLACKHOLE, UNREACHABLE) +CF_KEYWORDS(MULTIPATH, WEIGHT, RECURSIVE, IGP, TABLE, BLACKHOLE, UNREACHABLE, BFD) CF_GRAMMAR @@ -36,7 +49,7 @@ static_proto: | static_proto proto_item ';' | static_proto CHECK LINK bool ';' { STATIC_CFG->check_link = $4; } | static_proto IGP TABLE rtable ';' { STATIC_CFG->igp_table = $4; } - | static_proto stat_route ';' + | static_proto stat_route stat_route_opt_list ';' { static_route_finish(); } ; stat_route0: ROUTE prefix { @@ -44,6 +57,7 @@ stat_route0: ROUTE prefix { add_tail(&STATIC_CFG->other_routes, &this_srt->n); this_srt->net = $2.addr; this_srt->masklen = $2.len; + this_srt_last_cmd = &(this_srt->cmds); } ; @@ -55,11 +69,15 @@ stat_multipath1: this_srt_nh->via = $2; this_srt_nh->via_if = $3; this_srt_nh->if_name = (void *) this_srt; /* really */ + this_srt_nh->use_bfd = -1; /* undefined */ } | stat_multipath1 WEIGHT expr { this_srt_nh->masklen = $3 - 1; /* really */ if (($3<1) || ($3>256)) cf_error("Weight must be in range 1-256"); } + | stat_multipath1 BFD bool { + this_srt_nh->use_bfd = $3; cf_check_bfd($3); + } ; stat_multipath: @@ -94,6 +112,22 @@ stat_route: | stat_route0 PROHIBIT { 
this_srt->dest = RTD_PROHIBIT; } ; +stat_route_item: + cmd { *this_srt_last_cmd = $1; this_srt_last_cmd = &($1->next); } + | BFD bool ';' { this_srt->use_bfd = $2; cf_check_bfd($2); } + ; + +stat_route_opts: + /* empty */ + | stat_route_opts stat_route_item + ; + +stat_route_opt_list: + /* empty */ + | '{' stat_route_opts '}' + ; + + CF_CLI(SHOW STATIC, optsym, [], [[Show details of static protocol]]) { static_show(proto_get_named($3, &proto_static)); } ; diff --git a/proto/static/static.c b/proto/static/static.c index 4b72fa9d..be808593 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -42,11 +42,14 @@ #include "nest/route.h" #include "nest/cli.h" #include "conf/conf.h" +#include "filter/filter.h" #include "lib/string.h" #include "lib/alloca.h" #include "static.h" +static linpool *static_lp; + static inline rtable * p_igp_table(struct proto *p) { @@ -54,12 +57,11 @@ p_igp_table(struct proto *p) return cf->igp_table ? cf->igp_table->table : p->table; } - static void static_install(struct proto *p, struct static_route *r, struct iface *ifa) { net *n; - rta a, *aa; + rta a; rte *e; if (r->installed > 0) @@ -108,13 +110,21 @@ static_install(struct proto *p, struct static_route *r, struct iface *ifa) if (r->dest == RTDX_RECURSIVE) rta_set_recursive_next_hop(p->table, &a, p_igp_table(p), &r->via, &r->via); - aa = rta_lookup(&a); + /* We skip rta_lookup() here */ + n = net_get(p->table, r->net, r->masklen); - e = rte_get_temp(aa); + e = rte_get_temp(&a); e->net = n; e->pflags = 0; + + if (r->cmds) + f_eval_rte(r->cmds, &e, static_lp); + rte_update(p, n, e); r->installed = 1; + + if (r->cmds) + lp_flush(static_lp); } static void @@ -131,6 +141,29 @@ static_remove(struct proto *p, struct static_route *r) r->installed = 0; } +static void +static_bfd_notify(struct bfd_request *req); + +static void +static_update_bfd(struct proto *p, struct static_route *r) +{ + struct neighbor *nb = r->neigh; + int bfd_up = (nb->scope > 0) && r->use_bfd; + + if (bfd_up && 
!r->bfd_req) + { + // ip_addr local = ipa_nonzero(r->local) ? r->local : nb->ifa->ip; + r->bfd_req = bfd_request_session(p->pool, r->via, nb->ifa->ip, nb->iface, + static_bfd_notify, r); + } + + if (!bfd_up && r->bfd_req) + { + rfree(r->bfd_req); + r->bfd_req = NULL; + } +} + static int static_decide(struct static_config *cf, struct static_route *r) { @@ -143,6 +176,9 @@ static_decide(struct static_config *cf, struct static_route *r) if (cf->check_link && !(r->neigh->iface->flags & IF_LINK_UP)) return 0; + if (r->bfd_req && r->bfd_req->state != BFD_STATE_UP) + return 0; + return 1; } @@ -161,6 +197,8 @@ static_add(struct proto *p, struct static_config *cf, struct static_route *r) r->chain = n->data; n->data = r; r->neigh = n; + + static_update_bfd(p, r); if (static_decide(cf, r)) static_install(p, r, n->iface); else @@ -190,6 +228,8 @@ static_add(struct proto *p, struct static_config *cf, struct static_route *r) r2->chain = n->data; n->data = r2; r2->neigh = n; + + static_update_bfd(p, r2); r2->installed = static_decide(cf, r2); count += r2->installed; } @@ -212,6 +252,26 @@ static_add(struct proto *p, struct static_config *cf, struct static_route *r) } } +static void +static_rte_cleanup(struct proto *p, struct static_route *r) +{ + struct static_route *r2; + + if (r->bfd_req) + { + rfree(r->bfd_req); + r->bfd_req = NULL; + } + + if (r->dest == RTD_MULTIPATH) + for (r2 = r->mp_next; r2; r2 = r2->mp_next) + if (r2->bfd_req) + { + rfree(r2->bfd_req); + r2->bfd_req = NULL; + } +} + static int static_start(struct proto *p) { @@ -220,6 +280,9 @@ static_start(struct proto *p) DBG("Static: take off!\n"); + if (!static_lp) + static_lp = lp_new(&root_pool, 1008); + if (cf->igp_table) rt_lock_table(cf->igp_table->table); @@ -241,7 +304,10 @@ static_shutdown(struct proto *p) WALK_LIST(r, cf->iface_routes) r->installed = 0; WALK_LIST(r, cf->other_routes) + { + static_rte_cleanup(p, r); r->installed = 0; + } return PS_DOWN; } @@ -255,6 +321,44 @@ static_cleanup(struct proto *p) 
rt_unlock_table(cf->igp_table->table); } +static void +static_update_rte(struct proto *p, struct static_route *r) +{ + switch (r->dest) + { + case RTD_ROUTER: + if (static_decide((struct static_config *) p->cf, r)) + static_install(p, r, r->neigh->iface); + else + static_remove(p, r); + break; + + case RTD_NONE: /* a part of multipath route */ + { + int decision = static_decide((struct static_config *) p->cf, r); + if (decision == r->installed) + break; /* no change */ + r->installed = decision; + + struct static_route *r1, *r2; + int count = 0; + r1 = (void *) r->if_name; /* really */ + for (r2 = r1->mp_next; r2; r2 = r2->mp_next) + count += r2->installed; + + if (count) + { + /* Set of nexthops changed - force reinstall */ + r1->installed = 0; + static_install(p, r1, NULL); + } + else + static_remove(p, r1); + + break; + } + } +} static void static_neigh_notify(struct neighbor *n) @@ -264,40 +368,21 @@ static_neigh_notify(struct neighbor *n) DBG("Static: neighbor notify for %I: iface %p\n", n->addr, n->iface); for(r=n->data; r; r=r->chain) - switch (r->dest) - { - case RTD_ROUTER: - if (static_decide((struct static_config *) p->cf, r)) - static_install(p, r, n->iface); - else - static_remove(p, r); - break; + { + static_update_bfd(p, r); + static_update_rte(p, r); + } +} - case RTD_NONE: /* a part of multipath route */ - { - int decision = static_decide((struct static_config *) p->cf, r); - if (decision == r->installed) - break; /* no change */ - r->installed = decision; +static void +static_bfd_notify(struct bfd_request *req) +{ + struct static_route *r = req->data; + struct proto *p = r->neigh->proto; - struct static_route *r1, *r2; - int count = 0; - r1 = (void *) r->if_name; /* really */ - for (r2 = r1->mp_next; r2; r2 = r2->mp_next) - count += r2->installed; + // if (req->down) TRACE(D_EVENTS, "BFD session down for nbr %I on %s", XXXX); - if (count) - { - /* Set of nexthops changed - force reinstall */ - r1->installed = 0; - static_install(p, r1, NULL); - } 
- else - static_remove(p, r1); - - break; - } - } + static_update_rte(p, r); } static void @@ -352,6 +437,12 @@ static_if_notify(struct proto *p, unsigned flags, struct iface *i) } } +int +static_rte_mergable(rte *pri, rte *sec) +{ + return 1; +} + void static_init_config(struct static_config *c) { @@ -366,6 +457,7 @@ static_init(struct proto_config *c) p->neigh_notify = static_neigh_notify; p->if_notify = static_if_notify; + p->rte_mergable = static_rte_mergable; return p; } @@ -394,7 +486,7 @@ static_same_dest(struct static_route *x, struct static_route *y) for (x = x->mp_next, y = y->mp_next; x && y; x = x->mp_next, y = y->mp_next) - if (!ipa_equal(x->via, y->via) || (x->via_if != y->via_if)) + if (!ipa_equal(x->via, y->via) || (x->via_if != y->via_if) || (x->use_bfd != y->use_bfd)) return 0; return !x && !y; @@ -406,6 +498,13 @@ static_same_dest(struct static_route *x, struct static_route *y) } } +static inline int +static_same_rte(struct static_route *x, struct static_route *y) +{ + return static_same_dest(x, y) && i_same(x->cmds, y->cmds); +} + + static void static_match(struct proto *p, struct static_route *r, struct static_config *n) { @@ -434,7 +533,7 @@ static_match(struct proto *p, struct static_route *r, struct static_config *n) found: /* If destination is different, force reinstall */ - if ((r->installed > 0) && !static_same_dest(r, t)) + if ((r->installed > 0) && !static_same_rte(r, t)) t->installed = -1; else t->installed = r->installed; @@ -472,6 +571,9 @@ static_reconfigure(struct proto *p, struct proto_config *new) WALK_LIST(r, n->other_routes) static_add(p, n, r); + WALK_LIST(r, o->other_routes) + static_rte_cleanup(p, r); + return 1; } @@ -557,13 +659,14 @@ static_show_rt(struct static_route *r) case RTDX_RECURSIVE: bsprintf(via, "recursive %I", r->via); break; default: bsprintf(via, "???"); } - cli_msg(-1009, "%I/%d %s%s", r->net, r->masklen, via, r->installed ? 
"" : " (dormant)"); + cli_msg(-1009, "%I/%d %s%s%s", r->net, r->masklen, via, + r->bfd_req ? " (bfd)" : "", r->installed ? "" : " (dormant)"); struct static_route *r2; if (r->dest == RTD_MULTIPATH) for (r2 = r->mp_next; r2; r2 = r2->mp_next) - cli_msg(-1009, "\tvia %I%J weight %d%s", r2->via, r2->via_if, r2->masklen + 1, /* really */ - r2->installed ? "" : " (dormant)"); + cli_msg(-1009, "\tvia %I%J weight %d%s%s", r2->via, r2->via_if, r2->masklen + 1, /* really */ + r2->bfd_req ? " (bfd)" : "", r2->installed ? "" : " (dormant)"); } void diff --git a/proto/static/static.h b/proto/static/static.h index 99a0e68b..6b047234 100644 --- a/proto/static/static.h +++ b/proto/static/static.h @@ -9,6 +9,9 @@ #ifndef _BIRD_STATIC_H_ #define _BIRD_STATIC_H_ +#include "nest/route.h" +#include "nest/bfd.h" + struct static_config { struct proto_config c; list iface_routes; /* Routes to search on interface events */ @@ -31,7 +34,10 @@ struct static_route { struct neighbor *neigh; byte *if_name; /* Name for RTD_DEVICE routes */ struct static_route *mp_next; /* Nexthops for RTD_MULTIPATH routes */ + struct f_inst *cmds; /* List of commands for setting attributes */ int installed; /* Installed in rt table, -1 for reinstall */ + int use_bfd; /* Configured to use BFD */ + struct bfd_request *bfd_req; /* BFD request, if BFD is used */ }; /* Dummy nodes (parts of multipath route) abuses masklen field for weight diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c index c4d52255..9c9449e2 100644 --- a/sysdep/linux/netlink.c +++ b/sysdep/linux/netlink.c @@ -100,11 +100,12 @@ nl_request_dump(int af, int cmd) struct { struct nlmsghdr nh; struct rtgenmsg g; - } req; - req.nh.nlmsg_type = cmd; - req.nh.nlmsg_len = sizeof(req); - req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; - req.g.rtgen_family = af; + } req = { + .nh.nlmsg_type = cmd, + .nh.nlmsg_len = sizeof(req), + .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP, + .g.rtgen_family = af + }; nl_send(&nl_scan, &req.nh); } @@ -703,6 
+704,11 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new) r.r.rtm_scope = RT_SCOPE_UNIVERSE; nl_add_attr_ipa(&r.h, sizeof(r), RTA_DST, net->n.prefix); + /* For route delete, we do not specify route attributes */ + if (!new) + return nl_exchange(&r.h); + + if (ea = ea_find(eattrs, EA_KRT_METRIC)) nl_add_attr_u32(&r.h, sizeof(r), RTA_PRIORITY, ea->u.data); diff --git a/sysdep/unix/krt.Y b/sysdep/unix/krt.Y index 630cda38..e036081d 100644 --- a/sysdep/unix/krt.Y +++ b/sysdep/unix/krt.Y @@ -17,7 +17,7 @@ CF_DEFINES CF_DECLS -CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, KRT_SOURCE, KRT_METRIC) +CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, KRT_SOURCE, KRT_METRIC, MERGE, PATHS) CF_GRAMMAR @@ -47,6 +47,8 @@ kern_item: } | DEVICE ROUTES bool { THIS_KRT->devroutes = $3; } | GRACEFUL RESTART bool { THIS_KRT->graceful_restart = $3; } + | MERGE PATHS bool { THIS_KRT->merge_paths = $3 ? KRT_DEFAULT_ECMP_LIMIT : 0; } + | MERGE PATHS bool LIMIT expr { THIS_KRT->merge_paths = $3 ? $5 : 0; if (($5 <= 0) || ($5 > 255)) cf_error("Merge paths limit must be in range 1-255"); } ; /* Kernel interface protocol */ diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index cfb623ce..2eab5cb2 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -592,6 +592,48 @@ krt_flush_routes(struct krt_proto *p) FIB_WALK_END; } +static struct rte * +krt_export_net(struct krt_proto *p, net *net, rte **rt_free, ea_list **tmpa) +{ + struct announce_hook *ah = p->p.main_ahook; + struct filter *filter = ah->out_filter; + rte *rt; + + if (p->p.accept_ra_types == RA_MERGED) + return rt_export_merged(ah, net, rt_free, tmpa, 1); + + rt = net->routes; + *rt_free = NULL; + + if (!rte_is_valid(rt)) + return NULL; + + if (filter == FILTER_REJECT) + return NULL; + + struct proto *src = rt->attrs->src->proto; + *tmpa = src->make_tmp_attrs ? 
src->make_tmp_attrs(rt, krt_filter_lp) : NULL; + + /* We could run krt_import_control() here, but it is already handled by KRF_INSTALLED */ + + if (filter == FILTER_ACCEPT) + goto accept; + + if (f_run(filter, &rt, tmpa, krt_filter_lp, FF_FORCE_TMPATTR) > F_ACCEPT) + goto reject; + + +accept: + if (rt != net->routes) + *rt_free = rt; + return rt; + +reject: + if (rt != net->routes) + rte_free(rt); + return NULL; +} + static int krt_same_dest(rte *k, rte *e) { @@ -620,7 +662,6 @@ krt_same_dest(rte *k, rte *e) void krt_got_route(struct krt_proto *p, rte *e) { - rte *old; net *net = e->net; int verdict; @@ -663,15 +704,26 @@ krt_got_route(struct krt_proto *p, rte *e) goto sentenced; } - old = net->routes; - if ((net->n.flags & KRF_INSTALLED) && rte_is_valid(old)) + if (net->n.flags & KRF_INSTALLED) { - /* There may be changes in route attributes, we ignore that. - Also, this does not work well if gw is changed in export filter */ - if ((net->n.flags & KRF_SYNC_ERROR) || ! krt_same_dest(e, old)) + rte *new, *rt_free; + ea_list *tmpa; + + new = krt_export_net(p, net, &rt_free, &tmpa); + + /* TODO: There also may be changes in route eattrs, we ignore that for now. */ + + if (!new) + verdict = KRF_DELETE; + else if ((net->n.flags & KRF_SYNC_ERROR) || !krt_same_dest(e, new)) verdict = KRF_UPDATE; else verdict = KRF_SEEN; + + if (rt_free) + rte_free(rt_free); + + lp_flush(krt_filter_lp); } else verdict = KRF_DELETE; @@ -692,25 +744,6 @@ krt_got_route(struct krt_proto *p, rte *e) rte_free(e); } -static inline int -krt_export_rte(struct krt_proto *p, rte **new, ea_list **tmpa) -{ - struct filter *filter = p->p.main_ahook->out_filter; - - if (! *new) - return 0; - - if (filter == FILTER_REJECT) - return 0; - - if (filter == FILTER_ACCEPT) - return 1; - - struct proto *src = (*new)->attrs->src->proto; - *tmpa = src->make_tmp_attrs ? 
src->make_tmp_attrs(*new, krt_filter_lp) : NULL; - return f_run(filter, new, tmpa, krt_filter_lp, FF_FORCE_TMPATTR) <= F_ACCEPT; -} - static void krt_prune(struct krt_proto *p) { @@ -721,7 +754,7 @@ krt_prune(struct krt_proto *p) { net *n = (net *) f; int verdict = f->flags & KRF_VERDICT_MASK; - rte *new, *new0, *old; + rte *new, *old, *rt_free = NULL; ea_list *tmpa = NULL; if (verdict == KRF_UPDATE || verdict == KRF_DELETE) @@ -733,23 +766,18 @@ krt_prune(struct krt_proto *p) else old = NULL; - new = new0 = n->routes; if (verdict == KRF_CREATE || verdict == KRF_UPDATE) { /* We have to run export filter to get proper 'new' route */ - if (! krt_export_rte(p, &new, &tmpa)) - { - /* Route rejected, should not happen (KRF_INSTALLED) but to be sure .. */ - verdict = (verdict == KRF_CREATE) ? KRF_IGNORE : KRF_DELETE; - } + new = krt_export_net(p, n, &rt_free, &tmpa); + + if (!new) + verdict = (verdict == KRF_CREATE) ? KRF_IGNORE : KRF_DELETE; else - { - ea_list **x = &tmpa; - while (*x) - x = &((*x)->next); - *x = new ? new->attrs->eattrs : NULL; - } + tmpa = ea_append(tmpa, new->attrs->eattrs); } + else + new = NULL; switch (verdict) { @@ -778,8 +806,8 @@ krt_prune(struct krt_proto *p) if (old) rte_free(old); - if (new != new0) - rte_free(new); + if (rt_free) + rte_free(rt_free); lp_flush(krt_filter_lp); f->flags &= ~KRF_VERDICT_MASK; } @@ -974,7 +1002,8 @@ krt_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool * * We will remove KRT_INSTALLED flag, which stops such withdraw to be * processed in krt_rt_notify() and krt_replace_rte(). 
*/ - e->net->n.flags &= ~KRF_INSTALLED; + if (e == e->net->routes) + e->net->n.flags &= ~KRF_INSTALLED; #endif return -1; } @@ -1066,11 +1095,13 @@ krt_rte_same(rte *a, rte *b) struct krt_config *krt_cf; static struct proto * -krt_init(struct proto_config *c) +krt_init(struct proto_config *C) { - struct krt_proto *p = proto_new(c, sizeof(struct krt_proto)); + struct krt_proto *p = proto_new(C, sizeof(struct krt_proto)); + struct krt_config *c = (struct krt_config *) C; - p->p.accept_ra_types = RA_OPTIMAL; + p->p.accept_ra_types = c->merge_paths ? RA_MERGED : RA_OPTIMAL; + p->p.merge_limit = c->merge_paths; p->p.import_control = krt_import_control; p->p.rt_notify = krt_rt_notify; p->p.if_notify = krt_if_notify; @@ -1136,7 +1167,8 @@ krt_reconfigure(struct proto *p, struct proto_config *new) return 0; /* persist, graceful restart need not be the same */ - return o->scan_time == n->scan_time && o->learn == n->learn && o->devroutes == n->devroutes; + return o->scan_time == n->scan_time && o->learn == n->learn && + o->devroutes == n->devroutes && o->merge_paths == n->merge_paths; } static void diff --git a/sysdep/unix/krt.h b/sysdep/unix/krt.h index 1940cbcd..9d5d4e8c 100644 --- a/sysdep/unix/krt.h +++ b/sysdep/unix/krt.h @@ -26,6 +26,8 @@ struct kif_proto; #define KRF_DELETE 3 /* Should be deleted */ #define KRF_IGNORE 4 /* To be ignored */ +#define KRT_DEFAULT_ECMP_LIMIT 16 + #define EA_KRT_SOURCE EA_CODE(EAP_KRT, 0) #define EA_KRT_METRIC EA_CODE(EAP_KRT, 1) @@ -47,6 +49,7 @@ struct krt_config { int learn; /* Learn routes from other sources */ int devroutes; /* Allow export of device routes */ int graceful_restart; /* Regard graceful restart recovery */ + int merge_paths; /* Exported routes are merged for ECMP */ }; struct krt_proto { diff --git a/tools/Makefile.in b/tools/Makefile.in index 30dd79e5..24b03add 100644 --- a/tools/Makefile.in +++ b/tools/Makefile.in @@ -47,13 +47,16 @@ subdir: sysdep/paths.h .dir-stamp .dep-stamp set -e ; for a in $(static-dirs) 
$(client-dirs) ; do $(MAKE) -C $$a -f $(srcdir_abs)/$$a/Makefile $@ ; done $(exedir)/bird: $(bird-dep) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) + @echo LD $(LDFLAGS) -o $@ $^ $(LIBS) + @$(CC) $(LDFLAGS) -o $@ $^ $(LIBS) $(exedir)/birdc: $(birdc-dep) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) $(CLIENT_LIBS) + @echo LD $(LDFLAGS) -o $@ $^ $(LIBS) $(CLIENT_LIBS) + @$(CC) $(LDFLAGS) -o $@ $^ $(LIBS) $(CLIENT_LIBS) $(exedir)/birdcl: $(birdcl-dep) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) + @echo LD $(LDFLAGS) -o $@ $^ $(LIBS) + @$(CC) $(LDFLAGS) -o $@ $^ $(LIBS) .dir-stamp: sysdep/paths.h mkdir -p $(static-dirs) $(client-dirs) $(doc-dirs) diff --git a/tools/Rules.in b/tools/Rules.in index c5830443..090c7282 100644 --- a/tools/Rules.in +++ b/tools/Rules.in @@ -81,12 +81,14 @@ subdir: all.o all.o: $(objs) # $(LD) -r -o $@ $^ # Changed to $(CC) because $(LD) has problems with crosscompiling - $(CC) -nostdlib -r -o $@ $^ + @echo LD -r -o $@ $^ + @$(CC) -nostdlib -r -o $@ $^ endif %.o: $(src-path)%.c - $(CC) $(CFLAGS) -o $@ -c $< + @echo CC -o $@ -c $< + @$(CC) $(CFLAGS) -o $@ -c $< ifndef source-dep source-dep := $(source)