From f15f2fcee7eeb5a100bd204a0e67018e25953420 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Thu, 5 May 2022 18:08:37 +0200 Subject: [PATCH] Moved nexthop from struct rta to extended attribute. This doesn't do anything more than to put the whole structure inside adata. The overall performance is certainly going downhill; we'll optimize this later. Anyway, this is one of the latest items inside rta and in several commits we may drop rta completely and move to eattrs-only routes. --- filter/f-inst.c | 139 +++++++++++++------ lib/attrs.h | 2 + lib/route.h | 65 ++++++--- lib/type.h | 1 + nest/rt-attr.c | 304 +++++++++++++++-------------------------- nest/rt-dev.c | 7 +- nest/rt-show.c | 9 +- nest/rt-table.c | 156 ++++++++++++--------- proto/babel/babel.c | 51 ++++--- proto/bgp/packets.c | 68 ++++++--- proto/ospf/rt.c | 164 +++++++++++----------- proto/ospf/rt.h | 4 +- proto/ospf/topology.c | 16 ++- proto/ospf/topology.h | 4 +- proto/perf/perf.c | 10 +- proto/rip/rip.c | 54 ++++++-- proto/static/static.c | 34 +++-- sysdep/linux/netlink.c | 183 +++++++++++++++---------- sysdep/unix/krt.c | 10 +- 19 files changed, 722 insertions(+), 559 deletions(-) diff --git a/filter/f-inst.c b/filter/f-inst.c index 23271ce7..8ce78a99 100644 --- a/filter/f-inst.c +++ b/filter/f-inst.c @@ -529,27 +529,47 @@ { STATIC_ATTR; ACCESS_RTE; + ACCESS_EATTRS; struct rta *rta = (*fs->rte)->attrs; switch (sa.sa_code) { - case SA_GW: RESULT(sa.type, ip, rta->nh.gw); break; case SA_NET: RESULT(sa.type, net, (*fs->rte)->net->n.addr); break; case SA_PROTO: RESULT(sa.type, s, (*fs->rte)->src->proto->name); break; case SA_DEST: RESULT(sa.type, i, rta->dest); break; - case SA_IFNAME: RESULT(sa.type, s, rta->nh.iface ? rta->nh.iface->name : ""); break; - case SA_IFINDEX: RESULT(sa.type, i, rta->nh.iface ? rta->nh.iface->index : 0); break; - case SA_WEIGHT: RESULT(sa.type, i, rta->nh.weight + 1); break; - case SA_GW_MPLS: RESULT(sa.type, i, rta->nh.labels ? rta->nh.label[0] : MPLS_NULL); break; - default: - bug("Invalid static attribute access (%u/%u)", sa.type, sa.sa_code); + { + struct eattr *nh_ea = ea_find(*fs->eattrs, &ea_gen_nexthop); + struct nexthop *nh = nh_ea ? &((struct nexthop_adata *) nh_ea->u.ptr)->nh : NULL; + + switch (sa.sa_code) + { + case SA_GW: + RESULT(sa.type, ip, nh ? nh->gw : IPA_NONE); + break; + case SA_IFNAME: + RESULT(sa.type, s, (nh && nh->iface) ? nh->iface->name : ""); + break; + case SA_IFINDEX: + RESULT(sa.type, i, (nh && nh->iface) ? nh->iface->index : 0); + break; + case SA_WEIGHT: + RESULT(sa.type, i, (nh ? nh->weight : 0) + 1); + break; + case SA_GW_MPLS: + RESULT(sa.type, i, (nh && nh->labels) ? nh->label[0] : MPLS_NULL); + break; + default: + bug("Invalid static attribute access (%u/%u)", sa.type, sa.sa_code); + } + } } } } INST(FI_RTA_SET, 1, 0) { ACCESS_RTE; + ACCESS_EATTRS; ARG_ANY(1); STATIC_ATTR; ARG_TYPE(1, sa.type); @@ -558,37 +578,51 @@ { struct rta *rta = (*fs->rte)->attrs; - switch (sa.sa_code) - { - case SA_GW: - { - ip_addr ip = v1.val.ip; - struct iface *ifa = ipa_is_link_local(ip) ? rta->nh.iface : NULL; - neighbor *n = neigh_find((*fs->rte)->src->proto, ip, ifa, 0); - if (!n || (n->scope == SCOPE_HOST)) - runtime( "Invalid gw address" ); - - rta->dest = RTD_UNICAST; - rta->nh.gw = ip; - rta->nh.iface = n->iface; - rta->nh.next = NULL; - rta->hostentry = NULL; - rta->nh.labels = 0; - } - break; - - case SA_DEST: + if (sa.sa_code == SA_DEST) { int i = v1.val.i; if ((i != RTD_BLACKHOLE) && (i != RTD_UNREACHABLE) && (i != RTD_PROHIBIT)) runtime( "Destination can be changed only to blackhole, unreachable or prohibit" ); rta->dest = i; - rta->nh.gw = IPA_NONE; - rta->nh.iface = NULL; - rta->nh.next = NULL; - rta->hostentry = NULL; - rta->nh.labels = 0; + ea_unset_attr(fs->eattrs, 1, &ea_gen_nexthop); + } + else + { + union { + struct nexthop_adata nha; + struct { + struct adata ad; + struct nexthop nh; + u32 label; + }; + } nha; + + nha.ad = (struct adata) { + .length = sizeof (struct nexthop_adata) - sizeof (struct adata), + }; + + eattr *a = NULL; + + switch (sa.sa_code) + { + case SA_GW: + { + struct eattr *nh_ea = ea_find(*fs->eattrs, &ea_gen_nexthop); + + ip_addr ip = v1.val.ip; + struct iface *ifa = (ipa_is_link_local(ip) && nh_ea) ? + ((struct nexthop_adata *) nh_ea->u.ptr)->nh.iface : NULL; + + neighbor *n = neigh_find((*fs->rte)->src->proto, ip, ifa, 0); + if (!n || (n->scope == SCOPE_HOST)) + runtime( "Invalid gw address" ); + + rta->dest = RTD_UNICAST; + nha.nh = (struct nexthop) { + .gw = ip, + .iface = n->iface, + }; } break; @@ -599,11 +633,9 @@ runtime( "Invalid iface name" ); rta->dest = RTD_UNICAST; - rta->nh.gw = IPA_NONE; - rta->nh.iface = ifa; - rta->nh.next = NULL; - rta->hostentry = NULL; - rta->nh.labels = 0; + nha.nh = (struct nexthop) { + .iface = ifa, + }; } break; @@ -612,13 +644,20 @@ if (v1.val.i >= 0x100000) runtime( "Invalid MPLS label" ); + struct eattr *nh_ea = ea_find(*fs->eattrs, &ea_gen_nexthop); + if (!nh_ea) + runtime( "No nexthop to add a MPLS label to" ); + + nha.nh = ((struct nexthop_adata *) nh_ea->u.ptr)->nh; + if (v1.val.i != MPLS_NULL) { - rta->nh.label[0] = v1.val.i; - rta->nh.labels = 1; + nha.nh.label[0] = v1.val.i; + nha.nh.labels = 1; + nha.ad.length = sizeof nha - sizeof (struct adata); } else - rta->nh.labels = 0; + nha.nh.labels = 0; } break; @@ -630,14 +669,32 @@ if (rta->dest != RTD_UNICAST) runtime( "Setting weight needs regular nexthop " ); + struct eattr *nh_ea = ea_find(*fs->eattrs, &ea_gen_nexthop); + if (!nh_ea) + runtime( "No nexthop to set weight on" ); + + struct nexthop_adata *nhax = (struct nexthop_adata *) + tmp_copy_adata(&((struct nexthop_adata *) nh_ea->u.ptr)->ad); + /* Set weight on all next hops */ - for (struct nexthop *nh = &rta->nh; nh; nh = nh->next) + NEXTHOP_WALK(nh, nhax) nh->weight = i - 1; + + a = ea_set_attr(fs->eattrs, + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, &nhax->ad)); } break; default: bug("Invalid static attribute access (%u/%u)", sa.type, sa.sa_code); + } + + if (!a) + a = ea_set_attr(fs->eattrs, + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, tmp_copy_adata(&nha.ad))); + + a->originated = 1; + a->fresh = 1; } } } diff --git a/lib/attrs.h b/lib/attrs.h index d2638f3f..79b7b14a 100644 --- a/lib/attrs.h +++ b/lib/attrs.h @@ -37,7 +37,9 @@ lp_store_adata(struct linpool *pool, const void *buf, uint len) return ad; } +#define tmp_alloc_adata(len) lp_alloc_adata(tmp_linpool, len) #define tmp_store_adata(buf, len) lp_store_adata(tmp_linpool, buf, len) +#define tmp_copy_adata(ad) tmp_store_adata((ad)->data, (ad)->length) static inline int adata_same(const struct adata *a, const struct adata *b) { return (a->length == b->length && !memcmp(a->data, b->data, a->length)); } diff --git a/lib/route.h b/lib/route.h index 5423ada3..29a78e90 100644 --- a/lib/route.h +++ b/lib/route.h @@ -67,13 +67,18 @@ void rt_prune_sources(void); struct nexthop { ip_addr gw; /* Next hop */ struct iface *iface; /* Outgoing interface */ - struct nexthop *next; byte flags; byte weight; byte labels; /* Number of all labels */ u32 label[0]; }; +/* For packing one into eattrs */ +struct nexthop_adata { + struct adata ad; + struct nexthop nh; +}; + #define RNF_ONLINK 0x1 /* Gateway is onlink regardless of IP ranges */ @@ -85,7 +90,6 @@ typedef struct rta { struct hostentry *hostentry; /* Hostentry for recursive next-hops */ u16 cached:1; /* Are attributes cached? */ u16 dest:4; /* Route destination type (RTD_...) */ - struct nexthop nh; /* Next hop */ } rta; #define RTS_STATIC 1 /* Normal static route */ @@ -286,9 +290,22 @@ ea_set_attr_u32(ea_list **to, const struct ea_class *def, uint flags, u64 data) { ea_set_attr(to, EA_LITERAL_EMBEDDED(def, flags, data)); } static inline void -ea_set_attr_data(ea_list **to, const struct ea_class *def, uint flags, void *data, uint len) +ea_set_attr_data(ea_list **to, const struct ea_class *def, uint flags, const void *data, uint len) { ea_set_attr(to, EA_LITERAL_STORE_ADATA(def, flags, data, len)); } +static inline void +ea_copy_attr(ea_list **to, ea_list *from, const struct ea_class *def) +{ + eattr *e = ea_find_by_class(from, def); + if (e) + if (e->type & EAF_EMBEDDED) + ea_set_attr_u32(to, def, e->flags, e->u.data); + else + ea_set_attr_data(to, def, e->flags, e->u.ptr->data, e->u.ptr->length); + else + ea_unset_attr(to, 0, def); +} + /* * Common route attributes */ @@ -317,25 +334,39 @@ static inline u32 rt_get_source_attr(rte *rt) * to add additional labels to the resolved nexthop */ extern struct ea_class ea_mpls_labels; +/* Next hop: For now, stored as adata */ +extern struct ea_class ea_gen_nexthop; + /* Next hop structures */ -#define NEXTHOP_MAX_SIZE (sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK) +#define NEXTHOP_ALIGNMENT (_Alignof(struct nexthop)) +#define NEXTHOP_MAX_SIZE (sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK) +#define NEXTHOP_SIZE(_nh) NEXTHOP_SIZE_CNT(((_nh)->labels)) +#define NEXTHOP_SIZE_CNT(cnt) BIRD_ALIGN((sizeof(struct nexthop) + sizeof(u32) * (cnt)), NEXTHOP_ALIGNMENT) +#define nexthop_size(nh) NEXTHOP_SIZE((nh)) + +#define NEXTHOP_NEXT(_nh) ((void *) (_nh) + NEXTHOP_SIZE(_nh)) +#define NEXTHOP_END(_nhad) ((_nhad)->ad.data + (_nhad)->ad.length) +#define NEXTHOP_VALID(_nh, _nhad) ((void *) (_nh) < (void *) NEXTHOP_END(_nhad)) +#define NEXTHOP_ONE(_nhad) (NEXTHOP_NEXT(&(_nhad)->nh) == NEXTHOP_END(_nhad)) + +#define NEXTHOP_WALK(_iter, _nhad) for ( \ + struct nexthop *_iter = &(_nhad)->nh; \ + (void *) _iter < (void *) NEXTHOP_END(_nhad); \ + _iter = NEXTHOP_NEXT(_iter)) + + +static inline int nexthop_same(struct nexthop_adata *x, struct nexthop_adata *y) +{ return adata_same(&x->ad, &y->ad); } +struct nexthop_adata *nexthop_merge(struct nexthop_adata *x, struct nexthop_adata *y, int max, linpool *lp); +struct nexthop_adata *nexthop_sort(struct nexthop_adata *x, linpool *lp); +int nexthop_is_sorted(struct nexthop_adata *x); + -static inline size_t nexthop_size(const struct nexthop *nh) -{ return sizeof(struct nexthop) + sizeof(u32)*nh->labels; } -int nexthop__same(struct nexthop *x, struct nexthop *y); /* Compare multipath nexthops */ -static inline int nexthop_same(struct nexthop *x, struct nexthop *y) -{ return (x == y) || nexthop__same(x, y); } -struct nexthop *nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp); -struct nexthop *nexthop_sort(struct nexthop *x); -static inline void nexthop_link(struct rta *a, struct nexthop *from) -{ memcpy(&a->nh, from, nexthop_size(from)); } -void nexthop_insert(struct nexthop **n, struct nexthop *y); -int nexthop_is_sorted(struct nexthop *x); void rta_init(void); -static inline size_t rta_size(const rta *a) { return sizeof(rta) + sizeof(u32)*a->nh.labels; } -#define RTA_MAX_SIZE (sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK) +#define rta_size(...) (sizeof(rta)) +#define RTA_MAX_SIZE (sizeof(rta)) rta *rta_lookup(rta *); /* Get rta equivalent to this one, uc++ */ static inline int rta_is_cached(rta *r) { return r->cached; } static inline rta *rta_clone(rta *r) { r->uc++; return r; } diff --git a/lib/type.h b/lib/type.h index abce1a1f..6da86c77 100644 --- a/lib/type.h +++ b/lib/type.h @@ -52,6 +52,7 @@ enum btype { /* Something but inaccessible. */ T_OPAQUE = 0x02, /* Opaque byte string (not filterable) */ T_IFACE = 0x0c, /* Pointer to an interface (inside adata) */ + T_NEXTHOP_LIST = 0x2c, /* The whole nexthop block */ /* Types shared with eattrs */ T_INT = 0x01, /* 32-bit unsigned integer number */ diff --git a/nest/rt-attr.c b/nest/rt-attr.c index dc4fe785..bd7ca425 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -57,6 +57,7 @@ #include "lib/string.h" #include +#include const adata null_adata; /* adata of length 0 */ @@ -108,6 +109,11 @@ struct ea_class ea_gen_source = { .format = ea_gen_source_format, }; +struct ea_class ea_gen_nexthop = { + .name = "nexthop", + .type = T_NEXTHOP_LIST, +}; + struct ea_class ea_mpls_labels = { .name = "mpls_labels", .type = T_CLIST, @@ -124,8 +130,7 @@ const char * rta_dest_names[RTD_MAX] = { pool *rta_pool; -static slab *rta_slab_[4]; -static slab *nexthop_slab_[4]; +static slab *rta_slab; static slab *rte_src_slab; static struct idm src_ids; @@ -204,50 +209,10 @@ rt_prune_sources(void) * Multipath Next Hop */ -static inline u32 -nexthop_hash(struct nexthop *x) -{ - u32 h = 0; - for (; x; x = x->next) - { - h ^= ipa_hash(x->gw) ^ (h << 5) ^ (h >> 9); - - for (int i = 0; i < x->labels; i++) - h ^= x->label[i] ^ (h << 6) ^ (h >> 7); - } - - return h; -} - -int -nexthop__same(struct nexthop *x, struct nexthop *y) -{ - for (; x && y; x = x->next, y = y->next) - { - if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) || - (x->flags != y->flags) || (x->weight != y->weight) || - (x->labels != y->labels)) - return 0; - - for (int i = 0; i < x->labels; i++) - if (x->label[i] != y->label[i]) - return 0; - } - - return x == y; -} - static int nexthop_compare_node(const struct nexthop *x, const struct nexthop *y) { int r; - - if (!x) - return 1; - - if (!y) - return -1; - /* Should we also compare flags ? */ r = ((int) y->weight) - ((int) x->weight); @@ -272,23 +237,16 @@ nexthop_compare_node(const struct nexthop *x, const struct nexthop *y) return ((int) x->iface->index) - ((int) y->iface->index); } -static inline struct nexthop * -nexthop_copy_node(const struct nexthop *src, linpool *lp) +static int +nexthop_compare_qsort(const void *x, const void *y) { - struct nexthop *n = lp_alloc(lp, nexthop_size(src)); - - memcpy(n, src, nexthop_size(src)); - n->next = NULL; - - return n; + return nexthop_compare_node( *(const struct nexthop **) x, *(const struct nexthop **) y ); } /** * nexthop_merge - merge nexthop lists * @x: list 1 * @y: list 2 - * @rx: reusability of list @x - * @ry: reusability of list @y * @max: max number of nexthops * @lp: linpool for allocating nexthops * @@ -305,134 +263,111 @@ nexthop_copy_node(const struct nexthop *src, linpool *lp) * resulting list is no longer needed. When reusability is not set, the * corresponding lists are not modified nor linked from the resulting list. */ -struct nexthop * -nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp) +struct nexthop_adata * +nexthop_merge(struct nexthop_adata *xin, struct nexthop_adata *yin, int max, linpool *lp) { - struct nexthop *root = NULL; - struct nexthop **n = &root; + uint outlen = ADATA_SIZE(xin->ad.length) + ADATA_SIZE(yin->ad.length); + struct nexthop_adata *out = lp_alloc(lp, outlen); + out->ad.length = outlen - sizeof (struct adata); - while ((x || y) && max--) + struct nexthop *x = &xin->nh, *y = &yin->nh, *cur = &out->nh; + int xvalid, yvalid; + + while (max--) { - int cmp = nexthop_compare_node(x, y); + xvalid = NEXTHOP_VALID(x, xin); + yvalid = NEXTHOP_VALID(y, yin); - if (cmp < 0) - { - ASSUME(x); - *n = rx ? x : nexthop_copy_node(x, lp); - x = x->next; - } - else if (cmp > 0) - { - ASSUME(y); - *n = ry ? y : nexthop_copy_node(y, lp); - y = y->next; - } - else - { - ASSUME(x && y); - *n = rx ? x : (ry ? y : nexthop_copy_node(x, lp)); - x = x->next; - y = y->next; - } - n = &((*n)->next); - } - *n = NULL; - - return root; -} - -void -nexthop_insert(struct nexthop **n, struct nexthop *x) -{ - for (; *n; n = &((*n)->next)) - { - int cmp = nexthop_compare_node(*n, x); - - if (cmp < 0) - continue; - else if (cmp > 0) + if (!xvalid && !yvalid) break; + + ASSUME(NEXTHOP_VALID(cur, out)); + + int cmp = !xvalid ? 1 : !yvalid ? -1 : nexthop_compare_node(x, y); + + if (cmp < 0) + { + ASSUME(NEXTHOP_VALID(x, xin)); + memcpy(cur, x, nexthop_size(x)); + x = NEXTHOP_NEXT(x); + } + else if (cmp > 0) + { + ASSUME(NEXTHOP_VALID(y, yin)); + memcpy(cur, y, nexthop_size(y)); + y = NEXTHOP_NEXT(y); + } else - return; + { + ASSUME(NEXTHOP_VALID(x, xin)); + memcpy(cur, x, nexthop_size(x)); + x = NEXTHOP_NEXT(x); + + ASSUME(NEXTHOP_VALID(y, yin)); + y = NEXTHOP_NEXT(y); + } + cur = NEXTHOP_NEXT(cur); } - x->next = *n; - *n = x; + out->ad.length = (void *) cur - (void *) out->ad.data; + + return out; } -struct nexthop * -nexthop_sort(struct nexthop *x) +struct nexthop_adata * +nexthop_sort(struct nexthop_adata *nhad, linpool *lp) { - struct nexthop *s = NULL; + /* Count the nexthops */ + uint cnt = 0; + NEXTHOP_WALK(nh, nhad) + cnt++; - /* Simple insert-sort */ - while (x) + if (cnt <= 1) + return nhad; + + /* Get pointers to them */ + struct nexthop **sptr = tmp_alloc(cnt * sizeof(struct nexthop *)); + + uint i = 0; + NEXTHOP_WALK(nh, nhad) + sptr[i++] = nh; + + /* Sort the pointers */ + qsort(sptr, cnt, sizeof(struct nexthop *), nexthop_compare_qsort); + + /* Allocate the output */ + struct nexthop_adata *out = (struct nexthop_adata *) lp_alloc_adata(lp, nhad->ad.length); + struct nexthop *dest = &out->nh; + + /* Deduplicate nexthops while storing them */ + for (uint i = 0; i < cnt; i++) { - struct nexthop *n = x; - x = n->next; - n->next = NULL; + if (i && !nexthop_compare_node(sptr[i], sptr[i-1])) + continue; - nexthop_insert(&s, n); + memcpy(dest, sptr[i], NEXTHOP_SIZE(sptr[i])); + dest = NEXTHOP_NEXT(dest); } - return s; + out->ad.length = (void *) dest - (void *) out->ad.data; + return out; } int -nexthop_is_sorted(struct nexthop *x) +nexthop_is_sorted(struct nexthop_adata *nhad) { - for (; x && x->next; x = x->next) - if (nexthop_compare_node(x, x->next) >= 0) + struct nexthop *prev = NULL; + NEXTHOP_WALK(nh, nhad) + { + if (prev && (nexthop_compare_node(prev, nh) >= 0)) return 0; + prev = nh; + } + return 1; } -static inline slab * -nexthop_slab(struct nexthop *nh) -{ - return nexthop_slab_[MIN(nh->labels, 3)]; -} - -static struct nexthop * -nexthop_copy(struct nexthop *o) -{ - struct nexthop *first = NULL; - struct nexthop **last = &first; - - for (; o; o = o->next) - { - struct nexthop *n = sl_allocz(nexthop_slab(o)); - n->gw = o->gw; - n->iface = o->iface; - n->next = NULL; - n->flags = o->flags; - n->weight = o->weight; - n->labels = o->labels; - for (int i=0; ilabels; i++) - n->label[i] = o->label[i]; - - *last = n; - last = &(n->next); - } - - return first; -} - -static void -nexthop_free(struct nexthop *o) -{ - struct nexthop *n; - - while (o) - { - n = o->next; - sl_free(o); - o = n; - } -} - - /* * Extended Attributes */ @@ -1122,6 +1057,23 @@ ea_show(struct cli *c, const eattr *e) cli_printf(c, -1012, "\t%s", buf); } +static void +nexthop_dump(const struct adata *ad) +{ + struct nexthop_adata *nhad = (struct nexthop_adata *) ad; + + debug(":"); + + NEXTHOP_WALK(nh, nhad) + { + if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw); + if (nh->labels) debug(" L %d", nh->label[0]); + for (int i=1; ilabels; i++) + debug("/%d", nh->label[i]); + debug(" [%s]", nh->iface ? nh->iface->name : "???"); + } +} + /** * ea_dump - dump an extended attribute * @e: attribute to be dumped @@ -1157,6 +1109,8 @@ ea_dump(ea_list *e) debug("o"); if (a->type & EAF_EMBEDDED) debug(":%08x", a->u.data); + else if (a->id == ea_gen_nexthop.id) + nexthop_dump(a->u.ptr); else { int j, len = a->u.ptr->length; @@ -1258,7 +1212,7 @@ rta_hash(rta *a) BMIX(dest); #undef MIX - return mem_hash_value(&h) ^ nexthop_hash(&(a->nh)) ^ ea_hash(a->eattrs); + return mem_hash_value(&h) ^ ea_hash(a->eattrs); } static inline int @@ -1266,24 +1220,16 @@ rta_same(rta *x, rta *y) { return (x->dest == y->dest && x->hostentry == y->hostentry && - nexthop_same(&(x->nh), &(y->nh)) && ea_same(x->eattrs, y->eattrs)); } -static inline slab * -rta_slab(rta *a) -{ - return rta_slab_[a->nh.labels > 2 ? 3 : a->nh.labels]; -} - static rta * rta_copy(rta *o) { - rta *r = sl_alloc(rta_slab(o)); + rta *r = sl_alloc(rta_slab); memcpy(r, o, rta_size(o)); r->uc = 1; - r->nh.next = nexthop_copy(o->nh.next); if (!r->eattrs) return r; @@ -1375,8 +1321,6 @@ rta__free(rta *a) if (a->next) a->next->pprev = a->pprev; rt_unlock_hostentry(a->hostentry); - if (a->nh.next) - nexthop_free(a->nh.next); ea_free(a->eattrs); a->cached = 0; sl_free(a); @@ -1387,12 +1331,6 @@ rta_do_cow(rta *o, linpool *lp) { rta *r = lp_alloc(lp, rta_size(o)); memcpy(r, o, rta_size(o)); - for (struct nexthop **nhn = &(r->nh.next), *nho = o->nh.next; nho; nho = nho->next) - { - *nhn = lp_alloc(lp, nexthop_size(nho)); - memcpy(*nhn, nho, nexthop_size(nho)); - nhn = &((*nhn)->next); - } r->cached = 0; r->uc = 0; return r; @@ -1413,15 +1351,6 @@ rta_dump(rta *a) a->uc, rtd[a->dest], a->hash_key); if (!a->cached) debug(" !CACHED"); - if (a->dest == RTD_UNICAST) - for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) - { - if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw); - if (nh->labels) debug(" L %d", nh->label[0]); - for (int i=1; ilabels; i++) - debug("/%d", nh->label[i]); - debug(" [%s]", nh->iface ? nh->iface->name : "???"); - } if (a->eattrs) { debug(" EA: "); @@ -1471,15 +1400,7 @@ rta_init(void) { rta_pool = rp_new(&root_pool, "Attributes"); - rta_slab_[0] = sl_new(rta_pool, sizeof(rta)); - rta_slab_[1] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)); - rta_slab_[2] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*2); - rta_slab_[3] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK); - - nexthop_slab_[0] = sl_new(rta_pool, sizeof(struct nexthop)); - nexthop_slab_[1] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)); - nexthop_slab_[2] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*2); - nexthop_slab_[3] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK); + rta_slab = sl_new(rta_pool, sizeof(rta)); rta_alloc_hash(); rte_src_init(); @@ -1489,6 +1410,7 @@ rta_init(void) ea_register_init(&ea_gen_igp_metric); ea_register_init(&ea_gen_from); ea_register_init(&ea_gen_source); + ea_register_init(&ea_gen_nexthop); ea_register_init(&ea_mpls_labels); } diff --git a/nest/rt-dev.c b/nest/rt-dev.c index af6506f6..9953e270 100644 --- a/nest/rt-dev.c +++ b/nest/rt-dev.c @@ -84,11 +84,16 @@ dev_ifa_notify(struct proto *P, uint flags, struct ifa *ad) rta a0 = { .dest = RTD_UNICAST, - .nh.iface = ad->iface, + }; + + struct nexthop_adata nhad = { + .nh = { .iface = ad->iface, }, + .ad = { .length = (void *) NEXTHOP_NEXT(&nhad.nh) - (void *) nhad.ad.data, }, }; ea_set_attr_u32(&a0.eattrs, &ea_gen_preference, 0, c->preference); ea_set_attr_u32(&a0.eattrs, &ea_gen_source, 0, RTS_DEVICE); + ea_set_attr_data(&a0.eattrs, &ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length); a = rta_lookup(&a0); e = rte_get_temp(a, src); diff --git a/nest/rt-show.c b/nest/rt-show.c index be4c0186..32f7aa2d 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -45,11 +45,12 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary rta *a = e->attrs; int sync_error = d->kernel ? krt_get_sync_error(d->kernel, e) : 0; void (*get_route_info)(struct rte *, byte *buf); - struct nexthop *nh; + eattr *nhea = ea_find(a->eattrs, &ea_gen_nexthop); + struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; tm_format_time(tm, &config->tf_route, e->lastmod); ip_addr a_from = ea_get_ip(a->eattrs, &ea_gen_from, IPA_NONE); - if (ipa_nonzero(a_from) && !ipa_equal(a_from, a->nh.gw)) + if (ipa_nonzero(a_from) && (!nhad || !ipa_equal(a_from, nhad->nh.gw))) bsprintf(from, " from %I", a_from); else from[0] = 0; @@ -71,7 +72,7 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary e->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); if (a->dest == RTD_UNICAST) - for (nh = &(a->nh); nh; nh = nh->next) + NEXTHOP_WALK(nh, nhad) { char mpls[MPLS_MAX_LABEL_STACK*12 + 5], *lsp = mpls; char *onlink = (nh->flags & RNF_ONLINK) ? " onlink" : ""; @@ -85,7 +86,7 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary } *lsp = '\0'; - if (a->nh.next) + if (!NEXTHOP_ONE(nhad)) bsprintf(weight, " weight %d", nh->weight + 1); if (ipa_nonzero(nh->gw)) diff --git a/nest/rt-table.c b/nest/rt-table.c index 37f17bbc..6f948ada 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -940,18 +940,11 @@ rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_chang rte_free(new_free); } - -static struct nexthop * -nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max) -{ - return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool); -} - rte * rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent) { // struct proto *p = c->proto; - struct nexthop *nhs = NULL; + struct nexthop_adata *nhs = NULL; rte *best0, *best, *rt0, *rt, *tmp; best0 = net->routes; @@ -976,21 +969,31 @@ rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int continue; if (rte_is_reachable(rt)) - nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit); + { + eattr *nhea = ea_find(rt->attrs->eattrs, &ea_gen_nexthop); + ASSERT_DIE(nhea); + + if (nhs) + nhs = nexthop_merge(nhs, (struct nexthop_adata *) nhea->u.ptr, c->merge_limit, pool); + else + nhs = (struct nexthop_adata *) nhea->u.ptr; + } if (tmp) rte_free(tmp); } + if (nhs) { - nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit); + eattr *nhea = ea_find(best->attrs->eattrs, &ea_gen_nexthop); + ASSERT_DIE(nhea); - if (nhs->next) - { - best = rte_cow_rta(best, pool); - nexthop_link(best->attrs, nhs); - } + nhs = nexthop_merge(nhs, (struct nexthop_adata *) nhea->u.ptr, c->merge_limit, pool); + + best = rte_cow_rta(best, pool); + ea_set_attr(&best->attrs->eattrs, + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, &nhs->ad)); } if (best != best0) @@ -1184,7 +1187,16 @@ rte_validate(rte *e) return 0; } - if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh))) + eattr *nhea = ea_find(e->attrs->eattrs, &ea_gen_nexthop); + if ((!nhea) != (e->attrs->dest != RTD_UNICAST)) + { + log(L_WARN "Ignoring route %N with destination %d and %snexthop received via %s", + n->n.addr, e->attrs->dest, (nhea ? "" : "no "), e->sender->proto->name); + return 0; + } + + if ((e->attrs->dest == RTD_UNICAST) && + !nexthop_is_sorted((struct nexthop_adata *) nhea->u.ptr)) { log(L_WARN "Ignoring unsorted multipath route %N received via %s", n->n.addr, e->sender->proto->name); @@ -2395,8 +2407,7 @@ rta_apply_hostentry(rta *a, struct hostentry *he) if (a->dest != RTD_UNICAST) { /* No nexthop */ -no_nexthop: - a->nh = (struct nexthop) {}; + ea_unset_attr(&a->eattrs, 0, &ea_gen_nexthop); return; } @@ -2404,74 +2415,71 @@ no_nexthop: if (!mls_ea && he->nexthop_linkable) { /* Just link the nexthop chain, no label append happens. */ - memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh))); + ea_copy_attr(&a->eattrs, he->src->eattrs, &ea_gen_nexthop); return; } - struct nexthop *nhp = NULL, *nhr = NULL; - int skip_nexthop = 0; - const struct adata *mls = mls_ea ? mls_ea->u.ptr : NULL; uint mls_cnt = mls ? mls->length / sizeof(u32) : 0; - for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next) + eattr *he_nh_ea = ea_find(he->src->eattrs, &ea_gen_nexthop); + struct nexthop_adata *nhad = (struct nexthop_adata *) he_nh_ea->u.ptr; + + uint total_size = OFFSETOF(struct nexthop_adata, nh); + + NEXTHOP_WALK(nh, nhad) { - if (skip_nexthop) - skip_nexthop--; - else + if (nh->labels + mls_cnt > MPLS_MAX_LABEL_STACK) { - nhr = nhp; - nhp = (nhp ? (nhp->next = lp_alloc(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh)); + log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)", + nh->labels, mls_cnt, nh->labels + mls_cnt, MPLS_MAX_LABEL_STACK); + continue; } - memset(nhp, 0, NEXTHOP_MAX_SIZE); - nhp->iface = nh->iface; - nhp->weight = nh->weight; + total_size += NEXTHOP_SIZE_CNT(nh->labels + mls_cnt); + } - if (mls) + if (total_size == OFFSETOF(struct nexthop_adata, nh)) + { + a->dest = RTD_UNREACHABLE; + log(L_WARN "No valid nexthop remaining, setting route unreachable"); + + ea_unset_attr(&a->eattrs, 0, &ea_gen_nexthop); + return; + } + + struct nexthop_adata *new = (struct nexthop_adata *) tmp_alloc_adata(total_size); + struct nexthop *dest = &new->nh; + + NEXTHOP_WALK(nh, nhad) + { + if (nh->labels + mls_cnt > MPLS_MAX_LABEL_STACK) + continue; + + memcpy(dest, nh, NEXTHOP_SIZE(nh)); + if (mls_cnt) { - nhp->labels = nh->labels + mls_cnt; - if (nhp->labels <= MPLS_MAX_LABEL_STACK) - { - memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */ - memcpy(&(nhp->label[nh->labels]), mls->data, mls->length); /* Then the bottom labels */ - } - else - { - log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)", - nh->labels, mls_cnt, nhp->labels, MPLS_MAX_LABEL_STACK); - skip_nexthop++; - continue; - } - } - else if (nh->labels) - { - nhp->labels = nh->labels; - memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); + memcpy(&(dest->label[dest->labels]), mls->data, mls->length); + dest->labels += mls_cnt; } if (ipa_nonzero(nh->gw)) - { - nhp->gw = nh->gw; /* Router nexthop */ - nhp->flags |= (nh->flags & RNF_ONLINK); - } + /* Router nexthop */ + dest->flags = (dest->flags & RNF_ONLINK); else if (!(nh->iface->flags & IF_MULTIACCESS) || (nh->iface->flags & IF_LOOPBACK)) - nhp->gw = IPA_NONE; /* PtP link - no need for nexthop */ + dest->gw = IPA_NONE; /* PtP link - no need for nexthop */ else if (ipa_nonzero(he->link)) - nhp->gw = he->link; /* Device nexthop with link-local address known */ + dest->gw = he->link; /* Device nexthop with link-local address known */ else - nhp->gw = he->addr; /* Device nexthop with link-local address unknown */ + dest->gw = he->addr; /* Device nexthop with link-local address unknown */ + + dest = NEXTHOP_NEXT(dest); } - if (skip_nexthop) - if (nhr) - nhr->next = NULL; - else - { - a->dest = RTD_UNREACHABLE; - log(L_WARN "No valid nexthop remaining, setting route unreachable"); - goto no_nexthop; - } + /* Fix final length */ + new->ad.length = (void *) dest - (void *) new->ad.data; + ea_set_attr(&a->eattrs, EA_LITERAL_DIRECT_ADATA( + &ea_gen_nexthop, 0, &new->ad)); } static inline int @@ -2485,9 +2493,14 @@ rta_next_hop_outdated(rta *a) if (!he->src) return a->dest != RTD_UNREACHABLE; + eattr *he_nh_ea = ea_find(he->src->eattrs, &ea_gen_nexthop); + eattr *a_nh_ea = ea_find(a->eattrs, &ea_gen_nexthop); + return (a->dest != he->dest) || (ea_get_int(a->eattrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN) != he->igp_metric) || - (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh)); + (!he->nexthop_linkable) || + (!he_nh_ea != !a_nh_ea) || + (he_nh_ea && a_nh_ea && !adata_same(he_nh_ea->u.ptr, a_nh_ea->u.ptr)); } static inline rte * @@ -3507,7 +3520,14 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) if (a->dest == RTD_UNICAST) { - for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) + eattr *ea = ea_find(a->eattrs, &ea_gen_nexthop); + if (!ea) + { + log(L_WARN "No nexthop in unicast route"); + goto done; + } + + NEXTHOP_WALK(nh, (struct nexthop_adata *) ea->u.ptr) if (ipa_zero(nh->gw)) { if (if_local_addr(he->addr, nh->iface)) diff --git a/proto/babel/babel.c b/proto/babel/babel.c index a7dcee09..9a43f484 100644 --- a/proto/babel/babel.c +++ b/proto/babel/babel.c @@ -643,26 +643,14 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e) if (r) { - struct { - ea_list l; - eattr a[6]; - } eattrs = { - .l.count = ARRAY_SIZE(eattrs.a), - .a = { - EA_LITERAL_EMBEDDED(&ea_gen_preference, 0, c->preference), - EA_LITERAL_STORE_ADATA(&ea_gen_from, 0, &r->neigh->addr, sizeof(r->neigh->addr)), - EA_LITERAL_EMBEDDED(&ea_gen_source, 0, RTS_BABEL), - EA_LITERAL_EMBEDDED(&ea_babel_metric, 0, r->metric), - EA_LITERAL_STORE_ADATA(&ea_babel_router_id, 0, &r->router_id, sizeof(r->router_id)), - EA_LITERAL_EMBEDDED(&ea_babel_seqno, 0, r->seqno), - } - }; - - rta a0 = { - .dest = RTD_UNICAST, - .nh.gw = r->next_hop, - .nh.iface = r->neigh->ifa->iface, - .eattrs = &eattrs.l, + struct nexthop_adata nhad = { + .nh = { + .gw = r->next_hop, + .iface = r->neigh->ifa->iface, + }, + .ad = { + .length = sizeof nhad - sizeof nhad.ad, + }, }; /* @@ -671,7 +659,28 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e) * have routing work. */ if (!neigh_find(&p->p, r->next_hop, r->neigh->ifa->iface, 0)) - a0.nh.flags = RNF_ONLINK; + nhad.nh.flags = RNF_ONLINK; + + struct { + ea_list l; + eattr a[7]; + } eattrs = { + .l.count = ARRAY_SIZE(eattrs.a), + .a = { + EA_LITERAL_EMBEDDED(&ea_gen_preference, 0, c->preference), + EA_LITERAL_STORE_ADATA(&ea_gen_from, 0, &r->neigh->addr, sizeof(r->neigh->addr)), + EA_LITERAL_EMBEDDED(&ea_gen_source, 0, RTS_BABEL), + EA_LITERAL_STORE_ADATA(&ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length), + EA_LITERAL_EMBEDDED(&ea_babel_metric, 0, r->metric), + EA_LITERAL_STORE_ADATA(&ea_babel_router_id, 0, &r->router_id, sizeof(r->router_id)), + EA_LITERAL_EMBEDDED(&ea_babel_seqno, 0, r->seqno), + } + }; + + rta a0 = { + .dest = RTD_UNICAST, + .eattrs = &eattrs.l, + }; rta *a = rta_lookup(&a0); rte *rte = rte_get_temp(a, p->p.main_source); diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index f8228313..fd0a1be4 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -966,10 +966,19 @@ bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll) if (nbr->scope == SCOPE_HOST) WITHDRAW(BAD_NEXT_HOP " - address %I is local", nbr->addr); - a->dest = RTD_UNICAST; - a->nh.gw = nbr->addr; - a->nh.iface = nbr->iface; ea_set_attr_u32(&a->eattrs, &ea_gen_igp_metric, 0, c->cf->cost); + + a->dest = RTD_UNICAST; + struct nexthop_adata nhad = { + .nh = { + .gw = nbr->addr, + .iface = nbr->iface, + }, + .ad = { + .length = sizeof nhad - sizeof nhad.ad, + }, + }; + ea_set_attr_data(&a->eattrs, &ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length); } else /* GW_RECURSIVE */ { @@ -998,7 +1007,7 @@ bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a) a->dest = RTD_UNREACHABLE; a->hostentry = NULL; - a->nh = (struct nexthop) { }; + ea_unset_attr(&a->eattrs, 0, &ea_gen_nexthop); return; } @@ -1008,8 +1017,16 @@ bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a) if (s->channel->cf->gw_mode == GW_DIRECT) { - a->nh.labels = lnum; - memcpy(a->nh.label, labels, 4*lnum); + eattr *e = ea_find(a->eattrs, &ea_gen_nexthop); + struct { + struct nexthop_adata nhad; + u32 labels[MPLS_MAX_LABEL_STACK]; + } nh; + + memcpy(&nh.nhad, e->u.ptr, sizeof(struct adata) + e->u.ptr->length); + nh.nhad.nh.labels = lnum; + memcpy(nh.labels, labels, lnum * sizeof(u32)); + nh.nhad.ad.length = sizeof nh.nhad + lnum * sizeof(u32); } else /* GW_RECURSIVE */ rta_apply_hostentry(a, s->hostentry); @@ -1076,7 +1093,7 @@ bgp_use_next_hop(struct bgp_export_state *s, eattr *a) return p->neigh && (p->neigh->iface == ifa); } -static inline int +static inline struct nexthop * bgp_use_gateway(struct bgp_export_state *s) { struct bgp_proto *p = s->proto; @@ -1085,22 +1102,35 @@ bgp_use_gateway(struct bgp_export_state *s) /* Handle next hop self option - also applies to gateway */ if (c->cf->next_hop_self && bgp_match_src(s, c->cf->next_hop_self)) - return 0; + return NULL; + + /* Unreachable */ + if (ra->dest != RTD_UNICAST) + return NULL; + + eattr *nhea = ea_find(ra->eattrs, &ea_gen_nexthop); + if (!nhea) + return NULL; /* We need one valid global gateway */ - if ((ra->dest != RTD_UNICAST) || ra->nh.next || ipa_zero(ra->nh.gw) || ipa_is_link_local(ra->nh.gw)) - return 0; + struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr; + if (!NEXTHOP_ONE(nhad) || ipa_zero(nhad->nh.gw) || + ipa_is_link_local(nhad->nh.gw)) + return NULL; /* Check for non-matching AF */ - if ((ipa_is_ip4(ra->nh.gw) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop) - return 0; + if ((ipa_is_ip4(nhad->nh.gw) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop) + return NULL; /* Use it when exported to internal peers */ if (p->is_interior) - return 1; + return &nhad->nh; /* Use it when forwarded to single-hop BGP peer on on the same iface */ - return p->neigh && (p->neigh->iface == ra->nh.iface); + if (p->neigh && (p->neigh->iface == nhad->nh.iface)) + return &nhad->nh; + + return NULL; } static void @@ -1108,17 +1138,17 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to) { if (!a || !bgp_use_next_hop(s, a)) { - if (bgp_use_gateway(s)) + struct nexthop *nhloc; + if (nhloc = bgp_use_gateway(s)) { - rta *ra = s->route->attrs; - ip_addr nh[1] = { ra->nh.gw }; + ip_addr nh[1] = { nhloc->gw }; bgp_set_attr_data(to, BA_NEXT_HOP, 0, nh, 16); if (s->mpls) { u32 implicit_null = BGP_MPLS_NULL; - u32 *labels = ra->nh.labels ? ra->nh.label : &implicit_null; - uint lnum = ra->nh.labels ? ra->nh.labels : 1; + u32 *labels = nhloc->labels ? nhloc->label : &implicit_null; + uint lnum = nhloc->labels ? nhloc->labels : 1; bgp_set_attr_data(to, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4); } } diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c index 91739056..6070fd34 100644 --- a/proto/ospf/rt.c +++ b/proto/ospf/rt.c @@ -28,24 +28,30 @@ nh_is_vlink(struct nexthop *nhs) static inline int unresolved_vlink(ort *ort) { - return ort->n.nhs && nh_is_vlink(ort->n.nhs); + return ort->n.nhs && nh_is_vlink(&ort->n.nhs->nh); } -static inline struct nexthop * +static inline struct nexthop_adata * new_nexthop(struct ospf_proto *p, ip_addr gw, struct iface *iface, byte weight) { - struct nexthop *nh = lp_allocz(p->nhpool, sizeof(struct nexthop)); - nh->gw = gw; - nh->iface = iface; - nh->weight = weight; - return nh; + struct nexthop_adata *nhad = lp_alloc(p->nhpool, sizeof(struct nexthop_adata)); + *nhad = (struct nexthop_adata) { + .ad = { .length = sizeof *nhad - sizeof nhad->ad, }, + .nh = { + .gw = gw, + .iface = iface, + .weight = weight, + }, + }; + + return nhad; } /* Returns true if there are device nexthops in n */ static inline int -has_device_nexthops(const struct nexthop *n) +has_device_nexthops(struct nexthop_adata *nhad) { - for (; n; n = n->next) + NEXTHOP_WALK(n, nhad) if (ipa_zero(n->gw)) return 1; @@ -53,38 +59,22 @@ has_device_nexthops(const struct nexthop *n) } /* Replace device nexthops with nexthops to gw */ -static struct nexthop * -fix_device_nexthops(struct ospf_proto *p, const struct nexthop *n, ip_addr gw) +static struct nexthop_adata * +fix_device_nexthops(struct ospf_proto *p, struct nexthop_adata *old, ip_addr gw) { - struct nexthop *root1 = NULL; - struct nexthop *root2 = NULL; - struct nexthop **nn1 = &root1; - struct nexthop **nn2 = &root2; - if (!p->ecmp) - return new_nexthop(p, gw, n->iface, n->weight); - - /* This is a bit tricky. We cannot just copy the list and update n->gw, - because the list should stay sorted, so we create two lists, one with new - gateways and one with old ones, and then merge them. */ - - for (; n; n = n->next) { - struct nexthop *nn = new_nexthop(p, ipa_zero(n->gw) ? gw : n->gw, n->iface, n->weight); - - if (ipa_zero(n->gw)) - { - *nn1 = nn; - nn1 = &(nn->next); - } - else - { - *nn2 = nn; - nn2 = &(nn->next); - } + struct nexthop_adata *new = (struct nexthop_adata *) lp_store_adata(p->nhpool, old->ad.data, old->ad.length); + new->nh.gw = gw; + return new; } - return nexthop_merge(root1, root2, 1, 1, p->ecmp, p->nhpool); + struct nexthop_adata *tmp = (struct nexthop_adata *) tmp_copy_adata(&old->ad); + NEXTHOP_WALK(n, tmp) + if (ipa_zero(n->gw)) + n->gw = gw; + + return nexthop_sort(tmp, p->nhpool); } @@ -169,9 +159,9 @@ orta_compare(const struct ospf_proto *p, const orta *new, const orta *old) return -1; if (!new->nhs) return 1; - if (nh_is_vlink(new->nhs)) + if (nh_is_vlink(&new->nhs->nh)) return -1; - if (nh_is_vlink(old->nhs)) + if (nh_is_vlink(&old->nhs->nh)) return 1; @@ -279,11 +269,7 @@ ort_merge(struct ospf_proto *p, ort *o, const orta *new) orta *old = &o->n; if (old->nhs != new->nhs) - { - old->nhs = nexthop_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse, - p->ecmp, p->nhpool); - old->nhs_reuse = 1; - } + old->nhs = nexthop_merge(old->nhs, new->nhs, p->ecmp, p->nhpool); if (old->rid < new->rid) old->rid = new->rid; @@ -295,11 +281,7 @@ ort_merge_ext(struct ospf_proto *p, ort *o, const orta *new) orta *old = &o->n; if (old->nhs != new->nhs) - { - old->nhs = nexthop_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse, - p->ecmp, p->nhpool); - old->nhs_reuse = 1; - } + old->nhs = nexthop_merge(old->nhs, new->nhs, p->ecmp, p->nhpool); if (old->tag != new->tag) old->tag = 0; @@ -1165,7 +1147,7 @@ ospf_check_vlinks(struct ospf_proto *p) if (tmp && (tmp->color == INSPF) && ipa_nonzero(tmp->lb) && tmp->nhs) { - struct ospf_iface *nhi = ospf_iface_find(p, tmp->nhs->iface); + struct ospf_iface *nhi = ospf_iface_find(p, tmp->nhs->nh.iface); if ((ifa->state != OSPF_IS_PTP) || (ifa->vifa != nhi) @@ -1579,10 +1561,7 @@ ospf_ext_spf(struct ospf_proto *p) /* Replace device nexthops with nexthops to forwarding address from LSA */ if (has_device_nexthops(nfa.nhs)) - { nfa.nhs = fix_device_nexthops(p, nfa.nhs, rt.fwaddr); - nfa.nhs_reuse = 1; - } } if (rt.ebit) @@ -1726,10 +1705,10 @@ ospf_rt_spf(struct ospf_proto *p) static inline int -inherit_nexthops(struct nexthop *pn) +inherit_nexthops(struct nexthop_adata *pn) { /* Proper nexthops (with defined GW) or dummy vlink nexthops (without iface) */ - return pn && (ipa_nonzero(pn->gw) || !pn->iface); + return pn && (ipa_nonzero(pn->nh.gw) || !pn->nh.iface); } static inline ip_addr @@ -1744,12 +1723,12 @@ link_lsa_lladdr(struct ospf_proto *p, struct top_hash_entry *en) return ospf_is_ip4(p) ? ipa_from_ip4(ospf3_6to4(ll)) : ipa_from_ip6(ll); } -static struct nexthop * +static struct nexthop_adata * calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par, int pos, uint data, uint lif, uint nif) { struct ospf_proto *p = oa->po; - struct nexthop *pn = par->nhs; + struct nexthop_adata *pn = par->nhs; struct top_hash_entry *link = NULL; struct ospf_iface *ifa = NULL; ip_addr nh = IPA_NONE; @@ -1827,10 +1806,10 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, return NULL; } - struct nexthop *nhs = new_nexthop(p, nh, ifa->iface, ifa->ecmp_weight); + struct nexthop_adata *nhs = new_nexthop(p, nh, ifa->iface, ifa->ecmp_weight); if (ifa->addr->flags & IA_HOST) - nhs->flags = RNF_ONLINK; + nhs->nh.flags = RNF_ONLINK; return nhs; } @@ -1851,7 +1830,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, if (ipa_zero(en->lb)) goto bad; - return new_nexthop(p, en->lb, pn->iface, pn->weight); + return new_nexthop(p, en->lb, pn->nh.iface, pn->nh.weight); } else /* OSPFv3 */ { @@ -1859,7 +1838,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, * Next-hop is taken from lladdr field of Link-LSA, en->lb_id * is computed in link_back(). */ - link = ospf_hash_find(p->gr, pn->iface->index, en->lb_id, rid, LSA_T_LINK); + link = ospf_hash_find(p->gr, pn->nh.iface->index, en->lb_id, rid, LSA_T_LINK); if (!link) return NULL; @@ -1867,7 +1846,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, if (ipa_zero(nh)) return NULL; - return new_nexthop(p, nh, pn->iface, pn->weight); + return new_nexthop(p, nh, pn->nh.iface, pn->nh.weight); } } @@ -1914,7 +1893,7 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry if (!link_back(oa, en, par, lif, nif)) return; - struct nexthop *nhs = calc_next_hop(oa, en, par, pos, data, lif, nif); + struct nexthop_adata *nhs = calc_next_hop(oa, en, par, pos, data, lif, nif); if (!nhs) { log(L_WARN "%s: Cannot find next hop for LSA (Type: %04x, Id: %R, Rt: %R)", @@ -1923,7 +1902,7 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry } /* If en->dist > 0, we know that en->color == CANDIDATE and en->nhs is defined. */ - if ((dist == en->dist) && !nh_is_vlink(en->nhs)) + if ((dist == en->dist) && !nh_is_vlink(&en->nhs->nh)) { /* * For multipath, we should merge nexthops. We merge regular nexthops only. @@ -1947,13 +1926,11 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry */ /* Keep old ones */ - if (!p->ecmp || nh_is_vlink(nhs) || (nhs == en->nhs)) + if (!p->ecmp || nh_is_vlink(&nhs->nh) || (nhs == en->nhs)) return; /* Merge old and new */ - int new_reuse = (par->nhs != nhs); - en->nhs = nexthop_merge(en->nhs, nhs, en->nhs_reuse, new_reuse, p->ecmp, p->nhpool); - en->nhs_reuse = 1; + en->nhs = nexthop_merge(en->nhs, nhs, p->ecmp, p->nhpool); return; } @@ -1967,7 +1944,6 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry en->nhs = nhs; en->dist = dist; en->color = CANDIDATE; - en->nhs_reuse = (par->nhs != nhs); prev = NULL; @@ -2008,10 +1984,23 @@ ort_changed(ort *nf, rta *nr) if (!or || (nf->n.metric1 != nf->old_metric1) || (nf->n.metric2 != nf->old_metric2) || (nf->n.tag != nf->old_tag) || (nf->n.rid != nf->old_rid) || - (nr->dest != or->dest) || - !nexthop_same(&(nr->nh), &(or->nh))) + (nr->dest != or->dest)) return 1; + eattr *nhea_n = ea_find(nr->eattrs, &ea_gen_nexthop); + eattr *nhea_o = ea_find(or->eattrs, &ea_gen_nexthop); + if (!nhea_n != !nhea_o) + return 1; + + if (nhea_n && nhea_o) + { + struct nexthop_adata *nhad_n = (struct nexthop_adata *) nhea_n->u.ptr; + struct nexthop_adata *nhad_o = (struct nexthop_adata *) nhea_o->u.ptr; + + if (!nexthop_same(nhad_n, nhad_o)) + return 1; + } + if ( ea_get_int(nr->eattrs, &ea_gen_source, 0) != ea_get_int(or->eattrs, &ea_gen_source, 0)) return 1; @@ -2038,10 +2027,9 @@ again1: FIB_ITERATE_START(fib, &fit, ort, nf) { /* Sanity check of next-hop addresses, failure should not happen */ - if (nf->n.type) + if (nf->n.type && nf->n.nhs) { - struct nexthop *nh; - for (nh = nf->n.nhs; nh; nh = nh->next) + NEXTHOP_WALK(nh, nf->n.nhs) if (ipa_nonzero(nh->gw)) { neighbor *nbr = neigh_find(&p->p, nh->gw, nh->iface, @@ -2062,9 +2050,24 @@ again1: { rta a0 = { .dest = RTD_UNICAST, - .nh = *(nf->n.nhs), }; + struct { + ea_list l; + eattr a[7]; + } eattrs; + + eattrs.l = (ea_list) {}; + + eattrs.a[eattrs.l.count++] = + EA_LITERAL_EMBEDDED(&ea_gen_preference, 0, p->p.main_channel->preference); + + eattrs.a[eattrs.l.count++] = + EA_LITERAL_EMBEDDED(&ea_gen_source, 0, nf->n.type); + + eattrs.a[eattrs.l.count++] = + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, &nf->n.nhs->ad); + if (reload || ort_changed(nf, &a0)) { nf->old_metric1 = nf->n.metric1; @@ -2072,19 +2075,6 @@ again1: nf->old_tag = nf->n.tag; nf->old_rid = nf->n.rid; - struct { - ea_list l; - eattr a[6]; - } eattrs; - - eattrs.l = (ea_list) {}; - - eattrs.a[eattrs.l.count++] = - EA_LITERAL_EMBEDDED(&ea_gen_preference, 0, p->p.main_channel->preference); - - eattrs.a[eattrs.l.count++] = - EA_LITERAL_EMBEDDED(&ea_gen_source, 0, nf->n.type); - eattrs.a[eattrs.l.count++] = EA_LITERAL_EMBEDDED(&ea_ospf_metric1, 0, nf->n.metric1); diff --git a/proto/ospf/rt.h b/proto/ospf/rt.h index 094e125b..180216b7 100644 --- a/proto/ospf/rt.h +++ b/proto/ospf/rt.h @@ -18,8 +18,6 @@ typedef struct orta { u8 type; /* RTS_OSPF_* */ - u8 nhs_reuse; /* Whether nhs nodes can be reused during merging. - See a note in rt.c:add_cand() */ u32 options; /* * For ORT_ROUTER routes, options field are router-LSA style @@ -53,7 +51,7 @@ typedef struct orta struct ospf_area *oa; struct ospf_area *voa; /* Used when route is replaced in ospf_rt_sum_tr(), NULL otherwise */ - struct nexthop *nhs; /* Next hops computed during SPF */ + struct nexthop_adata *nhs; /* Next hops computed during SPF */ struct top_hash_entry *en; /* LSA responsible for this orta */ } orta; diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c index db423dc8..ca4620cc 100644 --- a/proto/ospf/topology.c +++ b/proto/ospf/topology.c @@ -1366,8 +1366,20 @@ ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte uint tag = ea_get_int(a->eattrs, &ea_ospf_tag, 0); ip_addr fwd = IPA_NONE; - if ((a->dest == RTD_UNICAST) && use_gw_for_fwaddr(p, a->nh.gw, a->nh.iface)) - fwd = a->nh.gw; + if (a->dest == RTD_UNICAST) + { + eattr *nhea = ea_find(a->eattrs, &ea_gen_nexthop); + if (!nhea) + { + log(L_ERR "%s: Unicast route without nexthop for %N", + p->p.name, n->n.addr); + return; + } + + struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr; + if (use_gw_for_fwaddr(p, nhad->nh.gw, nhad->nh.iface)) + fwd = nhad->nh.gw; + } /* NSSA-LSA with P-bit set must have non-zero forwarding address */ if (oa && ipa_zero(fwd)) diff --git a/proto/ospf/topology.h b/proto/ospf/topology.h index 535d1f1b..fe102f77 100644 --- a/proto/ospf/topology.h +++ b/proto/ospf/topology.h @@ -28,7 +28,7 @@ struct top_hash_entry u16 next_lsa_opts; /* For postponed LSA origination */ btime inst_time; /* Time of installation into DB */ struct ort *nf; /* Reference fibnode for sum and ext LSAs, NULL for otherwise */ - struct nexthop *nhs; /* Computed nexthops - valid only in ospf_rt_spf() */ + struct nexthop_adata *nhs; /* Computed nexthops - valid only in ospf_rt_spf() */ ip_addr lb; /* In OSPFv2, link back address. In OSPFv3, any global address in the area useful for vlinks */ u32 lb_id; /* Interface ID of link back iface (for bcast or NBMA networks) */ u32 dist; /* Distance from the root */ @@ -39,8 +39,6 @@ struct top_hash_entry #define CANDIDATE 1 #define INSPF 2 u8 mode; /* LSA generated during RT calculation (LSA_RTCALC or LSA_STALE)*/ - u8 nhs_reuse; /* Whether nhs nodes can be reused during merging. - See a note in rt.c:add_cand() */ }; diff --git a/proto/perf/perf.c b/proto/perf/perf.c index 4329556c..8642e0a1 100644 --- a/proto/perf/perf.c +++ b/proto/perf/perf.c @@ -144,13 +144,19 @@ perf_loop(void *data) if (!p->attrs_per_rte || !(i % p->attrs_per_rte)) { struct rta a0 = { .dest = RTD_UNICAST, + }; + + ea_set_attr_u32(&a0.eattrs, &ea_gen_preference, 0, p->p.main_channel->preference); + ea_set_attr_u32(&a0.eattrs, &ea_gen_source, 0, RTS_PERF); + + struct nexthop_adata nhad = { .nh.iface = p->ifa->iface, .nh.gw = gw, .nh.weight = 1, }; - ea_set_attr_u32(&a0.eattrs, &ea_gen_preference, 0, p->p.main_channel->preference); - ea_set_attr_u32(&a0.eattrs, &ea_gen_source, 0, RTS_PERF); + ea_set_attr_data(&a0.eattrs, &ea_gen_nexthop, 0, + &nhad.ad.data, sizeof nhad - sizeof nhad.ad); p->data[i].a = rta_lookup(&a0); } diff --git a/proto/rip/rip.c b/proto/rip/rip.c index f7f34c27..425a411c 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -169,38 +169,58 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en) a0.eattrs = &ea_block.l; u16 rt_tag = rt->tag; + struct iface *rt_from = NULL; if (p->ecmp) { /* ECMP route */ - struct nexthop *nhs = NULL; int num = 0; + for (rt = en->routes; rt && (num < p->ecmp); rt = rt->next) + if (rip_valid_rte(rt)) + num++; + + struct nexthop_adata *nhad = (struct nexthop_adata *) tmp_alloc_adata((num+1) * sizeof(struct nexthop)); + struct nexthop *nh = &nhad->nh; + for (rt = en->routes; rt && (num < p->ecmp); rt = rt->next) { if (!rip_valid_rte(rt)) - continue; + continue; - struct nexthop *nh = allocz(sizeof(struct nexthop)); + *nh = (struct nexthop) { + .gw = rt->next_hop, + .iface = rt->from->ifa->iface, + .weight = rt->from->ifa->cf->ecmp_weight, + }; - nh->gw = rt->next_hop; - nh->iface = rt->from->ifa->iface; - nh->weight = rt->from->ifa->cf->ecmp_weight; + if (!rt_from) + rt_from = rt->from->ifa->iface; - nexthop_insert(&nhs, nh); - num++; + nh = NEXTHOP_NEXT(nh); if (rt->tag != rt_tag) rt_tag = 0; } - a0.nh = *nhs; + nhad->ad.length = ((void *) nh - (void *) nhad->ad.data); + + ea_set_attr(&a0.eattrs, + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, + &(nexthop_sort(nhad, tmp_linpool)->ad))); } else { /* Unipath route */ - a0.nh.gw = rt->next_hop; - a0.nh.iface = rt->from->ifa->iface; + rt_from = rt->from->ifa->iface; + + struct nexthop_adata nhad = { + .nh.gw = rt->next_hop, + .nh.iface = rt->from->ifa->iface, + }; + + ea_set_attr_data(&a0.eattrs, &ea_gen_nexthop, 0, + &nhad.ad.data, sizeof nhad - sizeof nhad.ad); ea_set_attr_data(&a0.eattrs, &ea_gen_from, 0, &rt->from->nbr->addr, sizeof(ip_addr)); } @@ -208,7 +228,7 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en) struct rip_iface_adata riad = { .ad = { .length = sizeof(struct rip_iface_adata) - sizeof(struct adata) }, - .iface = a0.nh.iface, + .iface = rt_from, }; ea_set_attr(&a0.eattrs, EA_LITERAL_DIRECT_ADATA(&ea_rip_from, 0, &riad.ad)); @@ -362,8 +382,14 @@ rip_rt_notify(struct proto *P, struct channel *ch UNUSED, struct network *net, s en->metric = rt_metric; en->tag = rt_tag; en->from = (new->src->proto == P) ? rt_from : NULL; - en->iface = new->attrs->nh.iface; - en->next_hop = new->attrs->nh.gw; + + eattr *nhea = ea_find(new->attrs->eattrs, &ea_gen_nexthop); + if (nhea) + { + struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr; + en->iface = nhad->nh.iface; + en->next_hop = nhad->nh.gw; + } } else { diff --git a/proto/static/static.c b/proto/static/static.c index e792a148..2e4a46a6 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -61,32 +61,40 @@ static_announce_rte(struct static_proto *p, struct static_route *r) if (r->dest == RTD_UNICAST) { - struct static_route *r2; - struct nexthop *nhs = NULL; + uint sz = 0; + for (struct static_route *r2 = r; r2; r2 = r2->mp_next) + if (r2->active) + sz += NEXTHOP_SIZE_CNT(r2->mls ? r2->mls->length / sizeof(u32) : 0); - for (r2 = r; r2; r2 = r2->mp_next) + if (!sz) + goto withdraw; + + struct nexthop_adata *nhad = allocz(sz + sizeof *nhad); + struct nexthop *nh = &nhad->nh; + + for (struct static_route *r2 = r; r2; r2 = r2->mp_next) { if (!r2->active) continue; - struct nexthop *nh = allocz(NEXTHOP_MAX_SIZE); - nh->gw = r2->via; - nh->iface = r2->neigh->iface; - nh->flags = r2->onlink ? RNF_ONLINK : 0; - nh->weight = r2->weight; + *nh = (struct nexthop) { + .gw = r2->via, + .iface = r2->neigh->iface, + .flags = r2->onlink ? RNF_ONLINK : 0, + .weight = r2->weight, + }; + if (r2->mls) { nh->labels = r2->mls->length / sizeof(u32); memcpy(nh->label, r2->mls->data, r2->mls->length); } - nexthop_insert(&nhs, nh); + nh = NEXTHOP_NEXT(nh); } - if (!nhs) - goto withdraw; - - nexthop_link(a, nhs); + ea_set_attr_data(&a->eattrs, &ea_gen_nexthop, 0, + nhad->ad.data, (void *) nh - (void *) nhad->ad.data); } if (r->dest == RTDX_RECURSIVE) diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c index e0219ce0..fdfd4885 100644 --- a/sysdep/linux/netlink.c +++ b/sysdep/linux/netlink.c @@ -827,12 +827,12 @@ nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af UNUS } static void -nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af, ea_list *eattrs) +nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop_adata *nhad, int af, ea_list *eattrs) { struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH); eattr *flow = ea_find(eattrs, &ea_krt_realm); - for (; nh; nh = nh->next) + NEXTHOP_WALK(nh, nhad) { struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize); @@ -856,31 +856,44 @@ nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af, e nl_close_attr(h, a); } -static struct nexthop * +static struct nexthop_adata * nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, const net_addr *n, struct rtattr *ra, int af, int krt_src) { struct rtattr *a[BIRD_RTA_MAX]; - struct rtnexthop *nh = RTA_DATA(ra); - struct nexthop *rv, *first, **last; - unsigned len = RTA_PAYLOAD(ra); + struct rtnexthop *nh, *orig_nh = RTA_DATA(ra); + unsigned len, orig_len = RTA_PAYLOAD(ra); + uint cnt = 0; - first = NULL; - last = &first; - - while (len) + /* First count the nexthops */ + for (len = orig_len, nh = orig_nh; len; len -= NLMSG_ALIGN(nh->rtnh_len), nh = RTNH_NEXT(nh)) { /* Use RTNH_OK(nh,len) ?? */ if ((len < sizeof(*nh)) || (len < nh->rtnh_len)) goto err; if ((nh->rtnh_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD)) - goto next; + ; + else + cnt++; + } - *last = rv = lp_allocz(s->pool, NEXTHOP_MAX_SIZE); - last = &(rv->next); + struct nexthop_adata *nhad = lp_allocz(s->pool, cnt * NEXTHOP_MAX_SIZE + sizeof *nhad); + struct nexthop *rv = &nhad->nh; + + for (len = orig_len, nh = orig_nh; len; len -= NLMSG_ALIGN(nh->rtnh_len), nh = RTNH_NEXT(nh)) + { + /* Use RTNH_OK(nh,len) ?? */ + if ((len < sizeof(*nh)) || (len < nh->rtnh_len)) + goto err; + + if ((nh->rtnh_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD)) + continue; + + *rv = (struct nexthop) { + .weight = nh->rtnh_hops, + .iface = if_find_by_index(nh->rtnh_ifindex), + }; - rv->weight = nh->rtnh_hops; - rv->iface = if_find_by_index(nh->rtnh_ifindex); if (!rv->iface) { log(L_ERR "KRT: Received route %N with unknown ifindex %u", n, nh->rtnh_ifindex); @@ -959,16 +972,14 @@ nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, const net_addr } #endif - next: - len -= NLMSG_ALIGN(nh->rtnh_len); - nh = RTNH_NEXT(nh); + rv = NEXTHOP_NEXT(rv); } - /* Ensure nexthops are sorted to satisfy nest invariant */ - if (!nexthop_is_sorted(first)) - first = nexthop_sort(first); + /* Store final length */ + nhad->ad.length = (void *) rv - (void *) nhad->ad.data; - return first; + /* Ensure nexthops are sorted to satisfy nest invariant */ + return nexthop_is_sorted(nhad) ? nhad : nexthop_sort(nhad, s->pool); err: log(L_ERR "KRT: Received strange multipath route %N", n); @@ -1412,22 +1423,23 @@ krt_capable(rte *e) } static inline int -nh_bufsize(struct nexthop *nh) +nh_bufsize(struct nexthop_adata *nhad) { int rv = 0; - for (; nh != NULL; nh = nh->next) + NEXTHOP_WALK(nh, nhad) rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr))); return rv; } static int -nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh) +nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop_adata *nh) { eattr *ea; net *net = e->net; rta *a = e->attrs; ea_list *eattrs = a->eattrs; - int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh)); + + int bufsize = 128 + KRT_METRICS_MAX*8 + (nh ? nh_bufsize(nh) : 0); u32 priority = 0; struct { @@ -1511,7 +1523,7 @@ nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh) else if (ea = ea_find(eattrs, &ea_krt_scope)) r->r.rtm_scope = ea->u.data; else - r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; + r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->nh.gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; if (ea = ea_find(eattrs, &ea_krt_prefsrc)) nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data); @@ -1540,14 +1552,14 @@ dest: { case RTD_UNICAST: r->r.rtm_type = RTN_UNICAST; - if (nh->next && !krt_ecmp6(p)) + if (!NEXTHOP_ONE(nh) && !krt_ecmp6(p)) nl_add_multipath(&r->h, rsize, nh, p->af, eattrs); else { - nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index); - nl_add_nexthop(&r->h, rsize, nh, p->af); + nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->nh.iface->index); + nl_add_nexthop(&r->h, rsize, &nh->nh, p->af); - if (nh->flags & RNF_ONLINK) + if (nh->nh.flags & RNF_ONLINK) r->r.rtm_flags |= RTNH_F_ONLINK; } break; @@ -1576,21 +1588,35 @@ nl_add_rte(struct krt_proto *p, rte *e) rta *a = e->attrs; int err = 0; - if (krt_ecmp6(p) && a->nh.next) + eattr *nhea = ea_find(a->eattrs, &ea_gen_nexthop); + struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; + + if (krt_ecmp6(p) && nhad && !NEXTHOP_ONE(nhad)) { - struct nexthop *nh = &(a->nh); + uint cnt = 0; + NEXTHOP_WALK(nh, nhad) + { + struct { + struct nexthop_adata nhad; + u32 labels[MPLS_MAX_LABEL_STACK]; + } nhx; + memcpy(&nhx.nhad.nh, nh, NEXTHOP_SIZE(nh)); + nhx.nhad.ad.length = (void *) NEXTHOP_NEXT(&nhx.nhad.nh) - (void *) nhx.nhad.ad.data; - err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, nh); - if (err < 0) - return err; - - for (nh = nh->next; nh; nh = nh->next) - err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, nh); + if (!cnt++) + { + err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, &nhx.nhad); + if (err < 0) + return err; + } + else + err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, &nhx.nhad); + } return err; } - return nl_send_route(p, e, NL_OP_ADD, a->dest, &(a->nh)); + return nl_send_route(p, e, NL_OP_ADD, a->dest, nhad); } static inline int @@ -1610,7 +1636,9 @@ static inline int nl_replace_rte(struct krt_proto *p, rte *e) { rta *a = e->attrs; - return nl_send_route(p, e, NL_OP_REPLACE, a->dest, &(a->nh)); + eattr *nhea = ea_find(a->eattrs, &ea_gen_nexthop); + struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; + return nl_send_route(p, e, NL_OP_REPLACE, a->dest, nhad); } @@ -1861,6 +1889,15 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) else s->rta_flow = 0; + union { + struct { + struct adata ad; + struct nexthop nh; + u32 labels[MPLS_MAX_LABEL_STACK]; + }; + struct nexthop_adata nhad; + } nhad = {}; + switch (i->rtm_type) { case RTN_UNICAST: @@ -1868,49 +1905,50 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) if (a[RTA_MULTIPATH]) { - struct nexthop *nh = nl_parse_multipath(s, p, n, a[RTA_MULTIPATH], i->rtm_family, krt_src); + struct nexthop_adata *nh = nl_parse_multipath(s, p, n, a[RTA_MULTIPATH], i->rtm_family, krt_src); if (!nh) SKIP("strange RTA_MULTIPATH\n"); - nexthop_link(ra, nh); + ea_set_attr(&ra->eattrs, EA_LITERAL_DIRECT_ADATA( + &ea_gen_nexthop, 0, &nh->ad)); break; } if ((i->rtm_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD)) SKIP("ignore RTNH_F_DEAD\n"); - ra->nh.iface = if_find_by_index(oif); - if (!ra->nh.iface) + nhad.nh.iface = if_find_by_index(oif); + if (!nhad.nh.iface) { log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif); return; } if (a[RTA_GATEWAY]) - ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]); + nhad.nh.gw = rta_get_ipa(a[RTA_GATEWAY]); #ifdef HAVE_MPLS_KERNEL if (a[RTA_VIA]) - ra->nh.gw = rta_get_via(a[RTA_VIA]); + nhad.nh.gw = rta_get_via(a[RTA_VIA]); #endif - if (ipa_nonzero(ra->nh.gw)) + if (ipa_nonzero(nhad.nh.gw)) { /* Silently skip strange 6to4 routes */ const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96); - if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit)) + if ((i->rtm_family == AF_INET6) && ipa_in_netX(nhad.nh.gw, (net_addr *) &sit)) return; if (i->rtm_flags & RTNH_F_ONLINK) - ra->nh.flags |= RNF_ONLINK; + nhad.nh.flags |= RNF_ONLINK; neighbor *nbr; - nbr = neigh_find(&p->p, ra->nh.gw, ra->nh.iface, - (ra->nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0); + nbr = neigh_find(&p->p, nhad.nh.gw, nhad.nh.iface, + (nhad.nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0); if (!nbr || (nbr->scope == SCOPE_HOST)) { log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr, - ra->nh.gw); + nhad.nh.gw); return; } } @@ -1932,10 +1970,10 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) } #ifdef HAVE_MPLS_KERNEL - if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next) - ra->nh.labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label); + if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !a[RTA_MULTIPATH]) + nhad.nh.labels = rta_get_mpls(a[RTA_NEWDST], nhad.nh.label); - if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next) + if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !a[RTA_MULTIPATH]) { switch (rta_get_u16(a[RTA_ENCAP_TYPE])) { @@ -1944,7 +1982,7 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) struct rtattr *enca[BIRD_RTA_MAX]; nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]); nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca)); - ra->nh.labels = rta_get_mpls(enca[RTA_DST], ra->nh.label); + nhad.nh.labels = rta_get_mpls(enca[RTA_DST], nhad.nh.label); break; } default: @@ -1954,6 +1992,9 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) } #endif + /* Finalize the nexthop */ + nhad.ad.length = (void *) NEXTHOP_NEXT(&nhad.nh) - (void *) nhad.ad.data; + if (i->rtm_scope != def_scope) ea_set_attr(&ra->eattrs, EA_LITERAL_EMBEDDED(&ea_krt_scope, 0, i->rtm_scope)); @@ -1999,6 +2040,10 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) /* Store the new route */ s->net = net; s->attrs = ra; + + ea_set_attr_data(&ra->eattrs, &ea_gen_nexthop, 0, + nhad.ad.data, nhad.ad.length); + s->proto = p; s->new = new; s->krt_src = krt_src; @@ -2009,20 +2054,18 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) else { /* Merge next hops with the stored route */ - rta *oa = s->attrs; + eattr *nhea = ea_find(s->attrs->eattrs, &ea_gen_nexthop); + struct nexthop_adata *nhad_old = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; - struct nexthop *nhs = &oa->nh; - nexthop_insert(&nhs, &ra->nh); - - /* Perhaps new nexthop is inserted at the first position */ - if (nhs == &ra->nh) - { - /* Swap rtas */ - s->attrs = ra; - - /* Keep old eattrs */ - ra->eattrs = oa->eattrs; - } + if (nhad_old) + ea_set_attr(&s->attrs->eattrs, + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, + &(nexthop_merge(nhad_old, &nhad.nhad, + KRT_CF->merge_paths, s->pool)->ad) + )); + else + ea_set_attr_data(&s->attrs->eattrs, &ea_gen_nexthop, 0, + nhad.ad.data, nhad.ad.length); } } diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index b8e7670e..89e9e97e 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -611,10 +611,14 @@ krt_same_dest(rte *k, rte *e) if (ka->dest != ea->dest) return 0; - if (ka->dest == RTD_UNICAST) - return nexthop_same(&(ka->nh), &(ea->nh)); + if (ka->dest != RTD_UNICAST) + return 1; - return 1; + eattr *nhea_k = ea_find(ka->eattrs, &ea_gen_nexthop); + eattr *nhea_e = ea_find(ea->eattrs, &ea_gen_nexthop); + + ASSUME(nhea_k && nhea_e); + return adata_same(nhea_k->u.ptr, nhea_e->u.ptr); } /*