diff --git a/lib/ip.h b/lib/ip.h index 8f975aba..20e7a336 100644 --- a/lib/ip.h +++ b/lib/ip.h @@ -362,7 +362,7 @@ static inline ip6_addr ip6_hton(ip6_addr a) static inline ip6_addr ip6_ntoh(ip6_addr a) { return _MI6(ntohl(_I0(a)), ntohl(_I1(a)), ntohl(_I2(a)), ntohl(_I3(a))); } -#define MPLS_MAX_LABEL_STACK 8 +#define MPLS_MAX_LABEL_STACK 16 static inline int mpls_get(const char *buf, int buflen, u32 *stack) diff --git a/lib/route.h b/lib/route.h index 1ade1a81..613df0c3 100644 --- a/lib/route.h +++ b/lib/route.h @@ -89,7 +89,6 @@ typedef struct rta { u32 uc; /* Use count */ u32 hash_key; /* Hash over important fields */ struct ea_list *eattrs; /* Extended Attribute chain */ - struct hostentry *hostentry; /* Hostentry for recursive next-hops */ u16 cached:1; /* Are attributes cached? */ u16 dest:4; /* Route destination type (RTD_...) */ } rta; @@ -174,6 +173,8 @@ struct ea_class { uint readonly:1; /* This attribute can't be changed by filters */ \ uint conf:1; /* Requested by config */ \ void (*format)(const eattr *ea, byte *buf, uint size); \ + void (*stored)(const eattr *ea); /* When stored into global hash */ \ + void (*freed)(const eattr *ea); /* When released from global hash */ \ EA_CLASS_INSIDE; }; @@ -332,10 +333,6 @@ extern struct ea_class ea_gen_source; static inline u32 rt_get_source_attr(const rte *rt) { return ea_get_int(rt->attrs->eattrs, &ea_gen_source, 0); } -/* MPLS labels: Use with a recursive nexthop specification - * to add additional labels to the resolved nexthop */ -extern struct ea_class ea_mpls_labels; - /* Next hop: For now, stored as adata */ extern struct ea_class ea_gen_nexthop; diff --git a/lib/type.h b/lib/type.h index 6da86c77..65a032ec 100644 --- a/lib/type.h +++ b/lib/type.h @@ -53,6 +53,7 @@ enum btype { T_OPAQUE = 0x02, /* Opaque byte string (not filterable) */ T_IFACE = 0x0c, /* Pointer to an interface (inside adata) */ T_NEXTHOP_LIST = 0x2c, /* The whole nexthop block */ + T_HOSTENTRY = 0x2e, /* Hostentry with possible MPLS 
labels */ /* Types shared with eattrs */ T_INT = 0x01, /* 32-bit unsigned integer number */ diff --git a/nest/rt-attr.c b/nest/rt-attr.c index bd7ca425..cf3ab659 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -114,10 +114,48 @@ struct ea_class ea_gen_nexthop = { .type = T_NEXTHOP_LIST, }; -struct ea_class ea_mpls_labels = { - .name = "mpls_labels", - .type = T_CLIST, +/* + * ea_set_hostentry() acquires hostentry from hostcache. + * New hostentry has zero use count. Cached rta locks its + * hostentry (increases its use count), uncached rta does not lock it. + * Hostentry with zero use count is removed asynchronously + * during host cache update, therefore it is safe to hold + * such hostentry temporarily as long as you hold the table lock. + * + * There is no need to hold a lock for hostentry->dep table, because that table + * contains routes responsible for that hostentry, and therefore is non-empty if + * given hostentry has non-zero use count. If the hostentry has zero use count, + * the entry is removed before dep is referenced. + * + * The protocol responsible for routes with recursive next hops should hold a + * lock for a 'source' table governing that routes (argument tab), + * because its routes reference hostentries related to the governing table. + * When all such routes are + * removed, rtas are immediately removed achieving zero uc. Then the 'source' + * table lock could be immediately released, although hostentries may still + * exist - they will be freed together with the 'source' table. 
+ */ + + static void +ea_gen_hostentry_stored(const eattr *ea) +{ + struct hostentry_adata *had = (struct hostentry_adata *) ea->u.ptr; + had->he->uc++; +} + +static void +ea_gen_hostentry_freed(const eattr *ea) +{ + struct hostentry_adata *had = (struct hostentry_adata *) ea->u.ptr; + had->he->uc--; +} + +struct ea_class ea_gen_hostentry = { + .name = "hostentry", + .type = T_HOSTENTRY, .readonly = 1, + .stored = ea_gen_hostentry_stored, + .freed = ea_gen_hostentry_freed, }; const char * rta_dest_names[RTD_MAX] = { @@ -876,6 +914,8 @@ ea_list_ref(ea_list *l) struct ea_class *cl = ea_class_global[a->id]; ASSERT_DIE(cl && cl->uc); + + CALL(cl->stored, a); cl->uc++; } } @@ -890,6 +930,8 @@ ea_list_unref(ea_list *l) struct ea_class *cl = ea_class_global[a->id]; ASSERT_DIE(cl && cl->uc); + + CALL(cl->freed, a); if (!--cl->uc) ea_class_free(cl); } @@ -1206,9 +1248,7 @@ rta_hash(rta *a) { u64 h; mem_hash_init(&h); -#define MIX(f) mem_hash_mix(&h, &(a->f), sizeof(a->f)); #define BMIX(f) mem_hash_mix_num(&h, a->f); - MIX(hostentry); BMIX(dest); #undef MIX @@ -1219,7 +1259,6 @@ static inline int rta_same(rta *x, rta *y) { return (x->dest == y->dest && - x->hostentry == y->hostentry && ea_same(x->eattrs, y->eattrs)); } @@ -1303,7 +1342,6 @@ rta_lookup(rta *o) r = rta_copy(o); r->hash_key = h; r->cached = 1; - rt_lock_hostentry(r->hostentry); rta_insert(r); if (++rta_cache_count > rta_cache_limit) @@ -1320,7 +1358,6 @@ rta__free(rta *a) *a->pprev = a->next; if (a->next) a->next->pprev = a->pprev; - rt_unlock_hostentry(a->hostentry); ea_free(a->eattrs); a->cached = 0; sl_free(a); @@ -1411,8 +1448,7 @@ rta_init(void) ea_register_init(&ea_gen_from); ea_register_init(&ea_gen_source); ea_register_init(&ea_gen_nexthop); - - ea_register_init(&ea_mpls_labels); + ea_register_init(&ea_gen_hostentry); } /* diff --git a/nest/rt-table.c b/nest/rt-table.c index d43305c9..8677c177 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -121,6 +121,7 @@ static void rt_free_hostcache(rtable 
*tab); static void rt_notify_hostcache(rtable *tab, net *net); static void rt_update_hostcache(rtable *tab); static void rt_next_hop_update(rtable *tab); +static inline void rt_next_hop_resolve_rte(rte *r); static inline void rt_prune_table(rtable *tab); static inline void rt_schedule_notify(rtable *tab); static void rt_flowspec_notify(rtable *tab, net *net); @@ -159,7 +160,8 @@ const char *rt_export_state_name(u8 state) return rt_export_state_name_array[state]; } - +static inline struct rte_storage *rt_next_hop_update_rte(rtable *tab, net *n, rte *old); +static struct hostentry *rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); static void net_init_with_trie(struct fib *f, void *N) @@ -1555,13 +1557,7 @@ rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src int fr; stats->updates_received++; - if (!rte_validate(c, new)) - { - channel_rte_trace_in(D_FILTERS, c, new, "invalid"); - stats->updates_invalid++; - new = NULL; - } - else if ((filter == FILTER_REJECT) || + if ((filter == FILTER_REJECT) || ((fr = f_run(filter, new, 0)) > F_ACCEPT)) { stats->updates_filtered++; @@ -1572,6 +1568,17 @@ rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src else new = NULL; } + + if (new) + rt_next_hop_resolve_rte(new); + + if (new && !rte_validate(c, new)) + { + channel_rte_trace_in(D_FILTERS, c, new, "invalid"); + stats->updates_invalid++; + new = NULL; + } + } else stats->withdraws_received++; @@ -2513,9 +2520,29 @@ rt_preconfig(struct config *c) */ void -rta_apply_hostentry(rta *a, struct hostentry *he) +ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]) { - a->hostentry = he; + struct { + struct adata ad; + struct hostentry *he; + u32 labels[lnum]; + } *head = (void *) tmp_alloc_adata(sizeof *head - sizeof(struct adata)); + + head->he = rt_get_hostentry(tab, gw, ll, dep); + memcpy(head->labels, labels, lnum * sizeof(u32)); + + 
ea_set_attr(to, EA_LITERAL_DIRECT_ADATA( + &ea_gen_hostentry, 0, &head->ad)); +} + + +static void +rta_apply_hostentry(rta *a, struct hostentry_adata *head) +{ + struct hostentry *he = head->he; + u32 *labels = head->labels; + u32 lnum = (u32 *) (head->ad.data + head->ad.length) - labels; + a->dest = he->dest; ea_set_attr_u32(&a->eattrs, &ea_gen_igp_metric, 0, he->igp_metric); @@ -2527,17 +2554,12 @@ rta_apply_hostentry(rta *a, struct hostentry *he) return; } - eattr *mls_ea = ea_find(a->eattrs, &ea_mpls_labels); - - if (!mls_ea && he->nexthop_linkable) + if (!lnum && he->nexthop_linkable) { /* Just link the nexthop chain, no label append happens. */ ea_copy_attr(&a->eattrs, he->src->eattrs, &ea_gen_nexthop); return; } - const struct adata *mls = mls_ea ? mls_ea->u.ptr : NULL; - uint mls_cnt = mls ? mls->length / sizeof(u32) : 0; - eattr *he_nh_ea = ea_find(he->src->eattrs, &ea_gen_nexthop); struct nexthop_adata *nhad = (struct nexthop_adata *) he_nh_ea->u.ptr; @@ -2545,14 +2567,14 @@ rta_apply_hostentry(rta *a, struct hostentry *he) NEXTHOP_WALK(nh, nhad) { - if (nh->labels + mls_cnt > MPLS_MAX_LABEL_STACK) + if (nh->labels + lnum > MPLS_MAX_LABEL_STACK) { log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)", - nh->labels, mls_cnt, nh->labels + mls_cnt, MPLS_MAX_LABEL_STACK); + nh->labels, lnum, nh->labels + lnum, MPLS_MAX_LABEL_STACK); continue; } - total_size += NEXTHOP_SIZE_CNT(nh->labels + mls_cnt); + total_size += NEXTHOP_SIZE_CNT(nh->labels + lnum); } if (total_size == OFFSETOF(struct nexthop_adata, nh)) @@ -2569,14 +2591,14 @@ rta_apply_hostentry(rta *a, struct hostentry *he) NEXTHOP_WALK(nh, nhad) { - if (nh->labels + mls_cnt > MPLS_MAX_LABEL_STACK) + if (nh->labels + lnum > MPLS_MAX_LABEL_STACK) continue; memcpy(dest, nh, NEXTHOP_SIZE(nh)); - if (mls_cnt) + if (lnum) { - memcpy(&(dest->label[dest->labels]), mls->data, mls->length); - dest->labels += mls_cnt; + memcpy(&(dest->label[dest->labels]), labels, lnum * sizeof 
labels[0]); + dest->labels += lnum; } if (ipa_nonzero(nh->gw)) @@ -2598,45 +2620,65 @@ rta_apply_hostentry(rta *a, struct hostentry *he) &ea_gen_nexthop, 0, &new->ad)); } -static inline int +static inline struct hostentry_adata * rta_next_hop_outdated(rta *a) { - struct hostentry *he = a->hostentry; + eattr *heea = ea_find(a->eattrs, &ea_gen_hostentry); + if (!heea) + return NULL; - if (!he) - return 0; + struct hostentry_adata *head = (struct hostentry_adata *) heea->u.ptr; - if (!he->src) - return a->dest != RTD_UNREACHABLE; + if (!head->he->src) + return (a->dest != RTD_UNREACHABLE) ? head : NULL; - eattr *he_nh_ea = ea_find(he->src->eattrs, &ea_gen_nexthop); + eattr *he_nh_ea = ea_find(head->he->src->eattrs, &ea_gen_nexthop); eattr *a_nh_ea = ea_find(a->eattrs, &ea_gen_nexthop); - return (a->dest != he->dest) || - (ea_get_int(a->eattrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN) != he->igp_metric) || - (!he->nexthop_linkable) || - (!he_nh_ea != !a_nh_ea) || - (he_nh_ea && a_nh_ea && !adata_same(he_nh_ea->u.ptr, a_nh_ea->u.ptr)); + return ((a->dest != head->he->dest) || + (ea_get_int(a->eattrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN) != head->he->igp_metric) || + (!head->he->nexthop_linkable) || + (!he_nh_ea != !a_nh_ea) || + (he_nh_ea && a_nh_ea && !adata_same(he_nh_ea->u.ptr, a_nh_ea->u.ptr))) + ? 
head : NULL; } static inline struct rte_storage * rt_next_hop_update_rte(rtable *tab, net *n, rte *old) { - if (!rta_next_hop_outdated(old->attrs)) + struct hostentry_adata *head = rta_next_hop_outdated(old->attrs); + if (!head) return NULL; - rta *a = alloca(RTA_MAX_SIZE); - memcpy(a, old->attrs, rta_size(old->attrs)); - - rta_apply_hostentry(a, old->attrs->hostentry); - a->cached = 0; + rta a = *old->attrs; + a.cached = 0; + rta_apply_hostentry(&a, head); rte e0 = *old; - e0.attrs = a; + e0.attrs = &a; return rte_store(&e0, n, tab); } +static inline void +rt_next_hop_resolve_rte(rte *r) +{ + eattr *heea = ea_find(r->attrs->eattrs, &ea_gen_hostentry); + if (!heea) + return; + + struct hostentry_adata *head = (struct hostentry_adata *) heea->u.ptr; + + if (r->attrs->cached) + { + rta *a = tmp_alloc(RTA_MAX_SIZE); + *a = *r->attrs; + a->cached = 0; + r->attrs = a; + } + + rta_apply_hostentry(r->attrs, head); +} #ifdef CONFIG_BGP @@ -3585,7 +3627,7 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) rta *a = e->rte.attrs; pxlen = n->n.addr->pxlen; - if (a->hostentry) + if (ea_find(a->eattrs, &ea_gen_hostentry)) { /* Recursive route should not depend on another recursive route */ log(L_WARN "Next hop address %I resolvable through recursive route for %N", @@ -3658,7 +3700,7 @@ rt_update_hostcache(rtable *tab) tab->hcu_scheduled = 0; } -struct hostentry * +static struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) { struct hostentry *he; diff --git a/nest/rt.h b/nest/rt.h index fc8e2d3c..0ee615b8 100644 --- a/nest/rt.h +++ b/nest/rt.h @@ -438,39 +438,27 @@ struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, rtable *t #define RSEM_NOEXPORT 3 /* Routes rejected by export filter */ #define RSEM_EXPORTED 4 /* Routes marked in export map */ -struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); -void rta_apply_hostentry(rta *a, struct hostentry *he); +/* Host entry: Resolve hook for 
recursive nexthops */ +extern struct ea_class ea_gen_hostentry; +struct hostentry_adata { + adata ad; + struct hostentry *he; + u32 labels[0]; +}; -static inline void -rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll) -{ - rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ll, dep)); -} +void +ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]); /* - * rta_set_recursive_next_hop() acquires hostentry from hostcache and fills - * rta->hostentry field. New hostentry has zero use count. Cached rta locks its - * hostentry (increases its use count), uncached rta does not lock it. Hostentry - * with zero use count is removed asynchronously during host cache update, - * therefore it is safe to hold such hostentry temorarily. Hostentry holds a - * lock for a 'source' rta, mainly to share multipath nexthops. - * - * There is no need to hold a lock for hostentry->dep table, because that table - * contains routes responsible for that hostentry, and therefore is non-empty if - * given hostentry has non-zero use count. If the hostentry has zero use count, - * the entry is removed before dep is referenced. - * - * The protocol responsible for routes with recursive next hops should hold a - * lock for a 'source' table governing that routes (argument tab to - * rta_set_recursive_next_hop()), because its routes reference hostentries - * (through rta) related to the governing table. When all such routes are - * removed, rtas are immediately removed achieving zero uc. Then the 'source' - * table lock could be immediately released, although hostentries may still - * exist - they will be freed together with the 'source' table. 
- */ +struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); +void rta_apply_hostentry(rta *a, struct hostentry *he, u32 lnum, u32 labels[lnum]); -static inline void rt_lock_hostentry(struct hostentry *he) { if (he) he->uc++; } -static inline void rt_unlock_hostentry(struct hostentry *he) { if (he) he->uc--; } +static inline void +rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]) +{ + rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ll, dep), lnum, labels); +} +*/ int rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, int interior); diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index d0c2daf2..8e3ed70e 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -456,7 +456,6 @@ struct bgp_parse_state { uint err_subcode; jmp_buf err_jmpbuf; - struct hostentry *hostentry; adata *mpls_labels; /* Cached state for bgp_rte_update() */ diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 9e65670d..ce2848c0 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -986,27 +986,25 @@ bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll) WITHDRAW(BAD_NEXT_HOP " - zero address"); rtable *tab = ipa_is_ip4(gw) ?
c->igp_table_ip4 : c->igp_table_ip6; - s->hostentry = rt_get_hostentry(tab, gw, ll, c->c.table); - - if (!s->mpls) - rta_apply_hostentry(a, s->hostentry); - - /* With MPLS, hostentry is applied later in bgp_apply_mpls_labels() */ + if (s->mpls) + { + u32 labels[BGP_MPLS_MAX] = {}; /* Zeroed placeholder reserving space; real labels are written by bgp_apply_mpls_labels() */ + ea_set_hostentry(&a->eattrs, c->c.table, tab, gw, ll, BGP_MPLS_MAX, labels); + } + else + ea_set_hostentry(&a->eattrs, c->c.table, tab, gw, ll, 0, NULL); } } static void -bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a) +bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 lnum, u32 labels[lnum]) { - u32 *labels = (u32 *) s->mpls_labels->data; - u32 lnum = s->mpls_labels->length / sizeof(u32); - if (lnum > MPLS_MAX_LABEL_STACK) { REPORT("Too many MPLS labels ($u)", lnum); a->dest = RTD_UNREACHABLE; - a->hostentry = NULL; + ea_unset_attr(&a->eattrs, 0, &ea_gen_hostentry); /* Drop the recursive hook, otherwise rt_next_hop_resolve_rte() re-applies the hostentry */ ea_unset_attr(&a->eattrs, 0, &ea_gen_nexthop); return; } @@ -1029,7 +1027,13 @@ bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a) nh.nhad.ad.length = sizeof nh.nhad + lnum * sizeof(u32); } else /* GW_RECURSIVE */ - rta_apply_hostentry(a, s->hostentry); + { + eattr *e = ea_find(a->eattrs, &ea_gen_hostentry); + ASSERT_DIE(e); + struct hostentry_adata *head = (void *) e->u.ptr; + memcpy(&head->labels, labels, lnum * sizeof(u32)); + head->ad.length = (void *)(&head->labels[lnum]) - (void *) head->ad.data; + } } static void @@ -1446,12 +1450,7 @@ bgp_encode_mpls_labels(struct bgp_write_state *s UNUSED, const adata *mpls, byte static void bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, rta *a) { - struct { - struct adata ad; - u32 labels[BGP_MPLS_MAX]; - } labels_adata; - - u32 *labels = labels_adata.labels; + u32 labels[BGP_MPLS_MAX]; u32 label; uint lnum = 0; @@ -1474,19 +1473,8 @@ bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *p if (!a) return; - labels_adata.ad.length = lnum * sizeof(u32); - - /* Attach MPLS attribute unless we already have one */ - if (!s->mpls_labels) -
ea_set_attr(&(a->eattrs), - EA_LITERAL_DIRECT_ADATA(&ea_mpls_labels, 0, - (s->mpls_labels = tmp_store_adata(labels, BGP_MPLS_MAX * sizeof(u32))))); - else - /* Overwrite data in the attribute */ - memcpy(s->mpls_labels, &labels_adata, sizeof labels_adata); - /* Update next hop entry in rta */ - bgp_apply_mpls_labels(s, a); + bgp_apply_mpls_labels(s, a, lnum, labels); /* Attributes were changed, invalidate cached entry */ rta_free(s->cached_rta); diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c index b99df434..e122d771 100644 --- a/proto/pipe/pipe.c +++ b/proto/pipe/pipe.c @@ -62,7 +62,8 @@ pipe_rt_notify(struct proto *P, struct channel *src_ch, const net_addr *n, rte * memcpy(a, new->attrs, rta_size(new->attrs)); a->cached = 0; - a->hostentry = NULL; + ea_unset_attr(&a->eattrs, 0, &ea_gen_hostentry); + rte e0 = { .attrs = a, diff --git a/proto/static/static.c b/proto/static/static.c index 8e389390..038ee018 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -100,11 +100,11 @@ static_announce_rte(struct static_proto *p, struct static_route *r) if (r->dest == RTDX_RECURSIVE) { rtable *tab = ipa_is_ip4(r->via) ? p->igp_table_ip4 : p->igp_table_ip6; - if (r->mls) - ea_set_attr(&a->eattrs, - EA_LITERAL_DIRECT_ADATA(&ea_mpls_labels, 0, r->mls)); + u32 *labels = r->mls ? (void *) r->mls->data : NULL; + u32 lnum = r->mls ? r->mls->length / sizeof(u32) : 0; - rta_set_recursive_next_hop(p->p.main_channel->table, a, tab, r->via, IPA_NONE); + ea_set_hostentry(&a->eattrs, p->p.main_channel->table, tab, + r->via, IPA_NONE, lnum, labels); } /* Already announced */