From 00a09f3c367e79297f827b52ec5f16842db1ac4e Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Sun, 15 Apr 2012 15:07:58 +0200 Subject: [PATCH 1/5] Implement RA_ACCEPTED mode of route propagation. --- nest/route.h | 7 +- nest/rt-table.c | 416 ++++++++++++++++++++++++++++----------------- proto/bgp/bgp.c | 7 +- proto/bgp/bgp.h | 1 + proto/bgp/config.Y | 10 +- 5 files changed, 281 insertions(+), 160 deletions(-) diff --git a/nest/route.h b/nest/route.h index 34536609..59daf803 100644 --- a/nest/route.h +++ b/nest/route.h @@ -219,11 +219,12 @@ typedef struct rte { } u; } rte; -#define REF_COW 1 /* Copy this rte on write */ +#define REF_COW 1 /* Copy this rte on write */ /* Types of route announcement, also used as flags */ -#define RA_OPTIMAL 1 /* Announcement of optimal route change */ -#define RA_ANY 2 /* Announcement of any route change */ +#define RA_OPTIMAL 1 /* Announcement of optimal route change */ +#define RA_ACCEPTED 2 /* Announcement of first accepted route */ +#define RA_ANY 3 /* Announcement of any route change */ struct config; diff --git a/nest/rt-table.c b/nest/rt-table.c index 310c1afd..578a6ba3 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -182,86 +182,73 @@ rte_trace_out(unsigned int flag, struct proto *p, rte *e, char *msg) rte_trace(p, e, '<', msg); } -static inline void -do_rte_announce(struct announce_hook *ah, int type UNUSED, net *net, rte *new, rte *old, ea_list *tmpa, int refeed) +static rte * +export_filter(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa, int silent) { struct proto *p = ah->proto; struct filter *filter = ah->out_filter; struct proto_stats *stats = ah->stats; + ea_list *tmpb = NULL; + rte *rt; + int v; - rte *new0 = new; - rte *old0 = old; - int ok; + rt = rt0; + *rt_free = NULL; - if (new) + /* If called does not care for eattrs, we prepare one internally */ + if (!tmpa) { - stats->exp_updates_received++; - - char *drop_reason = NULL; - if ((ok = p->import_control ? p->import_control(p, &new, &tmpa, rte_update_pool) : 0) < 0) - { - stats->exp_updates_rejected++; - drop_reason = "rejected by protocol"; - } - else if (ok) - rte_trace_out(D_FILTERS, p, new, "forced accept by protocol"); - else if ((filter == FILTER_REJECT) || - (filter && f_run(filter, &new, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT)) - { - stats->exp_updates_filtered++; - drop_reason = "filtered out"; - } - if (drop_reason) - { - rte_trace_out(D_FILTERS, p, new, drop_reason); - if (new != new0) - rte_free(new); - new = NULL; - } - } - else - stats->exp_withdraws_received++; - - /* - * This is a tricky part - we don't know whether route 'old' was - * exported to protocol 'p' or was filtered by the export filter. - * We try tu run the export filter to know this to have a correct - * value in 'old' argument of rte_update (and proper filter value) - * - * FIXME - this is broken because 'configure soft' may change - * filters but keep routes. Refeed is expected to be called after - * change of the filters and with old == new, therefore we do not - * even try to run the filter on an old route, This may lead to - * 'spurious withdraws' but ensure that there are no 'missing - * withdraws'. - * - * This is not completely safe as there is a window between - * reconfiguration and the end of refeed - if a newly filtered - * route disappears during this period, proper withdraw is not - * sent (because old would be also filtered) and the route is - * not refeeded (because it disappeared before that). - */ - - if (old && !refeed) - { - if (filter == FILTER_REJECT) - old = NULL; - else - { - ea_list *tmpb = p->make_tmp_attrs ? p->make_tmp_attrs(old, rte_update_pool) : NULL; - ok = p->import_control ? p->import_control(p, &old, &tmpb, rte_update_pool) : 0; - if (ok < 0 || (!ok && filter && f_run(filter, &old, &tmpb, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT)) - { - if (old != old0) - rte_free(old); - old = NULL; - } - } + struct proto *src = rt->attrs->proto; + tmpb = src->make_tmp_attrs ? src->make_tmp_attrs(rt, rte_update_pool) : NULL; + tmpa = &tmpb; } - /* FIXME - This is broken because of incorrect 'old' value (see above) */ - if (!new && !old) - return; + v = p->import_control ? p->import_control(p, &rt, tmpa, rte_update_pool) : 0; + if (v < 0) + { + if (silent) + goto reject; + + stats->exp_updates_rejected++; + rte_trace_out(D_FILTERS, p, rt, "rejected by protocol"); + goto reject; + } + if (v > 0) + { + if (!silent) + rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol"); + goto accept; + } + + v = filter && ((filter == FILTER_REJECT) || + (f_run(filter, &rt, tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT)); + if (v) + { + if (silent) + goto reject; + + stats->exp_updates_filtered++; + rte_trace_out(D_FILTERS, p, rt, "filtered out"); + goto reject; + } + + accept: + if (rt != rt0) + *rt_free = rt; + return rt; + + reject: + /* Discard temporary rte */ + if (rt != rt0) + rte_free(rt); + return NULL; +} + +static void +do_rt_notify(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tmpa, int refeed) +{ + struct proto *p = ah->proto; + struct proto_stats *stats = ah->stats; if (new) stats->exp_updates_accepted++; @@ -297,10 +284,174 @@ do_rte_announce(struct announce_hook *ah, int type UNUSED, net *net, rte *new, r } else p->rt_notify(p, ah->table, net, new, old, new->attrs->eattrs); - if (new && new != new0) /* Discard temporary rte's */ - rte_free(new); - if (old && old != old0) - rte_free(old); + +} + + +static void +rt_notify_basic(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tmpa, int refeed) +{ + // struct proto *p = ah->proto; + struct proto_stats *stats = ah->stats; + + rte *new_free = NULL; + rte *old_free = NULL; + + if (new) + stats->exp_updates_received++; + else + stats->exp_withdraws_received++; + + /* + * This is a tricky part - we don't know whether route 'old' was + * exported to protocol 'p' or was filtered by the export filter. + * We try to run the export filter to know this to have a correct + * value in 'old' argument of rte_update (and proper filter value) + * + * FIXME - this is broken because 'configure soft' may change + * filters but keep routes. Refeed is expected to be called after + * change of the filters and with old == new, therefore we do not + * even try to run the filter on an old route, This may lead to + * 'spurious withdraws' but ensure that there are no 'missing + * withdraws'. + * + * This is not completely safe as there is a window between + * reconfiguration and the end of refeed - if a newly filtered + * route disappears during this period, proper withdraw is not + * sent (because old would be also filtered) and the route is + * not refeeded (because it disappeared before that). + */ + + if (new) + new = export_filter(ah, new, &new_free, &tmpa, 0); + + if (old && !refeed) + old = export_filter(ah, old, &old_free, NULL, 1); + + /* FIXME - This is broken because of incorrect 'old' value (see above) */ + if (!new && !old) + return; + + do_rt_notify(ah, net, new, old, tmpa, refeed); + + /* Discard temporary rte's */ + if (new_free) + rte_free(new_free); + if (old_free) + rte_free(old_free); +} + +static void +rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed, rte *before_old, + ea_list *tmpa, int feed) +{ + // struct proto *p = ah->proto; + struct proto_stats *stats = ah->stats; + + rte *new_best = NULL; + rte *old_best = NULL; + rte *new_free = NULL; + rte *old_free = NULL; + rte *r; + + /* Used to track whethe we met old_changed position. If it is NULL + it was the first and met it implicitly before current best route. */ + int old_meet = (old_changed && !before_old) ? 1 : 0; + + if (new_changed) + stats->exp_updates_received++; + else + stats->exp_withdraws_received++; + + /* First, find the new_best route - first accepted by filters */ + for (r=net->routes; r; r=r->next) + { + if (new_best = export_filter(ah, r, &new_free, &tmpa, 0)) + break; + + /* Note if we walked around the position of old_changed route */ + if (r == before_old) + old_meet = 1; + } + + /* + * Second, handle the feed case. That means we do not care for + * old_best. It is NULL for feed, and the new_best for refeed. + * For refeed, there is a hack similar to one in rt_notify_basic() + * to ensure withdraws in case of changed filters + */ + if (feed) + { + if (feed == 2) /* refeed */ + old_best = new_best ? new_best : net->routes; + else + old_best = NULL; + + if (!new_best && !old_best) + return; + + goto found; + } + + /* + * Now, we find the old_best route. Generally, it is the same as the + * new_best, unless new_best is the same as new_changed or + * old_changed is accepted before new_best. + * + * There are four cases: + * + * - We would find and accept old_changed before new_best, therefore + * old_changed is old_best. In remaining cases we suppose this + * is not true. + * + * - We found no new_best, therefore there is also no old_best and + * we ignore this withdraw. + * + * - We found new_best different than new_changed, therefore + * old_best is the same as new_best and we ignore this update. + * + * - We found new_best the same as new_changed, therefore it cannot + * be old_best and we have to continue search for old_best. + */ + + /* First case */ + if (old_meet) + if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1)) + goto found; + + /* Second case */ + if (!new_best) + return; + + /* Third case, we use r insead of new_best, because export_filter() could change it */ + if (r != new_changed) + { + if (new_free) + rte_free(new_free); + return; + } + + /* Fourth case */ + for (; r; r=r->next) + { + if (old_best = export_filter(ah, r, &old_free, NULL, 1)) + goto found; + + if (r == before_old) + if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1)) + goto found; + } + + /* Implicitly, old_best is NULL and new_best is non-NULL */ + + found: + do_rt_notify(ah, net, new_best, old_best, tmpa, (feed == 2)); + + /* Discard temporary rte's */ + if (new_free) + rte_free(new_free); + if (old_free) + rte_free(old_free); } /** @@ -333,7 +484,7 @@ do_rte_announce(struct announce_hook *ah, int type UNUSED, net *net, rte *new, r * the protocol gets called. */ static void -rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, ea_list *tmpa) +rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *before_old, ea_list *tmpa) { struct announce_hook *a; @@ -352,11 +503,13 @@ rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, ea_list * { ASSERT(a->proto->core_state == FS_HAPPY || a->proto->core_state == FS_FEEDING); if (a->proto->accept_ra_types == type) - do_rte_announce(a, type, net, new, old, tmpa, 0); + if (type == RA_ACCEPTED) + rt_notify_accepted(a, net, new, old, before_old, tmpa, 0); + else + rt_notify_basic(a, net, new, old, tmpa, 0); } } - static inline int rte_validate(rte *e) { @@ -419,9 +572,10 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str struct proto *p = ah->proto; struct rtable *table = ah->table; struct proto_stats *stats = ah->stats; + rte *before_old = NULL; rte *old_best = net->routes; rte *old = NULL; - rte **k, *r, *s; + rte **k; k = &net->routes; /* Find and remove original route from the same protocol */ while (old = *k) @@ -466,8 +620,12 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str break; } k = &old->next; + before_old = old; } + if (!old) + before_old = NULL; + if (!old && !new) { stats->imp_withdraws_ignored++; @@ -484,82 +642,21 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str if (old) stats->imp_routes--; - rte_announce(table, RA_ANY, net, new, old, tmpa); - - if (src->rte_recalculate && src->rte_recalculate(table, net, new, old, old_best)) - goto do_recalculate; - - if (new && rte_better(new, old_best)) + if (new) { - /* The first case - the new route is cleary optimal, we link it - at the first position and announce it */ + for (k=&net->routes; *k; k=&(*k)->next) + if (rte_better(new, *k)) + break; - rte_trace_in(D_ROUTES, p, new, "added [best]"); - rte_announce(table, RA_OPTIMAL, net, new, old_best, tmpa); - new->next = net->routes; - net->routes = new; - } - else if (old == old_best) - { - /* The second case - the old best route disappeared, we add the - new route (if we have any) to the list (we don't care about - position) and then we elect the new optimal route and relink - that route at the first position and announce it. New optimal - route might be NULL if there is no more routes */ + new->lastmod = now; + new->next = *k; + *k = new; - do_recalculate: - /* Add the new route to the list */ - if (new) - { - rte_trace_in(D_ROUTES, p, new, "added"); - new->next = net->routes; - net->routes = new; - } - - /* Find new optimal route */ - r = NULL; - for (s=net->routes; s; s=s->next) - if (rte_better(s, r)) - r = s; - - /* Announce optimal route */ - rte_announce(table, RA_OPTIMAL, net, r, old_best, tmpa); - - /* And relink it (if there is any) */ - if (r) - { - k = &net->routes; - while (s = *k) - { - if (s == r) - { - *k = r->next; - break; - } - k = &s->next; - } - r->next = net->routes; - net->routes = r; - } - else if (table->gc_counter++ >= table->config->gc_max_ops && - table->gc_time + table->config->gc_min_time <= now) - rt_schedule_gc(table); - } - else if (new) - { - /* The third case - the new route is not better than the old - best route (therefore old_best != NULL) and the old best - route was not removed (therefore old_best == net->routes). - We just link the new route after the old best route. */ - - ASSERT(net->routes != NULL); - new->next = net->routes->next; - net->routes->next = new; - rte_trace_in(D_ROUTES, p, new, "added"); + rte_trace_in(D_ROUTES, p, new, net->routes == new ? "added [best]" : "added"); } /* Log the route removal */ - if (!new && old && (p->debug & D_ROUTES)) + if (!new && (p->debug & D_ROUTES)) { if (old != old_best) rte_trace_in(D_ROUTES, p, old, "removed"); @@ -569,6 +666,15 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str rte_trace_in(D_ROUTES, p, old, "removed [sole]"); } + rte_announce(table, RA_ANY, net, new, old, NULL, tmpa); + rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, tmpa); + rte_announce(table, RA_ACCEPTED, net, new, old, before_old, tmpa); + + if (!net->routes && + (table->gc_counter++ >= table->config->gc_max_ops) && + (table->gc_time + table->config->gc_min_time <= now)) + rt_schedule_gc(table); + if (old) { if (p->rte_remove) @@ -577,7 +683,6 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str } if (new) { - new->lastmod = now; if (p->rte_insert) p->rte_insert(net, new); } @@ -709,7 +814,7 @@ rte_announce_i(rtable *tab, unsigned type, net *n, rte *new, rte *old) rte_update_lock(); src = new->attrs->proto; tmpa = src->make_tmp_attrs ? src->make_tmp_attrs(new, rte_update_pool) : NULL; - rte_announce(tab, type, n, new, old, tmpa); + rte_announce(tab, type, n, new, old, NULL, tmpa); rte_update_unlock(); } @@ -1259,12 +1364,15 @@ rt_commit(struct config *new, struct config *old) static inline void do_feed_baby(struct proto *p, int type, struct announce_hook *h, net *n, rte *e) { - struct proto *q = e->attrs->proto; + struct proto *src = e->attrs->proto; ea_list *tmpa; rte_update_lock(); - tmpa = q->make_tmp_attrs ? q->make_tmp_attrs(e, rte_update_pool) : NULL; - do_rte_announce(h, type, n, e, p->refeeding ? e : NULL, tmpa, p->refeeding); + tmpa = src->make_tmp_attrs ? src->make_tmp_attrs(e, rte_update_pool) : NULL; + if (type == RA_ACCEPTED) + rt_notify_accepted(h, n, e, NULL, NULL, tmpa, p->refeeding ? 2 : 1); + else + rt_notify_basic(h, n, e, p->refeeding ? e : NULL, tmpa, p->refeeding); rte_update_unlock(); } diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 4dd4b7be..099a39a9 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -909,7 +909,7 @@ bgp_init(struct proto_config *C) struct proto *P = proto_new(C, sizeof(struct bgp_proto)); struct bgp_proto *p = (struct bgp_proto *) P; - P->accept_ra_types = RA_OPTIMAL; + P->accept_ra_types = c->secondary ? RA_ACCEPTED : RA_OPTIMAL; P->rt_notify = bgp_rt_notify; P->rte_better = bgp_rte_better; P->import_control = bgp_import_control; @@ -955,12 +955,14 @@ bgp_check_config(struct bgp_config *c) if (internal && c->rs_client) cf_error("Only external neighbor can be RS client"); + /* if (c->multihop && (c->gw_mode == GW_DIRECT)) cf_error("Multihop BGP cannot use direct gateway mode"); if (c->multihop && (ipa_has_link_scope(c->remote_ip) || ipa_has_link_scope(c->source_addr))) cf_error("Multihop BGP cannot be used with link-local addresses"); + */ /* Different default based on rs_client */ if (!c->missing_lladdr) @@ -968,7 +970,8 @@ bgp_check_config(struct bgp_config *c) /* Different default for gw_mode */ if (!c->gw_mode) - c->gw_mode = (c->multihop || internal) ? GW_RECURSIVE : GW_DIRECT; + // c->gw_mode = (c->multihop || internal) ? GW_RECURSIVE : GW_DIRECT; + c->gw_mode = GW_DIRECT; } static int diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index a8c5818e..87734425 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -43,6 +43,7 @@ struct bgp_config { u32 route_limit; /* Number of routes that may be imported, 0 means disable limit */ int passive; /* Do not initiate outgoing connection */ int interpret_communities; /* Hardwired handling of well-known communities */ + int secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */ unsigned connect_retry_time; unsigned hold_time, initial_hold_time; unsigned keepalive_time; diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 78ca52de..356415a2 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -25,7 +25,8 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, CLUSTER, ID, AS4, ADVERTISE, IPV4, CAPABILITIES, LIMIT, PASSIVE, PREFER, OLDER, MISSING, LLADDR, DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, BGP_CLUSTER_LIST, IGP, - TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC) + TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC, + SECONDARY) CF_GRAMMAR @@ -73,19 +74,25 @@ bgp_proto: | bgp_proto STARTUP HOLD TIME expr ';' { BGP_CFG->initial_hold_time = $5; } | bgp_proto CONNECT RETRY TIME expr ';' { BGP_CFG->connect_retry_time = $5; } | bgp_proto KEEPALIVE TIME expr ';' { BGP_CFG->keepalive_time = $4; } +/* | bgp_proto MULTIHOP ';' { BGP_CFG->multihop = 64; } | bgp_proto MULTIHOP expr ';' { BGP_CFG->multihop = $3; if (($3<1) || ($3>255)) cf_error("Multihop must be in range 1-255"); } +*/ | bgp_proto NEXT HOP SELF ';' { BGP_CFG->next_hop_self = 1; } | bgp_proto MISSING LLADDR SELF ';' { BGP_CFG->missing_lladdr = MLL_SELF; } | bgp_proto MISSING LLADDR DROP ';' { BGP_CFG->missing_lladdr = MLL_DROP; } | bgp_proto MISSING LLADDR IGNORE ';' { BGP_CFG->missing_lladdr = MLL_IGNORE; } +/* | bgp_proto GATEWAY DIRECT ';' { BGP_CFG->gw_mode = GW_DIRECT; } | bgp_proto GATEWAY RECURSIVE ';' { BGP_CFG->gw_mode = GW_RECURSIVE; } +*/ | bgp_proto PATH METRIC bool ';' { BGP_CFG->compare_path_lengths = $4; } | bgp_proto MED METRIC bool ';' { BGP_CFG->med_metric = $4; } | bgp_proto IGP METRIC bool ';' { BGP_CFG->igp_metric = $4; } | bgp_proto PREFER OLDER bool ';' { BGP_CFG->prefer_older = $4; } +/* | bgp_proto DETERMINISTIC MED bool ';' { BGP_CFG->deterministic_med = $4; } +*/ | bgp_proto DEFAULT BGP_MED expr ';' { BGP_CFG->default_med = $4; } | bgp_proto DEFAULT BGP_LOCAL_PREF expr ';' { BGP_CFG->default_local_pref = $4; } | bgp_proto SOURCE ADDRESS ipa ';' { BGP_CFG->source_addr = $4; } @@ -101,6 +108,7 @@ bgp_proto: | bgp_proto ROUTE LIMIT expr ';' { BGP_CFG->route_limit = $4; } | bgp_proto PASSIVE bool ';' { BGP_CFG->passive = $3; } | bgp_proto INTERPRET COMMUNITIES bool ';' { BGP_CFG->interpret_communities = $4; } + | bgp_proto SECONDARY bool ';' { BGP_CFG->secondary = $3; } | bgp_proto IGP TABLE rtable ';' { BGP_CFG->igp_table = $4; } | bgp_proto TTL SECURITY bool ';' { BGP_CFG->ttl_security = $4; } ; From 553e4054609e7aa8dcb92849c92a6fea73354f0e Mon Sep 17 00:00:00 2001 From: Ondrej Filip Date: Thu, 19 Apr 2012 17:12:13 +0200 Subject: [PATCH 2/5] Small clean up in debug texts --- sysdep/linux/netlink/netlink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sysdep/linux/netlink/netlink.c b/sysdep/linux/netlink/netlink.c index c8eed0f3..d9d22d4c 100644 --- a/sysdep/linux/netlink/netlink.c +++ b/sysdep/linux/netlink/netlink.c @@ -756,11 +756,11 @@ nl_parse_route(struct nlmsghdr *h, int scan) if (a[RTA_OIF]) memcpy(&oif, RTA_DATA(a[RTA_OIF]), sizeof(oif)); - DBG("KRT: Got %I/%d, type=%d, oif=%d, table=%d, prid=%d, proto=%s\n", dst, i->rtm_dst_len, i->rtm_type, oif, i->rtm_table, i->rtm_protocol, p->p.name); - p = nl_table_map[i->rtm_table]; /* Do we know this table? */ + DBG("KRT: Got %I/%d, type=%d, oif=%d, table=%d, prid=%d, proto=%s\n", dst, i->rtm_dst_len, i->rtm_type, oif, i->rtm_table, i->rtm_protocol, p ? p->p.name : "(none)"); if (!p) - SKIP("unknown table %d", i->rtm_table); + SKIP("unknown table %d\n", i->rtm_table); + #ifdef IPV6 if (a[RTA_IIF]) From 9b2b502be521b58a736f7b78644e89ee01b4418b Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Fri, 20 Apr 2012 21:04:55 +0200 Subject: [PATCH 3/5] Fixes missing device attributes when exporting routes to kernel. Thanks to Howden Nick for the bugreport. --- sysdep/linux/netlink/netlink.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/sysdep/linux/netlink/netlink.c b/sysdep/linux/netlink/netlink.c index c8eed0f3..2b0d7633 100644 --- a/sysdep/linux/netlink/netlink.c +++ b/sysdep/linux/netlink/netlink.c @@ -588,9 +588,9 @@ krt_capable(rte *e) switch (a->dest) { case RTD_ROUTER: - if (ipa_has_link_scope(a->gw) && (a->iface == NULL)) - return 0; case RTD_DEVICE: + if (a->iface == NULL) + return 0; case RTD_BLACKHOLE: case RTD_UNREACHABLE: case RTD_PROHIBIT: @@ -653,20 +653,16 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new) if (ea = ea_find(eattrs, EA_KRT_REALM)) nl_add_attr_u32(&r.h, sizeof(r), RTA_FLOW, ea->u.data); + /* a->iface != NULL checked in krt_capable() for router and device routes */ + switch (a->dest) { case RTD_ROUTER: r.r.rtm_type = RTN_UNICAST; + nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, a->iface->index); nl_add_attr_ipa(&r.h, sizeof(r), RTA_GATEWAY, a->gw); - - /* a->iface != NULL checked in krt_capable() */ - if (ipa_has_link_scope(a->gw)) - nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, a->iface->index); - break; case RTD_DEVICE: - if (!a->iface) - return -1; r.r.rtm_type = RTN_UNICAST; nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, a->iface->index); break; From b7f3df79054aca327654c1fb4739c4ff02e59e6e Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Fri, 11 May 2012 12:01:27 +0200 Subject: [PATCH 4/5] Fixes a bug in RA_ACCEPTED handling. --- nest/rt-table.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/nest/rt-table.c b/nest/rt-table.c index 578a6ba3..32feafba 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -354,7 +354,7 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol rte *old_free = NULL; rte *r; - /* Used to track whethe we met old_changed position. If it is NULL + /* Used to track whether we met old_changed position. If it is NULL it was the first and met it implicitly before current best route. */ int old_meet = (old_changed && !before_old) ? 1 : 0; @@ -1415,12 +1415,13 @@ again: return 0; } - if (p->accept_ra_types == RA_OPTIMAL) + if ((p->accept_ra_types == RA_OPTIMAL) || + (p->accept_ra_types == RA_ACCEPTED)) if (e) { if (p->core_state != FS_FEEDING) return 1; /* In the meantime, the protocol fell down. */ - do_feed_baby(p, RA_OPTIMAL, h, n, e); + do_feed_baby(p, p->accept_ra_types, h, n, e); max_feed--; } From 26822d8fe1376b2ffd902a3b5caa47f81a88e74e Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Wed, 4 Jul 2012 21:31:03 +0200 Subject: [PATCH 5/5] Finalize RA_ACCEPTED handling. --- nest/config.Y | 15 +++++-- nest/route.h | 1 + nest/rt-table.c | 103 +++++++++++++++++++++++++++++++++++++++------ proto/bgp/attrs.c | 3 +- proto/bgp/bgp.c | 17 ++++++-- proto/bgp/config.Y | 6 --- 6 files changed, 117 insertions(+), 28 deletions(-) diff --git a/nest/config.Y b/nest/config.Y index f889828a..2bc5a4ab 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -46,7 +46,7 @@ CF_KEYWORDS(ROUTER, ID, PROTOCOL, TEMPLATE, PREFERENCE, DISABLED, DEBUG, ALL, OF CF_KEYWORDS(INTERFACE, IMPORT, EXPORT, FILTER, NONE, TABLE, STATES, ROUTES, FILTERS) CF_KEYWORDS(PASSWORD, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, INTERFACES) CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, COMMANDS, PREEXPORT, GENERATE, ROA, MAX, FLUSH) -CF_KEYWORDS(LISTEN, BGP, V6ONLY, DUAL, ADDRESS, PORT, PASSWORDS, DESCRIPTION) +CF_KEYWORDS(LISTEN, BGP, V6ONLY, DUAL, ADDRESS, PORT, PASSWORDS, DESCRIPTION, SORTED) CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC) CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT, @@ -64,7 +64,7 @@ CF_ENUM(T_ENUM_ROA, ROA_, UNKNOWN, VALID, INVALID) %type roa_args %type roa_table_arg %type sym_args -%type proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_or_preexport roa_mode +%type proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_or_preexport roa_mode tab_sorted %type proto_patt proto_patt2 CF_GRAMMAR @@ -110,10 +110,17 @@ listen_opt: /* Creation of routing tables */ +tab_sorted: + { $$ = 0; } + | SORTED { $$ = 1; } + ; + CF_ADDTO(conf, newtab) -newtab: TABLE SYM { - rt_new_table($2); +newtab: TABLE SYM tab_sorted { + struct rtable_config *cf; + cf = rt_new_table($2); + cf->sorted = $3; } ; diff --git a/nest/route.h b/nest/route.h index 59daf803..524e69b3 100644 --- a/nest/route.h +++ b/nest/route.h @@ -121,6 +121,7 @@ struct rtable_config { struct proto_config *krt_attached; /* Kernel syncer attached to this table */ int gc_max_ops; /* Maximum number of operations before GC is run */ int gc_min_time; /* Minimum time between two consecutive GC runs */ + byte sorted; /* Routes of network are sorted according to rte_better() */ }; typedef struct rtable { diff --git a/nest/rt-table.c b/nest/rt-table.c index 32feafba..eb8304f7 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -423,7 +423,7 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol if (!new_best) return; - /* Third case, we use r insead of new_best, because export_filter() could change it */ + /* Third case, we use r instead of new_best, because export_filter() could change it */ if (r != new_changed) { if (new_free) @@ -432,7 +432,7 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol } /* Fourth case */ - for (; r; r=r->next) + for (r=r->next; r; r=r->next) { if (old_best = export_filter(ah, r, &old_free, NULL, 1)) goto found; @@ -642,20 +642,92 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str if (old) stats->imp_routes--; - if (new) + if (table->config->sorted) { - for (k=&net->routes; *k; k=&(*k)->next) - if (rte_better(new, *k)) - break; + /* If routes are sorted, just insert new route to appropriate position */ + if (new) + { + if (before_old && !rte_better(new, before_old)) + k = &before_old->next; + else + k = &net->routes; - new->lastmod = now; - new->next = *k; - *k = new; + for (; *k; k=&(*k)->next) + if (rte_better(new, *k)) + break; - rte_trace_in(D_ROUTES, p, new, net->routes == new ? "added [best]" : "added"); + new->next = *k; + *k = new; + } + } + else + { + /* If routes are not sorted, find the best route and move it on + the first position. There are several optimized cases. */ + + if (src->rte_recalculate && src->rte_recalculate(table, net, new, old, old_best)) + goto do_recalculate; + + if (new && rte_better(new, old_best)) + { + /* The first case - the new route is cleary optimal, + we link it at the first position */ + + new->next = net->routes; + net->routes = new; + } + else if (old == old_best) + { + /* The second case - the old best route disappeared, we add the + new route (if we have any) to the list (we don't care about + position) and then we elect the new optimal route and relink + that route at the first position and announce it. New optimal + route might be NULL if there is no more routes */ + + do_recalculate: + /* Add the new route to the list */ + if (new) + { + new->next = net->routes; + net->routes = new; + } + + /* Find a new optimal route (if there is any) */ + if (net->routes) + { + rte **bp = &net->routes; + for (k=&(*bp)->next; *k; k=&(*k)->next) + if (rte_better(*k, *bp)) + bp = k; + + /* And relink it */ + rte *best = *bp; + *bp = best->next; + best->next = net->routes; + net->routes = best; + } + } + else if (new) + { + /* The third case - the new route is not better than the old + best route (therefore old_best != NULL) and the old best + route was not removed (therefore old_best == net->routes). + We just link the new route after the old best route. */ + + ASSERT(net->routes != NULL); + new->next = net->routes->next; + net->routes->next = new; + } + /* The fourth (empty) case - suboptimal route was removed, nothing to do */ } - /* Log the route removal */ + if (new) + new->lastmod = now; + + /* Log the route change */ + if (new) + rte_trace_in(D_ROUTES, p, new, net->routes == new ? "added [best]" : "added"); + if (!new && (p->debug & D_ROUTES)) { if (old != old_best) @@ -666,9 +738,12 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str rte_trace_in(D_ROUTES, p, old, "removed [sole]"); } + /* Propagate the route change */ rte_announce(table, RA_ANY, net, new, old, NULL, tmpa); - rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, tmpa); - rte_announce(table, RA_ACCEPTED, net, new, old, before_old, tmpa); + if (net->routes != old_best) + rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, tmpa); + if (table->config->sorted) + rte_announce(table, RA_ACCEPTED, net, new, old, before_old, tmpa); if (!net->routes && (table->gc_counter++ >= table->config->gc_max_ops) && @@ -1336,6 +1411,8 @@ rt_commit(struct config *new, struct config *old) r->table = ot; ot->name = r->name; ot->config = r; + if (o->sorted != r->sorted) + log(L_WARN "Reconfiguration of rtable sorted flag not implemented"); } else { diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 4495c039..e5bc84dd 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -1258,7 +1258,8 @@ same_group(rte *r, u32 lpref, u32 lasn) static inline int use_deterministic_med(rte *r) { - return ((struct bgp_proto *) r->attrs->proto)->cf->deterministic_med; + struct proto *P = r->attrs->proto; + return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med; } int diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 099a39a9..d59b4308 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -955,14 +955,14 @@ bgp_check_config(struct bgp_config *c) if (internal && c->rs_client) cf_error("Only external neighbor can be RS client"); - /* + if (c->multihop && (c->gw_mode == GW_DIRECT)) cf_error("Multihop BGP cannot use direct gateway mode"); if (c->multihop && (ipa_has_link_scope(c->remote_ip) || ipa_has_link_scope(c->source_addr))) cf_error("Multihop BGP cannot be used with link-local addresses"); - */ + /* Different default based on rs_client */ if (!c->missing_lladdr) @@ -970,8 +970,17 @@ bgp_check_config(struct bgp_config *c) /* Different default for gw_mode */ if (!c->gw_mode) - // c->gw_mode = (c->multihop || internal) ? GW_RECURSIVE : GW_DIRECT; - c->gw_mode = GW_DIRECT; + c->gw_mode = (c->multihop || internal) ? GW_RECURSIVE : GW_DIRECT; + + + if ((c->gw_mode == GW_RECURSIVE) && c->c.table->sorted) + cf_error("BGP in recursive mode prohibits sorted table"); + + if (c->deterministic_med && c->c.table->sorted) + cf_error("BGP with deterministic MED prohibits sorted table"); + + if (c->secondary && !c->c.table->sorted) + cf_error("BGP with secondary option requires sorted table"); } static int diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 356415a2..f9a5be65 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -74,25 +74,19 @@ bgp_proto: | bgp_proto STARTUP HOLD TIME expr ';' { BGP_CFG->initial_hold_time = $5; } | bgp_proto CONNECT RETRY TIME expr ';' { BGP_CFG->connect_retry_time = $5; } | bgp_proto KEEPALIVE TIME expr ';' { BGP_CFG->keepalive_time = $4; } -/* | bgp_proto MULTIHOP ';' { BGP_CFG->multihop = 64; } | bgp_proto MULTIHOP expr ';' { BGP_CFG->multihop = $3; if (($3<1) || ($3>255)) cf_error("Multihop must be in range 1-255"); } -*/ | bgp_proto NEXT HOP SELF ';' { BGP_CFG->next_hop_self = 1; } | bgp_proto MISSING LLADDR SELF ';' { BGP_CFG->missing_lladdr = MLL_SELF; } | bgp_proto MISSING LLADDR DROP ';' { BGP_CFG->missing_lladdr = MLL_DROP; } | bgp_proto MISSING LLADDR IGNORE ';' { BGP_CFG->missing_lladdr = MLL_IGNORE; } -/* | bgp_proto GATEWAY DIRECT ';' { BGP_CFG->gw_mode = GW_DIRECT; } | bgp_proto GATEWAY RECURSIVE ';' { BGP_CFG->gw_mode = GW_RECURSIVE; } -*/ | bgp_proto PATH METRIC bool ';' { BGP_CFG->compare_path_lengths = $4; } | bgp_proto MED METRIC bool ';' { BGP_CFG->med_metric = $4; } | bgp_proto IGP METRIC bool ';' { BGP_CFG->igp_metric = $4; } | bgp_proto PREFER OLDER bool ';' { BGP_CFG->prefer_older = $4; } -/* | bgp_proto DETERMINISTIC MED bool ';' { BGP_CFG->deterministic_med = $4; } -*/ | bgp_proto DEFAULT BGP_MED expr ';' { BGP_CFG->default_med = $4; } | bgp_proto DEFAULT BGP_LOCAL_PREF expr ';' { BGP_CFG->default_local_pref = $4; } | bgp_proto SOURCE ADDRESS ipa ';' { BGP_CFG->source_addr = $4; }