diff --git a/lib/lists.c b/lib/lists.c index 8f95c7c2..cabfddba 100644 --- a/lib/lists.c +++ b/lib/lists.c @@ -35,10 +35,10 @@ check_list(list *l, node *n) if (!l) { ASSERT_DIE(n); + ASSERT_DIE(n->prev); node *nn = n; - while (nn->prev) - nn = nn->prev; + do { nn = nn->prev; } while (nn->prev); l = SKIP_BACK(list, head_node, nn); } @@ -61,7 +61,7 @@ check_list(list *l, node *n) } ASSERT_DIE(cur == &(l->tail_node)); - ASSERT_DIE(!n || (seen == 1) || (n == &l->head_node) || (n == &l->tail_node)); + ASSERT_DIE(!n || (seen == 1)); return 1; } @@ -121,7 +121,7 @@ add_head(list *l, node *n) LIST_INLINE void insert_node(node *n, node *after) { - EXPENSIVE_CHECK(check_list(NULL, after)); + EXPENSIVE_CHECK((after->prev == NULL) || check_list(NULL, after)); ASSUME(n->prev == NULL); ASSUME(n->next == NULL); @@ -142,7 +142,7 @@ insert_node(node *n, node *after) LIST_INLINE void rem_node(node *n) { - EXPENSIVE_CHECK((n == n->prev) && (n == n->next) || check_list(NULL, n)); + EXPENSIVE_CHECK((n->prev == n) && (n->next == n) || check_list(NULL, n)); node *z = n->prev; node *x = n->next; diff --git a/lib/macro.h b/lib/macro.h index 24fc3393..8f5d4b0e 100644 --- a/lib/macro.h +++ b/lib/macro.h @@ -26,6 +26,8 @@ #define MACRO_DROP(...) #define MACRO_UNPAREN(...) 
__VA_ARGS__ #define MACRO_SEP(a, b, sep) a sep b +#define MACRO_STR(a) #a +#define MACRO_STR_AFTER(a) MACRO_STR(a) /* Aliases for some special chars */ #define MACRO_COMMA , diff --git a/lib/tlists.h b/lib/tlists.h index e1ed79ea..1437e17e 100644 --- a/lib/tlists.h +++ b/lib/tlists.h @@ -147,9 +147,14 @@ static inline void TLIST_NAME(rem_node)(TLIST_LIST_STRUCT *list, TLIST_TYPE *nod #error "You should first include lib/tlists.h without requesting a TLIST" #endif -#define TLIST_NODE(_name, _type) struct _name##_node { _type *next; _type *prev; } +#define TLIST_NODE_CONTENTS(_type) { _type *next; _type *prev; } +#define TLIST_NODE(_name, _type) struct _name##_node TLIST_NODE_CONTENTS(_type) +#define TLIST_DEFAULT_NODE struct MACRO_CONCAT_AFTER(TLIST_PREFIX,_node) \ + TLIST_NODE_CONTENTS(TLIST_TYPE) TLIST_ITEM + #define TLIST_LIST(_name) struct _name##_list + /* Use ->first and ->last to access HEAD and TAIL */ #define THEAD(_name, _list) (_list)->first #define TTAIL(_name, _list) (_list)->last diff --git a/misc/ips.c b/misc/ips.c index 467cc25d..ea8eef7f 100644 --- a/misc/ips.c +++ b/misc/ips.c @@ -60,10 +60,12 @@ main(int argc, char **argv) { uint i, e; if (scanf("%x/%d", &i, &e) != 2) + { if (feof(stdin)) break; - else - fprintf(stderr, "BUGGG\n"); + else + fprintf(stderr, "BUGGG\n"); + } // i >>= (32-e); // i |= (i >> e); cnt++; diff --git a/nest/iface.c b/nest/iface.c index fc896e26..c49ad95e 100644 --- a/nest/iface.c +++ b/nest/iface.c @@ -34,6 +34,9 @@ #include "conf/conf.h" #include "sysdep/unix/krt.h" + +static TLIST_LIST(ifsub) iface_sub_list; +static slab *iface_sub_slab; static pool *if_pool; list iface_list; @@ -140,13 +143,53 @@ if_copy(struct iface *to, struct iface *from) to->flags = from->flags | (to->flags & IF_TMP_DOWN); to->mtu = from->mtu; to->master_index = from->master_index; - to->master = from->master; + + if_unlink(to->master); + if_link(to->master = from->master); +} + +void +if_enqueue_notify_to(struct iface_notification x, struct 
iface_subscription *s) +{ + switch (x.type) { + case IFNOT_ADDRESS: + if (!s->ifa_notify) return; + ifa_link(x.a); + break; + case IFNOT_INTERFACE: + if (!s->if_notify) return; + if_link(x.i); + break; + case IFNOT_NEIGHBOR: + if (!s->neigh_notify) return; + neigh_link(x.n); + break; + default: + bug("Unknown interface notification type: %d", x.type); + } + + struct iface_notification *in = sl_alloc(iface_sub_slab); + *in = x; + + debug("Enqueue notify %d/%p (%p) to %p\n", x.type, x.a, in, s); + + ifnot_add_tail(&s->queue, in); + ev_schedule(&s->event); +} + +void +if_enqueue_notify(struct iface_notification x) +{ + WALK_TLIST(ifsub, s, &iface_sub_list) + if_enqueue_notify_to(x, s); } static inline void -ifa_send_notify(struct proto *p, unsigned c, struct ifa *a) +ifa_send_notify(struct iface_subscription *s, unsigned c, struct ifa *a) { - if (p->ifa_notify && + struct proto *p = SKIP_BACK(struct proto, iface_sub, s); + + if (s->ifa_notify && (p->proto_state != PS_DOWN) && (!p->vrf || p->vrf == a->iface->master)) { @@ -154,19 +197,21 @@ ifa_send_notify(struct proto *p, unsigned c, struct ifa *a) log(L_TRACE "%s < address %N on interface %s %s", p->name, &a->prefix, a->iface->name, (c & IF_CHANGE_UP) ? 
"added" : "removed"); - p->ifa_notify(p, c, a); + s->ifa_notify(p, c, a); } } static void ifa_notify_change_(unsigned c, struct ifa *a) { - struct proto *p; - DBG("IFA change notification (%x) for %s:%I\n", c, a->iface->name, a->ip); - WALK_LIST(p, proto_list) - ifa_send_notify(p, c, a); + if_enqueue_notify((struct iface_notification) { + .type = IFNOT_ADDRESS, + .a = a, + .flags = c, + }); + } static inline void @@ -182,9 +227,11 @@ ifa_notify_change(unsigned c, struct ifa *a) } static inline void -if_send_notify(struct proto *p, unsigned c, struct iface *i) +if_send_notify(struct iface_subscription *s, unsigned c, struct iface *i) { - if (p->if_notify && + struct proto *p = SKIP_BACK(struct proto, iface_sub, s); + + if (s->if_notify && (p->proto_state != PS_DOWN) && (!p->vrf || p->vrf == i->master)) { @@ -197,14 +244,13 @@ if_send_notify(struct proto *p, unsigned c, struct iface *i) (c & IF_CHANGE_PREFERRED) ? "changes preferred address" : (c & IF_CHANGE_CREATE) ? "created" : "sends unknown event"); - p->if_notify(p, c, i); + s->if_notify(p, c, i); } } static void if_notify_change(unsigned c, struct iface *i) { - struct proto *p; struct ifa *a; if (i->flags & IF_JUST_CREATED) @@ -225,8 +271,11 @@ if_notify_change(unsigned c, struct iface *i) WALK_LIST(a, i->addrs) ifa_notify_change_(IF_CHANGE_DOWN, a); - WALK_LIST(p, proto_list) - if_send_notify(p, c, i); + if_enqueue_notify((struct iface_notification) { + .type = IFNOT_INTERFACE, + .i = i, + .flags = c, + }); if (c & IF_CHANGE_UP) WALK_LIST(a, i->addrs) @@ -320,6 +369,7 @@ if_update(struct iface *new) new->llv6 = i->llv6; new->sysdep = i->sysdep; memcpy(&new->addrs, &i->addrs, sizeof(i->addrs)); + memcpy(&new->neighbors, &i->neighbors, sizeof(i->neighbors)); memcpy(i, new, sizeof(*i)); i->flags &= ~IF_UP; /* IF_TMP_DOWN will be added later */ goto newif; @@ -334,9 +384,10 @@ if_update(struct iface *new) } i = mb_alloc(if_pool, sizeof(struct iface)); memcpy(i, new, sizeof(*i)); + if_link(i->master); 
init_list(&i->addrs); -newif: init_list(&i->neighbors); +newif: i->flags |= IF_UPDATED | IF_TMP_DOWN; /* Tmp down as we don't have addresses yet */ add_tail(&iface_list, &i->n); return i; @@ -387,39 +438,117 @@ if_end_update(void) } void -if_flush_ifaces(struct proto *p) +if_link(struct iface *i) { - if (p->debug & D_EVENTS) - log(L_TRACE "%s: Flushing interfaces", p->name); - if_start_update(); - if_end_update(); + if (i) + i->uc++; } +void +if_unlink(struct iface *i) +{ + if (i) + i->uc--; + /* TODO: Do some interface object cleanup */ +} + +static void +iface_notify_hook(void *_s) +{ + struct iface_subscription *s = _s; + + while (!EMPTY_TLIST(ifnot, &s->queue)) + { + struct iface_notification *n = THEAD(ifnot, &s->queue); + debug("Process notify %d/%p (%p) to %p\n", n->type, n->a, n, s); + switch (n->type) { + case IFNOT_ADDRESS: + ifa_send_notify(s, n->flags, n->a); + ifa_unlink(n->a); + break; + case IFNOT_INTERFACE: + if_send_notify(s, n->flags, n->i); + if_unlink(n->i); + break; + case IFNOT_NEIGHBOR: + s->neigh_notify(n->n); + neigh_unlink(n->n); + break; + default: + bug("Bad interface notification type: %d", n->type); + } + + ifnot_rem_node(&s->queue, n); + sl_free(n); + } +} + + /** - * if_feed_baby - advertise interfaces to a new protocol - * @p: protocol to feed + * iface_subscribe - request interface updates + * @s: subscription structure * * When a new protocol starts, this function sends it a series * of notifications about all existing interfaces. 
*/ void -if_feed_baby(struct proto *p) +iface_subscribe(struct iface_subscription *s) { - struct iface *i; - struct ifa *a; + ifsub_add_tail(&iface_sub_list, s); + s->event = (event) { + .hook = iface_notify_hook, + .data = s, + }; - if (!p->if_notify && !p->ifa_notify) /* shortcut */ + if (!s->if_notify && !s->ifa_notify) /* shortcut */ return; + + struct iface *i; DBG("Announcing interfaces to new protocol %s\n", p->name); WALK_LIST(i, iface_list) { - if_send_notify(p, IF_CHANGE_CREATE | ((i->flags & IF_UP) ? IF_CHANGE_UP : 0), i); + if_send_notify(s, IF_CHANGE_CREATE | ((i->flags & IF_UP) ? IF_CHANGE_UP : 0), i); + + struct ifa *a; if (i->flags & IF_UP) WALK_LIST(a, i->addrs) - ifa_send_notify(p, IF_CHANGE_CREATE | IF_CHANGE_UP, a); + ifa_send_notify(s, IF_CHANGE_CREATE | IF_CHANGE_UP, a); } } +/** + * iface_unsubscribe - unsubscribe from interface updates + * @s: subscription structure + */ +void +iface_unsubscribe(struct iface_subscription *s) +{ + ifsub_rem_node(&iface_sub_list, s); + ev_postpone(&s->event); + + WALK_TLIST_DELSAFE(ifnot, n, &s->queue) + { + debug("Drop notify %d/%p (%p) to %p\n", n->type, n->a, n, s); + switch (n->type) + { + case IFNOT_ADDRESS: + ifa_unlink(n->a); + break; + case IFNOT_INTERFACE: + if_unlink(n->i); + break; + case IFNOT_NEIGHBOR: + neigh_unlink(n->n); + break; + default: + bug("Bad interface notification type: %d", n->type); + } + + ifnot_rem_node(&s->queue, n); + sl_free(n); + } +} + /** * if_find_by_index - find interface by ifindex * @idx: ifindex @@ -609,6 +738,8 @@ ifa_update(struct ifa *a) b = mb_alloc(if_pool, sizeof(struct ifa)); memcpy(b, a, sizeof(struct ifa)); + ifa_link(b); + if_link(i); add_tail(&i->addrs, &b->n); b->flags |= IA_UPDATED; @@ -655,11 +786,36 @@ ifa_delete(struct ifa *a) if (i->flags & IF_UP) ifa_notify_change(IF_CHANGE_DOWN, b); - mb_free(b); + ifa_unlink(b); return; } } +void ifa_link(struct ifa *a) +{ + if (a) + { + debug("ifa_link: %p %d\n", a, a->uc); + a->uc++; + } +} + +void 
ifa_unlink(struct ifa *a) +{ + if (!a) + return; + + debug("ifa_unlink: %p %d\n", a, a->uc); + if (--a->uc) + return; + + if_unlink(a->iface); +#if DEBUGGING + memset(a, 0x5b, sizeof(struct ifa)); +#endif + mb_free(a); +} + u32 if_choose_router_id(struct iface_patt *mask, u32 old_id) { @@ -715,6 +871,7 @@ if_init(void) { if_pool = rp_new(&root_pool, "Interfaces"); init_list(&iface_list); + iface_sub_slab = sl_new(if_pool, sizeof(struct iface_notification)); strcpy(default_vrf.name, "default"); neigh_init(if_pool); } diff --git a/nest/iface.h b/nest/iface.h index 13f3bd12..a3f4f30a 100644 --- a/nest/iface.h +++ b/nest/iface.h @@ -9,7 +9,9 @@ #ifndef _BIRD_IFACE_H_ #define _BIRD_IFACE_H_ +#include "lib/event.h" #include "lib/lists.h" +#include "lib/tlists.h" #include "lib/ip.h" extern list iface_list; @@ -26,6 +28,7 @@ struct ifa { /* Interface address */ ip_addr opposite; /* Opposite end of a point-to-point link */ unsigned scope; /* Interface address scope */ unsigned flags; /* Analogous to iface->flags */ + unsigned uc; /* Use (link) count */ }; extern struct iface default_vrf; @@ -44,6 +47,7 @@ struct iface { struct ifa *llv6; /* Primary link-local address for IPv6 */ ip4_addr sysdep; /* Arbitrary IPv4 address for internal sysdep use */ list neighbors; /* All neighbors on this interface */ + unsigned uc; /* Use (link) count */ }; #define IF_UP 1 /* Currently just IF_ADMIN_UP */ @@ -114,19 +118,22 @@ void ifa_delete(struct ifa *); void if_start_update(void); void if_end_partial_update(struct iface *); void if_end_update(void); -void if_flush_ifaces(struct proto *p); -void if_feed_baby(struct proto *); struct iface *if_find_by_index(unsigned); struct iface *if_find_by_name(const char *); struct iface *if_get_by_name(const char *); void if_recalc_all_preferred_addresses(void); +void if_link(struct iface *); +void if_unlink(struct iface *); +void ifa_link(struct ifa *); +void ifa_unlink(struct ifa *); /* The Neighbor Cache */ typedef struct neighbor { node n; /* Node 
in neighbor hash table chain */ node if_n; /* Node in per-interface neighbor list */ + TLIST_NODE(proto_neigh, struct neighbor) proto_n; ip_addr addr; /* Address of the neighbor */ struct ifa *ifa; /* Ifa on related iface */ struct iface *iface; /* Interface it's connected to */ @@ -137,8 +144,16 @@ typedef struct neighbor { u16 flags; /* NEF_* flags */ s16 scope; /* Address scope, -1 for unreachable neighbors, SCOPE_HOST when it's our own address */ + uint uc; /* Use (link) count */ } neighbor; +#define TLIST_PREFIX proto_neigh +#define TLIST_TYPE struct neighbor +#define TLIST_ITEM proto_n +#define TLIST_WANT_WALK +#define TLIST_WANT_ADD_TAIL +#include "lib/tlists.h" + #define NEF_STICKY 1 #define NEF_ONLINK 2 #define NEF_IFACE 4 /* Entry for whole iface */ @@ -148,7 +163,7 @@ neighbor *neigh_find(struct proto *p, ip_addr a, struct iface *ifa, uint flags); void neigh_dump(neighbor *); void neigh_dump_all(void); -void neigh_prune(void); +void neigh_prune(struct proto *); void neigh_if_up(struct iface *); void neigh_if_down(struct iface *); void neigh_if_link(struct iface *); @@ -156,6 +171,63 @@ void neigh_ifa_up(struct ifa *a); void neigh_ifa_down(struct ifa *a); void neigh_init(struct pool *); +void neigh_link(neighbor *); +void neigh_unlink(neighbor *); + +/* + * Notification mechanism + */ + +#define TLIST_PREFIX ifnot +#define TLIST_TYPE struct iface_notification +#define TLIST_ITEM nn +#define TLIST_WANT_WALK +#define TLIST_WANT_ADD_TAIL + +struct iface_notification { + TLIST_DEFAULT_NODE; + enum { + IFNOT_INVALID, + IFNOT_ADDRESS, + IFNOT_INTERFACE, + IFNOT_NEIGHBOR, + } type; + unsigned flags; + union { + struct ifa *a; + struct iface *i; + neighbor *n; + }; +}; + +#include "lib/tlists.h" + +#define TLIST_PREFIX ifsub +#define TLIST_TYPE struct iface_subscription +#define TLIST_ITEM n +#define TLIST_WANT_WALK +#define TLIST_WANT_ADD_TAIL + +struct iface_subscription { + TLIST_DEFAULT_NODE; + + event event; + TLIST_LIST(ifnot) queue; + + void 
(*if_notify)(struct proto *, unsigned flags, struct iface *i); + void (*ifa_notify)(struct proto *, unsigned flags, struct ifa *a); + void (*neigh_notify)(struct neighbor *neigh); +}; + +#include "lib/tlists.h" + +void if_enqueue_notify(struct iface_notification); +void if_enqueue_notify_to(struct iface_notification x, struct iface_subscription *s); + +void iface_flush_notifications(struct iface_subscription *); +void iface_subscribe(struct iface_subscription *); +void iface_unsubscribe(struct iface_subscription *); + /* * Interface Pattern Lists */ diff --git a/nest/neighbor.c b/nest/neighbor.c index 81da24d5..88ac2860 100644 --- a/nest/neighbor.c +++ b/nest/neighbor.c @@ -256,14 +256,17 @@ neigh_find(struct proto *p, ip_addr a, struct iface *iface, uint flags) n = sl_allocz(neigh_slab); add_tail(&neigh_hash_table[h], &n->n); add_tail((scope >= 0) ? &iface->neighbors : &sticky_neigh_list, &n->if_n); + proto_neigh_add_tail(&p->neighbors, n); n->addr = a; - n->ifa = addr; - n->iface = iface; - n->ifreq = ifreq; + ifa_link(n->ifa = addr); + if_link(n->iface = iface); + if_link(n->ifreq = ifreq); n->proto = p; n->flags = flags; n->scope = scope; + neigh_link(n); + return n; } @@ -308,19 +311,20 @@ neigh_dump_all(void) static inline void neigh_notify(neighbor *n) { - if (n->proto->neigh_notify && (n->proto->proto_state != PS_STOP)) - n->proto->neigh_notify(n); + if_enqueue_notify_to((struct iface_notification) { .type = IFNOT_NEIGHBOR, .n = n, }, &n->proto->iface_sub); } static void neigh_up(neighbor *n, struct iface *i, struct ifa *a, int scope) { DBG("Waking up sticky neighbor %I\n", n->addr); - n->iface = i; - n->ifa = a; + if_link(n->iface = i); + ifa_link(n->ifa = a); + n->scope = scope; - rem_node(&n->if_n); + rem_node(&n->if_n); /* HACK: Here the neighbor is always in the sticky list, + regardless whether it is sticky or not */ add_tail(&i->neighbors, &n->if_n); neigh_notify(n); @@ -330,21 +334,48 @@ static void neigh_down(neighbor *n) { DBG("Flushing neighbor 
%I on %s\n", n->addr, n->iface->name); - n->iface = NULL; - n->ifa = NULL; + n->scope = -1; rem_node(&n->if_n); add_tail(&sticky_neigh_list, &n->if_n); + ifa_unlink(n->ifa); + n->ifa = NULL; + + if_unlink(n->iface); + n->iface = NULL; + neigh_notify(n); } -static inline void -neigh_free(neighbor *n) +void +neigh_link(neighbor *n) { + n->uc++; +} + +void +neigh_unlink(neighbor *n) +{ + if (--n->uc) + return; + + struct proto *p = n->proto; + proto_neigh_rem_node(&p->neighbors, n); + + if ((p->proto_state == PS_DOWN) && EMPTY_TLIST(proto_neigh, &p->neighbors)) + ev_schedule(p->event); + + n->proto = NULL; + rem_node(&n->n); rem_node(&n->if_n); + + ifa_unlink(n->ifa); + if_unlink(n->iface); + if_unlink(n->ifreq); + sl_free(n); } @@ -394,7 +425,8 @@ neigh_update(neighbor *n, struct iface *iface) { if (ifa != n->ifa) { - n->ifa = ifa; + ifa_unlink(n->ifa); + ifa_link(n->ifa = ifa); neigh_notify(n); } @@ -408,7 +440,7 @@ neigh_update(neighbor *n, struct iface *iface) if ((n->scope < 0) && !(n->flags & NEF_STICKY)) { - neigh_free(n); + neigh_unlink(n); return; } @@ -519,15 +551,6 @@ neigh_ifa_down(struct ifa *a) neigh_update(n, i); } -static inline void -neigh_prune_one(neighbor *n) -{ - if (n->proto->proto_state != PS_DOWN) - return; - - neigh_free(n); -} - /** * neigh_prune - prune neighbor cache * @@ -536,16 +559,12 @@ neigh_prune_one(neighbor *n) * is shut down to get rid of all its heritage. 
*/ void -neigh_prune(void) +neigh_prune(struct proto *p) { - neighbor *n; - node *m; - int i; + WALK_TLIST_DELSAFE(proto_neigh, n, &p->neighbors) + neigh_unlink(n); - DBG("Pruning neighbors\n"); - for(i=0; i<NEIGH_HASH_SIZE; i++) - WALK_LIST_DELSAFE(n, m, neigh_hash_table[i]) - neigh_prune_one(n); + ASSERT_DIE(EMPTY_TLIST(proto_neigh, &p->neighbors)); } /** diff --git a/nest/proto.c b/nest/proto.c index bec30560..6e4b7d29 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -1134,6 +1134,8 @@ proto_configure_channel(struct proto *p, struct channel **pc, struct channel_con static void proto_cleanup(struct proto *p) { + CALL(p->proto->cleanup, p); + rfree(p->pool); p->pool = NULL; @@ -1162,8 +1164,8 @@ proto_event(void *ptr) if (p->do_stop) { - if (p->proto == &proto_unix_iface) - if_flush_ifaces(p); + iface_unsubscribe(&p->iface_sub); + neigh_prune(p); p->do_stop = 0; } @@ -2053,7 +2055,7 @@ proto_do_start(struct proto *p) p->sources.class = &default_rte_owner_class; if (!p->cf->late_if_feed) - if_feed_baby(p); + iface_subscribe(&p->iface_sub); } static void @@ -2066,7 +2068,7 @@ proto_do_up(struct proto *p) proto_start_channels(p); if (p->cf->late_if_feed) - if_feed_baby(p); + iface_subscribe(&p->iface_sub); } static inline void @@ -2098,7 +2100,6 @@ static void proto_do_down(struct proto *p) { p->down_code = 0; - neigh_prune(); /* Shutdown is finished in the protocol event */ if (proto_is_done(p)) diff --git a/nest/protocol.h b/nest/protocol.h index 101e0926..fdd0373a 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -9,9 +9,10 @@ #ifndef _BIRD_PROTOCOL_H_ #define _BIRD_PROTOCOL_H_ -#include "lib/lists.h" +#include "lib/tlists.h" #include "lib/resource.h" #include "lib/event.h" +#include "nest/iface.h" #include "lib/settle.h" #include "nest/rt.h" #include "nest/limit.h" @@ -59,6 +60,7 @@ struct protocol { void (*dump)(struct proto *); /* Debugging dump */ int (*start)(struct proto *); /* Start the instance */ int (*shutdown)(struct proto *); /* Stop the instance */ + void (*cleanup)(struct proto *); /* Cleanup the instance right before tearing it all down */ void (*get_status)(struct proto *, 
byte *buf); /* Get instance status (for `show protocols' command) */ // int (*get_attr)(const struct eattr *, byte *buf, int buflen); /* ASCIIfy dynamic attribute (returns GA_*) */ void (*show_proto_info)(struct proto *); /* Show protocol info (for `show protocols all' command) */ @@ -128,8 +130,10 @@ struct proto { struct rte_src *main_source; /* Primary route source */ struct rte_owner sources; /* Route source owner structure */ struct iface *vrf; /* Related VRF instance, NULL if global */ + TLIST_LIST(proto_neigh) neighbors; /* List of neighbor structures */ + struct iface_subscription iface_sub; /* Interface notification subscription */ - const char *name; /* Name of this instance (== cf->name) */ + const char *name; /* Name of this instance (== cf->name) */ u32 debug; /* Debugging flags */ u32 mrtdump; /* MRTDump flags */ uint active_channels; /* Number of active channels */ @@ -167,10 +171,7 @@ struct proto { * feed_end Notify channel about finish of route feeding. */ - void (*if_notify)(struct proto *, unsigned flags, struct iface *i); - void (*ifa_notify)(struct proto *, unsigned flags, struct ifa *a); void (*rt_notify)(struct proto *, struct channel *, const net_addr *net, struct rte *new, const struct rte *old); - void (*neigh_notify)(struct neighbor *neigh); int (*preexport)(struct channel *, struct rte *rt); void (*reload_routes)(struct channel *); void (*feed_begin)(struct channel *, int initial); @@ -337,7 +338,13 @@ void proto_notify_state(struct proto *p, unsigned state); */ static inline int proto_is_inactive(struct proto *p) -{ return (p->active_channels == 0) && (p->active_loops == 0) && (p->sources.uc == 0); } +{ + return (p->active_channels == 0) + && (p->active_loops == 0) + && (p->sources.uc == 0) + && EMPTY_TLIST(proto_neigh, &p->neighbors) + ; +} /* diff --git a/nest/rt-dev.c b/nest/rt-dev.c index 4199e17c..8ae563b5 100644 --- a/nest/rt-dev.c +++ b/nest/rt-dev.c @@ -145,8 +145,8 @@ dev_init(struct proto_config *CF) 
proto_configure_channel(P, &p->ip4_channel, cf->ip4_channel); proto_configure_channel(P, &p->ip6_channel, cf->ip6_channel); - P->if_notify = dev_if_notify; - P->ifa_notify = dev_ifa_notify; + P->iface_sub.if_notify = dev_if_notify; + P->iface_sub.ifa_notify = dev_ifa_notify; return P; } diff --git a/proto/babel/babel.c b/proto/babel/babel.c index 4db7c66f..c2de7599 100644 --- a/proto/babel/babel.c +++ b/proto/babel/babel.c @@ -2438,7 +2438,7 @@ babel_init(struct proto_config *CF) proto_configure_channel(P, &p->ip4_channel, cf->ip4_channel); proto_configure_channel(P, &p->ip6_channel, cf->ip6_channel); - P->if_notify = babel_if_notify; + P->iface_sub.if_notify = babel_if_notify; P->rt_notify = babel_rt_notify; P->preexport = babel_preexport; diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c index 25ff19ac..575ebc3c 100644 --- a/proto/bfd/bfd.c +++ b/proto/bfd/bfd.c @@ -994,7 +994,7 @@ bfd_init(struct proto_config *c) { struct proto *p = proto_new(c); - p->neigh_notify = bfd_neigh_notify; + p->iface_sub.neigh_notify = bfd_neigh_notify; return p; } diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index c74b8273..f350c8ca 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -127,7 +127,11 @@ #include "bgp.h" +static void bgp_listen_create(void *); + static list STATIC_LIST_INIT(bgp_sockets); /* Global list of listening sockets */ +static list STATIC_LIST_INIT(bgp_listen_pending); /* Global list of listening socket open requests */ +static event bgp_listen_event = { .hook = bgp_listen_create }; static void bgp_connect(struct bgp_proto *p); @@ -139,76 +143,36 @@ static void bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd); static int bgp_incoming_connection(sock *sk, uint dummy UNUSED); static void bgp_listen_sock_err(sock *sk UNUSED, int err); +static void bgp_initiate_disable(struct bgp_proto *p, int err_val); static void bgp_graceful_restart_feed(struct bgp_channel *c); -/** - * bgp_open - open a BGP instance - * @p: BGP instance - * - * This function 
allocates and configures shared BGP resources, mainly listening - * sockets. Should be called as the last step during initialization (when lock - * is acquired and neighbor is ready). When error, caller should change state to - * PS_DOWN and return immediately. - */ -static int -bgp_open(struct bgp_proto *p) +static inline int +bgp_setup_auth(struct bgp_proto *p, int enable) { - struct bgp_socket *bs = NULL; - struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL; - ip_addr addr = p->cf->strict_bind ? p->cf->local_ip : - (p->ipv4 ? IPA_NONE4 : IPA_NONE6); - uint port = p->cf->local_port; - uint flags = p->cf->free_bind ? SKF_FREEBIND : 0; - uint flag_mask = SKF_FREEBIND; + if (p->cf->password && p->listen.sock) + { + ip_addr prefix = p->cf->remote_ip; + int pxlen = -1; - /* We assume that cf->iface is defined iff cf->local_ip is link-local */ - - WALK_LIST(bs, bgp_sockets) - if (ipa_equal(bs->sk->saddr, addr) && - (bs->sk->sport == port) && - (bs->sk->iface == ifa) && - (bs->sk->vrf == p->p.vrf) && - ((bs->sk->flags & flag_mask) == flags)) + if (p->cf->remote_range) { - bs->uc++; - p->sock = bs; - return 0; + prefix = net_prefix(p->cf->remote_range); + pxlen = net_pxlen(p->cf->remote_range); } - sock *sk = sk_new(proto_pool); - sk->type = SK_TCP_PASSIVE; - sk->ttl = 255; - sk->saddr = addr; - sk->sport = port; - sk->iface = ifa; - sk->vrf = p->p.vrf; - sk->flags = flags; - sk->tos = IP_PREC_INTERNET_CONTROL; - sk->rbsize = BGP_RX_BUFFER_SIZE; - sk->tbsize = BGP_TX_BUFFER_SIZE; - sk->rx_hook = bgp_incoming_connection; - sk->err_hook = bgp_listen_sock_err; + int rv = sk_set_md5_auth(p->listen.sock->sk, + p->cf->local_ip, prefix, pxlen, p->cf->iface, + enable ? 
p->cf->password : NULL, p->cf->setkey); - if (sk_open(sk) < 0) - goto err; + if (rv < 0) + sk_log_error(p->listen.sock->sk, p->p.name); - bs = mb_allocz(proto_pool, sizeof(struct bgp_socket)); - bs->sk = sk; - bs->uc = 1; - p->sock = bs; - sk->data = bs; - - add_tail(&bgp_sockets, &bs->n); - - return 0; - -err: - sk_log_error(sk, p->p.name); - log(L_ERR "%s: Cannot open listening socket", p->p.name); - rfree(sk); - return -1; + return rv; + } + else + return 0; } /** @@ -220,43 +184,119 @@ err: static void bgp_close(struct bgp_proto *p) { - struct bgp_socket *bs = p->sock; + struct bgp_listen_request *req = &p->listen; + struct bgp_socket *bs = req->sock; - ASSERT(bs && bs->uc); + ASSERT(bs); - if (--bs->uc) - return; + req->sock = NULL; + rem_node(&req->n); - rfree(bs->sk); - rem_node(&bs->n); - mb_free(bs); + if (EMPTY_LIST(bs->requests)) + ev_schedule(&bgp_listen_event); } -static inline int -bgp_setup_auth(struct bgp_proto *p, int enable) +/** + * bgp_open - open a BGP instance + * @p: BGP instance + * + * This function allocates and configures shared BGP resources, mainly listening + * sockets. Should be called as the last step during initialization (when lock + * is acquired and neighbor is ready). When error, caller should change state to + * PS_DOWN and return immediately. + */ +static void +bgp_open(struct bgp_proto *p) { - if (p->cf->password) - { - ip_addr prefix = p->cf->remote_ip; - int pxlen = -1; + struct bgp_listen_request *req = &p->listen; + /* We assume that cf->iface is defined iff cf->local_ip is link-local */ + req->iface = p->cf->strict_bind ? p->cf->iface : NULL; + req->vrf = p->p.vrf; + req->addr = p->cf->strict_bind ? p->cf->local_ip : + (p->ipv4 ? IPA_NONE4 : IPA_NONE6); + req->port = p->cf->local_port; + req->flags = p->cf->free_bind ? 
SKF_FREEBIND : 0; - if (p->cf->remote_range) + add_tail(&bgp_listen_pending, &req->n); + ev_schedule(&bgp_listen_event); +} + +static void +bgp_listen_create(void *_ UNUSED) +{ + uint flag_mask = SKF_FREEBIND; + + struct bgp_listen_request *req; + WALK_LIST_FIRST(req, bgp_listen_pending) + { + struct bgp_proto *p = SKIP_BACK(struct bgp_proto, listen, req); + rem_node(&req->n); + + /* First try to find existing socket */ + struct bgp_socket *bs; + WALK_LIST(bs, bgp_sockets) + if (ipa_equal(bs->sk->saddr, req->addr) && + (bs->sk->sport == req->port) && + (bs->sk->iface == req->iface) && + (bs->sk->vrf == req->vrf) && + ((bs->sk->flags & flag_mask) == req->flags)) + break; + + /* Not found any */ + if (!NODE_VALID(bs)) { - prefix = net_prefix(p->cf->remote_range); - pxlen = net_pxlen(p->cf->remote_range); + sock *sk = sk_new(proto_pool); + sk->type = SK_TCP_PASSIVE; + sk->ttl = 255; + sk->saddr = req->addr; + sk->sport = req->port; + sk->iface = req->iface; + sk->vrf = req->vrf; + sk->flags = req->flags; + sk->tos = IP_PREC_INTERNET_CONTROL; + sk->rbsize = BGP_RX_BUFFER_SIZE; + sk->tbsize = BGP_TX_BUFFER_SIZE; + sk->rx_hook = bgp_incoming_connection; + sk->err_hook = bgp_listen_sock_err; + + if (sk_open(sk) < 0) + { + sk_log_error(sk, p->p.name); + log(L_ERR "%s: Cannot open listening socket", p->p.name); + rfree(sk); + bgp_initiate_disable(p, BEM_NO_SOCKET); + + continue; + } + + bs = mb_allocz(proto_pool, sizeof(struct bgp_socket)); + bs->sk = sk; + sk->data = bs; + + init_list(&bs->requests); + add_tail(&bgp_sockets, &bs->n); } - int rv = sk_set_md5_auth(p->sock->sk, - p->cf->local_ip, prefix, pxlen, p->cf->iface, - enable ? 
p->cf->password : NULL, p->cf->setkey); + add_tail(&bs->requests, &req->n); + req->sock = bs; - if (rv < 0) - sk_log_error(p->sock->sk, p->p.name); - - return rv; + if (bgp_setup_auth(p, 1) < 0) + { + bgp_close(p); + bgp_initiate_disable(p, BEM_INVALID_MD5); + } } - else - return 0; + + /* Cleanup leftover listening sockets */ + struct bgp_socket *bs; + node *nxt; + WALK_LIST_DELSAFE(bs, nxt, bgp_sockets) + if (EMPTY_LIST(bs->requests)) + { + rfree(bs->sk); + rem_node(&bs->n); + mb_free(bs); + } } static inline struct bgp_channel * @@ -299,13 +339,7 @@ bgp_startup_timeout(timer *t) static void bgp_initiate(struct bgp_proto *p) { - int err_val; - - if (bgp_open(p) < 0) - { err_val = BEM_NO_SOCKET; goto err1; } - - if (bgp_setup_auth(p, 1) < 0) - { err_val = BEM_INVALID_MD5; goto err2; } + bgp_open(p); if (p->cf->bfd) bgp_update_bfd(p, p->cf->bfd); @@ -318,12 +352,11 @@ bgp_initiate(struct bgp_proto *p) } else bgp_startup(p); +} - return; - -err2: - bgp_close(p); -err1: +static void +bgp_initiate_disable(struct bgp_proto *p, int err_val) +{ p->p.disabled = 1; bgp_store_error(p, NULL, BE_MISC, err_val); @@ -1199,12 +1232,15 @@ static struct bgp_proto * bgp_find_proto(sock *sk) { struct bgp_proto *best = NULL; - struct bgp_proto *p; + struct bgp_socket *bs = sk->data; + struct bgp_listen_request *req; /* sk->iface is valid only if src or dst address is link-local */ int link = ipa_is_link_local(sk->saddr) || ipa_is_link_local(sk->daddr); - WALK_LIST(p, proto_list) + WALK_LIST(req, bs->requests) + { + struct bgp_proto *p = SKIP_BACK(struct bgp_proto, listen, req); if ((p->p.proto == &proto_bgp) && (ipa_equal(p->remote_ip, sk->daddr) || bgp_is_dynamic(p)) && (!p->cf->remote_range || ipa_in_netX(sk->daddr, p->cf->remote_range)) && @@ -1218,6 +1254,7 @@ bgp_find_proto(sock *sk) if (!bgp_is_dynamic(p)) break; } + } return best; } @@ -1757,7 +1794,7 @@ bgp_init(struct proto_config *CF) P->rt_notify = bgp_rt_notify; P->preexport = bgp_preexport; - P->neigh_notify = 
bgp_neigh_notify; + P->iface_sub.neigh_notify = bgp_neigh_notify; P->reload_routes = bgp_reload_routes; P->feed_begin = bgp_feed_begin; P->feed_end = bgp_feed_end; diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 6402921a..acd46f00 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -266,8 +266,8 @@ struct bgp_caps { struct bgp_socket { node n; /* Node in global bgp_sockets */ + list requests; /* Listen requests */ sock *sk; /* Real listening socket */ - u32 uc; /* Use count */ }; struct bgp_stats { @@ -302,6 +302,16 @@ struct bgp_conn { uint hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */ }; +struct bgp_listen_request { + node n; /* Node in bgp_socket / pending list */ + struct bgp_socket *sock; /* Assigned socket */ + ip_addr addr; + struct iface *iface; + struct iface *vrf; + uint port; + uint flags; +}; + struct bgp_proto { struct proto p; const struct bgp_config *cf; /* Shortcut to BGP configuration */ @@ -333,7 +343,7 @@ struct bgp_proto { struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */ struct object_lock *lock; /* Lock for neighbor connection */ struct neighbor *neigh; /* Neighbor entry corresponding to remote ip, NULL if multihop */ - struct bgp_socket *sock; /* Shared listening socket */ + struct bgp_listen_request listen; /* Shared listening socket */ struct bfd_request *bfd_req; /* BFD request, if BFD is used */ struct birdsock *postponed_sk; /* Postponed incoming socket for dynamic BGP */ event *uncork_ev; /* Uncork event in case of congestion */ diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c index 4e29f960..54d67fd6 100644 --- a/proto/ospf/ospf.c +++ b/proto/ospf/ospf.c @@ -371,8 +371,8 @@ ospf_init(struct proto_config *CF) P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF)); P->rt_notify = ospf_rt_notify; - P->if_notify = ospf_if_notify; - P->ifa_notify = cf->ospf2 ? 
ospf_ifa_notify2 : ospf_ifa_notify3; + P->iface_sub.if_notify = ospf_if_notify; + P->iface_sub.ifa_notify = cf->ospf2 ? ospf_ifa_notify2 : ospf_ifa_notify3; P->preexport = ospf_preexport; P->reload_routes = ospf_reload_routes; P->feed_begin = ospf_feed_begin; diff --git a/proto/perf/perf.c b/proto/perf/perf.c index 9adafe5a..dc5bbf2f 100644 --- a/proto/perf/perf.c +++ b/proto/perf/perf.c @@ -272,7 +272,7 @@ perf_init(struct proto_config *CF) switch (p->mode) { case PERF_MODE_IMPORT: - P->ifa_notify = perf_ifa_notify; + P->iface_sub.ifa_notify = perf_ifa_notify; break; case PERF_MODE_EXPORT: P->rt_notify = perf_rt_notify; diff --git a/proto/radv/radv.c b/proto/radv/radv.c index b7c8d7be..a23b8945 100644 --- a/proto/radv/radv.c +++ b/proto/radv/radv.c @@ -583,8 +583,8 @@ radv_init(struct proto_config *CF) P->preexport = radv_preexport; P->rt_notify = radv_rt_notify; - P->if_notify = radv_if_notify; - P->ifa_notify = radv_ifa_notify; + P->iface_sub.if_notify = radv_if_notify; + P->iface_sub.ifa_notify = radv_ifa_notify; return P; } diff --git a/proto/rip/rip.c b/proto/rip/rip.c index b3a4e81e..e9aaf7b1 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -1156,9 +1156,9 @@ rip_init(struct proto_config *CF) P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF)); - P->if_notify = rip_if_notify; + P->iface_sub.if_notify = rip_if_notify; P->rt_notify = rip_rt_notify; - P->neigh_notify = rip_neigh_notify; + P->iface_sub.neigh_notify = rip_neigh_notify; P->reload_routes = rip_reload_routes; P->sources.class = &rip_rte_owner_class; diff --git a/proto/static/static.c b/proto/static/static.c index 42fd20b7..82fbfe7a 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -460,7 +460,7 @@ static_init(struct proto_config *CF) P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF)); - P->neigh_notify = static_neigh_notify; + P->iface_sub.neigh_notify = static_neigh_notify; P->reload_routes = static_reload_routes; P->sources.class = 
&static_rte_owner_class; diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index b3681c9c..fdfaa2d4 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -163,6 +163,15 @@ kif_shutdown(struct proto *P) return PS_DOWN; } +static void +kif_cleanup(struct proto *p) +{ + if (p->debug & D_EVENTS) + log(L_TRACE "%s: Flushing interfaces", p->name); + if_start_update(); + if_end_update(); +} + static int kif_reconfigure(struct proto *p, struct proto_config *new) { @@ -238,6 +247,7 @@ struct protocol proto_unix_iface = { .init = kif_init, .start = kif_start, .shutdown = kif_shutdown, + .cleanup = kif_cleanup, .reconfigure = kif_reconfigure, .copy_config = kif_copy_config }; @@ -845,7 +855,7 @@ krt_init(struct proto_config *CF) p->p.preexport = krt_preexport; p->p.rt_notify = krt_rt_notify; - p->p.if_notify = krt_if_notify; + p->p.iface_sub.if_notify = krt_if_notify; p->p.reload_routes = krt_reload_routes; p->p.feed_end = krt_feed_end;