Route refresh in tables uses a stale counter.
Until now, routes were marked as REF_STALE and REF_DISCARD to clean up old routes after a route refresh. This required a synchronous route table walk at both the beginning and the end of the route refresh routine, marking the routes by these flags. We avoid these walks by using a stale counter.

Every route contains:

  u8 stale_cycle;

Every import hook contains:

  u8 stale_set;
  u8 stale_valid;
  u8 stale_pruned;
  u8 stale_pruning;

In the base state, stale_set == stale_valid == stale_pruned == stale_pruning, and all routes' stale_cycle also have the same value. A route refresh proceeds as follows:

  +-------------+-----------+-------------+---------------+--------------+
  |             | stale_set | stale_valid | stale_pruning | stale_pruned |
  | Base        | x         | x           | x             | x            |
  | Begin       | x+1       | x           | x             | x            |
      ... now routes are being inserted with stale_cycle == (x+1)
  | End         | x+1       | x+1         | x             | x            |
      ... now the table pruning routine is scheduled
  | Prune begin | x+1       | x+1         | x+1           | x            |
      ... now routes with stale_cycle not between stale_valid and stale_set are deleted
  | Prune end   | x+1       | x+1         | x+1           | x+1          |
  +-------------+-----------+-------------+---------------+--------------+

The pruning routine is asynchronous and may have high latency in high-load environments. Therefore, multiple route refresh requests may happen before the pruning routine starts, leading to this situation:

  | Prune begin | x+k       | x+k         | x -> x+k      | x            |

... or even

  | Prune begin | x+k+1     | x+k         | x -> x+k      | x            |

... if the prune event starts while another route refresh is running. In such a case, the pruning routine still deletes routes not fitting between stale_valid and stale_set, effectively pruning the remnants of all unpruned route refreshes from before:

  | Prune end   | x+k       | x+k         | x+k           | x+k          |

In extremely rare cases, too many route refreshes may happen before any route prune routine finishes. If the difference between stale_valid and stale_pruned exceeds 128 when another route refresh is requested, the routine walks the table synchronously, resets all the stale values to the base state, and logs a warning.
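To make the counter arithmetic concrete, here is a minimal compilable sketch of the same bookkeeping, assuming a stripped-down hook carrying only the two counters the prune predicate needs. The demo_* names and the simplified structure are illustrative stand-ins, not BIRD's actual types.

  #include <stdint.h>
  #include <stdio.h>

  /* Hypothetical stand-in for the import hook's stale counters. */
  struct demo_hook {
    uint8_t stale_set;     /* stamped onto routes imported right now */
    uint8_t stale_valid;   /* routes below this value are stale */
  };

  /* The prune predicate from the commit: a route is dropped unless its
   * stale_cycle lies between stale_valid and stale_set (inclusive). */
  static int demo_is_stale(const struct demo_hook *h, uint8_t stale_cycle)
  {
    return (stale_cycle < h->stale_valid) || (stale_cycle > h->stale_set);
  }

  /* Refresh begin: bump stale_set, reserving 0 for always-invalid
   * routes when the u8 counter wraps around. */
  static void demo_refresh_begin(struct demo_hook *h)
  {
    if (!++h->stale_set)
    {
      h->stale_set = 1;
      h->stale_valid = 0;
    }
  }

  /* Refresh end: only routes re-imported during this refresh stay valid. */
  static void demo_refresh_end(struct demo_hook *h)
  {
    h->stale_valid++;
  }

  int main(void)
  {
    struct demo_hook h = { 5, 5 };      /* base state: set == valid == 5 */

    demo_refresh_begin(&h);             /* set = 6, valid = 5 */
    uint8_t reimported = h.stale_set;   /* re-announced routes get 6 */
    demo_refresh_end(&h);               /* set = 6, valid = 6 */

    printf("old route (cycle 5) stale: %d\n", demo_is_stale(&h, 5));          /* prints 1 */
    printf("new route (cycle 6) stale: %d\n", demo_is_stale(&h, reimported)); /* prints 0 */
    return 0;
  }

Note that this sketch omits the flood handling: the real rt_refresh_begin() resets everything to the base state when stale_valid runs too far ahead of stale_pruned, which keeps the window test above correct despite u8 wraparound.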
commit 080cbd1219
parent 4ef2262bd5
@@ -29,11 +29,10 @@ typedef struct rte {
   u8 generation;       /* If this route import is based on other previously exported route,
                           this value should be 1 + MAX(generation of the parent routes).
                           Otherwise the route is independent and this value is zero. */
+  u8 stale_cycle;      /* Auxiliary value for route refresh */
 } rte;

 #define REF_FILTERED 2          /* Route is rejected by import filter */
-#define REF_STALE 4             /* Route is stale in a refresh cycle */
-#define REF_DISCARD 8           /* Route is scheduled for discard */
 #define REF_MODIFY 16           /* Route is scheduled for modify */
 #define REF_PENDING 32          /* Route has not propagated completely yet */

@@ -77,7 +77,11 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary
              e->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info);

   if (d->verbose)
+  {
     ea_show_list(c, a);
+    cli_printf(c, -1008, "\tInternal route handling values: %uL %uG %uS",
+        e->src->private_id, e->src->global_id, e->stale_cycle);
+  }
   else if (dest == RTD_UNICAST)
     ea_show_nexthop_list(c, nhad);
   else if (had)
@@ -1323,7 +1323,8 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr
         {
           /* No changes, ignore the new route and refresh the old one */

-          old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);
+          old->flags &= ~REF_MODIFY;
+          old->stale_cycle = new->stale_cycle;

           if (!rte_is_filtered(new))
             {
@@ -1639,6 +1640,9 @@ rte_import(struct rt_import_request *req, const net_addr *n, rte *new, struct rt
       nn = net_get(hook->table, n);
       new->net = nn->n.addr;
       new->sender = hook;
+
+      /* Set the stale cycle */
+      new->stale_cycle = hook->stale_set;
     }
   else if (!(nn = net_find(hook->table, n)))
     {
@@ -1916,15 +1920,38 @@ rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_r
  * flag in rt_refresh_end() and then removing such routes in the prune loop.
  */
 void
-rt_refresh_begin(rtable *t, struct rt_import_request *req)
+rt_refresh_begin(struct rt_import_request *req)
 {
-  FIB_WALK(&t->fib, net, n)
-    {
-      for (struct rte_storage *e = n->routes; e; e = e->next)
-        if (e->rte.sender == req->hook)
-          e->rte.flags |= REF_STALE;
-    }
-  FIB_WALK_END;
+  struct rt_import_hook *hook = req->hook;
+  ASSERT_DIE(hook);
+  ASSERT_DIE(hook->stale_set == hook->stale_valid);
+
+  /* If the pruning routine is too slow */
+  if ((hook->stale_pruned < hook->stale_valid) && (hook->stale_pruned + 128 < hook->stale_valid)
+      || (hook->stale_pruned > hook->stale_valid) && (hook->stale_pruned > hook->stale_valid + 128))
+  {
+    log(L_WARN "Route refresh flood in table %s", hook->table->name);
+    FIB_WALK(&hook->table->fib, net, n)
+      {
+        for (struct rte_storage *e = n->routes; e; e = e->next)
+          if (e->rte.sender == req->hook)
+            e->rte.stale_cycle = 0;
+      }
+    FIB_WALK_END;
+    hook->stale_set = 1;
+    hook->stale_valid = 0;
+    hook->stale_pruned = 0;
+  }
+  /* Setting a new value of the stale modifier */
+  else if (!++hook->stale_set)
+  {
+    /* Let's reserve the stale_cycle zero value for always-invalid routes */
+    hook->stale_set = 1;
+    hook->stale_valid = 0;
+  }
+
+  if (req->trace_routes & D_STATES)
+    log(L_TRACE "%s: route refresh begin [%u]", req->name, hook->stale_set);
 }

 /**
@@ -1936,34 +1963,32 @@ rt_refresh_begin(rtable *t, struct rt_import_request *req)
  * hook. See rt_refresh_begin() for description of refresh cycles.
  */
 void
-rt_refresh_end(rtable *t, struct rt_import_request *req)
+rt_refresh_end(struct rt_import_request *req)
 {
-  int prune = 0;
+  struct rt_import_hook *hook = req->hook;
+  ASSERT_DIE(hook);

-  FIB_WALK(&t->fib, net, n)
-    {
-      for (struct rte_storage *e = n->routes; e; e = e->next)
-        if ((e->rte.sender == req->hook) && (e->rte.flags & REF_STALE))
-          {
-            e->rte.flags |= REF_DISCARD;
-            prune = 1;
-          }
-    }
-  FIB_WALK_END;
+  hook->stale_valid++;
+  ASSERT_DIE(hook->stale_set == hook->stale_valid);

-  if (prune)
-    rt_schedule_prune(t);
+  rt_schedule_prune(hook->table);
+
+  if (req->trace_routes & D_STATES)
+    log(L_TRACE "%s: route refresh end [%u]", req->name, hook->stale_valid);
 }

 void
 rt_modify_stale(rtable *t, struct rt_import_request *req)
 {
   int prune = 0;
+  struct rt_import_hook *s = req->hook;

   FIB_WALK(&t->fib, net, n)
     {
       for (struct rte_storage *e = n->routes; e; e = e->next)
-        if ((e->rte.sender == req->hook) && (e->rte.flags & REF_STALE) && !(e->rte.flags & REF_FILTERED))
+        if ((e->rte.sender == s) &&
+            !(e->rte.flags & REF_FILTERED) &&
+            (e->rte.stale_cycle + 1 == s->stale_set))
           {
             e->rte.flags |= REF_MODIFY;
             prune = 1;
@@ -2434,6 +2459,13 @@ rt_prune_table(rtable *tab)
     WALK_LIST2(ih, n, tab->imports, n)
       if (ih->import_state == TIS_STOP)
         rt_set_import_state(ih, TIS_FLUSHING);
+      else if ((ih->stale_valid != ih->stale_pruning) && (ih->stale_pruning == ih->stale_pruned))
+      {
+        ih->stale_pruning = ih->stale_valid;
+
+        if (ih->req->trace_routes & D_STATES)
+          log(L_TRACE "%s: table prune after refresh begin [%u]", ih->req->name, ih->stale_pruning);
+      }

     FIB_ITERATE_INIT(fit, &tab->fib);
     tab->prune_state = 2;
@@ -2459,7 +2491,10 @@ again:

       for (struct rte_storage *e=n->routes; e; e=e->next)
         {
-          if ((e->rte.sender->import_state == TIS_FLUSHING) || (e->rte.flags & REF_DISCARD))
+          struct rt_import_hook *s = e->rte.sender;
+          if ((s->import_state == TIS_FLUSHING) ||
+              (e->rte.stale_cycle < s->stale_valid) ||
+              (e->rte.stale_cycle > s->stale_set))
             {
               rte_discard(n, &e->rte);
               limit--;
@@ -2544,6 +2579,12 @@ again:
           mb_free(ih);
           rt_unlock_table(tab);
         }
+      else if (ih->stale_pruning != ih->stale_pruned)
+      {
+        ih->stale_pruned = ih->stale_pruning;
+        if (ih->req->trace_routes & D_STATES)
+          log(L_TRACE "%s: table prune after refresh end [%u]", ih->req->name, ih->stale_pruned);
+      }
     }

 /**
@@ -201,6 +201,10 @@ struct rt_import_hook {
   btime last_state_change;      /* Time of last state transition */

   u8 import_state;              /* IS_* */
+  u8 stale_set;                 /* Set this stale_cycle to imported routes */
+  u8 stale_valid;               /* Routes with this stale_cycle and bigger are considered valid */
+  u8 stale_pruned;              /* Last prune finished when this value was set at stale_valid */
+  u8 stale_pruning;             /* Last prune started when this value was set at stale_valid */

   void (*stopped)(struct rt_import_request *);  /* Stored callback when import is stopped */
 };
@@ -381,8 +385,8 @@ net *net_get(rtable *tab, const net_addr *addr);
 net *net_route(rtable *tab, const net_addr *n);
 int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter);
 rte *rt_export_merged(struct channel *c, rte ** feed, uint count, linpool *pool, int silent);
-void rt_refresh_begin(rtable *t, struct rt_import_request *);
-void rt_refresh_end(rtable *t, struct rt_import_request *);
+void rt_refresh_begin(struct rt_import_request *);
+void rt_refresh_end(struct rt_import_request *);
 void rt_modify_stale(rtable *t, struct rt_import_request *);
 void rt_schedule_prune(rtable *t);
 void rte_dump(struct rte_storage *);
@@ -760,16 +760,16 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
     {
     case BGP_GRS_NONE:
       c->gr_active = BGP_GRS_ACTIVE;
-      rt_refresh_begin(c->c.table, &c->c.in_req);
+      rt_refresh_begin(&c->c.in_req);
       break;

     case BGP_GRS_ACTIVE:
-      rt_refresh_end(c->c.table, &c->c.in_req);
-      rt_refresh_begin(c->c.table, &c->c.in_req);
+      rt_refresh_end(&c->c.in_req);
+      rt_refresh_begin(&c->c.in_req);
       break;

     case BGP_GRS_LLGR:
-      rt_refresh_begin(c->c.table, &c->c.in_req);
+      rt_refresh_begin(&c->c.in_req);
       rt_modify_stale(c->c.table, &c->c.in_req);
       break;
     }
@@ -777,8 +777,8 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
   else
     {
       /* Just flush the routes */
-      rt_refresh_begin(c->c.table, &c->c.in_req);
-      rt_refresh_end(c->c.table, &c->c.in_req);
+      rt_refresh_begin(&c->c.in_req);
+      rt_refresh_end(&c->c.in_req);
     }

   /* Reset bucket and prefix tables */
@@ -819,7 +819,7 @@ bgp_graceful_restart_done(struct bgp_channel *c)
   BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");

   tm_stop(c->stale_timer);
-  rt_refresh_end(c->c.table, &c->c.in_req);
+  rt_refresh_end(&c->c.in_req);
 }

 /**
@@ -899,7 +899,7 @@ bgp_refresh_begin(struct bgp_channel *c)
     { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }

   c->load_state = BFS_REFRESHING;
-  rt_refresh_begin(c->c.table, &c->c.in_req);
+  rt_refresh_begin(&c->c.in_req);
 }

 /**
@@ -920,7 +920,7 @@ bgp_refresh_end(struct bgp_channel *c)
     { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }

   c->load_state = BFS_NONE;
-  rt_refresh_end(c->c.table, &c->c.in_req);
+  rt_refresh_end(&c->c.in_req);
 }

@@ -661,9 +661,9 @@ rpki_handle_cache_response_pdu(struct rpki_cache *cache, const struct pdu_cache_
    * a refresh cycle.
    */
   if (cache->p->roa4_channel)
-    rt_refresh_begin(cache->p->roa4_channel->table, &cache->p->roa4_channel->in_req);
+    rt_refresh_begin(&cache->p->roa4_channel->in_req);
   if (cache->p->roa6_channel)
-    rt_refresh_begin(cache->p->roa6_channel->table, &cache->p->roa6_channel->in_req);
+    rt_refresh_begin(&cache->p->roa6_channel->in_req);

   cache->p->refresh_channels = 1;
 }
@@ -846,9 +846,9 @@ rpki_handle_end_of_data_pdu(struct rpki_cache *cache, const struct pdu_end_of_da
     {
       cache->p->refresh_channels = 0;
       if (cache->p->roa4_channel)
-        rt_refresh_end(cache->p->roa4_channel->table, &cache->p->roa4_channel->in_req);
+        rt_refresh_end(&cache->p->roa4_channel->in_req);
       if (cache->p->roa6_channel)
-        rt_refresh_end(cache->p->roa6_channel->table, &cache->p->roa6_channel->in_req);
+        rt_refresh_end(&cache->p->roa6_channel->in_req);
     }

   cache->last_update = current_time();