mirror of
https://gitlab.nic.cz/labs/bird.git
synced 2024-12-22 17:51:53 +00:00
bc2ce4aaa8
For BGP LLGR purposes, there was an API allowing a protocol to directly modify their stale routes in table before flushing them. This API was called by the table prune routine which violates the future locking requirements. Instead of this, BGP now requests a special route export and reimports these routes into the table, allowing for asynchronous execution without locking the table on export.
2691 lines
70 KiB
C
2691 lines
70 KiB
C
/*
|
|
* BIRD -- BGP Attributes
|
|
*
|
|
* (c) 2000 Martin Mares <mj@ucw.cz>
|
|
* (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
|
|
* (c) 2008--2016 CZ.NIC z.s.p.o.
|
|
*
|
|
* Can be freely distributed and used under the terms of the GNU GPL.
|
|
*/
|
|
|
|
#undef LOCAL_DEBUG
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include "nest/bird.h"
|
|
#include "nest/iface.h"
|
|
#include "nest/protocol.h"
|
|
#include "nest/rt.h"
|
|
#include "lib/attrs.h"
|
|
#include "conf/conf.h"
|
|
#include "lib/resource.h"
|
|
#include "lib/string.h"
|
|
#include "lib/unaligned.h"
|
|
#include "lib/macro.h"
|
|
|
|
#include "bgp.h"
|
|
|
|
/*
|
|
* UPDATE message error handling
|
|
*
|
|
* All checks from RFC 4271 6.3 are done as specified with these exceptions:
|
|
* - The semantic check of an IP address from NEXT_HOP attribute is missing.
|
|
* - Checks of some optional attribute values are missing.
|
|
* - Syntactic and semantic checks of NLRIs (done in DECODE_PREFIX())
|
|
* are probably inadequate.
|
|
*
|
|
* Loop detection based on AS_PATH causes updates to be withdrawn. RFC
|
|
* 4271 does not explicitly specify the behavior in that case.
|
|
*
|
|
* Loop detection related to route reflection (based on ORIGINATOR_ID
|
|
* and CLUSTER_LIST) causes updates to be withdrawn. RFC 4456 8
|
|
* specifies that such updates should be ignored, but that is generally
|
|
* a bad idea.
|
|
*
|
|
* BGP attribute table has several hooks:
|
|
*
|
|
* export - Hook that validates and normalizes attribute during export phase.
|
|
* Receives eattr, may modify it (e.g., sort community lists for canonical
|
|
* representation), UNSET() it (e.g., skip empty lists), or REJECT() the route
|
|
* if necessary. May assume that eattr has value valid w.r.t. its type, but may
|
|
* be invalid w.r.t. BGP constraints. Optional.
|
|
*
|
|
* encode - Hook that converts internal representation to external one during
|
|
* packet writing. Receives eattr and puts it in the buffer (including attribute
|
|
* header). Returns number of bytes, or -1 if not enough space. May assume that
|
|
* eattr has value valid w.r.t. its type and validated by export hook. Mandatory
|
|
* for all known attributes that exist internally after export phase (i.e., all
|
|
* except pseudoattributes MP_(UN)REACH_NLRI).
|
|
*
|
|
* decode - Hook that converts external representation to internal one during
|
|
* packet parsing. Receives attribute data in buffer, validates it and adds
|
|
* attribute to ea_list. If data are invalid, steps DISCARD(), WITHDRAW() or
|
|
* bgp_parse_error() may be used to escape. Mandatory for all known attributes.
|
|
*
|
|
* format - Optional hook that converts eattr to textual representation.
|
|
*/
|
|
|
|
union bgp_attr_desc {
|
|
struct ea_class class;
|
|
struct {
|
|
EA_CLASS_INSIDE;
|
|
uint flags;
|
|
void (*export)(struct bgp_export_state *s, eattr *a);
|
|
int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size);
|
|
void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to);
|
|
};
|
|
};
|
|
|
|
static union bgp_attr_desc bgp_attr_table[];
|
|
static inline const union bgp_attr_desc *bgp_find_attr_desc(eattr *a)
|
|
{
|
|
const struct ea_class *class = ea_class_find(a->id);
|
|
|
|
if ((class < &bgp_attr_table[0].class) || (class >= &bgp_attr_table[BGP_ATTR_MAX].class))
|
|
return NULL;
|
|
|
|
return (const union bgp_attr_desc *) class;
|
|
}
|
|
|
|
/* BGP attribute code -> registered extended attribute id */
#define BGP_EA_ID(code) \
  (bgp_attr_table[code].id)

/* Extended attribute id -> index of its descriptor in bgp_attr_table[] */
#define EA_BGP_ID(code) \
  (((union bgp_attr_desc *) ea_class_find(code)) - bgp_attr_table)
|
|
|
|
/* Store an embedded 32-bit value as BGP attribute @code in list @to */
void
bgp_set_attr_u32(ea_list **to, uint code, uint flags, u32 val)
{
  const struct ea_class *cls = &bgp_attr_table[code].class;

  /* The extended-length bit is never kept in the stored flags */
  uint stored = flags & ~BAF_EXT_LEN;

  ea_set_attr(to, EA_LITERAL_EMBEDDED(cls, stored, val));
}
|
|
|
|
/* Store already prepared adata @ad as BGP attribute @code in list @to */
void
bgp_set_attr_ptr(ea_list **to, uint code, uint flags, const struct adata *ad)
{
  const struct ea_class *cls = &bgp_attr_table[code].class;

  /* The extended-length bit is never kept in the stored flags */
  uint stored = flags & ~BAF_EXT_LEN;

  ea_set_attr(to, EA_LITERAL_DIRECT_ADATA(cls, stored, ad));
}
|
|
|
|
void
|
|
bgp_set_attr_data(ea_list **to, uint code, uint flags, void *data, uint len)
|
|
{
|
|
const union bgp_attr_desc *desc = &bgp_attr_table[code];
|
|
|
|
ea_set_attr(to, EA_LITERAL_STORE_ADATA(
|
|
&desc->class,
|
|
flags & ~BAF_EXT_LEN,
|
|
data,
|
|
len
|
|
));
|
|
}
|
|
|
|
void
|
|
bgp_unset_attr(ea_list **to, uint code)
|
|
{
|
|
const union bgp_attr_desc *desc = &bgp_attr_table[code];
|
|
ea_unset_attr(to, 0, &desc->class);
|
|
}
|
|
|
|
/*
 * Escape macros for attribute hooks. They expect the hook's state pointer
 * to be named `s`; all except REPORT() return from the calling function.
 */

/* Log a message about the remote peer, prefixed with the protocol name */
#define REPORT(msg, args...) \
  ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })

/* Report and leave the hook without storing the attribute */
#define DISCARD(msg, args...) \
  ({ REPORT(msg, ## args); return; })

/* Report, flag the update as err_withdraw and leave the hook */
#define WITHDRAW(msg, args...) \
  ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })

/* Mark attribute `a` as undefined and leave the hook */
#define UNSET(a) \
  ({ a->undef = 1; return; })

/* Log an error, flag the export as err_reject and leave the hook */
#define REJECT(msg, args...) \
  ({ log(L_ERR "%s: " msg, s->proto->p.name, ## args); s->err_reject = 1; return; })

/* Message templates shared by the hooks */
#define NEW_BGP "Discarding %s attribute received from AS4-aware neighbor"
#define BAD_EBGP "Discarding %s attribute received from EBGP neighbor"
#define BAD_LENGTH "Malformed %s attribute - invalid length (%u)"
#define BAD_VALUE "Malformed %s attribute - invalid value (%u)"
#define NO_MANDATORY "Missing mandatory %s attribute"
|
|
|
|
|
|
static inline int
|
|
bgp_put_attr_hdr3(byte *buf, uint code, uint flags, uint len)
|
|
{
|
|
*buf++ = flags & ~BAF_EXT_LEN;
|
|
*buf++ = code;
|
|
*buf++ = len;
|
|
return 3;
|
|
}
|
|
|
|
static inline int
|
|
bgp_put_attr_hdr4(byte *buf, uint code, uint flags, uint len)
|
|
{
|
|
*buf++ = flags | BAF_EXT_LEN;
|
|
*buf++ = code;
|
|
put_u16(buf, len);
|
|
return 4;
|
|
}
|
|
|
|
static inline int
|
|
bgp_put_attr_hdr(byte *buf, uint code, uint flags, uint len)
|
|
{
|
|
if (len < 256)
|
|
return bgp_put_attr_hdr3(buf, code, flags, len);
|
|
else
|
|
return bgp_put_attr_hdr4(buf, code, flags, len);
|
|
}
|
|
|
|
static int
|
|
bgp_encode_u8(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
|
|
{
|
|
if (size < (3+1))
|
|
return -1;
|
|
|
|
bgp_put_attr_hdr3(buf, EA_BGP_ID(a->id), a->flags, 1);
|
|
buf[3] = a->u.data;
|
|
|
|
return 3+1;
|
|
}
|
|
|
|
static int
|
|
bgp_encode_u32(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
|
|
{
|
|
if (size < (3+4))
|
|
return -1;
|
|
|
|
bgp_put_attr_hdr3(buf, EA_BGP_ID(a->id), a->flags, 4);
|
|
put_u32(buf+3, a->u.data);
|
|
|
|
return 3+4;
|
|
}
|
|
|
|
static int
|
|
bgp_encode_u32s(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
|
|
{
|
|
uint len = a->u.ptr->length;
|
|
|
|
if (size < (4+len))
|
|
return -1;
|
|
|
|
uint hdr = bgp_put_attr_hdr(buf, EA_BGP_ID(a->id), a->flags, len);
|
|
put_u32s(buf + hdr, (u32 *) a->u.ptr->data, len / 4);
|
|
|
|
return hdr + len;
|
|
}
|
|
|
|
static int
|
|
bgp_put_attr(byte *buf, uint size, uint code, uint flags, const byte *data, uint len)
|
|
{
|
|
if (size < (4+len))
|
|
return -1;
|
|
|
|
uint hdr = bgp_put_attr_hdr(buf, code, flags, len);
|
|
memcpy(buf + hdr, data, len);
|
|
|
|
return hdr + len;
|
|
}
|
|
|
|
static int
|
|
bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
|
|
{
|
|
return bgp_put_attr(buf, size, EA_BGP_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length);
|
|
}
|
|
|
|
|
|
/*
|
|
* AIGP handling
|
|
*/
|
|
|
|
static int
|
|
bgp_aigp_valid(byte *data, uint len, char *err, uint elen)
|
|
{
|
|
byte *pos = data;
|
|
char *err_dsc = NULL;
|
|
uint err_val = 0;
|
|
|
|
#define BAD(DSC,VAL) ({ err_dsc = DSC; err_val = VAL; goto bad; })
|
|
while (len)
|
|
{
|
|
if (len < 3)
|
|
BAD("TLV framing error", len);
|
|
|
|
/* Process one TLV */
|
|
uint ptype = pos[0];
|
|
uint plen = get_u16(pos + 1);
|
|
|
|
if (len < plen)
|
|
BAD("TLV framing error", plen);
|
|
|
|
if (plen < 3)
|
|
BAD("Bad TLV length", plen);
|
|
|
|
if ((ptype == BGP_AIGP_METRIC) && (plen != 11))
|
|
BAD("Bad AIGP TLV length", plen);
|
|
|
|
ADVANCE(pos, len, plen);
|
|
}
|
|
#undef BAD
|
|
|
|
return 1;
|
|
|
|
bad:
|
|
if (err)
|
|
if (bsnprintf(err, elen, "%s (%u) at %d", err_dsc, err_val, (int) (pos - data)) < 0)
|
|
err[0] = 0;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static const byte *
|
|
bgp_aigp_get_tlv(const struct adata *ad, uint type)
|
|
{
|
|
if (!ad)
|
|
return NULL;
|
|
|
|
uint len = ad->length;
|
|
const byte *pos = ad->data;
|
|
|
|
while (len)
|
|
{
|
|
uint ptype = pos[0];
|
|
uint plen = get_u16(pos + 1);
|
|
|
|
if (ptype == type)
|
|
return pos;
|
|
|
|
ADVANCE(pos, len, plen);
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static const struct adata *
|
|
bgp_aigp_set_tlv(struct linpool *pool, const struct adata *ad, uint type, byte *data, uint dlen)
|
|
{
|
|
uint len = ad ? ad->length : 0;
|
|
const byte *pos = ad ? ad->data : NULL;
|
|
struct adata *res = lp_alloc_adata(pool, len + 3 + dlen);
|
|
byte *dst = res->data;
|
|
byte *tlv = NULL;
|
|
int del = 0;
|
|
|
|
while (len)
|
|
{
|
|
uint ptype = pos[0];
|
|
uint plen = get_u16(pos + 1);
|
|
|
|
/* Find position for new TLV */
|
|
if ((ptype >= type) && !tlv)
|
|
{
|
|
tlv = dst;
|
|
dst += 3 + dlen;
|
|
}
|
|
|
|
/* Skip first matching TLV, copy others */
|
|
if ((ptype == type) && !del)
|
|
del = 1;
|
|
else
|
|
{
|
|
memcpy(dst, pos, plen);
|
|
dst += plen;
|
|
}
|
|
|
|
ADVANCE(pos, len, plen);
|
|
}
|
|
|
|
if (!tlv)
|
|
{
|
|
tlv = dst;
|
|
dst += 3 + dlen;
|
|
}
|
|
|
|
/* Store the TLD */
|
|
put_u8(tlv + 0, type);
|
|
put_u16(tlv + 1, 3 + dlen);
|
|
memcpy(tlv + 3, data, dlen);
|
|
|
|
/* Update length */
|
|
res->length = dst - res->data;
|
|
|
|
return res;
|
|
}
|
|
|
|
static u64 UNUSED
|
|
bgp_aigp_get_metric(const struct adata *ad, u64 def)
|
|
{
|
|
const byte *b = bgp_aigp_get_tlv(ad, BGP_AIGP_METRIC);
|
|
return b ? get_u64(b + 3) : def;
|
|
}
|
|
|
|
static const struct adata *
|
|
bgp_aigp_set_metric(struct linpool *pool, const struct adata *ad, u64 metric)
|
|
{
|
|
byte data[8];
|
|
put_u64(data, metric);
|
|
return bgp_aigp_set_tlv(pool, ad, BGP_AIGP_METRIC, data, 8);
|
|
}
|
|
|
|
int
|
|
bgp_total_aigp_metric_(const rte *e, u64 *metric, const struct adata **ad)
|
|
{
|
|
eattr *a = ea_find(e->attrs, BGP_EA_ID(BA_AIGP));
|
|
if (!a)
|
|
return 0;
|
|
|
|
const byte *b = bgp_aigp_get_tlv(a->u.ptr, BGP_AIGP_METRIC);
|
|
if (!b)
|
|
return 0;
|
|
|
|
u64 aigp = get_u64(b + 3);
|
|
u64 step = rt_get_igp_metric(e);
|
|
|
|
if (!rte_resolvable(e) || (step >= IGP_METRIC_UNKNOWN))
|
|
step = BGP_AIGP_MAX;
|
|
|
|
if (!step)
|
|
step = 1;
|
|
|
|
*ad = a->u.ptr;
|
|
*metric = aigp + step;
|
|
if (*metric < aigp)
|
|
*metric = BGP_AIGP_MAX;
|
|
|
|
return 1;
|
|
}
|
|
|
|
static inline int
|
|
bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad)
|
|
{
|
|
if (rt_get_source_attr(e) == RTS_BGP)
|
|
return 0;
|
|
|
|
*metric = rt_get_igp_metric(e);
|
|
*ad = NULL;
|
|
return *metric < IGP_METRIC_UNKNOWN;
|
|
}
|
|
|
|
u32
|
|
bgp_rte_igp_metric(const rte *rt)
|
|
{
|
|
u64 metric = bgp_total_aigp_metric(rt);
|
|
return (u32) MIN(metric, (u64) IGP_METRIC_UNKNOWN);
|
|
}
|
|
|
|
|
|
/*
|
|
* Attribute hooks
|
|
*/
|
|
|
|
static void
|
|
bgp_export_origin(struct bgp_export_state *s, eattr *a)
|
|
{
|
|
if (a->u.data > 2)
|
|
REJECT(BAD_VALUE, "ORIGIN", a->u.data);
|
|
}
|
|
|
|
static void
|
|
bgp_decode_origin(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
|
{
|
|
if (len != 1)
|
|
WITHDRAW(BAD_LENGTH, "ORIGIN", len);
|
|
|
|
if (data[0] > 2)
|
|
WITHDRAW(BAD_VALUE, "ORIGIN", data[0]);
|
|
|
|
bgp_set_attr_u32(to, BA_ORIGIN, flags, data[0]);
|
|
}
|
|
|
|
static void
|
|
bgp_format_origin(const eattr *a, byte *buf, uint size UNUSED)
|
|
{
|
|
static const char *bgp_origin_names[] = { "IGP", "EGP", "Incomplete" };
|
|
|
|
bsprintf(buf, (a->u.data <= 2) ? bgp_origin_names[a->u.data] : "?");
|
|
}
|
|
|
|
|
|
static inline int
|
|
bgp_as_path_first_as_equal(const byte *data, uint len, u32 asn)
|
|
{
|
|
return (len >= 6) &&
|
|
((data[0] == AS_PATH_SEQUENCE) || (data[0] == AS_PATH_CONFED_SEQUENCE)) &&
|
|
(data[1] > 0) &&
|
|
(get_u32(data+2) == asn);
|
|
}
|
|
|
|
static int
|
|
bgp_encode_as_path(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
|
|
{
|
|
const byte *data = a->u.ptr->data;
|
|
uint len = a->u.ptr->length;
|
|
|
|
if (!s->as4_session)
|
|
{
|
|
/* Prepare 16-bit AS_PATH (from 32-bit one) in a temporary buffer */
|
|
byte *dst = alloca(len);
|
|
len = as_path_32to16(dst, data, len);
|
|
data = dst;
|
|
}
|
|
|
|
return bgp_put_attr(buf, size, BA_AS_PATH, a->flags, data, len);
|
|
}
|
|
|
|
static void
|
|
bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
|
{
|
|
struct bgp_proto *p = s->proto;
|
|
int as_length = s->as4_session ? 4 : 2;
|
|
int as_sets = p->cf->allow_as_sets;
|
|
int as_confed = p->cf->confederation && p->is_interior;
|
|
char err[128];
|
|
|
|
if (!as_path_valid(data, len, as_length, as_sets, as_confed, err, sizeof(err)))
|
|
WITHDRAW("Malformed AS_PATH attribute - %s", err);
|
|
|
|
if (!s->as4_session)
|
|
{
|
|
/* Prepare 32-bit AS_PATH (from 16-bit one) in a temporary buffer */
|
|
byte *src = data;
|
|
data = alloca(2*len);
|
|
len = as_path_16to32(data, src, len);
|
|
}
|
|
|
|
/* In some circumstances check for initial AS_CONFED_SEQUENCE; RFC 5065 5.0 */
|
|
if (p->is_interior && !p->is_internal &&
|
|
((len < 2) || (data[0] != AS_PATH_CONFED_SEQUENCE)))
|
|
WITHDRAW("Malformed AS_PATH attribute - %s", "missing initial AS_CONFED_SEQUENCE");
|
|
|
|
/* Reject routes with first AS in AS_PATH not matching neighbor AS; RFC 4271 6.3 */
|
|
if (!p->is_internal && p->cf->enforce_first_as &&
|
|
!bgp_as_path_first_as_equal(data, len, p->remote_as))
|
|
WITHDRAW("Malformed AS_PATH attribute - %s", "First AS differs from neigbor AS");
|
|
|
|
bgp_set_attr_data(to, BA_AS_PATH, flags, data, len);
|
|
}
|
|
|
|
|
|
static int
|
|
bgp_encode_next_hop(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
|
|
{
|
|
/*
|
|
* The NEXT_HOP attribute is used only in traditional (IPv4) BGP. In MP-BGP,
|
|
* the next hop is encoded as a part of the MP_REACH_NLRI attribute, so we
|
|
* store it and encode it later by AFI-specific hooks.
|
|
*/
|
|
|
|
if (!s->mp_reach)
|
|
{
|
|
// ASSERT(a->u.ptr->length == sizeof(ip_addr));
|
|
|
|
/* FIXME: skip IPv6 next hops for IPv4 routes during MRT dump */
|
|
ip_addr *addr = (void *) a->u.ptr->data;
|
|
if ((a->u.ptr->length != sizeof(ip_addr)) || !ipa_is_ip4(*addr))
|
|
return 0;
|
|
|
|
if (size < (3+4))
|
|
return -1;
|
|
|
|
bgp_put_attr_hdr3(buf, BA_NEXT_HOP, a->flags, 4);
|
|
put_ip4(buf+3, ipa_to_ip4(*addr));
|
|
|
|
return 3+4;
|
|
}
|
|
else
|
|
{
|
|
s->mp_next_hop = a;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
static void
|
|
bgp_decode_next_hop(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
|
|
{
|
|
if (len != 4)
|
|
WITHDRAW(BAD_LENGTH, "NEXT_HOP", len);
|
|
|
|
/* Semantic checks are done later */
|
|
s->ip_next_hop_len = len;
|
|
s->ip_next_hop_data = data;
|
|
}
|
|
|
|
/* TODO: This function should use AF-specific hook */
|
|
static void
|
|
bgp_format_next_hop(const eattr *a, byte *buf, uint size UNUSED)
|
|
{
|
|
ip_addr *nh = (void *) a->u.ptr->data;
|
|
uint len = a->u.ptr->length;
|
|
|
|
ASSERT((len == 16) || (len == 32));
|
|
|
|
/* in IPv6, we may have two addresses in NEXT HOP */
|
|
if ((len == 16) || ipa_zero(nh[1]))
|
|
bsprintf(buf, "%I", nh[0]);
|
|
else
|
|
bsprintf(buf, "%I %I", nh[0], nh[1]);
|
|
}
|
|
|
|
|
|
static void
|
|
bgp_decode_med(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
|
{
|
|
if (len != 4)
|
|
WITHDRAW(BAD_LENGTH, "MULTI_EXIT_DISC", len);
|
|
|
|
u32 val = get_u32(data);
|
|
bgp_set_attr_u32(to, BA_MULTI_EXIT_DISC, flags, val);
|
|
}
|
|
|
|
|
|
static void
|
|
bgp_export_local_pref(struct bgp_export_state *s, eattr *a)
|
|
{
|
|
if (!s->proto->is_interior && !s->proto->cf->allow_local_pref)
|
|
UNSET(a);
|
|
}
|
|
|
|
static void
|
|
bgp_decode_local_pref(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
|
{
|
|
if (!s->proto->is_interior && !s->proto->cf->allow_local_pref)
|
|
DISCARD(BAD_EBGP, "LOCAL_PREF");
|
|
|
|
if (len != 4)
|
|
WITHDRAW(BAD_LENGTH, "LOCAL_PREF", len);
|
|
|
|
u32 val = get_u32(data);
|
|
bgp_set_attr_u32(to, BA_LOCAL_PREF, flags, val);
|
|
}
|
|
|
|
|
|
static void
|
|
bgp_decode_atomic_aggr(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data UNUSED, uint len, ea_list **to)
|
|
{
|
|
if (len != 0)
|
|
DISCARD(BAD_LENGTH, "ATOMIC_AGGR", len);
|
|
|
|
bgp_set_attr_data(to, BA_ATOMIC_AGGR, flags, NULL, 0);
|
|
}
|
|
|
|
static int
|
|
bgp_encode_aggregator(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
|
|
{
|
|
const byte *data = a->u.ptr->data;
|
|
uint len = a->u.ptr->length;
|
|
|
|
if (!s->as4_session)
|
|
{
|
|
/* Prepare 16-bit AGGREGATOR (from 32-bit one) in a temporary buffer */
|
|
byte *dst = alloca(6);
|
|
len = aggregator_32to16(dst, data);
|
|
data = dst;
|
|
}
|
|
|
|
return bgp_put_attr(buf, size, BA_AGGREGATOR, a->flags, data, len);
|
|
}
|
|
|
|
static void
|
|
bgp_decode_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
|
{
|
|
if (len != (s->as4_session ? 8 : 6))
|
|
DISCARD(BAD_LENGTH, "AGGREGATOR", len);
|
|
|
|
if (!s->as4_session)
|
|
{
|
|
/* Prepare 32-bit AGGREGATOR (from 16-bit one) in a temporary buffer */
|
|
byte *src = data;
|
|
data = alloca(8);
|
|
len = aggregator_16to32(data, src);
|
|
}
|
|
|
|
bgp_set_attr_data(to, BA_AGGREGATOR, flags, data, len);
|
|
}
|
|
|
|
static void
|
|
bgp_format_aggregator(const eattr *a, byte *buf, uint size UNUSED)
|
|
{
|
|
const byte *data = a->u.ptr->data;
|
|
|
|
bsprintf(buf, "%I4 AS%u", get_ip4(data+4), get_u32(data+0));
|
|
}
|
|
|
|
|
|
static void
|
|
bgp_export_community(struct bgp_export_state *s, eattr *a)
|
|
{
|
|
if (a->u.ptr->length == 0)
|
|
UNSET(a);
|
|
|
|
a->u.ptr = int_set_sort(s->pool, a->u.ptr);
|
|
}
|
|
|
|
static void
|
|
bgp_decode_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
|
{
|
|
if (!len || (len % 4))
|
|
WITHDRAW(BAD_LENGTH, "COMMUNITY", len);
|
|
|
|
struct adata *ad = lp_alloc_adata(s->pool, len);
|
|
get_u32s(data, (u32 *) ad->data, len / 4);
|
|
bgp_set_attr_ptr(to, BA_COMMUNITY, flags, ad);
|
|
}
|
|
|
|
|
|
static void
|
|
bgp_export_originator_id(struct bgp_export_state *s, eattr *a)
|
|
{
|
|
if (!s->proto->is_internal)
|
|
UNSET(a);
|
|
}
|
|
|
|
static void
|
|
bgp_decode_originator_id(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
|
{
|
|
if (!s->proto->is_internal)
|
|
DISCARD(BAD_EBGP, "ORIGINATOR_ID");
|
|
|
|
if (len != 4)
|
|
WITHDRAW(BAD_LENGTH, "ORIGINATOR_ID", len);
|
|
|
|
u32 val = get_u32(data);
|
|
bgp_set_attr_u32(to, BA_ORIGINATOR_ID, flags, val);
|
|
}
|
|
|
|
|
|
static void
|
|
bgp_export_cluster_list(struct bgp_export_state *s UNUSED, eattr *a)
|
|
{
|
|
if (!s->proto->is_internal)
|
|
UNSET(a);
|
|
|
|
if (a->u.ptr->length == 0)
|
|
UNSET(a);
|
|
}
|
|
|
|
static void
|
|
bgp_decode_cluster_list(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
|
{
|
|
if (!s->proto->is_internal)
|
|
DISCARD(BAD_EBGP, "CLUSTER_LIST");
|
|
|
|
if (!len || (len % 4))
|
|
WITHDRAW(BAD_LENGTH, "CLUSTER_LIST", len);
|
|
|
|
struct adata *ad = lp_alloc_adata(s->pool, len);
|
|
get_u32s(data, (u32 *) ad->data, len / 4);
|
|
bgp_set_attr_ptr(to, BA_CLUSTER_LIST, flags, ad);
|
|
}
|
|
|
|
static void
|
|
bgp_format_cluster_list(const eattr *a, byte *buf, uint size)
|
|
{
|
|
/* Truncates cluster lists larger than buflen, probably not a problem */
|
|
int_set_format(a->u.ptr, 0, -1, buf, size);
|
|
}
|
|
|
|
|
|
int
|
|
bgp_encode_mp_reach_mrt(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
|
|
{
|
|
/*
|
|
* Limited version of MP_REACH_NLRI used for MRT table dumps (IPv6 only):
|
|
*
|
|
* 3 B MP_REACH_NLRI header
|
|
* 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
|
|
* var MP_REACH_NLRI data - Network Address of Next Hop
|
|
*/
|
|
|
|
ip_addr *nh = (void *) a->u.ptr->data;
|
|
uint len = a->u.ptr->length;
|
|
|
|
ASSERT((len == 16) || (len == 32));
|
|
|
|
if (size < (3+1+len))
|
|
return -1;
|
|
|
|
bgp_put_attr_hdr3(buf, BA_MP_REACH_NLRI, BAF_OPTIONAL, 1+len);
|
|
buf[3] = len;
|
|
buf += 4;
|
|
|
|
put_ip6(buf, ipa_to_ip6(nh[0]));
|
|
|
|
if (len == 32)
|
|
put_ip6(buf+16, ipa_to_ip6(nh[1]));
|
|
|
|
return 3+1+len;
|
|
}
|
|
|
|
static inline u32
|
|
get_af3(byte *buf)
|
|
{
|
|
return (get_u16(buf) << 16) | buf[2];
|
|
}
|
|
|
|
static void
|
|
bgp_decode_mp_reach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
|
|
{
|
|
/*
|
|
* 2 B MP_REACH_NLRI data - Address Family Identifier
|
|
* 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
|
|
* 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
|
|
* var MP_REACH_NLRI data - Network Address of Next Hop
|
|
* 1 B MP_REACH_NLRI data - Reserved (zero)
|
|
* var MP_REACH_NLRI data - Network Layer Reachability Information
|
|
*/
|
|
|
|
if ((len < 5) || (len < (5 + (uint) data[3])))
|
|
bgp_parse_error(s, 9);
|
|
|
|
s->mp_reach_af = get_af3(data);
|
|
s->mp_next_hop_len = data[3];
|
|
s->mp_next_hop_data = data + 4;
|
|
s->mp_reach_len = len - 5 - s->mp_next_hop_len;
|
|
s->mp_reach_nlri = data + 5 + s->mp_next_hop_len;
|
|
}
|
|
|
|
|
|
static void
|
|
bgp_decode_mp_unreach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
|
|
{
|
|
/*
|
|
* 2 B MP_UNREACH_NLRI data - Address Family Identifier
|
|
* 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
|
|
* var MP_UNREACH_NLRI data - Network Layer Reachability Information
|
|
*/
|
|
|
|
if (len < 3)
|
|
bgp_parse_error(s, 9);
|
|
|
|
s->mp_unreach_af = get_af3(data);
|
|
s->mp_unreach_len = len - 3;
|
|
s->mp_unreach_nlri = data + 3;
|
|
}
|
|
|
|
|
|
static void
|
|
bgp_export_ext_community(struct bgp_export_state *s, eattr *a)
|
|
{
|
|
if (!s->proto->is_interior)
|
|
{
|
|
struct adata *ad = ec_set_del_nontrans(s->pool, a->u.ptr);
|
|
|
|
if (ad->length == 0)
|
|
UNSET(a);
|
|
|
|
ec_set_sort_x(ad);
|
|
a->u.ptr = ad;
|
|
}
|
|
else
|
|
{
|
|
if (a->u.ptr->length == 0)
|
|
UNSET(a);
|
|
|
|
a->u.ptr = ec_set_sort(s->pool, a->u.ptr);
|
|
}
|
|
}
|
|
|
|
static void
|
|
bgp_decode_ext_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
|
{
|
|
if (!len || (len % 8))
|
|
WITHDRAW(BAD_LENGTH, "EXT_COMMUNITY", len);
|
|
|
|
struct adata *ad = lp_alloc_adata(s->pool, len);
|
|
get_u32s(data, (u32 *) ad->data, len / 4);
|
|
bgp_set_attr_ptr(to, BA_EXT_COMMUNITY, flags, ad);
|
|
}
|
|
|
|
|
|
static void
|
|
bgp_decode_as4_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
|
{
|
|
if (s->as4_session)
|
|
DISCARD(NEW_BGP, "AS4_AGGREGATOR");
|
|
|
|
if (len != 8)
|
|
DISCARD(BAD_LENGTH, "AS4_AGGREGATOR", len);
|
|
|
|
bgp_set_attr_data(to, BA_AS4_AGGREGATOR, flags, data, len);
|
|
}
|
|
|
|
static void
|
|
bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
|
{
|
|
struct bgp_proto *p = s->proto;
|
|
int sets = p->cf->allow_as_sets;
|
|
|
|
char err[128];
|
|
|
|
if (s->as4_session)
|
|
DISCARD(NEW_BGP, "AS4_PATH");
|
|
|
|
if (len < 6)
|
|
DISCARD(BAD_LENGTH, "AS4_PATH", len);
|
|
|
|
if (!as_path_valid(data, len, 4, sets, 1, err, sizeof(err)))
|
|
DISCARD("Malformed AS4_PATH attribute - %s", err);
|
|
|
|
struct adata *a = lp_alloc_adata(s->pool, len);
|
|
memcpy(a->data, data, len);
|
|
|
|
/* AS_CONFED* segments are invalid in AS4_PATH; RFC 6793 6 */
|
|
if (as_path_contains_confed(a))
|
|
{
|
|
REPORT("Discarding AS_CONFED* segment from AS4_PATH attribute");
|
|
a = as_path_strip_confed(s->pool, a);
|
|
}
|
|
|
|
bgp_set_attr_ptr(to, BA_AS4_PATH, flags, a);
|
|
}
|
|
|
|
|
|
static void
|
|
bgp_export_aigp(struct bgp_export_state *s, eattr *a)
|
|
{
|
|
if (!s->channel->cf->aigp)
|
|
UNSET(a);
|
|
}
|
|
|
|
static void
|
|
bgp_decode_aigp(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
|
{
|
|
char err[128];
|
|
|
|
/* Acceptability test postponed to bgp_finish_attrs() */
|
|
|
|
if ((flags ^ bgp_attr_table[BA_AIGP].flags) & (BAF_OPTIONAL | BAF_TRANSITIVE))
|
|
DISCARD("Malformed AIGP attribute - conflicting flags (%02x)", flags);
|
|
|
|
if (!bgp_aigp_valid(data, len, err, sizeof(err)))
|
|
DISCARD("Malformed AIGP attribute - %s", err);
|
|
|
|
bgp_set_attr_data(to, BA_AIGP, flags, data, len);
|
|
}
|
|
|
|
static void
|
|
bgp_format_aigp(const eattr *a, byte *buf, uint size UNUSED)
|
|
{
|
|
const byte *b = bgp_aigp_get_tlv(a->u.ptr, BGP_AIGP_METRIC);
|
|
|
|
if (!b)
|
|
bsprintf(buf, "?");
|
|
else
|
|
bsprintf(buf, "%lu", get_u64(b + 3));
|
|
}
|
|
|
|
|
|
static void
|
|
bgp_export_large_community(struct bgp_export_state *s, eattr *a)
|
|
{
|
|
if (a->u.ptr->length == 0)
|
|
UNSET(a);
|
|
|
|
a->u.ptr = lc_set_sort(s->pool, a->u.ptr);
|
|
}
|
|
|
|
static void
|
|
bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
|
{
|
|
if (!len || (len % 12))
|
|
WITHDRAW(BAD_LENGTH, "LARGE_COMMUNITY", len);
|
|
|
|
struct adata *ad = lp_alloc_adata(s->pool, len);
|
|
get_u32s(data, (u32 *) ad->data, len / 4);
|
|
bgp_set_attr_ptr(to, BA_LARGE_COMMUNITY, flags, ad);
|
|
}
|
|
|
|
static void
|
|
bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a)
|
|
{
|
|
const net_addr *n = s->route->net;
|
|
u32 *labels = (u32 *) a->u.ptr->data;
|
|
uint lnum = a->u.ptr->length / 4;
|
|
|
|
/* Perhaps we should just ignore it? */
|
|
if (!s->mpls)
|
|
REJECT("Unexpected MPLS stack");
|
|
|
|
/* Empty MPLS stack is not allowed */
|
|
if (!lnum)
|
|
REJECT("Malformed MPLS stack - empty");
|
|
|
|
/* This is ugly, but we must ensure that labels fit into NLRI field */
|
|
if ((24*lnum + (net_is_vpn(n) ? 64 : 0) + net_pxlen(n)) > 255)
|
|
REJECT("Malformed MPLS stack - too many labels (%u)", lnum);
|
|
|
|
for (uint i = 0; i < lnum; i++)
|
|
{
|
|
if (labels[i] > 0xfffff)
|
|
REJECT("Malformed MPLS stack - invalid label (%u)", labels[i]);
|
|
|
|
/* TODO: Check for special-purpose label values? */
|
|
}
|
|
}
|
|
|
|
static int
|
|
bgp_encode_mpls_label_stack(struct bgp_write_state *s, eattr *a, byte *buf UNUSED, uint size UNUSED)
|
|
{
|
|
/*
|
|
* MPLS labels are encoded as a part of the NLRI in MP_REACH_NLRI attribute,
|
|
* so we store MPLS_LABEL_STACK and encode it later by AFI-specific hooks.
|
|
*/
|
|
|
|
s->mpls_labels = a->u.ptr;
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
bgp_decode_mpls_label_stack(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data UNUSED, uint len UNUSED, ea_list **to UNUSED)
|
|
{
|
|
DISCARD("Discarding received attribute #0");
|
|
}
|
|
|
|
static void
|
|
bgp_format_mpls_label_stack(const eattr *a, byte *buf, uint size)
|
|
{
|
|
u32 *labels = (u32 *) a->u.ptr->data;
|
|
uint lnum = a->u.ptr->length / 4;
|
|
char *pos = buf;
|
|
|
|
for (uint i = 0; i < lnum; i++)
|
|
{
|
|
if (size < 20)
|
|
{
|
|
bsprintf(pos, "...");
|
|
return;
|
|
}
|
|
|
|
uint l = bsprintf(pos, "%d/", labels[i]);
|
|
ADVANCE(pos, size, l);
|
|
}
|
|
|
|
/* Clear last slash or terminate empty string */
|
|
pos[lnum ? -1 : 0] = 0;
|
|
}
|
|
|
|
static inline void
|
|
bgp_export_unknown(struct bgp_export_state *s UNUSED, eattr *a)
|
|
{
|
|
if (!(a->flags & BAF_TRANSITIVE))
|
|
UNSET(a);
|
|
|
|
a->flags |= BAF_PARTIAL;
|
|
}
|
|
|
|
static inline void
|
|
bgp_decode_unknown(struct bgp_parse_state *s UNUSED, uint code, uint flags, byte *data, uint len, ea_list **to)
|
|
{
|
|
if (!(flags & BAF_OPTIONAL))
|
|
WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags);
|
|
|
|
/* Cannot use bgp_set_attr_data() as it works on known attributes only */
|
|
ea_set_attr_data(to, &bgp_attr_table[code].class, flags, data, len);
|
|
}
|
|
|
|
static inline void
|
|
bgp_format_unknown(const eattr *a, byte *buf, uint size)
|
|
{
|
|
if (a->flags & BAF_TRANSITIVE)
|
|
bsnprintf(buf, size, "(transitive)");
|
|
}
|
|
|
|
|
|
/*
|
|
* Attribute table
|
|
*/
|
|
|
|
static union bgp_attr_desc bgp_attr_table[BGP_ATTR_MAX] = {
|
|
[BA_ORIGIN] = {
|
|
.name = "bgp_origin",
|
|
.type = T_ENUM_BGP_ORIGIN,
|
|
.flags = BAF_TRANSITIVE,
|
|
.export = bgp_export_origin,
|
|
.encode = bgp_encode_u8,
|
|
.decode = bgp_decode_origin,
|
|
.format = bgp_format_origin,
|
|
},
|
|
[BA_AS_PATH] = {
|
|
.name = "bgp_path",
|
|
.type = T_PATH,
|
|
.flags = BAF_TRANSITIVE,
|
|
.encode = bgp_encode_as_path,
|
|
.decode = bgp_decode_as_path,
|
|
},
|
|
[BA_NEXT_HOP] = {
|
|
.name = "bgp_next_hop",
|
|
.type = T_IP,
|
|
.flags = BAF_TRANSITIVE,
|
|
.encode = bgp_encode_next_hop,
|
|
.decode = bgp_decode_next_hop,
|
|
.format = bgp_format_next_hop,
|
|
},
|
|
[BA_MULTI_EXIT_DISC] = {
|
|
.name = "bgp_med",
|
|
.type = T_INT,
|
|
.flags = BAF_OPTIONAL,
|
|
.encode = bgp_encode_u32,
|
|
.decode = bgp_decode_med,
|
|
},
|
|
[BA_LOCAL_PREF] = {
|
|
.name = "bgp_local_pref",
|
|
.type = T_INT,
|
|
.flags = BAF_TRANSITIVE,
|
|
.export = bgp_export_local_pref,
|
|
.encode = bgp_encode_u32,
|
|
.decode = bgp_decode_local_pref,
|
|
},
|
|
[BA_ATOMIC_AGGR] = {
|
|
.name = "bgp_atomic_aggr",
|
|
.type = T_OPAQUE,
|
|
.flags = BAF_TRANSITIVE,
|
|
.encode = bgp_encode_raw,
|
|
.decode = bgp_decode_atomic_aggr,
|
|
},
|
|
[BA_AGGREGATOR] = {
|
|
.name = "bgp_aggregator",
|
|
.type = T_OPAQUE,
|
|
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
|
|
.encode = bgp_encode_aggregator,
|
|
.decode = bgp_decode_aggregator,
|
|
.format = bgp_format_aggregator,
|
|
},
|
|
[BA_COMMUNITY] = {
|
|
.name = "bgp_community",
|
|
.type = T_CLIST,
|
|
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
|
|
.export = bgp_export_community,
|
|
.encode = bgp_encode_u32s,
|
|
.decode = bgp_decode_community,
|
|
},
|
|
[BA_ORIGINATOR_ID] = {
|
|
.name = "bgp_originator_id",
|
|
.type = T_QUAD,
|
|
.flags = BAF_OPTIONAL,
|
|
.export = bgp_export_originator_id,
|
|
.encode = bgp_encode_u32,
|
|
.decode = bgp_decode_originator_id,
|
|
},
|
|
[BA_CLUSTER_LIST] = {
|
|
.name = "bgp_cluster_list",
|
|
.type = T_CLIST,
|
|
.flags = BAF_OPTIONAL,
|
|
.export = bgp_export_cluster_list,
|
|
.encode = bgp_encode_u32s,
|
|
.decode = bgp_decode_cluster_list,
|
|
.format = bgp_format_cluster_list,
|
|
},
|
|
[BA_MP_REACH_NLRI] = {
|
|
.name = "bgp_mp_reach_nlri",
|
|
.type = T_OPAQUE,
|
|
.hidden = 1,
|
|
.flags = BAF_OPTIONAL,
|
|
.decode = bgp_decode_mp_reach_nlri,
|
|
},
|
|
[BA_MP_UNREACH_NLRI] = {
|
|
.name = "bgp_mp_unreach_nlri",
|
|
.type = T_OPAQUE,
|
|
.hidden = 1,
|
|
.flags = BAF_OPTIONAL,
|
|
.decode = bgp_decode_mp_unreach_nlri,
|
|
},
|
|
[BA_EXT_COMMUNITY] = {
|
|
.name = "bgp_ext_community",
|
|
.type = T_ECLIST,
|
|
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
|
|
.export = bgp_export_ext_community,
|
|
.encode = bgp_encode_u32s,
|
|
.decode = bgp_decode_ext_community,
|
|
},
|
|
[BA_AS4_PATH] = {
|
|
.name = "bgp_as4_path",
|
|
.type = T_PATH,
|
|
.hidden = 1,
|
|
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
|
|
.encode = bgp_encode_raw,
|
|
.decode = bgp_decode_as4_path,
|
|
},
|
|
[BA_AS4_AGGREGATOR] = {
|
|
.name = "bgp_as4_aggregator",
|
|
.type = T_OPAQUE,
|
|
.hidden = 1,
|
|
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
|
|
.encode = bgp_encode_raw,
|
|
.decode = bgp_decode_as4_aggregator,
|
|
.format = bgp_format_aggregator,
|
|
},
|
|
[BA_AIGP] = {
|
|
.name = "bgp_aigp",
|
|
.type = T_OPAQUE,
|
|
.flags = BAF_OPTIONAL | BAF_DECODE_FLAGS,
|
|
.export = bgp_export_aigp,
|
|
.encode = bgp_encode_raw,
|
|
.decode = bgp_decode_aigp,
|
|
.format = bgp_format_aigp,
|
|
},
|
|
[BA_LARGE_COMMUNITY] = {
|
|
.name = "bgp_large_community",
|
|
.type = T_LCLIST,
|
|
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
|
|
.export = bgp_export_large_community,
|
|
.encode = bgp_encode_u32s,
|
|
.decode = bgp_decode_large_community,
|
|
},
|
|
[BA_MPLS_LABEL_STACK] = {
|
|
.name = "bgp_mpls_label_stack",
|
|
.type = T_CLIST,
|
|
.readonly = 1,
|
|
.export = bgp_export_mpls_label_stack,
|
|
.encode = bgp_encode_mpls_label_stack,
|
|
.decode = bgp_decode_mpls_label_stack,
|
|
.format = bgp_format_mpls_label_stack,
|
|
},
|
|
};
|
|
|
|
eattr *
|
|
bgp_find_attr(ea_list *attrs, uint code)
|
|
{
|
|
return ea_find(attrs, BGP_EA_ID(code));
|
|
}
|
|
|
|
void
|
|
bgp_register_attrs(void)
|
|
{
|
|
for (uint i=0; i<ARRAY_SIZE(bgp_attr_table); i++)
|
|
{
|
|
if (!bgp_attr_table[i].name)
|
|
bgp_attr_table[i] = (union bgp_attr_desc) {
|
|
.name = mb_sprintf(&root_pool, "bgp_unknown_0x%02x", i),
|
|
.type = T_OPAQUE,
|
|
.flags = BAF_OPTIONAL,
|
|
.readonly = 1,
|
|
.export = bgp_export_unknown,
|
|
.encode = bgp_encode_raw,
|
|
.decode = bgp_decode_unknown,
|
|
.format = bgp_format_unknown,
|
|
};
|
|
|
|
ea_register_init(&bgp_attr_table[i].class);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Attribute export
|
|
*/
|
|
|
|
static inline void
|
|
bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to)
|
|
{
|
|
const union bgp_attr_desc *desc = bgp_find_attr_desc(a);
|
|
if (!desc)
|
|
return;
|
|
|
|
/* The flags might have been zero if the attr was added locally */
|
|
a->flags = (a->flags & BAF_PARTIAL) | desc->flags;
|
|
|
|
/* Set partial bit if new opt-trans attribute is attached to non-local route */
|
|
if ((s->src != NULL) && (a->originated) &&
|
|
(a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE))
|
|
a->flags |= BAF_PARTIAL;
|
|
|
|
/* Call specific hook */
|
|
CALL(desc->export, s, a);
|
|
|
|
/* Attribute might become undefined in hook */
|
|
if (a->undef)
|
|
return;
|
|
|
|
/* Append updated attribute */
|
|
to->attrs[to->count++] = *a;
|
|
}
|
|
|
|
/**
|
|
* bgp_export_attrs - export BGP attributes
|
|
* @s: BGP export state
|
|
* @attrs: a list of extended attributes
|
|
*
|
|
* The bgp_export_attrs() function takes a list of attributes and merges it to
|
|
* one newly allocated and sorted segment. Attributes are validated and
|
|
* normalized by type-specific export hooks and attribute flags are updated.
|
|
* Some attributes may be eliminated (e.g. unknown non-tranitive attributes, or
|
|
* empty community sets).
|
|
*
|
|
* Result: one sorted attribute list segment, or NULL if attributes are unsuitable.
|
|
*/
|
|
static inline ea_list *
|
|
bgp_export_attrs(struct bgp_export_state *s, ea_list *a)
|
|
{
|
|
/* Merge the attribute list */
|
|
ea_list *new = ea_normalize(a, 0);
|
|
ASSERT_DIE(new);
|
|
|
|
uint i, count;
|
|
count = new->count;
|
|
new->count = 0;
|
|
|
|
/* Export each attribute */
|
|
for (i = 0; i < count; i++)
|
|
bgp_export_attr(s, &new->attrs[i], new);
|
|
|
|
if (s->err_reject)
|
|
return NULL;
|
|
|
|
return new;
|
|
}
|
|
|
|
|
|
/*
|
|
* Attribute encoding
|
|
*/
|
|
|
|
static inline int
|
|
bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
|
|
{
|
|
const union bgp_attr_desc *desc = bgp_find_attr_desc(a);
|
|
ASSERT_DIE(desc);
|
|
return desc->encode(s, a, buf, size);
|
|
}
|
|
|
|
/**
|
|
* bgp_encode_attrs - encode BGP attributes
|
|
* @s: BGP write state
|
|
* @attrs: a list of extended attributes
|
|
* @buf: buffer
|
|
* @end: buffer end
|
|
*
|
|
* The bgp_encode_attrs() function takes a list of extended attributes
|
|
* and converts it to its BGP representation (a part of an Update message).
|
|
* BGP write state may be fake when called from MRT protocol.
|
|
*
|
|
* Result: Length of the attribute block generated or -1 if not enough space.
|
|
*/
|
|
int
|
|
bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end)
|
|
{
|
|
byte *pos = buf;
|
|
int i, len;
|
|
|
|
for (i = 0; i < attrs->count; i++)
|
|
{
|
|
len = bgp_encode_attr(s, &attrs->attrs[i], pos, end - pos);
|
|
|
|
if (len < 0)
|
|
return -1;
|
|
|
|
pos += len;
|
|
}
|
|
|
|
return pos - buf;
|
|
}
|
|
|
|
|
|
/*
|
|
* Attribute decoding
|
|
*/
|
|
|
|
/* Forward declaration; called from bgp_decode_attrs() for sessions that are not AS4-aware */
static void bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool);
|
|
|
|
static inline int
|
|
bgp_as_path_loopy(struct bgp_proto *p, ea_list *attrs, u32 asn)
|
|
{
|
|
eattr *e = bgp_find_attr(attrs, BA_AS_PATH);
|
|
int num = p->cf->allow_local_as + 1;
|
|
return (e && (num > 0) && as_path_contains(e->u.ptr, asn, num));
|
|
}
|
|
|
|
static inline int
|
|
bgp_originator_id_loopy(struct bgp_proto *p, ea_list *attrs)
|
|
{
|
|
eattr *e = bgp_find_attr(attrs, BA_ORIGINATOR_ID);
|
|
return (e && (e->u.data == p->local_id));
|
|
}
|
|
|
|
static inline int
|
|
bgp_cluster_list_loopy(struct bgp_proto *p, ea_list *attrs)
|
|
{
|
|
eattr *e = bgp_find_attr(attrs, BA_CLUSTER_LIST);
|
|
return (e && int_set_contains(e->u.ptr, p->rr_cluster_id));
|
|
}
|
|
|
|
static inline void
|
|
bgp_decode_attr(struct bgp_parse_state *s, byte code, byte flags, byte *data, uint len, ea_list **to)
|
|
{
|
|
/* Handle duplicate attributes; RFC 7606 3 (g) */
|
|
if (BIT32_TEST(s->attrs_seen, code))
|
|
{
|
|
if ((code == BA_MP_REACH_NLRI) || (code == BA_MP_UNREACH_NLRI))
|
|
bgp_parse_error(s, 1);
|
|
else
|
|
DISCARD("Discarding duplicate attribute (code %u)", code);
|
|
}
|
|
BIT32_SET(s->attrs_seen, code);
|
|
|
|
ASSERT_DIE(bgp_attr_table[code].id);
|
|
const union bgp_attr_desc *desc = &bgp_attr_table[code];
|
|
|
|
/* Handle conflicting flags; RFC 7606 3 (c) */
|
|
if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) &&
|
|
!(desc->flags & BAF_DECODE_FLAGS))
|
|
WITHDRAW("Malformed %s attribute - conflicting flags (%02x, expected %02x)", desc->name, flags, desc->flags);
|
|
|
|
desc->decode(s, code, flags, data, len, to);
|
|
}
|
|
|
|
/**
|
|
* bgp_decode_attrs - check and decode BGP attributes
|
|
* @s: BGP parse state
|
|
* @data: start of attribute block
|
|
* @len: length of attribute block
|
|
*
|
|
* This function takes a BGP attribute block (a part of an Update message), checks
|
|
* its consistency and converts it to a list of BIRD route attributes represented
|
|
* by an (uncached) &rta.
|
|
*/
|
|
ea_list *
|
|
bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len)
|
|
{
|
|
struct bgp_proto *p = s->proto;
|
|
ea_list *attrs = NULL;
|
|
uint alen;
|
|
byte code, flags;
|
|
byte *pos = data;
|
|
|
|
/* Parse the attributes */
|
|
while (len)
|
|
{
|
|
alen = 0;
|
|
|
|
/* Read attribute type */
|
|
if (len < 2)
|
|
goto framing_error;
|
|
flags = pos[0];
|
|
code = pos[1];
|
|
ADVANCE(pos, len, 2);
|
|
|
|
/* Read attribute length */
|
|
if (flags & BAF_EXT_LEN)
|
|
{
|
|
if (len < 2)
|
|
goto framing_error;
|
|
alen = get_u16(pos);
|
|
ADVANCE(pos, len, 2);
|
|
}
|
|
else
|
|
{
|
|
if (len < 1)
|
|
goto framing_error;
|
|
alen = *pos;
|
|
ADVANCE(pos, len, 1);
|
|
}
|
|
|
|
if (alen > len)
|
|
goto framing_error;
|
|
|
|
DBG("Attr %02x %02x %u\n", code, flags, alen);
|
|
|
|
bgp_decode_attr(s, code, flags, pos, alen, &attrs);
|
|
ADVANCE(pos, len, alen);
|
|
}
|
|
|
|
if (s->err_withdraw)
|
|
goto withdraw;
|
|
|
|
/* If there is no reachability NLRI, we are finished */
|
|
if (!s->ip_reach_len && !s->mp_reach_len)
|
|
return NULL;
|
|
|
|
|
|
/* Handle missing mandatory attributes; RFC 7606 3 (d) */
|
|
if (!BIT32_TEST(s->attrs_seen, BA_ORIGIN))
|
|
{ REPORT(NO_MANDATORY, "ORIGIN"); goto withdraw; }
|
|
|
|
if (!BIT32_TEST(s->attrs_seen, BA_AS_PATH))
|
|
{ REPORT(NO_MANDATORY, "AS_PATH"); goto withdraw; }
|
|
|
|
if (s->ip_reach_len && !BIT32_TEST(s->attrs_seen, BA_NEXT_HOP))
|
|
{ REPORT(NO_MANDATORY, "NEXT_HOP"); goto withdraw; }
|
|
|
|
/* When receiving attributes from non-AS4-aware BGP speaker, we have to
|
|
reconstruct AS_PATH and AGGREGATOR attributes; RFC 6793 4.2.3 */
|
|
if (!p->as4_session)
|
|
bgp_process_as4_attrs(&attrs, s->pool);
|
|
|
|
/* Reject routes with our ASN in AS_PATH attribute */
|
|
if (bgp_as_path_loopy(p, attrs, p->local_as))
|
|
goto loop;
|
|
|
|
/* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4.0 */
|
|
if ((p->public_as != p->local_as) && bgp_as_path_loopy(p, attrs, p->public_as))
|
|
goto loop;
|
|
|
|
/* Reject routes with our Router ID in ORIGINATOR_ID attribute; RFC 4456 8 */
|
|
if (p->is_internal && bgp_originator_id_loopy(p, attrs))
|
|
goto loop;
|
|
|
|
/* Reject routes with our Cluster ID in CLUSTER_LIST attribute; RFC 4456 8 */
|
|
if (p->rr_client && bgp_cluster_list_loopy(p, attrs))
|
|
goto loop;
|
|
|
|
/* If there is no local preference, define one */
|
|
if (!BIT32_TEST(s->attrs_seen, BA_LOCAL_PREF))
|
|
bgp_set_attr_u32(&attrs, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
|
|
|
|
return attrs;
|
|
|
|
|
|
framing_error:
|
|
/* RFC 7606 4 - handle attribute framing errors */
|
|
REPORT("Malformed attribute list - framing error (%u/%u) at %d",
|
|
alen, len, (int) (pos - s->attrs));
|
|
|
|
withdraw:
|
|
/* RFC 7606 5.2 - handle missing NLRI during errors */
|
|
if (!s->ip_reach_len && !s->mp_reach_len)
|
|
bgp_parse_error(s, 1);
|
|
|
|
s->err_withdraw = 1;
|
|
return NULL;
|
|
|
|
loop:
|
|
/* Loops are handled as withdraws, but ignored silently. Do not set err_withdraw. */
|
|
return NULL;
|
|
}
|
|
|
|
void
|
|
bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to)
|
|
{
|
|
/* AIGP test here instead of in bgp_decode_aigp() - we need to know channel */
|
|
if (BIT32_TEST(s->attrs_seen, BA_AIGP) && !s->channel->cf->aigp)
|
|
{
|
|
REPORT("Discarding AIGP attribute received on non-AIGP session");
|
|
bgp_unset_attr(to, BA_AIGP);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* Route bucket hash table
|
|
*/
|
|
|
|
#define RBH_KEY(b) b->eattrs, b->hash
|
|
#define RBH_NEXT(b) b->next
|
|
#define RBH_EQ(a1,h1,a2,h2) h1 == h2 && ea_same(a1, a2)
|
|
#define RBH_FN(a,h) h
|
|
|
|
#define RBH_REHASH bgp_rbh_rehash
|
|
#define RBH_PARAMS /8, *2, 2, 2, 12, 20
|
|
|
|
|
|
HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket)
|
|
|
|
void
|
|
bgp_init_bucket_table(struct bgp_channel *c)
|
|
{
|
|
HASH_INIT(c->bucket_hash, c->pool, 8);
|
|
|
|
init_list(&c->bucket_queue);
|
|
c->withdraw_bucket = NULL;
|
|
}
|
|
|
|
void
|
|
bgp_free_bucket_table(struct bgp_channel *c)
|
|
{
|
|
HASH_FREE(c->bucket_hash);
|
|
|
|
struct bgp_bucket *b;
|
|
WALK_LIST_FIRST(b, c->bucket_queue)
|
|
{
|
|
rem_node(&b->send_node);
|
|
mb_free(b);
|
|
}
|
|
|
|
mb_free(c->withdraw_bucket);
|
|
c->withdraw_bucket = NULL;
|
|
}
|
|
|
|
static struct bgp_bucket *
|
|
bgp_get_bucket(struct bgp_channel *c, ea_list *new)
|
|
{
|
|
/* Hash and lookup */
|
|
u32 hash = ea_hash(new);
|
|
struct bgp_bucket *b = HASH_FIND(c->bucket_hash, RBH, new, hash);
|
|
|
|
if (b)
|
|
return b;
|
|
|
|
/* Scan the list for total size */
|
|
uint ea_size = BIRD_CPU_ALIGN(ea_list_size(new));
|
|
uint size = sizeof(struct bgp_bucket) + ea_size;
|
|
|
|
/* Allocate the bucket */
|
|
b = mb_alloc(c->pool, size);
|
|
*b = (struct bgp_bucket) { };
|
|
init_list(&b->prefixes);
|
|
b->hash = hash;
|
|
|
|
/* Copy the ea_list */
|
|
ea_list_copy(b->eattrs, new, ea_size);
|
|
|
|
/* Insert the bucket to bucket hash */
|
|
HASH_INSERT2(c->bucket_hash, RBH, c->pool, b);
|
|
|
|
return b;
|
|
}
|
|
|
|
static struct bgp_bucket *
|
|
bgp_get_withdraw_bucket(struct bgp_channel *c)
|
|
{
|
|
if (!c->withdraw_bucket)
|
|
{
|
|
c->withdraw_bucket = mb_allocz(c->pool, sizeof(struct bgp_bucket));
|
|
init_list(&c->withdraw_bucket->prefixes);
|
|
}
|
|
|
|
return c->withdraw_bucket;
|
|
}
|
|
|
|
static void
|
|
bgp_free_bucket_xx(struct bgp_channel *c, struct bgp_bucket *b)
|
|
{
|
|
HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b);
|
|
mb_free(b);
|
|
}
|
|
|
|
int
|
|
bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b)
|
|
{
|
|
/* Won't free the withdraw bucket */
|
|
if (b == c->withdraw_bucket)
|
|
return 0;
|
|
|
|
if (EMPTY_LIST(b->prefixes))
|
|
rem_node(&b->send_node);
|
|
|
|
if (b->px_uc || !EMPTY_LIST(b->prefixes))
|
|
return 0;
|
|
|
|
bgp_free_bucket_xx(c, b);
|
|
return 1;
|
|
}
|
|
|
|
void
|
|
bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b)
|
|
{
|
|
rem_node(&b->send_node);
|
|
add_tail(&c->bucket_queue, &b->send_node);
|
|
}
|
|
|
|
void
|
|
bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
|
|
{
|
|
struct bgp_proto *p = (void *) c->c.proto;
|
|
struct bgp_bucket *wb = bgp_get_withdraw_bucket(c);
|
|
|
|
log(L_ERR "%s: Attribute list too long", p->p.name);
|
|
while (!EMPTY_LIST(b->prefixes))
|
|
{
|
|
struct bgp_prefix *px = HEAD(b->prefixes);
|
|
|
|
log(L_ERR "%s: - withdrawing %N", p->p.name, &px->net);
|
|
rem_node(&px->buck_node_xx);
|
|
add_tail(&wb->prefixes, &px->buck_node_xx);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* Prefix hash table
|
|
*/
|
|
|
|
#define PXH_KEY(px) px->net, px->path_id, px->hash
|
|
#define PXH_NEXT(px) px->next
|
|
#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && (c->add_path_tx ? (i1 == i2) : 1) && net_equal(n1, n2)
|
|
#define PXH_FN(n,i,h) h
|
|
|
|
#define PXH_REHASH bgp_pxh_rehash
|
|
#define PXH_PARAMS /8, *2, 2, 2, 12, 24
|
|
|
|
|
|
HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
|
|
|
|
void
|
|
bgp_init_prefix_table(struct bgp_channel *c)
|
|
{
|
|
HASH_INIT(c->prefix_hash, c->pool, 8);
|
|
|
|
uint alen = net_addr_length[c->c.net_type];
|
|
c->prefix_slab = alen ? sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL;
|
|
}
|
|
|
|
void
|
|
bgp_free_prefix_table(struct bgp_channel *c)
|
|
{
|
|
HASH_FREE(c->prefix_hash);
|
|
|
|
rfree(c->prefix_slab);
|
|
c->prefix_slab = NULL;
|
|
}
|
|
|
|
static struct bgp_prefix *
|
|
bgp_get_prefix(struct bgp_channel *c, const net_addr *net, u32 path_id)
|
|
{
|
|
u32 path_id_hash = c->add_path_tx ? path_id : 0;
|
|
/* We must use a different hash function than the rtable */
|
|
u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id_hash));
|
|
struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id_hash, hash);
|
|
|
|
if (px)
|
|
return px;
|
|
|
|
if (c->prefix_slab)
|
|
px = sl_alloc(c->prefix_slab);
|
|
else
|
|
px = mb_alloc(c->pool, sizeof(struct bgp_prefix) + net->length);
|
|
|
|
*px = (struct bgp_prefix) { };
|
|
px->hash = hash;
|
|
px->path_id = path_id;
|
|
net_copy(px->net, net);
|
|
|
|
HASH_INSERT2(c->prefix_hash, PXH, c->pool, px);
|
|
|
|
return px;
|
|
}
|
|
|
|
/* Forward declaration; bgp_update_prefix() below calls it before its definition */
static void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px);
|
|
|
|
static inline int
|
|
bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *b)
|
|
{
|
|
#define BPX_TRACE(what) do { \
|
|
if (c->c.debug & D_ROUTES) log(L_TRACE "%s.%s < %s %N %uG %s", \
|
|
c->c.proto->name, c->c.name, what, \
|
|
px->net, px->path_id, (b == c->withdraw_bucket) ? "withdraw" : "update"); } while (0)
|
|
px->lastmod = current_time();
|
|
|
|
/* Already queued for the same bucket */
|
|
if (px->cur == b)
|
|
{
|
|
BPX_TRACE("already queued");
|
|
return 0;
|
|
}
|
|
|
|
/* Unqueue from the old bucket */
|
|
if (px->cur)
|
|
{
|
|
rem_node(&px->buck_node_xx);
|
|
bgp_done_bucket(c, px->cur);
|
|
}
|
|
|
|
/* The new bucket is the same as we sent before */
|
|
if ((px->last == b) || c->c.out_table && !px->last && (b == c->withdraw_bucket))
|
|
{
|
|
if (px->cur)
|
|
BPX_TRACE("reverted");
|
|
else
|
|
BPX_TRACE("already sent");
|
|
|
|
/* Well, we haven't sent anything yet */
|
|
if (!px->last)
|
|
bgp_free_prefix(c, px);
|
|
|
|
px->cur = NULL;
|
|
return 0;
|
|
}
|
|
|
|
/* Enqueue the bucket if it has been empty */
|
|
if ((b != c->withdraw_bucket) && EMPTY_LIST(b->prefixes))
|
|
add_tail(&c->bucket_queue, &b->send_node);
|
|
|
|
/* Enqueue to the new bucket and indicate the change */
|
|
add_tail(&b->prefixes, &px->buck_node_xx);
|
|
px->cur = b;
|
|
|
|
BPX_TRACE("queued");
|
|
return 1;
|
|
|
|
#undef BPX_TRACE
|
|
}
|
|
|
|
static void
|
|
bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px)
|
|
{
|
|
HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px);
|
|
|
|
if (c->prefix_slab)
|
|
sl_free(px);
|
|
else
|
|
mb_free(px);
|
|
}
|
|
|
|
void
|
|
bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck)
|
|
{
|
|
/* Cleanup: We're called from bucket senders. */
|
|
ASSERT_DIE(px->cur == buck);
|
|
rem_node(&px->buck_node_xx);
|
|
|
|
/* We may want to store the updates */
|
|
if (c->c.out_table)
|
|
{
|
|
/* Nothing to be sent right now */
|
|
px->cur = NULL;
|
|
|
|
/* Unref the previous sent version */
|
|
if (px->last)
|
|
px->last->px_uc--;
|
|
|
|
/* Ref the current sent version */
|
|
if (buck != c->withdraw_bucket)
|
|
{
|
|
px->last = buck;
|
|
px->last->px_uc++;
|
|
return;
|
|
}
|
|
|
|
/* Prefixes belonging to the withdraw bucket are freed always */
|
|
}
|
|
|
|
bgp_free_prefix(c, px);
|
|
}
|
|
|
|
|
|
/*
|
|
* Prefix hash table exporter
|
|
*/
|
|
|
|
static void
|
|
bgp_out_table_feed(void *data)
|
|
{
|
|
struct rt_export_hook *hook = data;
|
|
struct bgp_channel *c = SKIP_BACK(struct bgp_channel, prefix_exporter, hook->table);
|
|
|
|
int max = 512;
|
|
|
|
const net_addr *neq = (hook->req->addr_mode == TE_ADDR_EQUAL) ? hook->req->addr : NULL;
|
|
const net_addr *cand = NULL;
|
|
|
|
do {
|
|
HASH_WALK_ITER(c->prefix_hash, PXH, n, hook->hash_iter)
|
|
{
|
|
switch (hook->req->addr_mode)
|
|
{
|
|
case TE_ADDR_IN:
|
|
if (!net_in_netX(n->net, hook->req->addr))
|
|
continue;
|
|
/* fall through */
|
|
case TE_ADDR_NONE:
|
|
/* Splitting only for multi-net exports */
|
|
if (--max <= 0)
|
|
HASH_WALK_ITER_PUT;
|
|
break;
|
|
|
|
case TE_ADDR_FOR:
|
|
if (!neq)
|
|
{
|
|
if (net_in_netX(hook->req->addr, n->net) && (!cand || (n->net->length > cand->length)))
|
|
cand = n->net;
|
|
continue;
|
|
}
|
|
/* fall through */
|
|
case TE_ADDR_EQUAL:
|
|
if (!net_equal(n->net, neq))
|
|
continue;
|
|
break;
|
|
}
|
|
|
|
struct bgp_bucket *buck = n->cur ?: n->last;
|
|
ea_list *ea = NULL;
|
|
if (buck == c->withdraw_bucket)
|
|
ea_set_dest(&ea, 0, RTD_UNREACHABLE);
|
|
else
|
|
{
|
|
ea = buck->eattrs;
|
|
eattr *eanh = bgp_find_attr(ea, BA_NEXT_HOP);
|
|
ASSERT_DIE(eanh);
|
|
const ip_addr *nh = (const void *) eanh->u.ptr->data;
|
|
|
|
struct nexthop_adata nhad = {
|
|
.ad = { .length = sizeof (struct nexthop_adata) - sizeof (struct adata), },
|
|
.nh = { .gw = nh[0], },
|
|
};
|
|
|
|
ea_set_attr(&ea, EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, tmp_copy_adata(&nhad.ad)));
|
|
}
|
|
|
|
struct rte_storage es = {
|
|
.rte = {
|
|
.attrs = ea,
|
|
.net = n->net,
|
|
.src = rt_find_source_global(n->path_id),
|
|
.sender = NULL,
|
|
.lastmod = n->lastmod,
|
|
.flags = n->cur ? REF_PENDING : 0,
|
|
},
|
|
};
|
|
|
|
struct rt_pending_export rpe = {
|
|
.new = &es, .new_best = &es,
|
|
};
|
|
|
|
if (hook->req->export_bulk)
|
|
{
|
|
rte *feed = &es.rte;
|
|
hook->req->export_bulk(hook->req, n->net, &rpe, &feed, 1);
|
|
}
|
|
else if (hook->req->export_one)
|
|
hook->req->export_one(hook->req, n->net, &rpe);
|
|
else
|
|
bug("No export method in export request");
|
|
}
|
|
HASH_WALK_ITER_END;
|
|
|
|
neq = cand;
|
|
cand = NULL;
|
|
} while (neq);
|
|
|
|
if (hook->hash_iter)
|
|
ev_schedule_work(hook->event);
|
|
else
|
|
rt_set_export_state(hook, TES_READY);
|
|
}
|
|
|
|
static struct rt_export_hook *
|
|
bgp_out_table_export_start(struct rt_exporter *re, struct rt_export_request *req UNUSED)
|
|
{
|
|
struct bgp_channel *c = SKIP_BACK(struct bgp_channel, prefix_exporter, re);
|
|
pool *p = rp_new(c->c.proto->pool, "Export hook");
|
|
struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook));
|
|
hook->pool = p;
|
|
hook->lp = lp_new_default(p);
|
|
hook->event = ev_new_init(p, bgp_out_table_feed, hook);
|
|
hook->feed_type = TFT_HASH;
|
|
|
|
return hook;
|
|
}
|
|
|
|
void
|
|
bgp_setup_out_table(struct bgp_channel *c)
|
|
{
|
|
ASSERT_DIE(c->c.out_table == NULL);
|
|
|
|
c->prefix_exporter = (struct rt_exporter) {
|
|
.addr_type = c->c.table->addr_type,
|
|
.start = bgp_out_table_export_start,
|
|
};
|
|
|
|
init_list(&c->prefix_exporter.hooks);
|
|
|
|
c->c.out_table = &c->prefix_exporter;
|
|
}
|
|
|
|
|
|
/*
|
|
* BGP protocol glue
|
|
*/
|
|
|
|
int
|
|
bgp_preexport(struct channel *c, rte *e)
|
|
{
|
|
struct proto *SRC = e->src->proto;
|
|
struct bgp_proto *p = (struct bgp_proto *) (c->proto);
|
|
struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL;
|
|
|
|
/* Reject our routes */
|
|
if (src == p)
|
|
return -1;
|
|
|
|
/* Accept non-BGP routes */
|
|
if (src == NULL)
|
|
return 0;
|
|
|
|
/* Reject flowspec that failed validation */
|
|
if (net_is_flow(e->net))
|
|
switch (rt_get_flowspec_valid(e))
|
|
{
|
|
case FLOWSPEC_VALID:
|
|
break;
|
|
case FLOWSPEC_INVALID:
|
|
return -1;
|
|
case FLOWSPEC_UNKNOWN:
|
|
ASSUME((rt_get_source_attr(e) != RTS_BGP) ||
|
|
!((struct bgp_channel *) SKIP_BACK(struct channel, in_req, e->sender->req))->base_table);
|
|
break;
|
|
case FLOWSPEC__MAX:
|
|
bug("This never happens.");
|
|
}
|
|
|
|
/* IBGP route reflection, RFC 4456 */
|
|
if (p->is_internal && src->is_internal && (p->local_as == src->local_as))
|
|
{
|
|
/* Rejected unless configured as route reflector */
|
|
if (!p->rr_client && !src->rr_client)
|
|
return -1;
|
|
|
|
/* Generally, this should be handled when path is received, but we check it
|
|
also here as rr_cluster_id may be undefined or different in src. */
|
|
if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs))
|
|
return -1;
|
|
}
|
|
|
|
/* Handle well-known communities, RFC 1997 */
|
|
struct eattr *com;
|
|
if (p->cf->interpret_communities &&
|
|
(com = ea_find(e->attrs, BGP_EA_ID(BA_COMMUNITY))))
|
|
{
|
|
const struct adata *d = com->u.ptr;
|
|
|
|
/* Do not export anywhere */
|
|
if (int_set_contains(d, BGP_COMM_NO_ADVERTISE))
|
|
return -1;
|
|
|
|
/* Do not export outside of AS (or member-AS) */
|
|
if (!p->is_internal && int_set_contains(d, BGP_COMM_NO_EXPORT_SUBCONFED))
|
|
return -1;
|
|
|
|
/* Do not export outside of AS (or confederation) */
|
|
if (!p->is_interior && int_set_contains(d, BGP_COMM_NO_EXPORT))
|
|
return -1;
|
|
|
|
/* Do not export LLGR_STALE routes to LLGR-ignorant peers */
|
|
if (!p->conn->remote_caps->llgr_aware && int_set_contains(d, BGP_COMM_LLGR_STALE))
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static ea_list *
|
|
bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs0, struct linpool *pool)
|
|
{
|
|
struct proto *SRC = e->src->proto;
|
|
struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL;
|
|
struct bgp_export_state s = { .proto = p, .channel = c, .pool = pool, .src = src, .route = e, .mpls = c->desc->mpls };
|
|
ea_list *attrs = attrs0;
|
|
eattr *a;
|
|
const adata *ad;
|
|
|
|
/* ORIGIN attribute - mandatory, attach if missing */
|
|
if (! bgp_find_attr(attrs0, BA_ORIGIN))
|
|
bgp_set_attr_u32(&attrs, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
|
|
|
|
/* AS_PATH attribute - mandatory */
|
|
a = bgp_find_attr(attrs0, BA_AS_PATH);
|
|
ad = a ? a->u.ptr : &null_adata;
|
|
|
|
/* AS_PATH attribute - strip AS_CONFED* segments outside confederation */
|
|
if ((!p->cf->confederation || !p->is_interior) && as_path_contains_confed(ad))
|
|
ad = as_path_strip_confed(pool, ad);
|
|
|
|
/* AS_PATH attribute - keep or prepend ASN */
|
|
if (p->is_internal || p->rs_client)
|
|
{
|
|
/* IBGP or route server -> just ensure there is one */
|
|
if (!a)
|
|
bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, &null_adata);
|
|
}
|
|
else if (p->is_interior)
|
|
{
|
|
/* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */
|
|
ad = as_path_prepend2(pool, ad, AS_PATH_CONFED_SEQUENCE, p->public_as);
|
|
bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, ad);
|
|
}
|
|
else /* Regular EBGP (no RS, no confederation) */
|
|
{
|
|
/* Regular EBGP -> prepend ASN as regular sequence */
|
|
ad = as_path_prepend2(pool, ad, AS_PATH_SEQUENCE, p->public_as);
|
|
bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, ad);
|
|
|
|
/* MULTI_EXIT_DESC attribute - accept only if set in export filter */
|
|
a = bgp_find_attr(attrs0, BA_MULTI_EXIT_DISC);
|
|
if (a && !(a->fresh))
|
|
bgp_unset_attr(&attrs, BA_MULTI_EXIT_DISC);
|
|
}
|
|
|
|
/* NEXT_HOP attribute - delegated to AF-specific hook */
|
|
a = bgp_find_attr(attrs0, BA_NEXT_HOP);
|
|
bgp_update_next_hop(&s, a, &attrs);
|
|
|
|
/* LOCAL_PREF attribute - required for IBGP, attach if missing */
|
|
if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF))
|
|
bgp_set_attr_u32(&attrs, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
|
|
|
|
/* AIGP attribute - accumulate local metric or originate new one */
|
|
u64 metric;
|
|
if (s.local_next_hop &&
|
|
(bgp_total_aigp_metric_(e, &metric, &ad) ||
|
|
(c->cf->aigp_originate && bgp_init_aigp_metric(e, &metric, &ad))))
|
|
{
|
|
ad = bgp_aigp_set_metric(pool, ad, metric);
|
|
bgp_set_attr_ptr(&attrs, BA_AIGP, 0, ad);
|
|
}
|
|
|
|
/* IBGP route reflection, RFC 4456 */
|
|
if (src && src->is_internal && p->is_internal && (src->local_as == p->local_as))
|
|
{
|
|
/* ORIGINATOR_ID attribute - attach if not already set */
|
|
if (! bgp_find_attr(attrs0, BA_ORIGINATOR_ID))
|
|
bgp_set_attr_u32(&attrs, BA_ORIGINATOR_ID, 0, src->remote_id);
|
|
|
|
/* CLUSTER_LIST attribute - prepend cluster ID */
|
|
a = bgp_find_attr(attrs0, BA_CLUSTER_LIST);
|
|
ad = a ? a->u.ptr : NULL;
|
|
|
|
/* Prepend src cluster ID */
|
|
if (src->rr_cluster_id)
|
|
ad = int_set_prepend(pool, ad, src->rr_cluster_id);
|
|
|
|
/* Prepend dst cluster ID if src and dst clusters are different */
|
|
if (p->rr_cluster_id && (src->rr_cluster_id != p->rr_cluster_id))
|
|
ad = int_set_prepend(pool, ad, p->rr_cluster_id);
|
|
|
|
/* Should be at least one prepended cluster ID */
|
|
bgp_set_attr_ptr(&attrs, BA_CLUSTER_LIST, 0, ad);
|
|
}
|
|
|
|
/* AS4_* transition attributes, RFC 6793 4.2.2 */
|
|
if (! p->as4_session)
|
|
{
|
|
a = bgp_find_attr(attrs, BA_AS_PATH);
|
|
if (a && as_path_contains_as4(a->u.ptr))
|
|
{
|
|
bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr));
|
|
bgp_set_attr_ptr(&attrs, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr));
|
|
}
|
|
|
|
a = bgp_find_attr(attrs, BA_AGGREGATOR);
|
|
if (a && aggregator_contains_as4(a->u.ptr))
|
|
{
|
|
bgp_set_attr_ptr(&attrs, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr));
|
|
bgp_set_attr_ptr(&attrs, BA_AS4_AGGREGATOR, 0, a->u.ptr);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Presence of mandatory attributes ORIGIN and AS_PATH is ensured by above
|
|
* conditions. Presence and validity of quasi-mandatory NEXT_HOP attribute
|
|
* should be checked in AF-specific hooks.
|
|
*/
|
|
|
|
/* Apply per-attribute export hooks for validatation and normalization */
|
|
return bgp_export_attrs(&s, attrs);
|
|
}
|
|
|
|
void
|
|
bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old)
|
|
{
|
|
struct bgp_proto *p = (void *) P;
|
|
struct bgp_channel *c = (void *) C;
|
|
struct bgp_bucket *buck;
|
|
u32 path;
|
|
|
|
if (new)
|
|
{
|
|
struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs, tmp_linpool);
|
|
|
|
/* Error during attribute processing */
|
|
if (!attrs)
|
|
log(L_ERR "%s: Invalid route %N withdrawn", p->p.name, n);
|
|
|
|
/* If attributes are invalid, we fail back to withdraw */
|
|
buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c);
|
|
path = new->src->global_id;
|
|
}
|
|
else
|
|
{
|
|
buck = bgp_get_withdraw_bucket(c);
|
|
path = old->src->global_id;
|
|
}
|
|
|
|
if (bgp_update_prefix(c, bgp_get_prefix(c, n, path), buck))
|
|
bgp_schedule_packet(p->conn, c, PKT_UPDATE);
|
|
}
|
|
|
|
|
|
static inline u32
|
|
bgp_get_neighbor(rte *r)
|
|
{
|
|
eattr *e = ea_find(r->attrs, BGP_EA_ID(BA_AS_PATH));
|
|
u32 as;
|
|
|
|
if (e && as_path_get_first_regular(e->u.ptr, &as))
|
|
return as;
|
|
|
|
/* If AS_PATH is not defined, we treat rte as locally originated */
|
|
struct bgp_proto *p = (void *) r->src->proto;
|
|
return p->cf->confederation ?: p->local_as;
|
|
}
|
|
|
|
static inline int
|
|
rte_stale(rte *r)
|
|
{
|
|
if (r->pflags & BGP_REF_STALE)
|
|
return 1;
|
|
|
|
if (r->pflags & BGP_REF_NOT_STALE)
|
|
return 0;
|
|
|
|
/* If staleness is unknown, compute and cache it */
|
|
eattr *a = ea_find(r->attrs, BGP_EA_ID(BA_COMMUNITY));
|
|
if (a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE))
|
|
{
|
|
r->pflags |= BGP_REF_STALE;
|
|
return 1;
|
|
}
|
|
else
|
|
{
|
|
r->pflags |= BGP_REF_NOT_STALE;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
int
|
|
bgp_rte_better(rte *new, rte *old)
|
|
{
|
|
struct bgp_proto *new_bgp = (struct bgp_proto *) new->src->proto;
|
|
struct bgp_proto *old_bgp = (struct bgp_proto *) old->src->proto;
|
|
eattr *x, *y;
|
|
u32 n, o;
|
|
|
|
/* Skip suppressed routes (see bgp_rte_recalculate()) */
|
|
n = new->pflags & BGP_REF_SUPPRESSED;
|
|
o = old->pflags & BGP_REF_SUPPRESSED;
|
|
if (n > o)
|
|
return 0;
|
|
if (n < o)
|
|
return 1;
|
|
|
|
/* RFC 4271 9.1.2.1. Route resolvability test */
|
|
n = rte_resolvable(new);
|
|
o = rte_resolvable(old);
|
|
if (n > o)
|
|
return 1;
|
|
if (n < o)
|
|
return 0;
|
|
|
|
/* LLGR draft - depreference stale routes */
|
|
n = rte_stale(new);
|
|
o = rte_stale(old);
|
|
if (n > o)
|
|
return 0;
|
|
if (n < o)
|
|
return 1;
|
|
|
|
/* Start with local preferences */
|
|
x = ea_find(new->attrs, BGP_EA_ID(BA_LOCAL_PREF));
|
|
y = ea_find(old->attrs, BGP_EA_ID(BA_LOCAL_PREF));
|
|
n = x ? x->u.data : new_bgp->cf->default_local_pref;
|
|
o = y ? y->u.data : old_bgp->cf->default_local_pref;
|
|
if (n > o)
|
|
return 1;
|
|
if (n < o)
|
|
return 0;
|
|
|
|
/* RFC 7311 4.1 - Apply AIGP metric */
|
|
u64 n2 = bgp_total_aigp_metric(new);
|
|
u64 o2 = bgp_total_aigp_metric(old);
|
|
if (n2 < o2)
|
|
return 1;
|
|
if (n2 > o2)
|
|
return 0;
|
|
|
|
/* RFC 4271 9.1.2.2. a) Use AS path lengths */
|
|
if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
|
|
{
|
|
x = ea_find(new->attrs, BGP_EA_ID(BA_AS_PATH));
|
|
y = ea_find(old->attrs, BGP_EA_ID(BA_AS_PATH));
|
|
n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
|
|
o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
|
|
if (n < o)
|
|
return 1;
|
|
if (n > o)
|
|
return 0;
|
|
}
|
|
|
|
/* RFC 4271 9.1.2.2. b) Use origins */
|
|
x = ea_find(new->attrs, BGP_EA_ID(BA_ORIGIN));
|
|
y = ea_find(old->attrs, BGP_EA_ID(BA_ORIGIN));
|
|
n = x ? x->u.data : ORIGIN_INCOMPLETE;
|
|
o = y ? y->u.data : ORIGIN_INCOMPLETE;
|
|
if (n < o)
|
|
return 1;
|
|
if (n > o)
|
|
return 0;
|
|
|
|
/* RFC 4271 9.1.2.2. c) Compare MED's */
|
|
/* Proper RFC 4271 path selection cannot be interpreted as finding
|
|
* the best path in some ordering. It is implemented partially in
|
|
* bgp_rte_recalculate() when deterministic_med option is
|
|
* active. Without that option, the behavior is just an
|
|
* approximation, which in specific situations may lead to
|
|
* persistent routing loops, because it is nondeterministic - it
|
|
* depends on the order in which routes appeared. But it is also the
|
|
* same behavior as used by default in Cisco routers, so it is
|
|
* probably not a big issue.
|
|
*/
|
|
if (new_bgp->cf->med_metric || old_bgp->cf->med_metric ||
|
|
(bgp_get_neighbor(new) == bgp_get_neighbor(old)))
|
|
{
|
|
x = ea_find(new->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC));
|
|
y = ea_find(old->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC));
|
|
n = x ? x->u.data : new_bgp->cf->default_med;
|
|
o = y ? y->u.data : old_bgp->cf->default_med;
|
|
if (n < o)
|
|
return 1;
|
|
if (n > o)
|
|
return 0;
|
|
}
|
|
|
|
/* RFC 4271 9.1.2.2. d) Prefer external peers */
|
|
if (new_bgp->is_interior > old_bgp->is_interior)
|
|
return 0;
|
|
if (new_bgp->is_interior < old_bgp->is_interior)
|
|
return 1;
|
|
|
|
/* RFC 4271 9.1.2.2. e) Compare IGP metrics */
|
|
n = new_bgp->cf->igp_metric ? rt_get_igp_metric(new) : 0;
|
|
o = old_bgp->cf->igp_metric ? rt_get_igp_metric(old) : 0;
|
|
if (n < o)
|
|
return 1;
|
|
if (n > o)
|
|
return 0;
|
|
|
|
/* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
|
|
/* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */
|
|
x = ea_find(new->attrs, BGP_EA_ID(BA_ORIGINATOR_ID));
|
|
y = ea_find(old->attrs, BGP_EA_ID(BA_ORIGINATOR_ID));
|
|
n = x ? x->u.data : new_bgp->remote_id;
|
|
o = y ? y->u.data : old_bgp->remote_id;
|
|
|
|
/* RFC 5004 - prefer older routes */
|
|
/* (if both are external and from different peer) */
|
|
if ((new_bgp->cf->prefer_older || old_bgp->cf->prefer_older) &&
|
|
!new_bgp->is_internal && n != o)
|
|
return 0;
|
|
|
|
/* rest of RFC 4271 9.1.2.2. f) */
|
|
if (n < o)
|
|
return 1;
|
|
if (n > o)
|
|
return 0;
|
|
|
|
/* RFC 4456 9. b) Compare cluster list lengths */
|
|
x = ea_find(new->attrs, BGP_EA_ID(BA_CLUSTER_LIST));
|
|
y = ea_find(old->attrs, BGP_EA_ID(BA_CLUSTER_LIST));
|
|
n = x ? int_set_get_size(x->u.ptr) : 0;
|
|
o = y ? int_set_get_size(y->u.ptr) : 0;
|
|
if (n < o)
|
|
return 1;
|
|
if (n > o)
|
|
return 0;
|
|
|
|
/* RFC 4271 9.1.2.2. g) Compare peer IP adresses */
|
|
return ipa_compare(new_bgp->remote_ip, old_bgp->remote_ip) < 0;
|
|
}
|
|
|
|
|
|
int
|
|
bgp_rte_mergable(rte *pri, rte *sec)
|
|
{
|
|
struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->src->proto;
|
|
struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->src->proto;
|
|
eattr *x, *y;
|
|
u32 p, s;
|
|
|
|
/* Skip suppressed routes (see bgp_rte_recalculate()) */
|
|
if ((pri->pflags ^ sec->pflags) & BGP_REF_SUPPRESSED)
|
|
return 0;
|
|
|
|
/* RFC 4271 9.1.2.1. Route resolvability test */
|
|
if (rte_resolvable(pri) != rte_resolvable(sec))
|
|
return 0;
|
|
|
|
/* LLGR draft - depreference stale routes */
|
|
if (rte_stale(pri) != rte_stale(sec))
|
|
return 0;
|
|
|
|
/* Start with local preferences */
|
|
x = ea_find(pri->attrs, BGP_EA_ID(BA_LOCAL_PREF));
|
|
y = ea_find(sec->attrs, BGP_EA_ID(BA_LOCAL_PREF));
|
|
p = x ? x->u.data : pri_bgp->cf->default_local_pref;
|
|
s = y ? y->u.data : sec_bgp->cf->default_local_pref;
|
|
if (p != s)
|
|
return 0;
|
|
|
|
/* RFC 4271 9.1.2.2. a) Use AS path lengths */
|
|
if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths)
|
|
{
|
|
x = ea_find(pri->attrs, BGP_EA_ID(BA_AS_PATH));
|
|
y = ea_find(sec->attrs, BGP_EA_ID(BA_AS_PATH));
|
|
p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
|
|
s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
|
|
|
|
if (p != s)
|
|
return 0;
|
|
|
|
// if (DELTA(p, s) > pri_bgp->cf->relax_multipath)
|
|
// return 0;
|
|
}
|
|
|
|
/* RFC 4271 9.1.2.2. b) Use origins */
|
|
x = ea_find(pri->attrs, BGP_EA_ID(BA_ORIGIN));
|
|
y = ea_find(sec->attrs, BGP_EA_ID(BA_ORIGIN));
|
|
p = x ? x->u.data : ORIGIN_INCOMPLETE;
|
|
s = y ? y->u.data : ORIGIN_INCOMPLETE;
|
|
if (p != s)
|
|
return 0;
|
|
|
|
/* RFC 4271 9.1.2.2. c) Compare MED's */
|
|
if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric ||
|
|
(bgp_get_neighbor(pri) == bgp_get_neighbor(sec)))
|
|
{
|
|
x = ea_find(pri->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC));
|
|
y = ea_find(sec->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC));
|
|
p = x ? x->u.data : pri_bgp->cf->default_med;
|
|
s = y ? y->u.data : sec_bgp->cf->default_med;
|
|
if (p != s)
|
|
return 0;
|
|
}
|
|
|
|
/* RFC 4271 9.1.2.2. d) Prefer external peers */
|
|
if (pri_bgp->is_interior != sec_bgp->is_interior)
|
|
return 0;
|
|
|
|
/* RFC 4271 9.1.2.2. e) Compare IGP metrics */
|
|
p = pri_bgp->cf->igp_metric ? rt_get_igp_metric(pri) : 0;
|
|
s = sec_bgp->cf->igp_metric ? rt_get_igp_metric(sec) : 0;
|
|
if (p != s)
|
|
return 0;
|
|
|
|
/* Remaining criteria are ignored */
|
|
|
|
return 1;
|
|
}
|
|
|
|
|
|
static inline int
|
|
same_group(rte *r, u32 lpref, u32 lasn)
|
|
{
|
|
return (rt_get_preference(r) == lpref) && (bgp_get_neighbor(r) == lasn);
|
|
}
|
|
|
|
static inline int
|
|
use_deterministic_med(struct rte_storage *r)
|
|
{
|
|
struct proto *P = r->rte.src->proto;
|
|
return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med;
|
|
}
|
|
|
|
int
|
|
bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
|
|
{
|
|
rte *key = new ? new : old;
|
|
u32 lpref = rt_get_preference(key);
|
|
u32 lasn = bgp_get_neighbor(key);
|
|
int old_suppressed = old ? !!(old->pflags & BGP_REF_SUPPRESSED) : 0;
|
|
|
|
/*
|
|
* Proper RFC 4271 path selection is a bit complicated, it cannot be
|
|
* implemented just by rte_better(), because it is not a linear
|
|
* ordering. But it can be splitted to two levels, where the lower
|
|
* level chooses the best routes in each group of routes from the
|
|
* same neighboring AS and higher level chooses the best route (with
|
|
* a slightly different ordering) between the best-in-group routes.
|
|
*
|
|
* When deterministic_med is disabled, we just ignore this issue and
|
|
* choose the best route by bgp_rte_better() alone. If enabled, the
|
|
* lower level of the route selection is done here (for the group
|
|
* to which the changed route belongs), all routes in group are
|
|
* marked as suppressed, just chosen best-in-group is not.
|
|
*
|
|
* Global best route selection then implements higher level by
|
|
* choosing between non-suppressed routes (as they are always
|
|
* preferred over suppressed routes). Routes from BGP protocols
|
|
* that do not set deterministic_med are just never suppressed. As
|
|
* they do not participate in the lower level selection, it is OK
|
|
* that this fn is not called for them.
|
|
*
|
|
* The idea is simple, the implementation is more problematic,
|
|
* mostly because of optimizations in rte_recalculate() that
|
|
* avoids full recalculation in most cases.
|
|
*
|
|
* We can assume that at least one of new, old is non-NULL and both
|
|
* are from the same protocol with enabled deterministic_med. We
|
|
* group routes by both neighbor AS (lasn) and preference (lpref),
|
|
* because bgp_rte_better() does not handle preference itself.
|
|
*/
|
|
|
|
/* If new and old are from different groups, we just process that
|
|
as two independent events */
|
|
if (new && old && !same_group(old, lpref, lasn))
|
|
{
|
|
int i1, i2;
|
|
i1 = bgp_rte_recalculate(table, net, NULL, old, old_best);
|
|
i2 = bgp_rte_recalculate(table, net, new, NULL, old_best);
|
|
return i1 || i2;
|
|
}
|
|
|
|
/*
|
|
* We could find the best-in-group and then make some shortcuts like
|
|
* in rte_recalculate, but as we would have to walk through all
|
|
* net->routes just to find it, it is probably not worth. So we
|
|
* just have one simple fast case that use just the old route.
|
|
* We also set suppressed flag to avoid using it in bgp_rte_better().
|
|
*/
|
|
|
|
if (new)
|
|
new->pflags |= BGP_REF_SUPPRESSED;
|
|
|
|
if (old)
|
|
{
|
|
old->pflags |= BGP_REF_SUPPRESSED;
|
|
|
|
/* The fast case - replace not best with worse (or remove not best) */
|
|
if (old_suppressed && !(new && bgp_rte_better(new, old)))
|
|
return 0;
|
|
}
|
|
|
|
/* The default case - find a new best-in-group route */
|
|
rte *r = new; /* new may not be in the list */
|
|
for (struct rte_storage *s = net->routes; rte_is_valid(RTE_OR_NULL(s)); s = s->next)
|
|
if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn))
|
|
{
|
|
s->rte.pflags |= BGP_REF_SUPPRESSED;
|
|
if (!r || bgp_rte_better(&s->rte, r))
|
|
r = &s->rte;
|
|
}
|
|
|
|
/* Simple case - the last route in group disappears */
|
|
if (!r)
|
|
return 0;
|
|
|
|
/* Found if new is mergable with best-in-group */
|
|
if (new && (new != r) && bgp_rte_mergable(r, new))
|
|
new->pflags &= ~BGP_REF_SUPPRESSED;
|
|
|
|
/* Found all existing routes mergable with best-in-group */
|
|
for (struct rte_storage *s = net->routes; rte_is_valid(RTE_OR_NULL(s)); s = s->next)
|
|
if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn))
|
|
if ((&s->rte != r) && bgp_rte_mergable(r, &s->rte))
|
|
s->rte.pflags &= ~BGP_REF_SUPPRESSED;
|
|
|
|
/* Found best-in-group */
|
|
r->pflags &= ~BGP_REF_SUPPRESSED;
|
|
|
|
/*
|
|
* There are generally two reasons why we have to force
|
|
* recalculation (return 1): First, the new route may be wrongfully
|
|
* chosen to be the best in the first case check in
|
|
* rte_recalculate(), this may happen only if old_best is from the
|
|
* same group. Second, another (different than new route)
|
|
* best-in-group is chosen and that may be the proper best (although
|
|
* rte_recalculate() without ignore that possibility).
|
|
*
|
|
* There are three possible cases according to whether the old route
|
|
* was the best in group (OBG, i.e. !old_suppressed) and whether the
|
|
* new route is the best in group (NBG, tested by r == new). These
|
|
* cases work even if old or new is NULL.
|
|
*
|
|
* NBG -> new is a possible candidate for the best route, so we just
|
|
* check for the first reason using same_group().
|
|
*
|
|
* !NBG && OBG -> Second reason applies, return 1
|
|
*
|
|
* !NBG && !OBG -> Best in group does not change, old != old_best,
|
|
* rte_better(new, old_best) is false and therefore
|
|
* the first reason does not apply, return 0
|
|
*/
|
|
|
|
if (r == new)
|
|
return old_best && same_group(old_best, lpref, lasn);
|
|
else
|
|
return !old_suppressed;
|
|
}
|
|
|
|
void
|
|
bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe UNUSED, rte **feed, uint count)
|
|
{
|
|
struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req);
|
|
struct rt_import_hook *irh = c->c.in_req.hook;
|
|
|
|
/* Find our routes among others */
|
|
for (uint i=0; i<count; i++)
|
|
{
|
|
rte *r = feed[i];
|
|
|
|
/* Not our route */
|
|
if (r->sender != irh)
|
|
continue;
|
|
|
|
/* A new route, do not mark as stale */
|
|
if (r->stale_cycle == irh->stale_set)
|
|
continue;
|
|
|
|
eattr *ea = ea_find(r->attrs, BGP_EA_ID(BA_COMMUNITY));
|
|
const struct adata *ad = ea ? ea->u.ptr : NULL;
|
|
uint flags = ea ? ea->flags : BAF_PARTIAL;
|
|
|
|
/* LLGR not allowed, withdraw the route */
|
|
if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR))
|
|
{
|
|
rte_import(&c->c.in_req, n, NULL, r->src);
|
|
continue;
|
|
}
|
|
|
|
/* Route already marked as LLGR, do nothing */
|
|
if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE))
|
|
continue;
|
|
|
|
/* Store the tmp_linpool state to aggresively save memory */
|
|
struct lp_state tmpp;
|
|
lp_save(tmp_linpool, &tmpp);
|
|
|
|
/* Mark the route as LLGR */
|
|
rte e0 = *r;
|
|
bgp_set_attr_ptr(&e0.attrs, BA_COMMUNITY, flags, int_set_add(tmp_linpool, ad, BGP_COMM_LLGR_STALE));
|
|
e0.pflags &= ~BGP_REF_NOT_STALE;
|
|
e0.pflags |= BGP_REF_STALE;
|
|
|
|
/* We need to update the route but keep it stale. */
|
|
ASSERT_DIE(irh->stale_set == irh->stale_valid + 1);
|
|
irh->stale_set--;
|
|
rte_import(&c->c.in_req, n, &e0, r->src);
|
|
irh->stale_set++;
|
|
|
|
/* Restore the memory state */
|
|
lp_restore(tmp_linpool, &tmpp);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* Reconstruct AS_PATH and AGGREGATOR according to RFC 6793 4.2.3
|
|
*/
|
|
static void
|
|
bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool)
|
|
{
|
|
eattr *p2 = bgp_find_attr(*attrs, BA_AS_PATH);
|
|
eattr *p4 = bgp_find_attr(*attrs, BA_AS4_PATH);
|
|
eattr *a2 = bgp_find_attr(*attrs, BA_AGGREGATOR);
|
|
eattr *a4 = bgp_find_attr(*attrs, BA_AS4_AGGREGATOR);
|
|
|
|
/* First, unset AS4_* attributes */
|
|
if (p4) bgp_unset_attr(attrs, BA_AS4_PATH);
|
|
if (a4) bgp_unset_attr(attrs, BA_AS4_AGGREGATOR);
|
|
|
|
/* Handle AGGREGATOR attribute */
|
|
if (a2 && a4)
|
|
{
|
|
u32 a2_asn = get_u32(a2->u.ptr->data);
|
|
|
|
/* If routes were aggregated by an old router, then AS4_PATH and
|
|
AS4_AGGREGATOR are invalid. In that case we give up. */
|
|
if (a2_asn != AS_TRANS)
|
|
return;
|
|
|
|
/* Use AS4_AGGREGATOR instead of AGGREGATOR */
|
|
a2->u.ptr = a4->u.ptr;
|
|
}
|
|
|
|
/* Handle AS_PATH attribute */
|
|
if (p2 && p4)
|
|
{
|
|
/* Both as_path_getlen() and as_path_cut() take AS_CONFED* as zero length */
|
|
int p2_len = as_path_getlen(p2->u.ptr);
|
|
int p4_len = as_path_getlen(p4->u.ptr);
|
|
|
|
/* AS_PATH is too short, give up */
|
|
if (p2_len < p4_len)
|
|
return;
|
|
|
|
/* Merge AS_PATH and AS4_PATH */
|
|
struct adata *apc = as_path_cut(pool, p2->u.ptr, p2_len - p4_len);
|
|
p2->u.ptr = as_path_merge(pool, apc, p4->u.ptr);
|
|
}
|
|
}
|
|
|
|
void
|
|
bgp_get_route_info(rte *e, byte *buf)
|
|
{
|
|
eattr *p = ea_find(e->attrs, BGP_EA_ID(BA_AS_PATH));
|
|
eattr *o = ea_find(e->attrs, BGP_EA_ID(BA_ORIGIN));
|
|
u32 origas;
|
|
|
|
buf += bsprintf(buf, " (%d", rt_get_preference(e));
|
|
|
|
if (!net_is_flow(e->net))
|
|
{
|
|
if (e->pflags & BGP_REF_SUPPRESSED)
|
|
buf += bsprintf(buf, "-");
|
|
|
|
if (rte_stale(e))
|
|
buf += bsprintf(buf, "s");
|
|
|
|
u64 metric = bgp_total_aigp_metric(e);
|
|
if (metric < BGP_AIGP_MAX)
|
|
{
|
|
buf += bsprintf(buf, "/%lu", metric);
|
|
}
|
|
else if (metric = rt_get_igp_metric(e))
|
|
{
|
|
if (!rte_resolvable(e))
|
|
buf += bsprintf(buf, "/-");
|
|
else if (metric >= IGP_METRIC_UNKNOWN)
|
|
buf += bsprintf(buf, "/?");
|
|
else
|
|
buf += bsprintf(buf, "/%d", metric);
|
|
}
|
|
}
|
|
buf += bsprintf(buf, ") [");
|
|
|
|
if (p && as_path_get_last(p->u.ptr, &origas))
|
|
buf += bsprintf(buf, "AS%u", origas);
|
|
if (o)
|
|
buf += bsprintf(buf, "%c", "ie?"[o->u.data]);
|
|
strcpy(buf, "]");
|
|
}
|