/* * BIRD -- The Border Gateway Protocol * * (c) 2000 Martin Mares * * Can be freely distributed and used under the terms of the GNU GPL. * * * Code added from Parsons, Inc. (BGPSEC additions) * (c) 2013-2013 * * Can be used under either license: * - Freely distributed and used under the terms of the GNU GPLv2. * - Freely distributed and used under a BSD license, See README.bgpsec. */ #ifndef _BIRD_BGP_H_ #define _BIRD_BGP_H_ #include #include "nest/route.h" #include "nest/bfd.h" #include "lib/hash.h" struct linpool; struct eattr; #ifdef CONFIG_BGPSEC /* BGPSec constants */ #define BGPSEC_VERSION 0 /* currently capability is arbitrary number from private use */ #define BGPSEC_CAPABILITY 212 #define BGPSEC_SKI_LENGTH 20 #define BGPSEC_ALGO_ID 1 /* XXX this needs to be changed */ #define BGPSEC_MAX_SIG_LENGTH 80 /* sig hash length is somewhat arbitrary, = 20 + MaxASPathLength*(28 + max_sig_length). As of 2016, max unique AS Path length found is 14. This value will allowy for for a hash buffer that can handle an AS path length ~47 long */ #define BGPSEC_SIG_HASH_LENGTH 5120 #define BGPSEC_MAX_INFO_ATTR_LENGTH 0 /* XXX this needs to be checked */ #endif struct bgp_config { struct proto_config c; u32 local_as, remote_as; ip_addr remote_ip; ip_addr source_addr; /* Source address to use */ struct iface *iface; /* Interface for link-local addresses */ u16 remote_port; /* Neighbor destination port */ int multihop; /* Number of hops if multihop */ int ttl_security; /* Enable TTL security [RFC5082] */ int next_hop_self; /* Always set next hop to local IP address */ int next_hop_keep; /* Do not touch next hop attribute */ int missing_lladdr; /* What we will do when we don' know link-local addr, see MLL_* */ int gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */ int compare_path_lengths; /* Use path lengths when selecting best route */ int med_metric; /* Compare MULTI_EXIT_DISC even between routes from differen ASes */ int igp_metric; /* Use IGP metrics when selecting best route */ int prefer_older; /* Prefer older routes according to RFC 5004 */ int deterministic_med; /* Use more complicated algo to have strict RFC 4271 MED comparison */ u32 default_local_pref; /* Default value for LOCAL_PREF attribute */ u32 default_med; /* Default value for MULTI_EXIT_DISC attribute */ int capabilities; /* Enable capability handshake [RFC3392] */ int enable_refresh; /* Enable local support for route refresh [RFC2918] */ int enable_as4; /* Enable local support for 4B AS numbers [RFC4893] */ /* BGPSec */ /* cannot be ifdef'd out due to config.Y compatibility */ int enable_bgpsec; /* Whether neighbor should be a BGPSec peer */ int bgpsec_prefer; /* Whether validly signed BGPsec routes are prefered during route selection */ int bgpsec_require; /* Whether neighbor should be a BGPSec peer */ char *bgpsec_ski; /* local subject key id */ u8 bgpsec_bski[BGPSEC_SKI_LENGTH]; /* binary local SKI */ char *bgpsec_key_repo_path; /* Path to the public key repository */ char *bgpsec_priv_key_path; /* Path to the private key location */ int bgpsec_save_binary_keys; /* Save a copy of the binary key */ int bgpsec_no_pcount0; /* allow peer to have pcount 0, xxx current default allows */ int bgpsec_no_invalid_routes; /* should invalid routes be dropped */ u32 rr_cluster_id; /* Route reflector cluster ID, if different from local ID */ int rr_client; /* Whether neighbor is RR client of me */ int rs_client; /* Whether neighbor is RS client of me */ int advertise_ipv4; /* Whether we should add IPv4 capability advertisement to OPEN message */ int passive; /* Do not initiate outgoing connection */ int interpret_communities; /* Hardwired handling of well-known communities */ int secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */ int add_path; /* Use ADD-PATH extension [draft] */ int allow_local_as; /* Allow that number of local ASNs in incoming AS_PATHs */ int gr_mode; /* Graceful restart mode (BGP_GR_*) */ unsigned gr_time; /* Graceful restart timeout */ unsigned connect_delay_time; /* Minimum delay between connect attempts */ unsigned connect_retry_time; /* Timeout for connect attempts */ unsigned hold_time, initial_hold_time; unsigned keepalive_time; unsigned error_amnesia_time; /* Errors are forgotten after */ unsigned error_delay_time_min; /* Time to wait after an error is detected */ unsigned error_delay_time_max; unsigned disable_after_error; /* Disable the protocol when error is detected */ char *password; /* Password used for MD5 authentication */ struct rtable_config *igp_table; /* Table used for recursive next hop lookups */ int check_link; /* Use iface link state for liveness detection */ int bfd; /* Use BFD for liveness detection */ }; #define MLL_SELF 1 #define MLL_DROP 2 #define MLL_IGNORE 3 #define GW_DIRECT 1 #define GW_RECURSIVE 2 #define ADD_PATH_RX 1 #define ADD_PATH_TX 2 #define ADD_PATH_FULL 3 #define BGP_GR_ABLE 1 #define BGP_GR_AWARE 2 /* For peer_gr_flags */ #define BGP_GRF_RESTART 0x80 /* For peer_gr_aflags */ #define BGP_GRF_FORWARDING 0x80 struct bgp_conn { struct bgp_proto *bgp; struct birdsock *sk; unsigned int state; /* State of connection state machine */ struct timer *connect_retry_timer; struct timer *hold_timer; struct timer *keepalive_timer; struct event *tx_ev; int packets_to_send; /* Bitmap of packet types to be sent */ int notify_code, notify_subcode, notify_size; byte *notify_data; u32 advertised_as; /* Temporary value for AS number received */ int start_state; /* protocol start_state snapshot when connection established */ #ifdef CONFIG_BGPSEC /* BGPsec */ u8 peer_bgpsec_support; /* Peer supports BGPSec */ #endif u8 peer_refresh_support; /* Peer supports route refresh [RFC2918] */ u8 peer_as4_support; /* Peer supports 4B AS numbers [RFC4893] */ u8 peer_add_path; /* Peer supports ADD-PATH [draft] */ u8 peer_enhanced_refresh_support; /* Peer supports enhanced refresh [RFC7313] */ u8 peer_gr_aware; u8 peer_gr_able; u16 peer_gr_time; u8 peer_gr_flags; u8 peer_gr_aflags; unsigned hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */ }; struct bgp_proto { struct proto p; struct bgp_config *cf; /* Shortcut to BGP configuration */ u32 local_as, remote_as; int start_state; /* Substates that partitions BS_START */ #ifdef CONFIG_BGPSEC /* BGPsec */ u8 bgpsec_send; /* Sender can send BGPSec messages */ u8 bgpsec_receive; /* Sender can receive BGPSec messages */ u8 bgpsec_ipv4; /* Sender uses BGPSec over iPv4 */ u8 bgpsec_ipv6; /* Sender uses BGPSec over iPv6 */ #endif u8 is_internal; /* Internal BGP connection (local_as == remote_as) */ u8 as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */ u8 add_path_rx; /* Session expects receive of ADD-PATH extended NLRI */ u8 add_path_tx; /* Session expects transmit of ADD-PATH extended NLRI */ u32 local_id; /* BGP identifier of this router */ u32 remote_id; /* BGP identifier of the neighbor */ u32 rr_cluster_id; /* Route reflector cluster ID */ int rr_client; /* Whether neighbor is RR client of me */ int rs_client; /* Whether neighbor is RS client of me */ u8 gr_ready; /* Neighbor could do graceful restart */ u8 gr_active; /* Neighbor is doing graceful restart */ u8 feed_state; /* Feed state (TX) for EoR, RR packets, see BFS_* */ u8 load_state; /* Load state (RX) for EoR, RR packets, see BFS_* */ struct bgp_conn *conn; /* Connection we have established */ struct bgp_conn outgoing_conn; /* Outgoing connection we're working with */ struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */ struct object_lock *lock; /* Lock for neighbor connection */ struct neighbor *neigh; /* Neighbor entry corresponding to remote ip, NULL if multihop */ struct bfd_request *bfd_req; /* BFD request, if BFD is used */ ip_addr source_addr; /* Local address used as an advertised next hop */ rtable *igp_table; /* Table used for recursive next hop lookups */ struct event *event; /* Event for respawning and shutting process */ struct timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */ struct timer *gr_timer; /* Timer waiting for reestablishment after graceful restart */ struct bgp_bucket **bucket_hash; /* Hash table of attribute buckets */ unsigned int hash_size, hash_count, hash_limit; HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */ slab *prefix_slab; /* Slab holding prefix nodes */ list bucket_queue; /* Queue of buckets to send */ struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */ unsigned startup_delay; /* Time to delay protocol startup by due to errors */ bird_clock_t last_proto_error; /* Time of last error that leads to protocol stop */ u8 last_error_class; /* Error class of last error */ u32 last_error_code; /* Error code of last error. BGP protocol errors are encoded as (bgp_err_code << 16 | bgp_err_subcode) */ #if defined(IPV6) || defined CONFIG_BGPSEC byte *mp_reach_start, *mp_unreach_start; /* Multiprotocol BGP attribute notes */ unsigned mp_reach_len, mp_unreach_len; ip_addr local_link; /* Link-level version of source_addr */ #endif }; struct bgp_prefix { struct { ip_addr prefix; int pxlen; } n; u32 path_id; struct bgp_prefix *next; node bucket_node; /* Node in per-bucket list */ }; struct bgp_bucket { node send_node; /* Node in send queue */ struct bgp_bucket *hash_next, *hash_prev; /* Node in bucket hash table */ unsigned hash; /* Hash over extended attributes */ list prefixes; /* Prefixes in this buckets */ ea_list eattrs[0]; /* Per-bucket extended attributes */ }; #define BGP_PORT 179 #define BGP_VERSION 4 #define BGP_HEADER_LENGTH 19 #define BGP_MAX_PACKET_LENGTH 4096 #define BGP_RX_BUFFER_SIZE 4096 #define BGP_TX_BUFFER_SIZE BGP_MAX_PACKET_LENGTH extern struct linpool *bgp_linpool; void bgp_start_timer(struct timer *t, int value); void bgp_check_config(struct bgp_config *c); void bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len); void bgp_close_conn(struct bgp_conn *c); void bgp_update_startup_delay(struct bgp_proto *p); void bgp_conn_enter_openconfirm_state(struct bgp_conn *conn); void bgp_conn_enter_established_state(struct bgp_conn *conn); void bgp_conn_enter_close_state(struct bgp_conn *conn); void bgp_conn_enter_idle_state(struct bgp_conn *conn); void bgp_handle_graceful_restart(struct bgp_proto *p); void bgp_graceful_restart_done(struct bgp_proto *p); void bgp_refresh_begin(struct bgp_proto *p); void bgp_refresh_end(struct bgp_proto *p); void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code); void bgp_stop(struct bgp_proto *p, unsigned subcode); struct rte_source *bgp_find_source(struct bgp_proto *p, u32 path_id); struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id); #ifdef LOCAL_DEBUG #define BGP_FORCE_DEBUG 1 #else #define BGP_FORCE_DEBUG 0 #endif #define BGP_TRACE(flags, msg, args...) do { if ((p->p.debug & flags) || BGP_FORCE_DEBUG) \ log(L_TRACE "%s: " msg, p->p.name , ## args ); } while(0) #define BGP_TRACE_RL(rl, flags, msg, args...) do { if ((p->p.debug & flags) || BGP_FORCE_DEBUG) \ log_rl(rl, L_TRACE "%s: " msg, p->p.name , ## args ); } while(0) /* attrs.c */ /* Hack: although BA_NEXT_HOP attribute has type EAF_TYPE_IP_ADDRESS, in IPv6 * we store two addesses in it - a global address and a link local address. */ #ifdef IPV6 #define NEXT_HOP_LENGTH (2*sizeof(ip_addr)) static inline void set_next_hop(byte *b, ip_addr addr) { ((ip_addr *) b)[0] = addr; ((ip_addr *) b)[1] = IPA_NONE; } #else #define NEXT_HOP_LENGTH sizeof(ip_addr) static inline void set_next_hop(byte *b, ip_addr addr) { ((ip_addr *) b)[0] = addr; } #endif void bgp_attach_attr(struct ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val); byte *bgp_attach_attr_wa(struct ea_list **to, struct linpool *pool, unsigned attr, unsigned len); struct rta *bgp_decode_attrs(struct bgp_conn *conn, byte *attr, unsigned int len, struct linpool *pool, byte * nlri, int nlri_len); int bgp_get_attr(struct eattr *e, byte *buf, int buflen); int bgp_rte_better(struct rte *, struct rte *); int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best); void bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs); int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *); void bgp_init_bucket_table(struct bgp_proto *); void bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck); void bgp_init_prefix_table(struct bgp_proto *p, u32 order); void bgp_free_prefix(struct bgp_proto *p, struct bgp_prefix *bp); unsigned int bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains); void bgp_get_route_info(struct rte *, byte *buf, struct ea_list *attrs); inline static void bgp_attach_attr_ip(struct ea_list **to, struct linpool *pool, unsigned attr, ip_addr a) { *(ip_addr *) bgp_attach_attr_wa(to, pool, attr, sizeof(ip_addr)) = a; } /* packets.c */ void mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new); void bgp_schedule_packet(struct bgp_conn *conn, int type); void bgp_kick_tx(void *vconn); void bgp_tx(struct birdsock *sk); int bgp_rx(struct birdsock *sk, int size); const char * bgp_error_dsc(unsigned code, unsigned subcode); void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len); /* Packet types */ #define PKT_OPEN 0x01 #define PKT_UPDATE 0x02 #define PKT_NOTIFICATION 0x03 #define PKT_KEEPALIVE 0x04 #define PKT_ROUTE_REFRESH 0x05 /* [RFC2918] */ #define PKT_BEGIN_REFRESH 0x1e /* Dummy type for BoRR packet [RFC7313] */ #define PKT_SCHEDULE_CLOSE 0x1f /* Used internally to schedule socket close */ /* Attributes */ #define BAF_OPTIONAL 0x80 #define BAF_TRANSITIVE 0x40 #define BAF_PARTIAL 0x20 #define BAF_EXT_LEN 0x10 /* Note: these must match location in the bgp_attr_table */ #define BA_ORIGIN 0x01 /* [RFC1771] */ /* WM */ #define BA_AS_PATH 0x02 /* WM */ #define BA_NEXT_HOP 0x03 /* WM */ #define BA_MULTI_EXIT_DISC 0x04 /* ON */ #define BA_LOCAL_PREF 0x05 /* WD */ #define BA_ATOMIC_AGGR 0x06 /* WD */ #define BA_AGGREGATOR 0x07 /* OT */ #define BA_COMMUNITY 0x08 /* [RFC1997] */ /* OT */ #define BA_ORIGINATOR_ID 0x09 /* [RFC1966] */ /* ON */ #define BA_CLUSTER_LIST 0x0a /* [RFC4456] */ /* We don't support these: */ #define BA_DPA 0x0b /* DPA deprecated */ #define BA_ADVERTISER 0x0c /* [RFC1863] */ #define BA_RCID_PATH 0x0d /* [RFC1863] */ /* supported? */ #define BA_MP_REACH_NLRI 0x0e /* [RFC4760] */ #define BA_MP_UNREACH_NLRI 0x0f /* [RFC4760] */ #define BA_EXT_COMMUNITY 0x10 /* [RFC4360] */ #define BA_AS4_PATH 0x11 /* [RFC6793] */ #define BA_AS4_AGGREGATOR 0x12 /* [RFC6793] */ /* not supported */ #define BA_SSA 0x13 /* SAFI Specific Attribute (SSA) (deprecated) */ #define BA_CONNECTOR_ATTR 0x14 /* (deprecated) [RFC6037] */ #define BA_AS_PATHLIMIT 0x15 /* (deprecated) [draft-ietf-idr-as-pathlimit] */ #define BA_PMSI_TUNNEL 0x16 /* [RFC6514] */ #define BA_TUNNEL_ENCAP 0x17 /* Tunnel Encapsulation [RFC5512] */ #define BA_TUNNEL_ENGINEERING 0x18 /* Traffic Engineering [RFC5543] */ #define BA_IPV6_EXT_COMMUNITY 0x19 /* IPv6 Address Specific Extended Community [RFC5701] */ #define BA_AIGP 0x1a /* AIGP (TEMPORARY, expired 2013-04-25) [draft-ietf-idr-aigp][Rex_Fernando][Pradosh_Mohapatra][Eric_Rosen][James_Uttaro] */ #define BA_PE_DIST_LABELS 0x1b /* PE Distinguisher Labels [RFC6514] */ #define BA_ENTROPY_LABELS 0x1c /* BGP Entropy Label Capability Attribute [RFC6790] */ #define BA_LS_ATTRIBUTE 0x1d /* BGP-LS Attribute (TEMPORARY, expired 2014-03-11) [draft-ietf-idr-ls-distribution] */ /* Supported */ #define BA_BGPSEC_SIGNATURE 0x1E /* XXX 30 is best guess, draft-ietf-sidr-bgpsec-protocol */ /* internal use only */ #define BA_INTERNAL_BGPSEC_VALID 0xdd /* BGP connection states */ #define BS_IDLE 0 #define BS_CONNECT 1 /* Attempting to connect */ #define BS_ACTIVE 2 /* Waiting for connection retry & listening */ #define BS_OPENSENT 3 #define BS_OPENCONFIRM 4 #define BS_ESTABLISHED 5 #define BS_CLOSE 6 /* Used during transition to BS_IDLE */ #define BS_MAX 7 /* BGP start states * * Used in PS_START for fine-grained specification of starting state. * * When BGP protocol is started by core, it goes to BSS_PREPARE. When BGP * protocol done what is neccessary to start itself (like acquiring the lock), * it goes to BSS_CONNECT. When some connection attempt failed because of * option or capability error, it goes to BSS_CONNECT_NOCAP. */ #define BSS_PREPARE 0 /* Used before ordinary BGP started, i. e. waiting for lock */ #define BSS_DELAY 1 /* Startup delay due to previous errors */ #define BSS_CONNECT 2 /* Ordinary BGP connecting */ #define BSS_CONNECT_NOCAP 3 /* Legacy BGP connecting (without capabilities) */ /* BGP feed states (TX) * * RFC 4724 specifies that an initial feed should end with End-of-RIB mark. * * RFC 7313 specifies that a route refresh should be demarcated by BoRR and EoRR packets. * * These states (stored in p->feed_state) are used to keep track of these * requirements. When such feed is started, BFS_LOADING / BFS_REFRESHING is * set. When it ended, BFS_LOADED / BFS_REFRESHED is set to schedule End-of-RIB * or EoRR packet. When the packet is sent, the state returned to BFS_NONE. * * Note that when a non-demarcated feed (e.g. plain RFC 4271 initial load * without End-of-RIB or plain RFC 2918 route refresh without BoRR/EoRR * demarcation) is active, BFS_NONE is set. * * BFS_NONE, BFS_LOADING and BFS_REFRESHING are also used as load states (RX) * with correspondent semantics (-, expecting End-of-RIB, expecting EoRR). */ #define BFS_NONE 0 /* No feed or original non-demarcated feed */ #define BFS_LOADING 1 /* Initial feed active, End-of-RIB planned */ #define BFS_LOADED 2 /* Loading done, End-of-RIB marker scheduled */ #define BFS_REFRESHING 3 /* Route refresh (introduced by BoRR) active */ #define BFS_REFRESHED 4 /* Refresh done, EoRR packet scheduled */ /* Error classes */ #define BE_NONE 0 #define BE_MISC 1 /* Miscellaneous error */ #define BE_SOCKET 2 /* Socket error */ #define BE_BGP_RX 3 /* BGP protocol error notification received */ #define BE_BGP_TX 4 /* BGP protocol error notification sent */ #define BE_AUTO_DOWN 5 /* Automatic shutdown */ #define BE_MAN_DOWN 6 /* Manual shutdown */ /* Misc error codes */ #define BEM_NEIGHBOR_LOST 1 #define BEM_INVALID_NEXT_HOP 2 #define BEM_INVALID_MD5 3 /* MD5 authentication kernel request failed (possibly not supported) */ #define BEM_NO_SOCKET 4 #define BEM_LINK_DOWN 5 #define BEM_BFD_DOWN 6 #define BEM_GRACEFUL_RESTART 7 /* Automatic shutdown error codes */ #define BEA_ROUTE_LIMIT_EXCEEDED 1 /* BGP Update Error codes */ #define BGP_UPD_ERROR_MALFORMED_ATTR 1 #define BGP_UPD_ERROR_UNRCGNZD_WK_ATTR 2 #define BGP_UPD_ERROR_MISSING_WK_ATTR 3 #define BGP_UPD_ERROR_ATTR_FLAG 4 #define BGP_UPD_ERROR_ATTR_LENGTH 5 #define BGP_UPD_ERROR_INVALID_ORGIN 6 #define BGP_UPD_ERROR_INVALID_HOP 8 #define BGP_UPD_ERROR_OPT_ATTR 9 #define BGP_UPD_ERROR_INVALID_NETWORK 10 #define BGP_UPD_ERROR_MALFORMED_ASPATH 11 /* Well-known communities */ #define BGP_COMM_NO_EXPORT 0xffffff01 /* Don't export outside local AS / confed. */ #define BGP_COMM_NO_ADVERTISE 0xffffff02 /* Don't export at all */ #define BGP_COMM_NO_EXPORT_SUBCONFED 0xffffff03 /* NO_EXPORT even in local confederation */ /* Origins */ #define ORIGIN_IGP 0 #define ORIGIN_EGP 1 #define ORIGIN_INCOMPLETE 2 /* Address families */ #define BGP_AF_IPV4 1 #define BGP_AF_IPV6 2 #ifdef IPV6 #define BGP_AF BGP_AF_IPV6 #else #define BGP_AF BGP_AF_IPV4 #endif #define DO_NLRI(name) \ start = x = p->name##_start; \ len = len0 = p->name##_len; \ if (len) \ { \ if (len < 3) { err=9; goto done; } \ af = get_u16(x); \ sub = x[2]; \ x += 3; \ len -= 3; \ DBG("\tNLRI AF=%d sub=%d len=%d\n", af, sub, len);\ } \ else \ af = 0; \ if ((af == BGP_AF_IPV6) || (af == BGP_AF_IPV4)) #define DECODE_PREFIX(pp, ll) do { \ if (p->add_path_rx) \ { \ if (ll < 5) { err=1; goto done; } \ path_id = get_u32(pp); \ pp += 4; \ ll -= 4; \ } \ int b = *pp++; \ int q; \ ll--; \ if (b > BITS_PER_IP_ADDRESS) { err=10; goto done; } \ q = (b+7) / 8; \ if (ll < q) { err=1; goto done; } \ memcpy(&prefix, pp, q); \ pp += q; \ ll -= q; \ ipa_ntoh(prefix); \ prefix = ipa_and(prefix, ipa_mkmask(b)); \ pxlen = b; \ } while (0) #endif