/* * BIRD -- Multi-Threaded Routing Toolkit (MRT) Protocol * * (c) 2017--2018 Ondrej Zajicek * (c) 2017--2018 CZ.NIC z.s.p.o. * * Can be freely distributed and used under the terms of the GNU GPL. */ /** * DOC: Multi-Threaded Routing Toolkit (MRT) protocol * * The MRT protocol is implemented in just one file: |mrt.c|. It contains of * several parts: Generic functions for preparing MRT messages in a buffer, * functions for MRT table dump (called from timer or CLI), functions for MRT * BGP4MP dump (called from BGP), and the usual protocol glue. For the MRT table * dump, the key structure is struct mrt_table_dump_state, which contains all * necessary data and created when the MRT dump cycle is started for the * duration of the MRT dump. The MBGP4MP dump is currently not bound to MRT * protocol instance and uses the config->mrtdump_file fd. * * The protocol is simple, just periodically scans routing table and export it * to a file. It does not use the regular update mechanism, but a direct access * in order to handle iteration through multiple routing tables. The table dump * needs to dump all peers first and then use indexes to address the peers, we * use a hash table (@peer_hash) to find peer index based on BGP protocol key * attributes. * * One thing worth documenting is the locking. During processing, the currently * processed table (@table field in the state structure) is locked and also the * explicitly named table is locked (@table_ptr field in the state structure) if * specified. Between dumps no table is locked. Also the current config is * locked (by config_add_obstacle()) during table dumps as some data (strings, * filters) are shared from the config and the running table dump may be * interrupted by reconfiguration. * * Supported standards: * - RFC 6396 - MRT format standard * - RFC 8050 - ADD_PATH extension */ #include #include #include #include #include "mrt.h" #include "lib/io-loop.h" #include "nest/cli.h" #include "filter/filter.h" #include "proto/bgp/bgp.h" #include "sysdep/unix/unix.h" #include "sysdep/unix/io-loop.h" #ifdef PATH_MAX #define BIRD_PATH_MAX PATH_MAX #else #define BIRD_PATH_MAX 4096 #endif #define mrt_log(s, msg, args...) \ ({ \ if (s->cli) \ cli_printf(s->cli, -8009, msg, ## args); \ if (s->proto) \ log(L_ERR "%s: " msg, s->proto->p.name, ## args); \ }) extern struct proto_attrs *proto_state_table; /* * MRT buffer code */ static void mrt_buffer_init(buffer *b, pool *pool, size_t n) { b->start = mb_alloc(pool, n); b->pos = b->start; b->end = b->start + n; } static void mrt_buffer_grow(buffer *b, size_t n) { size_t used = b->pos - b->start; size_t size = b->end - b->start; size_t req = used + n; while (size < req) size = size * 3 / 2; b->start = mb_realloc(b->start, size); b->pos = b->start + used; b->end = b->start + size; } static inline void mrt_buffer_need(buffer *b, size_t n) { if (b->pos + n > b->end) mrt_buffer_grow(b, n); } static inline uint mrt_buffer_pos(buffer *b) { return b->pos - b->start; } static inline void mrt_buffer_flush(buffer *b) { b->pos = b->start; } #define MRT_DEFINE_TYPE(S, T) \ static inline void UNUSED mrt_put_##S##_(buffer *b, T x) \ { \ put_##S(b->pos, x); \ b->pos += sizeof(T); \ } \ \ static inline void UNUSED mrt_put_##S(buffer *b, T x) \ { \ mrt_buffer_need(b, sizeof(T)); \ put_##S(b->pos, x); \ b->pos += sizeof(T); \ } MRT_DEFINE_TYPE(u8, u8) MRT_DEFINE_TYPE(u16, u16) MRT_DEFINE_TYPE(u32, u32) MRT_DEFINE_TYPE(u64, u64) MRT_DEFINE_TYPE(ip4, ip4_addr) MRT_DEFINE_TYPE(ip6, ip6_addr) static inline void mrt_put_ipa(buffer *b, ip_addr x) { if (ipa_is_ip4(x)) mrt_put_ip4(b, ipa_to_ip4(x)); else mrt_put_ip6(b, ipa_to_ip6(x)); } static inline void mrt_put_data(buffer *b, const void *src, size_t n) { if (!n) return; mrt_buffer_need(b, n); memcpy(b->pos, src, n); b->pos += n; } static void mrt_init_message(buffer *b, u16 type, u16 subtype) { /* Reset buffer */ mrt_buffer_flush(b); mrt_buffer_need(b, MRT_HDR_LENGTH); /* Prepare header */ mrt_put_u32_(b, current_real_time() TO_S); /* now_real */ mrt_put_u16_(b, type); mrt_put_u16_(b, subtype); /* Message length, will be fixed later */ mrt_put_u32_(b, 0); } rtable * get_tab(struct mrt_table_dump_state *s) { if (s->table->head) return (rtable*)SKIP_BACK(rtable, n, s->table->head); else return NULL; } static void mrt_dump_message(buffer *b, int fd) { uint len = mrt_buffer_pos(b); /* Fix message length */ ASSERT(len >= MRT_HDR_LENGTH); put_u32(b->start + 8, len - MRT_HDR_LENGTH); if (fd < 0) return; if (write(fd, b->start, len) < 0) log(L_ERR "Write to MRT file failed: %m"); /* TODO: name of file */ } static int bstrsub(char *dst, size_t n, const char *src, const char *key, const char *val) { const char *last, *next; char *pos = dst; size_t step, klen = strlen(key), vlen = strlen(val); for (last = src; next = strstr(last, key); last = next + klen) { step = next - last; if (n <= step + vlen) return 0; memcpy(pos, last, step); ADVANCE(pos, n, step); memcpy(pos, val, vlen); ADVANCE(pos, n, vlen); } step = strlen(last); if (n <= step) return 0; memcpy(pos, last, step); ADVANCE(pos, n, step); pos[0] = 0; return 1; } static inline rtable * mrt_next_table_(rtable *tab, rtable *tab_ptr, const char *pattern) { /* Handle explicit table, return it in the first pass */ if (tab_ptr) return !tab ? tab_ptr : NULL; /* Walk routing_tables list, starting after tab (if non-NULL) */ for (node *tn = tab ? tab->n.next : HEAD(routing_tables); NODE_VALID(tn); tn = tn->next) { tab = SKIP_BACK(rtable, n, tn); if (patmatch(pattern, tab->name) && ((tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6))) return tab; } return NULL; } static rtable * mrt_next_table(struct mrt_table_dump_state *s) { rtable *tab = mrt_next_table_(get_tab(s), s->table_ptr, s->table_expr); if (s->table->head) RT_LOCKED(get_tab(s), tab) rt_unlock_table(tab); if (tab == NULL) { s->ipv4 = 0; return NULL; } s->table->head = &tab->n; s->ipv4 = tab ? (tab->addr_type == NET_IP4) : 0; if (s->table->head) { rtable *t = get_tab(s); RT_LOCKED(t, tab) rt_lock_table(tab); } return get_tab(s); } static int mrt_open_file(struct mrt_table_dump_state *s) { char fmt1[BIRD_PATH_MAX]; char name[BIRD_PATH_MAX]; btime now = current_time(); btime now_real = current_real_time(); rtable *tab = get_tab(s); if (!bstrsub(fmt1, sizeof(fmt1), s->filename, "%N", tab->name) || !tm_format_real_time(name, sizeof(name), fmt1, now_real)) { mrt_log(s, "Invalid filename '%s'", s->filename); return 0; } s->file = rf_open(s->pool, name, RF_APPEND, 0); if (!s->file) { mrt_log(s, "Unable to open MRT file '%s': %m", name); return 0; } s->fd = rf_fileno(s->file); s->time_offset = now_real - now; return 1; } static void mrt_close_file(struct mrt_table_dump_state *s) { rfree(s->file); s->file = NULL; s->fd = -1; } /* * MRT Table Dump: Peer Index Table */ #define PEER_KEY(n) n->peer_id, n->peer_as, n->peer_ip #define PEER_NEXT(n) n->next #define PEER_EQ(id1,as1,ip1,id2,as2,ip2) \ id1 == id2 && as1 == as2 && ipa_equal(ip1, ip2) #define PEER_FN(id,as,ip) ipa_hash(ip) static void mrt_peer_table_header(struct mrt_table_dump_state *s, u32 router_id, const char *name) { buffer *b = &s->buf; /* Collector BGP ID */ mrt_put_u32(b, router_id); /* View Name */ uint name_length = name ? strlen(name) : 0; name_length = MIN(name_length, 65535); mrt_put_u16(b, name_length); mrt_put_data(b, name, name_length); /* Peer Count, will be fixed later */ s->peer_count = 0; s->peer_count_offset = mrt_buffer_pos(b); mrt_put_u16(b, 0); HASH_INIT(s->peer_hash, s->pool, 10); } static void mrt_peer_table_entry(struct mrt_table_dump_state *s, u32 peer_id, u32 peer_as, ip_addr peer_ip) { buffer *b = &s->buf; uint type = MRT_PEER_TYPE_32BIT_ASN; if (ipa_is_ip6(peer_ip)) type |= MRT_PEER_TYPE_IPV6; /* Dump peer to buffer */ mrt_put_u8(b, type); mrt_put_u32(b, peer_id); mrt_put_ipa(b, peer_ip); mrt_put_u32(b, peer_as); /* Add peer to hash table */ struct mrt_peer_entry *n = lp_allocz(s->peer_lp, sizeof(struct mrt_peer_entry)); n->peer_id = peer_id; n->peer_as = peer_as; n->peer_ip = peer_ip; n->index = s->peer_count++; HASH_INSERT(s->peer_hash, PEER, n); } static void mrt_peer_table_dump(struct mrt_table_dump_state *s) { mrt_init_message(&s->buf, MRT_TABLE_DUMP_V2, MRT_PEER_INDEX_TABLE); mrt_peer_table_header(s, config->router_id, get_tab(s)->name); /* 0 is fake peer for non-BGP routes */ mrt_peer_table_entry(s, 0, 0, IPA_NONE); #ifdef CONFIG_BGP for(u32 i = 0; ilength; i++) { rcu_read_lock(); ea_list *eal = proto_state_table->attrs[i]; if (eal) ea_free_later(ea_ref(eal)); else { rcu_read_unlock(); continue; } rcu_read_unlock(); struct protocol **type = (struct protocol **)ea_get_adata(eal, &ea_protocol_type)->data; int state = ea_get_int(eal, &ea_state, 0); if ((*type == &proto_bgp) && (state != PS_DOWN)) { int rem_id = ea_get_int(eal, &ea_bgp_rem_id, 0); int rem_as = ea_get_int(eal, &ea_bgp_rem_as, 0); ip_addr *rem_ip = (ip_addr *)ea_get_adata(eal, &ea_bgp_rem_ip)->data; mrt_peer_table_entry(s, rem_id, rem_as, *rem_ip); } } #endif /* Fix Peer Count */ put_u16(s->buf.start + s->peer_count_offset, s->peer_count); mrt_dump_message(&s->buf, s->fd); } static void mrt_peer_table_flush(struct mrt_table_dump_state *s) { lp_flush(s->peer_lp); HASH_FREE(s->peer_hash); } /* * MRT Table Dump: RIB Table */ static void mrt_rib_table_header(struct mrt_table_dump_state *s, const net_addr *n) { buffer *b = &s->buf; /* Sequence Number */ mrt_put_u32(b, s->seqnum); /* Network Prefix */ if (s->ipv4) { ASSERT(n->type == NET_IP4); ip4_addr a = ip4_hton(net4_prefix(n)); uint len = net4_pxlen(n); mrt_put_u8(b, len); mrt_put_data(b, &a, BYTES(len)); } else { ASSERT(n->type == NET_IP6); ip6_addr a = ip6_hton(net6_prefix(n)); uint len = net6_pxlen(n); mrt_put_u8(b, len); mrt_put_data(b, &a, BYTES(len)); } /* Entry Count, will be fixed later */ s->entry_count = 0; s->entry_count_offset = mrt_buffer_pos(b); mrt_put_u16(b, 0); } #ifdef CONFIG_BGP static void mrt_rib_table_entry_bgp_attrs(struct mrt_table_dump_state *s, rte *r) { struct ea_list *eattrs = r->attrs; buffer *b = &s->buf; if (!eattrs) return; /* Attribute list must be normalized for bgp_encode_attrs() */ if (!r->attrs->stored) eattrs = ea_normalize(eattrs, 0); mrt_buffer_need(b, MRT_ATTR_BUFFER_SIZE); byte *pos = b->pos; s->bws->mp_reach = !s->ipv4; s->bws->mp_next_hop = NULL; s->bws->ignore_non_bgp_attrs = 1; /* Encode BGP attributes */ int len = bgp_encode_attrs(s->bws, eattrs, pos, b->end); if (len < 0) goto fail; pos += len; /* Encode IPv6 next hop separately as fake MP_REACH_NLRI attribute */ if (s->bws->mp_next_hop) { len = bgp_encode_mp_reach_mrt(s->bws, s->bws->mp_next_hop, pos, b->end - pos); if (len < 0) goto fail; pos += len; } /* Update attribute length and advance buffer pos */ put_u16(b->pos - 2, pos - b->pos); b->pos = pos; return; fail: mrt_log(s, "Attribute list too long for %N", r->net); } #endif static void mrt_rib_table_entry(struct mrt_table_dump_state *s, rte *r) { buffer *b = &s->buf; uint peer = 0; #ifdef CONFIG_BGP /* Find peer index */ struct bgp_proto *p = bgp_rte_proto(r); if (p) { struct mrt_peer_entry *n = HASH_FIND(s->peer_hash, PEER, p->remote_id, p->remote_as, p->remote_ip); peer = n ? n->index : 0; } #endif /* Peer Index and Originated Time */ mrt_put_u16(b, peer); mrt_put_u32(b, (r->lastmod + s->time_offset) TO_S); /* Path Identifier */ if (s->add_path) mrt_put_u32(b, (u32) r->src->private_id); /* Route Attributes */ mrt_put_u16(b, 0); #ifdef CONFIG_BGP mrt_rib_table_entry_bgp_attrs(s, r); #endif s->entry_count++; } static void mrt_rib_table_dump(struct mrt_table_dump_state *s, const struct rt_export_feed *feed, int add_path) { s->add_path = s->bws->add_path = add_path; int subtype = s->ipv4 ? (!add_path ? MRT_RIB_IPV4_UNICAST : MRT_RIB_IPV4_UNICAST_ADDPATH) : (!add_path ? MRT_RIB_IPV6_UNICAST : MRT_RIB_IPV6_UNICAST_ADDPATH); mrt_init_message(&s->buf, MRT_TABLE_DUMP_V2, subtype); mrt_rib_table_header(s, feed->block[0].net); for (uint i = 0; i < feed->count_routes; i++) { rte *rte = &feed->block[i]; if (rte_is_filtered(rte)) continue; /* Skip routes that should be reported in the other phase */ if (!s->always_add_path && (!rte->src->private_id != !s->add_path)) { s->want_add_path = 1; continue; } if (f_run(s->filter, rte, 0) <= F_ACCEPT) mrt_rib_table_entry(s, rte); lp_flush(s->linpool); } /* Fix Entry Count */ put_u16(s->buf.start + s->entry_count_offset, s->entry_count); /* Update max counter */ s->max -= 1 + s->entry_count; /* Skip empty entries */ if (!s->entry_count) return; s->seqnum++; mrt_dump_message(&s->buf, s->fd); } /* * MRT Table Dump: main logic */ static struct mrt_table_dump_state * mrt_table_dump_init(pool *pp) { pool *pool = rp_new(pp, pp->domain, "MRT Table Dump"); struct mrt_table_dump_state *s = mb_allocz(pool, sizeof(struct mrt_table_dump_state)); s->table = (list*)mb_allocz(pool, sizeof(list)); s->pool = pool; s->linpool = lp_new(pool); s->peer_lp = lp_new(pool); mrt_buffer_init(&s->buf, pool, 2 * MRT_ATTR_BUFFER_SIZE); /* We lock the current config as we may reference it indirectly by filter */ s->config = config; config_add_obstacle(s->config); s->fd = -1; return s; } static void mrt_table_dump_free(struct mrt_table_dump_state *s) { config_del_obstacle(s->config); rp_free(s->pool); } static int mrt_table_dump_step(struct mrt_table_dump_state *s) { struct bgp_write_state bws = { .as4_session = 1 }; s->max = 2048; s->bws = &bws; if (s->table_open) goto step; while (mrt_next_table(s)) { if (!mrt_open_file(s)) continue; mrt_peer_table_dump(s); s->table_open = 1; step: ; struct rt_export_feeder feeder = (struct rt_export_feeder) { .name = "cli.feeder", }; RT_LOCKED(get_tab(s), tab) rt_feeder_subscribe(&tab->export_all, &feeder); RT_FEED_WALK(&feeder, route_feed) { if (s->max < 0) return 0; /* With Always ADD_PATH option, we jump directly to second phase */ s->want_add_path = s->always_add_path; if (s->want_add_path == 0) mrt_rib_table_dump(s, route_feed, 0); if (s->want_add_path == 1) mrt_rib_table_dump(s, route_feed, 1); } s->table_open = 0; mrt_close_file(s); mrt_peer_table_flush(s); rt_feeder_unsubscribe(&feeder); } return 1; } static void mrt_timer(timer *t) { struct mrt_proto *p = t->data; struct mrt_config *cf = (void *) (p->p.cf); if (p->table_dump) { log(L_WARN "%s: Earlier RIB table dump still not finished, skipping next one", p->p.name); return; } TRACE(D_EVENTS, "RIB table dump started"); struct mrt_table_dump_state *s = mrt_table_dump_init(p->p.pool); s->proto = p; s->table_expr = cf->table_expr; s->table_ptr = cf->table_cf ? cf->table_cf->table : NULL; s->filter = cf->filter; s->filename = cf->filename; s->always_add_path = cf->always_add_path; if (s->table_ptr) RT_LOCKED(s->table_ptr, tab) rt_lock_table(tab); p->table_dump = s; ev_send_loop(p->p.loop, p->event); } static void mrt_event(void *P) { struct mrt_proto *p = P; if (!p->table_dump) return; if (!mrt_table_dump_step(p->table_dump)) { ev_schedule(p->event); return; } mrt_table_dump_free(p->table_dump); p->table_dump = NULL; TRACE(D_EVENTS, "RIB table dump done"); if (p->p.proto_state == PS_STOP) proto_notify_state(&p->p, PS_DOWN); } /* * MRT Table Dump: CLI command */ static void mrt_dump_cont(struct cli *c) { if (!mrt_table_dump_step(c->rover)) return; cli_printf(c, 0, ""); mrt_table_dump_free(c->rover); c->cont = NULL; c->cleanup = NULL; c->rover = NULL; } void mrt_dump_cleanup(struct cli *c) { mrt_table_dump_free(c->rover); c->rover = NULL; } void mrt_dump_cmd(struct mrt_dump_data *d) { if (cli_access_restricted()) return; if (!d->table_expr && !d->table_ptr) cf_error("Table not specified"); if (!d->filename) cf_error("File not specified"); struct mrt_table_dump_state *s = mrt_table_dump_init(this_cli->pool); s->cli = this_cli; s->table_expr = d->table_expr; s->table_ptr = d->table_ptr; s->filter = d->filter; s->filename = d->filename; if (s->table_ptr) RT_LOCKED(s->table_ptr, tab) rt_lock_table(tab); this_cli->cont = mrt_dump_cont; this_cli->cleanup = mrt_dump_cleanup; this_cli->rover = s; } /* * MRT BGP4MP dump */ static buffer * mrt_bgp_buffer(void) { /* Static buffer for BGP4MP dump, TODO: change to use MRT protocol */ static buffer b; ASSERT(this_metaloop); log("loop in mrt %x, pool %i (main loop is %x, pool %i)", this_metaloop, this_metaloop->pool, &main_birdloop, main_birdloop.pool); ASSERT(this_metaloop->pool); if (!b.start) mrt_buffer_init(&b, this_metaloop->pool, 1024); return &b; } static void mrt_bgp_header(buffer *b, struct mrt_bgp_data *d) { if (d->as4) { mrt_put_u32(b, d->peer_as); mrt_put_u32(b, d->local_as); } else { mrt_put_u16(b, (d->peer_as <= 0xFFFF) ? d->peer_as : AS_TRANS); mrt_put_u16(b, (d->local_as <= 0xFFFF) ? d->local_as : AS_TRANS); } mrt_put_u16(b, (d->index <= 0xFFFF) ? d->index : 0); mrt_put_u16(b, d->af); if (d->af == BGP_AFI_IPV4) { mrt_put_ip4(b, ipa_to_ip4(d->peer_ip)); mrt_put_ip4(b, ipa_to_ip4(d->local_ip)); } else { mrt_put_ip6(b, ipa_to_ip6(d->peer_ip)); mrt_put_ip6(b, ipa_to_ip6(d->local_ip)); } } void mrt_dump_bgp_message(struct mrt_bgp_data *d) { const u16 subtypes[] = { MRT_BGP4MP_MESSAGE, MRT_BGP4MP_MESSAGE_AS4, MRT_BGP4MP_MESSAGE_LOCAL, MRT_BGP4MP_MESSAGE_AS4_LOCAL, MRT_BGP4MP_MESSAGE_ADDPATH, MRT_BGP4MP_MESSAGE_AS4_ADDPATH, MRT_BGP4MP_MESSAGE_LOCAL_ADDPATH, MRT_BGP4MP_MESSAGE_AS4_LOCAL_ADDPATH, }; buffer *b = mrt_bgp_buffer(); mrt_init_message(b, MRT_BGP4MP, subtypes[d->as4 + 4*d->add_path]); mrt_bgp_header(b, d); mrt_put_data(b, d->message, d->msg_len); mrt_dump_message(b, rf_fileno(config->mrtdump_file)); } void mrt_dump_bgp_state_change(struct mrt_bgp_data *d) { /* Convert state from our BS_* values to values used in MRTDump */ const u16 states[BS_MAX] = {1, 2, 3, 4, 5, 6, 1}; if (states[d->old_state] == states[d->new_state]) return; /* Always use AS4 mode for STATE_CHANGE */ d->as4 = 1; buffer *b = mrt_bgp_buffer(); mrt_init_message(b, MRT_BGP4MP, MRT_BGP4MP_STATE_CHANGE_AS4); mrt_bgp_header(b, d); mrt_put_u16(b, states[d->old_state]); mrt_put_u16(b, states[d->new_state]); mrt_dump_message(b, rf_fileno(config->mrtdump_file)); } /* * MRT protocol glue */ void mrt_check_config(struct proto_config *CF) { struct mrt_config *cf = (void *) CF; if (!cf->table_expr && !cf->table_cf) cf_error("Table not specified"); if (!cf->filename) cf_error("File not specified"); if (!cf->period) cf_error("Period not specified"); } static struct proto * mrt_init(struct proto_config *CF) { struct proto *P = proto_new(CF); return P; } static int mrt_start(struct proto *P) { struct mrt_proto *p = (void *) P; struct mrt_config *cf = (void *) (P->cf); p->timer = tm_new_init(P->pool, mrt_timer, p, cf->period S, 0); p->event = ev_new_init(P->pool, mrt_event, p); tm_start_in(p->timer, cf->period S, p->p.loop); return PS_UP; } static int mrt_shutdown(struct proto *P) { struct mrt_proto *p = (void *) P; return p->table_dump ? PS_STOP : PS_DOWN; } static int mrt_reconfigure(struct proto *P, struct proto_config *CF) { struct mrt_proto *p = (void *) P; struct mrt_config *old = (void *) (P->cf); struct mrt_config *new = (void *) CF; if (new->period != old->period) { TRACE(D_EVENTS, "Changing period from %u to %u s", old->period, new->period); btime now = current_time(); btime new_time = p->timer->expires - (old->period S) + (new->period S); p->timer->recurrent = new->period S; tm_set_in(p->timer, MAX(now, new_time), p->p.loop); } return 1; } static void mrt_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSED) { /* Do nothing */ } struct protocol proto_mrt = { .name = "MRT", .template = "mrt%d", .proto_size = sizeof(struct mrt_proto), .config_size = sizeof(struct mrt_config), .init = mrt_init, .start = mrt_start, .shutdown = mrt_shutdown, .reconfigure = mrt_reconfigure, .copy_config = mrt_copy_config, }; void mrt_build(void) { proto_build(&proto_mrt); }