/* * BIRD -- Multi-Threaded Routing Toolkit (MRT) Protocol * * (c) 2017--2018 Ondrej Zajicek * (c) 2017--2018 CZ.NIC z.s.p.o. * * Can be freely distributed and used under the terms of the GNU GPL. */ /** * DOC: Multi-Threaded Routing Toolkit (MRT) protocol * * The MRT protocol is implemented in just one file: |mrt.c|. It contains of * several parts: Generic functions for preparing MRT messages in a buffer, * functions for MRT table dump (called from timer or CLI), functions for MRT * BGP4MP dump (called from BGP), and the usual protocol glue. For the MRT table * dump, the key structure is struct mrt_table_dump_state, which contains all * necessary data and created when the MRT dump cycle is started for the * duration of the MRT dump. The MBGP4MP dump is currently not bound to MRT * protocol instance and uses the config->mrtdump_file fd. * * The protocol is simple, just periodically scans routing table and export it * to a file. It does not use the regular update mechanism, but a direct access * in order to handle iteration through multiple routing tables. The table dump * needs to dump all peers first and then use indexes to address the peers, we * use a hash table (@peer_hash) to find peer index based on BGP protocol key * attributes. * * One thing worth documenting is the locking. During processing, the currently * processed table (@table field in the state structure) is locked and also the * explicitly named table is locked (@table_ptr field in the state structure) if * specified. Between dumps no table is locked. Also the current config is * locked (by config_add_obstacle()) during table dumps as some data (strings, * filters) are shared from the config and the running table dump may be * interrupted by reconfiguration. * * Supported standards: * - RFC 6396 - MRT format standard * - RFC 8050 - ADD_PATH extension */ #include #include #include #include "mrt.h" #include "nest/cli.h" #include "filter/filter.h" #include "proto/bgp/bgp.h" #include "sysdep/unix/unix.h" #ifdef PATH_MAX #define BIRD_PATH_MAX PATH_MAX #else #define BIRD_PATH_MAX 4096 #endif #define mrt_log(s, msg, args...) \ ({ \ if (s->cli) \ cli_printf(s->cli, -8009, msg, ## args); \ if (s->proto) \ log(L_ERR "%s: " msg, s->proto->p.name, ## args); \ }) /* * MRT buffer code */ static void mrt_buffer_init(buffer *b, pool *pool, size_t n) { b->start = mb_alloc(pool, n); b->pos = b->start; b->end = b->start + n; } static void mrt_buffer_grow(buffer *b, size_t n) { size_t used = b->pos - b->start; size_t size = b->end - b->start; size_t req = used + n; while (size < req) size = size * 3 / 2; b->start = mb_realloc(b->start, size); b->pos = b->start + used; b->end = b->start + size; } static inline void mrt_buffer_need(buffer *b, size_t n) { if (b->pos + n > b->end) mrt_buffer_grow(b, n); } static inline uint mrt_buffer_pos(buffer *b) { return b->pos - b->start; } static inline void mrt_buffer_flush(buffer *b) { b->pos = b->start; } #define MRT_DEFINE_TYPE(S, T) \ static inline void UNUSED mrt_put_##S##_(buffer *b, T x) \ { \ put_##S(b->pos, x); \ b->pos += sizeof(T); \ } \ \ static inline void UNUSED mrt_put_##S(buffer *b, T x) \ { \ mrt_buffer_need(b, sizeof(T)); \ put_##S(b->pos, x); \ b->pos += sizeof(T); \ } MRT_DEFINE_TYPE(u8, u8) MRT_DEFINE_TYPE(u16, u16) MRT_DEFINE_TYPE(u32, u32) MRT_DEFINE_TYPE(u64, u64) MRT_DEFINE_TYPE(ip4, ip4_addr) MRT_DEFINE_TYPE(ip6, ip6_addr) static inline void mrt_put_ipa(buffer *b, ip_addr x) { if (ipa_is_ip4(x)) mrt_put_ip4(b, ipa_to_ip4(x)); else mrt_put_ip6(b, ipa_to_ip6(x)); } static inline void mrt_put_data(buffer *b, const void *src, size_t n) { if (!n) return; mrt_buffer_need(b, n); memcpy(b->pos, src, n); b->pos += n; } static void mrt_init_message(buffer *b, u16 type, u16 subtype) { /* Reset buffer */ mrt_buffer_flush(b); mrt_buffer_need(b, MRT_HDR_LENGTH); /* Prepare header */ mrt_put_u32_(b, current_real_time() TO_S); /* now_real */ mrt_put_u16_(b, type); mrt_put_u16_(b, subtype); /* Message length, will be fixed later */ mrt_put_u32_(b, 0); } static void mrt_dump_message(buffer *b, int fd) { uint len = mrt_buffer_pos(b); /* Fix message length */ ASSERT(len >= MRT_HDR_LENGTH); put_u32(b->start + 8, len - MRT_HDR_LENGTH); if (fd < 0) return; if (write(fd, b->start, len) < 0) log(L_ERR "Write to MRT file failed: %m"); /* TODO: name of file */ } static int bstrsub(char *dst, size_t n, const char *src, const char *key, const char *val) { const char *last, *next; char *pos = dst; size_t step, klen = strlen(key), vlen = strlen(val); for (last = src; next = strstr(last, key); last = next + klen) { step = next - last; if (n <= step + vlen) return 0; memcpy(pos, last, step); ADVANCE(pos, n, step); memcpy(pos, val, vlen); ADVANCE(pos, n, vlen); } step = strlen(last); if (n <= step) return 0; memcpy(pos, last, step); ADVANCE(pos, n, step); pos[0] = 0; return 1; } static inline rtable * mrt_next_table_(rtable *tab, rtable *tab_ptr, const char *pattern) { /* Handle explicit table, return it in the first pass */ if (tab_ptr) return !tab ? tab_ptr : NULL; /* Walk routing_tables list, starting after tab (if non-NULL) */ for (node *tn = tab ? tab->n.next : HEAD(routing_tables); NODE_VALID(tn); tn = tn->next) { tab = SKIP_BACK(struct rtable, n, tn); if (patmatch(pattern, tab->name) && ((tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6))) return tab; } return NULL; } static rtable * mrt_next_table(struct mrt_table_dump_state *s) { rtable *tab = mrt_next_table_(s->table, s->table_ptr, s->table_expr); if (s->table) rt_unlock_table(s->table); s->table = tab; s->ipv4 = tab ? (tab->addr_type == NET_IP4) : 0; if (s->table) rt_lock_table(s->table); return s->table; } static int mrt_open_file(struct mrt_table_dump_state *s) { char fmt1[BIRD_PATH_MAX]; char name[BIRD_PATH_MAX]; btime now = current_time(); btime now_real = current_real_time(); if (!bstrsub(fmt1, sizeof(fmt1), s->filename, "%N", s->table->name) || !tm_format_real_time(name, sizeof(name), fmt1, now_real)) { mrt_log(s, "Invalid filename '%s'", s->filename); return 0; } s->file = rf_open(s->pool, name, "a"); if (!s->file) { mrt_log(s, "Unable to open MRT file '%s': %m", name); return 0; } s->fd = rf_fileno(s->file); s->time_offset = now_real - now; return 1; } static void mrt_close_file(struct mrt_table_dump_state *s) { rfree(s->file); s->file = NULL; s->fd = -1; } /* * MRT Table Dump: Peer Index Table */ #define PEER_KEY(n) n->peer_id, n->peer_as, n->peer_ip #define PEER_NEXT(n) n->next #define PEER_EQ(id1,as1,ip1,id2,as2,ip2) \ id1 == id2 && as1 == as2 && ipa_equal(ip1, ip2) #define PEER_FN(id,as,ip) ipa_hash(ip) static void mrt_peer_table_header(struct mrt_table_dump_state *s, u32 router_id, const char *name) { buffer *b = &s->buf; /* Collector BGP ID */ mrt_put_u32(b, router_id); /* View Name */ uint name_length = name ? strlen(name) : 0; name_length = MIN(name_length, 65535); mrt_put_u16(b, name_length); mrt_put_data(b, name, name_length); /* Peer Count, will be fixed later */ s->peer_count = 0; s->peer_count_offset = mrt_buffer_pos(b); mrt_put_u16(b, 0); HASH_INIT(s->peer_hash, s->pool, 10); } static void mrt_peer_table_entry(struct mrt_table_dump_state *s, u32 peer_id, u32 peer_as, ip_addr peer_ip) { buffer *b = &s->buf; uint type = MRT_PEER_TYPE_32BIT_ASN; if (ipa_is_ip6(peer_ip)) type |= MRT_PEER_TYPE_IPV6; /* Dump peer to buffer */ mrt_put_u8(b, type); mrt_put_u32(b, peer_id); mrt_put_ipa(b, peer_ip); mrt_put_u32(b, peer_as); /* Add peer to hash table */ struct mrt_peer_entry *n = lp_allocz(s->peer_lp, sizeof(struct mrt_peer_entry)); n->peer_id = peer_id; n->peer_as = peer_as; n->peer_ip = peer_ip; n->index = s->peer_count++; HASH_INSERT(s->peer_hash, PEER, n); } static void mrt_peer_table_dump(struct mrt_table_dump_state *s) { mrt_init_message(&s->buf, MRT_TABLE_DUMP_V2, MRT_PEER_INDEX_TABLE); mrt_peer_table_header(s, config->router_id, s->table->name); /* 0 is fake peer for non-BGP routes */ mrt_peer_table_entry(s, 0, 0, IPA_NONE); #ifdef CONFIG_BGP struct proto *P; WALK_LIST(P, proto_list) if ((P->proto == &proto_bgp) && (P->proto_state != PS_DOWN)) { struct bgp_proto *p = (void *) P; mrt_peer_table_entry(s, p->remote_id, p->remote_as, p->remote_ip); } #endif /* Fix Peer Count */ put_u16(s->buf.start + s->peer_count_offset, s->peer_count); mrt_dump_message(&s->buf, s->fd); } static void mrt_peer_table_flush(struct mrt_table_dump_state *s) { lp_flush(s->peer_lp); HASH_FREE(s->peer_hash); } /* * MRT Table Dump: RIB Table */ static void mrt_rib_table_header(struct mrt_table_dump_state *s, net_addr *n) { buffer *b = &s->buf; /* Sequence Number */ mrt_put_u32(b, s->seqnum); /* Network Prefix */ if (s->ipv4) { ASSERT(n->type == NET_IP4); ip4_addr a = ip4_hton(net4_prefix(n)); uint len = net4_pxlen(n); mrt_put_u8(b, len); mrt_put_data(b, &a, BYTES(len)); } else { ASSERT(n->type == NET_IP6); ip6_addr a = ip6_hton(net6_prefix(n)); uint len = net6_pxlen(n); mrt_put_u8(b, len); mrt_put_data(b, &a, BYTES(len)); } /* Entry Count, will be fixed later */ s->entry_count = 0; s->entry_count_offset = mrt_buffer_pos(b); mrt_put_u16(b, 0); } #ifdef CONFIG_BGP static void mrt_rib_table_entry_bgp_attrs(struct mrt_table_dump_state *s, rte *r) { struct ea_list *eattrs = r->attrs->eattrs; buffer *b = &s->buf; if (!eattrs) return; /* Attribute list must be normalized for bgp_encode_attrs() */ if (!rta_is_cached(r->attrs)) ea_normalize(eattrs); mrt_buffer_need(b, MRT_ATTR_BUFFER_SIZE); byte *pos = b->pos; s->bws->mp_reach = !s->ipv4; s->bws->mp_next_hop = NULL; /* Encode BGP attributes */ int len = bgp_encode_attrs(s->bws, eattrs, pos, b->end); if (len < 0) goto fail; pos += len; /* Encode IPv6 next hop separately as fake MP_REACH_NLRI attribute */ if (s->bws->mp_next_hop) { len = bgp_encode_mp_reach_mrt(s->bws, s->bws->mp_next_hop, pos, b->end - pos); if (len < 0) goto fail; pos += len; } /* Update attribute length and advance buffer pos */ put_u16(b->pos - 2, pos - b->pos); b->pos = pos; return; fail: mrt_log(s, "Attribute list too long for %N", r->net->n.addr); } #endif static void mrt_rib_table_entry(struct mrt_table_dump_state *s, rte *r) { buffer *b = &s->buf; uint peer = 0; #ifdef CONFIG_BGP /* Find peer index */ if (r->src->proto->proto == &proto_bgp) { struct bgp_proto *p = (void *) r->src->proto; struct mrt_peer_entry *n = HASH_FIND(s->peer_hash, PEER, p->remote_id, p->remote_as, p->remote_ip); peer = n ? n->index : 0; } #endif /* Peer Index and Originated Time */ mrt_put_u16(b, peer); mrt_put_u32(b, (r->lastmod + s->time_offset) TO_S); /* Path Identifier */ if (s->add_path) mrt_put_u32(b, (u32) r->src->private_id); /* Route Attributes */ mrt_put_u16(b, 0); #ifdef CONFIG_BGP mrt_rib_table_entry_bgp_attrs(s, r); #endif s->entry_count++; } static void mrt_rib_table_dump(struct mrt_table_dump_state *s, net *n, int add_path) { s->add_path = s->bws->add_path = add_path; int subtype = s->ipv4 ? (!add_path ? MRT_RIB_IPV4_UNICAST : MRT_RIB_IPV4_UNICAST_ADDPATH) : (!add_path ? MRT_RIB_IPV6_UNICAST : MRT_RIB_IPV6_UNICAST_ADDPATH); mrt_init_message(&s->buf, MRT_TABLE_DUMP_V2, subtype); mrt_rib_table_header(s, n->n.addr); rte *rt, *rt0; for (rt0 = n->routes; rt = rt0; rt0 = rt0->next) { if (rte_is_filtered(rt)) continue; /* Skip routes that should be reported in the other phase */ if (!s->always_add_path && (!rt->src->private_id != !s->add_path)) { s->want_add_path = 1; continue; } if (f_run(s->filter, &rt, s->linpool, 0) <= F_ACCEPT) mrt_rib_table_entry(s, rt); if (rt != rt0) rte_free(rt); lp_flush(s->linpool); } /* Fix Entry Count */ put_u16(s->buf.start + s->entry_count_offset, s->entry_count); /* Update max counter */ s->max -= 1 + s->entry_count; /* Skip empty entries */ if (!s->entry_count) return; s->seqnum++; mrt_dump_message(&s->buf, s->fd); } /* * MRT Table Dump: main logic */ static struct mrt_table_dump_state * mrt_table_dump_init(pool *pp) { pool *pool = rp_new(pp, "MRT Table Dump"); struct mrt_table_dump_state *s = mb_allocz(pool, sizeof(struct mrt_table_dump_state)); s->pool = pool; s->linpool = lp_new(pool); s->peer_lp = lp_new(pool); mrt_buffer_init(&s->buf, pool, 2 * MRT_ATTR_BUFFER_SIZE); /* We lock the current config as we may reference it indirectly by filter */ s->config = config; config_add_obstacle(s->config); s->fd = -1; return s; } static void mrt_table_dump_free(struct mrt_table_dump_state *s) { if (s->table_open) FIB_ITERATE_UNLINK(&s->fit, &s->table->fib); if (s->table) rt_unlock_table(s->table); if (s->table_ptr) rt_unlock_table(s->table_ptr); config_del_obstacle(s->config); rfree(s->pool); } static int mrt_table_dump_step(struct mrt_table_dump_state *s) { struct bgp_write_state bws = { .as4_session = 1, .ignore_non_bgp_attrs = 1, }; s->max = 2048; s->bws = &bws; if (s->table_open) goto step; while (mrt_next_table(s)) { if (!mrt_open_file(s)) continue; mrt_peer_table_dump(s); FIB_ITERATE_INIT(&s->fit, &s->table->fib); s->table_open = 1; step: FIB_ITERATE_START(&s->table->fib, &s->fit, net, n) { if (s->max < 0) { FIB_ITERATE_PUT(&s->fit); return 0; } /* With Always ADD_PATH option, we jump directly to second phase */ s->want_add_path = s->always_add_path; if (s->want_add_path == 0) mrt_rib_table_dump(s, n, 0); if (s->want_add_path == 1) mrt_rib_table_dump(s, n, 1); } FIB_ITERATE_END; s->table_open = 0; mrt_close_file(s); mrt_peer_table_flush(s); } return 1; } static void mrt_timer(timer *t) { struct mrt_proto *p = t->data; struct mrt_config *cf = (void *) (p->p.cf); if (p->table_dump) { log(L_WARN "%s: Earlier RIB table dump still not finished, skipping next one", p->p.name); return; } TRACE(D_EVENTS, "RIB table dump started"); struct mrt_table_dump_state *s = mrt_table_dump_init(p->p.pool); s->proto = p; s->table_expr = cf->table_expr; s->table_ptr = cf->table_cf ? cf->table_cf->table : NULL; s->filter = cf->filter; s->filename = cf->filename; s->always_add_path = cf->always_add_path; if (s->table_ptr) rt_lock_table(s->table_ptr); p->table_dump = s; ev_schedule(p->event); } static void mrt_event(void *P) { struct mrt_proto *p = P; if (!p->table_dump) return; if (!mrt_table_dump_step(p->table_dump)) { ev_schedule(p->event); return; } mrt_table_dump_free(p->table_dump); p->table_dump = NULL; TRACE(D_EVENTS, "RIB table dump done"); if (p->p.proto_state == PS_STOP) proto_notify_state(&p->p, PS_DOWN); } /* * MRT Table Dump: CLI command */ static void mrt_dump_cont(struct cli *c) { if (!mrt_table_dump_step(c->rover)) return; cli_printf(c, 0, ""); mrt_table_dump_free(c->rover); c->cont = c->cleanup = c->rover = NULL; } static void mrt_dump_cleanup(struct cli *c) { mrt_table_dump_free(c->rover); c->rover = NULL; } void mrt_dump_cmd(struct mrt_dump_data *d) { if (cli_access_restricted()) return; if (!d->table_expr && !d->table_ptr) cf_error("Table not specified"); if (!d->filename) cf_error("File not specified"); struct mrt_table_dump_state *s = mrt_table_dump_init(this_cli->pool); s->cli = this_cli; s->table_expr = d->table_expr; s->table_ptr = d->table_ptr; s->filter = d->filter; s->filename = d->filename; if (s->table_ptr) rt_lock_table(s->table_ptr); this_cli->cont = mrt_dump_cont; this_cli->cleanup = mrt_dump_cleanup; this_cli->rover = s; } /* * MRT BGP4MP dump */ static buffer * mrt_bgp_buffer(void) { /* Static buffer for BGP4MP dump, TODO: change to use MRT protocol */ static buffer b; if (!b.start) mrt_buffer_init(&b, &root_pool, 1024); return &b; } static void mrt_bgp_header(buffer *b, struct mrt_bgp_data *d) { if (d->as4) { mrt_put_u32(b, d->peer_as); mrt_put_u32(b, d->local_as); } else { mrt_put_u16(b, (d->peer_as <= 0xFFFF) ? d->peer_as : AS_TRANS); mrt_put_u16(b, (d->local_as <= 0xFFFF) ? d->local_as : AS_TRANS); } mrt_put_u16(b, (d->index <= 0xFFFF) ? d->index : 0); mrt_put_u16(b, d->af); if (d->af == BGP_AFI_IPV4) { mrt_put_ip4(b, ipa_to_ip4(d->peer_ip)); mrt_put_ip4(b, ipa_to_ip4(d->local_ip)); } else { mrt_put_ip6(b, ipa_to_ip6(d->peer_ip)); mrt_put_ip6(b, ipa_to_ip6(d->local_ip)); } } void mrt_dump_bgp_message(struct mrt_bgp_data *d) { const u16 subtypes[] = { MRT_BGP4MP_MESSAGE, MRT_BGP4MP_MESSAGE_AS4, MRT_BGP4MP_MESSAGE_LOCAL, MRT_BGP4MP_MESSAGE_AS4_LOCAL, MRT_BGP4MP_MESSAGE_ADDPATH, MRT_BGP4MP_MESSAGE_AS4_ADDPATH, MRT_BGP4MP_MESSAGE_LOCAL_ADDPATH, MRT_BGP4MP_MESSAGE_AS4_LOCAL_ADDPATH, }; buffer *b = mrt_bgp_buffer(); mrt_init_message(b, MRT_BGP4MP, subtypes[d->as4 + 4*d->add_path]); mrt_bgp_header(b, d); mrt_put_data(b, d->message, d->msg_len); mrt_dump_message(b, config->mrtdump_file); } void mrt_dump_bgp_state_change(struct mrt_bgp_data *d) { /* Convert state from our BS_* values to values used in MRTDump */ const u16 states[BS_MAX] = {1, 2, 3, 4, 5, 6, 1}; if (states[d->old_state] == states[d->new_state]) return; /* Always use AS4 mode for STATE_CHANGE */ d->as4 = 1; buffer *b = mrt_bgp_buffer(); mrt_init_message(b, MRT_BGP4MP, MRT_BGP4MP_STATE_CHANGE_AS4); mrt_bgp_header(b, d); mrt_put_u16(b, states[d->old_state]); mrt_put_u16(b, states[d->new_state]); mrt_dump_message(b, config->mrtdump_file); } /* * MRT protocol glue */ void mrt_check_config(struct proto_config *CF) { struct mrt_config *cf = (void *) CF; if (!cf->table_expr && !cf->table_cf) cf_error("Table not specified"); if (!cf->filename) cf_error("File not specified"); if (!cf->period) cf_error("Period not specified"); } static struct proto * mrt_init(struct proto_config *CF) { struct proto *P = proto_new(CF); return P; } static int mrt_start(struct proto *P) { struct mrt_proto *p = (void *) P; struct mrt_config *cf = (void *) (P->cf); p->timer = tm_new_init(P->pool, mrt_timer, p, cf->period S, 0); p->event = ev_new_init(P->pool, mrt_event, p); tm_start(p->timer, cf->period S); return PS_UP; } static int mrt_shutdown(struct proto *P) { struct mrt_proto *p = (void *) P; return p->table_dump ? PS_STOP : PS_DOWN; } static int mrt_reconfigure(struct proto *P, struct proto_config *CF) { struct mrt_proto *p = (void *) P; struct mrt_config *old = (void *) (P->cf); struct mrt_config *new = (void *) CF; if (new->period != old->period) { TRACE(D_EVENTS, "Changing period from %u to %u s", old->period, new->period); btime now = current_time(); btime new_time = p->timer->expires - (old->period S) + (new->period S); p->timer->recurrent = new->period S; tm_set(p->timer, MAX(now, new_time)); } return 1; } static void mrt_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSED) { /* Do nothing */ } struct protocol proto_mrt = { .name = "MRT", .template = "mrt%d", .class = PROTOCOL_MRT, .proto_size = sizeof(struct mrt_proto), .config_size = sizeof(struct mrt_config), .init = mrt_init, .start = mrt_start, .shutdown = mrt_shutdown, .reconfigure = mrt_reconfigure, .copy_config = mrt_copy_config, }; void mrt_build(void) { proto_build(&proto_mrt); }