0
0
mirror of https://gitlab.nic.cz/labs/bird.git synced 2025-01-14 21:11:53 +00:00

Flock: Now the container code looks like doing at least something

This commit is contained in:
Maria Matejka 2024-09-17 22:51:19 +02:00
parent 444454aa66
commit 5b41168dcd
4 changed files with 499 additions and 126 deletions

View File

@ -6,10 +6,18 @@
#include <stdlib.h>
/*
 * Parameters of the container currently being requested.
 * The strings are filled in by the request parser and read when
 * the container is started.
 */
struct container_config {
  const char *hostname;
  const char *workdir;
  const char *basedir;
};

static struct container_config ccf;

/* Forker-side end of the socketpair to the hypervisor; -1 until assigned */
static int container_forker_fd = -1;
static void
container_mainloop(struct flock_machine_container_config *cfg, int fd)
container_mainloop(int fd)
{
log(L_INFO "container mainloop");
log(L_INFO "container mainloop with fd %d", fd);
/* TODO cleanup the loops from the forked process */
while (1)
{
@ -18,103 +26,11 @@ container_mainloop(struct flock_machine_container_config *cfg, int fd)
}
}
struct container_start_callback {
callback cb;
sock *s, *skm;
struct birdloop *loop;
pool *pool;
/* Stored socket hooks */
int (*rx_hook)(sock *, uint size);
void (*err_hook)(sock *, int);
void *data;
/* Actual config */
struct flock_machine_container_config cfg;
};
static void
container_start_sk_err(sock *s, int e)
container_start(void)
{
struct container_start_callback *cb = s->data;
cb->skm->data = NULL;
s->data = cb->data;
s->err_hook = cb->err_hook;
mb_free(cb);
s->err_hook(s, e);
}
static int
container_parent_rx(sock *skm, uint size)
{
bug("container_parent_rx");
ASSERT_DIE(size >= 3);
ASSERT_DIE(skm->rbuf[0] == 0xa1);
switch (skm->rbuf[1])
{
case 0:
{
pid_t pid;
if (skm->rbuf[2] < 24)
pid = skm->rbuf[2];
else if (skm->rbuf[2] == 24)
pid = skm->rbuf[3];
else if (skm->rbuf[2] == 25)
pid = skm->rbuf[3] << 8 + skm->rbuf[4];
else if (skm->rbuf[3] == 26)
pid = skm->rbuf[3] << 32 + skm->rbuf[4] << 24 + skm->rbuf[5] << 16 + skm->rbuf[6];
else
bug("not implemented");
log(L_INFO "Machine started with PID %d", pid);
if (!skm->data)
return 1;
struct container_start_callback *cb = skm->data;
struct linpool *lp = lp_new(cb->s->pool);
struct cbor_writer *cw = cbor_init(cb->s->tbuf, cb->s->tbsize, lp);
cbor_open_block_with_length(cw, 1);
cbor_add_int(cw, -1);
cbor_add_string(cw, "OK");
sk_send(cb->s, cw->pt);
rfree(lp);
cb->s->data = cb->data;
cb->s->err_hook = cb->err_hook;
sk_resume_rx(cb->s->loop, cb->s, cb->rx_hook);
mb_free(cb);
return 1;
}
default:
bug("unimplemented");
}
return 1;
}
static void
container_parent_err(sock *s, int e)
{
log(L_ERR "Container parent error hook not implemented: %d (%s)", e, strerror(e));
sk_close(s);
}
static void
container_start_callback(struct callback *_cb)
{
SKIP_BACK_DECLARE(struct container_start_callback, cb, cb, _cb);
ASSERT_DIE(birdloop_inside(&main_birdloop));
log(L_INFO "Requested to start a container, name %s, base %s, work %s",
cb->cfg.cf.name, cb->cfg.basedir, cb->cfg.workdir);
ccf.hostname, ccf.basedir, ccf.workdir);
/* create socketpair before forking to do communication */
int fds[2];
@ -129,45 +45,493 @@ container_start_callback(struct callback *_cb)
if (!pid)
{
close(fds[0]);
container_mainloop(&cb->cfg, fds[1]); /* this never returns */
container_mainloop(fds[1]); /* this never returns */
bug("container_mainloop has returned");
}
close(fds[1]);
birdloop_enter(cb->loop);
sock *skm = sk_new(cb->pool);
skm->type = SK_MAGIC;
skm->fd = fds[0];
skm->rx_hook = container_parent_rx;
skm->err_hook = container_parent_err;
skm->data = cb;
cb->skm = skm;
byte outbuf[128];
linpool *lp = lp_new(&root_pool);
struct cbor_writer *cw = cbor_init(outbuf, sizeof outbuf, lp);
cbor_open_block_with_length(cw, 1);
cbor_add_int(cw, -2);
cbor_add_int(cw, pid);
struct iovec v = {
.iov_base = outbuf,
.iov_len = cw->pt,
};
byte cbuf[CMSG_SPACE(sizeof fds[0])];
struct msghdr m = {
.msg_iov = &v,
.msg_iovlen = 1,
.msg_control = &cbuf,
.msg_controllen = sizeof cbuf,
};
struct cmsghdr *c = CMSG_FIRSTHDR(&m);
c->cmsg_level = SOL_SOCKET;
c->cmsg_type = SCM_RIGHTS;
c->cmsg_len = CMSG_LEN(sizeof fds[0]);
memcpy(CMSG_DATA(c), &fds[0], sizeof fds[0]);
if (sk_open(skm, cb->loop) < 0)
bug("Container listener: sk_open failed");
e = sendmsg(container_forker_fd, &m, 0);
if (e < 0)
log(L_ERR "Failed to send socket: %m");
birdloop_leave(cb->loop);
close(fds[0]);
rfree(lp);
}
/* The Parent */
/*
 * RX hook installed on a machine control socket (see
 * hypervisor_container_forker_rx). For now it only logs that data arrived;
 * nothing is read or parsed here.
 * NOTE(review): the meaning of the return value 1 is defined by the socket
 * layer, not visible in this file -- confirm.
 */
static int
hypervisor_container_rx(sock *sk, uint sz)
{
log(L_INFO "received %u data from %p (container_rx)", sz, sk);
return 1;
}
/*
 * RX hook of the hypervisor's socket to the container forker.
 *
 * Reads one message straight from the socket fd with recvmsg(). The message
 * is expected to be the CBOR map { -2: <pid> } (bytes 0xa1 0x21 followed by
 * an unsigned integer) together with an SCM_RIGHTS control message carrying
 * the file descriptor of the new machine's control socket. That descriptor
 * is wrapped into a new socket in the same pool and loop as @sk.
 *
 * A read shorter than three bytes is treated as a hangup: the socket is
 * closed and poweroff_event is sent to the main loop.
 *
 * Always returns 0.
 * NOTE(review): the return value semantics come from the socket layer,
 * which is not visible here -- confirm.
 */
static int
hypervisor_container_forker_rx(sock *sk, uint _sz UNUSED)
{
  int sfd = -1;
  byte buf[128] = { 0 };
  byte cbuf[CMSG_SPACE(sizeof sfd)];

  struct iovec v = {
    .iov_base = buf,
    .iov_len = sizeof buf,
  };
  struct msghdr m = {
    .msg_iov = &v,
    .msg_iovlen = 1,
    .msg_control = &cbuf,
    .msg_controllen = sizeof cbuf,
  };

  int e = recvmsg(sk->fd, &m, 0);
  if (e < 3)
  {
    log(L_ERR "Container forker RX hangup, what the hell");
    sk_close(sk);
    ev_send_loop(&main_birdloop, &poweroff_event);
    return 0;
  }

  /* The descriptor must really have been passed along; CMSG_FIRSTHDR()
   * yields NULL when no control data arrived. */
  struct cmsghdr *c = CMSG_FIRSTHDR(&m);
  ASSERT_DIE(c);
  ASSERT_DIE(c->cmsg_level == SOL_SOCKET);
  ASSERT_DIE(c->cmsg_type == SCM_RIGHTS);
  ASSERT_DIE(c->cmsg_len >= CMSG_LEN(sizeof sfd));
  memcpy(&sfd, CMSG_DATA(c), sizeof sfd);

  /* Map of one item, key -2 */
  ASSERT_DIE(buf[0] == 0xa1);
  ASSERT_DIE(buf[1] == 0x21);

  /* The value is a CBOR unsigned integer: either immediate in the initial
   * byte, or 1, 2 or 4 following bytes in network order. */
  pid_t pid;
  if (buf[2] < 24)
    pid = buf[2];
  else if (buf[2] == 24)
  {
    ASSERT_DIE(e >= 4);
    pid = buf[3];
  }
  else if (buf[2] == 25)
  {
    ASSERT_DIE(e >= 5);
    pid = ((uint) buf[3] << 8) | buf[4];
  }
  else if (buf[2] == 26)
  {
    ASSERT_DIE(e >= 7);
    pid = ((uint) buf[3] << 24) | ((uint) buf[4] << 16) | ((uint) buf[5] << 8) | buf[6];
  }
  else
    bug("not implemented");

  log(L_INFO "Machine started with PID %d", pid);

  /* NOTE(review): the forker message visible in this file carries no port;
   * this value is just bytes 3-4 read as a big-endian number and is kept
   * only so that the log line stays the same -- confirm and drop. */
  u16 port = ((uint) buf[3] << 8) | buf[4];
  log(L_INFO "RX %d bytes, fd %d, port %u", e, sfd, port);

  sock *skl = sk_new(sk->pool);
  skl->type = SK_MAGIC;
  skl->rx_hook = hypervisor_container_rx;
  skl->fd = sfd;
  if (sk_open(skl, sk->loop) < 0)
    bug("Machine control socket: sk_open failed");

  /* TODO: create the machine struct
  struct hexp_received_telnet *hrt = mb_allocz(he.p, sizeof *hrt);
  *hrt = (struct hexp_received_telnet) {
    .e = {
      .hook = hexp_received_telnet,
      .data = hrt,
    },
    .p = sk->data,
    .port = port,
    .fd = sfd,
  };
  ev_send_loop(hcs_loop, &hrt->e);
  */

  return 0;
}
/*
 * Error hook of the hypervisor's socket to the container forker.
 * The error code is ignored; the socket is simply closed.
 */
static void
hypervisor_container_forker_err(sock *sk, int e UNUSED)
{
sk_close(sk);
}
/* The child */
/*
 * Hypervisor-side handle of the container forker: the socket used to send
 * requests to it, the pool that socket lives in and the loop it runs in.
 */
struct hypervisor_container_forker {
  sock *s;
  pool *p;
  struct birdloop *loop;
};

static struct hypervisor_container_forker hcf;
/*
 * Ask the container forker to start a container.
 *
 * @name:    machine hostname
 * @basedir: base directory of the machine
 * @workdir: working directory of the machine
 *
 * The request is a CBOR map of three items written into the forker socket's
 * TX buffer and sent from inside the forker loop. The keys must match what
 * hcf_parse() expects on the other side: 0 = hostname, 1 = workdir,
 * 2 = basedir.
 */
void
hypervisor_container_request(const char *name, const char *basedir, const char *workdir)
{
  birdloop_enter(hcf.loop);

  linpool *lp = lp_new(hcf.p);
  struct cbor_writer *cw = cbor_init(hcf.s->tbuf, hcf.s->tbsize, lp);

  cbor_open_block_with_length(cw, 3);

  cbor_add_int(cw, 0);
  cbor_add_string(cw, name);

  cbor_add_int(cw, 1);
  cbor_add_string(cw, workdir);

  cbor_add_int(cw, 2);
  cbor_add_string(cw, basedir);

  sk_send(hcf.s, cw->pt);

  /* The writer is not needed after sending; do not leak a linpool
   * per request. */
  rfree(lp);

  birdloop_leave(hcf.loop);
}
/*
 * State of the incremental CBOR parser used by hcf_parse().
 * Everything needed to resume parsing in the middle of an item is kept here,
 * as the input arrives in arbitrary chunks.
 */
struct cbor_parser_context {
/* Linpool from which the buffers for parsed strings are allocated */
linpool *lp;
/* Byte-level state: what the next input byte is going to be used for.
 * partial_next is stored by CBOR_PARSER_READ_INT; it is not read anywhere
 * in the code visible here. */
PACKED enum {
CPE_TYPE = 0, /* initial byte of an item (type + additional value) */
CPE_READ_INT, /* byte of a multi-byte integer argument */
CPE_COMPLETE_INT, /* the argument is complete, process the item */
CPE_READ_BYTE, /* payload byte of a (byte)string */
} partial_state, partial_next;
/* Upper three bits of the last initial byte */
byte type;
/* Lower five bits of the initial byte, or the multi-byte argument */
u64 value;
/* Bytes of the multi-byte argument still to be read */
u64 partial_countdown;
/* Sum of the sizes of the chunks parsed so far; zeroed at the end of
 * a toplevel item */
u64 bytes_consumed;
/* Where the next string payload byte is stored and how many remain */
byte *target_buf;
uint target_len;
/* Semantic state: 0 toplevel, 1 inside the toplevel mapping,
 * 2 to 4 expecting the value of key 0 to 2 */
u64 major_state;
/* Not used in the code visible here */
const char *error;
/* Remaining item count for each nesting level and the current level */
#define LOCAL_STACK_MAX_DEPTH 3
u64 stack_countdown[LOCAL_STACK_MAX_DEPTH];
uint stack_pos;
};
/* Parse errors are reported by calling bug() with the given format string */
#define CBOR_PARSER_ERROR bug
/*
 * Switch the parser to reading a multi-byte integer argument.
 * On entry ctx->value holds the additional value 24 to 27 of the initial
 * byte, which selects a length of 1, 2, 4 or 8 bytes. The value itself is
 * then zeroed so that the bytes can be accumulated into it, and the
 * requested follow-up state is stored in ctx->partial_next.
 */
#define CBOR_PARSER_READ_INT(next) do { \
ctx->partial_state = CPE_READ_INT; \
ctx->partial_countdown = (1 << (ctx->value - 24)); \
ctx->value = 0; \
ctx->partial_next = next; \
} while (0)
/* The single parser context of this file and a pointer to it, which is
 * what the macro above and hcf_parse() refer to as ctx */
static struct cbor_parser_context ctx_, *ctx = &ctx_;
static void
hcf_parse(byte *buf, int size)
{
ASSERT_DIE(size > 0);
for (int pos = 0; pos < size; pos++)
{
const byte bp = buf[pos];
bool value_is_special = 0;
bool exit_stack = false;
switch (ctx->partial_state)
{
case CPE_TYPE:
/* Split the byte to type and value */
ctx->type = bp >> 5;
ctx->value = bp & 0x1f;
if (ctx->type == 7)
{
if (ctx->value < 20)
CBOR_PARSER_ERROR("Unknown simple value %u", ctx->value);
else if (ctx->value < 24)
; /* false, true, null, undefined */
else if (ctx->value < 28)
{
/* Need more data */
CBOR_PARSER_READ_INT(CPE_COMPLETE_INT);
break;
}
else if (ctx->value == 31)
; /* break-stop */
else
CBOR_PARSER_ERROR("Unknown simple value %u", ctx->value);
}
else
{
if (ctx->value < 24)
; /* Immediate value, fall through */
else if (ctx->value < 28)
{
/* Need more data */
CBOR_PARSER_READ_INT(CPE_COMPLETE_INT);
break;
}
else if ((ctx->value == 31) && (ctx->type >= 2) && (ctx->type <= 5))
/* Indefinite length, fall through */
value_is_special = 1;
else
CBOR_PARSER_ERROR("Garbled additional value %u for type %u", ctx->value, ctx->type);
}
/* fall through */
case CPE_READ_INT:
if (ctx->partial_state == CPE_READ_INT)
{
/* Reading a network order integer */
ctx->value <<= 8;
ctx->value |= bp;
if (--ctx->partial_countdown)
break;
}
/* fall through */
case CPE_COMPLETE_INT:
/* TODO: exception for 7-31 end of long thing */
/* Check type acceptance */
switch (ctx->major_state)
{
case 0: /* toplevel */
if (ctx->type != 5)
CBOR_PARSER_ERROR("Expected mapping, got %u", ctx->type);
ccf = (struct container_config) {};
ctx->major_state = 1;
break;
case 1: /* inside toplevel mapping */
if (ctx->type != 0)
CBOR_PARSER_ERROR("Expected integer, got %u", ctx->type);
if (ctx->value >= 3)
CBOR_PARSER_ERROR("Mapping key too high, got %lu", ctx->value);
ctx->major_state = ctx->value + 2;
break;
case 2: /* machine hostname */
if (ctx->type != 3)
CBOR_PARSER_ERROR("Expected string, got %u", ctx->type);
if (value_is_special)
CBOR_PARSER_ERROR("Variable length string not supported yet");
if (ccf.hostname)
CBOR_PARSER_ERROR("Duplicate argument 0 / hostname");
ASSERT_DIE(!ctx->target_buf);
ccf.hostname = ctx->target_buf = lp_alloc(ctx->lp, ctx->value + 1);
ctx->target_len = ctx->value;
break;
case 3: /* workdir */
if (ctx->type != 3)
CBOR_PARSER_ERROR("Expected string, got %u", ctx->type);
if (value_is_special)
CBOR_PARSER_ERROR("Variable length string not supported yet");
if (ccf.workdir)
CBOR_PARSER_ERROR("Duplicate argument 1 / workdir");
ASSERT_DIE(!ctx->target_buf);
ccf.workdir = ctx->target_buf = lp_alloc(ctx->lp, ctx->value + 1);
ctx->target_len = ctx->value;
break;
case 4: /* basedir */
if (ctx->type != 3)
CBOR_PARSER_ERROR("Expected string, got %u", ctx->type);
if (value_is_special)
CBOR_PARSER_ERROR("Variable length string not supported yet");
if (ccf.basedir)
CBOR_PARSER_ERROR("Duplicate argument 1 / basedir");
ASSERT_DIE(!ctx->target_buf);
ccf.basedir = ctx->target_buf = lp_alloc(ctx->lp, ctx->value + 1);
ctx->target_len = ctx->value;
break;
default:
bug("invalid parser state");
}
/* Some types are completely parsed, some not yet */
switch (ctx->type)
{
case 0:
case 1:
case 7:
exit_stack = !--ctx->stack_countdown[ctx->stack_pos];
ctx->partial_state = CPE_TYPE;
break;
case 2:
case 3:
ctx->partial_state = CPE_READ_BYTE;
ctx->partial_countdown = ctx->value;
ctx->target_buf = ctx->target_buf ?: lp_allocu(
ctx->lp, ctx->target_len = (ctx->target_len ?: ctx->value));
break;
case 4:
case 5:
if (++ctx->stack_pos >= LOCAL_STACK_MAX_DEPTH)
CBOR_PARSER_ERROR("Stack too deep");
/* set array/map size;
* once for arrays, twice for maps;
* ~0 for indefinite */
ctx->stack_countdown[ctx->stack_pos] = value_is_special ? ~0ULL :
(ctx->value * (ctx->type - 3));
ctx->partial_state = CPE_TYPE;
break;
}
break;
case CPE_READ_BYTE:
*ctx->target_buf = bp;
ctx->target_buf++;
if (--ctx->target_len)
break;
/* Read completely! */
switch (ctx->major_state)
{
case 2:
case 3:
case 4:
ctx->major_state = 1;
break;
default:
bug("Unexpected state to end a (byte)string in");
/* Code to run at the end of a (byte)string */
}
ctx->target_buf = NULL;
ctx->partial_state = CPE_TYPE;
exit_stack = !--ctx->stack_countdown[ctx->stack_pos];
}
/* End of array or map */
while (exit_stack)
{
switch (ctx->major_state)
{
/* Code to run at the end of the mapping */
case 0: /* toplevel item ended */
ctx->major_state = ~0ULL;
ctx->bytes_consumed = 0;
if (size > pos + 1)
hcf_parse(buf + pos + 1, size - pos - 1);
return;
case 1: /* the mapping ended */
if (!ccf.hostname)
CBOR_PARSER_ERROR("Missing hostname");
if (!ccf.workdir)
CBOR_PARSER_ERROR("Missing workdir");
if (!ccf.basedir)
CBOR_PARSER_ERROR("Missing basedir");
container_start();
ctx->major_state = 0;
break;
default:
bug("Unexpected state to end a mapping in");
}
/* Check exit from the next item */
ASSERT_DIE(ctx->stack_pos);
exit_stack = !--ctx->stack_countdown[--ctx->stack_pos];
}
}
ctx->bytes_consumed += size;
}
void
container_start(struct birdsock *s, struct flock_machine_container_config *cfg)
hypervisor_container_fork(void)
{
struct container_start_callback *cb = mb_alloc(s->pool, sizeof *cb);
*cb = (struct container_start_callback) {
.cb = callback_init(&cb->cb, container_start_callback, &main_birdloop),
.s = s,
.loop = s->loop,
.pool = s->pool,
.rx_hook = s->rx_hook,
.err_hook = s->err_hook,
.data = s->data,
.cfg = *cfg,
};
int fds[2], e;
sk_pause_rx(s->loop, s);
s->err_hook = container_start_sk_err;
s->data = cb;
/* create socketpair before forking to do communication */
e = socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
if (e < 0)
die("Failed to create internal socketpair: %m");
callback_activate(&cb->cb);
e = fork();
if (e < 0)
die("Failed to fork exposed: %m");
if (e)
{
/* parent side */
hcf.loop = birdloop_new(&root_pool, DOMAIN_ORDER(proto), 0, "Container forker");
birdloop_enter(hcf.loop);
hcf.p = rp_new(birdloop_pool(hcf.loop), birdloop_domain(hcf.loop), "Container forker pool");
hcf.s = sk_new(hcf.p);
hcf.s->type = SK_MAGIC;
/* Set the hooks and fds according to the side we are at */
hcf.s->rx_hook = hypervisor_container_forker_rx;
hcf.s->err_hook = hypervisor_container_forker_err;
sk_set_tbsize(hcf.s, 16384);
hcf.s->fd = fds[0];
close(fds[1]);
if (sk_open(hcf.s, hcf.loop) < 0)
bug("Container forker parent: sk_open failed");
birdloop_leave(hcf.loop);
return;
}
/* noreturn child side */
close(fds[0]);
container_forker_fd = fds[1];
/* initialize the forker */
ctx->lp = lp_new(&root_pool);
ctx->type = 0xff;
ctx->stack_countdown[0] = 1;
while (true)
{
byte buf[4096];
ssize_t rx = read(fds[1], buf, sizeof buf);
if (rx == 0)
{
log(L_INFO "Container forker socket closed, exiting");
exit(0);
}
if (rx < 0)
bug("Container forker child: failed to read: %m");
hcf_parse(buf, rx);
}
}

View File

@ -386,7 +386,11 @@ hcs_parse(struct cbor_parser_context *ctx, const byte *buf, s64 size)
if (!ctx->cfg.container.basedir)
CBOR_PARSER_ERROR("Machine basedir not specified");
container_start(ctx->sock, &ctx->cfg.container);
hypervisor_container_request(
ctx->cfg.cf.name,
ctx->cfg.container.basedir,
ctx->cfg.container.workdir);
ctx->major_state = 1;
break;

View File

@ -285,6 +285,9 @@ main(int argc, char **argv, char **argh UNUSED)
/* And now finally we can go for unsharing the networks */
SYSCALL(unshare, CLONE_NEWNET);
/* Before resuming, we also need to fork the container forker */
hypervisor_container_fork();
/* Set signal handlers as this process is init in its PID namespace */
signal(SIGTERM, hypervisor_poweroff_sighandler);
signal(SIGINT, hypervisor_poweroff_sighandler);

View File

@ -8,6 +8,7 @@
#include "lib/resource.h"
#include "lib/socket.h"
void hypervisor_container_fork(void);
void hypervisor_exposed_fork(void);
void hypervisor_control_socket(void);
@ -42,7 +43,8 @@ union flock_machine_config {
} container;
};
void container_start(sock *, struct flock_machine_container_config *);
void hypervisor_container_request(const char *name, const char *basedir, const char *workdir);
extern event reboot_event, poweroff_event;
extern event_list shutdown_event_list;