Message ID | 20201118083253.4150-1-mariuszx.dudek@intel.com |
---|---|
Headers | show |
Series | libbpf: add support for privileged/unprivileged control separation | expand |
On Wed, Nov 18, 2020 at 9:34 AM <mariusz.dudek@gmail.com> wrote: > > From: Mariusz Dudek <mariuszx.dudek@intel.com> > > Add support for separation of eBPF program load and xsk socket > creation. > > This is needed for use-case when you want to privide as little > privileges as possible to the data plane application that will > handle xsk socket creation and incoming traffic. > > With this patch the data entity container can be run with only > CAP_NET_RAW capability to fulfill its purpose of creating xsk > socket and handling packages. In case your umem is larger or > equal process limit for MEMLOCK you need either increase the > limit or CAP_IPC_LOCK capability. > > To resolve privileges issue two APIs are introduced: > > - xsk_setup_xdp_prog - loads the built in XDP program. It can > also return xsks_map_fd which is needed by unprivileged process > to update xsks_map with AF_XDP socket "fd" > > - xsk_socket__update_xskmap - inserts an AF_XDP socket into an xskmap > for a particular xsk_socket > > Signed-off-by: Mariusz Dudek <mariuszx.dudek@intel.com> > --- > tools/lib/bpf/libbpf.map | 2 + > tools/lib/bpf/xsk.c | 97 ++++++++++++++++++++++++++++++++++++---- > tools/lib/bpf/xsk.h | 5 +++ > 3 files changed, 95 insertions(+), 9 deletions(-) > > diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map > index 29ff4807b909..d939d5ac092e 100644 > --- a/tools/lib/bpf/libbpf.map > +++ b/tools/lib/bpf/libbpf.map > @@ -345,4 +345,6 @@ LIBBPF_0.3.0 { > btf__parse_split; > btf__new_empty_split; > btf__new_split; > + xsk_setup_xdp_prog; > + xsk_socket__update_xskmap; > } LIBBPF_0.2.0; > diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c > index 9bc537d0b92d..e16f920d2ef9 100644 > --- a/tools/lib/bpf/xsk.c > +++ b/tools/lib/bpf/xsk.c > @@ -566,8 +566,42 @@ static int xsk_set_bpf_maps(struct xsk_socket *xsk) > &xsk->fd, 0); > } > > -static int xsk_setup_xdp_prog(struct xsk_socket *xsk) > +static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk) > { > + 
char ifname[IFNAMSIZ]; > + struct xsk_ctx *ctx; > + char *interface; > + int res = -1; No need to set it to -1 anymore, due to the below. > + > + ctx = calloc(1, sizeof(*ctx)); > + if (!ctx) > + goto error_ctx; return an -ENOMEM here directly. > + > + interface = if_indextoname(ifindex, &ifname[0]); > + if (!interface) { > + res = -errno; > + goto error_ifindex; > + } > + > + ctx->ifindex = ifindex; > + strncpy(ctx->ifname, ifname, IFNAMSIZ - 1); > + ctx->ifname[IFNAMSIZ - 1] = 0; > + > + xsk->ctx = ctx; > + > + return 0; > + > +error_ifindex: > + free(ctx); > +error_ctx: And you can get rid of this label. > + return res; > +} > + > +static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, > + bool force_set_map, force_set_map always seems to be false now. Correct? If it is, then it is not needed anymore. What was the original use case of this boolean? > + int *xsks_map_fd) > +{ > + struct xsk_socket *xsk = _xdp; > struct xsk_ctx *ctx = xsk->ctx; > __u32 prog_id = 0; > int err; > @@ -584,8 +618,7 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) > > err = xsk_load_xdp_prog(xsk); > if (err) { > - xsk_delete_bpf_maps(xsk); > - return err; > + goto err_load_xdp_prog; > } > } else { > ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id); > @@ -598,15 +631,29 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) > } > } > > - if (xsk->rx) > + if (xsk->rx || force_set_map) { > err = xsk_set_bpf_maps(xsk); > - if (err) { > - xsk_delete_bpf_maps(xsk); > - close(ctx->prog_fd); > - return err; > + if (err) { > + if (!prog_id) { > + goto err_set_bpf_maps; > + } else { > + close(ctx->prog_fd); > + return err; > + } > + } > } > + if (xsks_map_fd) > + *xsks_map_fd = ctx->xsks_map_fd; > > return 0; > + > +err_set_bpf_maps: > + close(ctx->prog_fd); > + bpf_set_link_xdp_fd(ctx->ifindex, -1, 0); > +err_load_xdp_prog: > + xsk_delete_bpf_maps(xsk); > + > + return err; > } > > static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex, > @@ -689,6 +736,38 @@ static struct 
xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, > return ctx; > } > > +static void xsk_destroy_xsk_struct(struct xsk_socket *xsk) > +{ > + free(xsk->ctx); > + free(xsk); > +} > + > +int xsk_socket__update_xskmap(struct xsk_socket *xsk, int fd) > +{ > + xsk->ctx->xsks_map_fd = fd; > + return xsk_set_bpf_maps(xsk); > +} > + > +int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd) > +{ > + struct xsk_socket *xsk; > + int res = -1; > + > + xsk = calloc(1, sizeof(*xsk)); > + if (!xsk) > + return res; > + > + res = xsk_create_xsk_struct(ifindex, xsk); > + if (res) > + return -EINVAL; Here you can now return the error from the function, i.e. return res, as we returned -ENOMEM in that function. You are however leaking the xsk struct you just allocated in case of error. Needs to be deallocated. > + > + res = __xsk_setup_xdp_prog(xsk, false, xsks_map_fd); > + > + xsk_destroy_xsk_struct(xsk); > + > + return res; > +} > + > int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, > const char *ifname, > __u32 queue_id, struct xsk_umem *umem, > @@ -838,7 +917,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, > ctx->prog_fd = -1; > > if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { > - err = xsk_setup_xdp_prog(xsk); > + err = __xsk_setup_xdp_prog(xsk, false, NULL); > if (err) > goto out_mmap_tx; > } > diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h > index 1069c46364ff..5b74c17ed3d4 100644 > --- a/tools/lib/bpf/xsk.h > +++ b/tools/lib/bpf/xsk.h > @@ -201,6 +201,11 @@ struct xsk_umem_config { > __u32 flags; > }; > > +LIBBPF_API int xsk_setup_xdp_prog(int ifindex, > + int *xsks_map_fd); > +LIBBPF_API int xsk_socket__update_xskmap(struct xsk_socket *xsk, > + int xsks_map_fd); > + > /* Flags for the libbpf_flags field. */ > #define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0) > > -- > 2.20.1 >
On Wed, Nov 25, 2020 at 3:30 PM Magnus Karlsson <magnus.karlsson@gmail.com> wrote: > > On Wed, Nov 18, 2020 at 9:34 AM <mariusz.dudek@gmail.com> wrote: > > > > From: Mariusz Dudek <mariuszx.dudek@intel.com> > > > > Add support for separation of eBPF program load and xsk socket > > creation. > > > > This is needed for use-case when you want to privide as little > > privileges as possible to the data plane application that will > > handle xsk socket creation and incoming traffic. > > > > With this patch the data entity container can be run with only > > CAP_NET_RAW capability to fulfill its purpose of creating xsk > > socket and handling packages. In case your umem is larger or > > equal process limit for MEMLOCK you need either increase the > > limit or CAP_IPC_LOCK capability. > > > > To resolve privileges issue two APIs are introduced: > > > > - xsk_setup_xdp_prog - loads the built in XDP program. It can > > also return xsks_map_fd which is needed by unprivileged process > > to update xsks_map with AF_XDP socket "fd" > > > > - xsk_socket__update_xskmap - inserts an AF_XDP socket into an xskmap > > for a particular xsk_socket > > > > Signed-off-by: Mariusz Dudek <mariuszx.dudek@intel.com> > > --- > > tools/lib/bpf/libbpf.map | 2 + > > tools/lib/bpf/xsk.c | 97 ++++++++++++++++++++++++++++++++++++---- > > tools/lib/bpf/xsk.h | 5 +++ > > 3 files changed, 95 insertions(+), 9 deletions(-) > > > > diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map > > index 29ff4807b909..d939d5ac092e 100644 > > --- a/tools/lib/bpf/libbpf.map > > +++ b/tools/lib/bpf/libbpf.map > > @@ -345,4 +345,6 @@ LIBBPF_0.3.0 { > > btf__parse_split; > > btf__new_empty_split; > > btf__new_split; > > + xsk_setup_xdp_prog; > > + xsk_socket__update_xskmap; > > } LIBBPF_0.2.0; > > diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c > > index 9bc537d0b92d..e16f920d2ef9 100644 > > --- a/tools/lib/bpf/xsk.c > > +++ b/tools/lib/bpf/xsk.c > > @@ -566,8 +566,42 @@ static int 
xsk_set_bpf_maps(struct xsk_socket *xsk) > > &xsk->fd, 0); > > } > > > > -static int xsk_setup_xdp_prog(struct xsk_socket *xsk) > > +static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk) > > { > > + char ifname[IFNAMSIZ]; > > + struct xsk_ctx *ctx; > > + char *interface; > > + int res = -1; > > No need to set it to -1 anymore, due to the below. Will fix this > > > + > > + ctx = calloc(1, sizeof(*ctx)); > > + if (!ctx) > > + goto error_ctx; > > return an -ENOMEM here directly. -ENOMEM will be returned > > > + > > + interface = if_indextoname(ifindex, &ifname[0]); > > + if (!interface) { > > + res = -errno; > > + goto error_ifindex; > > + } > > + > > + ctx->ifindex = ifindex; > > + strncpy(ctx->ifname, ifname, IFNAMSIZ - 1); > > + ctx->ifname[IFNAMSIZ - 1] = 0; > > + > > + xsk->ctx = ctx; > > + > > + return 0; > > + > > +error_ifindex: > > + free(ctx); > > +error_ctx: > > And you can get rid of this label. I will get rid of both labels as I can return either -ENOMEM or -errno from both places directly > > > + return res; > > +} > > + > > +static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, > > + bool force_set_map, > > force_set_map always seems to be false now. Correct? If it is, then it > is not needed anymore. What was the original use case of this boolean? > force_set_map was used before for setting xsk bpf maps, but after code change it is no longer needed. I will remove it. 
> > + int *xsks_map_fd) > > +{ > > + struct xsk_socket *xsk = _xdp; > > struct xsk_ctx *ctx = xsk->ctx; > > __u32 prog_id = 0; > > int err; > > @@ -584,8 +618,7 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) > > > > err = xsk_load_xdp_prog(xsk); > > if (err) { > > - xsk_delete_bpf_maps(xsk); > > - return err; > > + goto err_load_xdp_prog; > > } > > } else { > > ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id); > > @@ -598,15 +631,29 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) > > } > > } > > > > - if (xsk->rx) > > + if (xsk->rx || force_set_map) { > > err = xsk_set_bpf_maps(xsk); > > - if (err) { > > - xsk_delete_bpf_maps(xsk); > > - close(ctx->prog_fd); > > - return err; > > + if (err) { > > + if (!prog_id) { > > + goto err_set_bpf_maps; > > + } else { > > + close(ctx->prog_fd); > > + return err; > > + } > > + } > > } > > + if (xsks_map_fd) > > + *xsks_map_fd = ctx->xsks_map_fd; > > > > return 0; > > + > > +err_set_bpf_maps: > > + close(ctx->prog_fd); > > + bpf_set_link_xdp_fd(ctx->ifindex, -1, 0); > > +err_load_xdp_prog: > > + xsk_delete_bpf_maps(xsk); > > + > > + return err; > > } > > > > static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex, > > @@ -689,6 +736,38 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, > > return ctx; > > } > > > > +static void xsk_destroy_xsk_struct(struct xsk_socket *xsk) > > +{ > > + free(xsk->ctx); > > + free(xsk); > > +} > > + > > +int xsk_socket__update_xskmap(struct xsk_socket *xsk, int fd) > > +{ > > + xsk->ctx->xsks_map_fd = fd; > > + return xsk_set_bpf_maps(xsk); > > +} > > + > > +int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd) > > +{ > > + struct xsk_socket *xsk; > > + int res = -1; > > + > > + xsk = calloc(1, sizeof(*xsk)); > > + if (!xsk) > > + return res; > > + > > + res = xsk_create_xsk_struct(ifindex, xsk); > > + if (res) > > + return -EINVAL; > > Here you can now return the error from the function, i.e. return res, > as we returned -ENOMEM in that function. 
You are however leaking the > xsk struct you just allocated in case of error. Needs to be > deallocated. > xsk struct deallocated. -ENOMEM returned in case calloc fails. > > + > > + res = __xsk_setup_xdp_prog(xsk, false, xsks_map_fd); > > + > > + xsk_destroy_xsk_struct(xsk); > > + > > + return res; > > +} > > + > > int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, > > const char *ifname, > > __u32 queue_id, struct xsk_umem *umem, > > @@ -838,7 +917,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, > > ctx->prog_fd = -1; > > > > if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { > > - err = xsk_setup_xdp_prog(xsk); > > + err = __xsk_setup_xdp_prog(xsk, false, NULL); > > if (err) > > goto out_mmap_tx; > > } > > diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h > > index 1069c46364ff..5b74c17ed3d4 100644 > > --- a/tools/lib/bpf/xsk.h > > +++ b/tools/lib/bpf/xsk.h > > @@ -201,6 +201,11 @@ struct xsk_umem_config { > > __u32 flags; > > }; > > > > +LIBBPF_API int xsk_setup_xdp_prog(int ifindex, > > + int *xsks_map_fd); > > +LIBBPF_API int xsk_socket__update_xskmap(struct xsk_socket *xsk, > > + int xsks_map_fd); > > + > > /* Flags for the libbpf_flags field. */ > > #define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0) > > > > -- > > 2.20.1 > >
From: Mariusz Dudek <mariuszx.dudek@intel.com> This patch series adds support for separation of eBPF program load and xsk socket creation. In a Kubernetes environment, for example, you can have an AF_XDP CNI or daemonset that is responsible for launching pods that execute an application using AF_XDP sockets. It is desirable that the pod runs with as low privileges as possible, CAP_NET_RAW in this case, and that all operations that require privileges are contained in the CNI or daemonset. In this case, you have to be able to separate eBPF program load from xsk socket creation. Currently, this will not work with the xsk_socket__create APIs because you need to have CAP_NET_ADMIN privileges to load the eBPF program and CAP_SYS_ADMIN privileges to create and update xsk_bpf_maps. To be exact, xsk_set_bpf_maps does not need those privileges, but it takes the prog_fd and xsks_map_fd, and those are known only to the process that loaded the eBPF program. The API bpf_prog_get_fd_by_id, which looks up the fd of the prog using a prog_id, and bpf_map_get_fd_by_id, which looks up xsks_map_fd using a map_id, both require CAP_SYS_ADMIN. With this patch, the pod can be run with CAP_NET_RAW capability only. If your umem is larger than or equal to the process limit for MEMLOCK, you need to either increase the limit or have the CAP_IPC_LOCK capability. Without this patch, in case of insufficient rights, EPERM is returned by xsk_socket__create. To resolve this privilege issue, two new APIs are introduced: - xsk_setup_xdp_prog - loads the built-in XDP program. It can also return xsks_map_fd, which is needed by the unprivileged process to update xsks_map with AF_XDP socket "fd" - xsk_socket__update_xskmap - inserts an AF_XDP socket into an xskmap for a particular xsk_socket Usage example: int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd); int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd); Inserts AF_XDP socket "fd" into the xskmap. The first patch introduces the new APIs.
The second patch provides a new sample application working as the control process and a modification to the existing xdpsock application so that it works with fewer privileges. This patch set is based on bpf-next commit ea87ae85c9b3 ("bpf: Add tests for bpf_bprm_opts_set helper"). Since v2: - new APIs moved into the LIBBPF_0.3.0 section - struct bpf_prog_cfg_opts removed - loading own eBPF program via xsk_setup_xdp_prog functionality removed Since v1: - struct bpf_prog_cfg improved for backward/forward compatibility - API xsk_update_xskmap renamed to xsk_socket__update_xskmap - commit message formatting fixed Mariusz Dudek (2): libbpf: separate XDP program load with xsk socket creation samples/bpf: sample application for eBPF load and socket creation split samples/bpf/Makefile | 4 +- samples/bpf/xdpsock.h | 8 ++ samples/bpf/xdpsock_ctrl_proc.c | 184 ++++++++++++++++++++++++++++++++ samples/bpf/xdpsock_user.c | 146 +++++++++++++++++++++++-- tools/lib/bpf/libbpf.map | 2 + tools/lib/bpf/xsk.c | 97 +++++++++++++++-- tools/lib/bpf/xsk.h | 5 + 7 files changed, 427 insertions(+), 19 deletions(-) create mode 100644 samples/bpf/xdpsock_ctrl_proc.c