@@ -1276,16 +1276,23 @@ static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen)
{
- if (unlikely(max_optlen > PAGE_SIZE) || max_optlen < 0)
+ if (unlikely(max_optlen < 0))
return -EINVAL;
+ if (unlikely(max_optlen > PAGE_SIZE)) {
+ /* We don't expose optvals that are greater than PAGE_SIZE
+ * to the BPF program.
+ */
+ max_optlen = PAGE_SIZE;
+ }
+
ctx->optval = kzalloc(max_optlen, GFP_USER);
if (!ctx->optval)
return -ENOMEM;
ctx->optval_end = ctx->optval + max_optlen;
- return 0;
+ return max_optlen;
}
static void sockopt_free_buf(struct bpf_sockopt_kern *ctx)
@@ -1319,13 +1326,13 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
*/
max_optlen = max_t(int, 16, *optlen);
- ret = sockopt_alloc_buf(&ctx, max_optlen);
- if (ret)
- return ret;
+ max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
+ if (max_optlen < 0)
+ return max_optlen;
ctx.optlen = *optlen;
- if (copy_from_user(ctx.optval, optval, *optlen) != 0) {
+ if (copy_from_user(ctx.optval, optval, min(*optlen, max_optlen)) != 0) {
ret = -EFAULT;
goto out;
}
@@ -1353,8 +1360,14 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
/* export any potential modifications */
*level = ctx.level;
*optname = ctx.optname;
- *optlen = ctx.optlen;
- *kernel_optval = ctx.optval;
+
+ /* optlen == 0 from BPF indicates that we should
+ * use original userspace data.
+ */
+ if (ctx.optlen != 0) {
+ *optlen = ctx.optlen;
+ *kernel_optval = ctx.optval;
+ }
}
out:
@@ -1385,12 +1398,12 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
__cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT))
return retval;
- ret = sockopt_alloc_buf(&ctx, max_optlen);
- if (ret)
- return ret;
-
ctx.optlen = max_optlen;
+ max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
+ if (max_optlen < 0)
+ return max_optlen;
+
if (!retval) {
/* If kernel getsockopt finished successfully,
* copy whatever was returned to the user back
@@ -1404,10 +1417,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
goto out;
}
- if (ctx.optlen > max_optlen)
- ctx.optlen = max_optlen;
-
- if (copy_from_user(ctx.optval, optval, ctx.optlen) != 0) {
+ if (copy_from_user(ctx.optval, optval,
+ min(ctx.optlen, max_optlen)) != 0) {
ret = -EFAULT;
goto out;
}
@@ -1436,10 +1447,12 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
goto out;
}
- if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
- put_user(ctx.optlen, optlen)) {
- ret = -EFAULT;
- goto out;
+ if (ctx.optlen != 0) {
+ if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
+ put_user(ctx.optlen, optlen)) {
+ ret = -EFAULT;
+ goto out;
+ }
}
ret = ctx.retval;
Attaching to these hooks can break iptables because its optval is usually quite big, or at least bigger than the current PAGE_SIZE limit. David also mentioned some SCTP options can be big (around 256k). For such optvals we expose only the first PAGE_SIZE bytes to the BPF program. BPF program has two options: 1. Set ctx->optlen to 0 to indicate that the BPF's optval should be ignored and the kernel should use original userspace value. 2. Set ctx->optlen to something that's smaller than the PAGE_SIZE. v5: * use ctx->optlen == 0 with trimmed buffer (Alexei Starovoitov) * update the docs accordingly v4: * use temporary buffer to avoid optval == optval_end == NULL; this removes the corner case in the verifier that might assume non-zero PTR_TO_PACKET/PTR_TO_PACKET_END. v3: * don't increase the limit, bypass the argument v2: * proper comments formatting (Jakub Kicinski) Fixes: 0d01da6afc54 ("bpf: implement getsockopt and setsockopt hooks") Cc: David Laight <David.Laight@ACULAB.COM> Signed-off-by: Stanislav Fomichev <sdf@google.com> --- kernel/bpf/cgroup.c | 53 ++++++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 20 deletions(-)