diff mbox series

[bpf,v2,v2] bpf: handle return value of BPF_PROG_TYPE_STRUCT_OPS prog

Message ID 20210914023351.3664499-1-houtao1@huawei.com
State New
Headers show
Series [bpf,v2,v2] bpf: handle return value of BPF_PROG_TYPE_STRUCT_OPS prog | expand

Commit Message

Hou Tao Sept. 14, 2021, 2:33 a.m. UTC
Currently if a function ptr in struct_ops has a return value, its
caller will get a random return value from it, because the return
value of related BPF_PROG_TYPE_STRUCT_OPS prog is just dropped.

So adding a new flag BPF_TRAMP_F_RET_FENTRY_RET to tell bpf trampoline
to save and return the return value of struct_ops prog if ret_size of
the function ptr is greater than 0. Also restricting the flag to be
used alone.

Fixes: 85d33df357b6 ("bpf: Introduce BPF_MAP_TYPE_STRUCT_OPS")
Signed-off-by: Hou Tao <houtao1@huawei.com>
---
 arch/x86/net/bpf_jit_comp.c | 53 ++++++++++++++++++++++++++++---------
 include/linux/bpf.h         |  3 ++-
 kernel/bpf/bpf_struct_ops.c |  7 +++--
 3 files changed, 47 insertions(+), 16 deletions(-)

Comments

Hou Tao Sept. 14, 2021, 2:45 a.m. UTC | #1
Hi,

On 9/14/2021 10:33 AM, Hou Tao wrote:
> Currently if a function ptr in struct_ops has a return value, its
> caller will get a random return value from it, because the return
> value of related BPF_PROG_TYPE_STRUCT_OPS prog is just dropped.
>
> So adding a new flag BPF_TRAMP_F_RET_FENTRY_RET to tell bpf trampoline
> to save and return the return value of struct_ops prog if ret_size of
> the function ptr is greater than 0. Also restricting the flag to be
> used alone.
>
> Fixes: 85d33df357b6 ("bpf: Introduce BPF_MAP_TYPE_STRUCT_OPS")
> Signed-off-by: Hou Tao <houtao1@huawei.com>
> ---
Forget to add the changelog:

v2: rebased on bpf tree
>  arch/x86/net/bpf_jit_comp.c | 53 ++++++++++++++++++++++++++++---------
>  include/linux/bpf.h         |  3 ++-
>  kernel/bpf/bpf_struct_ops.c |  7 +++--
>  3 files changed, 47 insertions(+), 16 deletions(-)
>
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 0fe6aacef3db..d24a512fd6f3 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -1744,7 +1744,7 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
>  }
>  
>  static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
> -			   struct bpf_prog *p, int stack_size, bool mod_ret)
> +			   struct bpf_prog *p, int stack_size, bool save_ret)
>  {
>  	u8 *prog = *pprog;
>  	u8 *jmp_insn;
> @@ -1777,11 +1777,15 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
>  	if (emit_call(&prog, p->bpf_func, prog))
>  		return -EINVAL;
>  
> -	/* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
> +	/*
> +	 * BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
>  	 * of the previous call which is then passed on the stack to
>  	 * the next BPF program.
> +	 *
> +	 * BPF_TRAMP_FENTRY trampoline may need to return the return
> +	 * value of BPF_PROG_TYPE_STRUCT_OPS prog.
>  	 */
> -	if (mod_ret)
> +	if (save_ret)
>  		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
>  
>  	/* replace 2 nops with JE insn, since jmp target is known */
> @@ -1828,13 +1832,15 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
>  }
>  
>  static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
> -		      struct bpf_tramp_progs *tp, int stack_size)
> +		      struct bpf_tramp_progs *tp, int stack_size,
> +		      bool save_ret)
>  {
>  	int i;
>  	u8 *prog = *pprog;
>  
>  	for (i = 0; i < tp->nr_progs; i++) {
> -		if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false))
> +		if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size,
> +				    save_ret))
>  			return -EINVAL;
>  	}
>  	*pprog = prog;
> @@ -1877,6 +1883,23 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
>  	return 0;
>  }
>  
> +static bool is_valid_bpf_tramp_flags(unsigned int flags)
> +{
> +	if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
> +	    (flags & BPF_TRAMP_F_SKIP_FRAME))
> +		return false;
> +
> +	/*
> +	 * BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops,
> +	 * and it must be used alone.
> +	 */
> +	if ((flags & BPF_TRAMP_F_RET_FENTRY_RET) &&
> +	    (flags & ~BPF_TRAMP_F_RET_FENTRY_RET))
> +		return false;
> +
> +	return true;
> +}
> +
>  /* Example:
>   * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
>   * its 'struct btf_func_model' will be nr_args=2
> @@ -1949,17 +1972,19 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
>  	struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN];
>  	u8 **branches = NULL;
>  	u8 *prog;
> +	bool save_ret;
>  
>  	/* x86-64 supports up to 6 arguments. 7+ can be added in the future */
>  	if (nr_args > 6)
>  		return -ENOTSUPP;
>  
> -	if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
> -	    (flags & BPF_TRAMP_F_SKIP_FRAME))
> +	if (!is_valid_bpf_tramp_flags(flags))
>  		return -EINVAL;
>  
> -	if (flags & BPF_TRAMP_F_CALL_ORIG)
> -		stack_size += 8; /* room for return value of orig_call */
> +	/* room for return value of orig_call or fentry prog */
> +	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
> +	if (save_ret)
> +		stack_size += 8;
>  
>  	if (flags & BPF_TRAMP_F_IP_ARG)
>  		stack_size += 8; /* room for IP address argument */
> @@ -2005,7 +2030,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
>  	}
>  
>  	if (fentry->nr_progs)
> -		if (invoke_bpf(m, &prog, fentry, stack_size))
> +		if (invoke_bpf(m, &prog, fentry, stack_size,
> +			       flags & BPF_TRAMP_F_RET_FENTRY_RET))
>  			return -EINVAL;
>  
>  	if (fmod_ret->nr_progs) {
> @@ -2052,7 +2078,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
>  	}
>  
>  	if (fexit->nr_progs)
> -		if (invoke_bpf(m, &prog, fexit, stack_size)) {
> +		if (invoke_bpf(m, &prog, fexit, stack_size, false)) {
>  			ret = -EINVAL;
>  			goto cleanup;
>  		}
> @@ -2072,9 +2098,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
>  			ret = -EINVAL;
>  			goto cleanup;
>  		}
> -		/* restore original return value back into RAX */
> -		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
>  	}
> +	/* restore return value of orig_call or fentry prog back into RAX */
> +	if (save_ret)
> +		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
>  
>  	EMIT1(0x5B); /* pop rbx */
>  	EMIT1(0xC9); /* leave */
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index f4c16f19f83e..020a7d5bf470 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -578,11 +578,12 @@ struct btf_func_model {
>   * programs only. Should not be used with normal calls and indirect calls.
>   */
>  #define BPF_TRAMP_F_SKIP_FRAME		BIT(2)
> -
>  /* Store IP address of the caller on the trampoline stack,
>   * so it's available for trampoline's programs.
>   */
>  #define BPF_TRAMP_F_IP_ARG		BIT(3)
> +/* Return the return value of fentry prog. Only used by bpf_struct_ops. */
> +#define BPF_TRAMP_F_RET_FENTRY_RET	BIT(4)
>  
>  /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
>   * bytes on x86.  Pick a number to fit into BPF_IMAGE_SIZE / 2
> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
> index d6731c32864e..9abcc33f02cf 100644
> --- a/kernel/bpf/bpf_struct_ops.c
> +++ b/kernel/bpf/bpf_struct_ops.c
> @@ -368,6 +368,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>  		const struct btf_type *mtype, *ptype;
>  		struct bpf_prog *prog;
>  		u32 moff;
> +		u32 flags;
>  
>  		moff = btf_member_bit_offset(t, member) / 8;
>  		ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL);
> @@ -431,10 +432,12 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>  
>  		tprogs[BPF_TRAMP_FENTRY].progs[0] = prog;
>  		tprogs[BPF_TRAMP_FENTRY].nr_progs = 1;
> +		flags = st_ops->func_models[i].ret_size > 0 ?
> +			BPF_TRAMP_F_RET_FENTRY_RET : 0;
>  		err = arch_prepare_bpf_trampoline(NULL, image,
>  						  st_map->image + PAGE_SIZE,
> -						  &st_ops->func_models[i], 0,
> -						  tprogs, NULL);
> +						  &st_ops->func_models[i],
> +						  flags, tprogs, NULL);
>  		if (err < 0)
>  			goto reset_unlock;
>
diff mbox series

Patch

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 0fe6aacef3db..d24a512fd6f3 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1744,7 +1744,7 @@  static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
 }
 
 static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
-			   struct bpf_prog *p, int stack_size, bool mod_ret)
+			   struct bpf_prog *p, int stack_size, bool save_ret)
 {
 	u8 *prog = *pprog;
 	u8 *jmp_insn;
@@ -1777,11 +1777,15 @@  static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	if (emit_call(&prog, p->bpf_func, prog))
 		return -EINVAL;
 
-	/* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
+	/*
+	 * BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
 	 * of the previous call which is then passed on the stack to
 	 * the next BPF program.
+	 *
+	 * BPF_TRAMP_FENTRY trampoline may need to return the return
+	 * value of BPF_PROG_TYPE_STRUCT_OPS prog.
 	 */
-	if (mod_ret)
+	if (save_ret)
 		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
 
 	/* replace 2 nops with JE insn, since jmp target is known */
@@ -1828,13 +1832,15 @@  static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
 }
 
 static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
-		      struct bpf_tramp_progs *tp, int stack_size)
+		      struct bpf_tramp_progs *tp, int stack_size,
+		      bool save_ret)
 {
 	int i;
 	u8 *prog = *pprog;
 
 	for (i = 0; i < tp->nr_progs; i++) {
-		if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false))
+		if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size,
+				    save_ret))
 			return -EINVAL;
 	}
 	*pprog = prog;
@@ -1877,6 +1883,23 @@  static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
 	return 0;
 }
 
+static bool is_valid_bpf_tramp_flags(unsigned int flags)
+{
+	if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
+	    (flags & BPF_TRAMP_F_SKIP_FRAME))
+		return false;
+
+	/*
+	 * BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops,
+	 * and it must be used alone.
+	 */
+	if ((flags & BPF_TRAMP_F_RET_FENTRY_RET) &&
+	    (flags & ~BPF_TRAMP_F_RET_FENTRY_RET))
+		return false;
+
+	return true;
+}
+
 /* Example:
  * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
  * its 'struct btf_func_model' will be nr_args=2
@@ -1949,17 +1972,19 @@  int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN];
 	u8 **branches = NULL;
 	u8 *prog;
+	bool save_ret;
 
 	/* x86-64 supports up to 6 arguments. 7+ can be added in the future */
 	if (nr_args > 6)
 		return -ENOTSUPP;
 
-	if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
-	    (flags & BPF_TRAMP_F_SKIP_FRAME))
+	if (!is_valid_bpf_tramp_flags(flags))
 		return -EINVAL;
 
-	if (flags & BPF_TRAMP_F_CALL_ORIG)
-		stack_size += 8; /* room for return value of orig_call */
+	/* room for return value of orig_call or fentry prog */
+	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
+	if (save_ret)
+		stack_size += 8;
 
 	if (flags & BPF_TRAMP_F_IP_ARG)
 		stack_size += 8; /* room for IP address argument */
@@ -2005,7 +2030,8 @@  int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	}
 
 	if (fentry->nr_progs)
-		if (invoke_bpf(m, &prog, fentry, stack_size))
+		if (invoke_bpf(m, &prog, fentry, stack_size,
+			       flags & BPF_TRAMP_F_RET_FENTRY_RET))
 			return -EINVAL;
 
 	if (fmod_ret->nr_progs) {
@@ -2052,7 +2078,7 @@  int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	}
 
 	if (fexit->nr_progs)
-		if (invoke_bpf(m, &prog, fexit, stack_size)) {
+		if (invoke_bpf(m, &prog, fexit, stack_size, false)) {
 			ret = -EINVAL;
 			goto cleanup;
 		}
@@ -2072,9 +2098,10 @@  int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 			ret = -EINVAL;
 			goto cleanup;
 		}
-		/* restore original return value back into RAX */
-		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
 	}
+	/* restore return value of orig_call or fentry prog back into RAX */
+	if (save_ret)
+		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
 
 	EMIT1(0x5B); /* pop rbx */
 	EMIT1(0xC9); /* leave */
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f4c16f19f83e..020a7d5bf470 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -578,11 +578,12 @@  struct btf_func_model {
  * programs only. Should not be used with normal calls and indirect calls.
  */
 #define BPF_TRAMP_F_SKIP_FRAME		BIT(2)
-
 /* Store IP address of the caller on the trampoline stack,
  * so it's available for trampoline's programs.
  */
 #define BPF_TRAMP_F_IP_ARG		BIT(3)
+/* Return the return value of fentry prog. Only used by bpf_struct_ops. */
+#define BPF_TRAMP_F_RET_FENTRY_RET	BIT(4)
 
 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
  * bytes on x86.  Pick a number to fit into BPF_IMAGE_SIZE / 2
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index d6731c32864e..9abcc33f02cf 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -368,6 +368,7 @@  static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 		const struct btf_type *mtype, *ptype;
 		struct bpf_prog *prog;
 		u32 moff;
+		u32 flags;
 
 		moff = btf_member_bit_offset(t, member) / 8;
 		ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL);
@@ -431,10 +432,12 @@  static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 
 		tprogs[BPF_TRAMP_FENTRY].progs[0] = prog;
 		tprogs[BPF_TRAMP_FENTRY].nr_progs = 1;
+		flags = st_ops->func_models[i].ret_size > 0 ?
+			BPF_TRAMP_F_RET_FENTRY_RET : 0;
 		err = arch_prepare_bpf_trampoline(NULL, image,
 						  st_map->image + PAGE_SIZE,
-						  &st_ops->func_models[i], 0,
-						  tprogs, NULL);
+						  &st_ops->func_models[i],
+						  flags, tprogs, NULL);
 		if (err < 0)
 			goto reset_unlock;