diff mbox series

[v3,bpf-next,1/2] bpf: af_unix: Implement BPF iterator for UNIX domain socket.

Message ID 20210804070851.97834-2-kuniyu@amazon.co.jp
State New
Headers show
Series BPF iterator for UNIX domain socket. | expand

Commit Message

Kuniyuki Iwashima Aug. 4, 2021, 7:08 a.m. UTC
This patch implements the BPF iterator for the UNIX domain socket and
exports some functions under GPL for the CONFIG_UNIX=m case.

Currently, the batch optimization introduced for the TCP iterator in the
commit 04c7820b776f ("bpf: tcp: Bpf iter batching and lock_sock") is not
applied.  It will require replacing the big lock for the hash table with
small locks for each hash list not to block other processes.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
---
 fs/proc/proc_net.c      |  2 +
 include/linux/btf_ids.h |  3 +-
 kernel/bpf/bpf_iter.c   |  3 ++
 net/core/filter.c       |  1 +
 net/unix/af_unix.c      | 93 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 101 insertions(+), 1 deletion(-)

Comments

Martin KaFai Lau Aug. 6, 2021, 12:41 a.m. UTC | #1
On Wed, Aug 04, 2021 at 04:08:50PM +0900, Kuniyuki Iwashima wrote:
> Currently, the batch optimization introduced for the TCP iterator in the

> commit 04c7820b776f ("bpf: tcp: Bpf iter batching and lock_sock") is not

> applied.  It will require replacing the big lock for the hash table with

Maybe s/applied/used/.  I thought it meant the commit has not landed.

> small locks for each hash list not to block other processes.

Right, I don't think it can be directly reused.  Not necessarily
related to the big lock though.  Actually, a big lock will still
work for batching but just less ideal.

Batching is needed for supporting bpf_setsockopt.  It can be added later
together with the bpf_setsockopt support.
Kuniyuki Iwashima Aug. 6, 2021, 1:05 a.m. UTC | #2
From:   Martin KaFai Lau <kafai@fb.com>

Date:   Thu, 5 Aug 2021 17:41:14 -0700
> On Wed, Aug 04, 2021 at 04:08:50PM +0900, Kuniyuki Iwashima wrote:

> > Currently, the batch optimization introduced for the TCP iterator in the

> > commit 04c7820b776f ("bpf: tcp: Bpf iter batching and lock_sock") is not

> > applied.  It will require replacing the big lock for the hash table with

> may be s/applied/used/.  I thought it meant the commit is not landed.


Ah sorry, I meant the same optimisation logic is not used for now.
I'll change the word.


> > small locks for each hash list not to block other processes.

> Right, I don't think it can be directly reused.  Not necessary

> related to the big lock though.  Actually, a big lock will still

> work for batching but just less ideal.


Yes, batching can be done with a big lock.


> Batching is needed for supporting bpf_setsockopt.  It can be added later

> together with the bpf_setsockopt support.


I'm trying to replace the big lock, so I'll submit another set for batching
and bpf_setsockopt support.

Thank you!
diff mbox series

Patch

diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 15c2e55d2ed2..887a8102da9f 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -91,6 +91,7 @@  int bpf_iter_init_seq_net(void *priv_data, struct bpf_iter_aux_info *aux)
 #endif
 	return 0;
 }
+EXPORT_SYMBOL_GPL(bpf_iter_init_seq_net);
 
 void bpf_iter_fini_seq_net(void *priv_data)
 {
@@ -100,6 +101,7 @@  void bpf_iter_fini_seq_net(void *priv_data)
 	put_net(p->net);
 #endif
 }
+EXPORT_SYMBOL_GPL(bpf_iter_fini_seq_net);
 
 struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
 		struct proc_dir_entry *parent, const struct seq_operations *ops,
diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index 57890b357f85..bed4b9964581 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -172,7 +172,8 @@  extern struct btf_id_set name;
 	BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock)		\
 	BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock)			\
 	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock)			\
-	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)			\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock)
 
 enum {
 #define BTF_SOCK_TYPE(name, str) name,
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 2e9d47bb40ff..cb77dcb7a7dc 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -300,6 +300,7 @@  int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(bpf_iter_reg_target);
 
 void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info)
 {
@@ -679,6 +680,7 @@  struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop)
 
 	return iter_priv->prog;
 }
+EXPORT_SYMBOL_GPL(bpf_iter_get_info);
 
 int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
 {
@@ -698,6 +700,7 @@  int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
 	 */
 	return ret == 0 ? 0 : -EAGAIN;
 }
+EXPORT_SYMBOL_GPL(bpf_iter_run_prog);
 
 BPF_CALL_4(bpf_for_each_map_elem, struct bpf_map *, map, void *, callback_fn,
 	   void *, callback_ctx, u64, flags)
diff --git a/net/core/filter.c b/net/core/filter.c
index faf29fd82276..640734e8d61b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -10549,6 +10549,7 @@  BTF_SOCK_TYPE_xxx
 #else
 u32 btf_sock_ids[MAX_BTF_SOCK_TYPE];
 #endif
+EXPORT_SYMBOL_GPL(btf_sock_ids);
 
 BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk)
 {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 256c4e31132e..675ed1f3107e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -113,6 +113,7 @@ 
 #include <linux/security.h>
 #include <linux/freezer.h>
 #include <linux/file.h>
+#include <linux/btf_ids.h>
 
 #include "scm.h"
 
@@ -2982,6 +2983,64 @@  static const struct seq_operations unix_seq_ops = {
 	.stop   = unix_seq_stop,
 	.show   = unix_seq_show,
 };
+
+#ifdef CONFIG_BPF_SYSCALL
+struct bpf_iter__unix {
+	__bpf_md_ptr(struct bpf_iter_meta *, meta);
+	__bpf_md_ptr(struct unix_sock *, unix_sk);
+	uid_t uid __aligned(8);
+};
+
+static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
+			      struct unix_sock *unix_sk, uid_t uid)
+{
+	struct bpf_iter__unix ctx;
+
+	meta->seq_num--;  /* skip SEQ_START_TOKEN */
+	ctx.meta = meta;
+	ctx.unix_sk = unix_sk;
+	ctx.uid = uid;
+	return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
+{
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+	struct sock *sk = v;
+	uid_t uid;
+
+	if (v == SEQ_START_TOKEN)
+		return 0;
+
+	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, false);
+	return unix_prog_seq_show(prog, &meta, v, uid);
+}
+
+static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
+{
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+
+	if (!v) {
+		meta.seq = seq;
+		prog = bpf_iter_get_info(&meta, true);
+		if (prog)
+			(void)unix_prog_seq_show(prog, &meta, v, 0);
+	}
+
+	unix_seq_stop(seq, v);
+}
+
+static const struct seq_operations bpf_iter_unix_seq_ops = {
+	.start	= unix_seq_start,
+	.next	= unix_seq_next,
+	.stop	= bpf_iter_unix_seq_stop,
+	.show	= bpf_iter_unix_seq_show,
+};
+#endif
 #endif
 
 static const struct net_proto_family unix_family_ops = {
@@ -3022,6 +3081,35 @@  static struct pernet_operations unix_net_ops = {
 	.exit = unix_net_exit,
 };
 
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
+		     struct unix_sock *unix_sk, uid_t uid)
+
+static const struct bpf_iter_seq_info unix_seq_info = {
+	.seq_ops		= &bpf_iter_unix_seq_ops,
+	.init_seq_private	= bpf_iter_init_seq_net,
+	.fini_seq_private	= bpf_iter_fini_seq_net,
+	.seq_priv_size		= sizeof(struct seq_net_private),
+};
+
+static struct bpf_iter_reg unix_reg_info = {
+	.target			= "unix",
+	.ctx_arg_info_size	= 1,
+	.ctx_arg_info		= {
+		{ offsetof(struct bpf_iter__unix, unix_sk),
+		  PTR_TO_BTF_ID_OR_NULL },
+	},
+	.seq_info		= &unix_seq_info,
+};
+
+static void __init bpf_iter_register(void)
+{
+	unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
+	if (bpf_iter_reg_target(&unix_reg_info))
+		pr_warn("Warning: could not register bpf iterator unix\n");
+}
+#endif
+
 static int __init af_unix_init(void)
 {
 	int rc = -1;
@@ -3037,6 +3125,11 @@  static int __init af_unix_init(void)
 	sock_register(&unix_family_ops);
 	register_pernet_subsys(&unix_net_ops);
 	unix_bpf_build_proto();
+
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+	bpf_iter_register();
+#endif
+
 out:
 	return rc;
 }