@@ -1021,6 +1021,8 @@ static inline void bpf_enable_instrumentation(void)
extern const struct file_operations bpf_map_fops;
extern const struct file_operations bpf_prog_fops;
+extern const struct file_operations bpf_link_fops;
+extern const struct file_operations bpffs_iter_fops;
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
extern const struct bpf_prog_ops _name ## _prog_ops; \
@@ -1136,6 +1138,7 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
int bpf_iter_link_replace(struct bpf_link *link, struct bpf_prog *old_prog,
struct bpf_prog *new_prog);
int bpf_iter_new_fd(struct bpf_link *link);
+void *bpf_iter_get_from_fd(u32 ufd);
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
@@ -123,7 +123,8 @@ struct bpf_prog *bpf_iter_get_prog(struct seq_file *seq, u32 priv_data_size,
{
struct extra_priv_data *extra_data;
- if (seq->file->f_op != &anon_bpf_iter_fops)
+ if (seq->file->f_op != &anon_bpf_iter_fops &&
+ seq->file->f_op != &bpffs_iter_fops)
return NULL;
extra_data = get_extra_priv_dptr(seq->private, priv_data_size);
@@ -310,3 +311,48 @@ int bpf_iter_new_fd(struct bpf_link *link)
put_unused_fd(fd);
return err;
}
+
+/* ->open() for an iterator pinned in bpffs.
+ *
+ * The inode's private data is used as the struct bpf_iter_link, and a
+ * seq_file is prepared on @file for it.  Returns whatever
+ * prepare_seq_file() returns.
+ */
+static int bpffs_iter_open(struct inode *inode, struct file *file)
+{
+	return prepare_seq_file(file, inode->i_private);
+}
+
+/* ->release() for an iterator pinned in bpffs.
+ *
+ * Pinned iterator files are torn down exactly like anonymous ones, so
+ * this simply hands both arguments to anon_iter_release() and returns
+ * its result.
+ */
+static int bpffs_iter_release(struct inode *inode, struct file *file)
+{
+	return anon_iter_release(inode, file);
+}
+
+/* File operations for iterator objects pinned in bpffs.
+ *
+ * Non-static because code outside this file compares f_op against it
+ * and installs it on pinned iterator inodes.  Reads go through the
+ * generic seq_file reader.
+ */
+const struct file_operations bpffs_iter_fops = {
+	.open		= bpffs_iter_open,
+	.release	= bpffs_iter_release,
+	.read		= seq_read,
+};
+
+/* Resolve @ufd to the bpf_link behind a bpf iterator program.
+ *
+ * On success the link is returned with an extra reference taken through
+ * bpf_link_inc().  On failure an ERR_PTR is returned:
+ *   -EBADF   @ufd does not refer to an open file;
+ *   -EINVAL  @ufd is not a bpf_link fd, or the link's program is not a
+ *            tracing program with expected_attach_type BPF_TRACE_ITER.
+ */
+void *bpf_iter_get_from_fd(u32 ufd)
+{
+	const struct bpf_prog *prog;
+	struct bpf_link *link;
+	struct fd f;
+
+	f = fdget(ufd);
+	if (!f.file)
+		return ERR_PTR(-EBADF);
+	if (f.file->f_op != &bpf_link_fops) {
+		link = ERR_PTR(-EINVAL);
+		goto out;
+	}
+
+	link = f.file->private_data;
+	prog = link->prog;
+	/* expected_attach_type on its own does not identify an iterator:
+	 * also require the tracing program type, so that a link of another
+	 * program type that merely carries BPF_TRACE_ITER is not handed out
+	 * here and later treated as a struct bpf_iter_link by
+	 * bpffs_iter_open().
+	 */
+	if (prog->type != BPF_PROG_TYPE_TRACING ||
+	    prog->expected_attach_type != BPF_TRACE_ITER) {
+		link = ERR_PTR(-EINVAL);
+		goto out;
+	}
+
+	bpf_link_inc(link);
+out:
+	/* The fd reference is dropped on every path past fdget(). */
+	fdput(f);
+	return link;
+}
@@ -26,6 +26,7 @@ enum bpf_type {
BPF_TYPE_PROG,
BPF_TYPE_MAP,
BPF_TYPE_LINK,
+ BPF_TYPE_ITER,
};
static void *bpf_any_get(void *raw, enum bpf_type type)
@@ -38,6 +39,7 @@ static void *bpf_any_get(void *raw, enum bpf_type type)
bpf_map_inc_with_uref(raw);
break;
case BPF_TYPE_LINK:
+ case BPF_TYPE_ITER:
bpf_link_inc(raw);
break;
default:
@@ -58,6 +60,7 @@ static void bpf_any_put(void *raw, enum bpf_type type)
bpf_map_put_with_uref(raw);
break;
case BPF_TYPE_LINK:
+ case BPF_TYPE_ITER:
bpf_link_put(raw);
break;
default:
@@ -82,6 +85,15 @@ static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
return raw;
}
+	/* Probe for a bpf_iter first: an iterator fd is also a link fd, so
+	 * the bpf_link probe below would match it as BPF_TYPE_LINK.
+	 */
+ raw = bpf_iter_get_from_fd(ufd);
+ if (!IS_ERR(raw)) {
+ *type = BPF_TYPE_ITER;
+ return raw;
+ }
+
raw = bpf_link_get_from_fd(ufd);
if (!IS_ERR(raw)) {
*type = BPF_TYPE_LINK;
@@ -96,6 +108,7 @@ static const struct inode_operations bpf_dir_iops;
static const struct inode_operations bpf_prog_iops = { };
static const struct inode_operations bpf_map_iops = { };
static const struct inode_operations bpf_link_iops = { };
+static const struct inode_operations bpf_iter_iops = { };
static struct inode *bpf_get_inode(struct super_block *sb,
const struct inode *dir,
@@ -135,6 +148,8 @@ static int bpf_inode_type(const struct inode *inode, enum bpf_type *type)
*type = BPF_TYPE_MAP;
else if (inode->i_op == &bpf_link_iops)
*type = BPF_TYPE_LINK;
+ else if (inode->i_op == &bpf_iter_iops)
+ *type = BPF_TYPE_ITER;
else
return -EACCES;
@@ -362,6 +377,12 @@ static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg)
&bpffs_obj_fops);
}
+/* vfs_mkobj() callback used when pinning a BPF_TYPE_ITER object.
+ *
+ * Creates the bpffs node through bpf_mkobj_ops(), tagging the inode with
+ * bpf_iter_iops and giving it bpffs_iter_fops as its file operations.
+ * @arg is passed through unchanged.  Returns the result of
+ * bpf_mkobj_ops().
+ */
+static int bpf_mkiter(struct dentry *dentry, umode_t mode, void *arg)
+{
+	const struct inode_operations *iops = &bpf_iter_iops;
+	const struct file_operations *fops = &bpffs_iter_fops;
+
+	return bpf_mkobj_ops(dentry, mode, arg, iops, fops);
+}
+
static struct dentry *
bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
{
@@ -441,6 +462,9 @@ static int bpf_obj_do_pin(const char __user *pathname, void *raw,
case BPF_TYPE_LINK:
ret = vfs_mkobj(dentry, mode, bpf_mklink, raw);
break;
+ case BPF_TYPE_ITER:
+ ret = vfs_mkobj(dentry, mode, bpf_mkiter, raw);
+ break;
default:
ret = -EPERM;
}
@@ -519,6 +543,8 @@ int bpf_obj_get_user(const char __user *pathname, int flags)
ret = bpf_map_new_fd(raw, f_flags);
else if (type == BPF_TYPE_LINK)
ret = bpf_link_new_fd(raw);
+ else if (type == BPF_TYPE_ITER)
+ ret = bpf_iter_new_fd(raw);
else
return -ENOENT;
@@ -538,6 +564,8 @@ static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type
return ERR_PTR(-EINVAL);
if (inode->i_op == &bpf_link_iops)
return ERR_PTR(-EINVAL);
+ if (inode->i_op == &bpf_iter_iops)
+ return ERR_PTR(-EINVAL);
if (inode->i_op != &bpf_prog_iops)
return ERR_PTR(-EACCES);
@@ -2285,7 +2285,7 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
}
#endif
-static const struct file_operations bpf_link_fops = {
+const struct file_operations bpf_link_fops = {
#ifdef CONFIG_PROC_FS
.show_fdinfo = bpf_link_show_fdinfo,
#endif
A new obj type BPF_TYPE_ITER is added to bpffs. To produce a file bpf iterator, the fd must correspond to a link_fd associated with a trace/iter program. When the pinned file is opened, a seq_file will be generated. Signed-off-by: Yonghong Song <yhs@fb.com> --- include/linux/bpf.h | 3 +++ kernel/bpf/bpf_iter.c | 48 ++++++++++++++++++++++++++++++++++++++++++- kernel/bpf/inode.c | 28 +++++++++++++++++++++++++ kernel/bpf/syscall.c | 2 +- 4 files changed, 79 insertions(+), 2 deletions(-)