@@ -1110,10 +1110,15 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd);
int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
int bpf_obj_get_user(const char __user *pathname, int flags);
+#define BPF_DUMP_SEQ_NET_PRIVATE BIT(0)
+
int bpf_dump_reg_target(const char *target, const char *target_proto,
const struct seq_operations *seq_ops,
u32 seq_priv_size, u32 target_feature);
int bpf_dump_set_target_info(u32 target_fd, struct bpf_prog *prog);
+int bpf_dump_create(u32 prog_fd, const char __user *dumper_name);
+struct bpf_prog *bpf_dump_get_prog(struct seq_file *seq, u32 priv_data_size,
+ u64 *seq_num);
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
@@ -354,6 +354,7 @@ enum {
/* Flags for accessing BPF object from syscall side. */
BPF_F_RDONLY = (1U << 3),
BPF_F_WRONLY = (1U << 4),
+ BPF_F_DUMP = (1U << 5),
/* Flag for stack_map, store build_id+offset instead of pointer */
BPF_F_STACK_BUILD_ID = (1U << 5),
@@ -481,7 +482,10 @@ union bpf_attr {
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
- __aligned_u64 pathname;
+ union {
+ __aligned_u64 pathname;
+ __aligned_u64 dumper_name;
+ };
__u32 bpf_fd;
__u32 file_flags;
};
@@ -30,22 +30,173 @@ struct bpfdump_targets {
struct mutex dumper_mutex;
};
+struct dumper_inode_info { /* stored in inode->i_private of a file dumper */
+ struct bpfdump_target_info *tinfo; /* target the dumper is bound to */
+ struct bpf_prog *prog; /* program handed to seq_file users on open */
+};
+
+struct dumper_info { /* bookkeeping entry for one created dumper */
+ struct list_head list; /* linked into anon_dumpers or file_dumpers */
+ /* file to identify an anon dumper,
+ * dentry to identify a file dumper.
+ */
+ union {
+ struct file *file;
+ struct dentry *dentry;
+ };
+ struct bpfdump_target_info *tinfo; /* target the dumper is bound to */
+ struct bpf_prog *prog; /* program attached to this dumper */
+};
+
+struct dumpers { /* a list of dumper_info entries plus its lock */
+ struct list_head dumpers; /* head of the dumper_info list */
+ struct mutex dumper_mutex; /* taken around every walk/update of the list */
+};
+
+struct extra_priv_data { /* appended after the target's seq_file private data */
+ struct bpf_prog *prog; /* returned by bpf_dump_get_prog() */
+ u64 seq_num; /* post-incremented on each bpf_dump_get_prog() call */
+};
+
/* registered dump targets */
static struct bpfdump_targets dump_targets;
static struct dentry *bpfdump_dentry;
+static struct dumpers anon_dumpers, file_dumpers;
+
+static const struct file_operations bpf_dumper_ops;
+static const struct inode_operations bpf_dir_iops;
+
+static struct dentry *bpfdump_add_file(const char *name, struct dentry *parent,
+ const struct file_operations *f_ops,
+ void *data);
static struct dentry *bpfdump_add_dir(const char *name, struct dentry *parent,
const struct inode_operations *i_ops,
void *data);
static int __bpfdump_init(void);
+/* Size of the seq_file private area once dumper bookkeeping is appended:
+ * the target's own size padded up to a multiple of 8, followed by one
+ * struct extra_priv_data.
+ */
+static u32 get_total_priv_dsize(u32 old_size)
+{
+	u32 padded_size = roundup(old_size, 8);
+
+	return padded_size + sizeof(struct extra_priv_data);
+}
+
+/* Locate the struct extra_priv_data that sits right after the target's
+ * private data, at the 8-byte rounded-up offset used by
+ * get_total_priv_dsize().
+ */
+static void *get_extra_priv_dptr(void *old_ptr, u32 old_size)
+{
+	u32 extra_offset = roundup(old_size, 8);
+
+	return old_ptr + extra_offset;
+}
+
+#ifdef CONFIG_PROC_FS
+/* fdinfo for an open file dumper: print the target name and the id of the
+ * bpf program recorded in the inode's private data.
+ */
+static void dumper_show_fdinfo(struct seq_file *m, struct file *filp)
+{
+	struct dumper_inode_info *info = filp->f_inode->i_private;
+
+	seq_printf(m, "target:\t%s\n", info->tinfo->target);
+	seq_printf(m, "prog_id:\t%u\n", info->prog->aux->id);
+}
+
+/* fdinfo for an anonymous dumper: look the file up in anon_dumpers and,
+ * if it is found, print its target name and program id.
+ */
+static void anon_dumper_show_fdinfo(struct seq_file *m, struct file *filp)
+{
+	struct dumper_info *cur;
+
+	mutex_lock(&anon_dumpers.dumper_mutex);
+	list_for_each_entry(cur, &anon_dumpers.dumpers, list) {
+		if (cur->file != filp)
+			continue;
+
+		seq_printf(m, "target:\t%s\n"
+			   "prog_id:\t%u\n",
+			   cur->tinfo->target,
+			   cur->prog->aux->id);
+		break;
+	}
+	mutex_unlock(&anon_dumpers.dumper_mutex);
+}
+
+#endif
+
+/* Apply target feature bits to freshly allocated seq_file private data.
+ * For BPF_DUMP_SEQ_NET_PRIVATE the private data is treated as starting with
+ * a struct seq_net_private.
+ */
+static void process_target_feature(u32 feature, void *priv_data)
+{
+	struct seq_net_private *net_priv = priv_data;
+
+	if (!(feature & BPF_DUMP_SEQ_NET_PRIVATE))
+		return;
+
+	/* use the current net namespace */
+	set_seq_net_private(net_priv, current->nsproxy->net_ns);
+}
+
+/* ->open() of a file dumper: start a seq_file session with the target's
+ * seq_ops and an enlarged private area, then record the bpf program and a
+ * zeroed sequence number in the trailing extra_priv_data.
+ *
+ * Returns 0 on success or -ENOMEM if the seq_file cannot be set up.
+ */
+static int dumper_open(struct inode *inode, struct file *file)
+{
+	struct dumper_inode_info *info = inode->i_private;
+	struct bpfdump_target_info *tinfo = info->tinfo;
+	u32 target_size = tinfo->seq_priv_size;
+	struct extra_priv_data *extra;
+	void *priv;
+
+	priv = __seq_open_private(file, tinfo->seq_ops,
+				  get_total_priv_dsize(target_size));
+	if (!priv)
+		return -ENOMEM;
+
+	process_target_feature(tinfo->target_feature, priv);
+
+	extra = get_extra_priv_dptr(priv, target_size);
+	extra->prog = info->prog;
+	extra->seq_num = 0;
+
+	return 0;
+}
+
+/* ->release() of an anonymous dumper.
+ *
+ * Unlinks the dumper_info entry that tracks @file, drops the program
+ * reference it holds and frees the entry, then tears down the seq_file
+ * state. Returns the result of seq_release_private(), or 0 when no
+ * seq_file was ever attached to @file.
+ */
+static int anon_dumper_release(struct inode *inode, struct file *file)
+{
+	struct dumper_info *dinfo, *found = NULL;
+
+	mutex_lock(&anon_dumpers.dumper_mutex);
+	list_for_each_entry(dinfo, &anon_dumpers.dumpers, list) {
+		if (dinfo->file == file) {
+			list_del(&dinfo->list);
+			found = dinfo;
+			break;
+		}
+	}
+	mutex_unlock(&anon_dumpers.dumper_mutex);
+
+	if (found) {
+		/* release the bpf program */
+		bpf_prog_put(found->prog);
+		/* the entry was kmalloc'ed at creation; free it here or it leaks */
+		kfree(found);
+	}
+
+	/* seq_release_private() dereferences file->private_data, which is
+	 * still NULL if creation failed before the seq_file was set up.
+	 */
+	if (!file->private_data)
+		return 0;
+
+	return seq_release_private(inode, file);
+}
+
+/* ->release() of a file dumper: only the seq_file state created by
+ * dumper_open() has to be torn down.
+ */
+static int dumper_release(struct inode *inode, struct file *file)
+{
+	int ret = seq_release_private(inode, file);
+
+	return ret;
+}
+
static int dumper_unlink(struct inode *dir, struct dentry *dentry)
{
- kfree(d_inode(dentry)->i_private);
+	struct dumper_inode_info *i_info = d_inode(dentry)->i_private;
+	struct dumper_info *dinfo, *found = NULL;
+
+	/* Drop the file_dumpers entry recorded for this dentry when the
+	 * dumper was created; otherwise it leaks and keeps pointing at a
+	 * dentry and program that are about to go away.
+	 * NOTE(review): assumes the dentry passed to ->unlink() is the same
+	 * object that was stored at creation time -- confirm.
+	 */
+	mutex_lock(&file_dumpers.dumper_mutex);
+	list_for_each_entry(dinfo, &file_dumpers.dumpers, list) {
+		if (dinfo->dentry == dentry) {
+			list_del(&dinfo->list);
+			found = dinfo;
+			break;
+		}
+	}
+	mutex_unlock(&file_dumpers.dumper_mutex);
+	kfree(found);
+
+	/* i_info and the list entry share one program reference; put it once */
+	bpf_prog_put(i_info->prog);
+	kfree(i_info);
+
return simple_unlink(dir, dentry);
}
+static const struct file_operations bpf_dumper_ops = { /* installed on file dumpers by create_dumper() */
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = dumper_show_fdinfo, /* prints target and prog_id */
+#endif
+ .open = dumper_open, /* sets up a seq_file session on every open */
+ .read = seq_read,
+ .release = dumper_release, /* frees the seq_file private data */
+};
+
+static const struct file_operations anon_bpf_dumper_ops = { /* installed on anonymous dumpers; no .open, the seq_file is set up in create_anon_dumper() */
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = anon_dumper_show_fdinfo, /* prints target and prog_id */
+#endif
+ .read = seq_read,
+ .release = anon_dumper_release, /* drops the program and the seq_file state */
+};
+
static const struct inode_operations bpf_dir_iops = {
.lookup = simple_lookup,
.unlink = dumper_unlink,
@@ -88,6 +239,179 @@ int bpf_dump_set_target_info(u32 target_fd, struct bpf_prog *prog)
return err;
}
+/* Create an anonymous dumper for @prog bound to @tinfo.
+ *
+ * An anon inode file using anon_bpf_dumper_ops is created, a seq_file
+ * session with the target's seq_ops is attached to it, and the dumper is
+ * recorded in anon_dumpers. The file descriptor is installed only after
+ * everything is set up, so userspace can never observe (or close/replace)
+ * a half-initialized dumper, and no descriptor is left behind on failure.
+ *
+ * Returns the new file descriptor (>= 0) or a negative errno. The caller
+ * keeps ownership of its @prog reference on failure; on success the
+ * reference is dropped when the file is released.
+ */
+static int create_anon_dumper(struct bpfdump_target_info *tinfo,
+			      struct bpf_prog *prog)
+{
+	struct extra_priv_data *extra_data;
+	u32 old_priv_size, total_priv_size;
+	struct dumper_info *dinfo;
+	struct file *file;
+	int err, anon_fd;
+	void *priv_data;
+
+	dinfo = kmalloc(sizeof(*dinfo), GFP_KERNEL);
+	if (!dinfo)
+		return -ENOMEM;
+
+	/* reserve the fd number, but do not publish it yet */
+	anon_fd = get_unused_fd_flags(O_CLOEXEC);
+	if (anon_fd < 0) {
+		err = anon_fd;
+		goto free_dinfo;
+	}
+
+	file = anon_inode_getfile("bpf-dumper", &anon_bpf_dumper_ops,
+				  NULL, O_CLOEXEC);
+	if (IS_ERR(file)) {
+		err = PTR_ERR(file);
+		goto put_fd;
+	}
+
+	/* setup seq_file for anon dumper */
+	old_priv_size = tinfo->seq_priv_size;
+	total_priv_size = get_total_priv_dsize(old_priv_size);
+
+	priv_data = __seq_open_private(file, tinfo->seq_ops,
+				       total_priv_size);
+	if (!priv_data) {
+		err = -ENOMEM;
+		goto put_file;
+	}
+
+	process_target_feature(tinfo->target_feature, priv_data);
+
+	extra_data = get_extra_priv_dptr(priv_data, old_priv_size);
+	extra_data->prog = prog;
+	extra_data->seq_num = 0;
+
+	dinfo->file = file;
+	dinfo->tinfo = tinfo;
+	dinfo->prog = prog;
+
+	mutex_lock(&anon_dumpers.dumper_mutex);
+	list_add(&dinfo->list, &anon_dumpers.dumpers);
+	mutex_unlock(&anon_dumpers.dumper_mutex);
+
+	/* point of no return: the file becomes visible to userspace */
+	fd_install(anon_fd, file);
+	return anon_fd;
+
+put_file:
+	/* NOTE(review): fput() ends up in ->release() for a file that has no
+	 * seq_file attached; the release handler must tolerate
+	 * file->private_data == NULL.
+	 */
+	fput(file);
+put_fd:
+	put_unused_fd(anon_fd);
+free_dinfo:
+	kfree(dinfo);
+	return err;
+}
+
+/* Create a file based dumper named @dumper_name under the directory of
+ * target @tinfo and bind @prog to it.
+ *
+ * The inode of the new file carries a dumper_inode_info (target + program)
+ * and the dumper is additionally recorded in file_dumpers.
+ *
+ * Returns 0 on success or a negative errno; on failure nothing is left
+ * allocated and the caller still owns its @prog reference.
+ */
+static int create_dumper(struct bpfdump_target_info *tinfo,
+			 const char __user *dumper_name,
+			 struct bpf_prog *prog)
+{
+	struct dumper_inode_info *i_info;
+	struct dumper_info *dinfo;
+	struct dentry *dentry;
+	const char *dname;
+	int err = 0;
+
+	i_info = kmalloc(sizeof(*i_info), GFP_KERNEL);
+	if (!i_info)
+		return -ENOMEM;
+
+	i_info->tinfo = tinfo;
+	i_info->prog = prog;
+
+	dinfo = kmalloc(sizeof(*dinfo), GFP_KERNEL);
+	if (!dinfo) {
+		err = -ENOMEM;
+		goto free_i_info;
+	}
+
+	/* strndup_user() reports failure with an ERR_PTR (-EFAULT, -EINVAL,
+	 * -ENOMEM), never with NULL, so a NULL check would let an error
+	 * pointer be used as a name and later be passed to kfree().
+	 */
+	dname = strndup_user(dumper_name, PATH_MAX);
+	if (IS_ERR(dname)) {
+		err = PTR_ERR(dname);
+		goto free_dinfo;
+	}
+
+	dentry = bpfdump_add_file(dname, tinfo->dir_dentry,
+				  &bpf_dumper_ops, i_info);
+	kfree(dname);
+	if (IS_ERR(dentry)) {
+		err = PTR_ERR(dentry);
+		goto free_dinfo;
+	}
+
+	dinfo->dentry = dentry;
+	dinfo->tinfo = tinfo;
+	dinfo->prog = prog;
+
+	mutex_lock(&file_dumpers.dumper_mutex);
+	list_add(&dinfo->list, &file_dumpers.dumpers);
+	mutex_unlock(&file_dumpers.dumper_mutex);
+
+	return 0;
+
+free_dinfo:
+	kfree(dinfo);
+free_i_info:
+	kfree(i_info);
+	return err;
+}
+
+/* Create a dumper for the program behind @prog_fd.
+ *
+ * The program's dump target is looked up among the registered targets.
+ * With a non-NULL @dumper_name a file dumper of that name is created,
+ * otherwise an anonymous dumper.
+ *
+ * Returns the (non-negative) result of the creation helper on success, or
+ * a negative errno. The program reference taken here is dropped again on
+ * every failure path.
+ */
+int bpf_dump_create(u32 prog_fd, const char __user *dumper_name)
+{
+	struct bpfdump_target_info *tinfo, *match = NULL;
+	struct bpf_prog *prog;
+	const char *target;
+	int ret;
+
+	prog = bpf_prog_get(prog_fd);
+	if (IS_ERR(prog))
+		return PTR_ERR(prog);
+
+	/* both lookup failures below report -EINVAL */
+	ret = -EINVAL;
+
+	target = prog->aux->dump_target;
+	if (!target)
+		goto put_prog;
+
+	mutex_lock(&dump_targets.dumper_mutex);
+	list_for_each_entry(tinfo, &dump_targets.dumpers, list) {
+		if (!strcmp(tinfo->target, target)) {
+			match = tinfo;
+			break;
+		}
+	}
+	mutex_unlock(&dump_targets.dumper_mutex);
+
+	if (!match)
+		goto put_prog;
+
+	if (dumper_name)
+		ret = create_dumper(match, dumper_name, prog);
+	else
+		ret = create_anon_dumper(match, prog);
+	if (ret >= 0)
+		return ret;
+
+put_prog:
+	bpf_prog_put(prog);
+	return ret;
+}
+
+/* Fetch the bpf program stored behind a dumper's seq_file.
+ *
+ * @seq:            seq_file being iterated
+ * @priv_data_size: size of the target's own private data, used to locate
+ *                  the trailing extra_priv_data
+ * @seq_num:        receives the current sequence number, which is then
+ *                  incremented
+ *
+ * Returns NULL (leaving *@seq_num untouched) when @seq does not belong to
+ * a file or anonymous dumper, otherwise the stored program.
+ */
+struct bpf_prog *bpf_dump_get_prog(struct seq_file *seq, u32 priv_data_size,
+				   u64 *seq_num)
+{
+	const struct file_operations *fops = seq->file->f_op;
+	struct extra_priv_data *extra;
+
+	if (fops != &bpf_dumper_ops && fops != &anon_bpf_dumper_ops)
+		return NULL;
+
+	extra = get_extra_priv_dptr(seq->private, priv_data_size);
+	*seq_num = extra->seq_num;
+	extra->seq_num++;
+
+	return extra->prog;
+}
+
int bpf_dump_reg_target(const char *target,
const char *target_proto,
const struct seq_operations *seq_ops,
@@ -211,6 +535,14 @@ bpfdump_create_dentry(const char *name, umode_t mode, struct dentry *parent,
return dentry;
}
+/* Create a read-only (0444) regular file @name under @parent, using @f_ops
+ * as its file operations and @data as the private data handed to
+ * bpfdump_create_dentry().
+ */
+static struct dentry *
+bpfdump_add_file(const char *name, struct dentry *parent,
+		 const struct file_operations *f_ops, void *data)
+{
+	const umode_t mode = S_IFREG | 0444;
+
+	return bpfdump_create_dentry(name, mode, parent, data, NULL, f_ops);
+}
+
static struct dentry *
bpfdump_add_dir(const char *name, struct dentry *parent,
const struct inode_operations *i_ops, void *data)
@@ -290,6 +622,10 @@ static int __bpfdump_init(void)
INIT_LIST_HEAD(&dump_targets.dumpers);
mutex_init(&dump_targets.dumper_mutex);
+ INIT_LIST_HEAD(&anon_dumpers.dumpers);
+ mutex_init(&anon_dumpers.dumper_mutex);
+ INIT_LIST_HEAD(&file_dumpers.dumpers);
+ mutex_init(&file_dumpers.dumper_mutex);
return 0;
remove_mount:
@@ -2173,9 +2173,13 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
static int bpf_obj_pin(const union bpf_attr *attr)
{
- if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0)
+	if (CHECK_ATTR(BPF_OBJ) || (attr->file_flags & ~BPF_F_DUMP))
return -EINVAL;
+	/* BPF_F_DUMP is the only bit that can still be set here; it selects
+	 * dumper creation instead of a regular pin.
+	 */
+	if (attr->file_flags & BPF_F_DUMP)
+		return bpf_dump_create(attr->bpf_fd,
+				       u64_to_user_ptr(attr->dumper_name));
+
return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}
@@ -2605,6 +2609,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
case BPF_CGROUP_GETSOCKOPT:
case BPF_CGROUP_SETSOCKOPT:
return BPF_PROG_TYPE_CGROUP_SOCKOPT;
+ case BPF_TRACE_DUMP:
+ return BPF_PROG_TYPE_TRACING;
default:
return BPF_PROG_TYPE_UNSPEC;
}
@@ -2663,6 +2669,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_PROG_TYPE_SOCK_OPS:
ret = cgroup_bpf_prog_attach(attr, ptype, prog);
break;
+ case BPF_PROG_TYPE_TRACING:
+ ret = bpf_dump_create(attr->attach_bpf_fd, (void __user *)NULL);
+ break;
default:
ret = -EINVAL;
}
@@ -354,6 +354,7 @@ enum {
/* Flags for accessing BPF object from syscall side. */
BPF_F_RDONLY = (1U << 3),
BPF_F_WRONLY = (1U << 4),
+ BPF_F_DUMP = (1U << 5),
/* Flag for stack_map, store build_id+offset instead of pointer */
BPF_F_STACK_BUILD_ID = (1U << 5),
@@ -481,7 +482,10 @@ union bpf_attr {
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
- __aligned_u64 pathname;
+ union {
+ __aligned_u64 pathname;
+ __aligned_u64 dumper_name;
+ };
__u32 bpf_fd;
__u32 file_flags;
};
Given a loaded dumper bpf program, which already knows which target it should bind to, there are two ways to create a dumper: - a file based dumper under the hierarchy of /sys/kernel/bpfdump/ which users can "cat" to print out the output. - an anonymous dumper from which a user application can "read" the dumping output. For a file based dumper, the BPF_OBJ_PIN syscall interface is used. For an anonymous dumper, the BPF_PROG_ATTACH syscall interface is used. To make it easy for the target seq_ops->show() to get the bpf program, dumper creation increases the target-provided seq_file private data size so that the bpf program pointer is also stored in the seq_file private data. Further, a seq_num which represents how many times bpf_dump_get_prog() has been called is also available to the target seq_ops->show(). Such information can be used to, e.g., print a banner before printing out the actual data. Note that the seq_num does not represent the number of unique kernel objects the bpf program has seen, but it should be a good approximation. A target feature BPF_DUMP_SEQ_NET_PRIVATE is implemented, which is specifically useful for net based dumpers. It sets the net namespace to the current process's net namespace. This avoids changing existing net seq_ops in order to retrieve the net namespace from the seq_file pointer. For open dumper files, anonymous or not, the fdinfo will show the target and prog_id associated with that file descriptor. For the dumper file itself, a kernel interface will be provided to retrieve the prog_id in one of the later patches. Signed-off-by: Yonghong Song <yhs@fb.com> --- include/linux/bpf.h | 5 + include/uapi/linux/bpf.h | 6 +- kernel/bpf/dump.c | 338 ++++++++++++++++++++++++++++++++- kernel/bpf/syscall.c | 11 +- tools/include/uapi/linux/bpf.h | 6 +- 5 files changed, 362 insertions(+), 4 deletions(-)