@@ -1142,6 +1142,7 @@ struct bpf_iter_reg {
int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
bool bpf_iter_prog_supported(struct bpf_prog *prog);
+int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
@@ -124,3 +124,4 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
#ifdef CONFIG_CGROUP_BPF
BPF_LINK_TYPE(BPF_LINK_TYPE_CGROUP, cgroup)
#endif
+BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter)
@@ -229,6 +229,7 @@ enum bpf_link_type {
BPF_LINK_TYPE_RAW_TRACEPOINT = 1,
BPF_LINK_TYPE_TRACING = 2,
BPF_LINK_TYPE_CGROUP = 3,
+ BPF_LINK_TYPE_ITER = 4,
MAX_BPF_LINK_TYPE,
};
@@ -15,6 +15,11 @@ struct bpf_iter_target_info {
u32 btf_id;
};
+struct bpf_iter_link {
+ struct bpf_link link;
+ struct bpf_iter_target_info *tinfo;
+};
+
static struct list_head targets = LIST_HEAD_INIT(targets);
static DEFINE_MUTEX(targets_mutex);
@@ -68,3 +73,60 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog)
return supported;
}
+
+static void bpf_iter_link_release(struct bpf_link *link)
+{
+}
+
+static void bpf_iter_link_dealloc(struct bpf_link *link)
+{
+ struct bpf_iter_link *iter_link =
+ container_of(link, struct bpf_iter_link, link);
+
+ kfree(iter_link);
+}
+
+static const struct bpf_link_ops bpf_iter_link_lops = {
+ .release = bpf_iter_link_release,
+ .dealloc = bpf_iter_link_dealloc,
+};
+
+int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ struct bpf_link_primer link_primer;
+ struct bpf_iter_target_info *tinfo;
+ struct bpf_iter_link *link;
+ bool existed = false;
+ u32 prog_btf_id;
+ int err;
+
+ if (attr->link_create.target_fd || attr->link_create.flags)
+ return -EINVAL;
+
+ prog_btf_id = prog->aux->attach_btf_id;
+ mutex_lock(&targets_mutex);
+ list_for_each_entry(tinfo, &targets, list) {
+ if (tinfo->btf_id == prog_btf_id) {
+ existed = true;
+ break;
+ }
+ }
+ mutex_unlock(&targets_mutex);
+ if (!existed)
+ return -ENOENT;
+
+ link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
+ if (!link)
+ return -ENOMEM;
+
+ bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog);
+ link->tinfo = tinfo;
+
+ err = bpf_link_prime(&link->link, &link_primer);
+ if (err) {
+ kfree(link);
+ return err;
+ }
+
+ return bpf_link_settle(&link_primer);
+}
@@ -2729,6 +2729,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
case BPF_CGROUP_GETSOCKOPT:
case BPF_CGROUP_SETSOCKOPT:
return BPF_PROG_TYPE_CGROUP_SOCKOPT;
+ case BPF_TRACE_ITER:
+ return BPF_PROG_TYPE_TRACING;
default:
return BPF_PROG_TYPE_UNSPEC;
}
@@ -3729,6 +3731,15 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
return err;
}
+static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ if (attr->link_create.attach_type == BPF_TRACE_ITER &&
+ prog->expected_attach_type == BPF_TRACE_ITER)
+ return bpf_iter_link_attach(attr, prog);
+
+ return -EINVAL;
+}
+
#define BPF_LINK_CREATE_LAST_FIELD link_create.flags
static int link_create(union bpf_attr *attr)
{
@@ -3765,6 +3776,9 @@ static int link_create(union bpf_attr *attr)
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
ret = cgroup_bpf_link_attach(attr, prog);
break;
+ case BPF_PROG_TYPE_TRACING:
+ ret = tracing_bpf_link_attach(attr, prog);
+ break;
default:
ret = -EINVAL;
}
@@ -229,6 +229,7 @@ enum bpf_link_type {
BPF_LINK_TYPE_RAW_TRACEPOINT = 1,
BPF_LINK_TYPE_TRACING = 2,
BPF_LINK_TYPE_CGROUP = 3,
+ BPF_LINK_TYPE_ITER = 4,
MAX_BPF_LINK_TYPE,
};
Given a bpf program, the step to create an anonymous bpf iterator is: - create a bpf_iter_link, which combines bpf program and the target. In the future, there could be more information recorded in the link. A link_fd will be returned to the user space. - create an anonymous bpf iterator with the given link_fd. The bpf_iter_link can be pinned to bpffs mount file system to create a file based bpf iterator as well. The benefit to use of bpf_iter_link: - using bpf link simplifies design and implementation as bpf link is used for other tracing bpf programs. - for file based bpf iterator, bpf_iter_link provides a standard way to replace underlying bpf programs. - for both anonymous and free based iterators, bpf link query capability can be leveraged. The patch added support of tracing/iter programs for BPF_LINK_CREATE. A new link type BPF_LINK_TYPE_ITER is added to facilitate link querying. Currently, only prog_id is needed, so there is no additional in-kernel show_fdinfo() and fill_link_info() hook is needed for BPF_LINK_TYPE_ITER link. Signed-off-by: Yonghong Song <yhs@fb.com> --- include/linux/bpf.h | 1 + include/linux/bpf_types.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/bpf_iter.c | 62 ++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 14 ++++++++ tools/include/uapi/linux/bpf.h | 1 + 6 files changed, 80 insertions(+)