@@ -1129,6 +1129,9 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
int bpf_obj_get_user(const char __user *pathname, int flags);
#define BPF_ITER_FUNC_PREFIX "__bpf_iter__"
+/* DEFINE_BPF_ITER_FUNC(target, args...) - declare and define an empty
+ * __init function named __bpf_iter__<target> that takes @args and
+ * returns 0.
+ *
+ * Per the patch description, a target defines this function so that the
+ * BTF type representing the target is captured. The pasted name prefix
+ * is the same string as BPF_ITER_FUNC_PREFIX.
+ */
+#define DEFINE_BPF_ITER_FUNC(target, args...) \
+ extern int __bpf_iter__ ## target(args); \
+ int __init __bpf_iter__ ## target(args) { return 0; }
typedef int (*bpf_iter_init_seq_priv_t)(void *private_data);
typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
@@ -1141,11 +1144,19 @@ struct bpf_iter_reg {
u32 seq_priv_size;
};
+/* Metadata handed to an iterator bpf program, common to all targets.
+ * bpf_iter_get_info() reads @seq and fills in @session_id and @seq_num
+ * from the iterator's private data.
+ */
+struct bpf_iter_meta {
+ __bpf_md_ptr(struct seq_file *, seq);
+ u64 session_id;
+ u64 seq_num;
+};
+
int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
bool bpf_iter_prog_supported(struct bpf_prog *prog);
int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
int bpf_iter_new_fd(struct bpf_link *link);
bool bpf_link_is_iter(struct bpf_link *link);
+struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop);
+int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx);
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
@@ -43,6 +43,42 @@ static atomic64_t session_id;
static int prepare_seq_file(struct file *file, struct bpf_iter_link *link);
+/* Bump seq_num in the bpf_iter_priv_data whose target_private area is
+ * seq->private. bpf_seq_read() calls this right before each show().
+ */
+static void bpf_iter_inc_seq_num(struct seq_file *seq)
+{
+	struct bpf_iter_priv_data *priv =
+		container_of(seq->private, struct bpf_iter_priv_data,
+			     target_private);
+
+	priv->seq_num += 1;
+}
+
+/* Take back one seq_num increment in the bpf_iter_priv_data whose
+ * target_private area is seq->private. bpf_seq_read() calls this when
+ * show() overflowed the buffer or returned non-zero.
+ */
+static void bpf_iter_dec_seq_num(struct seq_file *seq)
+{
+	struct bpf_iter_priv_data *priv =
+		container_of(seq->private, struct bpf_iter_priv_data,
+			     target_private);
+
+	priv->seq_num -= 1;
+}
+
+/* Increment do_stop in the bpf_iter_priv_data whose target_private area
+ * is seq->private. bpf_seq_read() calls this before stop() when there is
+ * no current object (p == NULL).
+ */
+static void bpf_iter_set_stop(struct seq_file *seq)
+{
+	struct bpf_iter_priv_data *priv =
+		container_of(seq->private, struct bpf_iter_priv_data,
+			     target_private);
+
+	priv->do_stop += 1;
+}
+
+/* Decrement do_stop in the bpf_iter_priv_data whose target_private area
+ * is seq->private. bpf_seq_read() calls this when the output of stop()
+ * overflowed the buffer.
+ */
+static void bpf_iter_unset_stop(struct seq_file *seq)
+{
+	struct bpf_iter_priv_data *priv =
+		container_of(seq->private, struct bpf_iter_priv_data,
+			     target_private);
+
+	priv->do_stop -= 1;
+}
+
/* bpf_seq_read, a customized and simpler version for bpf iterator.
* no_llseek is assumed for this file.
* The following are differences from seq_read():
@@ -83,12 +119,15 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
if (!p || IS_ERR(p))
goto Stop;
+ bpf_iter_inc_seq_num(seq);
err = seq->op->show(seq, p);
if (seq_has_overflowed(seq)) {
+ bpf_iter_dec_seq_num(seq);
err = -E2BIG;
goto Error_show;
} else if (err) {
/* < 0: go out, > 0: skip */
+ bpf_iter_dec_seq_num(seq);
if (likely(err < 0))
goto Error_show;
seq->count = 0;
@@ -113,8 +152,10 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
if (seq->count >= size)
break;
+ bpf_iter_inc_seq_num(seq);
err = seq->op->show(seq, p);
if (seq_has_overflowed(seq)) {
+ bpf_iter_dec_seq_num(seq);
if (offs == 0) {
err = -E2BIG;
goto Error_show;
@@ -122,6 +163,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
seq->count = offs;
break;
} else if (err) {
+ bpf_iter_dec_seq_num(seq);
/* < 0: go out, > 0: skip */
seq->count = offs;
if (likely(err < 0)) {
@@ -134,11 +176,17 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
Stop:
offs = seq->count;
/* may call bpf program */
- seq->op->stop(seq, p);
- if (seq_has_overflowed(seq)) {
- if (offs == 0)
- goto Error_stop;
- seq->count = offs;
+ if (!p) {
+ bpf_iter_set_stop(seq);
+ seq->op->stop(seq, p);
+ if (seq_has_overflowed(seq)) {
+ bpf_iter_unset_stop(seq);
+ if (offs == 0)
+ goto Error_stop;
+ seq->count = offs;
+ }
+ } else {
+ seq->op->stop(seq, p);
}
n = min(seq->count, size);
@@ -432,3 +480,39 @@ int bpf_iter_new_fd(struct bpf_link *link)
put_unused_fd(fd);
return err;
}
+
+/* Fetch the bpf program attached to the iterator behind meta->seq and
+ * copy the iterator's session_id and seq_num into @meta.
+ *
+ * Returns NULL, leaving @meta untouched, when the seq_file's file does
+ * not use bpf_iter_fops, or when @in_stop is true and do_stop is not
+ * exactly 1. Otherwise returns the program stored in the private data.
+ */
+struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop)
+{
+	struct seq_file *sf = meta->seq;
+	struct bpf_iter_priv_data *priv;
+
+	/* Only seq_files created by the bpf iterator carry our private data. */
+	if (sf->file->f_op != &bpf_iter_fops)
+		return NULL;
+
+	priv = container_of(sf->private, struct bpf_iter_priv_data,
+			    target_private);
+	if (in_stop && priv->do_stop != 1)
+		return NULL;
+
+	meta->session_id = priv->session_id;
+	meta->seq_num = priv->seq_num;
+
+	return priv->prog;
+}
+
+/* Run @prog on @ctx between rcu_read_lock()/rcu_read_unlock() with
+ * migration disabled.
+ *
+ * Returns 0 when the program returns 0, -EAGAIN for any other result.
+ */
+int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
+{
+	int prog_ret;
+
+	rcu_read_lock();
+	migrate_disable();
+	prog_ret = BPF_PROG_RUN(prog, ctx);
+	migrate_enable();
+	rcu_read_unlock();
+
+	if (prog_ret)
+		return -EAGAIN;
+	return 0;
+}
The macro DEFINE_BPF_ITER_FUNC is implemented so that a target can define an init function to capture the BTF type which represents the target. The bpf_iter_meta is a structure holding metadata, common to all targets in the bpf program. Additional marker functions are called before/after the show() and stop() callbacks in bpf_seq_read() to help calculate a precise seq_num and to decide whether to call the bpf_prog inside stop(). Two functions, bpf_iter_get_info() and bpf_iter_run_prog(), are implemented so that a target can get the needed information from the bpf_iter infrastructure and can run the program. Signed-off-by: Yonghong Song <yhs@fb.com> --- include/linux/bpf.h | 11 +++++ kernel/bpf/bpf_iter.c | 94 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 100 insertions(+), 5 deletions(-)