Message ID | 20241029002208.1947947-3-dolinux.peng@gmail.com |
---|---|
State | New |
Headers | show |
Series | bpf: Using binary search to improve the performance of btf_find_by_name_kind | expand |
On Wed, Oct 30, 2024 at 6:13 AM Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote: > > On Mon, Oct 28, 2024 at 5:22 PM Donglin Peng <dolinux.peng@gmail.com> wrote: > > > > Currently, we are only using the linear search method to find the type id > > by the name, which has a time complexity of O(n). This change involves > > sorting the names of btf types in ascending order and using binary search, > > which has a time complexity of O(log(n)). This idea was inspired by the > > following patch: > > > > 60443c88f3a8 ("kallsyms: Improve the performance of kallsyms_lookup_name()"). > > > > At present, this improvement is only for searching in vmlinux's and > > module's BTFs. > > > > Another change is the search direction, where we search the BTF first and > > then its base, the type id of the first matched btf_type will be returned. > > > > Here is a time-consuming result that finding 87590 type ids by their names in > > vmlinux's BTF. > > > > Before: 158426 ms > > After: 114 ms > > > > The average lookup performance has improved more than 1000x in the above scenario. > > > > Tested-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> > > Signed-off-by: Donglin Peng <dolinux.peng@gmail.com> > > --- > > v4: > > - move the modification of libbpf to another patch > > > > v3: > > - Link: https://lore.kernel.org/all/20240608140835.965949-1-dolinux.peng@gmail.com/ > > - Sort btf_types during build process other than during boot, to reduce the > > overhead of memory and boot time. > > > > v2: > > - Link: https://lore.kernel.org/all/20230909091646.420163-1-pengdonglin@sangfor.com.cn > > --- > > include/linux/btf.h | 1 + > > kernel/bpf/btf.c | 157 ++++++++++++++++++++++++++++++++++++++++---- > > 2 files changed, 147 insertions(+), 11 deletions(-) > > > > diff --git a/include/linux/btf.h b/include/linux/btf.h > > index b8a583194c4a..64c35aaa22fa 100644 > > --- a/include/linux/btf.h > > +++ b/include/linux/btf.h > > @@ -216,6 +216,7 @@ bool btf_is_module(const struct btf *btf); > > bool btf_is_vmlinux(const struct btf *btf); > > struct module *btf_try_get_module(const struct btf *btf); > > u32 btf_nr_types(const struct btf *btf); > > +u32 btf_type_cnt(const struct btf *btf); > > struct btf *btf_base_btf(const struct btf *btf); > > bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, > > const struct btf_member *m, > > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c > > index 5cd1c7a23848..6d0d58989640 100644 > > --- a/kernel/bpf/btf.c > > +++ b/kernel/bpf/btf.c > > @@ -262,6 +262,7 @@ struct btf { > > u32 data_size; > > refcount_t refcnt; > > u32 id; > > + u32 nr_types_sorted; > > struct rcu_head rcu; > > struct btf_kfunc_set_tab *kfunc_set_tab; > > struct btf_id_dtor_kfunc_tab *dtor_kfunc_tab; > > @@ -548,23 +549,102 @@ u32 btf_nr_types(const struct btf *btf) > > return total; > > } > > > > -s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind) > > +u32 btf_type_cnt(const struct btf *btf) > > +{ > > + return btf->start_id + btf->nr_types; > > +} > > + > > +static s32 btf_find_by_name_bsearch(const struct btf *btf, const char *name, > > + int *start, int *end) > > { > > const struct btf_type *t; > > - const char *tname; > > - u32 i, total; > > + const char *name_buf; > > + int low, low_start, mid, high, high_end; > > + int ret, start_id; > > + > > + start_id = btf->base_btf ? btf->start_id : 1; > > + low_start = low = start_id; > > + high_end = high = start_id + btf->nr_types_sorted - 1; > > + > > + while (low <= high) { > > + mid = low + (high - low) / 2; > > + t = btf_type_by_id(btf, mid); > > + name_buf = btf_name_by_offset(btf, t->name_off); > > + ret = strcmp(name, name_buf); > > + if (ret > 0) > > + low = mid + 1; > > + else if (ret < 0) > > + high = mid - 1; > > + else > > + break; > > + } > > > > - total = btf_nr_types(btf); > > - for (i = 1; i < total; i++) { > > - t = btf_type_by_id(btf, i); > > - if (BTF_INFO_KIND(t->info) != kind) > > - continue; > > + if (low > high) > > + return -ESRCH; > > > > - tname = btf_name_by_offset(btf, t->name_off); > > - if (!strcmp(tname, name)) > > - return i; > > + if (start) { > > + low = mid; > > + while (low > low_start) { > > + t = btf_type_by_id(btf, low-1); > > + name_buf = btf_name_by_offset(btf, t->name_off); > > + if (strcmp(name, name_buf)) > > + break; > > + low--; > > + } > > + *start = low; > > + } > > + > > + if (end) { > > + high = mid; > > + while (high < high_end) { > > + t = btf_type_by_id(btf, high+1); > > + name_buf = btf_name_by_offset(btf, t->name_off); > > + if (strcmp(name, name_buf)) > > + break; > > + high++; > > + } > > + *end = high; > > } > > > > this is an overcomplicated implementation, you need something like > find_linfo() implementation in kernel/bpf/log.c. Note how much shorter > and leaner it is. > > I also don't think you need to return `end`. Given you always start > from start and linearly scan forward, you just need to make sure that > you never go beyond the BTF type array, for which you can use > btf_type_cnt(). So no need for doing this linear scan twice. Thank you, but the situation here is different. When the btf file is sorted, the btf_types with a name are placed at the beginning of the file, while those without a name are placed at the end. Additionally, if there are multiple btf_types with the same name in a btf file, they will have different kinds, and these btf_types with the same name will be grouped together. For example, in the following case: ... [13561] FUNC 'bp_constraints_unlock' type_id=105264 linkage=static [13562] STRUCT 'bp_cpuinfo' size=20 vlen=2 'cpu_pinned' type_id=66670 bits_offset=0 'tsk_pinned' type_id=13568 bits_offset=32 [13563] VAR 'bp_cpuinfo' type_id=103076, linkage=static [13564] FUNC 'bp_init_aperfmperf' type_id=70013 linkage=static [13565] STRUCT 'bp_patching_desc' size=16 vlen=3 ... Both 13562 and 13563 have the name 'bp_cpuinfo', but their kinds are different. Therefore, when using the btf_find_by_name_bsearch function to find the btf_type named 'bp_cpuinfo', the start parameter will be set to 11562 and the end parameter will be set to 11563. We can then check their kind to obtain the correct btf_type. > > > + return mid; > > +} > > + > > +s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind) > > +{ > > + const struct btf_type *t; > > + const char *tname; > > + int start, end; > > + s32 id, total; > > + > > + do { > > + if (btf->nr_types_sorted) { > > + /* binary search */ > > + id = btf_find_by_name_bsearch(btf, name, &start, &end); > > + if (id > 0) { > > + while (start <= end) { > > + t = btf_type_by_id(btf, start); > > + if (BTF_INFO_KIND(t->info) == kind) > > + return start; > > + start++; > > + } > > + } > > + } else { > > + /* linear search */ > > + total = btf_type_cnt(btf); > > + for (id = btf->base_btf ? btf->start_id : 1; > > + id < total; id++) { > > + t = btf_type_by_id(btf, id); > > + if (BTF_INFO_KIND(t->info) != kind) > > + continue; > > + > > + tname = btf_name_by_offset(btf, t->name_off); > > + if (!strcmp(tname, name)) > > + return id; > > + } > > + } > > + btf = btf->base_btf; > > + } while (btf); > > + > > return -ENOENT; > > } > > > > @@ -6141,6 +6221,53 @@ int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_ty > > return kctx_type_id; > > } > > > > +static int btf_check_sort(struct btf *btf, int start_id) > > +{ > > + int i, n, nr_names = 0; > > + > > + n = btf_nr_types(btf); > > + for (i = start_id; i < n; i++) { > > + const struct btf_type *t; > > + const char *name; > > + > > + t = btf_type_by_id(btf, i); > > + if (!t) > > + return -EINVAL; > > + > > + name = btf_str_by_offset(btf, t->name_off); > > + if (!str_is_empty(name)) > > + nr_names++; > > + } > > + > > this loop makes zero sense to me, what are you trying to achieve with > it and why? As previously mentioned, if the btf file is sorted, the btf_type with a name will be placed at the beginning of the file in ascending order, while those without a name will be placed at the end. Therefore, we can verify if the btf file is sorted by following these steps: Step 1: Count the number of btf_types with a name and store it as nr_names. Step 2: Inspect the first nr_names btf_types. If any of the following cases occur, it indicates that the btf file is not sorted: 1. A btf_type without a name is encountered. 2. The name of the current btf_type is greater than the name of the next btf_type. > > > + for (i = 0; i < nr_names - 1; i++) { > > just start from start_id + 1, all the way to n, and check that sorting > invariant holds for all items > > > + const struct btf_type *t1, *t2; > > + const char *s1, *s2; > > + > > + t1 = btf_type_by_id(btf, start_id + i); > > + if (!t1) > > + return -EINVAL; > > + > > + s1 = btf_str_by_offset(btf, t1->name_off); > > + if (str_is_empty(s1)) > > + goto out; > > + > > + t2 = btf_type_by_id(btf, start_id + i + 1); > > + if (!t2) > > + return -EINVAL; > > + > > + s2 = btf_str_by_offset(btf, t2->name_off); > > + if (str_is_empty(s2)) > > + goto out; > > + > > + if (strcmp(s1, s2) > 0) > > + goto out; > > + } > > + > > + btf->nr_types_sorted = nr_names; > > +out: > > + return 0; > > +} > > [...]
On Wed, Oct 30, 2024 at 8:00 AM Donglin Peng <dolinux.peng@gmail.com> wrote: > > On Wed, Oct 30, 2024 at 6:13 AM Andrii Nakryiko > <andrii.nakryiko@gmail.com> wrote: > > > > On Mon, Oct 28, 2024 at 5:22 PM Donglin Peng <dolinux.peng@gmail.com> wrote: > > > > > > Currently, we are only using the linear search method to find the type id > > > by the name, which has a time complexity of O(n). This change involves > > > sorting the names of btf types in ascending order and using binary search, > > > which has a time complexity of O(log(n)). This idea was inspired by the > > > following patch: > > > > > > 60443c88f3a8 ("kallsyms: Improve the performance of kallsyms_lookup_name()"). > > > > > > At present, this improvement is only for searching in vmlinux's and > > > module's BTFs. > > > > > > Another change is the search direction, where we search the BTF first and > > > then its base, the type id of the first matched btf_type will be returned. > > > > > > Here is a time-consuming result that finding 87590 type ids by their names in > > > vmlinux's BTF. > > > > > > Before: 158426 ms > > > After: 114 ms > > > > > > The average lookup performance has improved more than 1000x in the above scenario. > > > > > > Tested-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> > > > Signed-off-by: Donglin Peng <dolinux.peng@gmail.com> > > > --- > > > v4: > > > - move the modification of libbpf to another patch > > > > > > v3: > > > - Link: https://lore.kernel.org/all/20240608140835.965949-1-dolinux.peng@gmail.com/ > > > - Sort btf_types during build process other than during boot, to reduce the > > > overhead of memory and boot time. > > > > > > v2: > > > - Link: https://lore.kernel.org/all/20230909091646.420163-1-pengdonglin@sangfor.com.cn > > > --- > > > include/linux/btf.h | 1 + > > > kernel/bpf/btf.c | 157 ++++++++++++++++++++++++++++++++++++++++---- > > > 2 files changed, 147 insertions(+), 11 deletions(-) > > > > > > diff --git a/include/linux/btf.h b/include/linux/btf.h > > > index b8a583194c4a..64c35aaa22fa 100644 > > > --- a/include/linux/btf.h > > > +++ b/include/linux/btf.h > > > @@ -216,6 +216,7 @@ bool btf_is_module(const struct btf *btf); > > > bool btf_is_vmlinux(const struct btf *btf); > > > struct module *btf_try_get_module(const struct btf *btf); > > > u32 btf_nr_types(const struct btf *btf); > > > +u32 btf_type_cnt(const struct btf *btf); > > > struct btf *btf_base_btf(const struct btf *btf); > > > bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, > > > const struct btf_member *m, > > > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c > > > index 5cd1c7a23848..6d0d58989640 100644 > > > --- a/kernel/bpf/btf.c > > > +++ b/kernel/bpf/btf.c > > > @@ -262,6 +262,7 @@ struct btf { > > > u32 data_size; > > > refcount_t refcnt; > > > u32 id; > > > + u32 nr_types_sorted; > > > struct rcu_head rcu; > > > struct btf_kfunc_set_tab *kfunc_set_tab; > > > struct btf_id_dtor_kfunc_tab *dtor_kfunc_tab; > > > @@ -548,23 +549,102 @@ u32 btf_nr_types(const struct btf *btf) > > > return total; > > > } > > > > > > -s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind) > > > +u32 btf_type_cnt(const struct btf *btf) > > > +{ > > > + return btf->start_id + btf->nr_types; > > > +} > > > + > > > +static s32 btf_find_by_name_bsearch(const struct btf *btf, const char *name, > > > + int *start, int *end) > > > { > > > const struct btf_type *t; > > > - const char *tname; > > > - u32 i, total; > > > + const char *name_buf; > > > + int low, low_start, mid, high, high_end; > > > + int ret, start_id; > > > + > > > + start_id = btf->base_btf ? btf->start_id : 1; > > > + low_start = low = start_id; > > > + high_end = high = start_id + btf->nr_types_sorted - 1; > > > + > > > + while (low <= high) { > > > + mid = low + (high - low) / 2; > > > + t = btf_type_by_id(btf, mid); > > > + name_buf = btf_name_by_offset(btf, t->name_off); > > > + ret = strcmp(name, name_buf); > > > + if (ret > 0) > > > + low = mid + 1; > > > + else if (ret < 0) > > > + high = mid - 1; > > > + else > > > + break; > > > + } > > > > > > - total = btf_nr_types(btf); > > > - for (i = 1; i < total; i++) { > > > - t = btf_type_by_id(btf, i); > > > - if (BTF_INFO_KIND(t->info) != kind) > > > - continue; > > > + if (low > high) > > > + return -ESRCH; > > > > > > - tname = btf_name_by_offset(btf, t->name_off); > > > - if (!strcmp(tname, name)) > > > - return i; > > > + if (start) { > > > + low = mid; > > > + while (low > low_start) { > > > + t = btf_type_by_id(btf, low-1); > > > + name_buf = btf_name_by_offset(btf, t->name_off); > > > + if (strcmp(name, name_buf)) > > > + break; > > > + low--; > > > + } > > > + *start = low; > > > + } > > > + > > > + if (end) { > > > + high = mid; > > > + while (high < high_end) { > > > + t = btf_type_by_id(btf, high+1); > > > + name_buf = btf_name_by_offset(btf, t->name_off); > > > + if (strcmp(name, name_buf)) > > > + break; > > > + high++; > > > + } > > > + *end = high; > > > } > > > > > > > this is an overcomplicated implementation, you need something like > > find_linfo() implementation in kernel/bpf/log.c. Note how much shorter > > and leaner it is. > > > > I also don't think you need to return `end`. Given you always start > > from start and linearly scan forward, you just need to make sure that > > you never go beyond the BTF type array, for which you can use > > btf_type_cnt(). So no need for doing this linear scan twice. > > Thank you, but the situation here is different. When > the btf file is sorted, the btf_types with a name are > placed at the beginning of the file, while those without > a name are placed at the end. Additionally, if there > are multiple btf_types with the same name in a btf file, > they will have different kinds, and these btf_types with > the same name will be grouped together. For example, in > the following case: > > ... > [13561] FUNC 'bp_constraints_unlock' type_id=105264 linkage=static > [13562] STRUCT 'bp_cpuinfo' size=20 vlen=2 > 'cpu_pinned' type_id=66670 bits_offset=0 > 'tsk_pinned' type_id=13568 bits_offset=32 > [13563] VAR 'bp_cpuinfo' type_id=103076, linkage=static > [13564] FUNC 'bp_init_aperfmperf' type_id=70013 linkage=static > [13565] STRUCT 'bp_patching_desc' size=16 vlen=3 > ... > > Both 13562 and 13563 have the name 'bp_cpuinfo', but their > kinds are different. Therefore, when using the btf_find_by_name_bsearch > function to find the btf_type named 'bp_cpuinfo', the start > parameter will be set to 11562 and the end parameter will > be set to 11563. We can then check their kind to obtain the > correct btf_type. I understand that, thank you. find_linfo() shows an example of binary search to find the rightmost item that's <= than requested search key. You have a similar problem here. You need to find the leftmost item that's equal to the search key. Both of these problems are solved by binary search without any extra post-processing and linear scans. > > > > > > + return mid; > > > +} > > > + > > > +s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind) > > > +{ > > > + const struct btf_type *t; > > > + const char *tname; > > > + int start, end; > > > + s32 id, total; > > > + > > > + do { > > > + if (btf->nr_types_sorted) { > > > + /* binary search */ > > > + id = btf_find_by_name_bsearch(btf, name, &start, &end); > > > + if (id > 0) { > > > + while (start <= end) { > > > + t = btf_type_by_id(btf, start); > > > + if (BTF_INFO_KIND(t->info) == kind) > > > + return start; > > > + start++; > > > + } > > > + } > > > + } else { > > > + /* linear search */ > > > + total = btf_type_cnt(btf); > > > + for (id = btf->base_btf ? btf->start_id : 1; > > > + id < total; id++) { > > > + t = btf_type_by_id(btf, id); > > > + if (BTF_INFO_KIND(t->info) != kind) > > > + continue; > > > + > > > + tname = btf_name_by_offset(btf, t->name_off); > > > + if (!strcmp(tname, name)) > > > + return id; > > > + } > > > + } > > > + btf = btf->base_btf; > > > + } while (btf); > > > + > > > return -ENOENT; > > > } > > > > > > @@ -6141,6 +6221,53 @@ int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_ty > > > return kctx_type_id; > > > } > > > > > > +static int btf_check_sort(struct btf *btf, int start_id) > > > +{ > > > + int i, n, nr_names = 0; > > > + > > > + n = btf_nr_types(btf); > > > + for (i = start_id; i < n; i++) { > > > + const struct btf_type *t; > > > + const char *name; > > > + > > > + t = btf_type_by_id(btf, i); > > > + if (!t) > > > + return -EINVAL; > > > + > > > + name = btf_str_by_offset(btf, t->name_off); > > > + if (!str_is_empty(name)) > > > + nr_names++; > > > + } > > > + > > > > this loop makes zero sense to me, what are you trying to achieve with > > it and why? > > As previously mentioned, if the btf file is sorted, the > btf_type with a name will be placed at the beginning of > the file in ascending order, while those without a name > will be placed at the end. Therefore, we can verify if > the btf file is sorted by following these steps: > > Step 1: Count the number of btf_types with a name and > store it as nr_names. > > Step 2: Inspect the first nr_names btf_types. If any of > the following cases occur, it indicates that the > btf file is not sorted: > 1. A btf_type without a name is encountered. > 2. The name of the current btf_type is greater than > the name of the next btf_type. This is convoluted and unnecessary. Just go over all items and validate that each pair maintains the sorting criteria. > > > > > > + for (i = 0; i < nr_names - 1; i++) { > > > > just start from start_id + 1, all the way to n, and check that sorting > > invariant holds for all items > > > > > + const struct btf_type *t1, *t2; > > > + const char *s1, *s2; > > > + > > > + t1 = btf_type_by_id(btf, start_id + i); > > > + if (!t1) > > > + return -EINVAL; > > > + > > > + s1 = btf_str_by_offset(btf, t1->name_off); > > > + if (str_is_empty(s1)) > > > + goto out; > > > + > > > + t2 = btf_type_by_id(btf, start_id + i + 1); > > > + if (!t2) > > > + return -EINVAL; > > > + > > > + s2 = btf_str_by_offset(btf, t2->name_off); > > > + if (str_is_empty(s2)) > > > + goto out; > > > + > > > + if (strcmp(s1, s2) > 0) > > > + goto out; > > > + } > > > + > > > + btf->nr_types_sorted = nr_names; > > > +out: > > > + return 0; > > > +} > > > > [...]
On Thu, Oct 31, 2024 at 1:33 AM Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote: > > On Wed, Oct 30, 2024 at 8:00 AM Donglin Peng <dolinux.peng@gmail.com> wrote: > > > > On Wed, Oct 30, 2024 at 6:13 AM Andrii Nakryiko > > <andrii.nakryiko@gmail.com> wrote: > > > > > > On Mon, Oct 28, 2024 at 5:22 PM Donglin Peng <dolinux.peng@gmail.com> wrote: > > > > > > > > Currently, we are only using the linear search method to find the type id > > > > by the name, which has a time complexity of O(n). This change involves > > > > sorting the names of btf types in ascending order and using binary search, > > > > which has a time complexity of O(log(n)). This idea was inspired by the > > > > following patch: > > > > > > > > 60443c88f3a8 ("kallsyms: Improve the performance of kallsyms_lookup_name()"). > > > > > > > > At present, this improvement is only for searching in vmlinux's and > > > > module's BTFs. > > > > > > > > Another change is the search direction, where we search the BTF first and > > > > then its base, the type id of the first matched btf_type will be returned. > > > > > > > > Here is a time-consuming result that finding 87590 type ids by their names in > > > > vmlinux's BTF. > > > > > > > > Before: 158426 ms > > > > After: 114 ms > > > > > > > > The average lookup performance has improved more than 1000x in the above scenario. > > > > > > > > Tested-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> > > > > Signed-off-by: Donglin Peng <dolinux.peng@gmail.com> > > > > --- > > > > v4: > > > > - move the modification of libbpf to another patch > > > > > > > > v3: > > > > - Link: https://lore.kernel.org/all/20240608140835.965949-1-dolinux.peng@gmail.com/ > > > > - Sort btf_types during build process other than during boot, to reduce the > > > > overhead of memory and boot time. > > > > > > > > v2: > > > > - Link: https://lore.kernel.org/all/20230909091646.420163-1-pengdonglin@sangfor.com.cn > > > > --- > > > > include/linux/btf.h | 1 + > > > > kernel/bpf/btf.c | 157 ++++++++++++++++++++++++++++++++++++++++---- > > > > 2 files changed, 147 insertions(+), 11 deletions(-) > > > > > > > > diff --git a/include/linux/btf.h b/include/linux/btf.h > > > > index b8a583194c4a..64c35aaa22fa 100644 > > > > --- a/include/linux/btf.h > > > > +++ b/include/linux/btf.h > > > > @@ -216,6 +216,7 @@ bool btf_is_module(const struct btf *btf); > > > > bool btf_is_vmlinux(const struct btf *btf); > > > > struct module *btf_try_get_module(const struct btf *btf); > > > > u32 btf_nr_types(const struct btf *btf); > > > > +u32 btf_type_cnt(const struct btf *btf); > > > > struct btf *btf_base_btf(const struct btf *btf); > > > > bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, > > > > const struct btf_member *m, > > > > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c > > > > index 5cd1c7a23848..6d0d58989640 100644 > > > > --- a/kernel/bpf/btf.c > > > > +++ b/kernel/bpf/btf.c > > > > @@ -262,6 +262,7 @@ struct btf { > > > > u32 data_size; > > > > refcount_t refcnt; > > > > u32 id; > > > > + u32 nr_types_sorted; > > > > struct rcu_head rcu; > > > > struct btf_kfunc_set_tab *kfunc_set_tab; > > > > struct btf_id_dtor_kfunc_tab *dtor_kfunc_tab; > > > > @@ -548,23 +549,102 @@ u32 btf_nr_types(const struct btf *btf) > > > > return total; > > > > } > > > > > > > > -s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind) > > > > +u32 btf_type_cnt(const struct btf *btf) > > > > +{ > > > > + return btf->start_id + btf->nr_types; > > > > +} > > > > + > > > > +static s32 btf_find_by_name_bsearch(const struct btf *btf, const char *name, > > > > + int *start, int *end) > > > > { > > > > const struct btf_type *t; > > > > - const char *tname; > > > > - u32 i, total; > > > > + const char *name_buf; > > > > + int low, low_start, mid, high, high_end; > > > > + int ret, start_id; > > > > + > > > > + start_id = btf->base_btf ? btf->start_id : 1; > > > > + low_start = low = start_id; > > > > + high_end = high = start_id + btf->nr_types_sorted - 1; > > > > + > > > > + while (low <= high) { > > > > + mid = low + (high - low) / 2; > > > > + t = btf_type_by_id(btf, mid); > > > > + name_buf = btf_name_by_offset(btf, t->name_off); > > > > + ret = strcmp(name, name_buf); > > > > + if (ret > 0) > > > > + low = mid + 1; > > > > + else if (ret < 0) > > > > + high = mid - 1; > > > > + else > > > > + break; > > > > + } > > > > > > > > - total = btf_nr_types(btf); > > > > - for (i = 1; i < total; i++) { > > > > - t = btf_type_by_id(btf, i); > > > > - if (BTF_INFO_KIND(t->info) != kind) > > > > - continue; > > > > + if (low > high) > > > > + return -ESRCH; > > > > > > > > - tname = btf_name_by_offset(btf, t->name_off); > > > > - if (!strcmp(tname, name)) > > > > - return i; > > > > + if (start) { > > > > + low = mid; > > > > + while (low > low_start) { > > > > + t = btf_type_by_id(btf, low-1); > > > > + name_buf = btf_name_by_offset(btf, t->name_off); > > > > + if (strcmp(name, name_buf)) > > > > + break; > > > > + low--; > > > > + } > > > > + *start = low; > > > > + } > > > > + > > > > + if (end) { > > > > + high = mid; > > > > + while (high < high_end) { > > > > + t = btf_type_by_id(btf, high+1); > > > > + name_buf = btf_name_by_offset(btf, t->name_off); > > > > + if (strcmp(name, name_buf)) > > > > + break; > > > > + high++; > > > > + } > > > > + *end = high; > > > > } > > > > > > > > > > this is an overcomplicated implementation, you need something like > > > find_linfo() implementation in kernel/bpf/log.c. Note how much shorter > > > and leaner it is. > > > > > > I also don't think you need to return `end`. Given you always start > > > from start and linearly scan forward, you just need to make sure that > > > you never go beyond the BTF type array, for which you can use > > > btf_type_cnt(). So no need for doing this linear scan twice. > > > > Thank you, but the situation here is different. When > > the btf file is sorted, the btf_types with a name are > > placed at the beginning of the file, while those without > > a name are placed at the end. Additionally, if there > > are multiple btf_types with the same name in a btf file, > > they will have different kinds, and these btf_types with > > the same name will be grouped together. For example, in > > the following case: > > > > ... > > [13561] FUNC 'bp_constraints_unlock' type_id=105264 linkage=static > > [13562] STRUCT 'bp_cpuinfo' size=20 vlen=2 > > 'cpu_pinned' type_id=66670 bits_offset=0 > > 'tsk_pinned' type_id=13568 bits_offset=32 > > [13563] VAR 'bp_cpuinfo' type_id=103076, linkage=static > > [13564] FUNC 'bp_init_aperfmperf' type_id=70013 linkage=static > > [13565] STRUCT 'bp_patching_desc' size=16 vlen=3 > > ... > > > > Both 13562 and 13563 have the name 'bp_cpuinfo', but their > > kinds are different. Therefore, when using the btf_find_by_name_bsearch > > function to find the btf_type named 'bp_cpuinfo', the start > > parameter will be set to 11562 and the end parameter will > > be set to 11563. We can then check their kind to obtain the > > correct btf_type. > > I understand that, thank you. find_linfo() shows an example of binary > search to find the rightmost item that's <= than requested search key. > You have a similar problem here. You need to find the leftmost item > that's equal to the search key. Thank you, I will modify it. > > Both of these problems are solved by binary search without any extra > post-processing and linear scans. Thank you, I get it. > > > > > > > > > > + return mid; > > > > +} > > > > + > > > > +s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind) > > > > +{ > > > > + const struct btf_type *t; > > > > + const char *tname; > > > > + int start, end; > > > > + s32 id, total; > > > > + > > > > + do { > > > > + if (btf->nr_types_sorted) { > > > > + /* binary search */ > > > > + id = btf_find_by_name_bsearch(btf, name, &start, &end); > > > > + if (id > 0) { > > > > + while (start <= end) { > > > > + t = btf_type_by_id(btf, start); > > > > + if (BTF_INFO_KIND(t->info) == kind) > > > > + return start; > > > > + start++; > > > > + } > > > > + } > > > > + } else { > > > > + /* linear search */ > > > > + total = btf_type_cnt(btf); > > > > + for (id = btf->base_btf ? btf->start_id : 1; > > > > + id < total; id++) { > > > > + t = btf_type_by_id(btf, id); > > > > + if (BTF_INFO_KIND(t->info) != kind) > > > > + continue; > > > > + > > > > + tname = btf_name_by_offset(btf, t->name_off); > > > > + if (!strcmp(tname, name)) > > > > + return id; > > > > + } > > > > + } > > > > + btf = btf->base_btf; > > > > + } while (btf); > > > > + > > > > return -ENOENT; > > > > } > > > > > > > > @@ -6141,6 +6221,53 @@ int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_ty > > > > return kctx_type_id; > > > > } > > > > > > > > +static int btf_check_sort(struct btf *btf, int start_id) > > > > +{ > > > > + int i, n, nr_names = 0; > > > > + > > > > + n = btf_nr_types(btf); > > > > + for (i = start_id; i < n; i++) { > > > > + const struct btf_type *t; > > > > + const char *name; > > > > + > > > > + t = btf_type_by_id(btf, i); > > > > + if (!t) > > > > + return -EINVAL; > > > > + > > > > + name = btf_str_by_offset(btf, t->name_off); > > > > + if (!str_is_empty(name)) > > > > + nr_names++; > > > > + } > > > > + > > > > > > this loop makes zero sense to me, what are you trying to achieve with > > > it and why? > > > > As previously mentioned, if the btf file is sorted, the > > btf_type with a name will be placed at the beginning of > > the file in ascending order, while those without a name > > will be placed at the end. Therefore, we can verify if > > the btf file is sorted by following these steps: > > > > Step 1: Count the number of btf_types with a name and > > store it as nr_names. > > > > Step 2: Inspect the first nr_names btf_types. If any of > > the following cases occur, it indicates that the > > btf file is not sorted: > > 1. A btf_type without a name is encountered. > > 2. The name of the current btf_type is greater than > > the name of the next btf_type. > > This is convoluted and unnecessary. Just go over all items and > validate that each pair maintains the sorting criteria. Thank you, I will modify it. > > > > > > > > > > + for (i = 0; i < nr_names - 1; i++) { > > > > > > just start from start_id + 1, all the way to n, and check that sorting > > > invariant holds for all items > > > > > > > + const struct btf_type *t1, *t2; > > > > + const char *s1, *s2; > > > > + > > > > + t1 = btf_type_by_id(btf, start_id + i); > > > > + if (!t1) > > > > + return -EINVAL; > > > > + > > > > + s1 = btf_str_by_offset(btf, t1->name_off); > > > > + if (str_is_empty(s1)) > > > > + goto out; > > > > + > > > > + t2 = btf_type_by_id(btf, start_id + i + 1); > > > > + if (!t2) > > > > + return -EINVAL; > > > > + > > > > + s2 = btf_str_by_offset(btf, t2->name_off); > > > > + if (str_is_empty(s2)) > > > > + goto out; > > > > + > > > > + if (strcmp(s1, s2) > 0) > > > > + goto out; > > > > + } > > > > + > > > > + btf->nr_types_sorted = nr_names; > > > > +out: > > > > + return 0; > > > > +} > > > > > > [...]
diff --git a/include/linux/btf.h b/include/linux/btf.h index b8a583194c4a..64c35aaa22fa 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -216,6 +216,7 @@ bool btf_is_module(const struct btf *btf); bool btf_is_vmlinux(const struct btf *btf); struct module *btf_try_get_module(const struct btf *btf); u32 btf_nr_types(const struct btf *btf); +u32 btf_type_cnt(const struct btf *btf); struct btf *btf_base_btf(const struct btf *btf); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 5cd1c7a23848..6d0d58989640 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -262,6 +262,7 @@ struct btf { u32 data_size; refcount_t refcnt; u32 id; + u32 nr_types_sorted; struct rcu_head rcu; struct btf_kfunc_set_tab *kfunc_set_tab; struct btf_id_dtor_kfunc_tab *dtor_kfunc_tab; @@ -548,23 +549,102 @@ u32 btf_nr_types(const struct btf *btf) return total; } -s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind) +u32 btf_type_cnt(const struct btf *btf) +{ + return btf->start_id + btf->nr_types; +} + +static s32 btf_find_by_name_bsearch(const struct btf *btf, const char *name, + int *start, int *end) { const struct btf_type *t; - const char *tname; - u32 i, total; + const char *name_buf; + int low, low_start, mid, high, high_end; + int ret, start_id; + + start_id = btf->base_btf ? btf->start_id : 1; + low_start = low = start_id; + high_end = high = start_id + btf->nr_types_sorted - 1; + + while (low <= high) { + mid = low + (high - low) / 2; + t = btf_type_by_id(btf, mid); + name_buf = btf_name_by_offset(btf, t->name_off); + ret = strcmp(name, name_buf); + if (ret > 0) + low = mid + 1; + else if (ret < 0) + high = mid - 1; + else + break; + } - total = btf_nr_types(btf); - for (i = 1; i < total; i++) { - t = btf_type_by_id(btf, i); - if (BTF_INFO_KIND(t->info) != kind) - continue; + if (low > high) + return -ESRCH; - tname = btf_name_by_offset(btf, t->name_off); - if (!strcmp(tname, name)) - return i; + if (start) { + low = mid; + while (low > low_start) { + t = btf_type_by_id(btf, low-1); + name_buf = btf_name_by_offset(btf, t->name_off); + if (strcmp(name, name_buf)) + break; + low--; + } + *start = low; + } + + if (end) { + high = mid; + while (high < high_end) { + t = btf_type_by_id(btf, high+1); + name_buf = btf_name_by_offset(btf, t->name_off); + if (strcmp(name, name_buf)) + break; + high++; + } + *end = high; } + return mid; +} + +s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind) +{ + const struct btf_type *t; + const char *tname; + int start, end; + s32 id, total; + + do { + if (btf->nr_types_sorted) { + /* binary search */ + id = btf_find_by_name_bsearch(btf, name, &start, &end); + if (id > 0) { + while (start <= end) { + t = btf_type_by_id(btf, start); + if (BTF_INFO_KIND(t->info) == kind) + return start; + start++; + } + } + } else { + /* linear search */ + total = btf_type_cnt(btf); + for (id = btf->base_btf ? btf->start_id : 1; + id < total; id++) { + t = btf_type_by_id(btf, id); + if (BTF_INFO_KIND(t->info) != kind) + continue; + + tname = btf_name_by_offset(btf, t->name_off); + if (!strcmp(tname, name)) + return id; + } + } + btf = btf->base_btf; + } while (btf); + return -ENOENT; } @@ -6141,6 +6221,53 @@ int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_ty return kctx_type_id; } +static int btf_check_sort(struct btf *btf, int start_id) +{ + int i, n, nr_names = 0; + + n = btf_nr_types(btf); + for (i = start_id; i < n; i++) { + const struct btf_type *t; + const char *name; + + t = btf_type_by_id(btf, i); + if (!t) + return -EINVAL; + + name = btf_str_by_offset(btf, t->name_off); + if (!str_is_empty(name)) + nr_names++; + } + + for (i = 0; i < nr_names - 1; i++) { + const struct btf_type *t1, *t2; + const char *s1, *s2; + + t1 = btf_type_by_id(btf, start_id + i); + if (!t1) + return -EINVAL; + + s1 = btf_str_by_offset(btf, t1->name_off); + if (str_is_empty(s1)) + goto out; + + t2 = btf_type_by_id(btf, start_id + i + 1); + if (!t2) + return -EINVAL; + + s2 = btf_str_by_offset(btf, t2->name_off); + if (str_is_empty(s2)) + goto out; + + if (strcmp(s1, s2) > 0) + goto out; + } + + btf->nr_types_sorted = nr_names; +out: + return 0; +} + BTF_ID_LIST(bpf_ctx_convert_btf_id) BTF_ID(struct, bpf_ctx_convert) @@ -6212,6 +6339,10 @@ struct btf *btf_parse_vmlinux(void) if (IS_ERR(btf)) goto err_out; + err = btf_check_sort(btf, 1); + if (err) + goto err_out; + /* btf_parse_vmlinux() runs under bpf_verifier_lock */ bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]); err = btf_alloc_id(btf); @@ -6315,6 +6446,10 @@ static struct btf *btf_parse_module(const char *module_name, const void *data, base_btf = vmlinux_btf; } + err = btf_check_sort(btf, btf_nr_types(base_btf)); + if (err) + goto errout; + btf_verifier_env_free(env); refcount_set(&btf->refcnt, 1); return btf;