Message ID | 912af13c205a31080cc61d40bec29d7402185d6e.1738059345.git.jerome.forissier@linaro.org |
---|---|
State | New |
Headers | show |
Series | Coroutines | expand |
On 1/28/25 11:19, Jerome Forissier wrote: > Adds the COROUTINES Kconfig symbol which introduces a new internal API > for coroutines support. As explained in the Kconfig file, this is meant > to provide some kind of cooperative multi-tasking with the goal to > improve performance by overlapping lengthy operations. > > The API as well as the implementation is very much inspired from libaco > [1]. The reference implementation is simplified to remove all things > not needed in U-Boot, the coding style is updated, and the aco_ prefix > is replaced by co_. > > I believe the stack handling could be simplified: the stack of the main Please use imperative mood. > coroutine could probably probably be used by the secondary coroutines > instead of allocating a new stack dynamically. > > Only i386, x86_64 and aarch64 are supported at the moment. Other > architectures need to provide a _co_switch() function in assembly. Likely should be updated. > > Only aarch64 has been tested. > > [1] https://github.com/hnes/libaco/ > > Signed-off-by: Jerome Forissier <jerome.forissier@linaro.org> > --- > arch/arm/cpu/armv8/Makefile | 1 + > arch/arm/cpu/armv8/co_switch.S | 36 +++++++ > include/coroutines.h | 130 ++++++++++++++++++++++++++ > lib/Kconfig | 10 ++ > lib/Makefile | 2 + > lib/coroutines.c | 165 +++++++++++++++++++++++++++++++++ Checkpatch is reporting some issues. Please fix them. total: 17 errors, 19 warnings, 1 checks, 359 lines checked > 6 files changed, 344 insertions(+) > create mode 100644 arch/arm/cpu/armv8/co_switch.S > create mode 100644 include/coroutines.h > create mode 100644 lib/coroutines.c > > diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile > index 2e71ff2dc97..6d07b6aa9f9 100644 > --- a/arch/arm/cpu/armv8/Makefile > +++ b/arch/arm/cpu/armv8/Makefile > @@ -46,3 +46,4 @@ obj-$(CONFIG_TARGET_BCMNS3) += bcmns3/ > obj-$(CONFIG_XEN) += xen/ > obj-$(CONFIG_ARMV8_CE_SHA1) += sha1_ce_glue.o sha1_ce_core.o > obj-$(CONFIG_ARMV8_CE_SHA256) += sha256_ce_glue.o sha256_ce_core.o > +obj-$(CONFIG_COROUTINES) += co_switch.o > diff --git a/arch/arm/cpu/armv8/co_switch.S b/arch/arm/cpu/armv8/co_switch.S > new file mode 100644 > index 00000000000..4405e89ec56 > --- /dev/null > +++ b/arch/arm/cpu/armv8/co_switch.S > @@ -0,0 +1,36 @@ > +/* SPDX-License-Identifier: GPL-2.0+ */ > +/* void _co_switch(struct uco *from_co, struct uco *to_co); */ > +.text > +.globl _co_switch > +.type _co_switch, @function > +_co_switch: > + // x0: from_co > + // x1: to_co > + // from_co and to_co layout: { pc, sp, x19-x29 } > + > + // Save context to from_co (x0) > + // AAPCS64 says "A subroutine invocation must preserve the contents of the > + // registers r19-r29 and SP" > + adr x2, 1f // pc we should use to resume after this function > + mov x3, sp > + stp x2, x3, [x0, #0] // pc, sp > + stp x19, x20, [x0, #16] > + stp x21, x22, [x0, #32] > + stp x23, x24, [x0, #48] > + stp x25, x26, [x0, #64] > + stp x27, x28, [x0, #80] > + stp x29, x30, [x0, #96] > + > + // Load new context from to_co (x1) > + ldp x2, x3, [x1, #0] // pc, sp > + ldp x19, x20, [x1, #16] > + ldp x21, x22, [x1, #32] > + ldp x23, x24, [x1, #48] > + ldp x25, x26, [x1, #64] > + ldp x27, x28, [x1, #80] > + ldp x29, x30, [x1, #96] > + mov sp, x3 > + br x2 > + > +1: // Return to the caller > + ret > diff --git a/include/coroutines.h b/include/coroutines.h > new file mode 100644 > index 00000000000..b85b656127c > --- /dev/null > +++ b/include/coroutines.h > @@ -0,0 +1,130 @@ > +/* SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later */ > +/* > + * Copyright 2018 Sen Han <00hnes@gmail.com> > + * Copyright 2025 Linaro Limited > + */ > + > +#ifndef _COROUTINES_H_ > +#define _COROUTINES_H_ > + > +#ifndef CONFIG_COROUTINES > + > +static inline void co_yield(void) {} > +static inline void co_exit(void) {} > + > +#else > + > +#ifdef __UBOOT__ > +#include <log.h> > +#else > +#include <assert.h> > +#endif Do we need ifdef at all? Are you testing it out of u-boot too? Or is this just untested code? > +#include <limits.h> > +#include <stdbool.h> > +#include <stdint.h> > +#include <stdio.h> > +#include <stdlib.h> > +#include <string.h> > +#include <time.h> > + > +#ifdef __aarch64__ > +#define CO_REG_IDX_RETADDR 0 > +#define CO_REG_IDX_SP 1 > +#else > +#error Architecture no supported > +#endif > + > +struct co_save_stack { > + void* ptr; > + size_t sz; > + size_t valid_sz; > + size_t max_cpsz; /* max copy size in bytes */ > +}; > + > +struct co_stack { > + void *ptr; > + size_t sz; > + void *align_highptr; > + void *align_retptr; > + size_t align_validsz; > + size_t align_limit; > + struct co *owner; > + void *real_ptr; > + size_t real_sz; > +}; > + > +struct co { > + /* CPU state: callee-saved registers plus SP and PC */ > + void *reg[14]; // pc, sp, x19-x29, x30 (lr) > + > + struct co *main_co; > + void *arg; > + bool done; > + > + void (*fp)(void); > + > + struct co_save_stack save_stack; > + struct co_stack *stack; > +}; > + > +extern struct co *current_co; > + > +static inline struct co *co_get_co(void) > +{ > + return current_co; > +} > + > +static inline void *co_get_arg(void) > +{ > + return co_get_co()->arg; > +} > + > +struct co_stack *co_stack_new(size_t sz); > + > +void co_stack_destroy(struct co_stack *s); > + > +struct co *co_create(struct co *main_co, > + struct co_stack *stack, > + size_t save_stack_sz, void (*fp)(void), > + void *arg); > + > +void co_resume(struct co *resume_co); > + > +void co_destroy(struct co *co); > + > +void *_co_switch(struct co *from_co, struct co *to_co); > + > +static inline void _co_yield_to_main_co(struct co *yield_co) > +{ > + assert(yield_co); > + assert(yield_co->main_co); > + _co_switch(yield_co, yield_co->main_co); > +} > + > +static inline void co_yield(void) > +{ > + if (current_co) > + _co_yield_to_main_co(current_co); > +} > + > +static inline bool co_is_main_co(struct co *co) > +{ > + return !co->main_co; > +} > + > +static inline void co_exit(void) > +{ > + struct co *co = co_get_co(); > + > + if (!co) > + return; > + co->done = true; > + assert(co->stack->owner == co); > + co->stack->owner = NULL; > + co->stack->align_validsz = 0; > + _co_yield_to_main_co(co); > + assert(false); > +} > + > +#endif /* CONFIG_COROUTINES */ > +#endif /* _COROUTINES_H_ */ > diff --git a/lib/Kconfig b/lib/Kconfig > index 8f1a96d98c4..b6c1380b927 100644 > --- a/lib/Kconfig > +++ b/lib/Kconfig > @@ -1226,6 +1226,16 @@ config PHANDLE_CHECK_SEQ > enable this config option to distinguish them using > phandles in fdtdec_get_alias_seq() function. > > +config COROUTINES > + bool "Enable coroutine support" > + help > + Coroutines allow to implement a simple form of cooperative > + multi-tasking. The main thread of execution registers one or > + more functions as coroutine entry points, then it schedules one > + of them. At any point the scheduled coroutine may yield, that is, > + suspend its execution and return back to the main thread. At this > + point another coroutine may be scheduled and so on until all the > + registered coroutines are done. newline > endmenu > > source "lib/fwu_updates/Kconfig" > diff --git a/lib/Makefile b/lib/Makefile > index 5cb3278d2ef..7b809151f5a 100644 > --- a/lib/Makefile > +++ b/lib/Makefile > @@ -159,6 +159,8 @@ obj-$(CONFIG_LIB_ELF) += elf.o > > obj-$(CONFIG_$(PHASE_)SEMIHOSTING) += semihosting.o > > +obj-$(CONFIG_COROUTINES) += coroutines.o > + > # > # Build a fast OID lookup registry from include/linux/oid_registry.h > # > diff --git a/lib/coroutines.c b/lib/coroutines.c > new file mode 100644 > index 00000000000..20c5aba5510 > --- /dev/null > +++ b/lib/coroutines.c > @@ -0,0 +1,165 @@ > +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later > + > +// Copyright 2018 Sen Han <00hnes@gmail.com> > +// Copyright 2025 Linaro Limited > + > +#include <coroutines.h> > +#include <stdio.h> > +#include <stdint.h> > + > + > +/* Current co-routine */ > +struct co *current_co; > + > +struct co_stack *co_stack_new(size_t sz) > +{ > + struct co_stack *p = calloc(1, sizeof(*p)); > + uintptr_t u_p; > + > + if (!p) > + return NULL; > + > + if (sz < 4096) > + sz = 4096; > + > + p->sz = sz; > + p->ptr = malloc(sz); > + if (!p->ptr) { > + free(p); > + return NULL; > + } > + > + p->owner = NULL; > + u_p = (uintptr_t)(p->sz - (sizeof(void*) << 1) + (uintptr_t)p->ptr); > + u_p = (u_p >> 4) << 4; > + p->align_highptr = (void*)u_p; > + p->align_retptr = (void*)(u_p - sizeof(void*)); > + assert(p->sz > (16 + (sizeof(void*) << 1) + sizeof(void*))); > + p->align_limit = p->sz - 16 - (sizeof(void*) << 1); > + > + return p; > +} > + > +void co_stack_destroy(struct co_stack *s){ > + if (!s) > + return; > + free(s->ptr); > + free(s); > +} > + > +struct co *co_create(struct co *main_co, > + struct co_stack *stack, > + size_t save_stack_sz, > + void (*fp)(void), void *arg) > +{ > + struct co *p = malloc(sizeof(*p)); > + assert(p); > + memset(p, 0, sizeof(*p)); > + > + if (main_co) { > + assert(stack); > + p->stack = stack; > + p->reg[CO_REG_IDX_RETADDR] = (void *)fp; > + // FIXME original code uses align_retptr; causes a crash > + p->reg[CO_REG_IDX_SP] = p->stack->align_highptr; > + p->main_co = main_co; > + p->arg = arg; > + p->fp = fp; > + if (!save_stack_sz) > + save_stack_sz = 64; > + p->save_stack.ptr = malloc(save_stack_sz); > + assert(p->save_stack.ptr); > + p->save_stack.sz = save_stack_sz; > + p->save_stack.valid_sz = 0; > + } else { > + p->main_co = NULL; > + p->arg = arg; > + p->fp = fp; > + p->stack = NULL; > + p->save_stack.ptr = NULL; > + } > + return p; > +} > + > +static void grab_stack(struct co *resume_co) > +{ > + struct co *owner_co = resume_co->stack->owner; > + > + if (owner_co) { > + assert(owner_co->stack == resume_co->stack); > + assert((uintptr_t)(owner_co->stack->align_retptr) >= > + (uintptr_t)(owner_co->reg[CO_REG_IDX_SP])); > + assert((uintptr_t)owner_co->stack->align_highptr - > + (uintptr_t)owner_co->stack->align_limit > + <= (uintptr_t)owner_co->reg[CO_REG_IDX_SP]); > + owner_co->save_stack.valid_sz = > + (uintptr_t)owner_co->stack->align_retptr - > + (uintptr_t)owner_co->reg[CO_REG_IDX_SP]; > + if (owner_co->save_stack.sz < owner_co->save_stack.valid_sz) { > + free(owner_co->save_stack.ptr); > + owner_co->save_stack.ptr = NULL; > + do { > + owner_co->save_stack.sz <<= 1; > + assert(owner_co->save_stack.sz > 0); > + } while (owner_co->save_stack.sz < > + owner_co->save_stack.valid_sz); > + owner_co->save_stack.ptr = > + malloc(owner_co->save_stack.sz); > + assert(owner_co->save_stack.ptr); > + } > + if (owner_co->save_stack.valid_sz > 0) > + memcpy(owner_co->save_stack.ptr, > + owner_co->reg[CO_REG_IDX_SP], > + owner_co->save_stack.valid_sz); > + if (owner_co->save_stack.valid_sz > > + owner_co->save_stack.max_cpsz) > + owner_co->save_stack.max_cpsz = > + owner_co->save_stack.valid_sz; > + owner_co->stack->owner = NULL; > + owner_co->stack->align_validsz = 0; > + } > + assert(!resume_co->stack->owner); > + assert(resume_co->save_stack.valid_sz <= > + resume_co->stack->align_limit - sizeof(void *)); > + if (resume_co->save_stack.valid_sz > 0) > + memcpy((void*) > + (uintptr_t)(resume_co->stack->align_retptr) - > + resume_co->save_stack.valid_sz, > + resume_co->save_stack.ptr, > + resume_co->save_stack.valid_sz); > + if (resume_co->save_stack.valid_sz > resume_co->save_stack.max_cpsz) > + resume_co->save_stack.max_cpsz = resume_co->save_stack.valid_sz; > + resume_co->stack->align_validsz = > + resume_co->save_stack.valid_sz + sizeof(void *); > + resume_co->stack->owner = resume_co; > +} > + > +void co_resume(struct co *resume_co) > +{ > + assert(resume_co && resume_co->main_co && !resume_co->done); > + > + if (resume_co->stack->owner != resume_co) > + grab_stack(resume_co); > + > + current_co = resume_co; > + _co_switch(resume_co->main_co, resume_co); > + current_co = resume_co->main_co; > +} > + > +void co_destroy(struct co *co){ obviously this is not u-boot coding style. I expect checkpatch reports this too. > + if (!co) > + return; > + > + if(co_is_main_co(co)){ > + free(co); > + current_co = NULL; > + } else { > + if(co->stack->owner == co){ > + co->stack->owner = NULL; > + co->stack->align_validsz = 0; > + } > + free(co->save_stack.ptr); > + co->save_stack.ptr = NULL; > + free(co); > + } > +} Thanks, Michal
On 1/28/25 12:09, Michal Simek wrote: > > > On 1/28/25 11:19, Jerome Forissier wrote: >> Adds the COROUTINES Kconfig symbol which introduces a new internal API >> for coroutines support. As explained in the Kconfig file, this is meant >> to provide some kind of cooperative multi-tasking with the goal to >> improve performance by overlapping lengthy operations. >> >> The API as well as the implementation is very much inspired from libaco >> [1]. The reference implementation is simplified to remove all things >> not needed in U-Boot, the coding style is updated, and the aco_ prefix >> is replaced by co_. >> >> I believe the stack handling could be simplified: the stack of the main > > Please use imperative mood. OK. I will drop the remark about stack handling simplification because it is not very helpful here. >> coroutine could probably probably be used by the secondary coroutines >> instead of allocating a new stack dynamically. >> >> Only i386, x86_64 and aarch64 are supported at the moment. Other >> architectures need to provide a _co_switch() function in assembly. > > Likely should be updated. OK > >> >> Only aarch64 has been tested. >> >> [1] https://github.com/hnes/libaco/ >> >> Signed-off-by: Jerome Forissier <jerome.forissier@linaro.org> >> --- >> arch/arm/cpu/armv8/Makefile | 1 + >> arch/arm/cpu/armv8/co_switch.S | 36 +++++++ >> include/coroutines.h | 130 ++++++++++++++++++++++++++ >> lib/Kconfig | 10 ++ >> lib/Makefile | 2 + >> lib/coroutines.c | 165 +++++++++++++++++++++++++++++++++ > > Checkpatch is reporting some issues. Please fix them. > > total: 17 errors, 19 warnings, 1 checks, 359 lines checked All the checkpatch issues will be fixed in v3. >> 6 files changed, 344 insertions(+) >> create mode 100644 arch/arm/cpu/armv8/co_switch.S >> create mode 100644 include/coroutines.h >> create mode 100644 lib/coroutines.c >> >> diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile >> index 2e71ff2dc97..6d07b6aa9f9 100644 >> --- a/arch/arm/cpu/armv8/Makefile >> +++ b/arch/arm/cpu/armv8/Makefile >> @@ -46,3 +46,4 @@ obj-$(CONFIG_TARGET_BCMNS3) += bcmns3/ >> obj-$(CONFIG_XEN) += xen/ >> obj-$(CONFIG_ARMV8_CE_SHA1) += sha1_ce_glue.o sha1_ce_core.o >> obj-$(CONFIG_ARMV8_CE_SHA256) += sha256_ce_glue.o sha256_ce_core.o >> +obj-$(CONFIG_COROUTINES) += co_switch.o >> diff --git a/arch/arm/cpu/armv8/co_switch.S b/arch/arm/cpu/armv8/co_switch.S >> new file mode 100644 >> index 00000000000..4405e89ec56 >> --- /dev/null >> +++ b/arch/arm/cpu/armv8/co_switch.S >> @@ -0,0 +1,36 @@ >> +/* SPDX-License-Identifier: GPL-2.0+ */ >> +/* void _co_switch(struct uco *from_co, struct uco *to_co); */ >> +.text >> +.globl _co_switch >> +.type _co_switch, @function >> +_co_switch: >> + // x0: from_co >> + // x1: to_co >> + // from_co and to_co layout: { pc, sp, x19-x29 } >> + >> + // Save context to from_co (x0) >> + // AAPCS64 says "A subroutine invocation must preserve the contents of the >> + // registers r19-r29 and SP" >> + adr x2, 1f // pc we should use to resume after this function >> + mov x3, sp >> + stp x2, x3, [x0, #0] // pc, sp >> + stp x19, x20, [x0, #16] >> + stp x21, x22, [x0, #32] >> + stp x23, x24, [x0, #48] >> + stp x25, x26, [x0, #64] >> + stp x27, x28, [x0, #80] >> + stp x29, x30, [x0, #96] >> + >> + // Load new context from to_co (x1) >> + ldp x2, x3, [x1, #0] // pc, sp >> + ldp x19, x20, [x1, #16] >> + ldp x21, x22, [x1, #32] >> + ldp x23, x24, [x1, #48] >> + ldp x25, x26, [x1, #64] >> + ldp x27, x28, [x1, #80] >> + ldp x29, x30, [x1, #96] >> + mov sp, x3 >> + br x2 >> + >> +1: // Return to the caller >> + ret >> diff --git a/include/coroutines.h b/include/coroutines.h >> new file mode 100644 >> index 00000000000..b85b656127c >> --- /dev/null >> +++ b/include/coroutines.h >> @@ -0,0 +1,130 @@ >> +/* SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later */ >> +/* >> + * Copyright 2018 Sen Han <00hnes@gmail.com> >> + * Copyright 2025 Linaro Limited >> + */ >> + >> +#ifndef _COROUTINES_H_ >> +#define _COROUTINES_H_ >> + >> +#ifndef CONFIG_COROUTINES >> + >> +static inline void co_yield(void) {} >> +static inline void co_exit(void) {} >> + >> +#else >> + >> +#ifdef __UBOOT__ >> +#include <log.h> >> +#else >> +#include <assert.h> >> +#endif > > Do we need ifdef at all? Are you testing it out of u-boot too? Or is this just untested code? It is a leftover from when I used to test outside U-Boot (in a regular Linux application) but that is not useful anymore. I'll remove the conditional. >> +#include <limits.h> >> +#include <stdbool.h> >> +#include <stdint.h> >> +#include <stdio.h> >> +#include <stdlib.h> >> +#include <string.h> >> +#include <time.h> >> + >> +#ifdef __aarch64__ >> +#define CO_REG_IDX_RETADDR 0 >> +#define CO_REG_IDX_SP 1 >> +#else >> +#error Architecture no supported >> +#endif >> + >> +struct co_save_stack { >> + void* ptr; >> + size_t sz; >> + size_t valid_sz; >> + size_t max_cpsz; /* max copy size in bytes */ >> +}; >> + >> +struct co_stack { >> + void *ptr; >> + size_t sz; >> + void *align_highptr; >> + void *align_retptr; >> + size_t align_validsz; >> + size_t align_limit; >> + struct co *owner; >> + void *real_ptr; >> + size_t real_sz; >> +}; >> + >> +struct co { >> + /* CPU state: callee-saved registers plus SP and PC */ >> + void *reg[14]; // pc, sp, x19-x29, x30 (lr) >> + >> + struct co *main_co; >> + void *arg; >> + bool done; >> + >> + void (*fp)(void); >> + >> + struct co_save_stack save_stack; >> + struct co_stack *stack; >> +}; >> + >> +extern struct co *current_co; >> + >> +static inline struct co *co_get_co(void) >> +{ >> + return current_co; >> +} >> + >> +static inline void *co_get_arg(void) >> +{ >> + return co_get_co()->arg; >> +} >> + >> +struct co_stack *co_stack_new(size_t sz); >> + >> +void co_stack_destroy(struct co_stack *s); >> + >> +struct co *co_create(struct co *main_co, >> + struct co_stack *stack, >> + size_t save_stack_sz, void (*fp)(void), >> + void *arg); >> + >> +void co_resume(struct co *resume_co); >> + >> +void co_destroy(struct co *co); >> + >> +void *_co_switch(struct co *from_co, struct co *to_co); >> + >> +static inline void _co_yield_to_main_co(struct co *yield_co) >> +{ >> + assert(yield_co); >> + assert(yield_co->main_co); >> + _co_switch(yield_co, yield_co->main_co); >> +} >> + >> +static inline void co_yield(void) >> +{ >> + if (current_co) >> + _co_yield_to_main_co(current_co); >> +} >> + >> +static inline bool co_is_main_co(struct co *co) >> +{ >> + return !co->main_co; >> +} >> + >> +static inline void co_exit(void) >> +{ >> + struct co *co = co_get_co(); >> + >> + if (!co) >> + return; >> + co->done = true; >> + assert(co->stack->owner == co); >> + co->stack->owner = NULL; >> + co->stack->align_validsz = 0; >> + _co_yield_to_main_co(co); >> + assert(false); >> +} >> + >> +#endif /* CONFIG_COROUTINES */ >> +#endif /* _COROUTINES_H_ */ >> diff --git a/lib/Kconfig b/lib/Kconfig >> index 8f1a96d98c4..b6c1380b927 100644 >> --- a/lib/Kconfig >> +++ b/lib/Kconfig >> @@ -1226,6 +1226,16 @@ config PHANDLE_CHECK_SEQ >> enable this config option to distinguish them using >> phandles in fdtdec_get_alias_seq() function. >> +config COROUTINES >> + bool "Enable coroutine support" >> + help >> + Coroutines allow to implement a simple form of cooperative >> + multi-tasking. The main thread of execution registers one or >> + more functions as coroutine entry points, then it schedules one >> + of them. At any point the scheduled coroutine may yield, that is, >> + suspend its execution and return back to the main thread. At this >> + point another coroutine may be scheduled and so on until all the >> + registered coroutines are done. > > newline > >> endmenu >> source "lib/fwu_updates/Kconfig" >> diff --git a/lib/Makefile b/lib/Makefile >> index 5cb3278d2ef..7b809151f5a 100644 >> --- a/lib/Makefile >> +++ b/lib/Makefile >> @@ -159,6 +159,8 @@ obj-$(CONFIG_LIB_ELF) += elf.o >> obj-$(CONFIG_$(PHASE_)SEMIHOSTING) += semihosting.o >> +obj-$(CONFIG_COROUTINES) += coroutines.o >> + >> # >> # Build a fast OID lookup registry from include/linux/oid_registry.h >> # >> diff --git a/lib/coroutines.c b/lib/coroutines.c >> new file mode 100644 >> index 00000000000..20c5aba5510 >> --- /dev/null >> +++ b/lib/coroutines.c >> @@ -0,0 +1,165 @@ >> +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later >> + >> +// Copyright 2018 Sen Han <00hnes@gmail.com> >> +// Copyright 2025 Linaro Limited >> + >> +#include <coroutines.h> >> +#include <stdio.h> >> +#include <stdint.h> >> + >> + >> +/* Current co-routine */ >> +struct co *current_co; >> + >> +struct co_stack *co_stack_new(size_t sz) >> +{ >> + struct co_stack *p = calloc(1, sizeof(*p)); >> + uintptr_t u_p; >> + >> + if (!p) >> + return NULL; >> + >> + if (sz < 4096) >> + sz = 4096; >> + >> + p->sz = sz; >> + p->ptr = malloc(sz); >> + if (!p->ptr) { >> + free(p); >> + return NULL; >> + } >> + >> + p->owner = NULL; >> + u_p = (uintptr_t)(p->sz - (sizeof(void*) << 1) + (uintptr_t)p->ptr); >> + u_p = (u_p >> 4) << 4; >> + p->align_highptr = (void*)u_p; >> + p->align_retptr = (void*)(u_p - sizeof(void*)); >> + assert(p->sz > (16 + (sizeof(void*) << 1) + sizeof(void*))); >> + p->align_limit = p->sz - 16 - (sizeof(void*) << 1); >> + >> + return p; >> +} >> + >> +void co_stack_destroy(struct co_stack *s){ >> + if (!s) >> + return; >> + free(s->ptr); >> + free(s); >> +} >> + >> +struct co *co_create(struct co *main_co, >> + struct co_stack *stack, >> + size_t save_stack_sz, >> + void (*fp)(void), void *arg) >> +{ >> + struct co *p = malloc(sizeof(*p)); >> + assert(p); >> + memset(p, 0, sizeof(*p)); >> + >> + if (main_co) { >> + assert(stack); >> + p->stack = stack; >> + p->reg[CO_REG_IDX_RETADDR] = (void *)fp; >> + // FIXME original code uses align_retptr; causes a crash >> + p->reg[CO_REG_IDX_SP] = p->stack->align_highptr; >> + p->main_co = main_co; >> + p->arg = arg; >> + p->fp = fp; >> + if (!save_stack_sz) >> + save_stack_sz = 64; >> + p->save_stack.ptr = malloc(save_stack_sz); >> + assert(p->save_stack.ptr); >> + p->save_stack.sz = save_stack_sz; >> + p->save_stack.valid_sz = 0; >> + } else { >> + p->main_co = NULL; >> + p->arg = arg; >> + p->fp = fp; >> + p->stack = NULL; >> + p->save_stack.ptr = NULL; >> + } >> + return p; >> +} >> + >> +static void grab_stack(struct co *resume_co) >> +{ >> + struct co *owner_co = resume_co->stack->owner; >> + >> + if (owner_co) { >> + assert(owner_co->stack == resume_co->stack); >> + assert((uintptr_t)(owner_co->stack->align_retptr) >= >> + (uintptr_t)(owner_co->reg[CO_REG_IDX_SP])); >> + assert((uintptr_t)owner_co->stack->align_highptr - >> + (uintptr_t)owner_co->stack->align_limit >> + <= (uintptr_t)owner_co->reg[CO_REG_IDX_SP]); >> + owner_co->save_stack.valid_sz = >> + (uintptr_t)owner_co->stack->align_retptr - >> + (uintptr_t)owner_co->reg[CO_REG_IDX_SP]; >> + if (owner_co->save_stack.sz < owner_co->save_stack.valid_sz) { >> + free(owner_co->save_stack.ptr); >> + owner_co->save_stack.ptr = NULL; >> + do { >> + owner_co->save_stack.sz <<= 1; >> + assert(owner_co->save_stack.sz > 0); >> + } while (owner_co->save_stack.sz < >> + owner_co->save_stack.valid_sz); >> + owner_co->save_stack.ptr = >> + malloc(owner_co->save_stack.sz); >> + assert(owner_co->save_stack.ptr); >> + } >> + if (owner_co->save_stack.valid_sz > 0) >> + memcpy(owner_co->save_stack.ptr, >> + owner_co->reg[CO_REG_IDX_SP], >> + owner_co->save_stack.valid_sz); >> + if (owner_co->save_stack.valid_sz > >> + owner_co->save_stack.max_cpsz) >> + owner_co->save_stack.max_cpsz = >> + owner_co->save_stack.valid_sz; >> + owner_co->stack->owner = NULL; >> + owner_co->stack->align_validsz = 0; >> + } >> + assert(!resume_co->stack->owner); >> + assert(resume_co->save_stack.valid_sz <= >> + resume_co->stack->align_limit - sizeof(void *)); >> + if (resume_co->save_stack.valid_sz > 0) >> + memcpy((void*) >> + (uintptr_t)(resume_co->stack->align_retptr) - >> + resume_co->save_stack.valid_sz, >> + resume_co->save_stack.ptr, >> + resume_co->save_stack.valid_sz); >> + if (resume_co->save_stack.valid_sz > resume_co->save_stack.max_cpsz) >> + resume_co->save_stack.max_cpsz = resume_co->save_stack.valid_sz; >> + resume_co->stack->align_validsz = >> + resume_co->save_stack.valid_sz + sizeof(void *); >> + resume_co->stack->owner = resume_co; >> +} >> + >> +void co_resume(struct co *resume_co) >> +{ >> + assert(resume_co && resume_co->main_co && !resume_co->done); >> + >> + if (resume_co->stack->owner != resume_co) >> + grab_stack(resume_co); >> + >> + current_co = resume_co; >> + _co_switch(resume_co->main_co, resume_co); >> + current_co = resume_co->main_co; >> +} >> + >> +void co_destroy(struct co *co){ > > obviously this is not u-boot coding style. I expect checkpatch reports this too. Yep. Addressed in v3. >> + if (!co) >> + return; >> + >> + if(co_is_main_co(co)){ >> + free(co); >> + current_co = NULL; >> + } else { >> + if(co->stack->owner == co){ >> + co->stack->owner = NULL; >> + co->stack->align_validsz = 0; >> + } >> + free(co->save_stack.ptr); >> + co->save_stack.ptr = NULL; >> + free(co); >> + } >> +} > > > Thanks, > Michal Thanks for the review.
diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile index 2e71ff2dc97..6d07b6aa9f9 100644 --- a/arch/arm/cpu/armv8/Makefile +++ b/arch/arm/cpu/armv8/Makefile @@ -46,3 +46,4 @@ obj-$(CONFIG_TARGET_BCMNS3) += bcmns3/ obj-$(CONFIG_XEN) += xen/ obj-$(CONFIG_ARMV8_CE_SHA1) += sha1_ce_glue.o sha1_ce_core.o obj-$(CONFIG_ARMV8_CE_SHA256) += sha256_ce_glue.o sha256_ce_core.o +obj-$(CONFIG_COROUTINES) += co_switch.o diff --git a/arch/arm/cpu/armv8/co_switch.S b/arch/arm/cpu/armv8/co_switch.S new file mode 100644 index 00000000000..4405e89ec56 --- /dev/null +++ b/arch/arm/cpu/armv8/co_switch.S @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* void _co_switch(struct uco *from_co, struct uco *to_co); */ +.text +.globl _co_switch +.type _co_switch, @function +_co_switch: + // x0: from_co + // x1: to_co + // from_co and to_co layout: { pc, sp, x19-x29 } + + // Save context to from_co (x0) + // AAPCS64 says "A subroutine invocation must preserve the contents of the + // registers r19-r29 and SP" + adr x2, 1f // pc we should use to resume after this function + mov x3, sp + stp x2, x3, [x0, #0] // pc, sp + stp x19, x20, [x0, #16] + stp x21, x22, [x0, #32] + stp x23, x24, [x0, #48] + stp x25, x26, [x0, #64] + stp x27, x28, [x0, #80] + stp x29, x30, [x0, #96] + + // Load new context from to_co (x1) + ldp x2, x3, [x1, #0] // pc, sp + ldp x19, x20, [x1, #16] + ldp x21, x22, [x1, #32] + ldp x23, x24, [x1, #48] + ldp x25, x26, [x1, #64] + ldp x27, x28, [x1, #80] + ldp x29, x30, [x1, #96] + mov sp, x3 + br x2 + +1: // Return to the caller + ret diff --git a/include/coroutines.h b/include/coroutines.h new file mode 100644 index 00000000000..b85b656127c --- /dev/null +++ b/include/coroutines.h @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later */ +/* + * Copyright 2018 Sen Han <00hnes@gmail.com> + * Copyright 2025 Linaro Limited + */ + +#ifndef _COROUTINES_H_ +#define _COROUTINES_H_ + +#ifndef CONFIG_COROUTINES + +static inline void co_yield(void) {} +static inline void co_exit(void) {} + +#else + +#ifdef __UBOOT__ +#include <log.h> +#else +#include <assert.h> +#endif +#include <limits.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#ifdef __aarch64__ +#define CO_REG_IDX_RETADDR 0 +#define CO_REG_IDX_SP 1 +#else +#error Architecture no supported +#endif + +struct co_save_stack { + void* ptr; + size_t sz; + size_t valid_sz; + size_t max_cpsz; /* max copy size in bytes */ +}; + +struct co_stack { + void *ptr; + size_t sz; + void *align_highptr; + void *align_retptr; + size_t align_validsz; + size_t align_limit; + struct co *owner; + void *real_ptr; + size_t real_sz; +}; + +struct co { + /* CPU state: callee-saved registers plus SP and PC */ + void *reg[14]; // pc, sp, x19-x29, x30 (lr) + + struct co *main_co; + void *arg; + bool done; + + void (*fp)(void); + + struct co_save_stack save_stack; + struct co_stack *stack; +}; + +extern struct co *current_co; + +static inline struct co *co_get_co(void) +{ + return current_co; +} + +static inline void *co_get_arg(void) +{ + return co_get_co()->arg; +} + +struct co_stack *co_stack_new(size_t sz); + +void co_stack_destroy(struct co_stack *s); + +struct co *co_create(struct co *main_co, + struct co_stack *stack, + size_t save_stack_sz, void (*fp)(void), + void *arg); + +void co_resume(struct co *resume_co); + +void co_destroy(struct co *co); + +void *_co_switch(struct co *from_co, struct co *to_co); + +static inline void _co_yield_to_main_co(struct co *yield_co) +{ + assert(yield_co); + assert(yield_co->main_co); + _co_switch(yield_co, yield_co->main_co); +} + +static inline void co_yield(void) +{ + if (current_co) + _co_yield_to_main_co(current_co); +} + +static inline bool co_is_main_co(struct co *co) +{ + return !co->main_co; +} + +static inline void co_exit(void) +{ + struct co *co = co_get_co(); + + if (!co) + return; + co->done = true; + assert(co->stack->owner == co); + co->stack->owner = NULL; + co->stack->align_validsz = 0; + _co_yield_to_main_co(co); + assert(false); +} + +#endif /* CONFIG_COROUTINES */ +#endif /* _COROUTINES_H_ */ diff --git a/lib/Kconfig b/lib/Kconfig index 8f1a96d98c4..b6c1380b927 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -1226,6 +1226,16 @@ config PHANDLE_CHECK_SEQ enable this config option to distinguish them using phandles in fdtdec_get_alias_seq() function. +config COROUTINES + bool "Enable coroutine support" + help + Coroutines allow to implement a simple form of cooperative + multi-tasking. The main thread of execution registers one or + more functions as coroutine entry points, then it schedules one + of them. At any point the scheduled coroutine may yield, that is, + suspend its execution and return back to the main thread. At this + point another coroutine may be scheduled and so on until all the + registered coroutines are done. endmenu source "lib/fwu_updates/Kconfig" diff --git a/lib/Makefile b/lib/Makefile index 5cb3278d2ef..7b809151f5a 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -159,6 +159,8 @@ obj-$(CONFIG_LIB_ELF) += elf.o obj-$(CONFIG_$(PHASE_)SEMIHOSTING) += semihosting.o +obj-$(CONFIG_COROUTINES) += coroutines.o + # # Build a fast OID lookup registry from include/linux/oid_registry.h # diff --git a/lib/coroutines.c b/lib/coroutines.c new file mode 100644 index 00000000000..20c5aba5510 --- /dev/null +++ b/lib/coroutines.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later + +// Copyright 2018 Sen Han <00hnes@gmail.com> +// Copyright 2025 Linaro Limited + +#include <coroutines.h> +#include <stdio.h> +#include <stdint.h> + + +/* Current co-routine */ +struct co *current_co; + +struct co_stack *co_stack_new(size_t sz) +{ + struct co_stack *p = calloc(1, sizeof(*p)); + uintptr_t u_p; + + if (!p) + return NULL; + + if (sz < 4096) + sz = 4096; + + p->sz = sz; + p->ptr = malloc(sz); + if (!p->ptr) { + free(p); + return NULL; + } + + p->owner = NULL; + u_p = (uintptr_t)(p->sz - (sizeof(void*) << 1) + (uintptr_t)p->ptr); + u_p = (u_p >> 4) << 4; + p->align_highptr = (void*)u_p; + p->align_retptr = (void*)(u_p - sizeof(void*)); + assert(p->sz > (16 + (sizeof(void*) << 1) + sizeof(void*))); + p->align_limit = p->sz - 16 - (sizeof(void*) << 1); + + return p; +} + +void co_stack_destroy(struct co_stack *s){ + if (!s) + return; + free(s->ptr); + free(s); +} + +struct co *co_create(struct co *main_co, + struct co_stack *stack, + size_t save_stack_sz, + void (*fp)(void), void *arg) +{ + struct co *p = malloc(sizeof(*p)); + assert(p); + memset(p, 0, sizeof(*p)); + + if (main_co) { + assert(stack); + p->stack = stack; + p->reg[CO_REG_IDX_RETADDR] = (void *)fp; + // FIXME original code uses align_retptr; causes a crash + p->reg[CO_REG_IDX_SP] = p->stack->align_highptr; + p->main_co = main_co; + p->arg = arg; + p->fp = fp; + if (!save_stack_sz) + save_stack_sz = 64; + p->save_stack.ptr = malloc(save_stack_sz); + assert(p->save_stack.ptr); + p->save_stack.sz = save_stack_sz; + p->save_stack.valid_sz = 0; + } else { + p->main_co = NULL; + p->arg = arg; + p->fp = fp; + p->stack = NULL; + p->save_stack.ptr = NULL; + } + return p; +} + +static void grab_stack(struct co *resume_co) +{ + struct co *owner_co = resume_co->stack->owner; + + if (owner_co) { + assert(owner_co->stack == resume_co->stack); + assert((uintptr_t)(owner_co->stack->align_retptr) >= + (uintptr_t)(owner_co->reg[CO_REG_IDX_SP])); + assert((uintptr_t)owner_co->stack->align_highptr - + (uintptr_t)owner_co->stack->align_limit + <= (uintptr_t)owner_co->reg[CO_REG_IDX_SP]); + owner_co->save_stack.valid_sz = + (uintptr_t)owner_co->stack->align_retptr - + (uintptr_t)owner_co->reg[CO_REG_IDX_SP]; + if (owner_co->save_stack.sz < owner_co->save_stack.valid_sz) { + free(owner_co->save_stack.ptr); + owner_co->save_stack.ptr = NULL; + do { + owner_co->save_stack.sz <<= 1; + assert(owner_co->save_stack.sz > 0); + } while (owner_co->save_stack.sz < + owner_co->save_stack.valid_sz); + owner_co->save_stack.ptr = + malloc(owner_co->save_stack.sz); + assert(owner_co->save_stack.ptr); + } + if (owner_co->save_stack.valid_sz > 0) + memcpy(owner_co->save_stack.ptr, + owner_co->reg[CO_REG_IDX_SP], + owner_co->save_stack.valid_sz); + if (owner_co->save_stack.valid_sz > + owner_co->save_stack.max_cpsz) + owner_co->save_stack.max_cpsz = + owner_co->save_stack.valid_sz; + owner_co->stack->owner = NULL; + owner_co->stack->align_validsz = 0; + } + assert(!resume_co->stack->owner); + assert(resume_co->save_stack.valid_sz <= + resume_co->stack->align_limit - sizeof(void *)); + if (resume_co->save_stack.valid_sz > 0) + memcpy((void*) + (uintptr_t)(resume_co->stack->align_retptr) - + resume_co->save_stack.valid_sz, + resume_co->save_stack.ptr, + resume_co->save_stack.valid_sz); + if (resume_co->save_stack.valid_sz > resume_co->save_stack.max_cpsz) + resume_co->save_stack.max_cpsz = resume_co->save_stack.valid_sz; + resume_co->stack->align_validsz = + resume_co->save_stack.valid_sz + sizeof(void *); + resume_co->stack->owner = resume_co; +} + +void co_resume(struct co *resume_co) +{ + assert(resume_co && resume_co->main_co && !resume_co->done); + + if (resume_co->stack->owner != resume_co) + grab_stack(resume_co); + + current_co = resume_co; + _co_switch(resume_co->main_co, resume_co); + current_co = resume_co->main_co; +} + +void co_destroy(struct co *co){ + if (!co) + return; + + if(co_is_main_co(co)){ + free(co); + current_co = NULL; + } else { + if(co->stack->owner == co){ + co->stack->owner = NULL; + co->stack->align_validsz = 0; + } + free(co->save_stack.ptr); + co->save_stack.ptr = NULL; + free(co); + } +}
Adds the COROUTINES Kconfig symbol which introduces a new internal API for coroutines support. As explained in the Kconfig file, this is meant to provide some kind of cooperative multi-tasking with the goal to improve performance by overlapping lengthy operations. The API as well as the implementation is very much inspired from libaco [1]. The reference implementation is simplified to remove all things not needed in U-Boot, the coding style is updated, and the aco_ prefix is replaced by co_. I believe the stack handling could be simplified: the stack of the main coroutine could probably probably be used by the secondary coroutines instead of allocating a new stack dynamically. Only i386, x86_64 and aarch64 are supported at the moment. Other architectures need to provide a _co_switch() function in assembly. Only aarch64 has been tested. [1] https://github.com/hnes/libaco/ Signed-off-by: Jerome Forissier <jerome.forissier@linaro.org> --- arch/arm/cpu/armv8/Makefile | 1 + arch/arm/cpu/armv8/co_switch.S | 36 +++++++ include/coroutines.h | 130 ++++++++++++++++++++++++++ lib/Kconfig | 10 ++ lib/Makefile | 2 + lib/coroutines.c | 165 +++++++++++++++++++++++++++++++++ 6 files changed, 344 insertions(+) create mode 100644 arch/arm/cpu/armv8/co_switch.S create mode 100644 include/coroutines.h create mode 100644 lib/coroutines.c