diff mbox series

[RFC,v2,1/3] Introduce coroutines framework

Message ID 912af13c205a31080cc61d40bec29d7402185d6e.1738059345.git.jerome.forissier@linaro.org
State New
Headers show
Series Coroutines | expand

Commit Message

Jerome Forissier Jan. 28, 2025, 10:19 a.m. UTC
Adds the COROUTINES Kconfig symbol which introduces a new internal API
for coroutines support. As explained in the Kconfig file, this is meant
to provide some kind of cooperative multi-tasking with the goal to
improve performance by overlapping lengthy operations.

The API as well as the implementation is very much inspired from libaco
[1]. The reference implementation is simplified to remove all things
not needed in U-Boot, the coding style is updated, and the aco_ prefix
is replaced by co_.

I believe the stack handling could be simplified: the stack of the main
coroutine could probably probably be used by the secondary coroutines
instead of allocating a new stack dynamically.

Only i386, x86_64 and aarch64 are supported at the moment. Other
architectures need to provide a _co_switch() function in assembly.

Only aarch64 has been tested.

[1] https://github.com/hnes/libaco/

Signed-off-by: Jerome Forissier <jerome.forissier@linaro.org>
---
 arch/arm/cpu/armv8/Makefile    |   1 +
 arch/arm/cpu/armv8/co_switch.S |  36 +++++++
 include/coroutines.h           | 130 ++++++++++++++++++++++++++
 lib/Kconfig                    |  10 ++
 lib/Makefile                   |   2 +
 lib/coroutines.c               | 165 +++++++++++++++++++++++++++++++++
 6 files changed, 344 insertions(+)
 create mode 100644 arch/arm/cpu/armv8/co_switch.S
 create mode 100644 include/coroutines.h
 create mode 100644 lib/coroutines.c

Comments

Michal Simek Jan. 28, 2025, 11:09 a.m. UTC | #1
On 1/28/25 11:19, Jerome Forissier wrote:
> Adds the COROUTINES Kconfig symbol which introduces a new internal API
> for coroutines support. As explained in the Kconfig file, this is meant
> to provide some kind of cooperative multi-tasking with the goal to
> improve performance by overlapping lengthy operations.
> 
> The API as well as the implementation is very much inspired from libaco
> [1]. The reference implementation is simplified to remove all things
> not needed in U-Boot, the coding style is updated, and the aco_ prefix
> is replaced by co_.
> 
> I believe the stack handling could be simplified: the stack of the main

Please use imperative mood.


> coroutine could probably probably be used by the secondary coroutines
> instead of allocating a new stack dynamically.
> 
> Only i386, x86_64 and aarch64 are supported at the moment. Other
> architectures need to provide a _co_switch() function in assembly.

Likely should be updated.

> 
> Only aarch64 has been tested.
> 
> [1] https://github.com/hnes/libaco/
> 
> Signed-off-by: Jerome Forissier <jerome.forissier@linaro.org>
> ---
>   arch/arm/cpu/armv8/Makefile    |   1 +
>   arch/arm/cpu/armv8/co_switch.S |  36 +++++++
>   include/coroutines.h           | 130 ++++++++++++++++++++++++++
>   lib/Kconfig                    |  10 ++
>   lib/Makefile                   |   2 +
>   lib/coroutines.c               | 165 +++++++++++++++++++++++++++++++++

Checkpatch is reporting some issues. Please fix them.

total: 17 errors, 19 warnings, 1 checks, 359 lines checked




>   6 files changed, 344 insertions(+)
>   create mode 100644 arch/arm/cpu/armv8/co_switch.S
>   create mode 100644 include/coroutines.h
>   create mode 100644 lib/coroutines.c
> 
> diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile
> index 2e71ff2dc97..6d07b6aa9f9 100644
> --- a/arch/arm/cpu/armv8/Makefile
> +++ b/arch/arm/cpu/armv8/Makefile
> @@ -46,3 +46,4 @@ obj-$(CONFIG_TARGET_BCMNS3) += bcmns3/
>   obj-$(CONFIG_XEN) += xen/
>   obj-$(CONFIG_ARMV8_CE_SHA1) += sha1_ce_glue.o sha1_ce_core.o
>   obj-$(CONFIG_ARMV8_CE_SHA256) += sha256_ce_glue.o sha256_ce_core.o
> +obj-$(CONFIG_COROUTINES) += co_switch.o
> diff --git a/arch/arm/cpu/armv8/co_switch.S b/arch/arm/cpu/armv8/co_switch.S
> new file mode 100644
> index 00000000000..4405e89ec56
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/co_switch.S
> @@ -0,0 +1,36 @@
> +/* SPDX-License-Identifier: GPL-2.0+ */
> +/* void _co_switch(struct uco *from_co, struct uco *to_co); */
> +.text
> +.globl _co_switch
> +.type  _co_switch, @function
> +_co_switch:
> +    // x0: from_co
> +    // x1: to_co
> +    // from_co and to_co layout: { pc, sp, x19-x29 }
> +
> +    // Save context to from_co (x0)
> +    // AAPCS64 says "A subroutine invocation must preserve the contents of the
> +    // registers r19-r29 and SP"
> +    adr x2, 1f  // pc we should use to resume after this function
> +    mov x3, sp
> +    stp x2, x3, [x0, #0]  // pc, sp
> +    stp x19, x20, [x0, #16]
> +    stp x21, x22, [x0, #32]
> +    stp x23, x24, [x0, #48]
> +    stp x25, x26, [x0, #64]
> +    stp x27, x28, [x0, #80]
> +    stp x29, x30, [x0, #96]
> +
> +    // Load new context from to_co (x1)
> +    ldp x2, x3, [x1, #0]  // pc, sp
> +    ldp x19, x20, [x1, #16]
> +    ldp x21, x22, [x1, #32]
> +    ldp x23, x24, [x1, #48]
> +    ldp x25, x26, [x1, #64]
> +    ldp x27, x28, [x1, #80]
> +    ldp x29, x30, [x1, #96]
> +    mov sp, x3
> +    br x2
> +
> +1:  // Return to the caller
> +    ret
> diff --git a/include/coroutines.h b/include/coroutines.h
> new file mode 100644
> index 00000000000..b85b656127c
> --- /dev/null
> +++ b/include/coroutines.h
> @@ -0,0 +1,130 @@
> +/* SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later */
> +/*
> + * Copyright 2018 Sen Han <00hnes@gmail.com>
> + * Copyright 2025 Linaro Limited
> + */
> +
> +#ifndef _COROUTINES_H_
> +#define _COROUTINES_H_
> +
> +#ifndef CONFIG_COROUTINES
> +
> +static inline void co_yield(void) {}
> +static inline void co_exit(void) {}
> +
> +#else
> +
> +#ifdef __UBOOT__
> +#include <log.h>
> +#else
> +#include <assert.h>
> +#endif

Do we need ifdef at all? Are you testing it out of u-boot too? Or is this just 
untested code?

> +#include <limits.h>
> +#include <stdbool.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <time.h>
> +
> +#ifdef __aarch64__
> +#define CO_REG_IDX_RETADDR 0
> +#define CO_REG_IDX_SP 1
> +#else
> +#error Architecture no supported
> +#endif
> +
> +struct co_save_stack {
> +    void*  ptr;
> +    size_t sz;
> +    size_t valid_sz;
> +    size_t max_cpsz; /* max copy size in bytes */
> +};
> +
> +struct co_stack {
> +    void *ptr;
> +    size_t sz;
> +    void *align_highptr;
> +    void *align_retptr;
> +    size_t align_validsz;
> +    size_t align_limit;
> +    struct co *owner;
> +    void *real_ptr;
> +    size_t real_sz;
> +};
> +
> +struct co {
> +	/* CPU state: callee-saved registers plus SP and PC */
> +	void *reg[14];  // pc, sp, x19-x29, x30 (lr)
> +
> +	struct co *main_co;
> +	void *arg;
> +	bool done;
> +
> +	void (*fp)(void);
> +
> +	struct co_save_stack save_stack;
> +	struct co_stack *stack;
> +};
> +
> +extern struct co *current_co;
> +
> +static inline struct co *co_get_co(void)
> +{
> +	return current_co;
> +}
> +
> +static inline void *co_get_arg(void)
> +{
> +	return co_get_co()->arg;
> +}
> +
> +struct co_stack *co_stack_new(size_t sz);
> +
> +void co_stack_destroy(struct co_stack *s);
> +
> +struct co *co_create(struct co *main_co,
> +		     struct co_stack *stack,
> +		     size_t save_stack_sz, void (*fp)(void),
> +		     void *arg);
> +
> +void co_resume(struct co *resume_co);
> +
> +void co_destroy(struct co *co);
> +
> +void *_co_switch(struct co *from_co, struct co *to_co);
> +
> +static inline void _co_yield_to_main_co(struct co *yield_co)
> +{
> +    assert(yield_co);
> +    assert(yield_co->main_co);
> +    _co_switch(yield_co, yield_co->main_co);
> +}
> +
> +static inline void co_yield(void)
> +{
> +	if (current_co)
> +		_co_yield_to_main_co(current_co);
> +}
> +
> +static inline bool co_is_main_co(struct co *co)
> +{
> +	return !co->main_co;
> +}
> +
> +static inline void co_exit(void)
> +{
> +	struct co *co = co_get_co();
> +
> +	if (!co)
> +		return;
> +	co->done = true;
> +	assert(co->stack->owner == co);
> +	co->stack->owner = NULL;
> +	co->stack->align_validsz = 0;
> +	_co_yield_to_main_co(co);
> +	assert(false);
> +}
> +
> +#endif /* CONFIG_COROUTINES */
> +#endif /* _COROUTINES_H_ */
> diff --git a/lib/Kconfig b/lib/Kconfig
> index 8f1a96d98c4..b6c1380b927 100644
> --- a/lib/Kconfig
> +++ b/lib/Kconfig
> @@ -1226,6 +1226,16 @@ config PHANDLE_CHECK_SEQ
>   	  enable this config option to distinguish them using
>   	  phandles in fdtdec_get_alias_seq() function.
>   
> +config COROUTINES
> +	bool "Enable coroutine support"
> +	help
> +	  Coroutines allow to implement a simple form of cooperative
> +	  multi-tasking. The main thread of execution registers one or
> +	  more functions as coroutine entry points, then it schedules one
> +	  of them. At any point the scheduled coroutine may yield, that is,
> +	  suspend its execution and return back to the main thread. At this
> +	  point another coroutine may be scheduled and so on until all the
> +	  registered coroutines are done.

newline

>   endmenu
>   
>   source "lib/fwu_updates/Kconfig"
> diff --git a/lib/Makefile b/lib/Makefile
> index 5cb3278d2ef..7b809151f5a 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -159,6 +159,8 @@ obj-$(CONFIG_LIB_ELF) += elf.o
>   
>   obj-$(CONFIG_$(PHASE_)SEMIHOSTING) += semihosting.o
>   
> +obj-$(CONFIG_COROUTINES) += coroutines.o
> +
>   #
>   # Build a fast OID lookup registry from include/linux/oid_registry.h
>   #
> diff --git a/lib/coroutines.c b/lib/coroutines.c
> new file mode 100644
> index 00000000000..20c5aba5510
> --- /dev/null
> +++ b/lib/coroutines.c
> @@ -0,0 +1,165 @@
> +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
> +
> +// Copyright 2018 Sen Han <00hnes@gmail.com>
> +// Copyright 2025 Linaro Limited
> +
> +#include <coroutines.h>
> +#include <stdio.h>
> +#include <stdint.h>
> +
> +
> +/* Current co-routine */
> +struct co *current_co;
> +
> +struct co_stack *co_stack_new(size_t sz)
> +{
> +	struct co_stack *p = calloc(1, sizeof(*p));
> +	uintptr_t u_p;
> +
> +	if (!p)
> +		return NULL;
> +
> +	if (sz < 4096)
> +		sz = 4096;
> +
> +	p->sz = sz;
> +	p->ptr = malloc(sz);
> +	if (!p->ptr) {
> +		free(p);
> +		return NULL;
> +	}
> +
> +	p->owner = NULL;
> +	u_p = (uintptr_t)(p->sz - (sizeof(void*) << 1) + (uintptr_t)p->ptr);
> +	u_p = (u_p >> 4) << 4;
> +	p->align_highptr = (void*)u_p;
> +	p->align_retptr  = (void*)(u_p - sizeof(void*));
> +	assert(p->sz > (16 + (sizeof(void*) << 1) + sizeof(void*)));
> +	p->align_limit = p->sz - 16 - (sizeof(void*) << 1);
> +
> +	return p;
> +}
> +
> +void co_stack_destroy(struct co_stack *s){
> +	if (!s)
> +		return;
> +	free(s->ptr);
> +	free(s);
> +}
> +
> +struct co *co_create(struct co *main_co,
> +		     struct co_stack *stack,
> +		     size_t save_stack_sz,
> +		     void (*fp)(void), void *arg)
> +{
> +	struct co *p = malloc(sizeof(*p));
> +	assert(p);
> +	memset(p, 0, sizeof(*p));
> +
> +	if (main_co) {
> +		assert(stack);
> +		p->stack = stack;
> +		p->reg[CO_REG_IDX_RETADDR] = (void *)fp;
> +		// FIXME original code uses align_retptr; causes a crash
> +		p->reg[CO_REG_IDX_SP] = p->stack->align_highptr;
> +		p->main_co = main_co;
> +		p->arg = arg;
> +		p->fp = fp;
> +		if (!save_stack_sz)
> +			save_stack_sz = 64;
> +		p->save_stack.ptr = malloc(save_stack_sz);
> +		assert(p->save_stack.ptr);
> +		p->save_stack.sz = save_stack_sz;
> +		p->save_stack.valid_sz = 0;
> +	} else {
> +		p->main_co = NULL;
> +		p->arg = arg;
> +		p->fp = fp;
> +		p->stack = NULL;
> +		p->save_stack.ptr = NULL;
> +	}
> +	return p;
> +}
> +
> +static void grab_stack(struct co *resume_co)
> +{
> +	struct co *owner_co = resume_co->stack->owner;
> +
> +	if (owner_co) {
> +		assert(owner_co->stack == resume_co->stack);
> +		assert((uintptr_t)(owner_co->stack->align_retptr) >=
> +		       (uintptr_t)(owner_co->reg[CO_REG_IDX_SP]));
> +		assert((uintptr_t)owner_co->stack->align_highptr -
> +				(uintptr_t)owner_co->stack->align_limit
> +			<= (uintptr_t)owner_co->reg[CO_REG_IDX_SP]);
> +		owner_co->save_stack.valid_sz =
> +			(uintptr_t)owner_co->stack->align_retptr -
> +			(uintptr_t)owner_co->reg[CO_REG_IDX_SP];
> +		if (owner_co->save_stack.sz < owner_co->save_stack.valid_sz) {
> +			free(owner_co->save_stack.ptr);
> +			owner_co->save_stack.ptr = NULL;
> +			do {
> +				owner_co->save_stack.sz <<= 1;
> +				assert(owner_co->save_stack.sz > 0);
> +			} while (owner_co->save_stack.sz <
> +				 owner_co->save_stack.valid_sz);
> +			owner_co->save_stack.ptr =
> +				malloc(owner_co->save_stack.sz);
> +			assert(owner_co->save_stack.ptr);
> +		}
> +		if (owner_co->save_stack.valid_sz > 0)
> +			memcpy(owner_co->save_stack.ptr,
> +			       owner_co->reg[CO_REG_IDX_SP],
> +			       owner_co->save_stack.valid_sz);
> +		if (owner_co->save_stack.valid_sz >
> +		    owner_co->save_stack.max_cpsz)
> +			owner_co->save_stack.max_cpsz =
> +				owner_co->save_stack.valid_sz;
> +		owner_co->stack->owner = NULL;
> +		owner_co->stack->align_validsz = 0;
> +	}
> +	assert(!resume_co->stack->owner);
> +	assert(resume_co->save_stack.valid_sz <=
> +	       resume_co->stack->align_limit - sizeof(void *));
> +	if (resume_co->save_stack.valid_sz > 0)
> +		memcpy((void*)
> +		       (uintptr_t)(resume_co->stack->align_retptr) -
> +				resume_co->save_stack.valid_sz,
> +		       resume_co->save_stack.ptr,
> +		       resume_co->save_stack.valid_sz);
> +	if (resume_co->save_stack.valid_sz > resume_co->save_stack.max_cpsz)
> +		resume_co->save_stack.max_cpsz = resume_co->save_stack.valid_sz;
> +	resume_co->stack->align_validsz =
> +		resume_co->save_stack.valid_sz + sizeof(void *);
> +	resume_co->stack->owner = resume_co;
> +}
> +
> +void co_resume(struct co *resume_co)
> +{
> +	assert(resume_co && resume_co->main_co && !resume_co->done);
> +
> +	if (resume_co->stack->owner != resume_co)
> +		grab_stack(resume_co);
> +
> +	current_co = resume_co;
> +	_co_switch(resume_co->main_co, resume_co);
> +	current_co = resume_co->main_co;
> +}
> +
> +void co_destroy(struct co *co){

obviously this is not u-boot coding style. I expect checkpatch reports this too.

> +	if (!co)
> +		return;
> +
> +	if(co_is_main_co(co)){
> +		free(co);
> +		current_co = NULL;
> +	} else {
> +		if(co->stack->owner == co){
> +			co->stack->owner = NULL;
> +			co->stack->align_validsz = 0;
> +		}
> +		free(co->save_stack.ptr);
> +		co->save_stack.ptr = NULL;
> +		free(co);
> +	}
> +}


Thanks,
Michal
Jerome Forissier Jan. 28, 2025, 1:30 p.m. UTC | #2
On 1/28/25 12:09, Michal Simek wrote:
> 
> 
> On 1/28/25 11:19, Jerome Forissier wrote:
>> Adds the COROUTINES Kconfig symbol which introduces a new internal API
>> for coroutines support. As explained in the Kconfig file, this is meant
>> to provide some kind of cooperative multi-tasking with the goal to
>> improve performance by overlapping lengthy operations.
>>
>> The API as well as the implementation is very much inspired from libaco
>> [1]. The reference implementation is simplified to remove all things
>> not needed in U-Boot, the coding style is updated, and the aco_ prefix
>> is replaced by co_.
>>
>> I believe the stack handling could be simplified: the stack of the main
> 
> Please use imperative mood.

OK. I will drop the remark about stack handling simplification because it
is not very helpful here.

  
>> coroutine could probably probably be used by the secondary coroutines
>> instead of allocating a new stack dynamically.
>>
>> Only i386, x86_64 and aarch64 are supported at the moment. Other
>> architectures need to provide a _co_switch() function in assembly.
> 
> Likely should be updated.

OK

> 
>>
>> Only aarch64 has been tested.
>>
>> [1] https://github.com/hnes/libaco/
>>
>> Signed-off-by: Jerome Forissier <jerome.forissier@linaro.org>
>> ---
>>   arch/arm/cpu/armv8/Makefile    |   1 +
>>   arch/arm/cpu/armv8/co_switch.S |  36 +++++++
>>   include/coroutines.h           | 130 ++++++++++++++++++++++++++
>>   lib/Kconfig                    |  10 ++
>>   lib/Makefile                   |   2 +
>>   lib/coroutines.c               | 165 +++++++++++++++++++++++++++++++++
> 
> Checkpatch is reporting some issues. Please fix them.
> 
> total: 17 errors, 19 warnings, 1 checks, 359 lines checked

All the checkpatch issues will be fixed in v3.

>>   6 files changed, 344 insertions(+)
>>   create mode 100644 arch/arm/cpu/armv8/co_switch.S
>>   create mode 100644 include/coroutines.h
>>   create mode 100644 lib/coroutines.c
>>
>> diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile
>> index 2e71ff2dc97..6d07b6aa9f9 100644
>> --- a/arch/arm/cpu/armv8/Makefile
>> +++ b/arch/arm/cpu/armv8/Makefile
>> @@ -46,3 +46,4 @@ obj-$(CONFIG_TARGET_BCMNS3) += bcmns3/
>>   obj-$(CONFIG_XEN) += xen/
>>   obj-$(CONFIG_ARMV8_CE_SHA1) += sha1_ce_glue.o sha1_ce_core.o
>>   obj-$(CONFIG_ARMV8_CE_SHA256) += sha256_ce_glue.o sha256_ce_core.o
>> +obj-$(CONFIG_COROUTINES) += co_switch.o
>> diff --git a/arch/arm/cpu/armv8/co_switch.S b/arch/arm/cpu/armv8/co_switch.S
>> new file mode 100644
>> index 00000000000..4405e89ec56
>> --- /dev/null
>> +++ b/arch/arm/cpu/armv8/co_switch.S
>> @@ -0,0 +1,36 @@
>> +/* SPDX-License-Identifier: GPL-2.0+ */
>> +/* void _co_switch(struct uco *from_co, struct uco *to_co); */
>> +.text
>> +.globl _co_switch
>> +.type  _co_switch, @function
>> +_co_switch:
>> +    // x0: from_co
>> +    // x1: to_co
>> +    // from_co and to_co layout: { pc, sp, x19-x29 }
>> +
>> +    // Save context to from_co (x0)
>> +    // AAPCS64 says "A subroutine invocation must preserve the contents of the
>> +    // registers r19-r29 and SP"
>> +    adr x2, 1f  // pc we should use to resume after this function
>> +    mov x3, sp
>> +    stp x2, x3, [x0, #0]  // pc, sp
>> +    stp x19, x20, [x0, #16]
>> +    stp x21, x22, [x0, #32]
>> +    stp x23, x24, [x0, #48]
>> +    stp x25, x26, [x0, #64]
>> +    stp x27, x28, [x0, #80]
>> +    stp x29, x30, [x0, #96]
>> +
>> +    // Load new context from to_co (x1)
>> +    ldp x2, x3, [x1, #0]  // pc, sp
>> +    ldp x19, x20, [x1, #16]
>> +    ldp x21, x22, [x1, #32]
>> +    ldp x23, x24, [x1, #48]
>> +    ldp x25, x26, [x1, #64]
>> +    ldp x27, x28, [x1, #80]
>> +    ldp x29, x30, [x1, #96]
>> +    mov sp, x3
>> +    br x2
>> +
>> +1:  // Return to the caller
>> +    ret
>> diff --git a/include/coroutines.h b/include/coroutines.h
>> new file mode 100644
>> index 00000000000..b85b656127c
>> --- /dev/null
>> +++ b/include/coroutines.h
>> @@ -0,0 +1,130 @@
>> +/* SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later */
>> +/*
>> + * Copyright 2018 Sen Han <00hnes@gmail.com>
>> + * Copyright 2025 Linaro Limited
>> + */
>> +
>> +#ifndef _COROUTINES_H_
>> +#define _COROUTINES_H_
>> +
>> +#ifndef CONFIG_COROUTINES
>> +
>> +static inline void co_yield(void) {}
>> +static inline void co_exit(void) {}
>> +
>> +#else
>> +
>> +#ifdef __UBOOT__
>> +#include <log.h>
>> +#else
>> +#include <assert.h>
>> +#endif
> 
> Do we need ifdef at all? Are you testing it out of u-boot too? Or is this just untested code?

It is a leftover from when I used to test outside U-Boot (in a regular
Linux application) but that is not useful anymore. I'll remove the
conditional.

>> +#include <limits.h>
>> +#include <stdbool.h>
>> +#include <stdint.h>
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <string.h>
>> +#include <time.h>
>> +
>> +#ifdef __aarch64__
>> +#define CO_REG_IDX_RETADDR 0
>> +#define CO_REG_IDX_SP 1
>> +#else
>> +#error Architecture no supported
>> +#endif
>> +
>> +struct co_save_stack {
>> +    void*  ptr;
>> +    size_t sz;
>> +    size_t valid_sz;
>> +    size_t max_cpsz; /* max copy size in bytes */
>> +};
>> +
>> +struct co_stack {
>> +    void *ptr;
>> +    size_t sz;
>> +    void *align_highptr;
>> +    void *align_retptr;
>> +    size_t align_validsz;
>> +    size_t align_limit;
>> +    struct co *owner;
>> +    void *real_ptr;
>> +    size_t real_sz;
>> +};
>> +
>> +struct co {
>> +    /* CPU state: callee-saved registers plus SP and PC */
>> +    void *reg[14];  // pc, sp, x19-x29, x30 (lr)
>> +
>> +    struct co *main_co;
>> +    void *arg;
>> +    bool done;
>> +
>> +    void (*fp)(void);
>> +
>> +    struct co_save_stack save_stack;
>> +    struct co_stack *stack;
>> +};
>> +
>> +extern struct co *current_co;
>> +
>> +static inline struct co *co_get_co(void)
>> +{
>> +    return current_co;
>> +}
>> +
>> +static inline void *co_get_arg(void)
>> +{
>> +    return co_get_co()->arg;
>> +}
>> +
>> +struct co_stack *co_stack_new(size_t sz);
>> +
>> +void co_stack_destroy(struct co_stack *s);
>> +
>> +struct co *co_create(struct co *main_co,
>> +             struct co_stack *stack,
>> +             size_t save_stack_sz, void (*fp)(void),
>> +             void *arg);
>> +
>> +void co_resume(struct co *resume_co);
>> +
>> +void co_destroy(struct co *co);
>> +
>> +void *_co_switch(struct co *from_co, struct co *to_co);
>> +
>> +static inline void _co_yield_to_main_co(struct co *yield_co)
>> +{
>> +    assert(yield_co);
>> +    assert(yield_co->main_co);
>> +    _co_switch(yield_co, yield_co->main_co);
>> +}
>> +
>> +static inline void co_yield(void)
>> +{
>> +    if (current_co)
>> +        _co_yield_to_main_co(current_co);
>> +}
>> +
>> +static inline bool co_is_main_co(struct co *co)
>> +{
>> +    return !co->main_co;
>> +}
>> +
>> +static inline void co_exit(void)
>> +{
>> +    struct co *co = co_get_co();
>> +
>> +    if (!co)
>> +        return;
>> +    co->done = true;
>> +    assert(co->stack->owner == co);
>> +    co->stack->owner = NULL;
>> +    co->stack->align_validsz = 0;
>> +    _co_yield_to_main_co(co);
>> +    assert(false);
>> +}
>> +
>> +#endif /* CONFIG_COROUTINES */
>> +#endif /* _COROUTINES_H_ */
>> diff --git a/lib/Kconfig b/lib/Kconfig
>> index 8f1a96d98c4..b6c1380b927 100644
>> --- a/lib/Kconfig
>> +++ b/lib/Kconfig
>> @@ -1226,6 +1226,16 @@ config PHANDLE_CHECK_SEQ
>>         enable this config option to distinguish them using
>>         phandles in fdtdec_get_alias_seq() function.
>>   +config COROUTINES
>> +    bool "Enable coroutine support"
>> +    help
>> +      Coroutines allow to implement a simple form of cooperative
>> +      multi-tasking. The main thread of execution registers one or
>> +      more functions as coroutine entry points, then it schedules one
>> +      of them. At any point the scheduled coroutine may yield, that is,
>> +      suspend its execution and return back to the main thread. At this
>> +      point another coroutine may be scheduled and so on until all the
>> +      registered coroutines are done.
> 
> newline
> 
>>   endmenu
>>     source "lib/fwu_updates/Kconfig"
>> diff --git a/lib/Makefile b/lib/Makefile
>> index 5cb3278d2ef..7b809151f5a 100644
>> --- a/lib/Makefile
>> +++ b/lib/Makefile
>> @@ -159,6 +159,8 @@ obj-$(CONFIG_LIB_ELF) += elf.o
>>     obj-$(CONFIG_$(PHASE_)SEMIHOSTING) += semihosting.o
>>   +obj-$(CONFIG_COROUTINES) += coroutines.o
>> +
>>   #
>>   # Build a fast OID lookup registry from include/linux/oid_registry.h
>>   #
>> diff --git a/lib/coroutines.c b/lib/coroutines.c
>> new file mode 100644
>> index 00000000000..20c5aba5510
>> --- /dev/null
>> +++ b/lib/coroutines.c
>> @@ -0,0 +1,165 @@
>> +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
>> +
>> +// Copyright 2018 Sen Han <00hnes@gmail.com>
>> +// Copyright 2025 Linaro Limited
>> +
>> +#include <coroutines.h>
>> +#include <stdio.h>
>> +#include <stdint.h>
>> +
>> +
>> +/* Current co-routine */
>> +struct co *current_co;
>> +
>> +struct co_stack *co_stack_new(size_t sz)
>> +{
>> +    struct co_stack *p = calloc(1, sizeof(*p));
>> +    uintptr_t u_p;
>> +
>> +    if (!p)
>> +        return NULL;
>> +
>> +    if (sz < 4096)
>> +        sz = 4096;
>> +
>> +    p->sz = sz;
>> +    p->ptr = malloc(sz);
>> +    if (!p->ptr) {
>> +        free(p);
>> +        return NULL;
>> +    }
>> +
>> +    p->owner = NULL;
>> +    u_p = (uintptr_t)(p->sz - (sizeof(void*) << 1) + (uintptr_t)p->ptr);
>> +    u_p = (u_p >> 4) << 4;
>> +    p->align_highptr = (void*)u_p;
>> +    p->align_retptr  = (void*)(u_p - sizeof(void*));
>> +    assert(p->sz > (16 + (sizeof(void*) << 1) + sizeof(void*)));
>> +    p->align_limit = p->sz - 16 - (sizeof(void*) << 1);
>> +
>> +    return p;
>> +}
>> +
>> +void co_stack_destroy(struct co_stack *s){
>> +    if (!s)
>> +        return;
>> +    free(s->ptr);
>> +    free(s);
>> +}
>> +
>> +struct co *co_create(struct co *main_co,
>> +             struct co_stack *stack,
>> +             size_t save_stack_sz,
>> +             void (*fp)(void), void *arg)
>> +{
>> +    struct co *p = malloc(sizeof(*p));
>> +    assert(p);
>> +    memset(p, 0, sizeof(*p));
>> +
>> +    if (main_co) {
>> +        assert(stack);
>> +        p->stack = stack;
>> +        p->reg[CO_REG_IDX_RETADDR] = (void *)fp;
>> +        // FIXME original code uses align_retptr; causes a crash
>> +        p->reg[CO_REG_IDX_SP] = p->stack->align_highptr;
>> +        p->main_co = main_co;
>> +        p->arg = arg;
>> +        p->fp = fp;
>> +        if (!save_stack_sz)
>> +            save_stack_sz = 64;
>> +        p->save_stack.ptr = malloc(save_stack_sz);
>> +        assert(p->save_stack.ptr);
>> +        p->save_stack.sz = save_stack_sz;
>> +        p->save_stack.valid_sz = 0;
>> +    } else {
>> +        p->main_co = NULL;
>> +        p->arg = arg;
>> +        p->fp = fp;
>> +        p->stack = NULL;
>> +        p->save_stack.ptr = NULL;
>> +    }
>> +    return p;
>> +}
>> +
>> +static void grab_stack(struct co *resume_co)
>> +{
>> +    struct co *owner_co = resume_co->stack->owner;
>> +
>> +    if (owner_co) {
>> +        assert(owner_co->stack == resume_co->stack);
>> +        assert((uintptr_t)(owner_co->stack->align_retptr) >=
>> +               (uintptr_t)(owner_co->reg[CO_REG_IDX_SP]));
>> +        assert((uintptr_t)owner_co->stack->align_highptr -
>> +                (uintptr_t)owner_co->stack->align_limit
>> +            <= (uintptr_t)owner_co->reg[CO_REG_IDX_SP]);
>> +        owner_co->save_stack.valid_sz =
>> +            (uintptr_t)owner_co->stack->align_retptr -
>> +            (uintptr_t)owner_co->reg[CO_REG_IDX_SP];
>> +        if (owner_co->save_stack.sz < owner_co->save_stack.valid_sz) {
>> +            free(owner_co->save_stack.ptr);
>> +            owner_co->save_stack.ptr = NULL;
>> +            do {
>> +                owner_co->save_stack.sz <<= 1;
>> +                assert(owner_co->save_stack.sz > 0);
>> +            } while (owner_co->save_stack.sz <
>> +                 owner_co->save_stack.valid_sz);
>> +            owner_co->save_stack.ptr =
>> +                malloc(owner_co->save_stack.sz);
>> +            assert(owner_co->save_stack.ptr);
>> +        }
>> +        if (owner_co->save_stack.valid_sz > 0)
>> +            memcpy(owner_co->save_stack.ptr,
>> +                   owner_co->reg[CO_REG_IDX_SP],
>> +                   owner_co->save_stack.valid_sz);
>> +        if (owner_co->save_stack.valid_sz >
>> +            owner_co->save_stack.max_cpsz)
>> +            owner_co->save_stack.max_cpsz =
>> +                owner_co->save_stack.valid_sz;
>> +        owner_co->stack->owner = NULL;
>> +        owner_co->stack->align_validsz = 0;
>> +    }
>> +    assert(!resume_co->stack->owner);
>> +    assert(resume_co->save_stack.valid_sz <=
>> +           resume_co->stack->align_limit - sizeof(void *));
>> +    if (resume_co->save_stack.valid_sz > 0)
>> +        memcpy((void*)
>> +               (uintptr_t)(resume_co->stack->align_retptr) -
>> +                resume_co->save_stack.valid_sz,
>> +               resume_co->save_stack.ptr,
>> +               resume_co->save_stack.valid_sz);
>> +    if (resume_co->save_stack.valid_sz > resume_co->save_stack.max_cpsz)
>> +        resume_co->save_stack.max_cpsz = resume_co->save_stack.valid_sz;
>> +    resume_co->stack->align_validsz =
>> +        resume_co->save_stack.valid_sz + sizeof(void *);
>> +    resume_co->stack->owner = resume_co;
>> +}
>> +
>> +void co_resume(struct co *resume_co)
>> +{
>> +    assert(resume_co && resume_co->main_co && !resume_co->done);
>> +
>> +    if (resume_co->stack->owner != resume_co)
>> +        grab_stack(resume_co);
>> +
>> +    current_co = resume_co;
>> +    _co_switch(resume_co->main_co, resume_co);
>> +    current_co = resume_co->main_co;
>> +}
>> +
>> +void co_destroy(struct co *co){
> 
> obviously this is not u-boot coding style. I expect checkpatch reports this too.

Yep. Addressed in v3.

>> +    if (!co)
>> +        return;
>> +
>> +    if(co_is_main_co(co)){
>> +        free(co);
>> +        current_co = NULL;
>> +    } else {
>> +        if(co->stack->owner == co){
>> +            co->stack->owner = NULL;
>> +            co->stack->align_validsz = 0;
>> +        }
>> +        free(co->save_stack.ptr);
>> +        co->save_stack.ptr = NULL;
>> +        free(co);
>> +    }
>> +}
> 
> 
> Thanks,
> Michal

Thanks for the review.
diff mbox series

Patch

diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile
index 2e71ff2dc97..6d07b6aa9f9 100644
--- a/arch/arm/cpu/armv8/Makefile
+++ b/arch/arm/cpu/armv8/Makefile
@@ -46,3 +46,4 @@  obj-$(CONFIG_TARGET_BCMNS3) += bcmns3/
 obj-$(CONFIG_XEN) += xen/
 obj-$(CONFIG_ARMV8_CE_SHA1) += sha1_ce_glue.o sha1_ce_core.o
 obj-$(CONFIG_ARMV8_CE_SHA256) += sha256_ce_glue.o sha256_ce_core.o
+obj-$(CONFIG_COROUTINES) += co_switch.o
diff --git a/arch/arm/cpu/armv8/co_switch.S b/arch/arm/cpu/armv8/co_switch.S
new file mode 100644
index 00000000000..4405e89ec56
--- /dev/null
+++ b/arch/arm/cpu/armv8/co_switch.S
@@ -0,0 +1,36 @@ 
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* void _co_switch(struct uco *from_co, struct uco *to_co); */
+.text
+.globl _co_switch
+.type  _co_switch, @function
+_co_switch:
+    // x0: from_co
+    // x1: to_co
+    // from_co and to_co layout: { pc, sp, x19-x29 }
+
+    // Save context to from_co (x0)
+    // AAPCS64 says "A subroutine invocation must preserve the contents of the
+    // registers r19-r29 and SP"
+    adr x2, 1f  // pc we should use to resume after this function
+    mov x3, sp
+    stp x2, x3, [x0, #0]  // pc, sp
+    stp x19, x20, [x0, #16]
+    stp x21, x22, [x0, #32]
+    stp x23, x24, [x0, #48]
+    stp x25, x26, [x0, #64]
+    stp x27, x28, [x0, #80]
+    stp x29, x30, [x0, #96]
+
+    // Load new context from to_co (x1)
+    ldp x2, x3, [x1, #0]  // pc, sp
+    ldp x19, x20, [x1, #16]
+    ldp x21, x22, [x1, #32]
+    ldp x23, x24, [x1, #48]
+    ldp x25, x26, [x1, #64]
+    ldp x27, x28, [x1, #80]
+    ldp x29, x30, [x1, #96]
+    mov sp, x3
+    br x2
+
+1:  // Return to the caller
+    ret
diff --git a/include/coroutines.h b/include/coroutines.h
new file mode 100644
index 00000000000..b85b656127c
--- /dev/null
+++ b/include/coroutines.h
@@ -0,0 +1,130 @@ 
+/* SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later */
+/*
+ * Copyright 2018 Sen Han <00hnes@gmail.com>
+ * Copyright 2025 Linaro Limited
+ */
+
+#ifndef _COROUTINES_H_
+#define _COROUTINES_H_
+
+#ifndef CONFIG_COROUTINES
+
+static inline void co_yield(void) {}
+static inline void co_exit(void) {}
+
+#else
+
+#ifdef __UBOOT__
+#include <log.h>
+#else
+#include <assert.h>
+#endif
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#ifdef __aarch64__
+#define CO_REG_IDX_RETADDR 0
+#define CO_REG_IDX_SP 1
+#else
+#error Architecture no supported
+#endif
+
+struct co_save_stack {
+    void*  ptr;
+    size_t sz;
+    size_t valid_sz;
+    size_t max_cpsz; /* max copy size in bytes */
+};
+
+struct co_stack {
+    void *ptr;
+    size_t sz;
+    void *align_highptr;
+    void *align_retptr;
+    size_t align_validsz;
+    size_t align_limit;
+    struct co *owner;
+    void *real_ptr;
+    size_t real_sz;
+};
+
+struct co {
+	/* CPU state: callee-saved registers plus SP and PC */
+	void *reg[14];  // pc, sp, x19-x29, x30 (lr)
+
+	struct co *main_co;
+	void *arg;
+	bool done;
+
+	void (*fp)(void);
+
+	struct co_save_stack save_stack;
+	struct co_stack *stack;
+};
+
+extern struct co *current_co;
+
+static inline struct co *co_get_co(void)
+{
+	return current_co;
+}
+
+static inline void *co_get_arg(void)
+{
+	return co_get_co()->arg;
+}
+
+struct co_stack *co_stack_new(size_t sz);
+
+void co_stack_destroy(struct co_stack *s);
+
+struct co *co_create(struct co *main_co,
+		     struct co_stack *stack,
+		     size_t save_stack_sz, void (*fp)(void),
+		     void *arg);
+
+void co_resume(struct co *resume_co);
+
+void co_destroy(struct co *co);
+
+void *_co_switch(struct co *from_co, struct co *to_co);
+
+static inline void _co_yield_to_main_co(struct co *yield_co)
+{
+    assert(yield_co);
+    assert(yield_co->main_co);
+    _co_switch(yield_co, yield_co->main_co);
+}
+
+static inline void co_yield(void)
+{
+	if (current_co)
+		_co_yield_to_main_co(current_co);
+}
+
+static inline bool co_is_main_co(struct co *co)
+{
+	return !co->main_co;
+}
+
+static inline void co_exit(void)
+{
+	struct co *co = co_get_co();
+
+	if (!co)
+		return;
+	co->done = true;
+	assert(co->stack->owner == co);
+	co->stack->owner = NULL;
+	co->stack->align_validsz = 0;
+	_co_yield_to_main_co(co);
+	assert(false);
+}
+
+#endif /* CONFIG_COROUTINES */
+#endif /* _COROUTINES_H_ */
diff --git a/lib/Kconfig b/lib/Kconfig
index 8f1a96d98c4..b6c1380b927 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -1226,6 +1226,16 @@  config PHANDLE_CHECK_SEQ
 	  enable this config option to distinguish them using
 	  phandles in fdtdec_get_alias_seq() function.
 
+config COROUTINES
+	bool "Enable coroutine support"
+	help
+	  Coroutines allow to implement a simple form of cooperative
+	  multi-tasking. The main thread of execution registers one or
+	  more functions as coroutine entry points, then it schedules one
+	  of them. At any point the scheduled coroutine may yield, that is,
+	  suspend its execution and return back to the main thread. At this
+	  point another coroutine may be scheduled and so on until all the
+	  registered coroutines are done.
 endmenu
 
 source "lib/fwu_updates/Kconfig"
diff --git a/lib/Makefile b/lib/Makefile
index 5cb3278d2ef..7b809151f5a 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -159,6 +159,8 @@  obj-$(CONFIG_LIB_ELF) += elf.o
 
 obj-$(CONFIG_$(PHASE_)SEMIHOSTING) += semihosting.o
 
+obj-$(CONFIG_COROUTINES) += coroutines.o
+
 #
 # Build a fast OID lookup registry from include/linux/oid_registry.h
 #
diff --git a/lib/coroutines.c b/lib/coroutines.c
new file mode 100644
index 00000000000..20c5aba5510
--- /dev/null
+++ b/lib/coroutines.c
@@ -0,0 +1,165 @@ 
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+
+// Copyright 2018 Sen Han <00hnes@gmail.com>
+// Copyright 2025 Linaro Limited
+
+#include <coroutines.h>
+#include <stdio.h>
+#include <stdint.h>
+
+
+/* Current co-routine */
+struct co *current_co;
+
+struct co_stack *co_stack_new(size_t sz)
+{
+	struct co_stack *p = calloc(1, sizeof(*p));
+	uintptr_t u_p;
+
+	if (!p)
+		return NULL;
+
+	if (sz < 4096)
+		sz = 4096;
+
+	p->sz = sz;
+	p->ptr = malloc(sz);
+	if (!p->ptr) {
+		free(p);
+		return NULL;
+	}
+
+	p->owner = NULL;
+	u_p = (uintptr_t)(p->sz - (sizeof(void*) << 1) + (uintptr_t)p->ptr);
+	u_p = (u_p >> 4) << 4;
+	p->align_highptr = (void*)u_p;
+	p->align_retptr  = (void*)(u_p - sizeof(void*));
+	assert(p->sz > (16 + (sizeof(void*) << 1) + sizeof(void*)));
+	p->align_limit = p->sz - 16 - (sizeof(void*) << 1);
+
+	return p;
+}
+
+void co_stack_destroy(struct co_stack *s){
+	if (!s)
+		return;
+	free(s->ptr);
+	free(s);
+}
+
+struct co *co_create(struct co *main_co,
+		     struct co_stack *stack,
+		     size_t save_stack_sz,
+		     void (*fp)(void), void *arg)
+{
+	struct co *p = malloc(sizeof(*p));
+	assert(p);
+	memset(p, 0, sizeof(*p));
+
+	if (main_co) {
+		assert(stack);
+		p->stack = stack;
+		p->reg[CO_REG_IDX_RETADDR] = (void *)fp;
+		// FIXME original code uses align_retptr; causes a crash
+		p->reg[CO_REG_IDX_SP] = p->stack->align_highptr;
+		p->main_co = main_co;
+		p->arg = arg;
+		p->fp = fp;
+		if (!save_stack_sz)
+			save_stack_sz = 64;
+		p->save_stack.ptr = malloc(save_stack_sz);
+		assert(p->save_stack.ptr);
+		p->save_stack.sz = save_stack_sz;
+		p->save_stack.valid_sz = 0;
+	} else {
+		p->main_co = NULL;
+		p->arg = arg;
+		p->fp = fp;
+		p->stack = NULL;
+		p->save_stack.ptr = NULL;
+	}
+	return p;
+}
+
+static void grab_stack(struct co *resume_co)
+{
+	struct co *owner_co = resume_co->stack->owner;
+
+	if (owner_co) {
+		assert(owner_co->stack == resume_co->stack);
+		assert((uintptr_t)(owner_co->stack->align_retptr) >=
+		       (uintptr_t)(owner_co->reg[CO_REG_IDX_SP]));
+		assert((uintptr_t)owner_co->stack->align_highptr -
+				(uintptr_t)owner_co->stack->align_limit
+			<= (uintptr_t)owner_co->reg[CO_REG_IDX_SP]);
+		owner_co->save_stack.valid_sz =
+			(uintptr_t)owner_co->stack->align_retptr -
+			(uintptr_t)owner_co->reg[CO_REG_IDX_SP];
+		if (owner_co->save_stack.sz < owner_co->save_stack.valid_sz) {
+			free(owner_co->save_stack.ptr);
+			owner_co->save_stack.ptr = NULL;
+			do {
+				owner_co->save_stack.sz <<= 1;
+				assert(owner_co->save_stack.sz > 0);
+			} while (owner_co->save_stack.sz <
+				 owner_co->save_stack.valid_sz);
+			owner_co->save_stack.ptr =
+				malloc(owner_co->save_stack.sz);
+			assert(owner_co->save_stack.ptr);
+		}
+		if (owner_co->save_stack.valid_sz > 0)
+			memcpy(owner_co->save_stack.ptr,
+			       owner_co->reg[CO_REG_IDX_SP],
+			       owner_co->save_stack.valid_sz);
+		if (owner_co->save_stack.valid_sz >
+		    owner_co->save_stack.max_cpsz)
+			owner_co->save_stack.max_cpsz =
+				owner_co->save_stack.valid_sz;
+		owner_co->stack->owner = NULL;
+		owner_co->stack->align_validsz = 0;
+	}
+	assert(!resume_co->stack->owner);
+	assert(resume_co->save_stack.valid_sz <=
+	       resume_co->stack->align_limit - sizeof(void *));
+	if (resume_co->save_stack.valid_sz > 0)
+		memcpy((void*)
+		       (uintptr_t)(resume_co->stack->align_retptr) -
+				resume_co->save_stack.valid_sz,
+		       resume_co->save_stack.ptr,
+		       resume_co->save_stack.valid_sz);
+	if (resume_co->save_stack.valid_sz > resume_co->save_stack.max_cpsz)
+		resume_co->save_stack.max_cpsz = resume_co->save_stack.valid_sz;
+	resume_co->stack->align_validsz =
+		resume_co->save_stack.valid_sz + sizeof(void *);
+	resume_co->stack->owner = resume_co;
+}
+
+void co_resume(struct co *resume_co)
+{
+	assert(resume_co && resume_co->main_co && !resume_co->done);
+
+	if (resume_co->stack->owner != resume_co)
+		grab_stack(resume_co);
+
+	current_co = resume_co;
+	_co_switch(resume_co->main_co, resume_co);
+	current_co = resume_co->main_co;
+}
+
+void co_destroy(struct co *co){
+	if (!co)
+		return;
+
+	if(co_is_main_co(co)){
+		free(co);
+		current_co = NULL;
+	} else {
+		if(co->stack->owner == co){
+			co->stack->owner = NULL;
+			co->stack->align_validsz = 0;
+		}
+		free(co->save_stack.ptr);
+		co->save_stack.ptr = NULL;
+		free(co);
+	}
+}