[v9,05/15] arm64: barrier: add support for smp_cond_load_relaxed_timeout()

Message ID 20241107190818.522639-6-ankur.a.arora@oracle.com
State New
Series arm64: support poll_idle()

Commit Message

Ankur Arora Nov. 7, 2024, 7:08 p.m. UTC
Support a waited variant of polling on a condition variable
via smp_cond_load_relaxed_timeout().

This uses the __cmpwait_relaxed() primitive to do the actual
waiting when the wait can be guaranteed not to block forever
(in case there are no stores to the waited-for cacheline).
For this we depend on the availability of the event stream.

For cases where the event stream is unavailable, we fall back
to a spin-wait implementation that is identical to the generic
variant.

Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 arch/arm64/include/asm/barrier.h | 54 ++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
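
For context, a caller might use the new interface along these lines
(a hypothetical sketch, not part of the series; the poll_flag() name
and the use of local_clock() as the time expression are illustrative
assumptions):

	/*
	 * Hypothetical caller: poll *flag until it becomes non-zero or
	 * until local_clock() (from <linux/sched/clock.h>) reaches
	 * deadline_ns. VAL is the macro's name for the value last
	 * loaded from the pointer.
	 */
	static u8 poll_flag(u8 *flag, u64 deadline_ns)
	{
		return smp_cond_load_relaxed_timeout(flag, VAL != 0,
						     local_clock(),
						     deadline_ns);
	}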

Comments

Will Deacon Dec. 10, 2024, 1:50 p.m. UTC | #1
On Thu, Nov 07, 2024 at 11:08:08AM -0800, Ankur Arora wrote:
> Support a waited variant of polling on a condition variable
> via smp_cond_load_relaxed_timeout().
>
> This uses the __cmpwait_relaxed() primitive to do the actual
> waiting when the wait can be guaranteed not to block forever
> (in case there are no stores to the waited-for cacheline).
> For this we depend on the availability of the event stream.
>
> For cases where the event stream is unavailable, we fall back
> to a spin-wait implementation that is identical to the generic
> variant.
> 
> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
> ---
>  arch/arm64/include/asm/barrier.h | 54 ++++++++++++++++++++++++++++++++
>  1 file changed, 54 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
> index 1ca947d5c939..ab2515ecd6ca 100644
> --- a/arch/arm64/include/asm/barrier.h
> +++ b/arch/arm64/include/asm/barrier.h
> @@ -216,6 +216,60 @@ do {									\
>  	(typeof(*ptr))VAL;						\
>  })
>  
> +#define __smp_cond_load_timeout_spin(ptr, cond_expr,			\
> +				     time_expr_ns, time_limit_ns)	\
> +({									\
> +	typeof(ptr) __PTR = (ptr);					\
> +	__unqual_scalar_typeof(*ptr) VAL;				\
> +	unsigned int __count = 0;					\
> +	for (;;) {							\
> +		VAL = READ_ONCE(*__PTR);				\
> +		if (cond_expr)						\
> +			break;						\
> +		cpu_relax();						\
> +		if (__count++ < smp_cond_time_check_count)		\
> +			continue;					\
> +		if ((time_expr_ns) >= time_limit_ns)			\
> +			break;						\
> +		__count = 0;						\
> +	}								\
> +	(typeof(*ptr))VAL;						\
> +})

This is a carbon-copy of the asm-generic timeout implementation. Please
can you avoid duplicating that in the arch code?

Will
Ankur Arora Dec. 10, 2024, 8:14 p.m. UTC | #2
Will Deacon <will@kernel.org> writes:

> On Thu, Nov 07, 2024 at 11:08:08AM -0800, Ankur Arora wrote:
>> Support a waited variant of polling on a condition variable
>> via smp_cond_load_relaxed_timeout().
>>
>> This uses the __cmpwait_relaxed() primitive to do the actual
>> waiting when the wait can be guaranteed not to block forever
>> (in case there are no stores to the waited-for cacheline).
>> For this we depend on the availability of the event stream.
>>
>> For cases where the event stream is unavailable, we fall back
>> to a spin-wait implementation that is identical to the generic
>> variant.
>>
>> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
>> ---
>>  arch/arm64/include/asm/barrier.h | 54 ++++++++++++++++++++++++++++++++
>>  1 file changed, 54 insertions(+)
>>
>> diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
>> index 1ca947d5c939..ab2515ecd6ca 100644
>> --- a/arch/arm64/include/asm/barrier.h
>> +++ b/arch/arm64/include/asm/barrier.h
>> @@ -216,6 +216,60 @@ do {									\
>>  	(typeof(*ptr))VAL;						\
>>  })
>>
>> +#define __smp_cond_load_timeout_spin(ptr, cond_expr,			\
>> +				     time_expr_ns, time_limit_ns)	\
>> +({									\
>> +	typeof(ptr) __PTR = (ptr);					\
>> +	__unqual_scalar_typeof(*ptr) VAL;				\
>> +	unsigned int __count = 0;					\
>> +	for (;;) {							\
>> +		VAL = READ_ONCE(*__PTR);				\
>> +		if (cond_expr)						\
>> +			break;						\
>> +		cpu_relax();						\
>> +		if (__count++ < smp_cond_time_check_count)		\
>> +			continue;					\
>> +		if ((time_expr_ns) >= time_limit_ns)			\
>> +			break;						\
>> +		__count = 0;						\
>> +	}								\
>> +	(typeof(*ptr))VAL;						\
>> +})
>
> This is a carbon-copy of the asm-generic timeout implementation. Please
> can you avoid duplicating that in the arch code?

Yeah, I realized a bit late that I could avoid the duplication
quite simply. Will fix.

Thanks

--
ankur
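
(One way the duplication could be avoided, sketched here rather than
taken from the eventual fix: keep only the event-stream waiter in the
arch header and reuse the generic spin loop, assuming asm-generic
exposes it under a name like __smp_cond_load_relaxed_spin_timeout() --
that name is hypothetical.)

	/*
	 * Sketch only: fall back to the asm-generic spin-wait helper
	 * instead of duplicating it; only the __cmpwait_relaxed()-based
	 * waiter stays arch-specific.
	 */
	#define smp_cond_load_relaxed_timeout(ptr, cond_expr,		\
				      time_expr_ns, time_limit_ns)	\
	({								\
		__unqual_scalar_typeof(*ptr) _val;			\
									\
		if (likely(arch_timer_evtstrm_available()))		\
			_val = __smp_cond_load_timeout_wait(ptr,	\
					cond_expr, time_expr_ns,	\
					time_limit_ns);			\
		else							\
			_val = __smp_cond_load_relaxed_spin_timeout(ptr,\
					cond_expr, time_expr_ns,	\
					time_limit_ns);			\
		(typeof(*ptr))_val;					\
	})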

Patch

diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 1ca947d5c939..ab2515ecd6ca 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -216,6 +216,60 @@  do {									\
 	(typeof(*ptr))VAL;						\
 })
 
+#define __smp_cond_load_timeout_spin(ptr, cond_expr,			\
+				     time_expr_ns, time_limit_ns)	\
+({									\
+	typeof(ptr) __PTR = (ptr);					\
+	__unqual_scalar_typeof(*ptr) VAL;				\
+	unsigned int __count = 0;					\
+	for (;;) {							\
+		VAL = READ_ONCE(*__PTR);				\
+		if (cond_expr)						\
+			break;						\
+		cpu_relax();						\
+		if (__count++ < smp_cond_time_check_count)		\
+			continue;					\
+		if ((time_expr_ns) >= time_limit_ns)			\
+			break;						\
+		__count = 0;						\
+	}								\
+	(typeof(*ptr))VAL;						\
+})
+
+#define __smp_cond_load_timeout_wait(ptr, cond_expr,			\
+				     time_expr_ns, time_limit_ns)	\
+({									\
+	typeof(ptr) __PTR = (ptr);					\
+	__unqual_scalar_typeof(*ptr) VAL;				\
+	for (;;) {							\
+		VAL = READ_ONCE(*__PTR);				\
+		if (cond_expr)						\
+			break;						\
+		__cmpwait_relaxed(__PTR, VAL);				\
+		if ((time_expr_ns) >= time_limit_ns)			\
+			break;						\
+	}								\
+	(typeof(*ptr))VAL;						\
+})
+
+#define smp_cond_load_relaxed_timeout(ptr, cond_expr,			\
+				      time_expr_ns, time_limit_ns)	\
+({									\
+	__unqual_scalar_typeof(*ptr) _val;				\
+									\
+	int __wfe = arch_timer_evtstrm_available();			\
+	if (likely(__wfe))						\
+		_val = __smp_cond_load_timeout_wait(ptr, cond_expr,	\
+						   time_expr_ns,	\
+						   time_limit_ns);	\
+	else								\
+		_val = __smp_cond_load_timeout_spin(ptr, cond_expr,	\
+						   time_expr_ns,	\
+						   time_limit_ns);	\
+	(typeof(*ptr))_val;						\
+})
+
+
 #include <asm-generic/barrier.h>
 
 #endif	/* __ASSEMBLY__ */
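
A note on the timing granularity of the wait variant: when enabled, the
event stream wakes any WFE-based wait roughly every
ARCH_TIMER_EVT_STREAM_PERIOD_US (100us), so time_expr_ns in
__smp_cond_load_timeout_wait() is re-evaluated at least that often and
the timeout can overshoot by about one period. An illustrative helper
(hypothetical, not part of the patch):

	#include <clocksource/arm_arch_timer.h>
	#include <vdso/time64.h>

	/*
	 * Hypothetical helper: worst-case extra delay before the wait
	 * variant notices an expired timeout, assuming the event stream
	 * ticks every ARCH_TIMER_EVT_STREAM_PERIOD_US.
	 */
	static inline u64 evtstrm_timeout_slack_ns(void)
	{
		return ARCH_TIMER_EVT_STREAM_PERIOD_US * NSEC_PER_USEC;
	}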