diff mbox series

[03/17] x86: Replace open-coded parity calculation with parity8()

Message ID 20250223164217.2139331-4-visitorckw@gmail.com
State New
Headers show
Series [01/17] bitops: Add generic parity calculation for u32 | expand

Commit Message

Kuan-Wei Chiu Feb. 23, 2025, 4:42 p.m. UTC
Refactor parity calculations to use the standard parity8() helper. This
change eliminates redundant implementations and improves code
efficiency.

Co-developed-by: Yu-Chun Lin <eleanor15x@gmail.com>
Signed-off-by: Yu-Chun Lin <eleanor15x@gmail.com>
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
---
 arch/x86/kernel/bootflag.c | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

Comments

H. Peter Anvin Feb. 24, 2025, 10:21 p.m. UTC | #1
On February 24, 2025 2:17:29 PM PST, Yury Norov <yury.norov@gmail.com> wrote:
>On Mon, Feb 24, 2025 at 01:55:28PM -0800, H. Peter Anvin wrote:
>> On 2/24/25 07:24, Uros Bizjak wrote:
>> > 
>> > 
>> > On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:
>> > > Refactor parity calculations to use the standard parity8() helper. This
>> > > change eliminates redundant implementations and improves code
>> > > efficiency.
>> > 
>> > The patch improves parity assembly code in bootflag.o from:
>> > 
>> >    58:    89 de                    mov    %ebx,%esi
>> >    5a:    b9 08 00 00 00           mov    $0x8,%ecx
>> >    5f:    31 d2                    xor    %edx,%edx
>> >    61:    89 f0                    mov    %esi,%eax
>> >    63:    89 d7                    mov    %edx,%edi
>> >    65:    40 d0 ee                 shr    %sil
>> >    68:    83 e0 01                 and    $0x1,%eax
>> >    6b:    31 c2                    xor    %eax,%edx
>> >    6d:    83 e9 01                 sub    $0x1,%ecx
>> >    70:    75 ef                    jne    61 <sbf_init+0x51>
>> >    72:    39 c7                    cmp    %eax,%edi
>> >    74:    74 7f                    je     f5 <sbf_init+0xe5>
>> >    76:
>> > 
>> > to:
>> > 
>> >    54:    89 d8                    mov    %ebx,%eax
>> >    56:    ba 96 69 00 00           mov    $0x6996,%edx
>> >    5b:    c0 e8 04                 shr    $0x4,%al
>> >    5e:    31 d8                    xor    %ebx,%eax
>> >    60:    83 e0 0f                 and    $0xf,%eax
>> >    63:    0f a3 c2                 bt     %eax,%edx
>> >    66:    73 64                    jae    cc <sbf_init+0xbc>
>> >    68:
>> > 
>> > which is faster and smaller (-10 bytes) code.
>> > 
>> 
>> Of course, on x86, parity8() and parity16() can be implemented very simply:
>> 
>> (Also, the parity functions really ought to return bool, and be flagged
>> __attribute_const__.)
>
>There was a discussion regarding return type when parity8() was added.
>The integer type was taken over bool with a sort of consideration that
>bool should be returned as an answer to some question, like parity_odd().
>
>To me it's not a big deal. We can switch to boolean and describe in
>comment what the 'true' means for the parity() function.

Bool is really the single-bit type, and gives the compiler more information. You could argue that the function really should be called parity_odd*() in general, but that's kind of excessive IMO.
H. Peter Anvin Feb. 25, 2025, 3:36 a.m. UTC | #2
On 2/24/25 14:08, Uros Bizjak wrote:
> On Mon, Feb 24, 2025 at 10:56 PM H. Peter Anvin <hpa@zytor.com> wrote:
>>
>> On 2/24/25 07:24, Uros Bizjak wrote:
>>>
>>>
>>> On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:
>>>> Refactor parity calculations to use the standard parity8() helper. This
>>>> change eliminates redundant implementations and improves code
>>>> efficiency.
>>>
>>> The patch improves parity assembly code in bootflag.o from:
>>>
>>>     58:    89 de                    mov    %ebx,%esi
>>>     5a:    b9 08 00 00 00           mov    $0x8,%ecx
>>>     5f:    31 d2                    xor    %edx,%edx
>>>     61:    89 f0                    mov    %esi,%eax
>>>     63:    89 d7                    mov    %edx,%edi
>>>     65:    40 d0 ee                 shr    %sil
>>>     68:    83 e0 01                 and    $0x1,%eax
>>>     6b:    31 c2                    xor    %eax,%edx
>>>     6d:    83 e9 01                 sub    $0x1,%ecx
>>>     70:    75 ef                    jne    61 <sbf_init+0x51>
>>>     72:    39 c7                    cmp    %eax,%edi
>>>     74:    74 7f                    je     f5 <sbf_init+0xe5>
>>>     76:
>>>
>>> to:
>>>
>>>     54:    89 d8                    mov    %ebx,%eax
>>>     56:    ba 96 69 00 00           mov    $0x6996,%edx
>>>     5b:    c0 e8 04                 shr    $0x4,%al
>>>     5e:    31 d8                    xor    %ebx,%eax
>>>     60:    83 e0 0f                 and    $0xf,%eax
>>>     63:    0f a3 c2                 bt     %eax,%edx
>>>     66:    73 64                    jae    cc <sbf_init+0xbc>
>>>     68:
>>>
>>> which is faster and smaller (-10 bytes) code.
>>>
>>
>> Of course, on x86, parity8() and parity16() can be implemented very simply:
>>
>> (Also, the parity functions really ought to return bool, and be flagged
>> __attribute_const__.)
>>
>> static inline __attribute_const__ bool _arch_parity8(u8 val)
>> {
>>          bool parity;
>>          asm("and %0,%0" : "=@ccnp" (parity) : "q" (val));
> 
> asm("test %0,%0" : "=@ccnp" (parity) : "q" (val));
> 
> because we are interested only in flags.
> 

Also, needs to be %1,%1 (my mistake, thought flags outputs didn't count.)

Finally, this is kind of an obvious improvement:

  static void __init sbf_write(u8 v)
  {
         unsigned long flags;

         if (sbf_port != -1) {
-               v &= ~SBF_PARITY;
                 if (!parity(v))
-                       v |= SBF_PARITY;
+                       v ^= SBF_PARITY;

	-hpa
David Laight Feb. 25, 2025, 10:46 p.m. UTC | #3
On Mon, 24 Feb 2025 13:55:28 -0800
"H. Peter Anvin" <hpa@zytor.com> wrote:

> On 2/24/25 07:24, Uros Bizjak wrote:
> > 
> > 
> > On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:  
> >> Refactor parity calculations to use the standard parity8() helper. This
> >> change eliminates redundant implementations and improves code
> >> efficiency.  
...
> Of course, on x86, parity8() and parity16() can be implemented very simply:
> 
> (Also, the parity functions really ought to return bool, and be flagged 
> __attribute_const__.)
> 
> static inline __attribute_const__ bool _arch_parity8(u8 val)
> {
> 	bool parity;
> 	asm("and %0,%0" : "=@ccnp" (parity) : "q" (val));
> 	return parity;
> }
> 
> static inline __attribute_const__ bool _arch_parity16(u16 val)
> {
> 	bool parity;
> 	asm("xor %h0,%b0" : "=@ccnp" (parity), "+Q" (val));
> 	return parity;
> }

The same (with fixes) can be done for parity64() on 32bit.

> 
> In the generic algorithm, you probably should implement parity16() in 
> terms of parity8(), parity32() in terms of parity16() and so on:
> 
> static inline __attribute_const__ bool parity16(u16 val)
> {
> #ifdef ARCH_HAS_PARITY16
> 	if (!__builtin_const_p(val))
> 		return _arch_parity16(val);
> #endif
> 	return parity8(val ^ (val >> 8));
> }
> 
> This picks up the architectural versions when available.

Not the best way to do that.
Make the name in the #ifdef the same as the function and define
a default one if the architecture doesn't define one.
So:

static inline parity16(u16 val)
{
	return __builtin_const_p(val) ? _parity_const(val) : _parity16(val);
}

#ifndef _parity16
static inline _parity16(u15 val)
{
	return _parity8(val ^ (val >> 8));
}
#endif

You only need one _parity_const().

> 
> Furthermore, if a popcnt instruction is known to exist, then the parity 
> is simply popcnt(x) & 1.

Beware that some popcnt instructions are slow.

	David

> 
> 	-hpa
> 
>
H. Peter Anvin Feb. 26, 2025, 12:26 a.m. UTC | #4
On February 25, 2025 2:46:23 PM PST, David Laight <david.laight.linux@gmail.com> wrote:
>On Mon, 24 Feb 2025 13:55:28 -0800
>"H. Peter Anvin" <hpa@zytor.com> wrote:
>
>> On 2/24/25 07:24, Uros Bizjak wrote:
>> > 
>> > 
>> > On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:  
>> >> Refactor parity calculations to use the standard parity8() helper. This
>> >> change eliminates redundant implementations and improves code
>> >> efficiency.  
>...
>> Of course, on x86, parity8() and parity16() can be implemented very simply:
>> 
>> (Also, the parity functions really ought to return bool, and be flagged 
>> __attribute_const__.)
>> 
>> static inline __attribute_const__ bool _arch_parity8(u8 val)
>> {
>> 	bool parity;
>> 	asm("and %0,%0" : "=@ccnp" (parity) : "q" (val));
>> 	return parity;
>> }
>> 
>> static inline __attribute_const__ bool _arch_parity16(u16 val)
>> {
>> 	bool parity;
>> 	asm("xor %h0,%b0" : "=@ccnp" (parity), "+Q" (val));
>> 	return parity;
>> }
>
>The same (with fixes) can be done for parity64() on 32bit.
>
>> 
>> In the generic algorithm, you probably should implement parity16() in 
>> terms of parity8(), parity32() in terms of parity16() and so on:
>> 
>> static inline __attribute_const__ bool parity16(u16 val)
>> {
>> #ifdef ARCH_HAS_PARITY16
>> 	if (!__builtin_const_p(val))
>> 		return _arch_parity16(val);
>> #endif
>> 	return parity8(val ^ (val >> 8));
>> }
>> 
>> This picks up the architectural versions when available.
>
>Not the best way to do that.
>Make the name in the #ifdef the same as the function and define
>a default one if the architecture doesn't define one.
>So:
>
>static inline parity16(u16 val)
>{
>	return __builtin_const_p(val) ? _parity_const(val) : _parity16(val);
>}
>
>#ifndef _parity16
>static inline _parity16(u15 val)
>{
>	return _parity8(val ^ (val >> 8));
>}
>#endif
>
>You only need one _parity_const().
>
>> 
>> Furthermore, if a popcnt instruction is known to exist, then the parity 
>> is simply popcnt(x) & 1.
>
>Beware that some popcnt instructions are slow.
>
>	David
>
>> 
>> 	-hpa
>> 
>> 
>

Seems more verbose than just #ifdef _arch_parity8 et al since the const and generic code cases are the same (which they aren't always.)

But that part is a good idea, especially since on at least *some* architectures like x86 doing: 

#define _arch_parity8(x) __builtin_parity(x)

... etc is entirely reasonable and lets gcc use an already available parity flag should one be available.

The inline wrapper, of course, takes care of the type mangling.
diff mbox series

Patch

diff --git a/arch/x86/kernel/bootflag.c b/arch/x86/kernel/bootflag.c
index 3fed7ae58b60..314ff0e84900 100644
--- a/arch/x86/kernel/bootflag.c
+++ b/arch/x86/kernel/bootflag.c
@@ -8,6 +8,7 @@ 
 #include <linux/string.h>
 #include <linux/spinlock.h>
 #include <linux/acpi.h>
+#include <linux/bitops.h>
 #include <asm/io.h>
 
 #include <linux/mc146818rtc.h>
@@ -20,26 +21,13 @@ 
 
 int sbf_port __initdata = -1;	/* set via acpi_boot_init() */
 
-static int __init parity(u8 v)
-{
-	int x = 0;
-	int i;
-
-	for (i = 0; i < 8; i++) {
-		x ^= (v & 1);
-		v >>= 1;
-	}
-
-	return x;
-}
-
 static void __init sbf_write(u8 v)
 {
 	unsigned long flags;
 
 	if (sbf_port != -1) {
 		v &= ~SBF_PARITY;
-		if (!parity(v))
+		if (!parity8(v))
 			v |= SBF_PARITY;
 
 		printk(KERN_INFO "Simple Boot Flag at 0x%x set to 0x%x\n",
@@ -70,7 +58,7 @@  static int __init sbf_value_valid(u8 v)
 {
 	if (v & SBF_RESERVED)		/* Reserved bits */
 		return 0;
-	if (!parity(v))
+	if (!parity8(v))
 		return 0;
 
 	return 1;