diff mbox series

[v5,5/5] powerpc/vdso: Wire up getrandom() vDSO implementation on VDSO64

Message ID de334b1de27260f217d6bc65e02c841e8eff75be.1725304404.git.christophe.leroy@csgroup.eu
State Accepted
Commit 8072b39c3a75b63bc08737a74b24c263b7909ba0
Headers show
Series Wire up getrandom() vDSO implementation on powerpc | expand

Commit Message

Christophe Leroy Sept. 2, 2024, 7:17 p.m. UTC
Extend the getrandom() vDSO implementation to VDSO64.

Tested on QEMU on both ppc64_defconfig and ppc64le_defconfig.

The results are not precise because they were measured under QEMU on an
x86 laptop, but precision is not needed to see the benefit.

~ # ./vdso_test_getrandom bench-single
   vdso: 25000000 times in 4.977777162 seconds
   libc: 25000000 times in 75.516749981 seconds
syscall: 25000000 times in 86.842242014 seconds

~ # ./vdso_test_getrandom bench-single
   vdso: 25000000 times in 6.473814156 seconds
   libc: 25000000 times in 73.875109463 seconds
syscall: 25000000 times in 71.805066229 seconds

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
v5:
- VDSO32 for both PPC32 and PPC64 is in the previous patch. This patch has the logic for VDSO64.

v4:
- Use __BIG_ENDIAN__, which is defined by GCC, instead of CONFIG_CPU_BIG_ENDIAN, which is unknown to selftests
- Implement a cleaner/smaller output copy for little endian instead of keeping the compat macro.

v3: New (split out of previous patch)
---
 arch/powerpc/Kconfig                         |  2 +-
 arch/powerpc/kernel/vdso/Makefile            |  8 ++-
 arch/powerpc/kernel/vdso/getrandom.S         |  8 +++
 arch/powerpc/kernel/vdso/vdso64.lds.S        |  1 +
 arch/powerpc/kernel/vdso/vgetrandom-chacha.S | 53 ++++++++++++++++++++
 5 files changed, 69 insertions(+), 3 deletions(-)

Comments

Madhavan Srinivasan Sept. 4, 2024, 11:46 a.m. UTC | #1
On 9/3/24 12:47 AM, Christophe Leroy wrote:
> Extend getrandom() vDSO implementation to VDSO64
>
> Tested on QEMU on both ppc64_defconfig and ppc64le_defconfig.
>
> The results are not precise as it is QEMU on an x86 laptop, but
> no need to be precise to see the benefit.
>
> ~ # ./vdso_test_getrandom bench-single
>     vdso: 25000000 times in 4.977777162 seconds
>     libc: 25000000 times in 75.516749981 seconds
> syscall: 25000000 times in 86.842242014 seconds
>
> ~ # ./vdso_test_getrandom bench-single
>     vdso: 25000000 times in 6.473814156 seconds
>     libc: 25000000 times in 73.875109463 seconds
> syscall: 25000000 times in 71.805066229 seconds

Tried the patchset on top of

https://kernel.googlesource.com/pub/scm/linux/kernel/git/crng/random.git
(commit 963233ff013377bc2aa0d641b9efbb7fd4c2b72c (origin/master, 
origin/HEAD, master))

Results from a Power9 (PowerNV)
# ./vdso_test_getrandom bench-single
    vdso: 25000000 times in 0.787943615 seconds
    libc: 25000000 times in 14.101887252 seconds
    syscall: 25000000 times in 14.047475082 seconds

Impressive, thanks for enabling it.

Tested-by: Madhavan Srinivasan <maddy@linux.ibm.com>

> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> ---
> v5:
> - VDSO32 for both PPC32 and PPC64 is in previous patch. This patch have the logic for VDSO64.
>
> v4:
> - Use __BIG_ENDIAN__ which is defined by GCC instead of CONFIG_CPU_BIG_ENDIAN which is unknown by selftests
> - Implement a cleaner/smaller output copy for little endian instead of keeping compat macro.
>
> v3: New (split out of previous patch)
> ---
>   arch/powerpc/Kconfig                         |  2 +-
>   arch/powerpc/kernel/vdso/Makefile            |  8 ++-
>   arch/powerpc/kernel/vdso/getrandom.S         |  8 +++
>   arch/powerpc/kernel/vdso/vdso64.lds.S        |  1 +
>   arch/powerpc/kernel/vdso/vgetrandom-chacha.S | 53 ++++++++++++++++++++
>   5 files changed, 69 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index e500a59ddecc..b45452ac4a73 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -311,7 +311,7 @@ config PPC
>   	select SYSCTL_EXCEPTION_TRACE
>   	select THREAD_INFO_IN_TASK
>   	select TRACE_IRQFLAGS_SUPPORT
> -	select VDSO_GETRANDOM			if VDSO32
> +	select VDSO_GETRANDOM
>   	#
>   	# Please keep this list sorted alphabetically.
>   	#
> diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile
> index 7a4a935406d8..56fb1633529a 100644
> --- a/arch/powerpc/kernel/vdso/Makefile
> +++ b/arch/powerpc/kernel/vdso/Makefile
> @@ -9,6 +9,7 @@ obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o not
>   obj-vdso64 = sigtramp64-64.o gettimeofday-64.o datapage-64.o cacheflush-64.o note-64.o getcpu-64.o
>   
>   obj-vdso32 += getrandom-32.o vgetrandom-chacha-32.o
> +obj-vdso64 += getrandom-64.o vgetrandom-chacha-64.o
>   
>   ifneq ($(c-gettimeofday-y),)
>     CFLAGS_vgettimeofday-32.o += -include $(c-gettimeofday-y)
> @@ -21,6 +22,7 @@ endif
>   
>   ifneq ($(c-getrandom-y),)
>     CFLAGS_vgetrandom-32.o += -include $(c-getrandom-y)
> +  CFLAGS_vgetrandom-64.o += -include $(c-getrandom-y) $(call cc-option, -ffixed-r30)
>   endif
>   
>   # Build rules
> @@ -34,7 +36,7 @@ endif
>   targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday-32.o vgetrandom-32.o
>   targets += crtsavres-32.o
>   obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
> -targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o
> +targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o vgetrandom-64.o
>   obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
>   
>   ccflags-y := -fno-common -fno-builtin -DBUILD_VDSO
> @@ -71,7 +73,7 @@ CPPFLAGS_vdso64.lds += -P -C
>   # link rule for the .so file, .lds has to be first
>   $(obj)/vdso32.so.dbg: $(obj)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o $(obj)/vgetrandom-32.o $(obj)/crtsavres-32.o FORCE
>   	$(call if_changed,vdso32ld_and_check)
> -$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o FORCE
> +$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o $(obj)/vgetrandom-64.o FORCE
>   	$(call if_changed,vdso64ld_and_check)
>   
>   # assembly rules for the .S files
> @@ -87,6 +89,8 @@ $(obj-vdso64): %-64.o: %.S FORCE
>   	$(call if_changed_dep,vdso64as)
>   $(obj)/vgettimeofday-64.o: %-64.o: %.c FORCE
>   	$(call if_changed_dep,cc_o_c)
> +$(obj)/vgetrandom-64.o: %-64.o: %.c FORCE
> +	$(call if_changed_dep,cc_o_c)
>   
>   # Generate VDSO offsets using helper script
>   gen-vdso32sym := $(src)/gen_vdso32_offsets.sh
> diff --git a/arch/powerpc/kernel/vdso/getrandom.S b/arch/powerpc/kernel/vdso/getrandom.S
> index 21773ef3fc1d..a957cd2b2b03 100644
> --- a/arch/powerpc/kernel/vdso/getrandom.S
> +++ b/arch/powerpc/kernel/vdso/getrandom.S
> @@ -27,10 +27,18 @@
>     .cfi_adjust_cfa_offset PPC_MIN_STKFRM
>   	PPC_STL		r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
>     .cfi_rel_offset lr, PPC_MIN_STKFRM + PPC_LR_STKOFF
> +#ifdef __powerpc64__
> +	PPC_STL		r2, PPC_MIN_STKFRM + STK_GOT(r1)
> +  .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT
> +#endif
>   	get_datapage	r8
>   	addi		r8, r8, VDSO_RNG_DATA_OFFSET
>   	bl		CFUNC(DOTSYM(\funct))
>   	PPC_LL		r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
> +#ifdef __powerpc64__
> +	PPC_LL		r2, PPC_MIN_STKFRM + STK_GOT(r1)
> +  .cfi_restore r2
> +#endif
>   	cmpwi		r3, 0
>   	mtlr		r0
>   	addi		r1, r1, 2 * PPC_MIN_STKFRM
> diff --git a/arch/powerpc/kernel/vdso/vdso64.lds.S b/arch/powerpc/kernel/vdso/vdso64.lds.S
> index 400819258c06..9481e4b892ed 100644
> --- a/arch/powerpc/kernel/vdso/vdso64.lds.S
> +++ b/arch/powerpc/kernel/vdso/vdso64.lds.S
> @@ -123,6 +123,7 @@ VERSION
>   		__kernel_sigtramp_rt64;
>   		__kernel_getcpu;
>   		__kernel_time;
> +		__kernel_getrandom;
>   
>   	local: *;
>   	};
> diff --git a/arch/powerpc/kernel/vdso/vgetrandom-chacha.S b/arch/powerpc/kernel/vdso/vgetrandom-chacha.S
> index ac85788205cb..7f9061a9e8b4 100644
> --- a/arch/powerpc/kernel/vdso/vgetrandom-chacha.S
> +++ b/arch/powerpc/kernel/vdso/vgetrandom-chacha.S
> @@ -124,6 +124,26 @@
>    */
>   SYM_FUNC_START(__arch_chacha20_blocks_nostack)
>   #ifdef __powerpc64__
> +	std	counter, -216(r1)
> +
> +	std	r14, -144(r1)
> +	std	r15, -136(r1)
> +	std	r16, -128(r1)
> +	std	r17, -120(r1)
> +	std	r18, -112(r1)
> +	std	r19, -104(r1)
> +	std	r20, -96(r1)
> +	std	r21, -88(r1)
> +	std	r22, -80(r1)
> +	std	r23, -72(r1)
> +	std	r24, -64(r1)
> +	std	r25, -56(r1)
> +	std	r26, -48(r1)
> +	std	r27, -40(r1)
> +	std	r28, -32(r1)
> +	std	r29, -24(r1)
> +	std	r30, -16(r1)
> +	std	r31, -8(r1)
>   #else
>   	stwu	r1, -96(r1)
>   	stw	counter, 20(r1)
> @@ -149,9 +169,13 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
>   	stw	r30, 88(r1)
>   	stw	r31, 92(r1)
>   #endif
> +#endif	/* __powerpc64__ */
>   
>   	lwz	counter0, 0(counter)
>   	lwz	counter1, 4(counter)
> +#ifdef __powerpc64__
> +	rldimi	counter0, counter1, 32, 0
> +#endif
>   	mr	idx_r0, nblocks
>   	subi	dst_bytes, dst_bytes, 4
>   
> @@ -267,12 +291,21 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
>   
>   	subic.	idx_r0, idx_r0, 1	/* subi. can't use r0 as source */
>   
> +#ifdef __powerpc64__
> +	addi	counter0, counter0, 1
> +	srdi	counter1, counter0, 32
> +#else
>   	addic	counter0, counter0, 1
>   	addze	counter1, counter1
> +#endif
>   
>   	bne	.Lblock
>   
> +#ifdef __powerpc64__
> +	ld	counter, -216(r1)
> +#else
>   	lwz	counter, 20(r1)
> +#endif
>   	stw	counter0, 0(counter)
>   	stw	counter1, 4(counter)
>   
> @@ -284,6 +317,26 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
>   	li	r11, 0
>   	li	r12, 0
>   
> +#ifdef __powerpc64__
> +	ld	r14, -144(r1)
> +	ld	r15, -136(r1)
> +	ld	r16, -128(r1)
> +	ld	r17, -120(r1)
> +	ld	r18, -112(r1)
> +	ld	r19, -104(r1)
> +	ld	r20, -96(r1)
> +	ld	r21, -88(r1)
> +	ld	r22, -80(r1)
> +	ld	r23, -72(r1)
> +	ld	r24, -64(r1)
> +	ld	r25, -56(r1)
> +	ld	r26, -48(r1)
> +	ld	r27, -40(r1)
> +	ld	r28, -32(r1)
> +	ld	r29, -24(r1)
> +	ld	r30, -16(r1)
> +	ld	r31, -8(r1)
> +#else
>   #ifdef __BIG_ENDIAN__
>   	lmw	r14, 24(r1)
>   #else
diff mbox series

Patch

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e500a59ddecc..b45452ac4a73 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -311,7 +311,7 @@  config PPC
 	select SYSCTL_EXCEPTION_TRACE
 	select THREAD_INFO_IN_TASK
 	select TRACE_IRQFLAGS_SUPPORT
-	select VDSO_GETRANDOM			if VDSO32
+	select VDSO_GETRANDOM
 	#
 	# Please keep this list sorted alphabetically.
 	#
diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile
index 7a4a935406d8..56fb1633529a 100644
--- a/arch/powerpc/kernel/vdso/Makefile
+++ b/arch/powerpc/kernel/vdso/Makefile
@@ -9,6 +9,7 @@  obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o not
 obj-vdso64 = sigtramp64-64.o gettimeofday-64.o datapage-64.o cacheflush-64.o note-64.o getcpu-64.o
 
 obj-vdso32 += getrandom-32.o vgetrandom-chacha-32.o
+obj-vdso64 += getrandom-64.o vgetrandom-chacha-64.o
 
 ifneq ($(c-gettimeofday-y),)
   CFLAGS_vgettimeofday-32.o += -include $(c-gettimeofday-y)
@@ -21,6 +22,7 @@  endif
 
 ifneq ($(c-getrandom-y),)
   CFLAGS_vgetrandom-32.o += -include $(c-getrandom-y)
+  CFLAGS_vgetrandom-64.o += -include $(c-getrandom-y) $(call cc-option, -ffixed-r30)
 endif
 
 # Build rules
@@ -34,7 +36,7 @@  endif
 targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday-32.o vgetrandom-32.o
 targets += crtsavres-32.o
 obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
-targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o
+targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o vgetrandom-64.o
 obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
 
 ccflags-y := -fno-common -fno-builtin -DBUILD_VDSO
@@ -71,7 +73,7 @@  CPPFLAGS_vdso64.lds += -P -C
 # link rule for the .so file, .lds has to be first
 $(obj)/vdso32.so.dbg: $(obj)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o $(obj)/vgetrandom-32.o $(obj)/crtsavres-32.o FORCE
 	$(call if_changed,vdso32ld_and_check)
-$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o FORCE
+$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o $(obj)/vgetrandom-64.o FORCE
 	$(call if_changed,vdso64ld_and_check)
 
 # assembly rules for the .S files
@@ -87,6 +89,8 @@  $(obj-vdso64): %-64.o: %.S FORCE
 	$(call if_changed_dep,vdso64as)
 $(obj)/vgettimeofday-64.o: %-64.o: %.c FORCE
 	$(call if_changed_dep,cc_o_c)
+$(obj)/vgetrandom-64.o: %-64.o: %.c FORCE
+	$(call if_changed_dep,cc_o_c)
 
 # Generate VDSO offsets using helper script
 gen-vdso32sym := $(src)/gen_vdso32_offsets.sh
diff --git a/arch/powerpc/kernel/vdso/getrandom.S b/arch/powerpc/kernel/vdso/getrandom.S
index 21773ef3fc1d..a957cd2b2b03 100644
--- a/arch/powerpc/kernel/vdso/getrandom.S
+++ b/arch/powerpc/kernel/vdso/getrandom.S
@@ -27,10 +27,18 @@ 
   .cfi_adjust_cfa_offset PPC_MIN_STKFRM
 	PPC_STL		r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
   .cfi_rel_offset lr, PPC_MIN_STKFRM + PPC_LR_STKOFF
+#ifdef __powerpc64__
+	PPC_STL		r2, PPC_MIN_STKFRM + STK_GOT(r1)
+  .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT
+#endif
 	get_datapage	r8
 	addi		r8, r8, VDSO_RNG_DATA_OFFSET
 	bl		CFUNC(DOTSYM(\funct))
 	PPC_LL		r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
+#ifdef __powerpc64__
+	PPC_LL		r2, PPC_MIN_STKFRM + STK_GOT(r1)
+  .cfi_restore r2
+#endif
 	cmpwi		r3, 0
 	mtlr		r0
 	addi		r1, r1, 2 * PPC_MIN_STKFRM
diff --git a/arch/powerpc/kernel/vdso/vdso64.lds.S b/arch/powerpc/kernel/vdso/vdso64.lds.S
index 400819258c06..9481e4b892ed 100644
--- a/arch/powerpc/kernel/vdso/vdso64.lds.S
+++ b/arch/powerpc/kernel/vdso/vdso64.lds.S
@@ -123,6 +123,7 @@  VERSION
 		__kernel_sigtramp_rt64;
 		__kernel_getcpu;
 		__kernel_time;
+		__kernel_getrandom;
 
 	local: *;
 	};
diff --git a/arch/powerpc/kernel/vdso/vgetrandom-chacha.S b/arch/powerpc/kernel/vdso/vgetrandom-chacha.S
index ac85788205cb..7f9061a9e8b4 100644
--- a/arch/powerpc/kernel/vdso/vgetrandom-chacha.S
+++ b/arch/powerpc/kernel/vdso/vgetrandom-chacha.S
@@ -124,6 +124,26 @@ 
  */
 SYM_FUNC_START(__arch_chacha20_blocks_nostack)
 #ifdef __powerpc64__
+	std	counter, -216(r1)
+
+	std	r14, -144(r1)
+	std	r15, -136(r1)
+	std	r16, -128(r1)
+	std	r17, -120(r1)
+	std	r18, -112(r1)
+	std	r19, -104(r1)
+	std	r20, -96(r1)
+	std	r21, -88(r1)
+	std	r22, -80(r1)
+	std	r23, -72(r1)
+	std	r24, -64(r1)
+	std	r25, -56(r1)
+	std	r26, -48(r1)
+	std	r27, -40(r1)
+	std	r28, -32(r1)
+	std	r29, -24(r1)
+	std	r30, -16(r1)
+	std	r31, -8(r1)
 #else
 	stwu	r1, -96(r1)
 	stw	counter, 20(r1)
@@ -149,9 +169,13 @@  SYM_FUNC_START(__arch_chacha20_blocks_nostack)
 	stw	r30, 88(r1)
 	stw	r31, 92(r1)
 #endif
+#endif	/* __powerpc64__ */
 
 	lwz	counter0, 0(counter)
 	lwz	counter1, 4(counter)
+#ifdef __powerpc64__
+	rldimi	counter0, counter1, 32, 0
+#endif
 	mr	idx_r0, nblocks
 	subi	dst_bytes, dst_bytes, 4
 
@@ -267,12 +291,21 @@  SYM_FUNC_START(__arch_chacha20_blocks_nostack)
 
 	subic.	idx_r0, idx_r0, 1	/* subi. can't use r0 as source */
 
+#ifdef __powerpc64__
+	addi	counter0, counter0, 1
+	srdi	counter1, counter0, 32
+#else
 	addic	counter0, counter0, 1
 	addze	counter1, counter1
+#endif
 
 	bne	.Lblock
 
+#ifdef __powerpc64__
+	ld	counter, -216(r1)
+#else
 	lwz	counter, 20(r1)
+#endif
 	stw	counter0, 0(counter)
 	stw	counter1, 4(counter)
 
@@ -284,6 +317,26 @@  SYM_FUNC_START(__arch_chacha20_blocks_nostack)
 	li	r11, 0
 	li	r12, 0
 
+#ifdef __powerpc64__
+	ld	r14, -144(r1)
+	ld	r15, -136(r1)
+	ld	r16, -128(r1)
+	ld	r17, -120(r1)
+	ld	r18, -112(r1)
+	ld	r19, -104(r1)
+	ld	r20, -96(r1)
+	ld	r21, -88(r1)
+	ld	r22, -80(r1)
+	ld	r23, -72(r1)
+	ld	r24, -64(r1)
+	ld	r25, -56(r1)
+	ld	r26, -48(r1)
+	ld	r27, -40(r1)
+	ld	r28, -32(r1)
+	ld	r29, -24(r1)
+	ld	r30, -16(r1)
+	ld	r31, -8(r1)
+#else
 #ifdef __BIG_ENDIAN__
 	lmw	r14, 24(r1)
 #else