@@ -237,6 +237,7 @@ config ARM64
select HAVE_KPROBES
select HAVE_KRETPROBES
select HAVE_GENERIC_VDSO
+ select VDSO_GETRANDOM
select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
new file mode 100644
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_VDSO_GETRANDOM_H
+#define __ASM_VDSO_GETRANDOM_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/unistd.h>
+#include <vdso/datapage.h>
+
+/**
+ * getrandom_syscall - Invoke the getrandom() syscall.
+ * @buffer:	Destination buffer to fill with random bytes.
+ * @len:	Size of @buffer in bytes.
+ * @flags:	Zero or more GRND_* flags.
+ * Returns:	The number of random bytes written to @buffer, or a negative value indicating an error.
+ *
+ * The syscall number goes in x8 and the three arguments in x0-x2; the result
+ * comes back in x0, which is why x0 is both an input and the output operand.
+ */
+static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
+{
+	register long int x8 asm ("x8") = __NR_getrandom;
+	register long int x0 asm ("x0") = (long int) buffer;
+	register long int x1 asm ("x1") = (long int) len;
+	register long int x2 asm ("x2") = (long int) flags;
+
+	/*
+	 * volatile: the syscall has side effects, so it must never be elided,
+	 * merged with an identical call, or hoisted out of a loop.
+	 * "memory": the kernel writes into @buffer behind the compiler's back,
+	 * so no memory access may be cached or reordered across the svc.
+	 */
+	asm volatile ("svc 0"
+		      : "=r"(x0)
+		      : "r"(x8), "0"(x0), "r"(x1), "r"(x2)
+		      : "memory");
+
+	return x0;
+}
+
+/*
+ * Hand back the address of _vdso_rng_data, the symbol the vDSO linker script
+ * provides at _vdso_data + __VDSO_RND_DATA_OFFSET.
+ */
+static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void)
+{
+	const struct vdso_rng_data *rng_data = &_vdso_rng_data;
+
+	return rng_data;
+}
+
+/**
+ * __arch_chacha20_blocks_nostack - Generate ChaCha20 stream without using the stack.
+ * @dst_bytes:	Destination buffer to hold @nblocks * 64 bytes of output.
+ * @key:	32-byte input key.
+ * @counter:	8-byte counter, read on input and updated on return.
+ * @nblocks:	Number of blocks to generate.
+ *
+ * Generates a given positive number of blocks of ChaCha20 output with nonce=0, and does not write
+ * to any stack or memory outside of the parameters passed to it, in order to mitigate stack data
+ * leaking into forked child processes.
+ *
+ * The implementation is hand-written assembly (vgetrandom-chacha.S). @nblocks must be at least 1:
+ * the assembly decrements the count only after producing a block, so it has no zero-block path.
+ */
+extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETRANDOM_H */
@@ -2,6 +2,8 @@
#ifndef __ASM_VDSO_VSYSCALL_H
#define __ASM_VDSO_VSYSCALL_H
+/*
+ * Byte offset of the vdso_rng_data from the start of the vDSO data
+ * (_vdso_data). Defined outside the __ASSEMBLY__ guard because the vDSO
+ * linker script uses it as well as the kernel-side accessor.
+ * NOTE(review): the value has to stay past the end of struct vdso_data;
+ * nothing visible here enforces that - confirm, or add a build-time check.
+ */
+#define __VDSO_RND_DATA_OFFSET 480
+
#ifndef __ASSEMBLY__
#include <linux/timekeeper_internal.h>
@@ -21,6 +23,13 @@ struct vdso_data *__arm64_get_k_vdso_data(void)
}
#define __arch_get_k_vdso_data __arm64_get_k_vdso_data
+/*
+ * Kernel-side pointer to the RNG data: __VDSO_RND_DATA_OFFSET bytes past the
+ * address returned by __arm64_get_k_vdso_data().
+ */
+static __always_inline
+struct vdso_rng_data *__arm64_get_k_vdso_rnd_data(void)
+{
+	void *vdso_base = __arm64_get_k_vdso_data();
+
+	return vdso_base + __VDSO_RND_DATA_OFFSET;
+}
+#define __arch_get_k_vdso_rng_data __arm64_get_k_vdso_rnd_data
+
static __always_inline
void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk)
{
@@ -9,7 +9,7 @@
# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
-obj-vdso := vgettimeofday.o note.o sigreturn.o
+obj-vdso := vgettimeofday.o note.o sigreturn.o vgetrandom.o vgetrandom-chacha.o
# Build rules
targets := $(obj-vdso) vdso.so vdso.so.dbg
@@ -40,8 +40,13 @@ CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
$(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
$(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
-Wmissing-prototypes -Wmissing-declarations
+CFLAGS_REMOVE_vgetrandom.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
+ $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
+ $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
+ -Wmissing-prototypes -Wmissing-declarations
CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables
+CFLAGS_vgetrandom.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables
ifneq ($(c-gettimeofday-y),)
CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
@@ -12,6 +12,8 @@
#include <asm/page.h>
#include <asm/vdso.h>
#include <asm-generic/vmlinux.lds.h>
+#include <vdso/datapage.h>
+#include <asm/vdso/vsyscall.h>
OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64")
OUTPUT_ARCH(aarch64)
@@ -19,6 +21,7 @@ OUTPUT_ARCH(aarch64)
SECTIONS
{
PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+ PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET);
#ifdef CONFIG_TIME_NS
PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
#endif
@@ -102,6 +105,7 @@ VERSION
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
+ __kernel_getrandom;
local: *;
};
}
new file mode 100644
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/linkage.h>
+#include <asm/cache.h>
+
+ .text
+
+/*
+ * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive
+ * number of blocks of output with nonce 0, taking an input key and 8-byte
+ * counter. Importantly does not spill to the stack.
+ *
+ * void __arch_chacha20_blocks_nostack(u8 *dst_bytes,
+ *				       const u32 *key,
+ *				       u32 *counter,
+ *				       size_t nblocks)
+ *
+ *	x0: output bytes
+ *	x1: 32-byte key input
+ *	x2: 8-byte counter input/output
+ *	x3: number of 64-byte blocks to write to output (must be >= 1)
+ *
+ * Register roles:
+ *	v0-v3:	working state, one row of four 32-bit words each
+ *	v4:	temporary for the shl/sri rotates
+ *	v5:	copy of row 0, the "expand 32-byte k" constants
+ *	v6-v7:	copy of rows 1-2, the key
+ *	v16:	copy of row 3, counter || zero nonce
+ *	v17:	64-bit constant 1 used to advance the counter
+ *	v18:	TBL byte-permutation mask implementing rotl32(x, 8)
+ *	x8, x10, w4: scratch (constant addresses, round counter)
+ *
+ * Only v0-v7 and v16-v18 are touched. AAPCS64 requires a callee to preserve
+ * the low 64 bits of v8-v15, and this routine has nowhere to save them since
+ * it must not write to the stack.
+ *
+ * NOTE(review): the .4s loads/stores and the 64-bit counter load have only
+ * been reasoned about for little-endian; confirm behaviour on big-endian
+ * kernels.
+ */
+SYM_FUNC_START(__arch_chacha20_blocks_nostack)
+
+	/* v5 = "expand 32-byte k" */
+	adr_l		x8, CTES
+	ld1		{v5.4s}, [x8]
+	/* v6,v7 = key */
+	ld1		{ v6.4s, v7.4s }, [x1]
+	/* v16 = counter || zero nonce (the scalar load clears the upper 64 bits) */
+	ldr		d16, [x2]
+
+	/* v17 = counter increment */
+	adr_l		x8, ONE
+	ldr		q17, [x8]
+
+	/* v18 = byte shuffle mask for the rotate-left-by-8 */
+	adr_l		x10, ROT8
+	ld1		{v18.4s}, [x10]
+.Lblock:
+	/* copy the saved rows into the working state; the saved rows are added back after the permute. */
+	mov		v0.16b, v5.16b
+	mov		v1.16b, v6.16b
+	mov		v2.16b, v7.16b
+	mov		v3.16b, v16.16b
+
+	/* 20 rounds, consumed two at a time by the double round below */
+	mov		w4, #20
+	/*
+	 * Permute one 64-byte block where the state matrix is stored in the four NEON
+	 * registers v0-v3. It performs matrix operations on four words in parallel,
+	 * but requires shuffling to rearrange the words after each round.
+	 */
+
+.Ldoubleround:
+	/* x0 += x1, x3 = rotl32(x3 ^ x0, 16) */
+	add		v0.4s, v0.4s, v1.4s
+	eor		v3.16b, v3.16b, v0.16b
+	rev32		v3.8h, v3.8h
+
+	/* x2 += x3, x1 = rotl32(x1 ^ x2, 12) */
+	add		v2.4s, v2.4s, v3.4s
+	eor		v4.16b, v1.16b, v2.16b
+	shl		v1.4s, v4.4s, #12
+	sri		v1.4s, v4.4s, #20
+
+	/* x0 += x1, x3 = rotl32(x3 ^ x0, 8) */
+	add		v0.4s, v0.4s, v1.4s
+	eor		v3.16b, v3.16b, v0.16b
+	tbl		v3.16b, {v3.16b}, v18.16b
+
+	/* x2 += x3, x1 = rotl32(x1 ^ x2, 7) */
+	add		v2.4s, v2.4s, v3.4s
+	eor		v4.16b, v1.16b, v2.16b
+	shl		v1.4s, v4.4s, #7
+	sri		v1.4s, v4.4s, #25
+
+	/* x1 = shuffle32(x1, MASK(0, 3, 2, 1)) */
+	ext		v1.16b, v1.16b, v1.16b, #4
+	/* x2 = shuffle32(x2, MASK(1, 0, 3, 2)) */
+	ext		v2.16b, v2.16b, v2.16b, #8
+	/* x3 = shuffle32(x3, MASK(2, 1, 0, 3)) */
+	ext		v3.16b, v3.16b, v3.16b, #12
+
+	/* x0 += x1, x3 = rotl32(x3 ^ x0, 16) */
+	add		v0.4s, v0.4s, v1.4s
+	eor		v3.16b, v3.16b, v0.16b
+	rev32		v3.8h, v3.8h
+
+	/* x2 += x3, x1 = rotl32(x1 ^ x2, 12) */
+	add		v2.4s, v2.4s, v3.4s
+	eor		v4.16b, v1.16b, v2.16b
+	shl		v1.4s, v4.4s, #12
+	sri		v1.4s, v4.4s, #20
+
+	/* x0 += x1, x3 = rotl32(x3 ^ x0, 8) */
+	add		v0.4s, v0.4s, v1.4s
+	eor		v3.16b, v3.16b, v0.16b
+	tbl		v3.16b, {v3.16b}, v18.16b
+
+	/* x2 += x3, x1 = rotl32(x1 ^ x2, 7) */
+	add		v2.4s, v2.4s, v3.4s
+	eor		v4.16b, v1.16b, v2.16b
+	shl		v1.4s, v4.4s, #7
+	sri		v1.4s, v4.4s, #25
+
+	/* x1 = shuffle32(x1, MASK(2, 1, 0, 3)) */
+	ext		v1.16b, v1.16b, v1.16b, #12
+	/* x2 = shuffle32(x2, MASK(1, 0, 3, 2)) */
+	ext		v2.16b, v2.16b, v2.16b, #8
+	/* x3 = shuffle32(x3, MASK(0, 3, 2, 1)) */
+	ext		v3.16b, v3.16b, v3.16b, #4
+
+	subs		w4, w4, #2
+	b.ne		.Ldoubleround
+
+	/* output0 = v0 + copy0 */
+	add		v0.4s, v0.4s, v5.4s
+	/* output1 = v1 + copy1 */
+	add		v1.4s, v1.4s, v6.4s
+	/* output2 = v2 + copy2 */
+	add		v2.4s, v2.4s, v7.4s
+	/* output3 = v3 + copy3 */
+	add		v3.4s, v3.4s, v16.4s
+	st1		{ v0.4s - v3.4s }, [x0]
+
+	/* ++copy3.counter (64-bit add; the scalar write keeps the nonce half zero) */
+	add		d16, d16, d17
+
+	/* output += 64, --nblocks */
+	add		x0, x0, 64
+	subs		x3, x3, #1
+	b.ne		.Lblock
+
+	/* counter = copy3.counter */
+	str		d16, [x2]
+
+	/*
+	 * Zero out the potentially sensitive regs, in case nothing uses these again.
+	 * v4 is included: it still holds pre-rotation state from the last round.
+	 */
+	eor		v0.16b, v0.16b, v0.16b
+	eor		v1.16b, v1.16b, v1.16b
+	eor		v2.16b, v2.16b, v2.16b
+	eor		v3.16b, v3.16b, v3.16b
+	eor		v4.16b, v4.16b, v4.16b
+	eor		v6.16b, v6.16b, v6.16b
+	eor		v7.16b, v7.16b, v7.16b
+	ret
+SYM_FUNC_END(__arch_chacha20_blocks_nostack)
+
+ .section ".rodata", "a", %progbits
+ .align L1_CACHE_SHIFT
+
+CTES: .word 1634760805, 857760878, 2036477234, 1797285236
+ONE: .xword 1, 0
+ROT8: .word 0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f
+
+emit_aarch64_feature_1_and
new file mode 100644
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/types.h>
+#include <linux/mm.h>
+
+#include "../../../../lib/vdso/getrandom.c"
+
+/* Prototype for the vDSO symbol, declared ahead of its definition below. */
+ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags,
+			   void *opaque_state, size_t opaque_len);
+
+/*
+ * vDSO entry point for getrandom(): passes every argument through unchanged
+ * to the generic __cvdso_getrandom() and returns whatever it returns.
+ *
+ * NOTE(review): the arm64 ChaCha20 helper is written with NEON instructions;
+ * presumably the generic code reaches it from here, so confirm whether CPUs
+ * without FP/SIMD need a fallback to the plain syscall.
+ */
+ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags,
+			   void *opaque_state, size_t opaque_len)
+{
+	ssize_t ret;
+
+	ret = __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
+	return ret;
+}
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
uname_M := $(shell uname -m 2>/dev/null || echo not)
-ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/)
SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
TEST_GEN_PROGS := vdso_test_gettimeofday
@@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
TEST_GEN_PROGS += vdso_standalone_test_x86
endif
TEST_GEN_PROGS += vdso_test_correctness
-ifeq ($(uname_M),x86_64)
+ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M)))
TEST_GEN_PROGS += vdso_test_getrandom
ifneq ($(SODIUM),)
TEST_GEN_PROGS += vdso_test_chacha
Hook up the generic vDSO implementation to the aarch64 vDSO data page. The required _vdso_rng_data is placed within the _vdso_data vvar page, by using an offset larger than the size of vdso_data (__VDSO_RND_DATA_OFFSET). The vDSO function requires a ChaCha20 implementation that does not write to the stack, and that can do an entire ChaCha20 permutation. The one provided is based on the current chacha-neon-core.S and uses NEON on the permute operation. Signed-off-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/vdso/getrandom.h | 50 +++++++ arch/arm64/include/asm/vdso/vsyscall.h | 9 ++ arch/arm64/kernel/vdso/Makefile | 7 +- arch/arm64/kernel/vdso/vdso.lds.S | 4 + arch/arm64/kernel/vdso/vgetrandom-chacha.S | 153 +++++++++++++++++++++ arch/arm64/kernel/vdso/vgetrandom.c | 13 ++ tools/testing/selftests/vDSO/Makefile | 4 +- 8 files changed, 238 insertions(+), 3 deletions(-) create mode 100644 arch/arm64/include/asm/vdso/getrandom.h create mode 100644 arch/arm64/kernel/vdso/vgetrandom-chacha.S create mode 100644 arch/arm64/kernel/vdso/vgetrandom.c