Message ID: 20250310-v5_user_cfi_series-v11-0-86b36cbfb910@rivosinc.com
Series: riscv control-flow integrity for usermode
On Tue, Mar 11, 2025 at 1:50 AM Deepak Gupta <debug@rivosinc.com> wrote: > > Adds kselftest for RISC-V control flow integrity implementation for user > mode. There is not a lot going on in kernel for enabling landing pad for > user mode. cfi selftest are intended to be compiled with zicfilp and > zicfiss enabled compiler. Thus kselftest simply checks if landing pad and > shadow stack for the binary and process are enabled or not. selftest then > register a signal handler for SIGSEGV. Any control flow violation are > reported as SIGSEGV with si_code = SEGV_CPERR. Test will fail on receiving > any SEGV_CPERR. Shadow stack part has more changes in kernel and thus there > are separate tests for that > > - Exercise `map_shadow_stack` syscall > - `fork` test to make sure COW works for shadow stack pages > - gup tests > Kernel uses FOLL_FORCE when access happens to memory via > /proc/<pid>/mem. Not breaking that for shadow stack. > - signal test. Make sure signal delivery results in token creation on > shadow stack and consumes (and verifies) token on sigreturn > - shadow stack protection test. attempts to write using regular store > instruction on shadow stack memory must result in access faults > > Test outut > ========== > > """ > TAP version 13 > 1..5 > This is to ensure shadow stack is indeed enabled and working > This is to ensure shadow stack is indeed enabled and working > ok 1 shstk fork test > ok 2 map shadow stack syscall > ok 3 shadow stack gup tests > ok 4 shadow stack signal tests > ok 5 memory protections of shadow stack memory > """ > > Signed-off-by: Deepak Gupta <debug@rivosinc.com> > --- > tools/testing/selftests/riscv/Makefile | 2 +- > tools/testing/selftests/riscv/cfi/.gitignore | 3 + > tools/testing/selftests/riscv/cfi/Makefile | 10 + > tools/testing/selftests/riscv/cfi/cfi_rv_test.h | 84 +++++ > tools/testing/selftests/riscv/cfi/riscv_cfi_test.c | 78 +++++ > tools/testing/selftests/riscv/cfi/shadowstack.c | 375 +++++++++++++++++++++ > tools/testing/selftests/riscv/cfi/shadowstack.h | 37 ++ > 7 files changed, 588 insertions(+), 1 deletion(-) > > diff --git a/tools/testing/selftests/riscv/Makefile b/tools/testing/selftests/riscv/Makefile > index 099b8c1f46f8..5671b4405a12 100644 > --- a/tools/testing/selftests/riscv/Makefile > +++ b/tools/testing/selftests/riscv/Makefile > @@ -5,7 +5,7 @@ > ARCH ?= $(shell uname -m 2>/dev/null || echo not) > > ifneq (,$(filter $(ARCH),riscv)) > -RISCV_SUBTARGETS ?= abi hwprobe mm sigreturn vector > +RISCV_SUBTARGETS ?= abi hwprobe mm sigreturn vector cfi > else > RISCV_SUBTARGETS := > endif > diff --git a/tools/testing/selftests/riscv/cfi/.gitignore b/tools/testing/selftests/riscv/cfi/.gitignore > new file mode 100644 > index 000000000000..82545863bac6 > --- /dev/null > +++ b/tools/testing/selftests/riscv/cfi/.gitignore > @@ -0,0 +1,3 @@ > +cfitests > +riscv_cfi_test > +shadowstack > diff --git a/tools/testing/selftests/riscv/cfi/Makefile b/tools/testing/selftests/riscv/cfi/Makefile > new file mode 100644 > index 000000000000..b65f7ff38a32 > --- /dev/null > +++ b/tools/testing/selftests/riscv/cfi/Makefile > @@ -0,0 +1,10 @@ > +CFLAGS += -I$(top_srcdir)/tools/include > + > +CFLAGS += -march=rv64gc_zicfilp_zicfiss > + > +TEST_GEN_PROGS := cfitests > + > +include ../../lib.mk > + > +$(OUTPUT)/cfitests: riscv_cfi_test.c shadowstack.c > + $(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^ > diff --git a/tools/testing/selftests/riscv/cfi/cfi_rv_test.h b/tools/testing/selftests/riscv/cfi/cfi_rv_test.h > new file mode 100644 > index 000000000000..a9d5d6f8e29c > --- 
/dev/null > +++ b/tools/testing/selftests/riscv/cfi/cfi_rv_test.h > @@ -0,0 +1,84 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > + > +#ifndef SELFTEST_RISCV_CFI_H > +#define SELFTEST_RISCV_CFI_H > +#include <stddef.h> > +#include <sys/types.h> > +#include "shadowstack.h" > + > +#define RISCV_CFI_SELFTEST_COUNT RISCV_SHADOW_STACK_TESTS 'RISCV_CFI_SELFTEST_COUNT' doesn't seems to be used anywhere > + > +#define CHILD_EXIT_CODE_SSWRITE 10 > +#define CHILD_EXIT_CODE_SIG_TEST 11 > + > +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ > +({ \ > + register long _num __asm__ ("a7") = (num); \ > + register long _arg1 __asm__ ("a0") = (long)(arg1); \ > + register long _arg2 __asm__ ("a1") = (long)(arg2); \ > + register long _arg3 __asm__ ("a2") = (long)(arg3); \ > + register long _arg4 __asm__ ("a3") = (long)(arg4); \ > + register long _arg5 __asm__ ("a4") = (long)(arg5); \ > + \ > + __asm__ volatile( \ > + "ecall\n" \ > + : "+r" \ > + (_arg1) \ > + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ > + "r"(_num) \ > + : "memory", "cc" \ > + ); \ > + _arg1; \ > +}) > + > +#define my_syscall3(num, arg1, arg2, arg3) \ > +({ \ > + register long _num __asm__ ("a7") = (num); \ > + register long _arg1 __asm__ ("a0") = (long)(arg1); \ > + register long _arg2 __asm__ ("a1") = (long)(arg2); \ > + register long _arg3 __asm__ ("a2") = (long)(arg3); \ > + \ > + __asm__ volatile( \ > + "ecall\n" \ > + : "+r" (_arg1) \ > + : "r"(_arg2), "r"(_arg3), \ > + "r"(_num) \ > + : "memory", "cc" \ > + ); \ > + _arg1; \ > +}) > + > +#ifndef __NR_prctl > +#define __NR_prctl 167 > +#endif > + > +#ifndef __NR_map_shadow_stack > +#define __NR_map_shadow_stack 453 > +#endif > + > +#define CSR_SSP 0x011 > + > +#ifdef __ASSEMBLY__ > +#define __ASM_STR(x) x > +#else > +#define __ASM_STR(x) #x > +#endif > + > +#define csr_read(csr) \ > +({ \ > + register unsigned long __v; \ > + __asm__ __volatile__ ("csrr %0, " __ASM_STR(csr) \ > + : "=r" (__v) : \ > + : "memory"); \ > + __v; \ > +}) > + > +#define csr_write(csr, val) \ > +({ \ > + unsigned long __v = (unsigned long)(val); \ > + __asm__ __volatile__ ("csrw " __ASM_STR(csr) ", %0" \ > + : : "rK" (__v) \ > + : "memory"); \ > +}) > + > +#endif > diff --git a/tools/testing/selftests/riscv/cfi/riscv_cfi_test.c b/tools/testing/selftests/riscv/cfi/riscv_cfi_test.c > new file mode 100644 > index 000000000000..cf33aa25cc73 > --- /dev/null > +++ b/tools/testing/selftests/riscv/cfi/riscv_cfi_test.c > @@ -0,0 +1,78 @@ > +// SPDX-License-Identifier: GPL-2.0-only > + > +#include "../../kselftest.h" > +#include <sys/signal.h> > +#include <asm/ucontext.h> > +#include <linux/prctl.h> > +#include "cfi_rv_test.h" > + > +/* do not optimize cfi related test functions */ > +#pragma GCC push_options > +#pragma GCC optimize("O0") > + > +void sigsegv_handler(int signum, siginfo_t *si, void *uc) > +{ > + struct ucontext *ctx = (struct ucontext *)uc; > + > + if (si->si_code == SEGV_CPERR) { > + ksft_print_msg("Control flow violation happened somewhere\n"); > + ksft_print_msg("PC where violation happened %lx\n", ctx->uc_mcontext.gregs[0]); > + exit(-1); > + } > + > + /* all other cases are expected to be of shadow stack write case */ > + exit(CHILD_EXIT_CODE_SSWRITE); > +} > + > +bool register_signal_handler(void) > +{ > + struct sigaction sa = {}; > + > + sa.sa_sigaction = sigsegv_handler; > + sa.sa_flags = SA_SIGINFO; > + if (sigaction(SIGSEGV, &sa, NULL)) { > + ksft_print_msg("Registering signal handler for landing pad violation failed\n"); > + return false; > + } > + > + return true; > +} > 
+ > +int main(int argc, char *argv[]) > +{ > + int ret = 0; > + unsigned long lpad_status = 0, ss_status = 0; > + > + ksft_print_header(); > + > + ksft_print_msg("Starting risc-v tests\n"); > + > + /* > + * Landing pad test. Not a lot of kernel changes to support landing > + * pad for user mode except lighting up a bit in senvcfg via a prctl > + * Enable landing pad through out the execution of test binary > + */ > + ret = my_syscall5(__NR_prctl, PR_GET_INDIR_BR_LP_STATUS, &lpad_status, 0, 0, 0); > + if (ret) > + ksft_exit_fail_msg("Get landing pad status failed with %d\n", ret); > + > + if (!(lpad_status & PR_INDIR_BR_LP_ENABLE)) > + ksft_exit_fail_msg("Landing pad is not enabled, should be enabled via glibc\n"); > + > + ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &ss_status, 0, 0, 0); > + if (ret) > + ksft_exit_fail_msg("Get shadow stack failed with %d\n", ret); > + > + if (!(ss_status & PR_SHADOW_STACK_ENABLE)) > + ksft_exit_fail_msg("Shadow stack is not enabled, should be enabled via glibc\n"); > + > + if (!register_signal_handler()) > + ksft_exit_fail_msg("Registering signal handler for SIGSEGV failed\n"); > + > + ksft_print_msg("Landing pad and shadow stack are enabled for binary\n"); > + execute_shadow_stack_tests(); > + > + return 0; > +} > + > +#pragma GCC pop_options > diff --git a/tools/testing/selftests/riscv/cfi/shadowstack.c b/tools/testing/selftests/riscv/cfi/shadowstack.c > new file mode 100644 > index 000000000000..a0ef066e98ab > --- /dev/null > +++ b/tools/testing/selftests/riscv/cfi/shadowstack.c > @@ -0,0 +1,375 @@ > +// SPDX-License-Identifier: GPL-2.0-only > + > +#include "../../kselftest.h" > +#include <sys/wait.h> > +#include <signal.h> > +#include <fcntl.h> > +#include <asm-generic/unistd.h> > +#include <sys/mman.h> > +#include "shadowstack.h" > +#include "cfi_rv_test.h" > + > +/* do not optimize shadow stack related test functions */ > +#pragma GCC push_options > +#pragma GCC optimize("O0") > + > +void zar(void) > +{ > + unsigned long ssp = 0; > + > + ssp = csr_read(CSR_SSP); > + ksft_print_msg("Spewing out shadow stack ptr: %lx\n" > + " This is to ensure shadow stack is indeed enabled and working\n", > + ssp); > +} > + > +void bar(void) > +{ > + zar(); > +} > + > +void foo(void) > +{ > + bar(); > +} > + > +void zar_child(void) > +{ > + unsigned long ssp = 0; > + > + ssp = csr_read(CSR_SSP); > + ksft_print_msg("Spewing out shadow stack ptr: %lx\n" > + " This is to ensure shadow stack is indeed enabled and working\n", > + ssp); > +} > + > +void bar_child(void) > +{ > + zar_child(); > +} > + > +void foo_child(void) > +{ > + bar_child(); > +} > + > +typedef void (call_func_ptr)(void); > +/* > + * call couple of functions to test push pop. > + */ > +int shadow_stack_call_tests(call_func_ptr fn_ptr, bool parent) > +{ > + ksft_print_msg("dummy calls for sspush and sspopchk in context of %s\n", > + parent ? 
"parent" : "child"); > + > + (fn_ptr)(); > + > + return 0; > +} > + > +/* forks a thread, and ensure shadow stacks fork out */ > +bool shadow_stack_fork_test(unsigned long test_num, void *ctx) > +{ > + int pid = 0, child_status = 0, parent_pid = 0, ret = 0; > + unsigned long ss_status = 0; > + > + ksft_print_msg("Exercising shadow stack fork test\n"); > + > + ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &ss_status, 0, 0, 0); > + if (ret) { > + ksft_exit_skip("Shadow stack get status prctl failed with errorcode %d\n", ret); > + return false; > + } > + > + if (!(ss_status & PR_SHADOW_STACK_ENABLE)) > + ksft_exit_skip("Shadow stack is not enabled, should be enabled via glibc\n"); > + > + parent_pid = getpid(); > + pid = fork(); > + > + if (pid) { > + ksft_print_msg("Parent pid %d and child pid %d\n", parent_pid, pid); > + shadow_stack_call_tests(&foo, true); > + } else { > + shadow_stack_call_tests(&foo_child, false); > + } > + > + if (pid) { > + ksft_print_msg("Waiting on child to finish\n"); > + wait(&child_status); > + } else { > + /* exit child gracefully */ > + exit(0); > + } > + > + if (pid && WIFSIGNALED(child_status)) { > + ksft_print_msg("Child faulted, fork test failed\n"); > + return false; > + } > + > + return true; > +} > + > +/* exercise `map_shadow_stack`, pivot to it and call some functions to ensure it works */ > +#define SHADOW_STACK_ALLOC_SIZE 4096 > +bool shadow_stack_map_test(unsigned long test_num, void *ctx) > +{ > + unsigned long shdw_addr; > + int ret = 0; > + > + ksft_print_msg("Exercising shadow stack map test\n"); > + > + shdw_addr = my_syscall3(__NR_map_shadow_stack, NULL, SHADOW_STACK_ALLOC_SIZE, 0); > + > + if (((long)shdw_addr) <= 0) { > + ksft_print_msg("map_shadow_stack failed with error code %d\n", > + (int)shdw_addr); > + return false; > + } > + > + ret = munmap((void *)shdw_addr, SHADOW_STACK_ALLOC_SIZE); > + > + if (ret) { > + ksft_print_msg("munmap failed with error code %d\n", ret); > + return false; > + } > + > + return true; > +} > + > +/* > + * shadow stack protection tests. 
map a shadow stack and > + * validate all memory protections work on it > + */ > +bool shadow_stack_protection_test(unsigned long test_num, void *ctx) > +{ > + unsigned long shdw_addr; > + unsigned long *write_addr = NULL; > + int ret = 0, pid = 0, child_status = 0; > + > + ksft_print_msg("Exercising shadow stack protection test (WPT)\n"); > + > + shdw_addr = my_syscall3(__NR_map_shadow_stack, NULL, SHADOW_STACK_ALLOC_SIZE, 0); > + > + if (((long)shdw_addr) <= 0) { > + ksft_print_msg("map_shadow_stack failed with error code %d\n", > + (int)shdw_addr); > + return false; > + } > + > + write_addr = (unsigned long *)shdw_addr; > + pid = fork(); > + > + /* no child was created, return false */ > + if (pid == -1) > + return false; > + > + /* > + * try to perform a store from child on shadow stack memory > + * it should result in SIGSEGV > + */ > + if (!pid) { > + /* below write must lead to SIGSEGV */ > + *write_addr = 0xdeadbeef; > + } else { > + wait(&child_status); > + } > + > + /* test fail, if 0xdeadbeef present on shadow stack address */ > + if (*write_addr == 0xdeadbeef) { > + ksft_print_msg("Shadow stack WPT failed\n"); > + return false; > + } > + > + /* if child reached here, then fail */ > + if (!pid) { > + ksft_print_msg("Shadow stack WPT failed: child reached unreachable state\n"); > + return false; > + } > + > + /* if child exited via signal handler but not for write on ss */ > + if (WIFEXITED(child_status) && > + WEXITSTATUS(child_status) != CHILD_EXIT_CODE_SSWRITE) { > + ksft_print_msg("Shadow stack WPT failed: child wasn't signaled for write\n"); > + return false; > + } > + > + ret = munmap(write_addr, SHADOW_STACK_ALLOC_SIZE); > + if (ret) { > + ksft_print_msg("Shadow stack WPT failed: munmap failed, error code %d\n", > + ret); > + return false; > + } > + > + return true; > +} > + > +#define SS_MAGIC_WRITE_VAL 0xbeefdead > + > +int gup_tests(int mem_fd, unsigned long *shdw_addr) > +{ > + unsigned long val = 0; > + > + lseek(mem_fd, (unsigned long)shdw_addr, SEEK_SET); > + if (read(mem_fd, &val, sizeof(val)) < 0) { > + ksft_print_msg("Reading shadow stack mem via gup failed\n"); > + return 1; > + } > + > + val = SS_MAGIC_WRITE_VAL; > + lseek(mem_fd, (unsigned long)shdw_addr, SEEK_SET); > + if (write(mem_fd, &val, sizeof(val)) < 0) { > + ksft_print_msg("Writing shadow stack mem via gup failed\n"); > + return 1; > + } > + > + if (*shdw_addr != SS_MAGIC_WRITE_VAL) { > + ksft_print_msg("GUP write to shadow stack memory failed\n"); > + return 1; > + } > + > + return 0; > +} > + > +bool shadow_stack_gup_tests(unsigned long test_num, void *ctx) > +{ > + unsigned long shdw_addr = 0; > + unsigned long *write_addr = NULL; > + int fd = 0; > + bool ret = false; > + > + ksft_print_msg("Exercising shadow stack gup tests\n"); > + shdw_addr = my_syscall3(__NR_map_shadow_stack, NULL, SHADOW_STACK_ALLOC_SIZE, 0); > + > + if (((long)shdw_addr) <= 0) { > + ksft_print_msg("map_shadow_stack failed with error code %d\n", (int)shdw_addr); > + return false; > + } > + > + write_addr = (unsigned long *)shdw_addr; > + > + fd = open("/proc/self/mem", O_RDWR); > + if (fd == -1) > + return false; > + > + if (gup_tests(fd, write_addr)) { > + ksft_print_msg("gup tests failed\n"); > + goto out; > + } > + > + ret = true; > +out: > + if (shdw_addr && munmap(write_addr, SHADOW_STACK_ALLOC_SIZE)) { > + ksft_print_msg("munmap failed with error code %d\n", ret); > + ret = false; > + } > + > + return ret; > +} > + > +volatile bool break_loop; > + > +void sigusr1_handler(int signo) > +{ > + break_loop = true; > +} > + > 
+bool sigusr1_signal_test(void) > +{ > + struct sigaction sa = {}; > + > + sa.sa_handler = sigusr1_handler; > + sa.sa_flags = 0; > + sigemptyset(&sa.sa_mask); > + if (sigaction(SIGUSR1, &sa, NULL)) { > + ksft_print_msg("Registering signal handler for SIGUSR1 failed\n"); > + return false; > + } > + > + return true; > +} > + > +/* > + * shadow stack signal test. shadow stack must be enabled. > + * register a signal, fork another thread which is waiting > + * on signal. Send a signal from parent to child, verify > + * that signal was received by child. If not test fails > + */ > +bool shadow_stack_signal_test(unsigned long test_num, void *ctx) > +{ > + int pid = 0, child_status = 0, ret = 0; > + unsigned long ss_status = 0; > + > + ksft_print_msg("Exercising shadow stack signal test\n"); > + > + ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &ss_status, 0, 0, 0); > + if (ret) { > + ksft_print_msg("Shadow stack get status prctl failed with errorcode %d\n", ret); > + return false; > + } > + > + if (!(ss_status & PR_SHADOW_STACK_ENABLE)) > + ksft_print_msg("Shadow stack is not enabled, should be enabled via glibc\n"); > + > + /* this should be caught by signal handler and do an exit */ > + if (!sigusr1_signal_test()) { > + ksft_print_msg("Registering sigusr1 handler failed\n"); > + exit(-1); > + } > + > + pid = fork(); > + > + if (pid == -1) { > + ksft_print_msg("Signal test: fork failed\n"); > + goto out; > + } > + > + if (pid == 0) { > + while (!break_loop) > + sleep(1); > + > + exit(11); > + /* child shouldn't go beyond here */ > + } > + > + /* send SIGUSR1 to child */ > + kill(pid, SIGUSR1); > + wait(&child_status); > + > +out: > + > + return (WIFEXITED(child_status) && > + WEXITSTATUS(child_status) == 11); > +} > + > +int execute_shadow_stack_tests(void) > +{ > + int ret = 0; > + unsigned long test_count = 0; > + unsigned long shstk_status = 0; > + bool test_pass = false; > + > + ksft_print_msg("Executing RISC-V shadow stack self tests\n"); > + ksft_set_plan(RISCV_SHADOW_STACK_TESTS); > + > + ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &shstk_status, 0, 0, 0); > + > + if (ret != 0) > + ksft_exit_fail_msg("Get shadow stack status failed with %d\n", ret); > + > + /* > + * If we are here that means get shadow stack status succeeded and > + * thus shadow stack support is baked in the kernel. > + */ > + while (test_count < ARRAY_SIZE(shstk_tests)) { I saw we have RISCV_SHADOW_STACK_TESTS for ARRAY_SIZE(shstk_tests), perhaps we can use that macro. > + test_pass = (*shstk_tests[test_count].t_func)(test_count, NULL); > + ksft_test_result(test_pass, shstk_tests[test_count].name); > + test_count++; > + } > + > + ksft_finished(); > + > + return 0; > +} > + > +#pragma GCC pop_options > diff --git a/tools/testing/selftests/riscv/cfi/shadowstack.h b/tools/testing/selftests/riscv/cfi/shadowstack.h > new file mode 100644 > index 000000000000..b43e74136a26 > --- /dev/null > +++ b/tools/testing/selftests/riscv/cfi/shadowstack.h > @@ -0,0 +1,37 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > + > +#ifndef SELFTEST_SHADOWSTACK_TEST_H > +#define SELFTEST_SHADOWSTACK_TEST_H > +#include <stddef.h> > +#include <linux/prctl.h> > + > +/* > + * a cfi test returns true for success or false for fail > + * takes a number for test number to index into array and void pointer. 
> + */ > +typedef bool (*shstk_test_func)(unsigned long test_num, void *); > + > +struct shadow_stack_tests { > + char *name; > + shstk_test_func t_func; > +}; > + > +bool shadow_stack_fork_test(unsigned long test_num, void *ctx); > +bool shadow_stack_map_test(unsigned long test_num, void *ctx); > +bool shadow_stack_protection_test(unsigned long test_num, void *ctx); > +bool shadow_stack_gup_tests(unsigned long test_num, void *ctx); > +bool shadow_stack_signal_test(unsigned long test_num, void *ctx); > + > +static struct shadow_stack_tests shstk_tests[] = { > + { "shstk fork test\n", shadow_stack_fork_test }, > + { "map shadow stack syscall\n", shadow_stack_map_test }, > + { "shadow stack gup tests\n", shadow_stack_gup_tests }, > + { "shadow stack signal tests\n", shadow_stack_signal_test}, > + { "memory protections of shadow stack memory\n", shadow_stack_protection_test } > +}; > + > +#define RISCV_SHADOW_STACK_TESTS ARRAY_SIZE(shstk_tests) I still got the following compile warning, maybe we can move them to the shadowstack.c shadowstack.h:25:34: warning: 'shstk_tests' defined but not used [-Wunused-variable] 25 | static struct shadow_stack_tests shstk_tests[] = { | ^~~~~~~~~~~ > + > +int execute_shadow_stack_tests(void); > + > +#endif > > -- > 2.34.1 > > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
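The -Wunused-variable warning reported above comes from defining a static array in a header that is included by more than one translation unit. A minimal illustrative fix (not part of the posted series) is to keep only a declaration in shadowstack.h and move the definition into shadowstack.c, exporting the test count through a variable since ARRAY_SIZE() needs the full array definition:

    /* shadowstack.h: declaration only, so riscv_cfi_test.c no longer pulls in
     * an unused static array definition. */
    extern struct shadow_stack_tests shstk_tests[];
    extern const unsigned long shstk_test_count;

    /* shadowstack.c: single definition; shstk_test_count can then replace the
     * RISCV_SHADOW_STACK_TESTS / ARRAY_SIZE(shstk_tests) uses in ksft_set_plan()
     * and in the test loop. */
    struct shadow_stack_tests shstk_tests[] = {
            { "shstk fork test\n", shadow_stack_fork_test },
            { "map shadow stack syscall\n", shadow_stack_map_test },
            { "shadow stack gup tests\n", shadow_stack_gup_tests },
            { "shadow stack signal tests\n", shadow_stack_signal_test },
            { "memory protections of shadow stack memory\n", shadow_stack_protection_test },
    };
    const unsigned long shstk_test_count = ARRAY_SIZE(shstk_tests);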
On Mon, Mar 10, 2025 at 11:42 PM Deepak Gupta <debug@rivosinc.com> wrote: > > Three architectures (x86, aarch64, riscv) have support for indirect branch > tracking feature in a very similar fashion. On a very high level, indirect > branch tracking is a CPU feature where CPU tracks branches which uses > memory operand to perform control transfer in program. As part of this > tracking on indirect branches, CPU goes in a state where it expects a > landing pad instr on target and if not found then CPU raises some fault > (architecture dependent) > > x86 landing pad instr - `ENDBRANCH` > aarch64 landing pad instr - `BTI` > riscv landing instr - `lpad` > > Given that three major arches have support for indirect branch tracking, > This patch makes `prctl` for indirect branch tracking arch agnostic. > > To allow userspace to enable this feature for itself, following prtcls are > defined: > - PR_GET_INDIR_BR_LP_STATUS: Gets current configured status for indirect > branch tracking. > - PR_SET_INDIR_BR_LP_STATUS: Sets a configuration for indirect branch > tracking. > Following status options are allowed > - PR_INDIR_BR_LP_ENABLE: Enables indirect branch tracking on user > thread. > - PR_INDIR_BR_LP_DISABLE; Disables indirect branch tracking on user > thread. > - PR_LOCK_INDIR_BR_LP_STATUS: Locks configured status for indirect branch > tracking for user thread. > > Signed-off-by: Deepak Gupta <debug@rivosinc.com> > Reviewed-by: Mark Brown <broonie@kernel.org> > --- > arch/riscv/include/asm/usercfi.h | 16 ++++++++- > arch/riscv/kernel/entry.S | 2 +- > arch/riscv/kernel/process.c | 5 +++ > arch/riscv/kernel/usercfi.c | 76 ++++++++++++++++++++++++++++++++++++++++ > include/linux/cpu.h | 4 +++ > include/uapi/linux/prctl.h | 27 ++++++++++++++ > kernel/sys.c | 30 ++++++++++++++++ > 7 files changed, 158 insertions(+), 2 deletions(-) > > diff --git a/arch/riscv/include/asm/usercfi.h b/arch/riscv/include/asm/usercfi.h > index c4dcd256f19a..a8cec7c14d1d 100644 > --- a/arch/riscv/include/asm/usercfi.h > +++ b/arch/riscv/include/asm/usercfi.h > @@ -16,7 +16,9 @@ struct kernel_clone_args; > struct cfi_status { > unsigned long ubcfi_en : 1; /* Enable for backward cfi. */ > unsigned long ubcfi_locked : 1; > - unsigned long rsvd : ((sizeof(unsigned long) * 8) - 2); > + unsigned long ufcfi_en : 1; /* Enable for forward cfi. 
Note that ELP goes in sstatus */ > + unsigned long ufcfi_locked : 1; > + unsigned long rsvd : ((sizeof(unsigned long) * 8) - 4); > unsigned long user_shdw_stk; /* Current user shadow stack pointer */ > unsigned long shdw_stk_base; /* Base address of shadow stack */ > unsigned long shdw_stk_size; /* size of shadow stack */ > @@ -33,6 +35,10 @@ bool is_shstk_locked(struct task_struct *task); > bool is_shstk_allocated(struct task_struct *task); > void set_shstk_lock(struct task_struct *task); > void set_shstk_status(struct task_struct *task, bool enable); > +bool is_indir_lp_enabled(struct task_struct *task); > +bool is_indir_lp_locked(struct task_struct *task); > +void set_indir_lp_status(struct task_struct *task, bool enable); > +void set_indir_lp_lock(struct task_struct *task); > > #define PR_SHADOW_STACK_SUPPORTED_STATUS_MASK (PR_SHADOW_STACK_ENABLE) > > @@ -58,6 +64,14 @@ void set_shstk_status(struct task_struct *task, bool enable); > > #define set_shstk_status(task, enable) > > +#define is_indir_lp_enabled(task) false > + > +#define is_indir_lp_locked(task) false > + > +#define set_indir_lp_status(task, enable) > + > +#define set_indir_lp_lock(task) > + > #endif /* CONFIG_RISCV_USER_CFI */ > > #endif /* __ASSEMBLY__ */ > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S > index 68c99124ea55..00494b54ff4a 100644 > --- a/arch/riscv/kernel/entry.S > +++ b/arch/riscv/kernel/entry.S > @@ -143,7 +143,7 @@ SYM_CODE_START(handle_exception) > * Disable the FPU/Vector to detect illegal usage of floating point > * or vector in kernel space. > */ > - li t0, SR_SUM | SR_FS_VS > + li t0, SR_SUM | SR_FS_VS | SR_ELP > > REG_L s0, TASK_TI_USER_SP(tp) > csrrc s1, CSR_STATUS, t0 > diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c > index cd11667593fe..4587201dd81d 100644 > --- a/arch/riscv/kernel/process.c > +++ b/arch/riscv/kernel/process.c > @@ -160,6 +160,11 @@ void start_thread(struct pt_regs *regs, unsigned long pc, > set_shstk_status(current, false); > set_shstk_base(current, 0, 0); > set_active_shstk(current, 0); > + /* > + * disable indirect branch tracking on exec. > + * libc will enable it later via prctl. > + */ > + set_indir_lp_status(current, false); In set_indir_lp_status and set_shstk_status, the $senvcfg.LPE and $senvcfg.SSE fields are set. However, if the CPU does not support this CSR, writing to it will trigger an illegal instruction exception. Should we add sanity checks to handle this situation? Thanks > > #ifdef CONFIG_64BIT > regs->status &= ~SR_UXL; > diff --git a/arch/riscv/kernel/usercfi.c b/arch/riscv/kernel/usercfi.c > index 37d6fb8144e7..3a66f149a4ef 100644 > --- a/arch/riscv/kernel/usercfi.c > +++ b/arch/riscv/kernel/usercfi.c > @@ -69,6 +69,32 @@ void set_shstk_lock(struct task_struct *task) > task->thread_info.user_cfi_state.ubcfi_locked = 1; > } > > +bool is_indir_lp_enabled(struct task_struct *task) > +{ > + return task->thread_info.user_cfi_state.ufcfi_en ? true : false; > +} > + > +bool is_indir_lp_locked(struct task_struct *task) > +{ > + return task->thread_info.user_cfi_state.ufcfi_locked ? true : false; > +} > + > +void set_indir_lp_status(struct task_struct *task, bool enable) > +{ > + task->thread_info.user_cfi_state.ufcfi_en = enable ? 
1 : 0; > + > + if (enable) > + task->thread.envcfg |= ENVCFG_LPE; > + else > + task->thread.envcfg &= ~ENVCFG_LPE; > + > + csr_write(CSR_ENVCFG, task->thread.envcfg); > +} > + > +void set_indir_lp_lock(struct task_struct *task) > +{ > + task->thread_info.user_cfi_state.ufcfi_locked = 1; > +} > /* > * If size is 0, then to be compatible with regular stack we want it to be as big as > * regular stack. Else PAGE_ALIGN it and return back > @@ -369,3 +395,53 @@ int arch_lock_shadow_stack_status(struct task_struct *task, > > return 0; > } > + > +int arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status) > +{ > + unsigned long fcfi_status = 0; > + > + if (!cpu_supports_indirect_br_lp_instr()) > + return -EINVAL; > + > + /* indirect branch tracking is enabled on the task or not */ > + fcfi_status |= (is_indir_lp_enabled(t) ? PR_INDIR_BR_LP_ENABLE : 0); > + > + return copy_to_user(status, &fcfi_status, sizeof(fcfi_status)) ? -EFAULT : 0; > +} > + > +int arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status) > +{ > + bool enable_indir_lp = false; > + > + if (!cpu_supports_indirect_br_lp_instr()) > + return -EINVAL; > + > + /* indirect branch tracking is locked and further can't be modified by user */ > + if (is_indir_lp_locked(t)) > + return -EINVAL; > + > + /* Reject unknown flags */ > + if (status & ~PR_INDIR_BR_LP_ENABLE) > + return -EINVAL; > + > + enable_indir_lp = (status & PR_INDIR_BR_LP_ENABLE) ? true : false; > + set_indir_lp_status(t, enable_indir_lp); > + > + return 0; > +} > + > +int arch_lock_indir_br_lp_status(struct task_struct *task, > + unsigned long arg) > +{ > + /* > + * If indirect branch tracking is not supported or not enabled on task, > + * nothing to lock here > + */ > + if (!cpu_supports_indirect_br_lp_instr() || > + !is_indir_lp_enabled(task) || arg != 0) > + return -EINVAL; > + > + set_indir_lp_lock(task); > + > + return 0; > +} > diff --git a/include/linux/cpu.h b/include/linux/cpu.h > index 6a0a8f1c7c90..fb0c394430c6 100644 > --- a/include/linux/cpu.h > +++ b/include/linux/cpu.h > @@ -204,4 +204,8 @@ static inline bool cpu_mitigations_auto_nosmt(void) > } > #endif > > +int arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status); > +int arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status); > +int arch_lock_indir_br_lp_status(struct task_struct *t, unsigned long status); > + > #endif /* _LINUX_CPU_H_ */ > diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h > index 5c6080680cb2..6cd90460cbad 100644 > --- a/include/uapi/linux/prctl.h > +++ b/include/uapi/linux/prctl.h > @@ -353,4 +353,31 @@ struct prctl_mm_map { > */ > #define PR_LOCK_SHADOW_STACK_STATUS 76 > > +/* > + * Get the current indirect branch tracking configuration for the current > + * thread, this will be the value configured via PR_SET_INDIR_BR_LP_STATUS. > + */ > +#define PR_GET_INDIR_BR_LP_STATUS 77 > + > +/* > + * Set the indirect branch tracking configuration. PR_INDIR_BR_LP_ENABLE will > + * enable cpu feature for user thread, to track all indirect branches and ensure > + * they land on arch defined landing pad instruction. > + * x86 - If enabled, an indirect branch must land on `ENDBRANCH` instruction. > + * arch64 - If enabled, an indirect branch must land on `BTI` instruction. > + * riscv - If enabled, an indirect branch must land on `lpad` instruction. 
> + * PR_INDIR_BR_LP_DISABLE will disable feature for user thread and indirect > + * branches will no more be tracked by cpu to land on arch defined landing pad > + * instruction. > + */ > +#define PR_SET_INDIR_BR_LP_STATUS 78 > +# define PR_INDIR_BR_LP_ENABLE (1UL << 0) > + > +/* > + * Prevent further changes to the specified indirect branch tracking > + * configuration. All bits may be locked via this call, including > + * undefined bits. > + */ > +#define PR_LOCK_INDIR_BR_LP_STATUS 79 > + > #endif /* _LINUX_PRCTL_H */ > diff --git a/kernel/sys.c b/kernel/sys.c > index cb366ff8703a..f347f3518d0b 100644 > --- a/kernel/sys.c > +++ b/kernel/sys.c > @@ -2336,6 +2336,21 @@ int __weak arch_lock_shadow_stack_status(struct task_struct *t, unsigned long st > return -EINVAL; > } > > +int __weak arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status) > +{ > + return -EINVAL; > +} > + > +int __weak arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status) > +{ > + return -EINVAL; > +} > + > +int __weak arch_lock_indir_br_lp_status(struct task_struct *t, unsigned long status) > +{ > + return -EINVAL; > +} > + > #define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE) > > #ifdef CONFIG_ANON_VMA_NAME > @@ -2811,6 +2826,21 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, > return -EINVAL; > error = arch_lock_shadow_stack_status(me, arg2); > break; > + case PR_GET_INDIR_BR_LP_STATUS: > + if (arg3 || arg4 || arg5) > + return -EINVAL; > + error = arch_get_indir_br_lp_status(me, (unsigned long __user *)arg2); > + break; > + case PR_SET_INDIR_BR_LP_STATUS: > + if (arg3 || arg4 || arg5) > + return -EINVAL; > + error = arch_set_indir_br_lp_status(me, arg2); > + break; > + case PR_LOCK_INDIR_BR_LP_STATUS: > + if (arg3 || arg4 || arg5) > + return -EINVAL; > + error = arch_lock_indir_br_lp_status(me, arg2); > + break; > default: > trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5); > error = -EINVAL; > > -- > 2.34.1 > > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
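For reference, the new prctl surface can be exercised from userspace with plain prctl() calls. The sketch below only demonstrates the interface added in this patch; the prctl numbers are copied from the uapi hunk above, and the fallback defines are for headers that do not carry them yet. In practice the C library is expected to enable tracking at startup for suitably built binaries, so a program flipping it mid-run like this is illustrative only:

    #include <stdio.h>
    #include <sys/prctl.h>

    #ifndef PR_GET_INDIR_BR_LP_STATUS
    #define PR_GET_INDIR_BR_LP_STATUS  77
    #define PR_SET_INDIR_BR_LP_STATUS  78
    #define PR_INDIR_BR_LP_ENABLE      (1UL << 0)
    #define PR_LOCK_INDIR_BR_LP_STATUS 79
    #endif

    int main(void)
    {
            unsigned long status = 0;

            /* unused arguments must be zero, as checked in kernel/sys.c */
            if (prctl(PR_GET_INDIR_BR_LP_STATUS, &status, 0, 0, 0))
                    return 1;       /* fails with EINVAL if kernel/CPU lack the feature */

            if (!(status & PR_INDIR_BR_LP_ENABLE) &&
                prctl(PR_SET_INDIR_BR_LP_STATUS, PR_INDIR_BR_LP_ENABLE, 0, 0, 0))
                    return 1;

            /* lock takes no flags; any non-zero argument is rejected */
            if (prctl(PR_LOCK_INDIR_BR_LP_STATUS, 0, 0, 0, 0))
                    return 1;

            printf("indirect branch tracking enabled and locked\n");
            return 0;
    }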
On Mon, Mar 10, 2025 at 11:42 PM Deepak Gupta <debug@rivosinc.com> wrote: > > Carves out space in arch specific thread struct for cfi status and shadow > stack in usermode on riscv. > > This patch does following > - defines a new structure cfi_status with status bit for cfi feature > - defines shadow stack pointer, base and size in cfi_status structure > - defines offsets to new member fields in thread in asm-offsets.c > - Saves and restore shadow stack pointer on trap entry (U --> S) and exit > (S --> U) > > Shadow stack save/restore is gated on feature availiblity and implemented > using alternative. CSR can be context switched in `switch_to` as well but > soon as kernel shadow stack support gets rolled in, shadow stack pointer > will need to be switched at trap entry/exit point (much like `sp`). It can > be argued that kernel using shadow stack deployment scenario may not be as > prevalant as user mode using this feature. But even if there is some > minimal deployment of kernel shadow stack, that means that it needs to be > supported. And thus save/restore of shadow stack pointer in entry.S instead > of in `switch_to.h`. > > Signed-off-by: Deepak Gupta <debug@rivosinc.com> > Reviewed-by: Charlie Jenkins <charlie@rivosinc.com> > --- > arch/riscv/include/asm/processor.h | 1 + > arch/riscv/include/asm/thread_info.h | 3 +++ > arch/riscv/include/asm/usercfi.h | 24 ++++++++++++++++++++++++ > arch/riscv/kernel/asm-offsets.c | 4 ++++ > arch/riscv/kernel/entry.S | 26 ++++++++++++++++++++++++++ > 5 files changed, 58 insertions(+) > > diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h > index e3aba3336e63..d851bb5c6da0 100644 > --- a/arch/riscv/include/asm/processor.h > +++ b/arch/riscv/include/asm/processor.h > @@ -14,6 +14,7 @@ > > #include <asm/ptrace.h> > #include <asm/hwcap.h> > +#include <asm/usercfi.h> > > #define arch_get_mmap_end(addr, len, flags) \ > ({ \ > diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h > index f5916a70879a..a0cfe00c2ca6 100644 > --- a/arch/riscv/include/asm/thread_info.h > +++ b/arch/riscv/include/asm/thread_info.h > @@ -62,6 +62,9 @@ struct thread_info { > long user_sp; /* User stack pointer */ > int cpu; > unsigned long syscall_work; /* SYSCALL_WORK_ flags */ > +#ifdef CONFIG_RISCV_USER_CFI > + struct cfi_status user_cfi_state; > +#endif > #ifdef CONFIG_SHADOW_CALL_STACK > void *scs_base; > void *scs_sp; > diff --git a/arch/riscv/include/asm/usercfi.h b/arch/riscv/include/asm/usercfi.h > new file mode 100644 > index 000000000000..5f2027c51917 > --- /dev/null > +++ b/arch/riscv/include/asm/usercfi.h > @@ -0,0 +1,24 @@ > +/* SPDX-License-Identifier: GPL-2.0 > + * Copyright (C) 2024 Rivos, Inc. > + * Deepak Gupta <debug@rivosinc.com> > + */ > +#ifndef _ASM_RISCV_USERCFI_H > +#define _ASM_RISCV_USERCFI_H > + > +#ifndef __ASSEMBLY__ > +#include <linux/types.h> > + > +#ifdef CONFIG_RISCV_USER_CFI > +struct cfi_status { > + unsigned long ubcfi_en : 1; /* Enable for backward cfi. 
*/ > + unsigned long rsvd : ((sizeof(unsigned long) * 8) - 1); > + unsigned long user_shdw_stk; /* Current user shadow stack pointer */ > + unsigned long shdw_stk_base; /* Base address of shadow stack */ > + unsigned long shdw_stk_size; /* size of shadow stack */ > +}; > + > +#endif /* CONFIG_RISCV_USER_CFI */ > + > +#endif /* __ASSEMBLY__ */ > + > +#endif /* _ASM_RISCV_USERCFI_H */ > diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c > index e89455a6a0e5..0c188aaf3925 100644 > --- a/arch/riscv/kernel/asm-offsets.c > +++ b/arch/riscv/kernel/asm-offsets.c > @@ -50,6 +50,10 @@ void asm_offsets(void) > #endif > > OFFSET(TASK_TI_CPU_NUM, task_struct, thread_info.cpu); > +#ifdef CONFIG_RISCV_USER_CFI > + OFFSET(TASK_TI_CFI_STATUS, task_struct, thread_info.user_cfi_state); > + OFFSET(TASK_TI_USER_SSP, task_struct, thread_info.user_cfi_state.user_shdw_stk); > +#endif > OFFSET(TASK_THREAD_F0, task_struct, thread.fstate.f[0]); > OFFSET(TASK_THREAD_F1, task_struct, thread.fstate.f[1]); > OFFSET(TASK_THREAD_F2, task_struct, thread.fstate.f[2]); > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S > index 33a5a9f2a0d4..68c99124ea55 100644 > --- a/arch/riscv/kernel/entry.S > +++ b/arch/riscv/kernel/entry.S > @@ -147,6 +147,20 @@ SYM_CODE_START(handle_exception) > > REG_L s0, TASK_TI_USER_SP(tp) > csrrc s1, CSR_STATUS, t0 > + /* > + * If previous mode was U, capture shadow stack pointer and save it away > + * Zero CSR_SSP at the same time for sanitization. > + */ > + ALTERNATIVE("nop; nop; nop; nop", > + __stringify( \ > + andi s2, s1, SR_SPP; \ > + bnez s2, skip_ssp_save; \ > + csrrw s2, CSR_SSP, x0; \ > + REG_S s2, TASK_TI_USER_SSP(tp); \ > + skip_ssp_save:), > + 0, > + RISCV_ISA_EXT_ZICFISS, > + CONFIG_RISCV_USER_CFI) > csrr s2, CSR_EPC > csrr s3, CSR_TVAL > csrr s4, CSR_CAUSE > @@ -236,6 +250,18 @@ SYM_CODE_START_NOALIGN(ret_from_exception) > * structures again. > */ > csrw CSR_SCRATCH, tp > + > + /* > + * Going back to U mode, restore shadow stack pointer > + */ > + ALTERNATIVE("nop; nop", > + __stringify( \ > + REG_L s3, TASK_TI_USER_SSP(tp); \ > + csrw CSR_SSP, s3), > + 0, > + RISCV_ISA_EXT_ZICFISS, > + CONFIG_RISCV_USER_CFI) > + > 1: > #ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE > move a0, sp > LGTM. Reviewed-by: Zong Li <zong.li@sifive.com> > -- > 2.34.1 > > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
On Mon, Mar 10, 2025 at 11:42 PM Deepak Gupta <debug@rivosinc.com> wrote: > > This patch implements creating shadow stack pte (on riscv). Creating > shadow stack PTE on riscv means that clearing RWX and then setting W=1. > > Signed-off-by: Deepak Gupta <debug@rivosinc.com> > Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com> > --- > arch/riscv/include/asm/pgtable.h | 10 ++++++++++ > 1 file changed, 10 insertions(+) > > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h > index 8c528cd7347a..ede43185ffdf 100644 > --- a/arch/riscv/include/asm/pgtable.h > +++ b/arch/riscv/include/asm/pgtable.h > @@ -421,6 +421,11 @@ static inline pte_t pte_mkwrite_novma(pte_t pte) > return __pte(pte_val(pte) | _PAGE_WRITE); > } > > +static inline pte_t pte_mkwrite_shstk(pte_t pte) > +{ > + return __pte((pte_val(pte) & ~(_PAGE_LEAF)) | _PAGE_WRITE); > +} > + > /* static inline pte_t pte_mkexec(pte_t pte) */ > > static inline pte_t pte_mkdirty(pte_t pte) > @@ -749,6 +754,11 @@ static inline pmd_t pmd_mkwrite_novma(pmd_t pmd) > return pte_pmd(pte_mkwrite_novma(pmd_pte(pmd))); > } > > +static inline pmd_t pmd_mkwrite_shstk(pmd_t pte) > +{ > + return __pmd((pmd_val(pte) & ~(_PAGE_LEAF)) | _PAGE_WRITE); > +} > + > static inline pmd_t pmd_wrprotect(pmd_t pmd) > { > return pte_pmd(pte_wrprotect(pmd_pte(pmd))); > LGTM. Reviewed-by: Zong Li <zong.li@sifive.com> > -- > 2.34.1 > > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
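As a quick reference for the encoding the new helpers produce (this only summarizes the privileged-spec leaf PTE permission table, it adds nothing beyond the patch above):

    /* Leaf PTE permission bits with V = 1:
     *   XWR = 000  pointer to the next page table level (not a leaf)
     *   XWR = 001  read-only page
     *   XWR = 010  otherwise-reserved encoding, used by Zicfiss for shadow stack pages
     *   XWR = 011  read-write page
     *
     * pte_mkwrite_shstk()/pmd_mkwrite_shstk() clear X/W/R via ~_PAGE_LEAF and then
     * set _PAGE_WRITE, yielding XWR = 010, i.e. a shadow stack leaf entry.
     */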
On Mon, Mar 10, 2025 at 11:42 PM Deepak Gupta <debug@rivosinc.com> wrote: > > `fork` implements copy on write (COW) by making pages readonly in child > and parent both. > > ptep_set_wrprotect and pte_wrprotect clears _PAGE_WRITE in PTE. > Assumption is that page is readable and on fault copy on write happens. > > To implement COW on shadow stack pages, clearing up W bit makes them XWR = > 000. This will result in wrong PTE setting which says no perms but V=1 and > PFN field pointing to final page. Instead desired behavior is to turn it > into a readable page, take an access (load/store) fault on sspush/sspop > (shadow stack) and then perform COW on such pages. This way regular reads > would still be allowed and not lead to COW maintaining current behavior > of COW on non-shadow stack but writeable memory. > > On the other hand it doesn't interfere with existing COW for read-write > memory. Assumption is always that _PAGE_READ must have been set and thus > setting _PAGE_READ is harmless. > > Signed-off-by: Deepak Gupta <debug@rivosinc.com> > Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com> > --- > arch/riscv/include/asm/pgtable.h | 12 ++++++++++-- > 1 file changed, 10 insertions(+), 2 deletions(-) > > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h > index ccd2fa34afb8..54707686f042 100644 > --- a/arch/riscv/include/asm/pgtable.h > +++ b/arch/riscv/include/asm/pgtable.h > @@ -411,7 +411,7 @@ static inline int pte_devmap(pte_t pte) > > static inline pte_t pte_wrprotect(pte_t pte) > { > - return __pte(pte_val(pte) & ~(_PAGE_WRITE)); > + return __pte((pte_val(pte) & ~(_PAGE_WRITE)) | (_PAGE_READ)); > } > > /* static inline pte_t pte_mkread(pte_t pte) */ > @@ -612,7 +612,15 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, > static inline void ptep_set_wrprotect(struct mm_struct *mm, > unsigned long address, pte_t *ptep) > { > - atomic_long_and(~(unsigned long)_PAGE_WRITE, (atomic_long_t *)ptep); > + pte_t read_pte = READ_ONCE(*ptep); > + /* > + * ptep_set_wrprotect can be called for shadow stack ranges too. > + * shadow stack memory is XWR = 010 and thus clearing _PAGE_WRITE will lead to > + * encoding 000b which is wrong encoding with V = 1. This should lead to page fault > + * but we dont want this wrong configuration to be set in page tables. > + */ > + atomic_long_set((atomic_long_t *)ptep, > + ((pte_val(read_pte) & ~(unsigned long)_PAGE_WRITE) | _PAGE_READ)); > } > > #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH > LGTM. Reviewed-by: Zong Li <zong.li@sifive.com> > -- > 2.34.1 > > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
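Putting the wrprotect change together with the encoding above, the intended transitions, as described in the commit message, are:

    /* wrprotect on an ordinary writable page:  XWR 011 -> 001 (read-only; COW on the next store)
     * wrprotect on a shadow stack page:        XWR 010 -> 001, avoiding 000, which is not a
     *                                          valid leaf encoding; the next sspush/ssamoswap
     *                                          then takes an access fault and COW recreates a
     *                                          shadow stack (010) mapping for the private copy
     * Ordinary pages already have _PAGE_READ set, so OR-ing it in is a no-op for them.
     */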
On Mon, Mar 10, 2025 at 11:42 PM Deepak Gupta <debug@rivosinc.com> wrote: > > Userspace specifies CLONE_VM to share address space and spawn new thread. > `clone` allow userspace to specify a new stack for new thread. However > there is no way to specify new shadow stack base address without changing > API. This patch allocates a new shadow stack whenever CLONE_VM is given. > > In case of CLONE_VFORK, parent is suspended until child finishes and thus > can child use parent shadow stack. In case of !CLONE_VM, COW kicks in > because entire address space is copied from parent to child. > > `clone3` is extensible and can provide mechanisms using which shadow stack > as an input parameter can be provided. This is not settled yet and being > extensively discussed on mailing list. Once that's settled, this commit > will adapt to that. > > Signed-off-by: Deepak Gupta <debug@rivosinc.com> > --- > arch/riscv/include/asm/mmu_context.h | 7 ++ > arch/riscv/include/asm/usercfi.h | 25 ++++++++ > arch/riscv/kernel/process.c | 9 +++ > arch/riscv/kernel/usercfi.c | 120 +++++++++++++++++++++++++++++++++++ > 4 files changed, 161 insertions(+) > > diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h > index 8c4bc49a3a0f..dbf27a78df6c 100644 > --- a/arch/riscv/include/asm/mmu_context.h > +++ b/arch/riscv/include/asm/mmu_context.h > @@ -48,6 +48,13 @@ static inline unsigned long mm_untag_mask(struct mm_struct *mm) > } > #endif > > +#define deactivate_mm deactivate_mm > +static inline void deactivate_mm(struct task_struct *tsk, > + struct mm_struct *mm) > +{ > + shstk_release(tsk); > +} > + > #include <asm-generic/mmu_context.h> > > #endif /* _ASM_RISCV_MMU_CONTEXT_H */ > diff --git a/arch/riscv/include/asm/usercfi.h b/arch/riscv/include/asm/usercfi.h > index 5f2027c51917..82d28ac98d76 100644 > --- a/arch/riscv/include/asm/usercfi.h > +++ b/arch/riscv/include/asm/usercfi.h > @@ -8,6 +8,9 @@ > #ifndef __ASSEMBLY__ > #include <linux/types.h> > > +struct task_struct; > +struct kernel_clone_args; > + > #ifdef CONFIG_RISCV_USER_CFI > struct cfi_status { > unsigned long ubcfi_en : 1; /* Enable for backward cfi. 
*/ > @@ -17,6 +20,28 @@ struct cfi_status { > unsigned long shdw_stk_size; /* size of shadow stack */ > }; > > +unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, > + const struct kernel_clone_args *args); > +void shstk_release(struct task_struct *tsk); > +void set_shstk_base(struct task_struct *task, unsigned long shstk_addr, unsigned long size); > +unsigned long get_shstk_base(struct task_struct *task, unsigned long *size); > +void set_active_shstk(struct task_struct *task, unsigned long shstk_addr); > +bool is_shstk_enabled(struct task_struct *task); > + > +#else > + > +#define shstk_alloc_thread_stack(tsk, args) 0 > + > +#define shstk_release(tsk) > + > +#define get_shstk_base(task, size) 0UL > + > +#define set_shstk_base(task, shstk_addr, size) > + > +#define set_active_shstk(task, shstk_addr) > + > +#define is_shstk_enabled(task) false > + > #endif /* CONFIG_RISCV_USER_CFI */ > > #endif /* __ASSEMBLY__ */ > diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c > index 7c244de77180..99acb6342a37 100644 > --- a/arch/riscv/kernel/process.c > +++ b/arch/riscv/kernel/process.c > @@ -29,6 +29,7 @@ > #include <asm/vector.h> > #include <asm/cpufeature.h> > #include <asm/exec.h> > +#include <asm/usercfi.h> > > #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) > #include <linux/stackprotector.h> > @@ -211,6 +212,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) > unsigned long clone_flags = args->flags; > unsigned long usp = args->stack; > unsigned long tls = args->tls; > + unsigned long ssp = 0; > struct pt_regs *childregs = task_pt_regs(p); > > /* Ensure all threads in this mm have the same pointer masking mode. */ > @@ -229,11 +231,18 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) > p->thread.s[0] = (unsigned long)args->fn; > p->thread.s[1] = (unsigned long)args->fn_arg; > } else { > + /* allocate new shadow stack if needed. In case of CLONE_VM we have to */ > + ssp = shstk_alloc_thread_stack(p, args); > + if (IS_ERR_VALUE(ssp)) > + return PTR_ERR((void *)ssp); > + > *childregs = *(current_pt_regs()); > /* Turn off status.VS */ > riscv_v_vstate_off(childregs); > if (usp) /* User fork */ > childregs->sp = usp; > + /* if needed, set new ssp */ > + ssp ? set_active_shstk(p, ssp) : 0; > if (clone_flags & CLONE_SETTLS) > childregs->tp = tls; > childregs->a0 = 0; /* Return value of fork() */ > diff --git a/arch/riscv/kernel/usercfi.c b/arch/riscv/kernel/usercfi.c > index 24022809a7b5..73cf87dab186 100644 > --- a/arch/riscv/kernel/usercfi.c > +++ b/arch/riscv/kernel/usercfi.c > @@ -19,6 +19,41 @@ > > #define SHSTK_ENTRY_SIZE sizeof(void *) > > +bool is_shstk_enabled(struct task_struct *task) > +{ > + return task->thread_info.user_cfi_state.ubcfi_en ? 
true : false; > +} > + > +void set_shstk_base(struct task_struct *task, unsigned long shstk_addr, unsigned long size) > +{ > + task->thread_info.user_cfi_state.shdw_stk_base = shstk_addr; > + task->thread_info.user_cfi_state.shdw_stk_size = size; > +} > + > +unsigned long get_shstk_base(struct task_struct *task, unsigned long *size) > +{ > + if (size) > + *size = task->thread_info.user_cfi_state.shdw_stk_size; > + return task->thread_info.user_cfi_state.shdw_stk_base; > +} > + > +void set_active_shstk(struct task_struct *task, unsigned long shstk_addr) > +{ > + task->thread_info.user_cfi_state.user_shdw_stk = shstk_addr; > +} > + > +/* > + * If size is 0, then to be compatible with regular stack we want it to be as big as > + * regular stack. Else PAGE_ALIGN it and return back > + */ > +static unsigned long calc_shstk_size(unsigned long size) > +{ > + if (size) > + return PAGE_ALIGN(size); > + > + return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G)); > +} > + > /* > * Writes on shadow stack can either be `sspush` or `ssamoswap`. `sspush` can happen > * implicitly on current shadow stack pointed to by CSR_SSP. `ssamoswap` takes pointer to > @@ -142,3 +177,88 @@ SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsi > > return allocate_shadow_stack(addr, aligned_size, size, set_tok); > } > + > +/* > + * This gets called during clone/clone3/fork. And is needed to allocate a shadow stack for > + * cases where CLONE_VM is specified and thus a different stack is specified by user. We > + * thus need a separate shadow stack too. How does separate shadow stack is specified by > + * user is still being debated. Once that's settled, remove this part of the comment. > + * This function simply returns 0 if shadow stack are not supported or if separate shadow > + * stack allocation is not needed (like in case of !CLONE_VM) > + */ > +unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, > + const struct kernel_clone_args *args) > +{ > + unsigned long addr, size; > + > + /* If shadow stack is not supported, return 0 */ > + if (!cpu_supports_shadow_stack()) > + return 0; > + > + /* > + * If shadow stack is not enabled on the new thread, skip any > + * switch to a new shadow stack. > + */ > + if (!is_shstk_enabled(tsk)) > + return 0; > + > + /* > + * For CLONE_VFORK the child will share the parents shadow stack. > + * Set base = 0 and size = 0, this is special means to track this state > + * so the freeing logic run for child knows to leave it alone. > + */ > + if (args->flags & CLONE_VFORK) { > + set_shstk_base(tsk, 0, 0); > + return 0; > + } > + > + /* > + * For !CLONE_VM the child will use a copy of the parents shadow > + * stack. > + */ > + if (!(args->flags & CLONE_VM)) > + return 0; > + > + /* > + * reaching here means, CLONE_VM was specified and thus a separate shadow > + * stack is needed for new cloned thread. Note: below allocation is happening > + * using current mm. 
> + */ > + size = calc_shstk_size(args->stack_size); > + addr = allocate_shadow_stack(0, size, 0, false); > + if (IS_ERR_VALUE(addr)) > + return addr; > + > + set_shstk_base(tsk, addr, size); > + > + return addr + size; > +} > + > +void shstk_release(struct task_struct *tsk) > +{ > + unsigned long base = 0, size = 0; > + /* If shadow stack is not supported or not enabled, nothing to release */ > + if (!cpu_supports_shadow_stack() || !is_shstk_enabled(tsk)) > + return; > + > + /* > + * When fork() with CLONE_VM fails, the child (tsk) already has a > + * shadow stack allocated, and exit_thread() calls this function to > + * free it. In this case the parent (current) and the child share > + * the same mm struct. Move forward only when they're same. > + */ > + if (!tsk->mm || tsk->mm != current->mm) > + return; > + > + /* > + * We know shadow stack is enabled but if base is NULL, then > + * this task is not managing its own shadow stack (CLONE_VFORK). So > + * skip freeing it. > + */ > + base = get_shstk_base(tsk, &size); > + if (!base) > + return; > + > + vm_munmap(base, size); > + set_shstk_base(tsk, 0, 0); > +} > LGTM. Reviewed-by: Zong Li <zong.li@sifive.com> > -- > 2.34.1 > > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
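Nothing changes at the userspace API level for this: a regular pthread_create() (clone with CLONE_VM) is enough to hit the new allocation path, with the shadow stack sized from the clone stack_size or RLIMIT_STACK as described above. A rough check along the lines of the selftests in this series, assuming a Zicfiss-capable CPU and a toolchain/glibc that enable shadow stack for the binary:

    #include <pthread.h>
    #include <stdio.h>

    /* CSR_SSP (0x011) holds the active shadow stack pointer; reading it only
     * works once shadow stack is enabled for the task. */
    static unsigned long read_ssp(void)
    {
            unsigned long ssp;

            asm volatile("csrr %0, 0x011" : "=r"(ssp));
            return ssp;
    }

    static void *worker(void *arg)
    {
            /* each CLONE_VM thread should report its own shadow stack region */
            printf("thread ssp: %lx\n", read_ssp());
            return NULL;
    }

    int main(void)
    {
            pthread_t t;

            printf("main ssp:   %lx\n", read_ssp());
            pthread_create(&t, NULL, worker, NULL);
            pthread_join(t, NULL);
            return 0;
    }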
On Mon, Mar 10, 2025 at 11:42 PM Deepak Gupta <debug@rivosinc.com> wrote: > > Adding enumeration of zicfilp and zicfiss extensions in hwprobe syscall. > > Signed-off-by: Deepak Gupta <debug@rivosinc.com> > --- > arch/riscv/include/uapi/asm/hwprobe.h | 2 ++ > arch/riscv/kernel/sys_hwprobe.c | 2 ++ > 2 files changed, 4 insertions(+) > > diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h > index c3c1cc951cb9..c1b537b50158 100644 > --- a/arch/riscv/include/uapi/asm/hwprobe.h > +++ b/arch/riscv/include/uapi/asm/hwprobe.h > @@ -73,6 +73,8 @@ struct riscv_hwprobe { > #define RISCV_HWPROBE_EXT_ZCMOP (1ULL << 47) > #define RISCV_HWPROBE_EXT_ZAWRS (1ULL << 48) > #define RISCV_HWPROBE_EXT_SUPM (1ULL << 49) > +#define RISCV_HWPROBE_EXT_ZICFILP (1ULL << 50) > +#define RISCV_HWPROBE_EXT_ZICFISS (1ULL << 51) > #define RISCV_HWPROBE_KEY_CPUPERF_0 5 > #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) > #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) > diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c > index bcd3b816306c..d802ff707913 100644 > --- a/arch/riscv/kernel/sys_hwprobe.c > +++ b/arch/riscv/kernel/sys_hwprobe.c > @@ -108,6 +108,8 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, > EXT_KEY(ZCB); > EXT_KEY(ZCMOP); > EXT_KEY(ZICBOZ); > + EXT_KEY(ZICFILP); > + EXT_KEY(ZICFISS); > EXT_KEY(ZICOND); > EXT_KEY(ZIHINTNTL); > EXT_KEY(ZIHINTPAUSE); > LGTM. Reviewed-by: Zong Li <zong.li@sifive.com> > -- > 2.34.1 > > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
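Userspace can then discover the two extensions through the existing riscv_hwprobe() syscall. In the sketch below the struct, key and syscall number come from the current uapi headers; only the two extension bits are new in this patch, with fallback defines for older headers:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <asm/hwprobe.h>

    #ifndef RISCV_HWPROBE_EXT_ZICFILP
    #define RISCV_HWPROBE_EXT_ZICFILP (1ULL << 50)
    #define RISCV_HWPROBE_EXT_ZICFISS (1ULL << 51)
    #endif

    int main(void)
    {
            struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_IMA_EXT_0 };

            /* args: pairs, pair_count, cpusetsize, cpus, flags */
            if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0))
                    return 1;

            printf("zicfilp: %s\n", (pair.value & RISCV_HWPROBE_EXT_ZICFILP) ? "yes" : "no");
            printf("zicfiss: %s\n", (pair.value & RISCV_HWPROBE_EXT_ZICFISS) ? "yes" : "no");
            return 0;
    }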
On Mon, Mar 10, 2025 at 11:44 PM Deepak Gupta <debug@rivosinc.com> wrote: > > Kernel will have to perform shadow stack operations on user shadow stack. > Like during signal delivery and sigreturn, shadow stack token must be > created and validated respectively. Thus shadow stack access for kernel > must be enabled. > > In future when kernel shadow stacks are enabled for linux kernel, it must > be enabled as early as possible for better coverage and prevent imbalance > between regular stack and shadow stack. After `relocate_enable_mmu` has > been done, this is as early as possible it can enabled. > > Signed-off-by: Deepak Gupta <debug@rivosinc.com> > --- > arch/riscv/kernel/asm-offsets.c | 4 ++++ > arch/riscv/kernel/head.S | 12 ++++++++++++ > 2 files changed, 16 insertions(+) > > diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c > index 0c188aaf3925..21f99d5757b6 100644 > --- a/arch/riscv/kernel/asm-offsets.c > +++ b/arch/riscv/kernel/asm-offsets.c > @@ -515,4 +515,8 @@ void asm_offsets(void) > DEFINE(FREGS_A6, offsetof(struct __arch_ftrace_regs, a6)); > DEFINE(FREGS_A7, offsetof(struct __arch_ftrace_regs, a7)); > #endif > + DEFINE(SBI_EXT_FWFT, SBI_EXT_FWFT); > + DEFINE(SBI_EXT_FWFT_SET, SBI_EXT_FWFT_SET); > + DEFINE(SBI_FWFT_SHADOW_STACK, SBI_FWFT_SHADOW_STACK); > + DEFINE(SBI_FWFT_SET_FLAG_LOCK, SBI_FWFT_SET_FLAG_LOCK); > } > diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S > index 356d5397b2a2..6244408ca917 100644 > --- a/arch/riscv/kernel/head.S > +++ b/arch/riscv/kernel/head.S > @@ -164,6 +164,12 @@ secondary_start_sbi: > call relocate_enable_mmu > #endif > call .Lsetup_trap_vector > + li a7, SBI_EXT_FWFT > + li a6, SBI_EXT_FWFT_SET > + li a0, SBI_FWFT_SHADOW_STACK > + li a1, 1 /* enable supervisor to access shadow stack access */ > + li a2, SBI_FWFT_SET_FLAG_LOCK > + ecall > scs_load_current > call smp_callin > #endif /* CONFIG_SMP */ > @@ -320,6 +326,12 @@ SYM_CODE_START(_start_kernel) > la tp, init_task > la sp, init_thread_union + THREAD_SIZE > addi sp, sp, -PT_SIZE_ON_STACK > + li a7, SBI_EXT_FWFT > + li a6, SBI_EXT_FWFT_SET > + li a0, SBI_FWFT_SHADOW_STACK > + li a1, 1 /* enable supervisor to access shadow stack access */ > + li a2, SBI_FWFT_SET_FLAG_LOCK > + ecall > scs_load_current > > #ifdef CONFIG_KASAN > LGTM. Reviewed-by: Zong Li <zong.li@sifive.com> > -- > 2.34.1 > > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
On Fri, Mar 14, 2025 at 04:25:59PM +0800, Zong Li wrote: >On Mon, Mar 10, 2025 at 11:42 PM Deepak Gupta <debug@rivosinc.com> wrote: >> >> Three architectures (x86, aarch64, riscv) have support for indirect branch >> tracking feature in a very similar fashion. On a very high level, indirect >> branch tracking is a CPU feature where CPU tracks branches which uses >> memory operand to perform control transfer in program. As part of this >> tracking on indirect branches, CPU goes in a state where it expects a >> landing pad instr on target and if not found then CPU raises some fault >> (architecture dependent) >> >> x86 landing pad instr - `ENDBRANCH` >> aarch64 landing pad instr - `BTI` >> riscv landing instr - `lpad` >> >> Given that three major arches have support for indirect branch tracking, >> This patch makes `prctl` for indirect branch tracking arch agnostic. >> >> To allow userspace to enable this feature for itself, following prtcls are >> defined: >> - PR_GET_INDIR_BR_LP_STATUS: Gets current configured status for indirect >> branch tracking. >> - PR_SET_INDIR_BR_LP_STATUS: Sets a configuration for indirect branch >> tracking. >> Following status options are allowed >> - PR_INDIR_BR_LP_ENABLE: Enables indirect branch tracking on user >> thread. >> - PR_INDIR_BR_LP_DISABLE; Disables indirect branch tracking on user >> thread. >> - PR_LOCK_INDIR_BR_LP_STATUS: Locks configured status for indirect branch >> tracking for user thread. >> >> Signed-off-by: Deepak Gupta <debug@rivosinc.com> >> Reviewed-by: Mark Brown <broonie@kernel.org> >> --- >> arch/riscv/include/asm/usercfi.h | 16 ++++++++- >> arch/riscv/kernel/entry.S | 2 +- >> arch/riscv/kernel/process.c | 5 +++ >> arch/riscv/kernel/usercfi.c | 76 ++++++++++++++++++++++++++++++++++++++++ >> include/linux/cpu.h | 4 +++ >> include/uapi/linux/prctl.h | 27 ++++++++++++++ >> kernel/sys.c | 30 ++++++++++++++++ >> 7 files changed, 158 insertions(+), 2 deletions(-) >> >> diff --git a/arch/riscv/include/asm/usercfi.h b/arch/riscv/include/asm/usercfi.h >> index c4dcd256f19a..a8cec7c14d1d 100644 >> --- a/arch/riscv/include/asm/usercfi.h >> +++ b/arch/riscv/include/asm/usercfi.h >> @@ -16,7 +16,9 @@ struct kernel_clone_args; >> struct cfi_status { >> unsigned long ubcfi_en : 1; /* Enable for backward cfi. */ >> unsigned long ubcfi_locked : 1; >> - unsigned long rsvd : ((sizeof(unsigned long) * 8) - 2); >> + unsigned long ufcfi_en : 1; /* Enable for forward cfi. 
Note that ELP goes in sstatus */ >> + unsigned long ufcfi_locked : 1; >> + unsigned long rsvd : ((sizeof(unsigned long) * 8) - 4); >> unsigned long user_shdw_stk; /* Current user shadow stack pointer */ >> unsigned long shdw_stk_base; /* Base address of shadow stack */ >> unsigned long shdw_stk_size; /* size of shadow stack */ >> @@ -33,6 +35,10 @@ bool is_shstk_locked(struct task_struct *task); >> bool is_shstk_allocated(struct task_struct *task); >> void set_shstk_lock(struct task_struct *task); >> void set_shstk_status(struct task_struct *task, bool enable); >> +bool is_indir_lp_enabled(struct task_struct *task); >> +bool is_indir_lp_locked(struct task_struct *task); >> +void set_indir_lp_status(struct task_struct *task, bool enable); >> +void set_indir_lp_lock(struct task_struct *task); >> >> #define PR_SHADOW_STACK_SUPPORTED_STATUS_MASK (PR_SHADOW_STACK_ENABLE) >> >> @@ -58,6 +64,14 @@ void set_shstk_status(struct task_struct *task, bool enable); >> >> #define set_shstk_status(task, enable) >> >> +#define is_indir_lp_enabled(task) false >> + >> +#define is_indir_lp_locked(task) false >> + >> +#define set_indir_lp_status(task, enable) >> + >> +#define set_indir_lp_lock(task) >> + >> #endif /* CONFIG_RISCV_USER_CFI */ >> >> #endif /* __ASSEMBLY__ */ >> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S >> index 68c99124ea55..00494b54ff4a 100644 >> --- a/arch/riscv/kernel/entry.S >> +++ b/arch/riscv/kernel/entry.S >> @@ -143,7 +143,7 @@ SYM_CODE_START(handle_exception) >> * Disable the FPU/Vector to detect illegal usage of floating point >> * or vector in kernel space. >> */ >> - li t0, SR_SUM | SR_FS_VS >> + li t0, SR_SUM | SR_FS_VS | SR_ELP >> >> REG_L s0, TASK_TI_USER_SP(tp) >> csrrc s1, CSR_STATUS, t0 >> diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c >> index cd11667593fe..4587201dd81d 100644 >> --- a/arch/riscv/kernel/process.c >> +++ b/arch/riscv/kernel/process.c >> @@ -160,6 +160,11 @@ void start_thread(struct pt_regs *regs, unsigned long pc, >> set_shstk_status(current, false); >> set_shstk_base(current, 0, 0); >> set_active_shstk(current, 0); >> + /* >> + * disable indirect branch tracking on exec. >> + * libc will enable it later via prctl. >> + */ >> + set_indir_lp_status(current, false); > >In set_indir_lp_status and set_shstk_status, the $senvcfg.LPE and >$senvcfg.SSE fields are set. However, if the CPU does not support this >CSR, writing to it will trigger an illegal instruction exception. >Should we add sanity checks to handle this situation? Thanks hmm, these were two patches. Something happened in my workflow and the two were squashed together, it seems. I need to split them (one for the introduction of the generic prctls and another which implements them on riscv). That being said, yes, good point here. I'll make that change. >> >> #ifdef CONFIG_64BIT >> regs->status &= ~SR_UXL; >> diff --git a/arch/riscv/kernel/usercfi.c b/arch/riscv/kernel/usercfi.c >> index 37d6fb8144e7..3a66f149a4ef 100644 >> --- a/arch/riscv/kernel/usercfi.c >> +++ b/arch/riscv/kernel/usercfi.c >> @@ -69,6 +69,32 @@ void set_shstk_lock(struct task_struct *task) >> task->thread_info.user_cfi_state.ubcfi_locked = 1; >> } >> >> +bool is_indir_lp_enabled(struct task_struct *task) >> +{ >> + return task->thread_info.user_cfi_state.ufcfi_en ? true : false; >> +} >> + >> +bool is_indir_lp_locked(struct task_struct *task) >> +{ >> + return task->thread_info.user_cfi_state.ufcfi_locked ? 
true : false; >> +} >> + >> +void set_indir_lp_status(struct task_struct *task, bool enable) >> +{ >> + task->thread_info.user_cfi_state.ufcfi_en = enable ? 1 : 0; >> + >> + if (enable) >> + task->thread.envcfg |= ENVCFG_LPE; >> + else >> + task->thread.envcfg &= ~ENVCFG_LPE; >> + >> + csr_write(CSR_ENVCFG, task->thread.envcfg); >> +} >> + >> +void set_indir_lp_lock(struct task_struct *task) >> +{ >> + task->thread_info.user_cfi_state.ufcfi_locked = 1; >> +} >> /* >> * If size is 0, then to be compatible with regular stack we want it to be as big as >> * regular stack. Else PAGE_ALIGN it and return back >> @@ -369,3 +395,53 @@ int arch_lock_shadow_stack_status(struct task_struct *task, >> >> return 0; >> } >> + >> +int arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status) >> +{ >> + unsigned long fcfi_status = 0; >> + >> + if (!cpu_supports_indirect_br_lp_instr()) >> + return -EINVAL; >> + >> + /* whether indirect branch tracking is enabled on the task or not */ >> + fcfi_status |= (is_indir_lp_enabled(t) ? PR_INDIR_BR_LP_ENABLE : 0); >> + >> + return copy_to_user(status, &fcfi_status, sizeof(fcfi_status)) ? -EFAULT : 0; >> +} >> + >> +int arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status) >> +{ >> + bool enable_indir_lp = false; >> + >> + if (!cpu_supports_indirect_br_lp_instr()) >> + return -EINVAL; >> + >> + /* indirect branch tracking is locked and further can't be modified by user */ >> + if (is_indir_lp_locked(t)) >> + return -EINVAL; >> + >> + /* Reject unknown flags */ >> + if (status & ~PR_INDIR_BR_LP_ENABLE) >> + return -EINVAL; >> + >> + enable_indir_lp = (status & PR_INDIR_BR_LP_ENABLE) ? true : false; >> + set_indir_lp_status(t, enable_indir_lp); >> + >> + return 0; >> +} >> + >> +int arch_lock_indir_br_lp_status(struct task_struct *task, >> + unsigned long arg) >> +{ >> + /* >> + * If indirect branch tracking is not supported or not enabled on task, >> + * nothing to lock here >> + */ >> + if (!cpu_supports_indirect_br_lp_instr() || >> + !is_indir_lp_enabled(task) || arg != 0) >> + return -EINVAL; >> + >> + set_indir_lp_lock(task); >> + >> + return 0; >> +} >> diff --git a/include/linux/cpu.h b/include/linux/cpu.h >> index 6a0a8f1c7c90..fb0c394430c6 100644 >> --- a/include/linux/cpu.h >> +++ b/include/linux/cpu.h >> @@ -204,4 +204,8 @@ static inline bool cpu_mitigations_auto_nosmt(void) >> } >> #endif >> >> +int arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status); >> +int arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status); >> +int arch_lock_indir_br_lp_status(struct task_struct *t, unsigned long status); >> + >> #endif /* _LINUX_CPU_H_ */ >> diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h >> index 5c6080680cb2..6cd90460cbad 100644 >> --- a/include/uapi/linux/prctl.h >> +++ b/include/uapi/linux/prctl.h >> @@ -353,4 +353,31 @@ struct prctl_mm_map { >> */ >> #define PR_LOCK_SHADOW_STACK_STATUS 76 >> +/* >> + * Get the current indirect branch tracking configuration for the current >> + * thread; this will be the value configured via PR_SET_INDIR_BR_LP_STATUS. >> + */ >> +#define PR_GET_INDIR_BR_LP_STATUS 77 >> + >> +/* >> + * Set the indirect branch tracking configuration. PR_INDIR_BR_LP_ENABLE will >> + * enable the cpu feature for the user thread, to track all indirect branches and ensure >> + * they land on an arch defined landing pad instruction. >> + * x86 - If enabled, an indirect branch must land on `ENDBRANCH` instruction. 
>> + * aarch64 - If enabled, an indirect branch must land on `BTI` instruction. >> + * riscv - If enabled, an indirect branch must land on `lpad` instruction. >> + * PR_INDIR_BR_LP_DISABLE will disable the feature for the user thread and indirect >> + * branches will no longer be tracked by the cpu to land on an arch defined landing pad >> + * instruction. >> + */ >> +#define PR_SET_INDIR_BR_LP_STATUS 78 >> +# define PR_INDIR_BR_LP_ENABLE (1UL << 0) >> + >> +/* >> + * Prevent further changes to the specified indirect branch tracking >> + * configuration. All bits may be locked via this call, including >> + * undefined bits. >> + */ >> +#define PR_LOCK_INDIR_BR_LP_STATUS 79 >> + >> #endif /* _LINUX_PRCTL_H */ >> diff --git a/kernel/sys.c b/kernel/sys.c >> index cb366ff8703a..f347f3518d0b 100644 >> --- a/kernel/sys.c >> +++ b/kernel/sys.c >> @@ -2336,6 +2336,21 @@ int __weak arch_lock_shadow_stack_status(struct task_struct *t, unsigned long st >> return -EINVAL; >> } >> >> +int __weak arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status) >> +{ >> + return -EINVAL; >> +} >> + >> +int __weak arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status) >> +{ >> + return -EINVAL; >> +} >> + >> +int __weak arch_lock_indir_br_lp_status(struct task_struct *t, unsigned long status) >> +{ >> + return -EINVAL; >> +} >> + >> #define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE) >> >> #ifdef CONFIG_ANON_VMA_NAME >> @@ -2811,6 +2826,21 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, >> return -EINVAL; >> error = arch_lock_shadow_stack_status(me, arg2); >> break; >> + case PR_GET_INDIR_BR_LP_STATUS: >> + if (arg3 || arg4 || arg5) >> + return -EINVAL; >> + error = arch_get_indir_br_lp_status(me, (unsigned long __user *)arg2); >> + break; >> + case PR_SET_INDIR_BR_LP_STATUS: >> + if (arg3 || arg4 || arg5) >> + return -EINVAL; >> + error = arch_set_indir_br_lp_status(me, arg2); >> + break; >> + case PR_LOCK_INDIR_BR_LP_STATUS: >> + if (arg3 || arg4 || arg5) >> + return -EINVAL; >> + error = arch_lock_indir_br_lp_status(me, arg2); >> + break; >> default: >> trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5); >> error = -EINVAL; >> >> -- >> 2.34.1 >> >> >> _______________________________________________ >> linux-riscv mailing list >> linux-riscv@lists.infradead.org >> http://lists.infradead.org/mailman/listinfo/linux-riscv
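To make the new uapi concrete, here is a small userspace sketch of how a thread (typically libc or the dynamic loader, as the process.c comment above suggests) might drive these prctls. It is only illustrative and not part of the patch; the constants are copied from the prctl.h hunk quoted above and defined locally in case the installed headers predate this series. Note that the header only defines PR_INDIR_BR_LP_ENABLE, so passing a status of 0 to PR_SET_INDIR_BR_LP_STATUS is what disables the feature, and the lock call expects arg2 to be 0.

/*
 * Illustrative userspace usage of the prctls added above -- a sketch only.
 */
#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SET_INDIR_BR_LP_STATUS
#define PR_GET_INDIR_BR_LP_STATUS	77
#define PR_SET_INDIR_BR_LP_STATUS	78
#define PR_INDIR_BR_LP_ENABLE		(1UL << 0)
#define PR_LOCK_INDIR_BR_LP_STATUS	79
#endif

int main(void)
{
	unsigned long status = 0;

	/* Enable landing pad tracking for this thread; arg3..arg5 must be 0. */
	if (prctl(PR_SET_INDIR_BR_LP_STATUS, PR_INDIR_BR_LP_ENABLE, 0UL, 0UL, 0UL))
		perror("PR_SET_INDIR_BR_LP_STATUS");	/* e.g. EINVAL if zicfilp is unsupported */

	/* Read back the currently configured status into 'status'. */
	if (prctl(PR_GET_INDIR_BR_LP_STATUS, &status, 0UL, 0UL, 0UL) == 0)
		printf("indirect branch tracking: %s\n",
		       (status & PR_INDIR_BR_LP_ENABLE) ? "enabled" : "disabled");

	/* Lock the configuration; the kernel requires arg2 to be 0 here. */
	if (prctl(PR_LOCK_INDIR_BR_LP_STATUS, 0UL, 0UL, 0UL, 0UL))
		perror("PR_LOCK_INDIR_BR_LP_STATUS");

	return 0;
}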