Message ID | 20240403234054.2020347-11-debug@rivosinc.com |
---|---|
State | Superseded |
Headers | show |
Series | riscv control-flow integrity for usermode | expand |
On Wed, Apr 03, 2024 at 04:34:58PM -0700, Deepak Gupta wrote: > `arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ | > VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is > updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ). > This is to make sure that any existing apps using PROT_WRITE still work. > > Earlier `protection_map[VM_WRITE]` used to pick read-write PTE encodings. > Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE > encodings for shadow stack. Above changes ensure that existing apps > continue to work because underneath kernel will be picking > `protection_map[VM_WRITE|VM_READ]` PTE encodings. > > Signed-off-by: Deepak Gupta <debug@rivosinc.com> > --- > arch/riscv/include/asm/mman.h | 24 ++++++++++++++++++++++++ > arch/riscv/include/asm/pgtable.h | 1 + > arch/riscv/kernel/sys_riscv.c | 11 +++++++++++ > arch/riscv/mm/init.c | 2 +- > mm/mmap.c | 1 + > 5 files changed, 38 insertions(+), 1 deletion(-) > create mode 100644 arch/riscv/include/asm/mman.h > > diff --git a/arch/riscv/include/asm/mman.h b/arch/riscv/include/asm/mman.h > new file mode 100644 > index 000000000000..ef9fedf32546 > --- /dev/null > +++ b/arch/riscv/include/asm/mman.h > @@ -0,0 +1,24 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef __ASM_MMAN_H__ > +#define __ASM_MMAN_H__ > + > +#include <linux/compiler.h> > +#include <linux/types.h> > +#include <uapi/asm/mman.h> > + > +static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, > + unsigned long pkey __always_unused) > +{ > + unsigned long ret = 0; > + > + /* > + * If PROT_WRITE was specified, force it to VM_READ | VM_WRITE. > + * Only VM_WRITE means shadow stack. > + */ > + if (prot & PROT_WRITE) > + ret = (VM_READ | VM_WRITE); > + return ret; > +} > +#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) > + > +#endif /* ! __ASM_MMAN_H__ */ > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h > index 6066822e7396..4d5983bc6766 100644 > --- a/arch/riscv/include/asm/pgtable.h > +++ b/arch/riscv/include/asm/pgtable.h > @@ -184,6 +184,7 @@ extern struct pt_alloc_ops pt_ops __initdata; > #define PAGE_READ_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) > #define PAGE_WRITE_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | \ > _PAGE_EXEC | _PAGE_WRITE) > +#define PAGE_SHADOWSTACK __pgprot(_PAGE_BASE | _PAGE_WRITE) > > #define PAGE_COPY PAGE_READ > #define PAGE_COPY_EXEC PAGE_READ_EXEC > diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c > index f1c1416a9f1e..846c36b1b3d5 100644 > --- a/arch/riscv/kernel/sys_riscv.c > +++ b/arch/riscv/kernel/sys_riscv.c > @@ -8,6 +8,8 @@ > #include <linux/syscalls.h> > #include <asm/cacheflush.h> > #include <asm-generic/mman-common.h> > +#include <vdso/vsyscall.h> > +#include <asm/mman.h> > > static long riscv_sys_mmap(unsigned long addr, unsigned long len, > unsigned long prot, unsigned long flags, > @@ -17,6 +19,15 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, > if (unlikely(offset & (~PAGE_MASK >> page_shift_offset))) > return -EINVAL; > > + /* > + * If only PROT_WRITE is specified then extend that to PROT_READ > + * protection_map[VM_WRITE] is now going to select shadow stack encodings. > + * So specifying PROT_WRITE actually should select protection_map [VM_WRITE | VM_READ] > + * If user wants to create shadow stack then they should use `map_shadow_stack` syscall. > + */ > + if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ))) The comments says that this should extend to PROT_READ if only PROT_WRITE is specified. This condition instead is checking if PROT_WRITE is selected but PROT_READ is not. If prot is (VM_EXEC | VM_WRITE) then it would be extended to (VM_EXEC | VM_WRITE | VM_READ). This will not currently cause any issues because these both map to the same value in the protection_map PAGE_COPY_EXEC, however this seems to be not the intention of this change. prot == PROT_WRITE better suits the condition explained in the comment. > + prot |= PROT_READ; > + > return ksys_mmap_pgoff(addr, len, prot, flags, fd, > offset >> (PAGE_SHIFT - page_shift_offset)); > } > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c > index fa34cf55037b..98e5ece4052a 100644 > --- a/arch/riscv/mm/init.c > +++ b/arch/riscv/mm/init.c > @@ -299,7 +299,7 @@ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); > static const pgprot_t protection_map[16] = { > [VM_NONE] = PAGE_NONE, > [VM_READ] = PAGE_READ, > - [VM_WRITE] = PAGE_COPY, > + [VM_WRITE] = PAGE_SHADOWSTACK, > [VM_WRITE | VM_READ] = PAGE_COPY, > [VM_EXEC] = PAGE_EXEC, > [VM_EXEC | VM_READ] = PAGE_READ_EXEC, > diff --git a/mm/mmap.c b/mm/mmap.c > index d89770eaab6b..57a974f49b00 100644 > --- a/mm/mmap.c > +++ b/mm/mmap.c > @@ -47,6 +47,7 @@ > #include <linux/oom.h> > #include <linux/sched/mm.h> > #include <linux/ksm.h> > +#include <linux/processor.h> It doesn't seem like this is necessary for this patch. - Charlie > > #include <linux/uaccess.h> > #include <asm/cacheflush.h> > -- > 2.43.2 >
Hi Deepak, On 04/04/2024 01:34, Deepak Gupta wrote: > `arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ | > VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is > updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ). > This is to make sure that any existing apps using PROT_WRITE still work. > > Earlier `protection_map[VM_WRITE]` used to pick read-write PTE encodings. > Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE > encodings for shadow stack. Above changes ensure that existing apps > continue to work because underneath kernel will be picking > `protection_map[VM_WRITE|VM_READ]` PTE encodings. > > Signed-off-by: Deepak Gupta <debug@rivosinc.com> > --- > arch/riscv/include/asm/mman.h | 24 ++++++++++++++++++++++++ > arch/riscv/include/asm/pgtable.h | 1 + > arch/riscv/kernel/sys_riscv.c | 11 +++++++++++ > arch/riscv/mm/init.c | 2 +- > mm/mmap.c | 1 + > 5 files changed, 38 insertions(+), 1 deletion(-) > create mode 100644 arch/riscv/include/asm/mman.h > > diff --git a/arch/riscv/include/asm/mman.h b/arch/riscv/include/asm/mman.h > new file mode 100644 > index 000000000000..ef9fedf32546 > --- /dev/null > +++ b/arch/riscv/include/asm/mman.h > @@ -0,0 +1,24 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef __ASM_MMAN_H__ > +#define __ASM_MMAN_H__ > + > +#include <linux/compiler.h> > +#include <linux/types.h> > +#include <uapi/asm/mman.h> > + > +static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, > + unsigned long pkey __always_unused) > +{ > + unsigned long ret = 0; > + > + /* > + * If PROT_WRITE was specified, force it to VM_READ | VM_WRITE. > + * Only VM_WRITE means shadow stack. > + */ > + if (prot & PROT_WRITE) > + ret = (VM_READ | VM_WRITE); > + return ret; > +} > +#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) > + > +#endif /* ! __ASM_MMAN_H__ */ > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h > index 6066822e7396..4d5983bc6766 100644 > --- a/arch/riscv/include/asm/pgtable.h > +++ b/arch/riscv/include/asm/pgtable.h > @@ -184,6 +184,7 @@ extern struct pt_alloc_ops pt_ops __initdata; > #define PAGE_READ_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) > #define PAGE_WRITE_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | \ > _PAGE_EXEC | _PAGE_WRITE) > +#define PAGE_SHADOWSTACK __pgprot(_PAGE_BASE | _PAGE_WRITE) > > #define PAGE_COPY PAGE_READ > #define PAGE_COPY_EXEC PAGE_READ_EXEC > diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c > index f1c1416a9f1e..846c36b1b3d5 100644 > --- a/arch/riscv/kernel/sys_riscv.c > +++ b/arch/riscv/kernel/sys_riscv.c > @@ -8,6 +8,8 @@ > #include <linux/syscalls.h> > #include <asm/cacheflush.h> > #include <asm-generic/mman-common.h> > +#include <vdso/vsyscall.h> > +#include <asm/mman.h> > > static long riscv_sys_mmap(unsigned long addr, unsigned long len, > unsigned long prot, unsigned long flags, > @@ -17,6 +19,15 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, > if (unlikely(offset & (~PAGE_MASK >> page_shift_offset))) > return -EINVAL; > > + /* > + * If only PROT_WRITE is specified then extend that to PROT_READ > + * protection_map[VM_WRITE] is now going to select shadow stack encodings. > + * So specifying PROT_WRITE actually should select protection_map [VM_WRITE | VM_READ] > + * If user wants to create shadow stack then they should use `map_shadow_stack` syscall. > + */ > + if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ))) > + prot |= PROT_READ; > + > return ksys_mmap_pgoff(addr, len, prot, flags, fd, > offset >> (PAGE_SHIFT - page_shift_offset)); > } > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c > index fa34cf55037b..98e5ece4052a 100644 > --- a/arch/riscv/mm/init.c > +++ b/arch/riscv/mm/init.c > @@ -299,7 +299,7 @@ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); > static const pgprot_t protection_map[16] = { > [VM_NONE] = PAGE_NONE, > [VM_READ] = PAGE_READ, > - [VM_WRITE] = PAGE_COPY, > + [VM_WRITE] = PAGE_SHADOWSTACK, > [VM_WRITE | VM_READ] = PAGE_COPY, > [VM_EXEC] = PAGE_EXEC, > [VM_EXEC | VM_READ] = PAGE_READ_EXEC, > diff --git a/mm/mmap.c b/mm/mmap.c > index d89770eaab6b..57a974f49b00 100644 > --- a/mm/mmap.c > +++ b/mm/mmap.c > @@ -47,6 +47,7 @@ > #include <linux/oom.h> > #include <linux/sched/mm.h> > #include <linux/ksm.h> > +#include <linux/processor.h> > > #include <linux/uaccess.h> > #include <asm/cacheflush.h> What happens if someone restricts the permission to PROT_WRITE using mprotect()? I would say this is an issue since it would turn the pages into shadow stack pages.
On Fri, May 10, 2024 at 02:02:54PM -0700, Charlie Jenkins wrote: >On Wed, Apr 03, 2024 at 04:34:58PM -0700, Deepak Gupta wrote: >> `arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ | >> VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is >> updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ). >> This is to make sure that any existing apps using PROT_WRITE still work. >> >> Earlier `protection_map[VM_WRITE]` used to pick read-write PTE encodings. >> Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE >> encodings for shadow stack. Above changes ensure that existing apps >> continue to work because underneath kernel will be picking >> `protection_map[VM_WRITE|VM_READ]` PTE encodings. >> >> Signed-off-by: Deepak Gupta <debug@rivosinc.com> >> --- >> arch/riscv/include/asm/mman.h | 24 ++++++++++++++++++++++++ >> arch/riscv/include/asm/pgtable.h | 1 + >> arch/riscv/kernel/sys_riscv.c | 11 +++++++++++ >> arch/riscv/mm/init.c | 2 +- >> mm/mmap.c | 1 + >> 5 files changed, 38 insertions(+), 1 deletion(-) >> create mode 100644 arch/riscv/include/asm/mman.h >> >> diff --git a/arch/riscv/include/asm/mman.h b/arch/riscv/include/asm/mman.h >> new file mode 100644 >> index 000000000000..ef9fedf32546 >> --- /dev/null >> +++ b/arch/riscv/include/asm/mman.h >> @@ -0,0 +1,24 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> +#ifndef __ASM_MMAN_H__ >> +#define __ASM_MMAN_H__ >> + >> +#include <linux/compiler.h> >> +#include <linux/types.h> >> +#include <uapi/asm/mman.h> >> + >> +static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, >> + unsigned long pkey __always_unused) >> +{ >> + unsigned long ret = 0; >> + >> + /* >> + * If PROT_WRITE was specified, force it to VM_READ | VM_WRITE. >> + * Only VM_WRITE means shadow stack. >> + */ >> + if (prot & PROT_WRITE) >> + ret = (VM_READ | VM_WRITE); >> + return ret; >> +} >> +#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) >> + >> +#endif /* ! __ASM_MMAN_H__ */ >> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h >> index 6066822e7396..4d5983bc6766 100644 >> --- a/arch/riscv/include/asm/pgtable.h >> +++ b/arch/riscv/include/asm/pgtable.h >> @@ -184,6 +184,7 @@ extern struct pt_alloc_ops pt_ops __initdata; >> #define PAGE_READ_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) >> #define PAGE_WRITE_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | \ >> _PAGE_EXEC | _PAGE_WRITE) >> +#define PAGE_SHADOWSTACK __pgprot(_PAGE_BASE | _PAGE_WRITE) >> >> #define PAGE_COPY PAGE_READ >> #define PAGE_COPY_EXEC PAGE_READ_EXEC >> diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c >> index f1c1416a9f1e..846c36b1b3d5 100644 >> --- a/arch/riscv/kernel/sys_riscv.c >> +++ b/arch/riscv/kernel/sys_riscv.c >> @@ -8,6 +8,8 @@ >> #include <linux/syscalls.h> >> #include <asm/cacheflush.h> >> #include <asm-generic/mman-common.h> >> +#include <vdso/vsyscall.h> >> +#include <asm/mman.h> >> >> static long riscv_sys_mmap(unsigned long addr, unsigned long len, >> unsigned long prot, unsigned long flags, >> @@ -17,6 +19,15 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, >> if (unlikely(offset & (~PAGE_MASK >> page_shift_offset))) >> return -EINVAL; >> >> + /* >> + * If only PROT_WRITE is specified then extend that to PROT_READ >> + * protection_map[VM_WRITE] is now going to select shadow stack encodings. >> + * So specifying PROT_WRITE actually should select protection_map [VM_WRITE | VM_READ] >> + * If user wants to create shadow stack then they should use `map_shadow_stack` syscall. >> + */ >> + if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ))) > >The comments says that this should extend to PROT_READ if only >PROT_WRITE is specified. This condition instead is checking if >PROT_WRITE is selected but PROT_READ is not. If prot is (VM_EXEC | >VM_WRITE) then it would be extended to (VM_EXEC | VM_WRITE | VM_READ). >This will not currently cause any issues because these both map to the >same value in the protection_map PAGE_COPY_EXEC, however this seems to >be not the intention of this change. > >prot == PROT_WRITE better suits the condition explained in the comment. If someone specifies this (PROT_EXEC | PROT_WRITE) today, it works because of the way permissions are setup in `protection_map`. On risc-v there is no way to have a page which is execute and write only. So expectation is that if some apps were using `PROT_EXEC | PROT_WRITE` today, they were working because internally it was translating to read, write and execute on page permissions level. This patch make sure that, it stays same from page permissions perspective. If someone was using PROT_EXEC, it may translate to execute only and this change doesn't impact that. Patch simply looks for presence of `PROT_WRITE` and absence of `PROT_READ` in protection flags and if that condition is satisfied, it assumes that caller assumed page is going to be read allowed as well. > >> + prot |= PROT_READ; >> + >> return ksys_mmap_pgoff(addr, len, prot, flags, fd, >> offset >> (PAGE_SHIFT - page_shift_offset)); >> } >> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c >> index fa34cf55037b..98e5ece4052a 100644 >> --- a/arch/riscv/mm/init.c >> +++ b/arch/riscv/mm/init.c >> @@ -299,7 +299,7 @@ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); >> static const pgprot_t protection_map[16] = { >> [VM_NONE] = PAGE_NONE, >> [VM_READ] = PAGE_READ, >> - [VM_WRITE] = PAGE_COPY, >> + [VM_WRITE] = PAGE_SHADOWSTACK, >> [VM_WRITE | VM_READ] = PAGE_COPY, >> [VM_EXEC] = PAGE_EXEC, >> [VM_EXEC | VM_READ] = PAGE_READ_EXEC, >> diff --git a/mm/mmap.c b/mm/mmap.c >> index d89770eaab6b..57a974f49b00 100644 >> --- a/mm/mmap.c >> +++ b/mm/mmap.c >> @@ -47,6 +47,7 @@ >> #include <linux/oom.h> >> #include <linux/sched/mm.h> >> #include <linux/ksm.h> >> +#include <linux/processor.h> > >It doesn't seem like this is necessary for this patch. Thanks. Yeah it looks like I forgot to remove this over the churn. Will fix it. > >- Charlie > >> >> #include <linux/uaccess.h> >> #include <asm/cacheflush.h> >> -- >> 2.43.2 >>
On Sun, May 12, 2024 at 06:24:45PM +0200, Alexandre Ghiti wrote: >Hi Deepak, > >On 04/04/2024 01:34, Deepak Gupta wrote: >>`arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ | >>VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is >>updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ). >>This is to make sure that any existing apps using PROT_WRITE still work. >> >>Earlier `protection_map[VM_WRITE]` used to pick read-write PTE encodings. >>Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE >>encodings for shadow stack. Above changes ensure that existing apps >>continue to work because underneath kernel will be picking >>`protection_map[VM_WRITE|VM_READ]` PTE encodings. >> >>Signed-off-by: Deepak Gupta <debug@rivosinc.com> >>--- >> arch/riscv/include/asm/mman.h | 24 ++++++++++++++++++++++++ >> arch/riscv/include/asm/pgtable.h | 1 + >> arch/riscv/kernel/sys_riscv.c | 11 +++++++++++ >> arch/riscv/mm/init.c | 2 +- >> mm/mmap.c | 1 + >> 5 files changed, 38 insertions(+), 1 deletion(-) >> create mode 100644 arch/riscv/include/asm/mman.h >> >>diff --git a/arch/riscv/include/asm/mman.h b/arch/riscv/include/asm/mman.h >>new file mode 100644 >>index 000000000000..ef9fedf32546 >>--- /dev/null >>+++ b/arch/riscv/include/asm/mman.h >>@@ -0,0 +1,24 @@ >>+/* SPDX-License-Identifier: GPL-2.0 */ >>+#ifndef __ASM_MMAN_H__ >>+#define __ASM_MMAN_H__ >>+ >>+#include <linux/compiler.h> >>+#include <linux/types.h> >>+#include <uapi/asm/mman.h> >>+ >>+static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, >>+ unsigned long pkey __always_unused) >>+{ >>+ unsigned long ret = 0; >>+ >>+ /* >>+ * If PROT_WRITE was specified, force it to VM_READ | VM_WRITE. >>+ * Only VM_WRITE means shadow stack. >>+ */ >>+ if (prot & PROT_WRITE) >>+ ret = (VM_READ | VM_WRITE); >>+ return ret; >>+} >>+#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) >>+ >>+#endif /* ! __ASM_MMAN_H__ */ >>diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h >>index 6066822e7396..4d5983bc6766 100644 >>--- a/arch/riscv/include/asm/pgtable.h >>+++ b/arch/riscv/include/asm/pgtable.h >>@@ -184,6 +184,7 @@ extern struct pt_alloc_ops pt_ops __initdata; >> #define PAGE_READ_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) >> #define PAGE_WRITE_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | \ >> _PAGE_EXEC | _PAGE_WRITE) >>+#define PAGE_SHADOWSTACK __pgprot(_PAGE_BASE | _PAGE_WRITE) >> #define PAGE_COPY PAGE_READ >> #define PAGE_COPY_EXEC PAGE_READ_EXEC >>diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c >>index f1c1416a9f1e..846c36b1b3d5 100644 >>--- a/arch/riscv/kernel/sys_riscv.c >>+++ b/arch/riscv/kernel/sys_riscv.c >>@@ -8,6 +8,8 @@ >> #include <linux/syscalls.h> >> #include <asm/cacheflush.h> >> #include <asm-generic/mman-common.h> >>+#include <vdso/vsyscall.h> >>+#include <asm/mman.h> >> static long riscv_sys_mmap(unsigned long addr, unsigned long len, >> unsigned long prot, unsigned long flags, >>@@ -17,6 +19,15 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, >> if (unlikely(offset & (~PAGE_MASK >> page_shift_offset))) >> return -EINVAL; >>+ /* >>+ * If only PROT_WRITE is specified then extend that to PROT_READ >>+ * protection_map[VM_WRITE] is now going to select shadow stack encodings. >>+ * So specifying PROT_WRITE actually should select protection_map [VM_WRITE | VM_READ] >>+ * If user wants to create shadow stack then they should use `map_shadow_stack` syscall. >>+ */ >>+ if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ))) >>+ prot |= PROT_READ; >>+ >> return ksys_mmap_pgoff(addr, len, prot, flags, fd, >> offset >> (PAGE_SHIFT - page_shift_offset)); >> } >>diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c >>index fa34cf55037b..98e5ece4052a 100644 >>--- a/arch/riscv/mm/init.c >>+++ b/arch/riscv/mm/init.c >>@@ -299,7 +299,7 @@ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); >> static const pgprot_t protection_map[16] = { >> [VM_NONE] = PAGE_NONE, >> [VM_READ] = PAGE_READ, >>- [VM_WRITE] = PAGE_COPY, >>+ [VM_WRITE] = PAGE_SHADOWSTACK, >> [VM_WRITE | VM_READ] = PAGE_COPY, >> [VM_EXEC] = PAGE_EXEC, >> [VM_EXEC | VM_READ] = PAGE_READ_EXEC, >>diff --git a/mm/mmap.c b/mm/mmap.c >>index d89770eaab6b..57a974f49b00 100644 >>--- a/mm/mmap.c >>+++ b/mm/mmap.c >>@@ -47,6 +47,7 @@ >> #include <linux/oom.h> >> #include <linux/sched/mm.h> >> #include <linux/ksm.h> >>+#include <linux/processor.h> >> #include <linux/uaccess.h> >> #include <asm/cacheflush.h> > > >What happens if someone restricts the permission to PROT_WRITE using >mprotect()? I would say this is an issue since it would turn the pages >into shadow stack pages. look at this patch in this patch series. "riscv/mm : ensure PROT_WRITE leads to VM_READ | VM_WRITE" It implements `arch_calc_vm_prot_bits` for risc-v and enforces that incoming PROT_WRITE is converted to VM_READ | VM_WRITE. And thus it'll become read/write memory. This way `mprotect` can be used to convert a shadow stack page to read/write memory but not a regular memory to shadow stack page. > >
On Mon, May 13, 2024 at 10:47:25AM -0700, Deepak Gupta wrote: > On Fri, May 10, 2024 at 02:02:54PM -0700, Charlie Jenkins wrote: > > On Wed, Apr 03, 2024 at 04:34:58PM -0700, Deepak Gupta wrote: > > > `arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ | > > > VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is > > > updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ). > > > This is to make sure that any existing apps using PROT_WRITE still work. > > > > > > Earlier `protection_map[VM_WRITE]` used to pick read-write PTE encodings. > > > Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE > > > encodings for shadow stack. Above changes ensure that existing apps > > > continue to work because underneath kernel will be picking > > > `protection_map[VM_WRITE|VM_READ]` PTE encodings. > > > > > > Signed-off-by: Deepak Gupta <debug@rivosinc.com> > > > --- > > > arch/riscv/include/asm/mman.h | 24 ++++++++++++++++++++++++ > > > arch/riscv/include/asm/pgtable.h | 1 + > > > arch/riscv/kernel/sys_riscv.c | 11 +++++++++++ > > > arch/riscv/mm/init.c | 2 +- > > > mm/mmap.c | 1 + > > > 5 files changed, 38 insertions(+), 1 deletion(-) > > > create mode 100644 arch/riscv/include/asm/mman.h > > > > > > diff --git a/arch/riscv/include/asm/mman.h b/arch/riscv/include/asm/mman.h > > > new file mode 100644 > > > index 000000000000..ef9fedf32546 > > > --- /dev/null > > > +++ b/arch/riscv/include/asm/mman.h > > > @@ -0,0 +1,24 @@ > > > +/* SPDX-License-Identifier: GPL-2.0 */ > > > +#ifndef __ASM_MMAN_H__ > > > +#define __ASM_MMAN_H__ > > > + > > > +#include <linux/compiler.h> > > > +#include <linux/types.h> > > > +#include <uapi/asm/mman.h> > > > + > > > +static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, > > > + unsigned long pkey __always_unused) > > > +{ > > > + unsigned long ret = 0; > > > + > > > + /* > > > + * If PROT_WRITE was specified, force it to VM_READ | VM_WRITE. > > > + * Only VM_WRITE means shadow stack. > > > + */ > > > + if (prot & PROT_WRITE) > > > + ret = (VM_READ | VM_WRITE); > > > + return ret; > > > +} > > > +#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) > > > + > > > +#endif /* ! __ASM_MMAN_H__ */ > > > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h > > > index 6066822e7396..4d5983bc6766 100644 > > > --- a/arch/riscv/include/asm/pgtable.h > > > +++ b/arch/riscv/include/asm/pgtable.h > > > @@ -184,6 +184,7 @@ extern struct pt_alloc_ops pt_ops __initdata; > > > #define PAGE_READ_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) > > > #define PAGE_WRITE_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | \ > > > _PAGE_EXEC | _PAGE_WRITE) > > > +#define PAGE_SHADOWSTACK __pgprot(_PAGE_BASE | _PAGE_WRITE) > > > > > > #define PAGE_COPY PAGE_READ > > > #define PAGE_COPY_EXEC PAGE_READ_EXEC > > > diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c > > > index f1c1416a9f1e..846c36b1b3d5 100644 > > > --- a/arch/riscv/kernel/sys_riscv.c > > > +++ b/arch/riscv/kernel/sys_riscv.c > > > @@ -8,6 +8,8 @@ > > > #include <linux/syscalls.h> > > > #include <asm/cacheflush.h> > > > #include <asm-generic/mman-common.h> > > > +#include <vdso/vsyscall.h> > > > +#include <asm/mman.h> > > > > > > static long riscv_sys_mmap(unsigned long addr, unsigned long len, > > > unsigned long prot, unsigned long flags, > > > @@ -17,6 +19,15 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, > > > if (unlikely(offset & (~PAGE_MASK >> page_shift_offset))) > > > return -EINVAL; > > > > > > + /* > > > + * If only PROT_WRITE is specified then extend that to PROT_READ > > > + * protection_map[VM_WRITE] is now going to select shadow stack encodings. > > > + * So specifying PROT_WRITE actually should select protection_map [VM_WRITE | VM_READ] > > > + * If user wants to create shadow stack then they should use `map_shadow_stack` syscall. > > > + */ > > > + if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ))) > > > > The comments says that this should extend to PROT_READ if only > > PROT_WRITE is specified. This condition instead is checking if > > PROT_WRITE is selected but PROT_READ is not. If prot is (VM_EXEC | > > VM_WRITE) then it would be extended to (VM_EXEC | VM_WRITE | VM_READ). > > This will not currently cause any issues because these both map to the > > same value in the protection_map PAGE_COPY_EXEC, however this seems to > > be not the intention of this change. > > > > prot == PROT_WRITE better suits the condition explained in the comment. > > If someone specifies this (PROT_EXEC | PROT_WRITE) today, it works because > of the way permissions are setup in `protection_map`. On risc-v there is no > way to have a page which is execute and write only. So expectation is that > if some apps were using `PROT_EXEC | PROT_WRITE` today, they were working > because internally it was translating to read, write and execute on page > permissions level. This patch make sure that, it stays same from page > permissions perspective. > > If someone was using PROT_EXEC, it may translate to execute only and this change > doesn't impact that. > > Patch simply looks for presence of `PROT_WRITE` and absence of `PROT_READ` in > protection flags and if that condition is satisfied, it assumes that caller assumed > page is going to be read allowed as well. The purpose of this change is for compatibility with shadow stack pages but this affects flags for pages that are not shadow stack pages. Adding PROT_READ to the other cases is redundant as protection_map already handles that mapping. Permissions being strictly PROT_WRITE is the only case that needs to be handled, and is the only case that is called out in the commit message and in the comment. - Charlie > > > > > > > + prot |= PROT_READ; > > > + > > > return ksys_mmap_pgoff(addr, len, prot, flags, fd, > > > offset >> (PAGE_SHIFT - page_shift_offset)); > > > } > > > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c > > > index fa34cf55037b..98e5ece4052a 100644 > > > --- a/arch/riscv/mm/init.c > > > +++ b/arch/riscv/mm/init.c > > > @@ -299,7 +299,7 @@ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); > > > static const pgprot_t protection_map[16] = { > > > [VM_NONE] = PAGE_NONE, > > > [VM_READ] = PAGE_READ, > > > - [VM_WRITE] = PAGE_COPY, > > > + [VM_WRITE] = PAGE_SHADOWSTACK, > > > [VM_WRITE | VM_READ] = PAGE_COPY, > > > [VM_EXEC] = PAGE_EXEC, > > > [VM_EXEC | VM_READ] = PAGE_READ_EXEC, > > > diff --git a/mm/mmap.c b/mm/mmap.c > > > index d89770eaab6b..57a974f49b00 100644 > > > --- a/mm/mmap.c > > > +++ b/mm/mmap.c > > > @@ -47,6 +47,7 @@ > > > #include <linux/oom.h> > > > #include <linux/sched/mm.h> > > > #include <linux/ksm.h> > > > +#include <linux/processor.h> > > > > It doesn't seem like this is necessary for this patch. > > Thanks. Yeah it looks like I forgot to remove this over the churn. > Will fix it. > > > > > - Charlie > > > > > > > > #include <linux/uaccess.h> > > > #include <asm/cacheflush.h> > > > -- > > > 2.43.2 > > >
On Mon, May 13, 2024 at 11:36:49AM -0700, Charlie Jenkins wrote: >On Mon, May 13, 2024 at 10:47:25AM -0700, Deepak Gupta wrote: >> On Fri, May 10, 2024 at 02:02:54PM -0700, Charlie Jenkins wrote: >> > On Wed, Apr 03, 2024 at 04:34:58PM -0700, Deepak Gupta wrote: >> > > `arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ | >> > > VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is >> > > updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ). >> > > This is to make sure that any existing apps using PROT_WRITE still work. >> > > >> > > Earlier `protection_map[VM_WRITE]` used to pick read-write PTE encodings. >> > > Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE >> > > encodings for shadow stack. Above changes ensure that existing apps >> > > continue to work because underneath kernel will be picking >> > > `protection_map[VM_WRITE|VM_READ]` PTE encodings. >> > > >> > > Signed-off-by: Deepak Gupta <debug@rivosinc.com> >> > > --- >> > > arch/riscv/include/asm/mman.h | 24 ++++++++++++++++++++++++ >> > > arch/riscv/include/asm/pgtable.h | 1 + >> > > arch/riscv/kernel/sys_riscv.c | 11 +++++++++++ >> > > arch/riscv/mm/init.c | 2 +- >> > > mm/mmap.c | 1 + >> > > 5 files changed, 38 insertions(+), 1 deletion(-) >> > > create mode 100644 arch/riscv/include/asm/mman.h >> > > >> > > diff --git a/arch/riscv/include/asm/mman.h b/arch/riscv/include/asm/mman.h >> > > new file mode 100644 >> > > index 000000000000..ef9fedf32546 >> > > --- /dev/null >> > > +++ b/arch/riscv/include/asm/mman.h >> > > @@ -0,0 +1,24 @@ >> > > +/* SPDX-License-Identifier: GPL-2.0 */ >> > > +#ifndef __ASM_MMAN_H__ >> > > +#define __ASM_MMAN_H__ >> > > + >> > > +#include <linux/compiler.h> >> > > +#include <linux/types.h> >> > > +#include <uapi/asm/mman.h> >> > > + >> > > +static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, >> > > + unsigned long pkey __always_unused) >> > > +{ >> > > + unsigned long ret = 0; >> > > + >> > > + /* >> > > + * If PROT_WRITE was specified, force it to VM_READ | VM_WRITE. >> > > + * Only VM_WRITE means shadow stack. >> > > + */ >> > > + if (prot & PROT_WRITE) >> > > + ret = (VM_READ | VM_WRITE); >> > > + return ret; >> > > +} >> > > +#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) >> > > + >> > > +#endif /* ! __ASM_MMAN_H__ */ >> > > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h >> > > index 6066822e7396..4d5983bc6766 100644 >> > > --- a/arch/riscv/include/asm/pgtable.h >> > > +++ b/arch/riscv/include/asm/pgtable.h >> > > @@ -184,6 +184,7 @@ extern struct pt_alloc_ops pt_ops __initdata; >> > > #define PAGE_READ_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) >> > > #define PAGE_WRITE_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | \ >> > > _PAGE_EXEC | _PAGE_WRITE) >> > > +#define PAGE_SHADOWSTACK __pgprot(_PAGE_BASE | _PAGE_WRITE) >> > > >> > > #define PAGE_COPY PAGE_READ >> > > #define PAGE_COPY_EXEC PAGE_READ_EXEC >> > > diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c >> > > index f1c1416a9f1e..846c36b1b3d5 100644 >> > > --- a/arch/riscv/kernel/sys_riscv.c >> > > +++ b/arch/riscv/kernel/sys_riscv.c >> > > @@ -8,6 +8,8 @@ >> > > #include <linux/syscalls.h> >> > > #include <asm/cacheflush.h> >> > > #include <asm-generic/mman-common.h> >> > > +#include <vdso/vsyscall.h> >> > > +#include <asm/mman.h> >> > > >> > > static long riscv_sys_mmap(unsigned long addr, unsigned long len, >> > > unsigned long prot, unsigned long flags, >> > > @@ -17,6 +19,15 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, >> > > if (unlikely(offset & (~PAGE_MASK >> page_shift_offset))) >> > > return -EINVAL; >> > > >> > > + /* >> > > + * If only PROT_WRITE is specified then extend that to PROT_READ >> > > + * protection_map[VM_WRITE] is now going to select shadow stack encodings. >> > > + * So specifying PROT_WRITE actually should select protection_map [VM_WRITE | VM_READ] >> > > + * If user wants to create shadow stack then they should use `map_shadow_stack` syscall. >> > > + */ >> > > + if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ))) >> > >> > The comments says that this should extend to PROT_READ if only >> > PROT_WRITE is specified. This condition instead is checking if >> > PROT_WRITE is selected but PROT_READ is not. If prot is (VM_EXEC | >> > VM_WRITE) then it would be extended to (VM_EXEC | VM_WRITE | VM_READ). >> > This will not currently cause any issues because these both map to the >> > same value in the protection_map PAGE_COPY_EXEC, however this seems to >> > be not the intention of this change. >> > >> > prot == PROT_WRITE better suits the condition explained in the comment. >> >> If someone specifies this (PROT_EXEC | PROT_WRITE) today, it works because >> of the way permissions are setup in `protection_map`. On risc-v there is no >> way to have a page which is execute and write only. So expectation is that >> if some apps were using `PROT_EXEC | PROT_WRITE` today, they were working >> because internally it was translating to read, write and execute on page >> permissions level. This patch make sure that, it stays same from page >> permissions perspective. >> >> If someone was using PROT_EXEC, it may translate to execute only and this change >> doesn't impact that. >> >> Patch simply looks for presence of `PROT_WRITE` and absence of `PROT_READ` in >> protection flags and if that condition is satisfied, it assumes that caller assumed >> page is going to be read allowed as well. > >The purpose of this change is for compatibility with shadow stack pages >but this affects flags for pages that are not shadow stack pages. >Adding PROT_READ to the other cases is redundant as protection_map >already handles that mapping. Permissions being strictly PROT_WRITE is >the only case that needs to be handled, and is the only case that is >called out in the commit message and in the comment. Yeah that's fine. I can change the commit message or just strictly check for PROT_WRITE. It doesn't change bottomline, I am fine with either option. Let me know your preference. > >- Charlie > >> >> >> > >> > > + prot |= PROT_READ; >> > > + >> > > return ksys_mmap_pgoff(addr, len, prot, flags, fd, >> > > offset >> (PAGE_SHIFT - page_shift_offset)); >> > > } >> > > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c >> > > index fa34cf55037b..98e5ece4052a 100644 >> > > --- a/arch/riscv/mm/init.c >> > > +++ b/arch/riscv/mm/init.c >> > > @@ -299,7 +299,7 @@ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); >> > > static const pgprot_t protection_map[16] = { >> > > [VM_NONE] = PAGE_NONE, >> > > [VM_READ] = PAGE_READ, >> > > - [VM_WRITE] = PAGE_COPY, >> > > + [VM_WRITE] = PAGE_SHADOWSTACK, >> > > [VM_WRITE | VM_READ] = PAGE_COPY, >> > > [VM_EXEC] = PAGE_EXEC, >> > > [VM_EXEC | VM_READ] = PAGE_READ_EXEC, >> > > diff --git a/mm/mmap.c b/mm/mmap.c >> > > index d89770eaab6b..57a974f49b00 100644 >> > > --- a/mm/mmap.c >> > > +++ b/mm/mmap.c >> > > @@ -47,6 +47,7 @@ >> > > #include <linux/oom.h> >> > > #include <linux/sched/mm.h> >> > > #include <linux/ksm.h> >> > > +#include <linux/processor.h> >> > >> > It doesn't seem like this is necessary for this patch. >> >> Thanks. Yeah it looks like I forgot to remove this over the churn. >> Will fix it. >> >> > >> > - Charlie >> > >> > > >> > > #include <linux/uaccess.h> >> > > #include <asm/cacheflush.h> >> > > -- >> > > 2.43.2 >> > >
On Mon, May 13, 2024 at 11:41:34AM -0700, Deepak Gupta wrote: > On Mon, May 13, 2024 at 11:36:49AM -0700, Charlie Jenkins wrote: > > On Mon, May 13, 2024 at 10:47:25AM -0700, Deepak Gupta wrote: > > > On Fri, May 10, 2024 at 02:02:54PM -0700, Charlie Jenkins wrote: > > > > On Wed, Apr 03, 2024 at 04:34:58PM -0700, Deepak Gupta wrote: > > > > > `arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ | > > > > > VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is > > > > > updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ). > > > > > This is to make sure that any existing apps using PROT_WRITE still work. > > > > > > > > > > Earlier `protection_map[VM_WRITE]` used to pick read-write PTE encodings. > > > > > Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE > > > > > encodings for shadow stack. Above changes ensure that existing apps > > > > > continue to work because underneath kernel will be picking > > > > > `protection_map[VM_WRITE|VM_READ]` PTE encodings. > > > > > > > > > > Signed-off-by: Deepak Gupta <debug@rivosinc.com> > > > > > --- > > > > > arch/riscv/include/asm/mman.h | 24 ++++++++++++++++++++++++ > > > > > arch/riscv/include/asm/pgtable.h | 1 + > > > > > arch/riscv/kernel/sys_riscv.c | 11 +++++++++++ > > > > > arch/riscv/mm/init.c | 2 +- > > > > > mm/mmap.c | 1 + > > > > > 5 files changed, 38 insertions(+), 1 deletion(-) > > > > > create mode 100644 arch/riscv/include/asm/mman.h > > > > > > > > > > diff --git a/arch/riscv/include/asm/mman.h b/arch/riscv/include/asm/mman.h > > > > > new file mode 100644 > > > > > index 000000000000..ef9fedf32546 > > > > > --- /dev/null > > > > > +++ b/arch/riscv/include/asm/mman.h > > > > > @@ -0,0 +1,24 @@ > > > > > +/* SPDX-License-Identifier: GPL-2.0 */ > > > > > +#ifndef __ASM_MMAN_H__ > > > > > +#define __ASM_MMAN_H__ > > > > > + > > > > > +#include <linux/compiler.h> > > > > > +#include <linux/types.h> > > > > > +#include <uapi/asm/mman.h> > > > > > + > > > > > +static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, > > > > > + unsigned long pkey __always_unused) > > > > > +{ > > > > > + unsigned long ret = 0; > > > > > + > > > > > + /* > > > > > + * If PROT_WRITE was specified, force it to VM_READ | VM_WRITE. > > > > > + * Only VM_WRITE means shadow stack. > > > > > + */ > > > > > + if (prot & PROT_WRITE) > > > > > + ret = (VM_READ | VM_WRITE); > > > > > + return ret; > > > > > +} > > > > > +#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) > > > > > + > > > > > +#endif /* ! __ASM_MMAN_H__ */ > > > > > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h > > > > > index 6066822e7396..4d5983bc6766 100644 > > > > > --- a/arch/riscv/include/asm/pgtable.h > > > > > +++ b/arch/riscv/include/asm/pgtable.h > > > > > @@ -184,6 +184,7 @@ extern struct pt_alloc_ops pt_ops __initdata; > > > > > #define PAGE_READ_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) > > > > > #define PAGE_WRITE_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | \ > > > > > _PAGE_EXEC | _PAGE_WRITE) > > > > > +#define PAGE_SHADOWSTACK __pgprot(_PAGE_BASE | _PAGE_WRITE) > > > > > > > > > > #define PAGE_COPY PAGE_READ > > > > > #define PAGE_COPY_EXEC PAGE_READ_EXEC > > > > > diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c > > > > > index f1c1416a9f1e..846c36b1b3d5 100644 > > > > > --- a/arch/riscv/kernel/sys_riscv.c > > > > > +++ b/arch/riscv/kernel/sys_riscv.c > > > > > @@ -8,6 +8,8 @@ > > > > > #include <linux/syscalls.h> > > > > > #include <asm/cacheflush.h> > > > > > #include <asm-generic/mman-common.h> > > > > > +#include <vdso/vsyscall.h> > > > > > +#include <asm/mman.h> > > > > > > > > > > static long riscv_sys_mmap(unsigned long addr, unsigned long len, > > > > > unsigned long prot, unsigned long flags, > > > > > @@ -17,6 +19,15 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, > > > > > if (unlikely(offset & (~PAGE_MASK >> page_shift_offset))) > > > > > return -EINVAL; > > > > > > > > > > + /* > > > > > + * If only PROT_WRITE is specified then extend that to PROT_READ > > > > > + * protection_map[VM_WRITE] is now going to select shadow stack encodings. > > > > > + * So specifying PROT_WRITE actually should select protection_map [VM_WRITE | VM_READ] > > > > > + * If user wants to create shadow stack then they should use `map_shadow_stack` syscall. > > > > > + */ > > > > > + if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ))) > > > > > > > > The comments says that this should extend to PROT_READ if only > > > > PROT_WRITE is specified. This condition instead is checking if > > > > PROT_WRITE is selected but PROT_READ is not. If prot is (VM_EXEC | > > > > VM_WRITE) then it would be extended to (VM_EXEC | VM_WRITE | VM_READ). > > > > This will not currently cause any issues because these both map to the > > > > same value in the protection_map PAGE_COPY_EXEC, however this seems to > > > > be not the intention of this change. > > > > > > > > prot == PROT_WRITE better suits the condition explained in the comment. > > > > > > If someone specifies this (PROT_EXEC | PROT_WRITE) today, it works because > > > of the way permissions are setup in `protection_map`. On risc-v there is no > > > way to have a page which is execute and write only. So expectation is that > > > if some apps were using `PROT_EXEC | PROT_WRITE` today, they were working > > > because internally it was translating to read, write and execute on page > > > permissions level. This patch make sure that, it stays same from page > > > permissions perspective. > > > > > > If someone was using PROT_EXEC, it may translate to execute only and this change > > > doesn't impact that. > > > > > > Patch simply looks for presence of `PROT_WRITE` and absence of `PROT_READ` in > > > protection flags and if that condition is satisfied, it assumes that caller assumed > > > page is going to be read allowed as well. > > > > The purpose of this change is for compatibility with shadow stack pages > > but this affects flags for pages that are not shadow stack pages. > > Adding PROT_READ to the other cases is redundant as protection_map > > already handles that mapping. Permissions being strictly PROT_WRITE is > > the only case that needs to be handled, and is the only case that is > > called out in the commit message and in the comment. > > Yeah that's fine. > I can change the commit message or just strictly check for PROT_WRITE. > It doesn't change bottomline, I am fine with either option. > > Let me know your preference. I would prefer the strict check. This is not critical though so I will support whatever you decide! - Charlie > > > > > - Charlie > > > > > > > > > > > > > > > > > + prot |= PROT_READ; > > > > > + > > > > > return ksys_mmap_pgoff(addr, len, prot, flags, fd, > > > > > offset >> (PAGE_SHIFT - page_shift_offset)); > > > > > } > > > > > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c > > > > > index fa34cf55037b..98e5ece4052a 100644 > > > > > --- a/arch/riscv/mm/init.c > > > > > +++ b/arch/riscv/mm/init.c > > > > > @@ -299,7 +299,7 @@ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); > > > > > static const pgprot_t protection_map[16] = { > > > > > [VM_NONE] = PAGE_NONE, > > > > > [VM_READ] = PAGE_READ, > > > > > - [VM_WRITE] = PAGE_COPY, > > > > > + [VM_WRITE] = PAGE_SHADOWSTACK, > > > > > [VM_WRITE | VM_READ] = PAGE_COPY, > > > > > [VM_EXEC] = PAGE_EXEC, > > > > > [VM_EXEC | VM_READ] = PAGE_READ_EXEC, > > > > > diff --git a/mm/mmap.c b/mm/mmap.c > > > > > index d89770eaab6b..57a974f49b00 100644 > > > > > --- a/mm/mmap.c > > > > > +++ b/mm/mmap.c > > > > > @@ -47,6 +47,7 @@ > > > > > #include <linux/oom.h> > > > > > #include <linux/sched/mm.h> > > > > > #include <linux/ksm.h> > > > > > +#include <linux/processor.h> > > > > > > > > It doesn't seem like this is necessary for this patch. > > > > > > Thanks. Yeah it looks like I forgot to remove this over the churn. > > > Will fix it. > > > > > > > > > > > - Charlie > > > > > > > > > > > > > > #include <linux/uaccess.h> > > > > > #include <asm/cacheflush.h> > > > > > -- > > > > > 2.43.2 > > > > >
diff --git a/arch/riscv/include/asm/mman.h b/arch/riscv/include/asm/mman.h new file mode 100644 index 000000000000..ef9fedf32546 --- /dev/null +++ b/arch/riscv/include/asm/mman.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MMAN_H__ +#define __ASM_MMAN_H__ + +#include <linux/compiler.h> +#include <linux/types.h> +#include <uapi/asm/mman.h> + +static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, + unsigned long pkey __always_unused) +{ + unsigned long ret = 0; + + /* + * If PROT_WRITE was specified, force it to VM_READ | VM_WRITE. + * Only VM_WRITE means shadow stack. + */ + if (prot & PROT_WRITE) + ret = (VM_READ | VM_WRITE); + return ret; +} +#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) + +#endif /* ! __ASM_MMAN_H__ */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 6066822e7396..4d5983bc6766 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -184,6 +184,7 @@ extern struct pt_alloc_ops pt_ops __initdata; #define PAGE_READ_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) #define PAGE_WRITE_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | \ _PAGE_EXEC | _PAGE_WRITE) +#define PAGE_SHADOWSTACK __pgprot(_PAGE_BASE | _PAGE_WRITE) #define PAGE_COPY PAGE_READ #define PAGE_COPY_EXEC PAGE_READ_EXEC diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index f1c1416a9f1e..846c36b1b3d5 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -8,6 +8,8 @@ #include <linux/syscalls.h> #include <asm/cacheflush.h> #include <asm-generic/mman-common.h> +#include <vdso/vsyscall.h> +#include <asm/mman.h> static long riscv_sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -17,6 +19,15 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, if (unlikely(offset & (~PAGE_MASK >> page_shift_offset))) return -EINVAL; + /* + * If only PROT_WRITE is specified then extend that to PROT_READ + * protection_map[VM_WRITE] is now going to select shadow stack encodings. + * So specifying PROT_WRITE actually should select protection_map [VM_WRITE | VM_READ] + * If user wants to create shadow stack then they should use `map_shadow_stack` syscall. + */ + if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ))) + prot |= PROT_READ; + return ksys_mmap_pgoff(addr, len, prot, flags, fd, offset >> (PAGE_SHIFT - page_shift_offset)); } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index fa34cf55037b..98e5ece4052a 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -299,7 +299,7 @@ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); static const pgprot_t protection_map[16] = { [VM_NONE] = PAGE_NONE, [VM_READ] = PAGE_READ, - [VM_WRITE] = PAGE_COPY, + [VM_WRITE] = PAGE_SHADOWSTACK, [VM_WRITE | VM_READ] = PAGE_COPY, [VM_EXEC] = PAGE_EXEC, [VM_EXEC | VM_READ] = PAGE_READ_EXEC, diff --git a/mm/mmap.c b/mm/mmap.c index d89770eaab6b..57a974f49b00 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -47,6 +47,7 @@ #include <linux/oom.h> #include <linux/sched/mm.h> #include <linux/ksm.h> +#include <linux/processor.h> #include <linux/uaccess.h> #include <asm/cacheflush.h>
`arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ | VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ). This is to make sure that any existing apps using PROT_WRITE still work. Earlier `protection_map[VM_WRITE]` used to pick read-write PTE encodings. Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE encodings for shadow stack. Above changes ensure that existing apps continue to work because underneath kernel will be picking `protection_map[VM_WRITE|VM_READ]` PTE encodings. Signed-off-by: Deepak Gupta <debug@rivosinc.com> --- arch/riscv/include/asm/mman.h | 24 ++++++++++++++++++++++++ arch/riscv/include/asm/pgtable.h | 1 + arch/riscv/kernel/sys_riscv.c | 11 +++++++++++ arch/riscv/mm/init.c | 2 +- mm/mmap.c | 1 + 5 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 arch/riscv/include/asm/mman.h