Message ID: 20241028160917.1380714-4-alexander.shishkin@linux.intel.com
State: New
Series: Enable Linear Address Space Separation support
On 10/28/24 09:07, Alexander Shishkin wrote:
>  static void text_poke_memcpy(void *dst, const void *src, size_t len)
>  {
> -	memcpy(dst, src, len);
> +	lass_stac();
> +	__inline_memcpy(dst, src, len);
> +	lass_clac();
>  }
>
>  static void text_poke_memset(void *dst, const void *src, size_t len)
>  {
>  	int c = *(const int *)src;
>
> -	memset(dst, c, len);
> +	lass_stac();
> +	__inline_memset(dst, c, len);
> +	lass_clac();
>  }

These are the _only_ users of lass_stac/clac() or the new inlines.

First of all, I totally agree that the _existing_ strict objtool
behavior around STAC/CLAC is a good idea.

But text poking really is special and the context is highly unlikely to
result in bugs or exploits. My first instinct here would have been to
tell objtool that the text poking code is OK and to relax objtool's
STAC/CLAC paranoia here.

Looking at objtool, I can see how important it is to keep the STAC/CLAC
code as dirt simple and foolproof as possible. I don't see an obvious
way to except the text poking code without adding at least some
complexity.

Basically what I'm asking for is: if the goal is to keep objtool
simple, please *SAY* that. Because on the surface this doesn't look
like a good idea.
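For background, lass_stac()/lass_clac() come from an earlier patch in
this series as LASS-aware counterparts of the SMAP stac()/clac()
helpers. A minimal sketch of the general shape, assuming (illustration
only, the series carries the authoritative definitions) that they are
alternative()-gated on the LASS CPU feature bit the series introduces:

/*
 * Sketch modeled on stac()/clac() from <asm/smap.h>; the gating on
 * X86_FEATURE_LASS is an assumption for this illustration.
 */
static __always_inline void lass_stac(void)
{
	/* Set EFLAGS.AC; patched to a no-op on CPUs without LASS. */
	alternative("", "stac", X86_FEATURE_LASS);
}

static __always_inline void lass_clac(void)
{
	/* Clear EFLAGS.AC again, closing the access window. */
	alternative("", "clac", X86_FEATURE_LASS);
}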
On Mon, Oct 28, 2024 at 10:49:07AM -0700, Dave Hansen wrote:
> On 10/28/24 09:07, Alexander Shishkin wrote:
> >  static void text_poke_memcpy(void *dst, const void *src, size_t len)
> >  {
> > -	memcpy(dst, src, len);
> > +	lass_stac();
> > +	__inline_memcpy(dst, src, len);
> > +	lass_clac();
> >  }
> >
> >  static void text_poke_memset(void *dst, const void *src, size_t len)
> >  {
> >  	int c = *(const int *)src;
> >
> > -	memset(dst, c, len);
> > +	lass_stac();
> > +	__inline_memset(dst, c, len);
> > +	lass_clac();
> >  }
>
> These are the _only_ users of lass_stac/clac() or the new inlines.

For now. I have vague memories of running into trouble with compilers
doing random things with memcpy before, and having these inline
versions gives us more control.

One of the cases I remember running into was KASAN, where a compiler is
SUPPOSED to issue __asan_memcpy calls instead of the regular memcpy
calls, except they weren't all doing that, with the end result that our
regular memcpy implementation grew instrumentation to deal with that.

That got sorted -- by deprecating / breaking all those non-conformant
compilers. But still, I think it would be good to have the option to
force a simple inline memcpy when needed.

> First of all, I totally agree that the _existing_ strict objtool
> behavior around STAC/CLAC is a good idea.
>
> But text poking really is special and the context is highly unlikely to
> result in bugs or exploits. My first instinct here would have been to
> tell objtool that the text poking code is OK and to relax objtool's
> STAC/CLAC paranoia here.
>
> Looking at objtool, I can see how important it is to keep the STAC/CLAC
> code as dirt simple and foolproof as possible. I don't see an obvious
> way to except the text poking code without adding at least some
> complexity.
>
> Basically what I'm asking for is: if the goal is to keep objtool
> simple, please *SAY* that. Because on the surface this doesn't look
> like a good idea.

There is: you can add it to uaccess_safe_builtin[], but I'm not sure we
want to blanket-accept memcpy() -- or perhaps that is what you're
saying.

Anyway, looking at this, I see we grew rep_{movs,stos}_alternative, as
used in copy_user_generic() and __clear_user(). Which are all somewhat
similar.
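For reference, the whitelist mentioned here is a plain string table in
tools/objtool/check.c; functions named in it are considered safe to
call while EFLAGS.AC is set. Excepting something from the STAC/CLAC
rules looks roughly like the sketch below (heavily abridged, and the
memcpy entry is exactly the hypothetical blanket exception being
debated, not an actual change):

/* tools/objtool/check.c -- abridged illustration, not the full list. */
static const char *uaccess_safe_builtin[] = {
	/* KASAN */
	"kasan_check_range",
	"kasan_report",
	/* ... many more entries ... */
	"memcpy",	/* hypothetical: would blanket-accept memcpy() */
	NULL
};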
On Tue, Oct 29, 2024 at 12:36:11PM +0100, Peter Zijlstra wrote:
> Anyway, looking at this, I see we grew rep_{movs,stos}_alternative, as
> used in copy_user_generic() and __clear_user(). Which are all somewhat
> similar.

That is, we could consider something like the completely untested and
probably broken, will-light-your-granny-on-fire-and-maul-pets thing
below..

---
diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h
index 9cb5aae7fba9..e25a988360a1 100644
--- a/arch/x86/include/asm/string.h
+++ b/arch/x86/include/asm/string.h
@@ -2,31 +2,50 @@
 #ifndef _ASM_X86_STRING_H
 #define _ASM_X86_STRING_H
 
+#include <asm/asm.h>
+#include <asm/alternative.h>
+
 #ifdef CONFIG_X86_32
 # include <asm/string_32.h>
 #else
 # include <asm/string_64.h>
 #endif
 
+#ifdef CONFIG_X86_64
+#define ALT_64(orig, alt, feat)	ALTERNATIVE(orig, alt, feat)
+#else
+#define ALT_64(orig, alt, feat)	orig
+#endif
+
 static __always_inline void *__inline_memcpy(void *to, const void *from, size_t len)
 {
 	void *ret = to;
 
-	asm volatile("rep movsb"
-		     : "+D" (to), "+S" (from), "+c" (len)
-		     : : "memory");
-	return ret;
+	asm volatile("1:\n\t"
+		     ALT_64("rep movsb",
			    "call rep_movs_alternative", ALT_NOT(X86_FEATURE_FSRM))
+		     "2:\n\t"
+		     _ASM_EXTABLE_UA(1b, 2b)
+		     : "+D" (to), "+S" (from), "+c" (len), ASM_CALL_CONSTRAINT
+		     : : "memory", _ASM_AX);
+
+	return ret + len;
 }
 
 static __always_inline void *__inline_memset(void *s, int v, size_t n)
 {
 	void *ret = s;
 
-	asm volatile("rep stosb"
-		     : "+D" (s), "+c" (n)
+	asm volatile("1:\n\t"
+		     ALT_64("rep stosb",
			    "call rep_stos_alternative", ALT_NOT(X86_FEATURE_FSRS))
+		     "2:\n\t"
+		     _ASM_EXTABLE_UA(1b, 2b)
+		     : "+D" (s), "+c" (n), ASM_CALL_CONSTRAINT
 		     : "a" ((uint8_t)v)
-		     : "memory");
-	return ret;
+		     : "memory", _ASM_SI);
+
+	return ret + n;
 }
 
 #endif /* _ASM_X86_STRING_H */
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index b0a887209400..9f2d2c2ca731 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -13,6 +13,7 @@
 #include <asm/page.h>
 #include <asm/percpu.h>
 #include <asm/runtime-const.h>
+#include <asm/string.h>
 
 /*
  * Virtual variable: there's no actual backing store for this,
@@ -118,21 +119,12 @@ rep_movs_alternative(void *to, const void *from, unsigned len);
 static __always_inline __must_check unsigned long
 copy_user_generic(void *to, const void *from, unsigned long len)
 {
+	void *ret;
+
 	stac();
-	/*
-	 * If CPU has FSRM feature, use 'rep movs'.
-	 * Otherwise, use rep_movs_alternative.
-	 */
-	asm volatile(
-		"1:\n\t"
-		ALTERNATIVE("rep movsb",
			    "call rep_movs_alternative", ALT_NOT(X86_FEATURE_FSRM))
-		"2:\n"
-		_ASM_EXTABLE_UA(1b, 2b)
-		:"+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
-		: : "memory", "rax");
+	ret = __inline_memcpy(to, from, len);
 	clac();
-	return len;
+	return ret - to;
 }
 
 static __always_inline __must_check unsigned long
@@ -178,25 +170,15 @@ rep_stos_alternative(void __user *addr, unsigned long len);
 static __always_inline __must_check unsigned long
 __clear_user(void __user *addr, unsigned long size)
 {
-	might_fault();
-	stac();
+	void *ret;
 
-	/*
-	 * No memory constraint because it doesn't change any memory gcc
-	 * knows about.
-	 */
-	asm volatile(
-		"1:\n\t"
-		ALTERNATIVE("rep stosb",
			    "call rep_stos_alternative", ALT_NOT(X86_FEATURE_FSRS))
-		"2:\n"
-		_ASM_EXTABLE_UA(1b, 2b)
-		: "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT
-		: "a" (0));
+	might_fault();
+	stac();
+	ret = __inline_memset(addr, 0, size);
 	clac();
-	return size;
+	return ret - addr;
 }
 
 static __always_inline unsigned long clear_user(void __user *to, unsigned long n)
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index 2760a15fbc00..17d4bf6f50e5 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -53,16 +53,22 @@ SYM_FUNC_END(clear_page_erms)
 EXPORT_SYMBOL_GPL(clear_page_erms)
 
 /*
- * Default clear user-space.
+ * Default memset
  * Input:
  * rdi destination
+ * rsi scratch
  * rcx count
- * rax is zero
+ * al is value
  *
  * Output:
- * rcx: uncleared bytes or 0 if successful.
+ * rcx: unset bytes or 0 if successful.
  */
 SYM_FUNC_START(rep_stos_alternative)
+
+	movzbl %al, %esi
+	movabs $0x0101010101010101, %rax
+	mulq %rsi
+
 	cmpq $64,%rcx
 	jae .Lunrolled
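The three instructions added at the top of rep_stos_alternative are the
usual multiply-based byte broadcast: multiplying the zero-extended fill
byte by a mask with 0x01 in every byte lane replicates it across the
whole 64-bit word. As a standalone C illustration:

#include <stdint.h>

/*
 * Replicate one byte across all eight bytes of a 64-bit word, the same
 * computation the movzbl/movabs/mulq sequence above performs:
 * 0xab -> 0xabababababababab.
 */
static inline uint64_t broadcast_byte(uint8_t v)
{
	return (uint64_t)v * 0x0101010101010101ULL;
}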
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index d17518ca19b8..2dc097014c2d 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1841,16 +1841,24 @@ static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
 __ro_after_init struct mm_struct *poking_mm;
 __ro_after_init unsigned long poking_addr;
 
+/*
+ * poking_init() initializes the text poking address from the lower half of the
+ * address space. Relax LASS enforcement when accessing the poking address.
+ */
 static void text_poke_memcpy(void *dst, const void *src, size_t len)
 {
-	memcpy(dst, src, len);
+	lass_stac();
+	__inline_memcpy(dst, src, len);
+	lass_clac();
 }
 
 static void text_poke_memset(void *dst, const void *src, size_t len)
 {
 	int c = *(const int *)src;
 
-	memset(dst, c, len);
+	lass_stac();
+	__inline_memset(dst, c, len);
+	lass_clac();
 }
 
 typedef void text_poke_f(void *dst, const void *src, size_t len);
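With the hunk above, the AC-window management stays buried inside the
two helpers, so text-poking call sites are unchanged: text_poke()
reaches text_poke_memcpy() via __text_poke(). A sketch of a typical
caller, where the patch target addr and the wrapper function are
hypothetical and shown only to illustrate the call path:

/* Patch a single INT3 (0xcc) byte at a hypothetical text address. */
static void example_poke_int3(void *addr)
{
	u8 int3 = 0xcc;

	text_poke(addr, &int3, 1);
}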