[5.10,119/199] x86/mmx: Use KFPU_387 for MMX string operations

Message ID 20210125183221.250497496@linuxfoundation.org
State Superseded

Commit Message

Greg Kroah-Hartman Jan. 25, 2021, 6:39 p.m. UTC
From: Andy Lutomirski <luto@kernel.org>

commit 67de8dca50c027ca0fa3b62a488ee5035036a0da upstream.

The default kernel_fpu_begin() doesn't work on systems that support XMM but
haven't yet enabled CR4.OSFXSR.  This causes crashes when _mmx_memcpy() is
called too early because LDMXCSR generates #UD when the aforementioned bit
is clear.

Fix it by using kernel_fpu_begin_mask(KFPU_387) explicitly.

Fixes: 7ad816762f9b ("x86/fpu: Reset MXCSR to default in kernel_fpu_begin()")
Reported-by: Krzysztof Mazur <krzysiek@podlesie.net>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Krzysztof Piotr Olędzki <ole@ans.pl>
Tested-by: Krzysztof Mazur <krzysiek@podlesie.net>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/e7bf21855fe99e5f3baa27446e32623358f69e8d.1611205691.git.luto@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/x86/lib/mmx_32.c |   20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)
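
For context, the failure path can be seen in the control-register setup that
kernel_fpu_begin_mask() performs. The sketch below is a condensed paraphrase
of that helper as introduced by upstream commit e45122893a98, not the verbatim
kernel code; preemption handling and FPU register-state bookkeeping are
omitted:

/*
 * Condensed paraphrase of kernel_fpu_begin_mask() from
 * arch/x86/kernel/fpu/core.c -- illustrative only, not the exact code.
 */
void kernel_fpu_begin_mask(unsigned int kfpu_mask)
{
	/*
	 * LDMXCSR raises #UD until CR4.OSFXSR is enabled, so the default
	 * KFPU_387 | KFPU_MXCSR mask crashes when called too early in boot.
	 */
	if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM))
		ldmxcsr(MXCSR_DEFAULT);

	/* FNINIT needs only an FPU and is safe before CR4.OSFXSR is set. */
	if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU))
		asm volatile ("fninit");
}

Passing only KFPU_387, as this patch does, skips the LDMXCSR branch entirely
while still giving the MMX code an initialized 387 state.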

Comments

Krzysztof Olędzki Jan. 26, 2021, 3:24 a.m. UTC | #1
Hi,

I think for both 5.4-stable and 5.10-stable we also need 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e45122893a9870813f9bd7b4add4f613e6f29008 
- "x86/fpu: Add kernel_fpu_begin_mask() to selectively initialize state"

Without this, there is no kernel_fpu_begin_mask().

Thanks,
  Krzysztof
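
For reference, that prerequisite commit adds roughly the following to
arch/x86/include/asm/fpu/api.h (paraphrased from the upstream commit, so
check the tree for the exact definitions):

/* Paraphrase of commit e45122893a98, arch/x86/include/asm/fpu/api.h. */
#define KFPU_387	_BITUL(0)	/* initialize the 387 state (FNINIT) */
#define KFPU_MXCSR	_BITUL(1)	/* initialize MXCSR (LDMXCSR) */

extern void kernel_fpu_begin_mask(unsigned int kfpu_mask);

/* The existing entry point becomes a wrapper that initializes both. */
static inline void kernel_fpu_begin(void)
{
	kernel_fpu_begin_mask(KFPU_387 | KFPU_MXCSR);
}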


On 2021-01-25 at 10:39, Greg Kroah-Hartman wrote:
> From: Andy Lutomirski <luto@kernel.org>
>
> commit 67de8dca50c027ca0fa3b62a488ee5035036a0da upstream.
>
> The default kernel_fpu_begin() doesn't work on systems that support XMM but
> haven't yet enabled CR4.OSFXSR.  This causes crashes when _mmx_memcpy() is
> called too early because LDMXCSR generates #UD when the aforementioned bit
> is clear.
>
> Fix it by using kernel_fpu_begin_mask(KFPU_387) explicitly.
>
> Fixes: 7ad816762f9b ("x86/fpu: Reset MXCSR to default in kernel_fpu_begin()")
> Reported-by: Krzysztof Mazur <krzysiek@podlesie.net>
> Signed-off-by: Andy Lutomirski <luto@kernel.org>
> Signed-off-by: Borislav Petkov <bp@suse.de>
> Tested-by: Krzysztof Piotr Olędzki <ole@ans.pl>
> Tested-by: Krzysztof Mazur <krzysiek@podlesie.net>
> Cc: <stable@vger.kernel.org>
> Link: https://lkml.kernel.org/r/e7bf21855fe99e5f3baa27446e32623358f69e8d.1611205691.git.luto@kernel.org
> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
>
> ---
>  arch/x86/lib/mmx_32.c |   20 +++++++++++++++-----
>  1 file changed, 15 insertions(+), 5 deletions(-)

Greg Kroah-Hartman Jan. 26, 2021, 8:27 a.m. UTC | #2
On Mon, Jan 25, 2021 at 07:24:56PM -0800, Krzysztof Olędzki wrote:
> Hi,
>
> I think for both 5.4-stable and 5.10-stable we also need
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e45122893a9870813f9bd7b4add4f613e6f29008
> - "x86/fpu: Add kernel_fpu_begin_mask() to selectively initialize state"
>
> Without this, there is no kernel_fpu_begin_mask().

Thank you, I have now added this to both trees.

greg k-h

Patch

--- a/arch/x86/lib/mmx_32.c
+++ b/arch/x86/lib/mmx_32.c
@@ -26,6 +26,16 @@
 #include <asm/fpu/api.h>
 #include <asm/asm.h>
 
+/*
+ * Use KFPU_387.  MMX instructions are not affected by MXCSR,
+ * but both AMD and Intel documentation states that even integer MMX
+ * operations will result in #MF if an exception is pending in FCW.
+ *
+ * EMMS is not needed afterwards because, after calling kernel_fpu_end(),
+ * any subsequent user of the 387 stack will reinitialize it using
+ * KFPU_387.
+ */
+
 void *_mmx_memcpy(void *to, const void *from, size_t len)
 {
 	void *p;
@@ -37,7 +47,7 @@ void *_mmx_memcpy(void *to, const void *
 	p = to;
 	i = len >> 6; /* len/64 */
 
-	kernel_fpu_begin();
+	kernel_fpu_begin_mask(KFPU_387);
 
 	__asm__ __volatile__ (
 		"1: prefetch (%0)\n"		/* This set is 28 bytes */
@@ -127,7 +137,7 @@ static void fast_clear_page(void *page)
 {
 	int i;
 
-	kernel_fpu_begin();
+	kernel_fpu_begin_mask(KFPU_387);
 
 	__asm__ __volatile__ (
 		"  pxor %%mm0, %%mm0\n" : :
@@ -160,7 +170,7 @@ static void fast_copy_page(void *to, voi
 {
 	int i;
 
-	kernel_fpu_begin();
+	kernel_fpu_begin_mask(KFPU_387);
 
 	/*
 	 * maybe the prefetch stuff can go before the expensive fnsave...
@@ -247,7 +257,7 @@ static void fast_clear_page(void *page)
 {
 	int i;
 
-	kernel_fpu_begin();
+	kernel_fpu_begin_mask(KFPU_387);
 
 	__asm__ __volatile__ (
 		"  pxor %%mm0, %%mm0\n" : :
@@ -282,7 +292,7 @@ static void fast_copy_page(void *to, voi
 {
 	int i;
 
-	kernel_fpu_begin();
+	kernel_fpu_begin_mask(KFPU_387);
 
 	__asm__ __volatile__ (
 		"1: prefetch (%0)\n"