| Message ID | 20250331082251.3171276-11-xin@zytor.com |
|---|---|
| State | New |
| Series | MSR refactor with new MSR instructions support |
On Mon, Mar 31, 2025, Xin Li (Intel) wrote:
> Signed-off-by: Xin Li (Intel) <xin@zytor.com>
> ---
>  arch/x86/include/asm/msr-index.h |  6 ++++++
>  arch/x86/kvm/vmx/vmenter.S       | 28 ++++++++++++++++++++++++----
>  2 files changed, 30 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index e6134ef2263d..04244c3ba374 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -1226,4 +1226,10 @@
>   * a #GP
>   */
> 
> +/* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */
> +#define ASM_WRMSRNS		_ASM_BYTES(0x0f,0x01,0xc6)
> +
> +/* Instruction opcode for the immediate form RDMSR/WRMSRNS */
> +#define ASM_WRMSRNS_RAX		_ASM_BYTES(0xc4,0xe7,0x7a,0xf6,0xc0)
> +
>  #endif /* _ASM_X86_MSR_INDEX_H */
> diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
> index f6986dee6f8c..9fae43723c44 100644
> --- a/arch/x86/kvm/vmx/vmenter.S
> +++ b/arch/x86/kvm/vmx/vmenter.S
> @@ -64,6 +64,29 @@
>  	RET
>  .endm
> 
> +/*
> + * Write EAX to MSR_IA32_SPEC_CTRL.
> + *
> + * Choose the best WRMSR instruction based on availability.
> + *
> + * Replace with 'wrmsrns' and 'wrmsrns %rax, $MSR_IA32_SPEC_CTRL' once binutils support them.
> + */
> +.macro WRITE_EAX_TO_MSR_IA32_SPEC_CTRL
> +	ALTERNATIVE_2 __stringify(mov $MSR_IA32_SPEC_CTRL, %ecx;	\
> +				  xor %edx, %edx;			\
> +				  mov %edi, %eax;			\
> +				  ds wrmsr),				\
> +		      __stringify(mov $MSR_IA32_SPEC_CTRL, %ecx;	\
> +				  xor %edx, %edx;			\
> +				  mov %edi, %eax;			\
> +				  ASM_WRMSRNS),				\
> +		      X86_FEATURE_WRMSRNS,				\
> +		      __stringify(xor %_ASM_AX, %_ASM_AX;		\
> +				  mov %edi, %eax;			\
> +				  ASM_WRMSRNS_RAX; .long MSR_IA32_SPEC_CTRL), \
> +		      X86_FEATURE_MSR_IMM
> +.endm

This is quite hideous. I have no objection to optimizing __vmx_vcpu_run(), but
I would much prefer that a macro like this live in generic code, and that it be
generic. It should be easy enough to provide an assembly friendly equivalent to
__native_wrmsr_constant().

> +
>  .section .noinstr.text, "ax"
> 
>  /**
> @@ -123,10 +146,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
>  	movl PER_CPU_VAR(x86_spec_ctrl_current), %esi
>  	cmp %edi, %esi
>  	je .Lspec_ctrl_done
> -	mov $MSR_IA32_SPEC_CTRL, %ecx
> -	xor %edx, %edx
> -	mov %edi, %eax
> -	wrmsr
> +	WRITE_EAX_TO_MSR_IA32_SPEC_CTRL
> 
>  .Lspec_ctrl_done:
> 
> --
> 2.49.0
> 
On 4/10/2025 4:24 PM, Sean Christopherson wrote:
>> +.macro WRITE_EAX_TO_MSR_IA32_SPEC_CTRL
[...]
>> +.endm
> This is quite hideous. I have no objection to optimizing __vmx_vcpu_run(), but
> I would much prefer that a macro like this live in generic code, and that it be
> generic. It should be easy enough to provide an assembly friendly equivalent to
> __native_wrmsr_constant().

Will do.
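A generic, assembly-friendly helper along the lines Sean asks for could look roughly like the sketch below. It is built only from pieces already present in the patch (ALTERNATIVE_2, ASM_WRMSRNS, ASM_WRMSRNS_RAX, the X86_FEATURE_* flags); the macro name WRITE_MSR_FROM_RAX, the convention of passing the 64-bit value in %rax, and the clobber set are assumptions for illustration, not what the series eventually merged.

/*
 * Sketch only (assumed name and calling convention): write the 64-bit
 * value in %rax to the constant MSR \msr, picking the best available
 * instruction at patch time.  Clobbers %rcx and %rdx on the
 * non-immediate paths.
 */
.macro WRITE_MSR_FROM_RAX msr:req
	ALTERNATIVE_2 __stringify(mov $\msr, %ecx;			\
				  mov %rax, %rdx;			\
				  shr $32, %rdx;			\
				  ds wrmsr),				\
		      __stringify(mov $\msr, %ecx;			\
				  mov %rax, %rdx;			\
				  shr $32, %rdx;			\
				  ASM_WRMSRNS),				\
		      X86_FEATURE_WRMSRNS,				\
		      __stringify(ASM_WRMSRNS_RAX; .long \msr),		\
		      X86_FEATURE_MSR_IMM
.endm

A call site like the one in __vmx_vcpu_run() would then load the value into %rax (mov %edi, %eax zero-extends) and invoke WRITE_MSR_FROM_RAX MSR_IA32_SPEC_CTRL.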
On April 11, 2025 9:18:08 AM PDT, Xin Li <xin@zytor.com> wrote:
>On 4/10/2025 4:24 PM, Sean Christopherson wrote:
>>> +.macro WRITE_EAX_TO_MSR_IA32_SPEC_CTRL
[...]
>>> +.endm
>> This is quite hideous. I have no objection to optimizing __vmx_vcpu_run(), but
>> I would much prefer that a macro like this live in generic code, and that it be
>> generic. It should be easy enough to provide an assembly friendly equivalent to
>> __native_wrmsr_constant().
>
>Will do.

This should be coming anyway, right?
On Thu, Apr 10, 2025 at 4:24 PM Sean Christopherson <seanjc@google.com> wrote:
>
> On Mon, Mar 31, 2025, Xin Li (Intel) wrote:
> > Signed-off-by: Xin Li (Intel) <xin@zytor.com>
> > ---
[...]
> > +.macro WRITE_EAX_TO_MSR_IA32_SPEC_CTRL
[...]
> > +.endm
>
> This is quite hideous. I have no objection to optimizing __vmx_vcpu_run(), but
> I would much prefer that a macro like this live in generic code, and that it be
> generic. It should be easy enough to provide an assembly friendly equivalent to
> __native_wrmsr_constant().

Surely, any CPU that has WRMSRNS also supports "Virtualize
IA32_SPEC_CTRL," right? Shouldn't we be using that feature rather than
swapping host and guest values with some form of WRMSR?

[...]
On 4/11/2025 1:50 PM, H. Peter Anvin wrote:
>>> This is quite hideous. I have no objection to optimizing __vmx_vcpu_run(), but
>>> I would much prefer that a macro like this live in generic code, and that it be
>>> generic. It should be easy enough to provide an assembly friendly equivalent to
>>> __native_wrmsr_constant().
>> Will do.
> This should be coming anyway, right?

Absolutely. Totally stupid me: we have it ready to use here, but ...
On 4/11/2025 2:12 PM, Jim Mattson wrote:
> Surely, any CPU that has WRMSRNS also supports "Virtualize
> IA32_SPEC_CTRL," right? Shouldn't we be using that feature rather than
> swapping host and guest values with some form of WRMSR?

Good question, the simple answer is that they are irrelevant.
On April 11, 2025 9:32:32 PM PDT, Xin Li <xin@zytor.com> wrote:
>On 4/11/2025 2:12 PM, Jim Mattson wrote:
>> Surely, any CPU that has WRMSRNS also supports "Virtualize
>> IA32_SPEC_CTRL," right? Shouldn't we be using that feature rather than
>> swapping host and guest values with some form of WRMSR?
>
>Good question, the simple answer is that they are irrelevant.

Also, *in this specific case* IA32_SPEC_CTRL is architecturally
nonserializing, i.e. WRMSR executes as WRMSRNS anyway.
On 4/12/2025 4:10 PM, H. Peter Anvin wrote:
> Also, *in this specific case* IA32_SPEC_CTRL is architecturally nonserializing, i.e. WRMSR executes as WRMSRNS anyway.
While the immediate form WRMSRNS could be faster because the MSR index
is available *much* earlier in the pipeline, right?
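For readers skimming the thread, the two sequences from the patch make this point concrete: with the legacy instruction the MSR index travels through %ecx and is only known once that register is read, whereas the immediate form carries the index in the instruction bytes themselves, so the decoder sees it up front. Both snippets below are lifted from the patch earlier in the thread, with comments added; nothing here is new code.

	/* Legacy WRMSR: index read from %ecx, value from %edx:%eax */
	mov	$MSR_IA32_SPEC_CTRL, %ecx
	xor	%edx, %edx
	mov	%edi, %eax
	wrmsr

	/* Immediate-form WRMSRNS: the index is the 32-bit immediate that
	 * follows the opcode bytes, the 64-bit value is taken from %rax */
	xor	%_ASM_AX, %_ASM_AX
	mov	%edi, %eax
	ASM_WRMSRNS_RAX; .long MSR_IA32_SPEC_CTRL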
On April 14, 2025 10:48:47 AM PDT, Xin Li <xin@zytor.com> wrote:
>On 4/12/2025 4:10 PM, H. Peter Anvin wrote:
>> Also, *in this specific case* IA32_SPEC_CTRL is architecturally nonserializing, i.e. WRMSR executes as WRMSRNS anyway.
>
>While the immediate form WRMSRNS could be faster because the MSR index
>is available *much* earlier in the pipeline, right?

Yes, but then it would be redundant with the virtualization support.
On 4/14/2025 11:56 PM, H. Peter Anvin wrote:
>> While the immediate form WRMSRNS could be faster because the MSR index
>> is available *much* earlier in the pipeline, right?
> Yes, but then it would be redundant with the virtualization support.

So better to drop this patch then.
On April 15, 2025 10:06:01 AM PDT, Xin Li <xin@zytor.com> wrote:
>On 4/14/2025 11:56 PM, H. Peter Anvin wrote:
>>> While the immediate form WRMSRNS could be faster because the MSR index
>>> is available *much* earlier in the pipeline, right?
>> Yes, but then it would be redundant with the virtualization support.
>
>So better to drop this patch then.

Yeah, if it gets pulled in as a consequence of a global change that is OK
but the local change makes no sense.
Signed-off-by: Xin Li (Intel) <xin@zytor.com>
---
 arch/x86/include/asm/msr-index.h |  6 ++++++
 arch/x86/kvm/vmx/vmenter.S       | 28 ++++++++++++++++++++++++----
 2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index e6134ef2263d..04244c3ba374 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -1226,4 +1226,10 @@
  * a #GP
  */
 
+/* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */
+#define ASM_WRMSRNS		_ASM_BYTES(0x0f,0x01,0xc6)
+
+/* Instruction opcode for the immediate form RDMSR/WRMSRNS */
+#define ASM_WRMSRNS_RAX		_ASM_BYTES(0xc4,0xe7,0x7a,0xf6,0xc0)
+
 #endif /* _ASM_X86_MSR_INDEX_H */
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index f6986dee6f8c..9fae43723c44 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -64,6 +64,29 @@
 	RET
 .endm
 
+/*
+ * Write EAX to MSR_IA32_SPEC_CTRL.
+ *
+ * Choose the best WRMSR instruction based on availability.
+ *
+ * Replace with 'wrmsrns' and 'wrmsrns %rax, $MSR_IA32_SPEC_CTRL' once binutils support them.
+ */
+.macro WRITE_EAX_TO_MSR_IA32_SPEC_CTRL
+	ALTERNATIVE_2 __stringify(mov $MSR_IA32_SPEC_CTRL, %ecx;	\
+				  xor %edx, %edx;			\
+				  mov %edi, %eax;			\
+				  ds wrmsr),				\
+		      __stringify(mov $MSR_IA32_SPEC_CTRL, %ecx;	\
+				  xor %edx, %edx;			\
+				  mov %edi, %eax;			\
+				  ASM_WRMSRNS),				\
+		      X86_FEATURE_WRMSRNS,				\
+		      __stringify(xor %_ASM_AX, %_ASM_AX;		\
+				  mov %edi, %eax;			\
+				  ASM_WRMSRNS_RAX; .long MSR_IA32_SPEC_CTRL), \
+		      X86_FEATURE_MSR_IMM
+.endm
+
 .section .noinstr.text, "ax"
 
 /**
@@ -123,10 +146,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
 	movl PER_CPU_VAR(x86_spec_ctrl_current), %esi
 	cmp %edi, %esi
 	je .Lspec_ctrl_done
-	mov $MSR_IA32_SPEC_CTRL, %ecx
-	xor %edx, %edx
-	mov %edi, %eax
-	wrmsr
+	WRITE_EAX_TO_MSR_IA32_SPEC_CTRL
 
 .Lspec_ctrl_done:
 