Message ID | 20240207172646.3981-9-xin3.li@intel.com |
---|---|
State | New |
Headers | show |
Series | Enable FRED with KVM VMX | expand |
On Wed, Feb 07, 2024 at 09:26:28AM -0800, Xin Li wrote: >Initialize host VMCS FRED fields with host FRED MSRs' value and >guest VMCS FRED fields to 0. > >FRED CPU states are managed in 9 new FRED MSRs, as well as a few >existing CPU registers and MSRs, e.g., CR4.FRED. To support FRED >context management, new VMCS fields corresponding to most of FRED >CPU state MSRs are added to both the host-state and guest-state >areas of VMCS. > >Specifically no VMCS fields are added for FRED RSP0 and SSP0 MSRs, >because the 2 FRED MSRs are used during ring 3 event delivery only, >thus KVM, running on ring 0, can run safely even with guest FRED >RSP0 and SSP0. It can be deferred to load host FRED RSP0 and SSP0 >until before returning to user level. > >Signed-off-by: Xin Li <xin3.li@intel.com> >Tested-by: Shan Kang <shan.kang@intel.com> >--- > >Changes since v1: >* Use kvm_cpu_cap_has() instead of cpu_feature_enabled() to decouple > KVM's capability to virtualize a feature and host's enabling of a > feature (Chao Gao). >* Move guest FRED states init into __vmx_vcpu_reset() (Chao Gao). >--- > arch/x86/include/asm/vmx.h | 16 ++++++++++++++++ > arch/x86/kvm/vmx/vmx.c | 34 ++++++++++++++++++++++++++++++++++ > 2 files changed, 50 insertions(+) > >diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h >index cb14f7e315f5..4889754415b5 100644 >--- a/arch/x86/include/asm/vmx.h >+++ b/arch/x86/include/asm/vmx.h >@@ -280,12 +280,28 @@ enum vmcs_field { > GUEST_BNDCFGS_HIGH = 0x00002813, > GUEST_IA32_RTIT_CTL = 0x00002814, > GUEST_IA32_RTIT_CTL_HIGH = 0x00002815, >+ GUEST_IA32_FRED_CONFIG = 0x0000281a, >+ GUEST_IA32_FRED_RSP1 = 0x0000281c, >+ GUEST_IA32_FRED_RSP2 = 0x0000281e, >+ GUEST_IA32_FRED_RSP3 = 0x00002820, >+ GUEST_IA32_FRED_STKLVLS = 0x00002822, >+ GUEST_IA32_FRED_SSP1 = 0x00002824, >+ GUEST_IA32_FRED_SSP2 = 0x00002826, >+ GUEST_IA32_FRED_SSP3 = 0x00002828, > HOST_IA32_PAT = 0x00002c00, > HOST_IA32_PAT_HIGH = 0x00002c01, > HOST_IA32_EFER = 0x00002c02, > HOST_IA32_EFER_HIGH = 0x00002c03, > HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, > HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05, >+ HOST_IA32_FRED_CONFIG = 0x00002c08, >+ HOST_IA32_FRED_RSP1 = 0x00002c0a, >+ HOST_IA32_FRED_RSP2 = 0x00002c0c, >+ HOST_IA32_FRED_RSP3 = 0x00002c0e, >+ HOST_IA32_FRED_STKLVLS = 0x00002c10, >+ HOST_IA32_FRED_SSP1 = 0x00002c12, >+ HOST_IA32_FRED_SSP2 = 0x00002c14, >+ HOST_IA32_FRED_SSP3 = 0x00002c16, > PIN_BASED_VM_EXEC_CONTROL = 0x00004000, > CPU_BASED_VM_EXEC_CONTROL = 0x00004002, > EXCEPTION_BITMAP = 0x00004004, >diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c >index d58ed2d3d379..b7b772183ee4 100644 >--- a/arch/x86/kvm/vmx/vmx.c >+++ b/arch/x86/kvm/vmx/vmx.c >@@ -1470,6 +1470,18 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, > (unsigned long)(cpu_entry_stack(cpu) + 1)); > } > >+#ifdef CONFIG_X86_64 is this #ifdeffery neccesary? I assume kvm_cpu_cap_has(X86_FEATURE_FRED) is always false for !CONFIG_X86_64. Looks most of FRED changes in core kernel don't have such #ifdeffery. >+ /* Per-CPU FRED MSRs */ Please explain why these six MSRs are updated here and why only they are updated in this comment. >+ if (kvm_cpu_cap_has(X86_FEATURE_FRED)) { >+ vmcs_write64(HOST_IA32_FRED_RSP1, read_msr(MSR_IA32_FRED_RSP1)); >+ vmcs_write64(HOST_IA32_FRED_RSP2, read_msr(MSR_IA32_FRED_RSP2)); >+ vmcs_write64(HOST_IA32_FRED_RSP3, read_msr(MSR_IA32_FRED_RSP3)); >+ vmcs_write64(HOST_IA32_FRED_SSP1, read_msr(MSR_IA32_FRED_SSP1)); >+ vmcs_write64(HOST_IA32_FRED_SSP2, read_msr(MSR_IA32_FRED_SSP2)); >+ vmcs_write64(HOST_IA32_FRED_SSP3, read_msr(MSR_IA32_FRED_SSP3)); >+ } >+#endif >+ > vmx->loaded_vmcs->cpu = cpu; > } > } >@@ -4321,6 +4333,15 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) > */ > vmcs_write16(HOST_DS_SELECTOR, 0); > vmcs_write16(HOST_ES_SELECTOR, 0); >+ >+ /* >+ * FRED MSRs are per-cpu, however FRED CONFIG and STKLVLS MSRs >+ * are the same on all CPUs, thus they are initialized here. >+ */ >+ if (kvm_cpu_cap_has(X86_FEATURE_FRED)) { >+ vmcs_write64(HOST_IA32_FRED_CONFIG, read_msr(MSR_IA32_FRED_CONFIG)); >+ vmcs_write64(HOST_IA32_FRED_STKLVLS, read_msr(MSR_IA32_FRED_STKLVLS)); >+ } > #else > vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ > vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ >@@ -4865,6 +4886,19 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu) > */ > vmx->pi_desc.nv = POSTED_INTR_VECTOR; > vmx->pi_desc.sn = 1; >+ >+#ifdef CONFIG_X86_64 ditto >+ if (kvm_cpu_cap_has(X86_FEATURE_FRED)) { >+ vmcs_write64(GUEST_IA32_FRED_CONFIG, 0); >+ vmcs_write64(GUEST_IA32_FRED_RSP1, 0); >+ vmcs_write64(GUEST_IA32_FRED_RSP2, 0); >+ vmcs_write64(GUEST_IA32_FRED_RSP3, 0); >+ vmcs_write64(GUEST_IA32_FRED_STKLVLS, 0); >+ vmcs_write64(GUEST_IA32_FRED_SSP1, 0); >+ vmcs_write64(GUEST_IA32_FRED_SSP2, 0); >+ vmcs_write64(GUEST_IA32_FRED_SSP3, 0); >+ } >+#endif > } > > static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) >-- >2.43.0 > >
> >+#ifdef CONFIG_X86_64 > > is this #ifdeffery neccesary? Yes, otherwise build fails on 32 bit. > > I assume kvm_cpu_cap_has(X86_FEATURE_FRED) is always false > for !CONFIG_X86_64. > Looks most of FRED changes in core kernel don't have such #ifdeffery. Because it's not a compile time false, instead false from runtime. > > >+ /* Per-CPU FRED MSRs */ > > Please explain why these six MSRs are updated here and why only they are updated in > this comment. The explanation is kind of implicit "per-CPU", I will make it more explicit. Thanks! Xin
On Wed, Feb 07, 2024, Xin Li wrote: > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > index d58ed2d3d379..b7b772183ee4 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -1470,6 +1470,18 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, > (unsigned long)(cpu_entry_stack(cpu) + 1)); > } > > +#ifdef CONFIG_X86_64 Don't bother, practically no one cares about 32-bit KVM these days, and I highly don't anyone that runs 32-bit KVM cares about the code footprint to this degree. > + /* Per-CPU FRED MSRs */ > + if (kvm_cpu_cap_has(X86_FEATURE_FRED)) { > + vmcs_write64(HOST_IA32_FRED_RSP1, read_msr(MSR_IA32_FRED_RSP1)); > + vmcs_write64(HOST_IA32_FRED_RSP2, read_msr(MSR_IA32_FRED_RSP2)); > + vmcs_write64(HOST_IA32_FRED_RSP3, read_msr(MSR_IA32_FRED_RSP3)); > + vmcs_write64(HOST_IA32_FRED_SSP1, read_msr(MSR_IA32_FRED_SSP1)); > + vmcs_write64(HOST_IA32_FRED_SSP2, read_msr(MSR_IA32_FRED_SSP2)); > + vmcs_write64(HOST_IA32_FRED_SSP3, read_msr(MSR_IA32_FRED_SSP3)); That's a lot of RDMSRs to eat on every task migration. How hard would it be to add a per-CPU cache for each of these? Or is there a pre-existing way to get at the info that's faster than RDMSR? > + } > +#endif > + > vmx->loaded_vmcs->cpu = cpu; > } > } > @@ -4321,6 +4333,15 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) > */ > vmcs_write16(HOST_DS_SELECTOR, 0); > vmcs_write16(HOST_ES_SELECTOR, 0); > + > + /* > + * FRED MSRs are per-cpu, however FRED CONFIG and STKLVLS MSRs > + * are the same on all CPUs, thus they are initialized here. Eh, just trim this to: /* FRED CONFIG and STKLVLS are the same on all CPUs. */ > + */ > + if (kvm_cpu_cap_has(X86_FEATURE_FRED)) { > + vmcs_write64(HOST_IA32_FRED_CONFIG, read_msr(MSR_IA32_FRED_CONFIG)); > + vmcs_write64(HOST_IA32_FRED_STKLVLS, read_msr(MSR_IA32_FRED_STKLVLS)); > + } > #else > vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ > vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ > @@ -4865,6 +4886,19 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu) > */ > vmx->pi_desc.nv = POSTED_INTR_VECTOR; > vmx->pi_desc.sn = 1; > + > +#ifdef CONFIG_X86_64 > + if (kvm_cpu_cap_has(X86_FEATURE_FRED)) { > + vmcs_write64(GUEST_IA32_FRED_CONFIG, 0); > + vmcs_write64(GUEST_IA32_FRED_RSP1, 0); > + vmcs_write64(GUEST_IA32_FRED_RSP2, 0); > + vmcs_write64(GUEST_IA32_FRED_RSP3, 0); > + vmcs_write64(GUEST_IA32_FRED_STKLVLS, 0); > + vmcs_write64(GUEST_IA32_FRED_SSP1, 0); > + vmcs_write64(GUEST_IA32_FRED_SSP2, 0); > + vmcs_write64(GUEST_IA32_FRED_SSP3, 0); > + } Somewhat of a moot point, but this belongs in init_vmcs(), not __vmx_vcpu_reset().
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index cb14f7e315f5..4889754415b5 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -280,12 +280,28 @@ enum vmcs_field { GUEST_BNDCFGS_HIGH = 0x00002813, GUEST_IA32_RTIT_CTL = 0x00002814, GUEST_IA32_RTIT_CTL_HIGH = 0x00002815, + GUEST_IA32_FRED_CONFIG = 0x0000281a, + GUEST_IA32_FRED_RSP1 = 0x0000281c, + GUEST_IA32_FRED_RSP2 = 0x0000281e, + GUEST_IA32_FRED_RSP3 = 0x00002820, + GUEST_IA32_FRED_STKLVLS = 0x00002822, + GUEST_IA32_FRED_SSP1 = 0x00002824, + GUEST_IA32_FRED_SSP2 = 0x00002826, + GUEST_IA32_FRED_SSP3 = 0x00002828, HOST_IA32_PAT = 0x00002c00, HOST_IA32_PAT_HIGH = 0x00002c01, HOST_IA32_EFER = 0x00002c02, HOST_IA32_EFER_HIGH = 0x00002c03, HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05, + HOST_IA32_FRED_CONFIG = 0x00002c08, + HOST_IA32_FRED_RSP1 = 0x00002c0a, + HOST_IA32_FRED_RSP2 = 0x00002c0c, + HOST_IA32_FRED_RSP3 = 0x00002c0e, + HOST_IA32_FRED_STKLVLS = 0x00002c10, + HOST_IA32_FRED_SSP1 = 0x00002c12, + HOST_IA32_FRED_SSP2 = 0x00002c14, + HOST_IA32_FRED_SSP3 = 0x00002c16, PIN_BASED_VM_EXEC_CONTROL = 0x00004000, CPU_BASED_VM_EXEC_CONTROL = 0x00004002, EXCEPTION_BITMAP = 0x00004004, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d58ed2d3d379..b7b772183ee4 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1470,6 +1470,18 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, (unsigned long)(cpu_entry_stack(cpu) + 1)); } +#ifdef CONFIG_X86_64 + /* Per-CPU FRED MSRs */ + if (kvm_cpu_cap_has(X86_FEATURE_FRED)) { + vmcs_write64(HOST_IA32_FRED_RSP1, read_msr(MSR_IA32_FRED_RSP1)); + vmcs_write64(HOST_IA32_FRED_RSP2, read_msr(MSR_IA32_FRED_RSP2)); + vmcs_write64(HOST_IA32_FRED_RSP3, read_msr(MSR_IA32_FRED_RSP3)); + vmcs_write64(HOST_IA32_FRED_SSP1, read_msr(MSR_IA32_FRED_SSP1)); + vmcs_write64(HOST_IA32_FRED_SSP2, read_msr(MSR_IA32_FRED_SSP2)); + vmcs_write64(HOST_IA32_FRED_SSP3, read_msr(MSR_IA32_FRED_SSP3)); + } +#endif + vmx->loaded_vmcs->cpu = cpu; } } @@ -4321,6 +4333,15 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) */ vmcs_write16(HOST_DS_SELECTOR, 0); vmcs_write16(HOST_ES_SELECTOR, 0); + + /* + * FRED MSRs are per-cpu, however FRED CONFIG and STKLVLS MSRs + * are the same on all CPUs, thus they are initialized here. + */ + if (kvm_cpu_cap_has(X86_FEATURE_FRED)) { + vmcs_write64(HOST_IA32_FRED_CONFIG, read_msr(MSR_IA32_FRED_CONFIG)); + vmcs_write64(HOST_IA32_FRED_STKLVLS, read_msr(MSR_IA32_FRED_STKLVLS)); + } #else vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ @@ -4865,6 +4886,19 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu) */ vmx->pi_desc.nv = POSTED_INTR_VECTOR; vmx->pi_desc.sn = 1; + +#ifdef CONFIG_X86_64 + if (kvm_cpu_cap_has(X86_FEATURE_FRED)) { + vmcs_write64(GUEST_IA32_FRED_CONFIG, 0); + vmcs_write64(GUEST_IA32_FRED_RSP1, 0); + vmcs_write64(GUEST_IA32_FRED_RSP2, 0); + vmcs_write64(GUEST_IA32_FRED_RSP3, 0); + vmcs_write64(GUEST_IA32_FRED_STKLVLS, 0); + vmcs_write64(GUEST_IA32_FRED_SSP1, 0); + vmcs_write64(GUEST_IA32_FRED_SSP2, 0); + vmcs_write64(GUEST_IA32_FRED_SSP3, 0); + } +#endif } static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)