
[v2,08/25] KVM: VMX: Initialize VMCS FRED fields

Message ID 20240207172646.3981-9-xin3.li@intel.com
State New
Series Enable FRED with KVM VMX

Commit Message

Li, Xin3 Feb. 7, 2024, 5:26 p.m. UTC
Initialize the host VMCS FRED fields with the host FRED MSRs' values and
the guest VMCS FRED fields to 0.

FRED CPU states are managed in 9 new FRED MSRs, as well as a few
existing CPU registers and MSRs, e.g., CR4.FRED.  To support FRED
context management, new VMCS fields corresponding to most of FRED
CPU state MSRs are added to both the host-state and guest-state
areas of VMCS.

Specifically, no VMCS fields are added for the FRED RSP0 and SSP0 MSRs,
because these two MSRs are used only during ring 3 event delivery; KVM,
running in ring 0, can therefore run safely even with the guest's FRED
RSP0 and SSP0 still loaded.  Loading the host FRED RSP0 and SSP0 can be
deferred until just before returning to user level.
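
For illustration only (not part of this patch), the deferral could look
roughly like the sketch below; the helper name and the exact host values
are assumptions:

/*
 * Hypothetical sketch: FRED RSP0/SSP0 are consumed only when an event is
 * delivered from ring 3, so KVM can leave the guest values in place while
 * it runs in ring 0 and restore the host values just before returning to
 * user level.
 */
static void vmx_restore_host_fred_rsp0_ssp0(void)
{
	/* Host FRED RSP0, e.g. the top of the current task's kernel stack. */
	wrmsrl(MSR_IA32_FRED_RSP0, current_top_of_stack());

	/*
	 * FRED SSP0 is the existing IA32_PL0_SSP MSR; 0 is only a
	 * placeholder here, the real host value depends on shadow stack
	 * usage.
	 */
	wrmsrl(MSR_IA32_PL0_SSP, 0);
}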

Signed-off-by: Xin Li <xin3.li@intel.com>
Tested-by: Shan Kang <shan.kang@intel.com>
---

Changes since v1:
* Use kvm_cpu_cap_has() instead of cpu_feature_enabled() to decouple
  KVM's capability to virtualize a feature and host's enabling of a
  feature (Chao Gao).
* Move guest FRED states init into __vmx_vcpu_reset() (Chao Gao).
---
 arch/x86/include/asm/vmx.h | 16 ++++++++++++++++
 arch/x86/kvm/vmx/vmx.c     | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)

Comments

Chao Gao April 19, 2024, 2:01 p.m. UTC | #1
On Wed, Feb 07, 2024 at 09:26:28AM -0800, Xin Li wrote:
>Initialize the host VMCS FRED fields with the host FRED MSRs' values and
>the guest VMCS FRED fields to 0.
>
>FRED CPU states are managed in 9 new FRED MSRs, as well as a few
>existing CPU registers and MSRs, e.g., CR4.FRED.  To support FRED
>context management, new VMCS fields corresponding to most of FRED
>CPU state MSRs are added to both the host-state and guest-state
>areas of VMCS.
>
>Specifically, no VMCS fields are added for the FRED RSP0 and SSP0 MSRs,
>because these two MSRs are used only during ring 3 event delivery; KVM,
>running in ring 0, can therefore run safely even with the guest's FRED
>RSP0 and SSP0 still loaded.  Loading the host FRED RSP0 and SSP0 can be
>deferred until just before returning to user level.
>
>Signed-off-by: Xin Li <xin3.li@intel.com>
>Tested-by: Shan Kang <shan.kang@intel.com>
>---
>
>Changes since v1:
>* Use kvm_cpu_cap_has() instead of cpu_feature_enabled() to decouple
>  KVM's capability to virtualize a feature and host's enabling of a
>  feature (Chao Gao).
>* Move guest FRED states init into __vmx_vcpu_reset() (Chao Gao).
>---
> arch/x86/include/asm/vmx.h | 16 ++++++++++++++++
> arch/x86/kvm/vmx/vmx.c     | 34 ++++++++++++++++++++++++++++++++++
> 2 files changed, 50 insertions(+)
>
>diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
>index cb14f7e315f5..4889754415b5 100644
>--- a/arch/x86/include/asm/vmx.h
>+++ b/arch/x86/include/asm/vmx.h
>@@ -280,12 +280,28 @@ enum vmcs_field {
> 	GUEST_BNDCFGS_HIGH              = 0x00002813,
> 	GUEST_IA32_RTIT_CTL		= 0x00002814,
> 	GUEST_IA32_RTIT_CTL_HIGH	= 0x00002815,
>+	GUEST_IA32_FRED_CONFIG		= 0x0000281a,
>+	GUEST_IA32_FRED_RSP1		= 0x0000281c,
>+	GUEST_IA32_FRED_RSP2		= 0x0000281e,
>+	GUEST_IA32_FRED_RSP3		= 0x00002820,
>+	GUEST_IA32_FRED_STKLVLS		= 0x00002822,
>+	GUEST_IA32_FRED_SSP1		= 0x00002824,
>+	GUEST_IA32_FRED_SSP2		= 0x00002826,
>+	GUEST_IA32_FRED_SSP3		= 0x00002828,
> 	HOST_IA32_PAT			= 0x00002c00,
> 	HOST_IA32_PAT_HIGH		= 0x00002c01,
> 	HOST_IA32_EFER			= 0x00002c02,
> 	HOST_IA32_EFER_HIGH		= 0x00002c03,
> 	HOST_IA32_PERF_GLOBAL_CTRL	= 0x00002c04,
> 	HOST_IA32_PERF_GLOBAL_CTRL_HIGH	= 0x00002c05,
>+	HOST_IA32_FRED_CONFIG		= 0x00002c08,
>+	HOST_IA32_FRED_RSP1		= 0x00002c0a,
>+	HOST_IA32_FRED_RSP2		= 0x00002c0c,
>+	HOST_IA32_FRED_RSP3		= 0x00002c0e,
>+	HOST_IA32_FRED_STKLVLS		= 0x00002c10,
>+	HOST_IA32_FRED_SSP1		= 0x00002c12,
>+	HOST_IA32_FRED_SSP2		= 0x00002c14,
>+	HOST_IA32_FRED_SSP3		= 0x00002c16,
> 	PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
> 	CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,
> 	EXCEPTION_BITMAP                = 0x00004004,
>diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>index d58ed2d3d379..b7b772183ee4 100644
>--- a/arch/x86/kvm/vmx/vmx.c
>+++ b/arch/x86/kvm/vmx/vmx.c
>@@ -1470,6 +1470,18 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
> 				    (unsigned long)(cpu_entry_stack(cpu) + 1));
> 		}
> 
>+#ifdef CONFIG_X86_64

is this #ifdeffery necessary?

I assume kvm_cpu_cap_has(X86_FEATURE_FRED) is always false for !CONFIG_X86_64.
It looks like most of the FRED changes in the core kernel don't have such #ifdeffery.

>+		/* Per-CPU FRED MSRs */

Please explain in this comment why these six MSRs are updated here, and why
only these six.

>+		if (kvm_cpu_cap_has(X86_FEATURE_FRED)) {
>+			vmcs_write64(HOST_IA32_FRED_RSP1, read_msr(MSR_IA32_FRED_RSP1));
>+			vmcs_write64(HOST_IA32_FRED_RSP2, read_msr(MSR_IA32_FRED_RSP2));
>+			vmcs_write64(HOST_IA32_FRED_RSP3, read_msr(MSR_IA32_FRED_RSP3));
>+			vmcs_write64(HOST_IA32_FRED_SSP1, read_msr(MSR_IA32_FRED_SSP1));
>+			vmcs_write64(HOST_IA32_FRED_SSP2, read_msr(MSR_IA32_FRED_SSP2));
>+			vmcs_write64(HOST_IA32_FRED_SSP3, read_msr(MSR_IA32_FRED_SSP3));
>+		}
>+#endif
>+
> 		vmx->loaded_vmcs->cpu = cpu;
> 	}
> }
>@@ -4321,6 +4333,15 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
> 	 */
> 	vmcs_write16(HOST_DS_SELECTOR, 0);
> 	vmcs_write16(HOST_ES_SELECTOR, 0);
>+
>+	/*
>+	 * FRED MSRs are per-cpu, however FRED CONFIG and STKLVLS MSRs
>+	 * are the same on all CPUs, thus they are initialized here.
>+	 */
>+	if (kvm_cpu_cap_has(X86_FEATURE_FRED)) {
>+		vmcs_write64(HOST_IA32_FRED_CONFIG, read_msr(MSR_IA32_FRED_CONFIG));
>+		vmcs_write64(HOST_IA32_FRED_STKLVLS, read_msr(MSR_IA32_FRED_STKLVLS));
>+	}
> #else
> 	vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
> 	vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
>@@ -4865,6 +4886,19 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu)
> 	 */
> 	vmx->pi_desc.nv = POSTED_INTR_VECTOR;
> 	vmx->pi_desc.sn = 1;
>+
>+#ifdef CONFIG_X86_64

ditto

>+	if (kvm_cpu_cap_has(X86_FEATURE_FRED)) {
>+		vmcs_write64(GUEST_IA32_FRED_CONFIG, 0);
>+		vmcs_write64(GUEST_IA32_FRED_RSP1, 0);
>+		vmcs_write64(GUEST_IA32_FRED_RSP2, 0);
>+		vmcs_write64(GUEST_IA32_FRED_RSP3, 0);
>+		vmcs_write64(GUEST_IA32_FRED_STKLVLS, 0);
>+		vmcs_write64(GUEST_IA32_FRED_SSP1, 0);
>+		vmcs_write64(GUEST_IA32_FRED_SSP2, 0);
>+		vmcs_write64(GUEST_IA32_FRED_SSP3, 0);
>+	}
>+#endif
> }
> 
> static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>-- 
>2.43.0
>
>
Li, Xin3 April 19, 2024, 5:02 p.m. UTC | #2
> >+#ifdef CONFIG_X86_64
> 
> is this #ifdeffery necessary?

Yes, otherwise the build fails on 32-bit.

> 
> I assume kvm_cpu_cap_has(X86_FEATURE_FRED) is always false
> for !CONFIG_X86_64.
> It looks like most of the FRED changes in the core kernel don't have such #ifdeffery.

Because it's not a compile-time constant false; it only evaluates to false at
runtime, so the code inside the block still has to build on 32-bit.
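
For what it's worth, the usual alternative is to fold the check into the
condition, as in the sketch below; it only avoids the #ifdef if everything
inside the block still parses on a 32-bit build, which may be exactly what
fails here:

		/*
		 * Sketch of a compile-time-false variant; only viable if the
		 * FRED MSR/VMCS definitions and read_msr() are visible on
		 * 32-bit builds.
		 */
		if (IS_ENABLED(CONFIG_X86_64) && kvm_cpu_cap_has(X86_FEATURE_FRED)) {
			vmcs_write64(HOST_IA32_FRED_RSP1, read_msr(MSR_IA32_FRED_RSP1));
			vmcs_write64(HOST_IA32_FRED_RSP2, read_msr(MSR_IA32_FRED_RSP2));
			vmcs_write64(HOST_IA32_FRED_RSP3, read_msr(MSR_IA32_FRED_RSP3));
			vmcs_write64(HOST_IA32_FRED_SSP1, read_msr(MSR_IA32_FRED_SSP1));
			vmcs_write64(HOST_IA32_FRED_SSP2, read_msr(MSR_IA32_FRED_SSP2));
			vmcs_write64(HOST_IA32_FRED_SSP3, read_msr(MSR_IA32_FRED_SSP3));
		}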

> 
> >+		/* Per-CPU FRED MSRs */
> 
> Please explain in this comment why these six MSRs are updated here, and why
> only these six.

The explanation is kind of implicit in "per-CPU"; I will make it more explicit.
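Something along these lines, for example (suggested wording only):

		/*
		 * FRED RSP1..RSP3 and SSP1..SSP3 hold per-CPU host stack
		 * pointers, so the HOST_* fields must be refreshed whenever
		 * the vCPU is loaded on a different pCPU.  FRED CONFIG and
		 * STKLVLS are the same on all CPUs and are written once in
		 * vmx_set_constant_host_state().
		 */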

Thanks!
    Xin
Sean Christopherson June 12, 2024, 9:41 p.m. UTC | #3
On Wed, Feb 07, 2024, Xin Li wrote:
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index d58ed2d3d379..b7b772183ee4 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1470,6 +1470,18 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
>  				    (unsigned long)(cpu_entry_stack(cpu) + 1));
>  		}
>  
> +#ifdef CONFIG_X86_64

Don't bother, practically no one cares about 32-bit KVM these days, and I highly
doubt anyone that runs 32-bit KVM cares about the code footprint to this degree.

> +		/* Per-CPU FRED MSRs */
> +		if (kvm_cpu_cap_has(X86_FEATURE_FRED)) {
> +			vmcs_write64(HOST_IA32_FRED_RSP1, read_msr(MSR_IA32_FRED_RSP1));
> +			vmcs_write64(HOST_IA32_FRED_RSP2, read_msr(MSR_IA32_FRED_RSP2));
> +			vmcs_write64(HOST_IA32_FRED_RSP3, read_msr(MSR_IA32_FRED_RSP3));
> +			vmcs_write64(HOST_IA32_FRED_SSP1, read_msr(MSR_IA32_FRED_SSP1));
> +			vmcs_write64(HOST_IA32_FRED_SSP2, read_msr(MSR_IA32_FRED_SSP2));
> +			vmcs_write64(HOST_IA32_FRED_SSP3, read_msr(MSR_IA32_FRED_SSP3));

That's a lot of RDMSRs to eat on every task migration.  How hard would it be to
add a per-CPU cache for each of these?  Or is there a pre-existing way to get at
the info that's faster than RDMSR?
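
A rough sketch of such a cache (names are made up, this is not from the
series), living in vmx.c; it assumes the host programs these MSRs once per
CPU and never changes them behind KVM's back:

/* Per-CPU cache of the host FRED stack-level MSRs. */
struct fred_host_stacks {
	bool	initialized;
	u64	rsp1, rsp2, rsp3;
	u64	ssp1, ssp2, ssp3;
};
static DEFINE_PER_CPU(struct fred_host_stacks, fred_host_stacks);

static void vmx_write_host_fred_stacks(void)
{
	struct fred_host_stacks *s = this_cpu_ptr(&fred_host_stacks);

	/* Eat the six RDMSRs only on the first vCPU load on this pCPU. */
	if (!s->initialized) {
		s->rsp1 = read_msr(MSR_IA32_FRED_RSP1);
		s->rsp2 = read_msr(MSR_IA32_FRED_RSP2);
		s->rsp3 = read_msr(MSR_IA32_FRED_RSP3);
		s->ssp1 = read_msr(MSR_IA32_FRED_SSP1);
		s->ssp2 = read_msr(MSR_IA32_FRED_SSP2);
		s->ssp3 = read_msr(MSR_IA32_FRED_SSP3);
		s->initialized = true;
	}

	vmcs_write64(HOST_IA32_FRED_RSP1, s->rsp1);
	vmcs_write64(HOST_IA32_FRED_RSP2, s->rsp2);
	vmcs_write64(HOST_IA32_FRED_RSP3, s->rsp3);
	vmcs_write64(HOST_IA32_FRED_SSP1, s->ssp1);
	vmcs_write64(HOST_IA32_FRED_SSP2, s->ssp2);
	vmcs_write64(HOST_IA32_FRED_SSP3, s->ssp3);
}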

> +		}
> +#endif
> +
>  		vmx->loaded_vmcs->cpu = cpu;
>  	}
>  }
> @@ -4321,6 +4333,15 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
>  	 */
>  	vmcs_write16(HOST_DS_SELECTOR, 0);
>  	vmcs_write16(HOST_ES_SELECTOR, 0);
> +
> +	/*
> +	 * FRED MSRs are per-cpu, however FRED CONFIG and STKLVLS MSRs
> +	 * are the same on all CPUs, thus they are initialized here.

Eh, just trim this to:

	/* FRED CONFIG and STKLVLS are the same on all CPUs. */

> +	 */
> +	if (kvm_cpu_cap_has(X86_FEATURE_FRED)) {
> +		vmcs_write64(HOST_IA32_FRED_CONFIG, read_msr(MSR_IA32_FRED_CONFIG));
> +		vmcs_write64(HOST_IA32_FRED_STKLVLS, read_msr(MSR_IA32_FRED_STKLVLS));
> +	}
>  #else
>  	vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
>  	vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
> @@ -4865,6 +4886,19 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu)
>  	 */
>  	vmx->pi_desc.nv = POSTED_INTR_VECTOR;
>  	vmx->pi_desc.sn = 1;
> +
> +#ifdef CONFIG_X86_64
> +	if (kvm_cpu_cap_has(X86_FEATURE_FRED)) {
> +		vmcs_write64(GUEST_IA32_FRED_CONFIG, 0);
> +		vmcs_write64(GUEST_IA32_FRED_RSP1, 0);
> +		vmcs_write64(GUEST_IA32_FRED_RSP2, 0);
> +		vmcs_write64(GUEST_IA32_FRED_RSP3, 0);
> +		vmcs_write64(GUEST_IA32_FRED_STKLVLS, 0);
> +		vmcs_write64(GUEST_IA32_FRED_SSP1, 0);
> +		vmcs_write64(GUEST_IA32_FRED_SSP2, 0);
> +		vmcs_write64(GUEST_IA32_FRED_SSP3, 0);
> +	}

Somewhat of a moot point, but this belongs in init_vmcs(), not __vmx_vcpu_reset().
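
For reference, roughly what that would look like when folded into
init_vmcs() (helper name is hypothetical):

/* Hypothetical helper, called from init_vmcs() instead of __vmx_vcpu_reset(). */
static void init_vmcs_fred(void)
{
	if (!kvm_cpu_cap_has(X86_FEATURE_FRED))
		return;

	vmcs_write64(GUEST_IA32_FRED_CONFIG, 0);
	vmcs_write64(GUEST_IA32_FRED_RSP1, 0);
	vmcs_write64(GUEST_IA32_FRED_RSP2, 0);
	vmcs_write64(GUEST_IA32_FRED_RSP3, 0);
	vmcs_write64(GUEST_IA32_FRED_STKLVLS, 0);
	vmcs_write64(GUEST_IA32_FRED_SSP1, 0);
	vmcs_write64(GUEST_IA32_FRED_SSP2, 0);
	vmcs_write64(GUEST_IA32_FRED_SSP3, 0);
}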

Patch

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index cb14f7e315f5..4889754415b5 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -280,12 +280,28 @@  enum vmcs_field {
 	GUEST_BNDCFGS_HIGH              = 0x00002813,
 	GUEST_IA32_RTIT_CTL		= 0x00002814,
 	GUEST_IA32_RTIT_CTL_HIGH	= 0x00002815,
+	GUEST_IA32_FRED_CONFIG		= 0x0000281a,
+	GUEST_IA32_FRED_RSP1		= 0x0000281c,
+	GUEST_IA32_FRED_RSP2		= 0x0000281e,
+	GUEST_IA32_FRED_RSP3		= 0x00002820,
+	GUEST_IA32_FRED_STKLVLS		= 0x00002822,
+	GUEST_IA32_FRED_SSP1		= 0x00002824,
+	GUEST_IA32_FRED_SSP2		= 0x00002826,
+	GUEST_IA32_FRED_SSP3		= 0x00002828,
 	HOST_IA32_PAT			= 0x00002c00,
 	HOST_IA32_PAT_HIGH		= 0x00002c01,
 	HOST_IA32_EFER			= 0x00002c02,
 	HOST_IA32_EFER_HIGH		= 0x00002c03,
 	HOST_IA32_PERF_GLOBAL_CTRL	= 0x00002c04,
 	HOST_IA32_PERF_GLOBAL_CTRL_HIGH	= 0x00002c05,
+	HOST_IA32_FRED_CONFIG		= 0x00002c08,
+	HOST_IA32_FRED_RSP1		= 0x00002c0a,
+	HOST_IA32_FRED_RSP2		= 0x00002c0c,
+	HOST_IA32_FRED_RSP3		= 0x00002c0e,
+	HOST_IA32_FRED_STKLVLS		= 0x00002c10,
+	HOST_IA32_FRED_SSP1		= 0x00002c12,
+	HOST_IA32_FRED_SSP2		= 0x00002c14,
+	HOST_IA32_FRED_SSP3		= 0x00002c16,
 	PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
 	CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,
 	EXCEPTION_BITMAP                = 0x00004004,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d58ed2d3d379..b7b772183ee4 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1470,6 +1470,18 @@  void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
 				    (unsigned long)(cpu_entry_stack(cpu) + 1));
 		}
 
+#ifdef CONFIG_X86_64
+		/* Per-CPU FRED MSRs */
+		if (kvm_cpu_cap_has(X86_FEATURE_FRED)) {
+			vmcs_write64(HOST_IA32_FRED_RSP1, read_msr(MSR_IA32_FRED_RSP1));
+			vmcs_write64(HOST_IA32_FRED_RSP2, read_msr(MSR_IA32_FRED_RSP2));
+			vmcs_write64(HOST_IA32_FRED_RSP3, read_msr(MSR_IA32_FRED_RSP3));
+			vmcs_write64(HOST_IA32_FRED_SSP1, read_msr(MSR_IA32_FRED_SSP1));
+			vmcs_write64(HOST_IA32_FRED_SSP2, read_msr(MSR_IA32_FRED_SSP2));
+			vmcs_write64(HOST_IA32_FRED_SSP3, read_msr(MSR_IA32_FRED_SSP3));
+		}
+#endif
+
 		vmx->loaded_vmcs->cpu = cpu;
 	}
 }
@@ -4321,6 +4333,15 @@  void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 	 */
 	vmcs_write16(HOST_DS_SELECTOR, 0);
 	vmcs_write16(HOST_ES_SELECTOR, 0);
+
+	/*
+	 * FRED MSRs are per-cpu, however FRED CONFIG and STKLVLS MSRs
+	 * are the same on all CPUs, thus they are initialized here.
+	 */
+	if (kvm_cpu_cap_has(X86_FEATURE_FRED)) {
+		vmcs_write64(HOST_IA32_FRED_CONFIG, read_msr(MSR_IA32_FRED_CONFIG));
+		vmcs_write64(HOST_IA32_FRED_STKLVLS, read_msr(MSR_IA32_FRED_STKLVLS));
+	}
 #else
 	vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
 	vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
@@ -4865,6 +4886,19 @@  static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	 */
 	vmx->pi_desc.nv = POSTED_INTR_VECTOR;
 	vmx->pi_desc.sn = 1;
+
+#ifdef CONFIG_X86_64
+	if (kvm_cpu_cap_has(X86_FEATURE_FRED)) {
+		vmcs_write64(GUEST_IA32_FRED_CONFIG, 0);
+		vmcs_write64(GUEST_IA32_FRED_RSP1, 0);
+		vmcs_write64(GUEST_IA32_FRED_RSP2, 0);
+		vmcs_write64(GUEST_IA32_FRED_RSP3, 0);
+		vmcs_write64(GUEST_IA32_FRED_STKLVLS, 0);
+		vmcs_write64(GUEST_IA32_FRED_SSP1, 0);
+		vmcs_write64(GUEST_IA32_FRED_SSP2, 0);
+		vmcs_write64(GUEST_IA32_FRED_SSP3, 0);
+	}
+#endif
 }
 
 static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)