diff mbox series

[RFC,v2,3/5] KVM: SVM: Enable Bus lock threshold exit

Message ID 20241001063413.687787-4-manali.shukla@amd.com
State New
Headers show
Series [RFC,v2,1/5] x86/cpu: Add virt tag in /proc/cpuinfo | expand

Commit Message

Manali Shukla Oct. 1, 2024, 6:34 a.m. UTC
From: Nikunj A Dadhania <nikunj@amd.com>

Virtual machines can exploit bus locks to degrade the performance of
the system. Bus locks can be caused by Non-WB(Write back) and
misaligned locked RMW (Read-modify-Write) instructions and require
systemwide synchronization among all processors which can result into
significant performance penalties.

To address this issue, the Bus Lock Threshold feature is introduced to
provide ability to hypervisor to restrict guests' capability of
initiating mulitple buslocks, thereby preventing system slowdowns.

Support for the buslock threshold is indicated via CPUID function
0x8000000A_EDX[29].

On the processors that support the Bus Lock Threshold feature, the
VMCB provides a Bus Lock Threshold enable bit and an unsigned 16-bit
Bus Lock threshold count.

VMCB intercept bit
VMCB Offset	Bits	Function
14h	        5	Intercept bus lock operations
                        (occurs after guest instruction finishes)

Bus lock threshold count
VMCB Offset	Bits	Function
120h	        15:0	Bus lock counter

When a VMRUN instruction is executed, the bus lock threshold count is
loaded into an internal count register. Before the processor executes
a bus lock in the guest, it checks the value of this register:

 - If the value is greater than '0', the processor successfully
   executes the bus lock and decrements the count.

 - If the value is '0', the bus lock is not executed, and a #VMEXIT to
   the VMM is taken.

The bus lock threshold #VMEXIT is reported to the VMM with the VMEXIT
code A5h, SVM_EXIT_BUS_LOCK.

When SVM_EXIT_BUS_LOCK occurs, the value of the current Bus Lock
Threshold counter, which is '0', is loaded into the VMCB. This value
must be reset to a default greater than '0' in bus_lock_exit handler
in hypervisor, because the behavior of SVM_EXIT_BUS_LOCK is fault
like, so the bus lock exit to userspace  happens with the RIP pointing
to the offending instruction (which generates buslocks).

So, if the value of the Bus Lock Threshold counter remains '0', the
guest continuously exits with SVM_EXIT_BUS_LOCK.

The generated SVM_EXIT_BUS_LOCK in kvm will exit to user space by
setting the KVM_RUN_BUS_LOCK flag in vcpu->run->flags, indicating to
the user space that a bus lock has been detected in the guest.

Use the already available KVM capability KVM_CAP_X86_BUS_LOCK_EXIT to
enable the feature. This feature is disabled by default, it can be
enabled explicitly from user space.

More details about the Bus Lock Threshold feature can be found in AMD
APM [1].

[1]: AMD64 Architecture Programmer's Manual Pub. 24593, April 2024,
     Vol 2, 15.14.5 Bus Lock Threshold.
     https://bugzilla.kernel.org/attachment.cgi?id=306250

Signed-off-by: Nikunj A Dadhania <nikunj@amd.com>
Co-developed-by: Manali Shukla <manali.shukla@amd.com>
Signed-off-by: Manali Shukla <manali.shukla@amd.com>
---
 arch/x86/include/asm/svm.h      |  5 ++++-
 arch/x86/include/uapi/asm/svm.h |  2 ++
 arch/x86/kvm/svm/nested.c       | 12 ++++++++++++
 arch/x86/kvm/svm/svm.c          | 29 +++++++++++++++++++++++++++++
 4 files changed, 47 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index f0dea3750ca9..bad9723f40e1 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -116,6 +116,7 @@  enum {
 	INTERCEPT_INVPCID,
 	INTERCEPT_MCOMMIT,
 	INTERCEPT_TLBSYNC,
+	INTERCEPT_BUSLOCK,
 };
 
 
@@ -158,7 +159,9 @@  struct __attribute__ ((__packed__)) vmcb_control_area {
 	u64 avic_physical_id;	/* Offset 0xf8 */
 	u8 reserved_7[8];
 	u64 vmsa_pa;		/* Used for an SEV-ES guest */
-	u8 reserved_8[720];
+	u8 reserved_8[16];
+	u16 bus_lock_counter;	/* Offset 0x120 */
+	u8 reserved_9[702];
 	/*
 	 * Offset 0x3e0, 32 bytes reserved
 	 * for use by hypervisor/software.
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 1814b413fd57..abf6aed88cee 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -95,6 +95,7 @@ 
 #define SVM_EXIT_CR14_WRITE_TRAP		0x09e
 #define SVM_EXIT_CR15_WRITE_TRAP		0x09f
 #define SVM_EXIT_INVPCID       0x0a2
+#define SVM_EXIT_BUS_LOCK			0x0a5
 #define SVM_EXIT_NPF           0x400
 #define SVM_EXIT_AVIC_INCOMPLETE_IPI		0x401
 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS	0x402
@@ -224,6 +225,7 @@ 
 	{ SVM_EXIT_CR4_WRITE_TRAP,	"write_cr4_trap" }, \
 	{ SVM_EXIT_CR8_WRITE_TRAP,	"write_cr8_trap" }, \
 	{ SVM_EXIT_INVPCID,     "invpcid" }, \
+	{ SVM_EXIT_BUS_LOCK,     "buslock" }, \
 	{ SVM_EXIT_NPF,         "npf" }, \
 	{ SVM_EXIT_AVIC_INCOMPLETE_IPI,		"avic_incomplete_ipi" }, \
 	{ SVM_EXIT_AVIC_UNACCELERATED_ACCESS,   "avic_unaccelerated_access" }, \
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 6f704c1037e5..670092d31f77 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -669,6 +669,12 @@  static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
 	vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa;
 	vmcb02->control.msrpm_base_pa = vmcb01->control.msrpm_base_pa;
 
+	/*
+	 * The bus lock threshold count should keep running across nested
+	 * transitions.
+	 * Copied the buslock threshold count from vmcb01 to vmcb02.
+	 */
+	vmcb02->control.bus_lock_counter = vmcb01->control.bus_lock_counter;
 	/* Done at vmrun: asid.  */
 
 	/* Also overwritten later if necessary.  */
@@ -1035,6 +1041,12 @@  int nested_svm_vmexit(struct vcpu_svm *svm)
 
 	}
 
+	/*
+	 * The bus lock threshold count should keep running across nested
+	 * transitions.
+	 * Copied the buslock threshold count from vmcb02 to vmcb01.
+	 */
+	vmcb01->control.bus_lock_counter = vmcb02->control.bus_lock_counter;
 	nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
 
 	svm_switch_vmcb(svm, &svm->vmcb01);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 5ab2c92c7331..41c773a40f99 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1372,6 +1372,11 @@  static void init_vmcb(struct kvm_vcpu *vcpu)
 		svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
 	}
 
+	if (vcpu->kvm->arch.bus_lock_detection_enabled)
+		svm_set_intercept(svm, INTERCEPT_BUSLOCK);
+	else
+		svm_clr_intercept(svm, INTERCEPT_BUSLOCK);
+
 	if (sev_guest(vcpu->kvm))
 		sev_init_vmcb(svm);
 
@@ -3283,6 +3288,24 @@  static int invpcid_interception(struct kvm_vcpu *vcpu)
 	return kvm_handle_invpcid(vcpu, type, gva);
 }
 
+static int bus_lock_exit(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	vcpu->run->exit_reason = KVM_EXIT_X86_BUS_LOCK;
+	vcpu->run->flags |= KVM_RUN_X86_BUS_LOCK;
+
+	/*
+	 * Reload the counter with value greater than '0'.
+	 * The bus lock exit on SVM happens with RIP pointing to the guilty
+	 * instruction. So, reloading the value of bus_lock_counter to '0'
+	 * results into generating continuous bus lock exits.
+	 */
+	svm->vmcb->control.bus_lock_counter = 1;
+
+	return 0;
+}
+
 static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[SVM_EXIT_READ_CR0]			= cr_interception,
 	[SVM_EXIT_READ_CR3]			= cr_interception,
@@ -3350,6 +3373,7 @@  static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[SVM_EXIT_CR4_WRITE_TRAP]		= cr_trap,
 	[SVM_EXIT_CR8_WRITE_TRAP]		= cr_trap,
 	[SVM_EXIT_INVPCID]                      = invpcid_interception,
+	[SVM_EXIT_BUS_LOCK]			= bus_lock_exit,
 	[SVM_EXIT_NPF]				= npf_interception,
 	[SVM_EXIT_RSM]                          = rsm_interception,
 	[SVM_EXIT_AVIC_INCOMPLETE_IPI]		= avic_incomplete_ipi_interception,
@@ -5212,6 +5236,11 @@  static __init void svm_set_cpu_caps(void)
 		kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
 	}
 
+	if (cpu_feature_enabled(X86_VIRT_FEATURE_BUS_LOCK_THRESHOLD)) {
+		pr_info("Bus Lock Threashold supported\n");
+		kvm_caps.has_bus_lock_exit = true;
+	}
+
 	/* CPUID 0x80000008 */
 	if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
 	    boot_cpu_has(X86_FEATURE_AMD_SSBD))