diff mbox series

cpufreq: intel_pstate: Make hwp_notify_lock a raw spinlock

Message ID 20240919081121.10784-2-ukleinek@debian.org
State New
Headers show
Series cpufreq: intel_pstate: Make hwp_notify_lock a raw spinlock | expand

Commit Message

Uwe Kleine-König Sept. 19, 2024, 8:11 a.m. UTC
notify_hwp_interrupt() is called via sysvec_thermal() ->
smp_thermal_vector() -> intel_thermal_interrupt() in hard irq context.
For this reason it must not use a simple spin_lock that sleeps with
PREEMPT_RT enabled. So convert it to a raw spinlock.

Reported-by: xiao sheng wen <atzlinux@sina.com>
Link: https://bugs.debian.org/1076483
Signed-off-by: Uwe Kleine-König <ukleinek@debian.org>
---
Hello,

this is deep in x86 land where I have *very* little experience and
knowledge. Is sysvec_thermal() (defined in arch/x86/kernel/irq.c) really
running in hard irq context? The stacktrace in the Debian bug report
suggests that.

So please double check my reasoning before applying this patch.

Thanks
Uwe

 drivers/cpufreq/intel_pstate.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)


base-commit: 3621a2c9142bd490af0666c0c02d52d60ce0d2a5

Comments

xiao sheng wen(肖盛文) Sept. 23, 2024, 1:53 a.m. UTC | #1
I apply this patch on Debian kernel 6.11-1~exp1 (2024-09-19) source code.

uname -a
Linux atzlinux-ce 6.11-rt-amd64 #1 SMP PREEMPT_RT atzlinux 6.11-1~exp1 
(2024-09-19) x86_64 GNU/Linux

I get the following errors in dmesg:

[    7.273690] ucsi_acpi USBC000:00: possible UCSI driver bug 2
[    7.451822] ucsi_acpi USBC000:00: error -EINVAL: PPM init failed
[    7.537749] ------------[ cut here ]------------
[    7.537753] WARNING: CPU: 12 PID: 96 at kernel/workqueue.c:2259 
delayed_work_timer_fn+0x63/0x90
[    7.537763] Modules linked in: rfcomm cmac algif_hash algif_skcipher 
af_alg snd_seq_dummy snd_hrtimer snd_seq snd_seq_device bnep btusb 
uvcvideo btrtl btintel videobuf2_vmalloc btbcm uvc btmtk 
videobuf2_memops videobuf2_v4l2 bluetooth videodev videobuf2_common mc 
snd_soc_skl_hda_dsp snd_soc_hdac_hdmi snd_sof_probes 
snd_soc_intel_hda_dsp_common binfmt_misc nls_ascii nls_cp437 vfat fat 
snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic 
snd_hda_scodec_component snd_soc_dmic snd_sof_pci_intel_tgl 
snd_sof_pci_intel_cnl snd_sof_intel_hda_generic soundwire_intel 
soundwire_generic_allocation soundwire_cadence snd_sof_intel_hda_common 
snd_sof_intel_hda_mlink snd_sof_intel_hda snd_sof_pci snd_sof_xtensa_dsp 
snd_sof iwlmvm snd_sof_utils intel_uncore_frequency snd_soc_hdac_hda 
intel_uncore_frequency_common snd_soc_acpi_intel_match 
x86_pkg_temp_thermal intel_powerclamp snd_soc_acpi soundwire_bus 
coretemp kvm_intel snd_ctl_led mac80211 snd_soc_avs kvm 
snd_soc_hda_codec snd_hda_ext_core snd_soc_core libarc4
[    7.537811]  crct10dif_pclmul snd_compress snd_pcm_dmaengine 
ghash_clmulni_intel sha512_ssse3 snd_hda_intel sha256_ssse3 sha1_ssse3 
iwlwifi snd_intel_dspcfg snd_intel_sdw_acpi snd_hda_codec aesni_intel 
gf128mul snd_hda_core crypto_simd cryptd rapl mei_hdcp mei_pxp mei_wdt 
snd_hwdep intel_rapl_msr processor_thermal_device_pci intel_cstate 
cfg80211 processor_thermal_device processor_thermal_wt_hint intel_uncore 
snd_pcm thinkpad_acpi think_lmi iTCO_wdt processor_thermal_rfim mei_me 
firmware_attributes_class intel_pmc_bxt processor_thermal_rapl snd_timer 
ucsi_acpi mei iTCO_vendor_support intel_rapl_common nvram typec_ucsi 
wmi_bmof watchdog platform_profile processor_thermal_wt_req pcspkr 
processor_thermal_power_floor snd igen6_edac processor_thermal_mbox 
typec roles soundcore rfkill ac int3403_thermal joydev 
int340x_thermal_zone intel_pmc_core int3400_thermal intel_vsec intel_hid 
pmt_telemetry acpi_thermal_rel sparse_keymap acpi_tad pmt_class acpi_pad 
evdev serio_raw dm_mod loop configfs efi_pstore efivarfs ip_tables
[    7.537861]  x_tables autofs4 ext4 crc16 mbcache jbd2 crc32c_generic 
i915 drm_buddy i2c_algo_bit drm_display_helper cec hid_multitouch 
rc_core hid_generic xhci_pci xhci_hcd ttm i2c_hid_acpi i2c_hid usbcore 
drm_kms_helper hid intel_lpss_pci video nvme intel_lpss crc32_pclmul 
i2c_i801 thunderbolt psmouse drm nvme_core e1000e crc32c_intel idma64 
usb_common i2c_smbus fan button battery wmi
[    7.537887] CPU: 12 UID: 0 PID: 96 Comm: ktimers/12 Not tainted 
6.11-rt-amd64 #1  atzlinux 6.11-1~exp1
[    7.537891] Hardware name: LENOVO 21HD0078CD/21HD0078CD, BIOS 
N3QET47W (1.47 ) 07/18/2024
[    7.537893] RIP: 0010:delayed_work_timer_fn+0x63/0x90
[    7.537898] Code: 65 48 8b 3d 3f ad b4 77 65 8b 05 40 ad b4 77 8b 97 
00 0d 00 00 25 00 00 ff 00 81 e2 00 01 00 00 09 d0 75 06 f6 47 2c 20 75 
0b <0f> 0b 5b 5d 41 5c c3 cc cc cc cc e8 fd 76 00 00 48 85 c0 74 eb 48
[    7.537900] RSP: 0018:ffffbb900046bc98 EFLAGS: 00010006
[    7.537903] RAX: 0000000000000100 RBX: ffff9a6182053000 RCX: 
ffff9a64cf7a35c0
[    7.537904] RDX: 0000000000000100 RSI: ffffffff884eab80 RDI: 
ffff9a6181130000
[    7.537906] RBP: 0000000000002000 R08: ffff9a64cf7a3610 R09: 
ffff9a64cf7a3658
[    7.537907] R10: 0000000000000002 R11: 0000000000000001 R12: 
ffff9a6183bfa900
[    7.537908] R13: dead000000000122 R14: ffffffff884eab80 R15: 
ffff9a6183bfa920
[    7.537910] FS:  0000000000000000(0000) GS:ffff9a64cf600000(0000) 
knlGS:0000000000000000
[    7.537912] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    7.537913] CR2: 0000561c94ed4000 CR3: 000000041d62c000 CR4: 
0000000000f50ef0
[    7.537915] PKRU: 55555554
[    7.537916] Call Trace:
[    7.537918]  <TASK>
[    7.537919]  ? delayed_work_timer_fn+0x63/0x90
[    7.537923]  ? __warn.cold+0x93/0xed
[    7.537926]  ? delayed_work_timer_fn+0x63/0x90
[    7.537933]  ? report_bug+0xff/0x140
[    7.537937]  ? handle_bug+0x3c/0x80
[    7.537942]  ? exc_invalid_op+0x17/0x70
[    7.537945]  ? asm_exc_invalid_op+0x1a/0x20
[    7.537947]  ? __pfx_delayed_work_timer_fn+0x10/0x10
[    7.537951]  ? __pfx_delayed_work_timer_fn+0x10/0x10
[    7.537955]  ? delayed_work_timer_fn+0x63/0x90
[    7.537958]  ? __pfx_delayed_work_timer_fn+0x10/0x10
[    7.537961]  call_timer_fn+0x27/0x120
[    7.537965]  __run_timers+0x18d/0x300
[    7.537969]  timer_expire_remote+0x51/0x80
[    7.537973]  tmigr_handle_remote+0x3cc/0x470
[    7.537979]  handle_softirqs.isra.0+0xb5/0x230
[    7.537982]  ? __pfx_smpboot_thread_fn+0x10/0x10
[    7.537986]  run_timersd+0x3e/0x80
[    7.537988]  smpboot_thread_fn+0xda/0x1d0
[    7.537992]  kthread+0xcf/0x100
[    7.537995]  ? __pfx_kthread+0x10/0x10
[    7.537997]  ret_from_fork+0x31/0x50
[    7.538000]  ? __pfx_kthread+0x10/0x10
[    7.538001]  ret_from_fork_asm+0x1a/0x30
[    7.538006]  </TASK>
[    7.538007] ---[ end trace 0000000000000000 ]---

The whole dmesg is:
https://linux-hardware.org/?probe=28057f9ed2&log=dmesg

My notebook hardware info is:
https://linux-hardware.org/?probe=28057f9ed2

If need do more test, please tell me.

Thanks!

在 2024/9/19 16:11, Uwe Kleine-König 写道:
> notify_hwp_interrupt() is called via sysvec_thermal() ->
> smp_thermal_vector() -> intel_thermal_interrupt() in hard irq context.
> For this reason it must not use a simple spin_lock that sleeps with
> PREEMPT_RT enabled. So convert it to a raw spinlock.
> 
> Reported-by: xiao sheng wen <atzlinux@sina.com>
> Link: https://bugs.debian.org/1076483
> Signed-off-by: Uwe Kleine-König <ukleinek@debian.org>
> ---
> Hello,
> 
> this is deep in x86 land where I have *very* little experience and
> knowledge. Is sysvec_thermal() (defined in arch/x86/kernel/irq.c) really
> running in hard irq context? The stacktrace in the Debian bug report
> suggests that.
> 
> So please double check my reasoning before applying this patch.
> 
> Thanks
> Uwe
> 
>   drivers/cpufreq/intel_pstate.c | 16 ++++++++--------
>   1 file changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index aaea9a39eced..b0018f371ea3 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -1845,7 +1845,7 @@ static void intel_pstate_notify_work(struct work_struct *work)
>   	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
>   }
>   
> -static DEFINE_SPINLOCK(hwp_notify_lock);
> +static DEFINE_RAW_SPINLOCK(hwp_notify_lock);
>   static cpumask_t hwp_intr_enable_mask;
>   
>   #define HWP_GUARANTEED_PERF_CHANGE_STATUS      BIT(0)
> @@ -1868,7 +1868,7 @@ void notify_hwp_interrupt(void)
>   	if (!(value & status_mask))
>   		return;
>   
> -	spin_lock_irqsave(&hwp_notify_lock, flags);
> +	raw_spin_lock_irqsave(&hwp_notify_lock, flags);
>   
>   	if (!cpumask_test_cpu(this_cpu, &hwp_intr_enable_mask))
>   		goto ack_intr;
> @@ -1876,13 +1876,13 @@ void notify_hwp_interrupt(void)
>   	schedule_delayed_work(&all_cpu_data[this_cpu]->hwp_notify_work,
>   			      msecs_to_jiffies(10));
>   
> -	spin_unlock_irqrestore(&hwp_notify_lock, flags);
> +	raw_spin_unlock_irqrestore(&hwp_notify_lock, flags);
>   
>   	return;
>   
>   ack_intr:
>   	wrmsrl_safe(MSR_HWP_STATUS, 0);
> -	spin_unlock_irqrestore(&hwp_notify_lock, flags);
> +	raw_spin_unlock_irqrestore(&hwp_notify_lock, flags);
>   }
>   
>   static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata)
> @@ -1895,9 +1895,9 @@ static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata)
>   	/* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */
>   	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
>   
> -	spin_lock_irq(&hwp_notify_lock);
> +	raw_spin_lock_irq(&hwp_notify_lock);
>   	cancel_work = cpumask_test_and_clear_cpu(cpudata->cpu, &hwp_intr_enable_mask);
> -	spin_unlock_irq(&hwp_notify_lock);
> +	raw_spin_unlock_irq(&hwp_notify_lock);
>   
>   	if (cancel_work)
>   		cancel_delayed_work_sync(&cpudata->hwp_notify_work);
> @@ -1912,10 +1912,10 @@ static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
>   	if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) {
>   		u64 interrupt_mask = HWP_GUARANTEED_PERF_CHANGE_REQ;
>   
> -		spin_lock_irq(&hwp_notify_lock);
> +		raw_spin_lock_irq(&hwp_notify_lock);
>   		INIT_DELAYED_WORK(&cpudata->hwp_notify_work, intel_pstate_notify_work);
>   		cpumask_set_cpu(cpudata->cpu, &hwp_intr_enable_mask);
> -		spin_unlock_irq(&hwp_notify_lock);
> +		raw_spin_unlock_irq(&hwp_notify_lock);
>   
>   		if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE))
>   			interrupt_mask |= HWP_HIGHEST_PERF_CHANGE_REQ;
> 
> base-commit: 3621a2c9142bd490af0666c0c02d52d60ce0d2a5
diff mbox series

Patch

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index aaea9a39eced..b0018f371ea3 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1845,7 +1845,7 @@  static void intel_pstate_notify_work(struct work_struct *work)
 	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
 }
 
-static DEFINE_SPINLOCK(hwp_notify_lock);
+static DEFINE_RAW_SPINLOCK(hwp_notify_lock);
 static cpumask_t hwp_intr_enable_mask;
 
 #define HWP_GUARANTEED_PERF_CHANGE_STATUS      BIT(0)
@@ -1868,7 +1868,7 @@  void notify_hwp_interrupt(void)
 	if (!(value & status_mask))
 		return;
 
-	spin_lock_irqsave(&hwp_notify_lock, flags);
+	raw_spin_lock_irqsave(&hwp_notify_lock, flags);
 
 	if (!cpumask_test_cpu(this_cpu, &hwp_intr_enable_mask))
 		goto ack_intr;
@@ -1876,13 +1876,13 @@  void notify_hwp_interrupt(void)
 	schedule_delayed_work(&all_cpu_data[this_cpu]->hwp_notify_work,
 			      msecs_to_jiffies(10));
 
-	spin_unlock_irqrestore(&hwp_notify_lock, flags);
+	raw_spin_unlock_irqrestore(&hwp_notify_lock, flags);
 
 	return;
 
 ack_intr:
 	wrmsrl_safe(MSR_HWP_STATUS, 0);
-	spin_unlock_irqrestore(&hwp_notify_lock, flags);
+	raw_spin_unlock_irqrestore(&hwp_notify_lock, flags);
 }
 
 static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata)
@@ -1895,9 +1895,9 @@  static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata)
 	/* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */
 	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
 
-	spin_lock_irq(&hwp_notify_lock);
+	raw_spin_lock_irq(&hwp_notify_lock);
 	cancel_work = cpumask_test_and_clear_cpu(cpudata->cpu, &hwp_intr_enable_mask);
-	spin_unlock_irq(&hwp_notify_lock);
+	raw_spin_unlock_irq(&hwp_notify_lock);
 
 	if (cancel_work)
 		cancel_delayed_work_sync(&cpudata->hwp_notify_work);
@@ -1912,10 +1912,10 @@  static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
 	if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) {
 		u64 interrupt_mask = HWP_GUARANTEED_PERF_CHANGE_REQ;
 
-		spin_lock_irq(&hwp_notify_lock);
+		raw_spin_lock_irq(&hwp_notify_lock);
 		INIT_DELAYED_WORK(&cpudata->hwp_notify_work, intel_pstate_notify_work);
 		cpumask_set_cpu(cpudata->cpu, &hwp_intr_enable_mask);
-		spin_unlock_irq(&hwp_notify_lock);
+		raw_spin_unlock_irq(&hwp_notify_lock);
 
 		if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE))
 			interrupt_mask |= HWP_HIGHEST_PERF_CHANGE_REQ;