diff mbox series

cpufreq: intel_pstate: Make hwp_notify_lock a raw spinlock

Message ID 20240919081121.10784-2-ukleinek@debian.org
State Accepted
Commit 8b4865cd904650cbed7f2407e653934c621b8127
Headers show
Series cpufreq: intel_pstate: Make hwp_notify_lock a raw spinlock | expand

Commit Message

Uwe Kleine-König Sept. 19, 2024, 8:11 a.m. UTC
notify_hwp_interrupt() is called via sysvec_thermal() ->
smp_thermal_vector() -> intel_thermal_interrupt() in hard irq context.
For this reason it must not use a simple spin_lock that sleeps with
PREEMPT_RT enabled. So convert it to a raw spinlock.

Reported-by: xiao sheng wen <atzlinux@sina.com>
Link: https://bugs.debian.org/1076483
Signed-off-by: Uwe Kleine-König <ukleinek@debian.org>
---
Hello,

this is deep in x86 land where I have *very* little experience and
knowledge. Is sysvec_thermal() (defined in arch/x86/kernel/irq.c) really
running in hard irq context? The stacktrace in the Debian bug report
suggests that.

So please double check my reasoning before applying this patch.

Thanks
Uwe

 drivers/cpufreq/intel_pstate.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)


base-commit: 3621a2c9142bd490af0666c0c02d52d60ce0d2a5

Comments

xiao sheng wen(肖盛文) Sept. 23, 2024, 1:53 a.m. UTC | #1
I apply this patch on Debian kernel 6.11-1~exp1 (2024-09-19) source code.

uname -a
Linux atzlinux-ce 6.11-rt-amd64 #1 SMP PREEMPT_RT atzlinux 6.11-1~exp1 
(2024-09-19) x86_64 GNU/Linux

I get the following errors in dmesg:

[    7.273690] ucsi_acpi USBC000:00: possible UCSI driver bug 2
[    7.451822] ucsi_acpi USBC000:00: error -EINVAL: PPM init failed
[    7.537749] ------------[ cut here ]------------
[    7.537753] WARNING: CPU: 12 PID: 96 at kernel/workqueue.c:2259 
delayed_work_timer_fn+0x63/0x90
[    7.537763] Modules linked in: rfcomm cmac algif_hash algif_skcipher 
af_alg snd_seq_dummy snd_hrtimer snd_seq snd_seq_device bnep btusb 
uvcvideo btrtl btintel videobuf2_vmalloc btbcm uvc btmtk 
videobuf2_memops videobuf2_v4l2 bluetooth videodev videobuf2_common mc 
snd_soc_skl_hda_dsp snd_soc_hdac_hdmi snd_sof_probes 
snd_soc_intel_hda_dsp_common binfmt_misc nls_ascii nls_cp437 vfat fat 
snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic 
snd_hda_scodec_component snd_soc_dmic snd_sof_pci_intel_tgl 
snd_sof_pci_intel_cnl snd_sof_intel_hda_generic soundwire_intel 
soundwire_generic_allocation soundwire_cadence snd_sof_intel_hda_common 
snd_sof_intel_hda_mlink snd_sof_intel_hda snd_sof_pci snd_sof_xtensa_dsp 
snd_sof iwlmvm snd_sof_utils intel_uncore_frequency snd_soc_hdac_hda 
intel_uncore_frequency_common snd_soc_acpi_intel_match 
x86_pkg_temp_thermal intel_powerclamp snd_soc_acpi soundwire_bus 
coretemp kvm_intel snd_ctl_led mac80211 snd_soc_avs kvm 
snd_soc_hda_codec snd_hda_ext_core snd_soc_core libarc4
[    7.537811]  crct10dif_pclmul snd_compress snd_pcm_dmaengine 
ghash_clmulni_intel sha512_ssse3 snd_hda_intel sha256_ssse3 sha1_ssse3 
iwlwifi snd_intel_dspcfg snd_intel_sdw_acpi snd_hda_codec aesni_intel 
gf128mul snd_hda_core crypto_simd cryptd rapl mei_hdcp mei_pxp mei_wdt 
snd_hwdep intel_rapl_msr processor_thermal_device_pci intel_cstate 
cfg80211 processor_thermal_device processor_thermal_wt_hint intel_uncore 
snd_pcm thinkpad_acpi think_lmi iTCO_wdt processor_thermal_rfim mei_me 
firmware_attributes_class intel_pmc_bxt processor_thermal_rapl snd_timer 
ucsi_acpi mei iTCO_vendor_support intel_rapl_common nvram typec_ucsi 
wmi_bmof watchdog platform_profile processor_thermal_wt_req pcspkr 
processor_thermal_power_floor snd igen6_edac processor_thermal_mbox 
typec roles soundcore rfkill ac int3403_thermal joydev 
int340x_thermal_zone intel_pmc_core int3400_thermal intel_vsec intel_hid 
pmt_telemetry acpi_thermal_rel sparse_keymap acpi_tad pmt_class acpi_pad 
evdev serio_raw dm_mod loop configfs efi_pstore efivarfs ip_tables
[    7.537861]  x_tables autofs4 ext4 crc16 mbcache jbd2 crc32c_generic 
i915 drm_buddy i2c_algo_bit drm_display_helper cec hid_multitouch 
rc_core hid_generic xhci_pci xhci_hcd ttm i2c_hid_acpi i2c_hid usbcore 
drm_kms_helper hid intel_lpss_pci video nvme intel_lpss crc32_pclmul 
i2c_i801 thunderbolt psmouse drm nvme_core e1000e crc32c_intel idma64 
usb_common i2c_smbus fan button battery wmi
[    7.537887] CPU: 12 UID: 0 PID: 96 Comm: ktimers/12 Not tainted 
6.11-rt-amd64 #1  atzlinux 6.11-1~exp1
[    7.537891] Hardware name: LENOVO 21HD0078CD/21HD0078CD, BIOS 
N3QET47W (1.47 ) 07/18/2024
[    7.537893] RIP: 0010:delayed_work_timer_fn+0x63/0x90
[    7.537898] Code: 65 48 8b 3d 3f ad b4 77 65 8b 05 40 ad b4 77 8b 97 
00 0d 00 00 25 00 00 ff 00 81 e2 00 01 00 00 09 d0 75 06 f6 47 2c 20 75 
0b <0f> 0b 5b 5d 41 5c c3 cc cc cc cc e8 fd 76 00 00 48 85 c0 74 eb 48
[    7.537900] RSP: 0018:ffffbb900046bc98 EFLAGS: 00010006
[    7.537903] RAX: 0000000000000100 RBX: ffff9a6182053000 RCX: 
ffff9a64cf7a35c0
[    7.537904] RDX: 0000000000000100 RSI: ffffffff884eab80 RDI: 
ffff9a6181130000
[    7.537906] RBP: 0000000000002000 R08: ffff9a64cf7a3610 R09: 
ffff9a64cf7a3658
[    7.537907] R10: 0000000000000002 R11: 0000000000000001 R12: 
ffff9a6183bfa900
[    7.537908] R13: dead000000000122 R14: ffffffff884eab80 R15: 
ffff9a6183bfa920
[    7.537910] FS:  0000000000000000(0000) GS:ffff9a64cf600000(0000) 
knlGS:0000000000000000
[    7.537912] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    7.537913] CR2: 0000561c94ed4000 CR3: 000000041d62c000 CR4: 
0000000000f50ef0
[    7.537915] PKRU: 55555554
[    7.537916] Call Trace:
[    7.537918]  <TASK>
[    7.537919]  ? delayed_work_timer_fn+0x63/0x90
[    7.537923]  ? __warn.cold+0x93/0xed
[    7.537926]  ? delayed_work_timer_fn+0x63/0x90
[    7.537933]  ? report_bug+0xff/0x140
[    7.537937]  ? handle_bug+0x3c/0x80
[    7.537942]  ? exc_invalid_op+0x17/0x70
[    7.537945]  ? asm_exc_invalid_op+0x1a/0x20
[    7.537947]  ? __pfx_delayed_work_timer_fn+0x10/0x10
[    7.537951]  ? __pfx_delayed_work_timer_fn+0x10/0x10
[    7.537955]  ? delayed_work_timer_fn+0x63/0x90
[    7.537958]  ? __pfx_delayed_work_timer_fn+0x10/0x10
[    7.537961]  call_timer_fn+0x27/0x120
[    7.537965]  __run_timers+0x18d/0x300
[    7.537969]  timer_expire_remote+0x51/0x80
[    7.537973]  tmigr_handle_remote+0x3cc/0x470
[    7.537979]  handle_softirqs.isra.0+0xb5/0x230
[    7.537982]  ? __pfx_smpboot_thread_fn+0x10/0x10
[    7.537986]  run_timersd+0x3e/0x80
[    7.537988]  smpboot_thread_fn+0xda/0x1d0
[    7.537992]  kthread+0xcf/0x100
[    7.537995]  ? __pfx_kthread+0x10/0x10
[    7.537997]  ret_from_fork+0x31/0x50
[    7.538000]  ? __pfx_kthread+0x10/0x10
[    7.538001]  ret_from_fork_asm+0x1a/0x30
[    7.538006]  </TASK>
[    7.538007] ---[ end trace 0000000000000000 ]---

The whole dmesg is:
https://linux-hardware.org/?probe=28057f9ed2&log=dmesg

My notebook hardware info is:
https://linux-hardware.org/?probe=28057f9ed2

If need do more test, please tell me.

Thanks!

在 2024/9/19 16:11, Uwe Kleine-König 写道:
> notify_hwp_interrupt() is called via sysvec_thermal() ->
> smp_thermal_vector() -> intel_thermal_interrupt() in hard irq context.
> For this reason it must not use a simple spin_lock that sleeps with
> PREEMPT_RT enabled. So convert it to a raw spinlock.
> 
> Reported-by: xiao sheng wen <atzlinux@sina.com>
> Link: https://bugs.debian.org/1076483
> Signed-off-by: Uwe Kleine-König <ukleinek@debian.org>
> ---
> Hello,
> 
> this is deep in x86 land where I have *very* little experience and
> knowledge. Is sysvec_thermal() (defined in arch/x86/kernel/irq.c) really
> running in hard irq context? The stacktrace in the Debian bug report
> suggests that.
> 
> So please double check my reasoning before applying this patch.
> 
> Thanks
> Uwe
> 
>   drivers/cpufreq/intel_pstate.c | 16 ++++++++--------
>   1 file changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index aaea9a39eced..b0018f371ea3 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -1845,7 +1845,7 @@ static void intel_pstate_notify_work(struct work_struct *work)
>   	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
>   }
>   
> -static DEFINE_SPINLOCK(hwp_notify_lock);
> +static DEFINE_RAW_SPINLOCK(hwp_notify_lock);
>   static cpumask_t hwp_intr_enable_mask;
>   
>   #define HWP_GUARANTEED_PERF_CHANGE_STATUS      BIT(0)
> @@ -1868,7 +1868,7 @@ void notify_hwp_interrupt(void)
>   	if (!(value & status_mask))
>   		return;
>   
> -	spin_lock_irqsave(&hwp_notify_lock, flags);
> +	raw_spin_lock_irqsave(&hwp_notify_lock, flags);
>   
>   	if (!cpumask_test_cpu(this_cpu, &hwp_intr_enable_mask))
>   		goto ack_intr;
> @@ -1876,13 +1876,13 @@ void notify_hwp_interrupt(void)
>   	schedule_delayed_work(&all_cpu_data[this_cpu]->hwp_notify_work,
>   			      msecs_to_jiffies(10));
>   
> -	spin_unlock_irqrestore(&hwp_notify_lock, flags);
> +	raw_spin_unlock_irqrestore(&hwp_notify_lock, flags);
>   
>   	return;
>   
>   ack_intr:
>   	wrmsrl_safe(MSR_HWP_STATUS, 0);
> -	spin_unlock_irqrestore(&hwp_notify_lock, flags);
> +	raw_spin_unlock_irqrestore(&hwp_notify_lock, flags);
>   }
>   
>   static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata)
> @@ -1895,9 +1895,9 @@ static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata)
>   	/* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */
>   	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
>   
> -	spin_lock_irq(&hwp_notify_lock);
> +	raw_spin_lock_irq(&hwp_notify_lock);
>   	cancel_work = cpumask_test_and_clear_cpu(cpudata->cpu, &hwp_intr_enable_mask);
> -	spin_unlock_irq(&hwp_notify_lock);
> +	raw_spin_unlock_irq(&hwp_notify_lock);
>   
>   	if (cancel_work)
>   		cancel_delayed_work_sync(&cpudata->hwp_notify_work);
> @@ -1912,10 +1912,10 @@ static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
>   	if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) {
>   		u64 interrupt_mask = HWP_GUARANTEED_PERF_CHANGE_REQ;
>   
> -		spin_lock_irq(&hwp_notify_lock);
> +		raw_spin_lock_irq(&hwp_notify_lock);
>   		INIT_DELAYED_WORK(&cpudata->hwp_notify_work, intel_pstate_notify_work);
>   		cpumask_set_cpu(cpudata->cpu, &hwp_intr_enable_mask);
> -		spin_unlock_irq(&hwp_notify_lock);
> +		raw_spin_unlock_irq(&hwp_notify_lock);
>   
>   		if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE))
>   			interrupt_mask |= HWP_HIGHEST_PERF_CHANGE_REQ;
> 
> base-commit: 3621a2c9142bd490af0666c0c02d52d60ce0d2a5
Uwe Kleine-König Sept. 26, 2024, 7:48 a.m. UTC | #2
Hello,

On 9/23/24 03:53, xiao sheng wen(肖盛文) wrote:
> I apply this patch on Debian kernel 6.11-1~exp1 (2024-09-19) source code.
> 
> uname -a
> Linux atzlinux-ce 6.11-rt-amd64 #1 SMP PREEMPT_RT atzlinux 6.11-1~exp1 
> (2024-09-19) x86_64 GNU/Linux
> 
> I get the following errors in dmesg:
> 
> [    7.273690] ucsi_acpi USBC000:00: possible UCSI driver bug 2
> [    7.451822] ucsi_acpi USBC000:00: error -EINVAL: PPM init failed
> [    7.537749] ------------[ cut here ]------------
> [    7.537753] WARNING: CPU: 12 PID: 96 at kernel/workqueue.c:2259 
> delayed_work_timer_fn+0x63/0x90
> [    7.537763] Modules linked in: rfcomm cmac algif_hash algif_skcipher 
> af_alg snd_seq_dummy snd_hrtimer snd_seq snd_seq_device bnep btusb 
> uvcvideo btrtl btintel videobuf2_vmalloc btbcm uvc btmtk 
> videobuf2_memops videobuf2_v4l2 bluetooth videodev videobuf2_common mc 
> snd_soc_skl_hda_dsp snd_soc_hdac_hdmi snd_sof_probes 
> snd_soc_intel_hda_dsp_common binfmt_misc nls_ascii nls_cp437 vfat fat 
> snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic 
> snd_hda_scodec_component snd_soc_dmic snd_sof_pci_intel_tgl 
> snd_sof_pci_intel_cnl snd_sof_intel_hda_generic soundwire_intel 
> soundwire_generic_allocation soundwire_cadence snd_sof_intel_hda_common 
> snd_sof_intel_hda_mlink snd_sof_intel_hda snd_sof_pci snd_sof_xtensa_dsp 
> snd_sof iwlmvm snd_sof_utils intel_uncore_frequency snd_soc_hdac_hda 
> intel_uncore_frequency_common snd_soc_acpi_intel_match 
> x86_pkg_temp_thermal intel_powerclamp snd_soc_acpi soundwire_bus 
> coretemp kvm_intel snd_ctl_led mac80211 snd_soc_avs kvm 
> snd_soc_hda_codec snd_hda_ext_core snd_soc_core libarc4
> [    7.537811]  crct10dif_pclmul snd_compress snd_pcm_dmaengine 
> ghash_clmulni_intel sha512_ssse3 snd_hda_intel sha256_ssse3 sha1_ssse3 
> iwlwifi snd_intel_dspcfg snd_intel_sdw_acpi snd_hda_codec aesni_intel 
> gf128mul snd_hda_core crypto_simd cryptd rapl mei_hdcp mei_pxp mei_wdt 
> snd_hwdep intel_rapl_msr processor_thermal_device_pci intel_cstate 
> cfg80211 processor_thermal_device processor_thermal_wt_hint intel_uncore 
> snd_pcm thinkpad_acpi think_lmi iTCO_wdt processor_thermal_rfim mei_me 
> firmware_attributes_class intel_pmc_bxt processor_thermal_rapl snd_timer 
> ucsi_acpi mei iTCO_vendor_support intel_rapl_common nvram typec_ucsi 
> wmi_bmof watchdog platform_profile processor_thermal_wt_req pcspkr 
> processor_thermal_power_floor snd igen6_edac processor_thermal_mbox 
> typec roles soundcore rfkill ac int3403_thermal joydev 
> int340x_thermal_zone intel_pmc_core int3400_thermal intel_vsec intel_hid 
> pmt_telemetry acpi_thermal_rel sparse_keymap acpi_tad pmt_class acpi_pad 
> evdev serio_raw dm_mod loop configfs efi_pstore efivarfs ip_tables
> [    7.537861]  x_tables autofs4 ext4 crc16 mbcache jbd2 crc32c_generic 
> i915 drm_buddy i2c_algo_bit drm_display_helper cec hid_multitouch 
> rc_core hid_generic xhci_pci xhci_hcd ttm i2c_hid_acpi i2c_hid usbcore 
> drm_kms_helper hid intel_lpss_pci video nvme intel_lpss crc32_pclmul 
> i2c_i801 thunderbolt psmouse drm nvme_core e1000e crc32c_intel idma64 
> usb_common i2c_smbus fan button battery wmi
> [    7.537887] CPU: 12 UID: 0 PID: 96 Comm: ktimers/12 Not tainted 6.11- 
> rt-amd64 #1  atzlinux 6.11-1~exp1
> [    7.537891] Hardware name: LENOVO 21HD0078CD/21HD0078CD, BIOS 
> N3QET47W (1.47 ) 07/18/2024
> [    7.537893] RIP: 0010:delayed_work_timer_fn+0x63/0x90
> [    7.537898] Code: 65 48 8b 3d 3f ad b4 77 65 8b 05 40 ad b4 77 8b 97 
> 00 0d 00 00 25 00 00 ff 00 81 e2 00 01 00 00 09 d0 75 06 f6 47 2c 20 75 
> 0b <0f> 0b 5b 5d 41 5c c3 cc cc cc cc e8 fd 76 00 00 48 85 c0 74 eb 48
> [    7.537900] RSP: 0018:ffffbb900046bc98 EFLAGS: 00010006
> [    7.537903] RAX: 0000000000000100 RBX: ffff9a6182053000 RCX: 
> ffff9a64cf7a35c0
> [    7.537904] RDX: 0000000000000100 RSI: ffffffff884eab80 RDI: 
> ffff9a6181130000
> [    7.537906] RBP: 0000000000002000 R08: ffff9a64cf7a3610 R09: 
> ffff9a64cf7a3658
> [    7.537907] R10: 0000000000000002 R11: 0000000000000001 R12: 
> ffff9a6183bfa900
> [    7.537908] R13: dead000000000122 R14: ffffffff884eab80 R15: 
> ffff9a6183bfa920
> [    7.537910] FS:  0000000000000000(0000) GS:ffff9a64cf600000(0000) 
> knlGS:0000000000000000
> [    7.537912] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [    7.537913] CR2: 0000561c94ed4000 CR3: 000000041d62c000 CR4: 
> 0000000000f50ef0
> [    7.537915] PKRU: 55555554
> [    7.537916] Call Trace:
> [    7.537918]  <TASK>
> [    7.537919]  ? delayed_work_timer_fn+0x63/0x90
> [    7.537923]  ? __warn.cold+0x93/0xed
> [    7.537926]  ? delayed_work_timer_fn+0x63/0x90
> [    7.537933]  ? report_bug+0xff/0x140
> [    7.537937]  ? handle_bug+0x3c/0x80
> [    7.537942]  ? exc_invalid_op+0x17/0x70
> [    7.537945]  ? asm_exc_invalid_op+0x1a/0x20
> [    7.537947]  ? __pfx_delayed_work_timer_fn+0x10/0x10
> [    7.537951]  ? __pfx_delayed_work_timer_fn+0x10/0x10
> [    7.537955]  ? delayed_work_timer_fn+0x63/0x90
> [    7.537958]  ? __pfx_delayed_work_timer_fn+0x10/0x10
> [    7.537961]  call_timer_fn+0x27/0x120
> [    7.537965]  __run_timers+0x18d/0x300
> [    7.537969]  timer_expire_remote+0x51/0x80
> [    7.537973]  tmigr_handle_remote+0x3cc/0x470
> [    7.537979]  handle_softirqs.isra.0+0xb5/0x230
> [    7.537982]  ? __pfx_smpboot_thread_fn+0x10/0x10
> [    7.537986]  run_timersd+0x3e/0x80
> [    7.537988]  smpboot_thread_fn+0xda/0x1d0
> [    7.537992]  kthread+0xcf/0x100
> [    7.537995]  ? __pfx_kthread+0x10/0x10
> [    7.537997]  ret_from_fork+0x31/0x50
> [    7.538000]  ? __pfx_kthread+0x10/0x10
> [    7.538001]  ret_from_fork_asm+0x1a/0x30
> [    7.538006]  </TASK>
> [    7.538007] ---[ end trace 0000000000000000 ]---
> 
> The whole dmesg is:
> https://linux-hardware.org/?probe=28057f9ed2&log=dmesg

On a first glance this looks like an unrelated issue to me. Could you 
test with CONFIG_X86_INTEL_PSTATE=n? (If doesn't matter if you do this 
with my patch applied or not, without CONFIG_X86_INTEL_PSTATE the driver 
isn't build.)

Best regards
Uwe
xiao sheng wen(肖盛文) Sept. 27, 2024, 9:43 a.m. UTC | #3
Hello,

在 2024/9/26 15:48, Uwe Kleine-König 写道:
> Hello,
> 
> On 9/23/24 03:53, xiao sheng wen(肖盛文) wrote:
>> I apply this patch on Debian kernel 6.11-1~exp1 (2024-09-19) source code.
>>
>> uname -a
>> Linux atzlinux-ce 6.11-rt-amd64 #1 SMP PREEMPT_RT atzlinux 6.11-1~exp1 
>> (2024-09-19) x86_64 GNU/Linux
>>
>> I get the following errors in dmesg:
>>
>> [    7.273690] ucsi_acpi USBC000:00: possible UCSI driver bug 2
>> [    7.451822] ucsi_acpi USBC000:00: error -EINVAL: PPM init failed
>> [    7.537749] ------------[ cut here ]------------
>> [    7.537753] WARNING: CPU: 12 PID: 96 at kernel/workqueue.c:2259 
>> delayed_work_timer_fn+0x63/0x90
>> [    7.537763] Modules linked in: rfcomm cmac algif_hash 
>> algif_skcipher af_alg snd_seq_dummy snd_hrtimer snd_seq snd_seq_device 
>> bnep btusb uvcvideo btrtl btintel videobuf2_vmalloc btbcm uvc btmtk 
>> videobuf2_memops videobuf2_v4l2 bluetooth videodev videobuf2_common mc 
>> snd_soc_skl_hda_dsp snd_soc_hdac_hdmi snd_sof_probes 
>> snd_soc_intel_hda_dsp_common binfmt_misc nls_ascii nls_cp437 vfat fat 
>> snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic 
>> snd_hda_scodec_component snd_soc_dmic snd_sof_pci_intel_tgl 
>> snd_sof_pci_intel_cnl snd_sof_intel_hda_generic soundwire_intel 
>> soundwire_generic_allocation soundwire_cadence 
>> snd_sof_intel_hda_common snd_sof_intel_hda_mlink snd_sof_intel_hda 
>> snd_sof_pci snd_sof_xtensa_dsp snd_sof iwlmvm snd_sof_utils 
>> intel_uncore_frequency snd_soc_hdac_hda intel_uncore_frequency_common 
>> snd_soc_acpi_intel_match x86_pkg_temp_thermal intel_powerclamp 
>> snd_soc_acpi soundwire_bus coretemp kvm_intel snd_ctl_led mac80211 
>> snd_soc_avs kvm snd_soc_hda_codec snd_hda_ext_core snd_soc_core libarc4
>> [    7.537811]  crct10dif_pclmul snd_compress snd_pcm_dmaengine 
>> ghash_clmulni_intel sha512_ssse3 snd_hda_intel sha256_ssse3 sha1_ssse3 
>> iwlwifi snd_intel_dspcfg snd_intel_sdw_acpi snd_hda_codec aesni_intel 
>> gf128mul snd_hda_core crypto_simd cryptd rapl mei_hdcp mei_pxp mei_wdt 
>> snd_hwdep intel_rapl_msr processor_thermal_device_pci intel_cstate 
>> cfg80211 processor_thermal_device processor_thermal_wt_hint 
>> intel_uncore snd_pcm thinkpad_acpi think_lmi iTCO_wdt 
>> processor_thermal_rfim mei_me firmware_attributes_class intel_pmc_bxt 
>> processor_thermal_rapl snd_timer ucsi_acpi mei iTCO_vendor_support 
>> intel_rapl_common nvram typec_ucsi wmi_bmof watchdog platform_profile 
>> processor_thermal_wt_req pcspkr processor_thermal_power_floor snd 
>> igen6_edac processor_thermal_mbox typec roles soundcore rfkill ac 
>> int3403_thermal joydev int340x_thermal_zone intel_pmc_core 
>> int3400_thermal intel_vsec intel_hid pmt_telemetry acpi_thermal_rel 
>> sparse_keymap acpi_tad pmt_class acpi_pad evdev serio_raw dm_mod loop 
>> configfs efi_pstore efivarfs ip_tables
>> [    7.537861]  x_tables autofs4 ext4 crc16 mbcache jbd2 
>> crc32c_generic i915 drm_buddy i2c_algo_bit drm_display_helper cec 
>> hid_multitouch rc_core hid_generic xhci_pci xhci_hcd ttm i2c_hid_acpi 
>> i2c_hid usbcore drm_kms_helper hid intel_lpss_pci video nvme 
>> intel_lpss crc32_pclmul i2c_i801 thunderbolt psmouse drm nvme_core 
>> e1000e crc32c_intel idma64 usb_common i2c_smbus fan button battery wmi
>> [    7.537887] CPU: 12 UID: 0 PID: 96 Comm: ktimers/12 Not tainted 
>> 6.11- rt-amd64 #1  atzlinux 6.11-1~exp1
>> [    7.537891] Hardware name: LENOVO 21HD0078CD/21HD0078CD, BIOS 
>> N3QET47W (1.47 ) 07/18/2024
>> [    7.537893] RIP: 0010:delayed_work_timer_fn+0x63/0x90
>> [    7.537898] Code: 65 48 8b 3d 3f ad b4 77 65 8b 05 40 ad b4 77 8b 
>> 97 00 0d 00 00 25 00 00 ff 00 81 e2 00 01 00 00 09 d0 75 06 f6 47 2c 
>> 20 75 0b <0f> 0b 5b 5d 41 5c c3 cc cc cc cc e8 fd 76 00 00 48 85 c0 74 
>> eb 48
>> [    7.537900] RSP: 0018:ffffbb900046bc98 EFLAGS: 00010006
>> [    7.537903] RAX: 0000000000000100 RBX: ffff9a6182053000 RCX: 
>> ffff9a64cf7a35c0
>> [    7.537904] RDX: 0000000000000100 RSI: ffffffff884eab80 RDI: 
>> ffff9a6181130000
>> [    7.537906] RBP: 0000000000002000 R08: ffff9a64cf7a3610 R09: 
>> ffff9a64cf7a3658
>> [    7.537907] R10: 0000000000000002 R11: 0000000000000001 R12: 
>> ffff9a6183bfa900
>> [    7.537908] R13: dead000000000122 R14: ffffffff884eab80 R15: 
>> ffff9a6183bfa920
>> [    7.537910] FS:  0000000000000000(0000) GS:ffff9a64cf600000(0000) 
>> knlGS:0000000000000000
>> [    7.537912] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> [    7.537913] CR2: 0000561c94ed4000 CR3: 000000041d62c000 CR4: 
>> 0000000000f50ef0
>> [    7.537915] PKRU: 55555554
>> [    7.537916] Call Trace:
>> [    7.537918]  <TASK>
>> [    7.537919]  ? delayed_work_timer_fn+0x63/0x90
>> [    7.537923]  ? __warn.cold+0x93/0xed
>> [    7.537926]  ? delayed_work_timer_fn+0x63/0x90
>> [    7.537933]  ? report_bug+0xff/0x140
>> [    7.537937]  ? handle_bug+0x3c/0x80
>> [    7.537942]  ? exc_invalid_op+0x17/0x70
>> [    7.537945]  ? asm_exc_invalid_op+0x1a/0x20
>> [    7.537947]  ? __pfx_delayed_work_timer_fn+0x10/0x10
>> [    7.537951]  ? __pfx_delayed_work_timer_fn+0x10/0x10
>> [    7.537955]  ? delayed_work_timer_fn+0x63/0x90
>> [    7.537958]  ? __pfx_delayed_work_timer_fn+0x10/0x10
>> [    7.537961]  call_timer_fn+0x27/0x120
>> [    7.537965]  __run_timers+0x18d/0x300
>> [    7.537969]  timer_expire_remote+0x51/0x80
>> [    7.537973]  tmigr_handle_remote+0x3cc/0x470
>> [    7.537979]  handle_softirqs.isra.0+0xb5/0x230
>> [    7.537982]  ? __pfx_smpboot_thread_fn+0x10/0x10
>> [    7.537986]  run_timersd+0x3e/0x80
>> [    7.537988]  smpboot_thread_fn+0xda/0x1d0
>> [    7.537992]  kthread+0xcf/0x100
>> [    7.537995]  ? __pfx_kthread+0x10/0x10
>> [    7.537997]  ret_from_fork+0x31/0x50
>> [    7.538000]  ? __pfx_kthread+0x10/0x10
>> [    7.538001]  ret_from_fork_asm+0x1a/0x30
>> [    7.538006]  </TASK>
>> [    7.538007] ---[ end trace 0000000000000000 ]---
>>
>> The whole dmesg is:
>> https://linux-hardware.org/?probe=28057f9ed2&log=dmesg
> 
> On a first glance this looks like an unrelated issue to me. Could you 
> test with CONFIG_X86_INTEL_PSTATE=n? (If doesn't matter if you do this 
> with my patch applied or not, without CONFIG_X86_INTEL_PSTATE the driver 
> isn't build.)
> 
> Best regards
> Uwe
Yes, this WARNING is unrelated issue to your patch.It come from kernel 
[ktimers], so I don't need do test with CONFIG_X86_INTEL_PSTATE=n.

I apply your patch on Debian kernel 6.10.11-1 (2024-09-22) source code.

uname -a
Linux atzlinux-ce 6.10.11-rt-amd64 #1 SMP PREEMPT_RT atzlinux 6.10.11-1 
(2024-09-22) x86_64 GNU/Linux

There is not any errors in dmesg now, work perfect!
IMHO, this patch fix this bug in kernel 6.10.11-1.


Thanks!
Srinivas Pandruvada Sept. 27, 2024, 10:47 a.m. UTC | #4
On Thu, 2024-09-19 at 10:11 +0200, Uwe Kleine-König wrote:
> notify_hwp_interrupt() is called via sysvec_thermal() ->
> smp_thermal_vector() -> intel_thermal_interrupt() in hard irq
> context.
> For this reason it must not use a simple spin_lock that sleeps with
> PREEMPT_RT enabled. So convert it to a raw spinlock.
> 
> Reported-by: xiao sheng wen <atzlinux@sina.com>
> Link: https://bugs.debian.org/1076483
> Signed-off-by: Uwe Kleine-König <ukleinek@debian.org>

Missing Tested-by?

    Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>

> ---
> Hello,
> 
> this is deep in x86 land where I have *very* little experience and
> knowledge. Is sysvec_thermal() (defined in arch/x86/kernel/irq.c)
> really
> running in hard irq context? The stacktrace in the Debian bug report
> suggests that.
> 
> So please double check my reasoning before applying this patch.
> 
This is thermal LVT and runs in hard IRQ context. 

> Thanks
> Uwe
> 
>  drivers/cpufreq/intel_pstate.c | 16 ++++++++--------
>  1 file changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/cpufreq/intel_pstate.c
> b/drivers/cpufreq/intel_pstate.c
> index aaea9a39eced..b0018f371ea3 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -1845,7 +1845,7 @@ static void intel_pstate_notify_work(struct
> work_struct *work)
>  	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
>  }
>  
> -static DEFINE_SPINLOCK(hwp_notify_lock);
> +static DEFINE_RAW_SPINLOCK(hwp_notify_lock);
>  static cpumask_t hwp_intr_enable_mask;
>  
>  #define HWP_GUARANTEED_PERF_CHANGE_STATUS      BIT(0)
> @@ -1868,7 +1868,7 @@ void notify_hwp_interrupt(void)
>  	if (!(value & status_mask))
>  		return;
>  
> -	spin_lock_irqsave(&hwp_notify_lock, flags);
> +	raw_spin_lock_irqsave(&hwp_notify_lock, flags);
>  
>  	if (!cpumask_test_cpu(this_cpu, &hwp_intr_enable_mask))
>  		goto ack_intr;
> @@ -1876,13 +1876,13 @@ void notify_hwp_interrupt(void)
>  	schedule_delayed_work(&all_cpu_data[this_cpu]-
> >hwp_notify_work,
>  			      msecs_to_jiffies(10));
>  
> -	spin_unlock_irqrestore(&hwp_notify_lock, flags);
> +	raw_spin_unlock_irqrestore(&hwp_notify_lock, flags);
>  
>  	return;
>  
>  ack_intr:
>  	wrmsrl_safe(MSR_HWP_STATUS, 0);
> -	spin_unlock_irqrestore(&hwp_notify_lock, flags);
> +	raw_spin_unlock_irqrestore(&hwp_notify_lock, flags);
>  }
>  
>  static void intel_pstate_disable_hwp_interrupt(struct cpudata
> *cpudata)
> @@ -1895,9 +1895,9 @@ static void
> intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata)
>  	/* wrmsrl_on_cpu has to be outside spinlock as this can
> result in IPC */
>  	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
>  
> -	spin_lock_irq(&hwp_notify_lock);
> +	raw_spin_lock_irq(&hwp_notify_lock);
>  	cancel_work = cpumask_test_and_clear_cpu(cpudata->cpu,
> &hwp_intr_enable_mask);
> -	spin_unlock_irq(&hwp_notify_lock);
> +	raw_spin_unlock_irq(&hwp_notify_lock);
>  
>  	if (cancel_work)
>  		cancel_delayed_work_sync(&cpudata->hwp_notify_work);
> @@ -1912,10 +1912,10 @@ static void
> intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
>  	if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) {
>  		u64 interrupt_mask = HWP_GUARANTEED_PERF_CHANGE_REQ;
>  
> -		spin_lock_irq(&hwp_notify_lock);
> +		raw_spin_lock_irq(&hwp_notify_lock);
>  		INIT_DELAYED_WORK(&cpudata->hwp_notify_work,
> intel_pstate_notify_work);
>  		cpumask_set_cpu(cpudata->cpu,
> &hwp_intr_enable_mask);
> -		spin_unlock_irq(&hwp_notify_lock);
> +		raw_spin_unlock_irq(&hwp_notify_lock);
>  
>  		if
> (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE))
>  			interrupt_mask |=
> HWP_HIGHEST_PERF_CHANGE_REQ;
> 
> base-commit: 3621a2c9142bd490af0666c0c02d52d60ce0d2a5
Uwe Kleine-König Sept. 28, 2024, 11:51 a.m. UTC | #5
Hello,

On Fri, Sep 27, 2024 at 03:47:08AM -0700, srinivas pandruvada wrote:
> On Thu, 2024-09-19 at 10:11 +0200, Uwe Kleine-König wrote:
> > notify_hwp_interrupt() is called via sysvec_thermal() ->
> > smp_thermal_vector() -> intel_thermal_interrupt() in hard irq
> > context.
> > For this reason it must not use a simple spin_lock that sleeps with
> > PREEMPT_RT enabled. So convert it to a raw spinlock.
> > 
> > Reported-by: xiao sheng wen <atzlinux@sina.com>
> > Link: https://bugs.debian.org/1076483
> > Signed-off-by: Uwe Kleine-König <ukleinek@debian.org>
> 
> Missing Tested-by?

Neither I nor anybody else did test that patch before I sent it to the
list (apart from a build test). I guess xiao sheng wen might have
replied with a Tested-by tag.

>     Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>

Thanks
Uwe
Rafael J. Wysocki Oct. 2, 2024, 12:12 p.m. UTC | #6
Hi Uwe,

On Wed, Oct 2, 2024 at 10:24 AM Uwe Kleine-König <ukleinek@debian.org> wrote:
>
> Hello Rafael,
>
> On 10/1/24 20:39, Rafael J. Wysocki wrote:
> > Applied as 6.12-rc material, thanks!
>
> Thanks. Triggered by this patch not being in today's next I looked at
> git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git and
> didn't find it there. What am I missing?

It's in my bleeding-edge branch where things go before hitting
linux-next so they can receive some build testing coverage upfront.

It'll show up in linux-next tomorrow.
diff mbox series

Patch

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index aaea9a39eced..b0018f371ea3 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1845,7 +1845,7 @@  static void intel_pstate_notify_work(struct work_struct *work)
 	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
 }
 
-static DEFINE_SPINLOCK(hwp_notify_lock);
+static DEFINE_RAW_SPINLOCK(hwp_notify_lock);
 static cpumask_t hwp_intr_enable_mask;
 
 #define HWP_GUARANTEED_PERF_CHANGE_STATUS      BIT(0)
@@ -1868,7 +1868,7 @@  void notify_hwp_interrupt(void)
 	if (!(value & status_mask))
 		return;
 
-	spin_lock_irqsave(&hwp_notify_lock, flags);
+	raw_spin_lock_irqsave(&hwp_notify_lock, flags);
 
 	if (!cpumask_test_cpu(this_cpu, &hwp_intr_enable_mask))
 		goto ack_intr;
@@ -1876,13 +1876,13 @@  void notify_hwp_interrupt(void)
 	schedule_delayed_work(&all_cpu_data[this_cpu]->hwp_notify_work,
 			      msecs_to_jiffies(10));
 
-	spin_unlock_irqrestore(&hwp_notify_lock, flags);
+	raw_spin_unlock_irqrestore(&hwp_notify_lock, flags);
 
 	return;
 
 ack_intr:
 	wrmsrl_safe(MSR_HWP_STATUS, 0);
-	spin_unlock_irqrestore(&hwp_notify_lock, flags);
+	raw_spin_unlock_irqrestore(&hwp_notify_lock, flags);
 }
 
 static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata)
@@ -1895,9 +1895,9 @@  static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata)
 	/* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */
 	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
 
-	spin_lock_irq(&hwp_notify_lock);
+	raw_spin_lock_irq(&hwp_notify_lock);
 	cancel_work = cpumask_test_and_clear_cpu(cpudata->cpu, &hwp_intr_enable_mask);
-	spin_unlock_irq(&hwp_notify_lock);
+	raw_spin_unlock_irq(&hwp_notify_lock);
 
 	if (cancel_work)
 		cancel_delayed_work_sync(&cpudata->hwp_notify_work);
@@ -1912,10 +1912,10 @@  static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
 	if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) {
 		u64 interrupt_mask = HWP_GUARANTEED_PERF_CHANGE_REQ;
 
-		spin_lock_irq(&hwp_notify_lock);
+		raw_spin_lock_irq(&hwp_notify_lock);
 		INIT_DELAYED_WORK(&cpudata->hwp_notify_work, intel_pstate_notify_work);
 		cpumask_set_cpu(cpudata->cpu, &hwp_intr_enable_mask);
-		spin_unlock_irq(&hwp_notify_lock);
+		raw_spin_unlock_irq(&hwp_notify_lock);
 
 		if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE))
 			interrupt_mask |= HWP_HIGHEST_PERF_CHANGE_REQ;