Message ID | 20200902155557.h2wl2qpfn2rwsofw@linutronix.de |
---|---|
State | New |
Headers | show |
Series | [ANNOUNCE] v5.9-rc3-rt3 | expand |
[ 22.004225] r8169 0000:03:00.0 eth0: Link is Up - 1Gbps/Full - flow control off [ 22.004450] br0: port 1(eth0) entered blocking state [ 22.004473] br0: port 1(eth0) entered forwarding state [ 22.006411] IPv6: ADDRCONF(NETDEV_CHANGE): br0: link becomes ready [ 22.024936] ====================================================== [ 22.024936] WARNING: possible circular locking dependency detected [ 22.024937] 5.9.0.gc70672d-rt3-rt #8 Tainted: G E [ 22.024938] ------------------------------------------------------ [ 22.024939] ksoftirqd/0/10 is trying to acquire lock: [ 22.024941] ffff983475521278 (&sch->q.lock){+...}-{0:0}, at: sch_direct_xmit+0x81/0x2f0 [ 22.024947] but task is already holding lock: [ 22.024947] ffff9834755212b8 (&s->seqcount#9){+...}-{0:0}, at: br_dev_queue_push_xmit+0x7d/0x180 [bridge] [ 22.024959] which lock already depends on the new lock. [ 22.024960] the existing dependency chain (in reverse order) is: [ 22.024961] -> #1 (&s->seqcount#9){+...}-{0:0}: [ 22.024963] lock_acquire+0x92/0x3f0 [ 22.024967] __dev_queue_xmit+0xce7/0xe30 [ 22.024969] br_dev_queue_push_xmit+0x7d/0x180 [bridge] [ 22.024974] br_forward_finish+0x10a/0x1b0 [bridge] [ 22.024980] __br_forward+0x17d/0x300 [bridge] [ 22.024984] br_dev_xmit+0x442/0x570 [bridge] [ 22.024990] dev_hard_start_xmit+0xc5/0x3f0 [ 22.024992] __dev_queue_xmit+0x9db/0xe30 [ 22.024993] ip6_finish_output2+0x26a/0x990 [ 22.024995] ip6_output+0x6d/0x260 [ 22.024996] mld_sendpack+0x1d9/0x360 [ 22.024999] mld_ifc_timer_expire+0x1f7/0x370 [ 22.025000] call_timer_fn+0xa0/0x390 [ 22.025003] run_timer_softirq+0x59a/0x720 [ 22.025004] __do_softirq+0xc1/0x5b2 [ 22.025006] run_ksoftirqd+0x47/0x70 [ 22.025007] smpboot_thread_fn+0x266/0x320 [ 22.025009] kthread+0x171/0x190 [ 22.025010] ret_from_fork+0x1f/0x30 [ 22.025013] -> #0 (&sch->q.lock){+...}-{0:0}: [ 22.025015] validate_chain+0xa81/0x1230 [ 22.025016] __lock_acquire+0x880/0xbf0 [ 22.025017] lock_acquire+0x92/0x3f0 [ 22.025018] rt_spin_lock+0x78/0xd0 [ 22.025020] 
sch_direct_xmit+0x81/0x2f0 [ 22.025022] __dev_queue_xmit+0xd38/0xe30 [ 22.025023] br_dev_queue_push_xmit+0x7d/0x180 [bridge] [ 22.025029] br_forward_finish+0x10a/0x1b0 [bridge] [ 22.025033] __br_forward+0x17d/0x300 [bridge] [ 22.025039] br_dev_xmit+0x442/0x570 [bridge] [ 22.025043] dev_hard_start_xmit+0xc5/0x3f0 [ 22.025044] __dev_queue_xmit+0x9db/0xe30 [ 22.025046] ip6_finish_output2+0x26a/0x990 [ 22.025047] ip6_output+0x6d/0x260 [ 22.025049] mld_sendpack+0x1d9/0x360 [ 22.025050] mld_ifc_timer_expire+0x1f7/0x370 [ 22.025052] call_timer_fn+0xa0/0x390 [ 22.025053] run_timer_softirq+0x59a/0x720 [ 22.025054] __do_softirq+0xc1/0x5b2 [ 22.025055] run_ksoftirqd+0x47/0x70 [ 22.025056] smpboot_thread_fn+0x266/0x320 [ 22.025058] kthread+0x171/0x190 [ 22.025059] ret_from_fork+0x1f/0x30 [ 22.025060] other info that might help us debug this: [ 22.025061] Possible unsafe locking scenario: [ 22.025061] CPU0 CPU1 [ 22.025061] ---- ---- [ 22.025062] lock(&s->seqcount#9); [ 22.025064] lock(&sch->q.lock); [ 22.025065] lock(&s->seqcount#9); [ 22.025065] lock(&sch->q.lock); [ 22.025066] *** DEADLOCK *** [ 22.025066] 20 locks held by ksoftirqd/0/10: [ 22.025067] #0: ffffffff9a4c7140 (rcu_read_lock){....}-{1:3}, at: rt_spin_lock+0x5/0xd0 [ 22.025071] #1: ffff98351ec1a6d0 (per_cpu_ptr(&bh_lock.l.lock, cpu)){....}-{3:3}, at: __local_bh_disable_ip+0xbf/0x230 [ 22.025074] #2: ffffffff9a4c7140 (rcu_read_lock){....}-{1:3}, at: __local_bh_disable_ip+0xfb/0x230 [ 22.025077] #3: ffffffff9a4c7140 (rcu_read_lock){....}-{1:3}, at: rt_spin_lock+0x5/0xd0 [ 22.025080] #4: ffff98351ec1b338 (&base->expiry_lock){+...}-{0:0}, at: run_timer_softirq+0x3e6/0x720 [ 22.025083] #5: ffffb32e8007bd68 ((&idev->mc_ifc_timer)){+...}-{0:0}, at: call_timer_fn+0x5/0x390 [ 22.025086] #6: ffffffff9a4c7140 (rcu_read_lock){....}-{1:3}, at: mld_sendpack+0x5/0x360 [ 22.025090] #7: ffffffff9a4c7140 (rcu_read_lock){....}-{1:3}, at: __local_bh_disable_ip+0xfb/0x230 [ 22.025093] #8: ffffffff9a4c7100 
(rcu_read_lock_bh){....}-{1:3}, at: ip6_finish_output2+0x73/0x990 [ 22.025096] #9: ffffffff9a4c7140 (rcu_read_lock){....}-{1:3}, at: __local_bh_disable_ip+0xfb/0x230 [ 22.025097] #10: ffffffff9a4c7100 (rcu_read_lock_bh){....}-{1:3}, at: __dev_queue_xmit+0x63/0xe30 [ 22.025100] #11: ffffffff9a4c7140 (rcu_read_lock){....}-{1:3}, at: br_dev_xmit+0x5/0x570 [bridge] [ 22.025108] #12: ffffffff9a4c7140 (rcu_read_lock){....}-{1:3}, at: __local_bh_disable_ip+0xfb/0x230 [ 22.025110] #13: ffffffff9a4c7100 (rcu_read_lock_bh){....}-{1:3}, at: __dev_queue_xmit+0x63/0xe30 [ 22.025113] #14: ffffffff9a4c7140 (rcu_read_lock){....}-{1:3}, at: rt_spin_lock+0x5/0xd0 [ 22.025116] #15: ffff9834755215f0 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{0:0}, at: __dev_queue_xmit+0x8a4/0xe30 [ 22.025119] #16: ffffffff9a4c7140 (rcu_read_lock){....}-{1:3}, at: rt_spin_lock+0x5/0xd0 [ 22.025121] #17: ffff983475521398 (dev->qdisc_running_key ?: &qdisc_running_key){+...}-{0:0}, at: __dev_queue_xmit+0xca6/0xe30 [ 22.025124] #18: ffff9834755212b8 (&s->seqcount#9){+...}-{0:0}, at: br_dev_queue_push_xmit+0x7d/0x180 [bridge] [ 22.025132] #19: ffffffff9a4c7140 (rcu_read_lock){....}-{1:3}, at: rt_spin_lock+0x5/0xd0 [ 22.025134] stack backtrace: [ 22.025134] CPU: 0 PID: 10 Comm: ksoftirqd/0 Kdump: loaded Tainted: G E 5.9.0.gc70672d-rt3-rt #8 [ 22.025135] Hardware name: MEDION MS-7848/MS-7848, BIOS M7848W08.20C 09/23/2013 [ 22.025136] Call Trace: [ 22.025138] dump_stack+0x77/0x9b [ 22.025143] check_noncircular+0x148/0x160 [ 22.025147] ? validate_chain+0xa81/0x1230 [ 22.025148] validate_chain+0xa81/0x1230 [ 22.025153] __lock_acquire+0x880/0xbf0 [ 22.025157] lock_acquire+0x92/0x3f0 [ 22.025158] ? sch_direct_xmit+0x81/0x2f0 [ 22.025160] ? rt_spin_unlock+0x39/0x90 [ 22.025162] rt_spin_lock+0x78/0xd0 [ 22.025164] ? sch_direct_xmit+0x81/0x2f0 [ 22.025166] sch_direct_xmit+0x81/0x2f0 [ 22.025169] __dev_queue_xmit+0xd38/0xe30 [ 22.025173] ? find_held_lock+0x2d/0x90 [ 22.025176] ? 
br_dev_queue_push_xmit+0x7d/0x180 [bridge] [ 22.025182] br_dev_queue_push_xmit+0x7d/0x180 [bridge] [ 22.025190] br_forward_finish+0x10a/0x1b0 [bridge] [ 22.025196] ? __br_forward+0x151/0x300 [bridge] [ 22.025204] __br_forward+0x17d/0x300 [bridge] [ 22.025211] ? br_flood+0x98/0x120 [bridge] [ 22.025216] br_dev_xmit+0x442/0x570 [bridge] [ 22.025224] dev_hard_start_xmit+0xc5/0x3f0 [ 22.025226] ? netif_skb_features+0xb0/0x230 [ 22.025228] __dev_queue_xmit+0x9db/0xe30 [ 22.025231] ? eth_header+0x25/0xc0 [ 22.025235] ? ip6_finish_output2+0x26a/0x990 [ 22.025236] ip6_finish_output2+0x26a/0x990 [ 22.025239] ? ip6_mtu+0x135/0x1b0 [ 22.025241] ? ip6_output+0x6d/0x260 [ 22.025243] ip6_output+0x6d/0x260 [ 22.025246] ? __ip6_finish_output+0x210/0x210 [ 22.025249] mld_sendpack+0x1d9/0x360 [ 22.025252] ? mld_ifc_timer_expire+0x119/0x370 [ 22.025254] mld_ifc_timer_expire+0x1f7/0x370 [ 22.025256] ? mld_dad_timer_expire+0xb0/0xb0 [ 22.025258] ? mld_dad_timer_expire+0xb0/0xb0 [ 22.025260] call_timer_fn+0xa0/0x390 [ 22.025263] ? mld_dad_timer_expire+0xb0/0xb0 [ 22.025264] run_timer_softirq+0x59a/0x720 [ 22.025268] ? lock_acquire+0x92/0x3f0 [ 22.025272] __do_softirq+0xc1/0x5b2 [ 22.025274] ? smpboot_thread_fn+0x28/0x320 [ 22.025276] ? smpboot_thread_fn+0x28/0x320 [ 22.025278] ? smpboot_thread_fn+0x70/0x320 [ 22.025279] run_ksoftirqd+0x47/0x70 [ 22.025281] smpboot_thread_fn+0x266/0x320 [ 22.025284] ? smpboot_register_percpu_thread+0xe0/0xe0 [ 22.025286] kthread+0x171/0x190 [ 22.025287] ? kthread_park+0x90/0x90 [ 22.025288] ret_from_fork+0x1f/0x30 [ 22.176416] NET: Registered protocol family 17
On 2020-09-05 07:19:10 [+0200], Mike Galbraith wrote: > Lappy, which does not use bridge, boots clean... but lock leakage > pretty darn quickly inspires lockdep to craps its drawers. > > [ 209.001111] BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low! > [ 209.001113] turning off the locking correctness validator. > [ 209.001114] CPU: 2 PID: 3773 Comm: Socket Thread Tainted: G S I E 5.9.0.gc70672d-rt3-rt #8 > [ 209.001117] Hardware name: HP HP Spectre x360 Convertible/804F, BIOS F.47 11/22/2017 > [ 209.001118] Call Trace: > [ 209.001123] dump_stack+0x77/0x9b > [ 209.001129] validate_chain+0xf60/0x1230 I have no idea how to debug this based on this report. Can you narrow it down to something? Is Lappy new, got a new something or has a new config switch? I'm just curious if this is something new or something that was always there but remained undetected. (Your other report was about something that was previously always "broken".) Sebastian
On Tue, 2020-09-08 at 17:12 +0200, Sebastian Andrzej Siewior wrote: > On 2020-09-05 07:19:10 [+0200], Mike Galbraith wrote: > > Lappy, which does not use bridge, boots clean... but lock leakage > > pretty darn quickly inspires lockdep to craps its drawers. > > > > [ 209.001111] BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low! > > [ 209.001113] turning off the locking correctness validator. > > [ 209.001114] CPU: 2 PID: 3773 Comm: Socket Thread Tainted: G S I E 5.9.0.gc70672d-rt3-rt #8 > > [ 209.001117] Hardware name: HP HP Spectre x360 Convertible/804F, BIOS F.47 11/22/2017 > > [ 209.001118] Call Trace: > > [ 209.001123] dump_stack+0x77/0x9b > > [ 209.001129] validate_chain+0xf60/0x1230 > > I have no idea how to debug this based on this report. Can you narrow > it down to something? I instrumented what I presume is still this problem once upon a time, structures containing locks are allocated/initialized/freed again and again with no cleanup until we increment into the wall. > Is Lappy new, got a new something or has a new config switch? I'm just > curious if this something or something that was always there but > remained undetected. Nah, this is nothing new. Turn lockdep on in RT, it's just a matter of time before it turns itself off. It's usually just not _that_ quick. -Mike
On Tue, 2020-09-08 at 17:06 +0200, Sebastian Andrzej Siewior wrote: > On 2020-09-08 16:56:20 [+0200], Mike Galbraith wrote: > > On Tue, 2020-09-08 at 14:19 +0200, Sebastian Andrzej Siewior wrote: > > > > > > This has nothing to do with the bridge but with the fact that you use a > > > non standard queue class (something else than pfifo_fast). > > > > That must be SUSE, I don't muck about in network land. I downloaded a > > whole library of RFCs decades ago, but turns out that one of those is > > all the bedtime story you'll ever need. Huge waste of bandwidth :) > > I see. > This should cure it: I'll give that a go. -Mike
On Wed, 2020-09-02 at 17:55 +0200, Sebastian Andrzej Siewior wrote: > > Known issues > - It has been pointed out that due to changes to the printk code the > internal buffer representation changed. This is only an issue if tools > like `crash' are used to extract the printk buffer from a kernel memory > image. Ouch. While installing -rt5 on lappy via nfs, -rt5 server box exploded leaving nada in logs. I have a nifty crash dump of the event, but... -Mike
On Wed, 2020-09-09 at 05:12 +0200, Mike Galbraith wrote: > On Wed, 2020-09-02 at 17:55 +0200, Sebastian Andrzej Siewior wrote: > > > > Known issues > > - It has been pointed out that due to changes to the printk code the > > internal buffer representation changed. This is only an issue if tools > > like `crash' are used to extract the printk buffer from a kernel memory > > image. > > Ouch. While installing -rt5 on lappy via nfs, -rt5 server box exploded > leaving nada in logs. I have a nifty crash dump of the event, but... After convincing crash (with club) that it didn't _really_ need a log_buf, nfs had nothing to do with the crash, it was nouveau. KERNEL: vmlinux-5.9.0.gf4d51df-rt5-rt.gz DUMPFILE: vmcore CPUS: 8 DATE: Wed Sep 9 04:41:24 2020 UPTIME: 00:08:10 LOAD AVERAGE: 3.17, 1.86, 0.99 TASKS: 715 NODENAME: homer RELEASE: 5.9.0.gf4d51df-rt5-rt VERSION: #1 SMP PREEMPT_RT Wed Sep 9 03:22:01 CEST 2020 MACHINE: x86_64 (3591 Mhz) MEMORY: 16 GB PANIC: "" PID: 2146 COMMAND: "X" TASK: ffff994c7fad0000 [THREAD_INFO: ffff994c7fad0000] CPU: 0 STATE: TASK_RUNNING (PANIC) crash> bt -l PID: 2146 TASK: ffff994c7fad0000 CPU: 0 COMMAND: "X" #0 [ffffbfffc11a76c8] machine_kexec at ffffffffb7064879 /backup/usr/local/src/kernel/linux-master-rt/./include/linux/ftrace.h: 792 #1 [ffffbfffc11a7710] __crash_kexec at ffffffffb7173622 /backup/usr/local/src/kernel/linux-master-rt/kernel/kexec_core.c: 963 #2 [ffffbfffc11a77d0] crash_kexec at ffffffffb7174920 /backup/usr/local/src/kernel/linux-master-rt/./arch/x86/include/asm/atomic.h: 41 #3 [ffffbfffc11a77e0] oops_end at ffffffffb702716f /backup/usr/local/src/kernel/linux-master-rt/arch/x86/kernel/dumpstack.c: 342 #4 [ffffbfffc11a7800] exc_general_protection at ffffffffb79a2fc6 /backup/usr/local/src/kernel/linux-master-rt/arch/x86/kernel/traps.c: 82 #5 [ffffbfffc11a7890] asm_exc_general_protection at ffffffffb7a00a1e /backup/usr/local/src/kernel/linux-master-rt/./arch/x86/include/asm/idtentry.h: 532 #6 [ffffbfffc11a78a0] nvif_object_ctor at 
ffffffffc07ee6a7 [nouveau] /backup/usr/local/src/kernel/linux-master-rt/drivers/gpu/drm/nouveau/nvif/object.c: 280 #7 [ffffbfffc11a7918] __kmalloc at ffffffffb72eea12 /backup/usr/local/src/kernel/linux-master-rt/mm/slub.c: 261 #8 [ffffbfffc11a7980] nvif_object_ctor at ffffffffc07ee6a7 [nouveau] /backup/usr/local/src/kernel/linux-master-rt/drivers/gpu/drm/nouveau/nvif/object.c: 280 #9 [ffffbfffc11a79d0] nvif_mem_ctor_type at ffffffffc07eef48 [nouveau] /backup/usr/local/src/kernel/linux-master-rt/drivers/gpu/drm/nouveau/nvif/mem.c: 74 #10 [ffffbfffc11a7aa8] nouveau_mem_vram at ffffffffc08b5291 [nouveau] /backup/usr/local/src/kernel/linux-master-rt/drivers/gpu/drm/nouveau/nouveau_mem.c: 155 #11 [ffffbfffc11a7b10] nouveau_vram_manager_new at ffffffffc08b594d [nouveau] /backup/usr/local/src/kernel/linux-master-rt/drivers/gpu/drm/nouveau/nouveau_ttm.c: 76 #12 [ffffbfffc11a7b30] ttm_bo_mem_space at ffffffffc05af2ac [ttm] /backup/usr/local/src/kernel/linux-master-rt/drivers/gpu/drm/ttm/ttm_bo.c: 1065 #13 [ffffbfffc11a7b88] ttm_bo_validate at ffffffffc05afaca [ttm] /backup/usr/local/src/kernel/linux-master-rt/drivers/gpu/drm/ttm/ttm_bo.c: 1137 #14 [ffffbfffc11a7c18] ttm_bo_init_reserved at ffffffffc05afe70 [ttm] /backup/usr/local/src/kernel/linux-master-rt/drivers/gpu/drm/ttm/ttm_bo.c: 1330 #15 [ffffbfffc11a7c60] ttm_bo_init at ffffffffc05afff7 [ttm] /backup/usr/local/src/kernel/linux-master-rt/drivers/gpu/drm/ttm/ttm_bo.c: 1364 #16 [ffffbfffc11a7cc8] nouveau_bo_init at ffffffffc08b0f7b [nouveau] /backup/usr/local/src/kernel/linux-master-rt/drivers/gpu/drm/nouveau/nouveau_bo.c: 317 #17 [ffffbfffc11a7d38] nouveau_gem_new at ffffffffc08b2f7b [nouveau] /backup/usr/local/src/kernel/linux-master-rt/drivers/gpu/drm/nouveau/nouveau_gem.c: 206 #18 [ffffbfffc11a7d70] nouveau_gem_ioctl_new at ffffffffc08b3001 [nouveau] /backup/usr/local/src/kernel/linux-master-rt/drivers/gpu/drm/nouveau/nouveau_gem.c: 272 #19 [ffffbfffc11a7da0] drm_ioctl_kernel at ffffffffc066f564 [drm] 
/backup/usr/local/src/kernel/linux-master-rt/drivers/gpu/drm/drm_ioctl.c: 793 #20 [ffffbfffc11a7de0] drm_ioctl at ffffffffc066f88e [drm] /backup/usr/local/src/kernel/linux-master-rt/./include/linux/uaccess.h: 168 #21 [ffffbfffc11a7ed0] nouveau_drm_ioctl at ffffffffc08abf56 [nouveau] /backup/usr/local/src/kernel/linux-master-rt/drivers/gpu/drm/nouveau/nouveau_drm.c: 1163 #22 [ffffbfffc11a7f08] __x64_sys_ioctl at ffffffffb733255e /backup/usr/local/src/kernel/linux-master-rt/fs/ioctl.c: 49 #23 [ffffbfffc11a7f40] do_syscall_64 at ffffffffb79a25c3 /backup/usr/local/src/kernel/linux-master-rt/arch/x86/entry/common.c: 46 #24 [ffffbfffc11a7f50] entry_SYSCALL_64_after_hwframe at ffffffffb7a0008c /backup/usr/local/src/kernel/linux-master-rt/arch/x86/entry/entry_64.S: 125 RIP: 00007f96707a6ac7 RSP: 00007ffc1cbc2998 RFLAGS: 00000246 RAX: ffffffffffffffda RBX: 000055743cf152e0 RCX: 00007f96707a6ac7 RDX: 00007ffc1cbc29f0 RSI: 00000000c0306480 RDI: 000000000000000e RBP: 00007ffc1cbc29f0 R8: 0000000000000000 R9: 0000000000000003 R10: fffffffffffffd98 R11: 0000000000000246 R12: 00000000c0306480 R13: 000000000000000e R14: 000055743ce99040 R15: 000055743c60cfd0 ORIG_RAX: 0000000000000010 CS: 0033 SS: 002b
On 2020-09-09 07:45:22 [+0200], Mike Galbraith wrote: > On Wed, 2020-09-09 at 05:12 +0200, Mike Galbraith wrote: > > On Wed, 2020-09-02 at 17:55 +0200, Sebastian Andrzej Siewior wrote: > > > > > > Known issues > > > - It has been pointed out that due to changes to the printk code the > > > internal buffer representation changed. This is only an issue if tools > > > like `crash' are used to extract the printk buffer from a kernel memory > > > image. > > > > Ouch. While installing -rt5 on lappy via nfs, -rt5 server box exploded > > leaving nada in logs. I have a nifty crash dump of the event, but... > > After convincing crash (with club) that it didn't _really_ need a > log_buf, nfs had nothing to do with the crash, it was nouveau. okay. Line 280 is hard to understand. My guess is that we got a pointer and then the boom occurred but I can't tell why/how. A few lines later there is args->x = y… Do you see the lockdep splat without nouveau? > crash> bt -l > PID: 2146 TASK: ffff994c7fad0000 CPU: 0 COMMAND: "X" > #0 [ffffbfffc11a76c8] machine_kexec at ffffffffb7064879 > /backup/usr/local/src/kernel/linux-master-rt/./include/linux/ftrace.h: 792 > #1 [ffffbfffc11a7710] __crash_kexec at ffffffffb7173622 > /backup/usr/local/src/kernel/linux-master-rt/kernel/kexec_core.c: 963 > #2 [ffffbfffc11a77d0] crash_kexec at ffffffffb7174920 > /backup/usr/local/src/kernel/linux-master-rt/./arch/x86/include/asm/atomic.h: 41 > #3 [ffffbfffc11a77e0] oops_end at ffffffffb702716f > /backup/usr/local/src/kernel/linux-master-rt/arch/x86/kernel/dumpstack.c: 342 > #4 [ffffbfffc11a7800] exc_general_protection at ffffffffb79a2fc6 > /backup/usr/local/src/kernel/linux-master-rt/arch/x86/kernel/traps.c: 82 > #5 [ffffbfffc11a7890] asm_exc_general_protection at ffffffffb7a00a1e > /backup/usr/local/src/kernel/linux-master-rt/./arch/x86/include/asm/idtentry.h: 532 > #6 [ffffbfffc11a78a0] nvif_object_ctor at ffffffffc07ee6a7 [nouveau] > 
/backup/usr/local/src/kernel/linux-master-rt/drivers/gpu/drm/nouveau/nvif/object.c: 280 > #7 [ffffbfffc11a7918] __kmalloc at ffffffffb72eea12 > /backup/usr/local/src/kernel/linux-master-rt/mm/slub.c: 261 > #8 [ffffbfffc11a7980] nvif_object_ctor at ffffffffc07ee6a7 [nouveau] > /backup/usr/local/src/kernel/linux-master-rt/drivers/gpu/drm/nouveau/nvif/object.c: 280 Sebastian
On 2020-09-09 10:56:41 [+0200], Mike Galbraith wrote: > On Wed, 2020-09-09 at 10:20 +0200, Sebastian Andrzej Siewior wrote: > > > > Do you see the lockdep splat without nouveau? > > Yeah. Lappy uses i915, but lockdep also shuts itself off. You sent the config, I will try to throw it later on kvm and actual hardware and see what happens. > BTW, methinks RT had nothing to do with the nouveau burp. that is good to hear :) > -Mike Sebastian
diff --git a/arch/Kconfig b/arch/Kconfig index 222e553f3cf50..5c8e173dc7c2b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -415,6 +415,13 @@ config MMU_GATHER_NO_GATHER bool depends on MMU_GATHER_TABLE_FREE +config ARCH_WANT_IRQS_OFF_ACTIVATE_MM + bool + help + Temporary select until all architectures can be converted to have + irqs disabled over activate_mm. Architectures that do IPI based TLB + shootdowns should enable this. + config ARCH_HAVE_NMI_SAFE_CMPXCHG bool diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index c5700f44422ec..e8f809161c75f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -29,7 +29,6 @@ #include <linux/async.h> #include <linux/i2c.h> #include <linux/sched/clock.h> -#include <linux/local_lock.h> #include <drm/drm_atomic.h> #include <drm/drm_crtc.h> @@ -1150,7 +1149,6 @@ struct intel_crtc { #ifdef CONFIG_DEBUG_FS struct intel_pipe_crc pipe_crc; #endif - local_lock_t pipe_update_lock; }; struct intel_plane { diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index 62b8248d2ee79..1b9d5e690a9f0 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -118,7 +118,8 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) "PSR idle timed out 0x%x, atomic update may fail\n", psr_status); - local_lock_irq(&crtc->pipe_update_lock); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_disable(); crtc->debug.min_vbl = min; crtc->debug.max_vbl = max; @@ -143,11 +144,13 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) break; } - local_unlock_irq(&crtc->pipe_update_lock); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_enable(); timeout = schedule_timeout(timeout); - local_lock_irq(&crtc->pipe_update_lock); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + 
local_irq_disable(); } finish_wait(wq, &wait); @@ -180,7 +183,8 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) return; irq_disable: - local_lock_irq(&crtc->pipe_update_lock); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_disable(); } /** @@ -218,7 +222,8 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state) new_crtc_state->uapi.event = NULL; } - local_unlock_irq(&crtc->pipe_update_lock); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_enable(); if (intel_vgpu_active(dev_priv)) return; diff --git a/fs/exec.c b/fs/exec.c index a91003e28eaae..d4fb18baf1fb1 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1130,11 +1130,24 @@ static int exec_mmap(struct mm_struct *mm) } task_lock(tsk); - active_mm = tsk->active_mm; membarrier_exec_mmap(mm); - tsk->mm = mm; + + local_irq_disable(); + active_mm = tsk->active_mm; tsk->active_mm = mm; + tsk->mm = mm; + /* + * This prevents preemption while active_mm is being loaded and + * it and mm are being updated, which could cause problems for + * lazy tlb mm refcounting when these are updated by context + * switches. Not all architectures can handle irqs off over + * activate_mm yet. + */ + if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) + local_irq_enable(); activate_mm(active_mm, mm); + if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) + local_irq_enable(); tsk->mm->vmacache_seqnum = 0; vmacache_flush(tsk); task_unlock(tsk); diff --git a/localversion-rt b/localversion-rt index c3054d08a1129..1445cd65885cd 100644 --- a/localversion-rt +++ b/localversion-rt @@ -1 +1 @@ --rt2 +-rt3