Message ID | 20240415113436.3261042-2-vschneid@redhat.com |
---|---|
State | New |
Headers | show |
Series | tcp/dcpp: Un-pin tw_timer | expand |
Hello, kernel test robot noticed "BUG:sleeping_function_called_from_invalid_context_at_kernel/workqueue.c" on: commit: 1463958a05a90694cf63a6decf02983ef9a0b102 ("[PATCH v5 1/2] SQUASH: tcp/dcpp: Convert timewait timer into a delayed_work") url: https://github.com/intel-lab-lkp/linux/commits/Valentin-Schneider/SQUASH-tcp-dcpp-Convert-timewait-timer-into-a-delayed_work/20240415-193744 base: https://git.kernel.org/cgit/linux/kernel/git/bpf/bpf-next.git master patch link: https://lore.kernel.org/all/20240415113436.3261042-2-vschneid@redhat.com/ patch subject: [PATCH v5 1/2] SQUASH: tcp/dcpp: Convert timewait timer into a delayed_work in testcase: ltp version: ltp-x86_64-14c1f76-1_20240420 with following parameters: test: net_stress.appl-ssh compiler: gcc-13 test machine: 8 threads 1 sockets Intel(R) Core(TM) i7-4770 CPU @ 3.40GHz (Haswell) with 16G memory (please refer to attached dmesg/kmsg for entire log/backtrace) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <oliver.sang@intel.com> | Closes: https://lore.kernel.org/oe-lkp/202404261608.7c346a06-oliver.sang@intel.com [ 157.135844][ C7] BUG: sleeping function called from invalid context at kernel/workqueue.c:2195 [ 157.144790][ C7] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 3686, name: sshd [ 157.153204][ C7] preempt_count: 101, expected: 0 [ 157.158116][ C7] RCU nest depth: 4, expected: 0 [ 157.162939][ C7] CPU: 7 PID: 3686 Comm: sshd Tainted: G S 6.9.0-rc1-00332-g1463958a05a9 #1 [ 157.172905][ C7] Hardware name: Dell Inc. OptiPlex 9020/0DNKMN, BIOS A05 12/05/2013 [ 157.180861][ C7] Call Trace: [ 157.184021][ C7] <IRQ> [ 157.186743][ C7] dump_stack_lvl (lib/dump_stack.c:117) [ 157.191131][ C7] __might_resched (kernel/sched/core.c:10198) [ 157.195780][ C7] ? 
__pfx___might_resched (kernel/sched/core.c:10152) [ 157.200951][ C7] __cancel_work_sync (include/linux/kernel.h:73 kernel/workqueue.c:2195 kernel/workqueue.c:4295) [ 157.205773][ C7] ? __pfx___cancel_work_sync (kernel/workqueue.c:4290) [ 157.211209][ C7] inet_twsk_deschedule_put (net/ipv4/inet_timewait_sock.c:222 (discriminator 1)) [ 157.216470][ C7] tcp_v4_rcv (net/ipv4/tcp_ipv4.c:2413) [ 157.220770][ C7] ? __pfx_tcp_v4_rcv (net/ipv4/tcp_ipv4.c:2163) [ 157.225501][ C7] ? __kernel_text_address (kernel/extable.c:79 (discriminator 1)) [ 157.230668][ C7] ? __pfx_raw_v4_input (net/ipv4/raw.c:165) [ 157.235574][ C7] ip_protocol_deliver_rcu (net/ipv4/ip_input.c:205 (discriminator 1)) [ 157.240830][ C7] ip_local_deliver_finish (include/linux/rcupdate.h:813 net/ipv4/ip_input.c:234) [ 157.246171][ C7] ? tcp_wfree (net/ipv4/tcp_output.c:1225) [ 157.250464][ C7] ip_local_deliver (include/linux/netfilter.h:314 include/linux/netfilter.h:308 net/ipv4/ip_input.c:254) [ 157.255190][ C7] ? __pfx_ip_local_deliver (net/ipv4/ip_input.c:243) [ 157.260442][ C7] ? ip_rcv_finish_core+0x1c0/0x10a0 [ 157.266219][ C7] ip_rcv (include/net/dst.h:460 (discriminator 4) net/ipv4/ip_input.c:449 (discriminator 4) include/linux/netfilter.h:314 (discriminator 4) include/linux/netfilter.h:308 (discriminator 4) net/ipv4/ip_input.c:569 (discriminator 4)) [ 157.270078][ C7] ? __pfx_ip_rcv (net/ipv4/ip_input.c:562) [ 157.274459][ C7] ? do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1)) [ 157.279012][ C7] ? update_curr (kernel/sched/fair.c:1164 (discriminator 1)) [ 157.283395][ C7] ? update_load_avg (kernel/sched/fair.c:4411 kernel/sched/fair.c:4748) [ 157.288300][ C7] ? __pfx_ip_rcv (net/ipv4/ip_input.c:562) [ 157.292684][ C7] __netif_receive_skb_one_core (net/core/dev.c:5585 (discriminator 4)) [ 157.298462][ C7] ? __pfx___netif_receive_skb_one_core (net/core/dev.c:5578) [ 157.304760][ C7] ? 
_raw_spin_lock_irq (arch/x86/include/asm/atomic.h:115 (discriminator 4) include/linux/atomic/atomic-arch-fallback.h:2170 (discriminator 4) include/linux/atomic/atomic-instrumented.h:1302 (discriminator 4) include/asm-generic/qspinlock.h:111 (discriminator 4) include/linux/spinlock.h:187 (discriminator 4) include/linux/spinlock_api_smp.h:120 (discriminator 4) kernel/locking/spinlock.c:170 (discriminator 4)) [ 157.309663][ C7] process_backlog (include/linux/rcupdate.h:813 net/core/dev.c:6029) [ 157.314305][ C7] ? __pfx_trigger_load_balance (kernel/sched/fair.c:12435) [ 157.319904][ C7] __napi_poll (net/core/dev.c:6679) [ 157.324110][ C7] ? update_process_times (kernel/time/timer.c:2480) [ 157.329276][ C7] net_rx_action (net/core/dev.c:6750 net/core/dev.c:6864) [ 157.333748][ C7] ? __pfx_net_rx_action (net/core/dev.c:6828) [ 157.338734][ C7] ? __pfx_sched_clock_cpu (kernel/sched/clock.c:389) [ 157.343902][ C7] ? __pfx_sched_clock_cpu (kernel/sched/clock.c:389) [ 157.349070][ C7] __do_softirq (arch/x86/include/asm/jump_label.h:27 include/linux/jump_label.h:207 include/trace/events/irq.h:142 kernel/softirq.c:555) [ 157.353453][ C7] ? __pfx___do_softirq (kernel/softirq.c:512) [ 157.358361][ C7] ? irqtime_account_irq (kernel/sched/cputime.c:64 (discriminator 1)) [ 157.363449][ C7] do_softirq (kernel/softirq.c:455 (discriminator 32) kernel/softirq.c:442 (discriminator 32)) [ 157.367488][ C7] </IRQ> [ 157.370296][ C7] <TASK> [ 157.373107][ C7] __local_bh_enable_ip (kernel/softirq.c:382) [ 157.378015][ C7] __dev_queue_xmit (net/core/dev.c:4389) [ 157.382836][ C7] ? unwind_next_frame (arch/x86/kernel/unwind_orc.c:406 (discriminator 1) arch/x86/kernel/unwind_orc.c:648 (discriminator 1)) [ 157.388007][ C7] ? __pfx___dev_queue_xmit (net/core/dev.c:4270) [ 157.393263][ C7] ? kernel_text_address (kernel/extable.c:119 (discriminator 1) kernel/extable.c:94 (discriminator 1)) [ 157.398348][ C7] ? __kernel_text_address (kernel/extable.c:79 (discriminator 1)) [ 157.403516][ C7] ? 
unwind_get_return_address (arch/x86/kernel/unwind_orc.c:369 (discriminator 1)) [ 157.409035][ C7] ? arch_stack_walk (arch/x86/kernel/stacktrace.c:26) [ 157.413769][ C7] ? eth_header (net/ethernet/eth.c:100) [ 157.418065][ C7] ? neigh_resolve_output (include/linux/netdevice.h:3145 net/core/neighbour.c:1558 net/core/neighbour.c:1543) [ 157.423324][ C7] ip_finish_output2 (include/net/neighbour.h:542 (discriminator 2) net/ipv4/ip_output.c:235 (discriminator 2)) [ 157.428226][ C7] ? arch_stack_walk (arch/x86/kernel/stacktrace.c:24 (discriminator 1)) [ 157.432953][ C7] ? __pfx_ip_skb_dst_mtu (include/net/ip.h:496) [ 157.438041][ C7] ? __pfx_ip_finish_output2 (net/ipv4/ip_output.c:199) [ 157.443394][ C7] ? __ip_finish_output (include/linux/skbuff.h:1636 include/linux/skbuff.h:4958 net/ipv4/ip_output.c:307 net/ipv4/ip_output.c:295) [ 157.448381][ C7] ip_output (net/ipv4/ip_output.c:427) [ 157.452495][ C7] ? __pfx_ip_output (net/ipv4/ip_output.c:427) [ 157.457137][ C7] __ip_queue_xmit (net/ipv4/ip_output.c:535) [ 157.461862][ C7] ? __copy_skb_header (include/net/dst.h:290 net/core/skbuff.c:1528) [ 157.466765][ C7] ? __skb_clone (arch/x86/include/asm/atomic.h:53 include/linux/atomic/atomic-arch-fallback.h:992 include/linux/atomic/atomic-instrumented.h:436 net/core/skbuff.c:1599) [ 157.471229][ C7] __tcp_transmit_skb (net/ipv4/tcp_output.c:1462 (discriminator 4)) [ 157.476317][ C7] ? __pfx___tcp_transmit_skb (net/ipv4/tcp_output.c:1283) [ 157.481740][ C7] tcp_write_xmit (net/ipv4/tcp_output.c:2792 (discriminator 2)) [ 157.486379][ C7] ? skb_do_copy_data_nocache (include/linux/uio.h:204 include/linux/uio.h:211 include/net/sock.h:2238) [ 157.491977][ C7] ? __pfx_skb_do_copy_data_nocache (include/net/sock.h:2229) [ 157.497923][ C7] ? 
skb_page_frag_refill (arch/x86/include/asm/atomic.h:23 include/linux/atomic/atomic-arch-fallback.h:457 include/linux/atomic/atomic-instrumented.h:33 include/linux/page_ref.h:67 net/core/sock.c:2908) [ 157.503083][ C7] __tcp_push_pending_frames (net/ipv4/tcp_output.c:2977 (discriminator 2)) [ 157.508505][ C7] tcp_sendmsg_locked (net/ipv4/tcp.c:1310) [ 157.513578][ C7] ? __pfx_chacha_block_generic (lib/crypto/chacha.c:77) [ 157.519173][ C7] ? __pfx_tcp_sendmsg_locked (net/ipv4/tcp.c:1040) [ 157.524594][ C7] ? _raw_spin_lock_bh (arch/x86/include/asm/atomic.h:115 (discriminator 4) include/linux/atomic/atomic-arch-fallback.h:2170 (discriminator 4) include/linux/atomic/atomic-instrumented.h:1302 (discriminator 4) include/asm-generic/qspinlock.h:111 (discriminator 4) include/linux/spinlock.h:187 (discriminator 4) include/linux/spinlock_api_smp.h:127 (discriminator 4) kernel/locking/spinlock.c:178 (discriminator 4)) [ 157.529405][ C7] ? __pfx__raw_spin_lock_bh (kernel/locking/spinlock.c:177) [ 157.534736][ C7] tcp_sendmsg (net/ipv4/tcp.c:1343) [ 157.538850][ C7] sock_write_iter (net/socket.c:730 (discriminator 1) net/socket.c:745 (discriminator 1) net/socket.c:1160 (discriminator 1)) [ 157.543486][ C7] ? __pfx_sock_write_iter (net/socket.c:1144) [ 157.548644][ C7] ? rw_verify_area (fs/read_write.c:377) [ 157.553279][ C7] vfs_write (include/linux/fs.h:2108 fs/read_write.c:497 fs/read_write.c:590) [ 157.557391][ C7] ? __pfx_vfs_write (fs/read_write.c:571) [ 157.562026][ C7] ? __pfx___might_resched (kernel/sched/core.c:10152) [ 157.567189][ C7] ksys_write (fs/read_write.c:643) [ 157.571389][ C7] ? 
__pfx_ksys_write (fs/read_write.c:633) [ 157.576112][ C7] do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1)) [ 157.580486][ C7] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) [ 157.586254][ C7] RIP: 0033:0x7f568f9de240 [ 157.590542][ C7] Code: 40 00 48 8b 15 c1 9b 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 80 3d a1 23 0e 00 00 74 17 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 48 83 ec 28 48 89 All code ======== 0: 40 00 48 8b add %cl,-0x75(%rax) 4: 15 c1 9b 0d 00 adc $0xd9bc1,%eax 9: f7 d8 neg %eax b: 64 89 02 mov %eax,%fs:(%rdx) e: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax 15: eb b7 jmp 0xffffffffffffffce 17: 0f 1f 00 nopl (%rax) 1a: 80 3d a1 23 0e 00 00 cmpb $0x0,0xe23a1(%rip) # 0xe23c2 21: 74 17 je 0x3a 23: b8 01 00 00 00 mov $0x1,%eax 28: 0f 05 syscall 2a:* 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax <-- trapping instruction 30: 77 58 ja 0x8a 32: c3 retq 33: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 3a: 48 83 ec 28 sub $0x28,%rsp 3e: 48 rex.W 3f: 89 .byte 0x89 Code starting with the faulting instruction =========================================== 0: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 6: 77 58 ja 0x60 8: c3 retq 9: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 10: 48 83 ec 28 sub $0x28,%rsp 14: 48 rex.W 15: 89 .byte 0x89 The kernel config and materials to reproduce are available at: https://download.01.org/0day-ci/archive/20240426/202404261608.7c346a06-oliver.sang@intel.com
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index f28da08a37b4e..c4d64f1f8d415 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -72,7 +72,7 @@ struct inet_timewait_sock { tw_tos : 8; u32 tw_txhash; u32 tw_priority; - struct timer_list tw_timer; + struct delayed_work tw_expiry_work; struct inet_bind_bucket *tw_tb; struct inet_bind2_bucket *tw_tb2; }; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 7adace541fe29..ab11b688f1eeb 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -444,7 +444,7 @@ static int inet_twsk_diag_fill(struct sock *sk, r->idiag_state = tw->tw_substate; r->idiag_timer = 3; - tmo = tw->tw_timer.expires - jiffies; + tmo = tw->tw_expiry_work.timer.expires - jiffies; r->idiag_expires = jiffies_delta_to_msecs(tmo); r->idiag_rqueue = 0; r->idiag_wqueue = 0; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index e8de45d34d56a..7a2dcbaa1a61e 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -150,11 +150,13 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, } EXPORT_SYMBOL_GPL(inet_twsk_hashdance); -static void tw_timer_handler(struct timer_list *t) +static void tw_expiry_workfn(struct work_struct *work) { - struct inet_timewait_sock *tw = from_timer(tw, t, tw_timer); - + struct inet_timewait_sock *tw = container_of( + work, struct inet_timewait_sock, tw_expiry_work.work); + local_bh_disable(); inet_twsk_kill(tw); + local_bh_enable(); } struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, @@ -192,7 +194,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, tw->tw_prot = sk->sk_prot_creator; atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie)); twsk_net_set(tw, sock_net(sk)); - timer_setup(&tw->tw_timer, tw_timer_handler, TIMER_PINNED); + INIT_DELAYED_WORK(&tw->tw_expiry_work, tw_expiry_workfn); /* * Because we use RCU lookups, we 
should not set tw_refcnt * to a non null value before everything is setup for this @@ -217,7 +219,7 @@ EXPORT_SYMBOL_GPL(inet_twsk_alloc); */ void inet_twsk_deschedule_put(struct inet_timewait_sock *tw) { - if (del_timer_sync(&tw->tw_timer)) + if (cancel_delayed_work_sync(&tw->tw_expiry_work)) inet_twsk_kill(tw); inet_twsk_put(tw); } @@ -255,10 +257,10 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm) __NET_INC_STATS(twsk_net(tw), kill ? LINUX_MIB_TIMEWAITKILLED : LINUX_MIB_TIMEWAITED); - BUG_ON(mod_timer(&tw->tw_timer, jiffies + timeo)); + BUG_ON(!queue_delayed_work(system_unbound_wq, &tw->tw_expiry_work, timeo)); refcount_inc(&tw->tw_dr->tw_refcount); } else { - mod_timer_pending(&tw->tw_timer, jiffies + timeo); + mod_delayed_work(system_unbound_wq, &tw->tw_expiry_work, timeo); } } EXPORT_SYMBOL_GPL(__inet_twsk_schedule); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a22ee58387518..4b106f017a81f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2920,7 +2920,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) static void get_timewait4_sock(const struct inet_timewait_sock *tw, struct seq_file *f, int i) { - long delta = tw->tw_timer.expires - jiffies; + long delta = tw->tw_expiry_work.timer.expires - jiffies; __be32 dest, src; __u16 destp, srcp; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3f4cba49e9ee6..58cd12fdc91a5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2242,7 +2242,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) static void get_timewait6_sock(struct seq_file *seq, struct inet_timewait_sock *tw, int i) { - long delta = tw->tw_timer.expires - jiffies; + long delta = tw->tw_expiry_work.timer.expires - jiffies; const struct in6_addr *dest, *src; __u16 destp, srcp; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c index 92267abb462fc..a429f4eb9939c 
100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c @@ -152,7 +152,7 @@ static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw, __be32 dest, src; long delta; - delta = tw->tw_timer.expires - bpf_jiffies64(); + delta = tw->tw_expiry_work.timer.expires - bpf_jiffies64(); dest = tw->tw_daddr; src = tw->tw_rcv_saddr; destp = bpf_ntohs(tw->tw_dport); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c index 943f7bba180e7..795bb34c95f72 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c @@ -157,7 +157,7 @@ static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw, __u16 destp, srcp; long delta; - delta = tw->tw_timer.expires - bpf_jiffies64(); + delta = tw->tw_expiry_work.timer.expires - bpf_jiffies64(); dest = &tw->tw_v6_daddr; src = &tw->tw_v6_rcv_saddr; destp = bpf_ntohs(tw->tw_dport);
!NOTE! This changes behaviour: the tw_timer is TIMER_PINNED, and a delayed work currently cannot be pinned in that way. TIMER_PINNED timers are enqueued onto the local base, whereas a delayed_work has its timer enqueued on the global base AND also checks for HK_TYPE_TIMER isolation. The split between this commit and the next one is mainly there for ease of reviewing; this commit should be squashed with the next one. Signed-off-by: Valentin Schneider <vschneid@redhat.com> --- include/net/inet_timewait_sock.h | 2 +- net/ipv4/inet_diag.c | 2 +- net/ipv4/inet_timewait_sock.c | 16 +++++++++------- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- .../testing/selftests/bpf/progs/bpf_iter_tcp4.c | 2 +- .../testing/selftests/bpf/progs/bpf_iter_tcp6.c | 2 +- 7 files changed, 15 insertions(+), 13 deletions(-)