Message ID | 20200905044518.3282-1-hoang.h.le@dektech.com.au |
---|---|
State | New |
Headers | show |
Series | [net-next] tipc: fix a deadlock when flushing scheduled work | expand |
On Sat, 5 Sep 2020 11:45:18 +0700 Hoang Huu Le wrote: > In the commit fdeba99b1e58 > ("tipc: fix use-after-free in tipc_bcast_get_mode"), we're trying > to make sure the tipc_net_finalize_work work item finished if it > enqueued. But calling flush_scheduled_work() is not just affecting > above work item but either any scheduled work. This has turned out > to be overkill and caused to deadlock as syzbot reported: > > ====================================================== > WARNING: possible circular locking dependency detected > 5.9.0-rc2-next-20200828-syzkaller #0 Not tainted > ------------------------------------------------------ > kworker/u4:6/349 is trying to acquire lock: > ffff8880aa063d38 ((wq_completion)events){+.+.}-{0:0}, at: flush_workqueue+0xe1/0x13e0 kernel/workqueue.c:2777 > > but task is already holding lock: > ffffffff8a879430 (pernet_ops_rwsem){++++}-{3:3}, at: cleanup_net+0x9b/0xb10 net/core/net_namespace.c:565 > > [...] > Possible unsafe locking scenario: > > CPU0 CPU1 > ---- ---- > lock(pernet_ops_rwsem); > lock(&sb->s_type->i_mutex_key#13); > lock(pernet_ops_rwsem); > lock((wq_completion)events); > > *** DEADLOCK *** > [...] > > To fix the original issue, we replace above calling by introducing > a bit flag. When a namespace cleaned-up, bit flag is set to zero and: > - tipc_net_finalize functionial just does return immediately. > - tipc_net_finalize_work does not enqueue into the scheduled work queue. Is struct tipc_net not going to be freed right after tipc_exit_net() returns? In that case you'd be back to UAF if the flag is in this structure. > @@ -110,10 +111,6 @@ static void __net_exit tipc_exit_net(struct net *net) > tipc_detach_loopback(net); > tipc_net_stop(net); > > - /* Make sure the tipc_net_finalize_work stopped > - * before releasing the resources. > - */ > - flush_scheduled_work(); > tipc_bcast_stop(net); > tipc_nametbl_stop(net); > tipc_sk_rht_destroy(net); > @@ -124,6 +121,9 @@ static void __net_exit tipc_exit_net(struct net *net) > > static void __net_exit tipc_pernet_pre_exit(struct net *net) > { > + struct tipc_net *tn = tipc_net(net); > + > + clear_bit_unlock(0, &tn->net_exit_flag); > tipc_node_pre_cleanup_net(net); > } > > diff --git a/net/tipc/core.h b/net/tipc/core.h > index 631d83c9705f..aa75882dd932 100644 > --- a/net/tipc/core.h > +++ b/net/tipc/core.h > @@ -143,6 +143,7 @@ struct tipc_net { > /* TX crypto handler */ > struct tipc_crypto *crypto_tx; > #endif > + unsigned long net_exit_flag; > }; > > static inline struct tipc_net *tipc_net(struct net *net) > diff --git a/net/tipc/net.c b/net/tipc/net.c > index 85400e4242de..8ad5b9ad89c0 100644 > --- a/net/tipc/net.c > +++ b/net/tipc/net.c > @@ -132,6 +132,9 @@ static void tipc_net_finalize(struct net *net, u32 addr) > { > struct tipc_net *tn = tipc_net(net); > > + if (unlikely(!test_bit(0, &tn->net_exit_flag))) > + return; > + > if (cmpxchg(&tn->node_addr, 0, addr)) > return; > tipc_set_node_addr(net, addr); > @@ -153,8 +156,13 @@ static void tipc_net_finalize_work(struct work_struct *work) > > void tipc_sched_net_finalize(struct net *net, u32 addr) > { > - struct tipc_net_work *fwork = kzalloc(sizeof(*fwork), GFP_ATOMIC); > + struct tipc_net *tn = tipc_net(net); > + struct tipc_net_work *fwork; > + > + if (unlikely(!test_bit(0, &tn->net_exit_flag))) > + return; > > + fwork = kzalloc(sizeof(*fwork), GFP_ATOMIC); > if (!fwork) > return; > INIT_WORK(&fwork->work, tipc_net_finalize_work);
> -----Original Message----- > From: Jakub Kicinski <kuba@kernel.org> > Sent: Monday, September 7, 2020 12:57 AM > To: Hoang Huu Le <hoang.h.le@dektech.com.au> > Cc: ying.xue@windriver.com; netdev@vger.kernel.org; jmaloy@redhat.com; maloy@donjonn.com; > syzbot+d5aa7e0385f6a5d0f4fd@syzkaller.appspotmail.com > Subject: Re: [net-next] tipc: fix a deadlock when flushing scheduled work > > On Sat, 5 Sep 2020 11:45:18 +0700 Hoang Huu Le wrote: > > In the commit fdeba99b1e58 > > ("tipc: fix use-after-free in tipc_bcast_get_mode"), we're trying > > to make sure the tipc_net_finalize_work work item finished if it > > enqueued. But calling flush_scheduled_work() is not just affecting > > above work item but either any scheduled work. This has turned out > > to be overkill and caused to deadlock as syzbot reported: > > > > ====================================================== > > WARNING: possible circular locking dependency detected > > 5.9.0-rc2-next-20200828-syzkaller #0 Not tainted > > ------------------------------------------------------ > > kworker/u4:6/349 is trying to acquire lock: > > ffff8880aa063d38 ((wq_completion)events){+.+.}-{0:0}, at: flush_workqueue+0xe1/0x13e0 kernel/workqueue.c:2777 > > > > but task is already holding lock: > > ffffffff8a879430 (pernet_ops_rwsem){++++}-{3:3}, at: cleanup_net+0x9b/0xb10 net/core/net_namespace.c:565 > > > > [...] > > Possible unsafe locking scenario: > > > > CPU0 CPU1 > > ---- ---- > > lock(pernet_ops_rwsem); > > lock(&sb->s_type->i_mutex_key#13); > > lock(pernet_ops_rwsem); > > lock((wq_completion)events); > > > > *** DEADLOCK *** > > [...] > > > > To fix the original issue, we replace above calling by introducing > > a bit flag. When a namespace cleaned-up, bit flag is set to zero and: > > - tipc_net_finalize functionial just does return immediately. > > - tipc_net_finalize_work does not enqueue into the scheduled work queue. > > Is struct tipc_net not going to be freed right after tipc_exit_net() > returns? In that case you'd be back to UAF if the flag is in this > structure. > I rework the fix with version 2. In there, I use cancel_work_sync() API to cancel the specific tipc_net_finalize_work work. > > @@ -110,10 +111,6 @@ static void __net_exit tipc_exit_net(struct net *net) > > tipc_detach_loopback(net); > > tipc_net_stop(net); > > > > - /* Make sure the tipc_net_finalize_work stopped > > - * before releasing the resources. > > - */ > > - flush_scheduled_work(); > > tipc_bcast_stop(net); > > tipc_nametbl_stop(net); > > tipc_sk_rht_destroy(net); > > @@ -124,6 +121,9 @@ static void __net_exit tipc_exit_net(struct net *net) > > > > static void __net_exit tipc_pernet_pre_exit(struct net *net) > > { > > + struct tipc_net *tn = tipc_net(net); > > + > > + clear_bit_unlock(0, &tn->net_exit_flag); > > tipc_node_pre_cleanup_net(net); > > } > > > > diff --git a/net/tipc/core.h b/net/tipc/core.h > > index 631d83c9705f..aa75882dd932 100644 > > --- a/net/tipc/core.h > > +++ b/net/tipc/core.h > > @@ -143,6 +143,7 @@ struct tipc_net { > > /* TX crypto handler */ > > struct tipc_crypto *crypto_tx; > > #endif > > + unsigned long net_exit_flag; > > }; > > > > static inline struct tipc_net *tipc_net(struct net *net) > > diff --git a/net/tipc/net.c b/net/tipc/net.c > > index 85400e4242de..8ad5b9ad89c0 100644 > > --- a/net/tipc/net.c > > +++ b/net/tipc/net.c > > @@ -132,6 +132,9 @@ static void tipc_net_finalize(struct net *net, u32 addr) > > { > > struct tipc_net *tn = tipc_net(net); > > > > + if (unlikely(!test_bit(0, &tn->net_exit_flag))) > > + return; > > + > > if (cmpxchg(&tn->node_addr, 0, addr)) > > return; > > tipc_set_node_addr(net, addr); > > @@ -153,8 +156,13 @@ static void tipc_net_finalize_work(struct work_struct *work) > > > > void tipc_sched_net_finalize(struct net *net, u32 addr) > > { > > - struct tipc_net_work *fwork = kzalloc(sizeof(*fwork), GFP_ATOMIC); > > + struct tipc_net *tn = tipc_net(net); > > + struct tipc_net_work *fwork; > > + > > + if (unlikely(!test_bit(0, &tn->net_exit_flag))) > > + return; > > > > + fwork = kzalloc(sizeof(*fwork), GFP_ATOMIC); > > if (!fwork) > > return; > > INIT_WORK(&fwork->work, tipc_net_finalize_work);
====================================================== WARNING: possible circular locking dependency detected 5.9.0-rc2-next-20200828-syzkaller #0 Not tainted ------------------------------------------------------ kworker/u4:6/349 is trying to acquire lock: ffff8880aa063d38 ((wq_completion)events){+.+.}-{0:0}, at: flush_workqueue+0xe1/0x13e0 kernel/workqueue.c:2777 but task is already holding lock: ffffffff8a879430 (pernet_ops_rwsem){++++}-{3:3}, at: cleanup_net+0x9b/0xb10 net/core/net_namespace.c:565 [...] Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(pernet_ops_rwsem); lock(&sb->s_type->i_mutex_key#13); lock(pernet_ops_rwsem); lock((wq_completion)events); *** DEADLOCK *** [...] To fix the original issue, we replace above calling by introducing a bit flag. When a namespace cleaned-up, bit flag is set to zero and: - tipc_net_finalize functionial just does return immediately. - tipc_net_finalize_work does not enqueue into the scheduled work queue. Reported-by: syzbot+d5aa7e0385f6a5d0f4fd@syzkaller.appspotmail.com Fixes: fdeba99b1e58 ("tipc: fix use-after-free in tipc_bcast_get_mode") Acked-by: Jon Maloy <jmaloy@redhat.com> Signed-off-by: Hoang Huu Le <hoang.h.le@dektech.com.au> --- net/tipc/core.c | 8 ++++---- net/tipc/core.h | 1 + net/tipc/net.c | 10 +++++++++- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/net/tipc/core.c b/net/tipc/core.c index 37d8695548cf..5e7bb768f45c 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -60,6 +60,7 @@ static int __net_init tipc_init_net(struct net *net) tn->trial_addr = 0; tn->addr_trial_end = 0; tn->capabilities = TIPC_NODE_CAPABILITIES; + test_and_set_bit_lock(0, &tn->net_exit_flag); memset(tn->node_id, 0, sizeof(tn->node_id)); memset(tn->node_id_string, 0, sizeof(tn->node_id_string)); tn->mon_threshold = TIPC_DEF_MON_THRESHOLD; @@ -110,10 +111,6 @@ static void __net_exit tipc_exit_net(struct net *net) tipc_detach_loopback(net); tipc_net_stop(net); - /* Make sure the tipc_net_finalize_work stopped - * before releasing the resources. - */ - flush_scheduled_work(); tipc_bcast_stop(net); tipc_nametbl_stop(net); tipc_sk_rht_destroy(net); @@ -124,6 +121,9 @@ static void __net_exit tipc_exit_net(struct net *net) static void __net_exit tipc_pernet_pre_exit(struct net *net) { + struct tipc_net *tn = tipc_net(net); + + clear_bit_unlock(0, &tn->net_exit_flag); tipc_node_pre_cleanup_net(net); } diff --git a/net/tipc/core.h b/net/tipc/core.h index 631d83c9705f..aa75882dd932 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -143,6 +143,7 @@ struct tipc_net { /* TX crypto handler */ struct tipc_crypto *crypto_tx; #endif + unsigned long net_exit_flag; }; static inline struct tipc_net *tipc_net(struct net *net) diff --git a/net/tipc/net.c b/net/tipc/net.c index 85400e4242de..8ad5b9ad89c0 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -132,6 +132,9 @@ static void tipc_net_finalize(struct net *net, u32 addr) { struct tipc_net *tn = tipc_net(net); + if (unlikely(!test_bit(0, &tn->net_exit_flag))) + return; + if (cmpxchg(&tn->node_addr, 0, addr)) return; tipc_set_node_addr(net, addr); @@ -153,8 +156,13 @@ static void tipc_net_finalize_work(struct work_struct *work) void tipc_sched_net_finalize(struct net *net, u32 addr) { - struct tipc_net_work *fwork = kzalloc(sizeof(*fwork), GFP_ATOMIC); + struct tipc_net *tn = tipc_net(net); + struct tipc_net_work *fwork; + + if (unlikely(!test_bit(0, &tn->net_exit_flag))) + return; + fwork = kzalloc(sizeof(*fwork), GFP_ATOMIC); if (!fwork) return; INIT_WORK(&fwork->work, tipc_net_finalize_work);