Message ID | tencent_97BC6EC36EF24C91A7E7C6DFD2C106688906@qq.com |
---|---|
State | New |
Headers | show |
Series | hwrng: core - fix task hung in hwrng_fillfn | expand |
Edward Adam Davis <eadavis@qq.com> wrote: > > diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c > index 420f155d251f..7323ddc958ce 100644 > --- a/drivers/char/hw_random/core.c > +++ b/drivers/char/hw_random/core.c > @@ -225,17 +225,18 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, > goto out; > } > > - if (mutex_lock_interruptible(&reading_mutex)) { > - err = -ERESTARTSYS; > - goto out_put; > - } > if (!data_avail) { > + if (mutex_lock_interruptible(&reading_mutex)) { > + err = -ERESTARTSYS; > + goto out_put; > + } > bytes_read = rng_get_data(rng, rng_buffer, > rng_buffer_size(), > !(filp->f_flags & O_NONBLOCK)); > + mutex_unlock(&reading_mutex); > if (bytes_read < 0) { > err = bytes_read; > - goto out_unlock_reading; > + goto out_put; > } > data_avail = bytes_read; > } Does this change anything at all? Please explain why it was holding this lock for 143 seconds in the first place. If it's doing it in rng_get_data, then your change has zero effect. > @@ -501,7 +499,10 @@ static int hwrng_fillfn(void *unused) > rng = get_current_rng(); > if (IS_ERR(rng) || !rng) > break; > - mutex_lock(&reading_mutex); > + if (mutex_lock_interruptible(&reading_mutex)) { > + put_rng(rng); > + return -ERESTARTSYS; > + } No this is just the symptom. The real problem is why is the driver spending 143 seconds in rng_get_data? Cheers,
On Fri, 1 Dec 2023 17:56:59 +0800, Herbert Xu <herbert@gondor.apana.org.au> wrote: > > diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c > > index 420f155d251f..7323ddc958ce 100644 > > --- a/drivers/char/hw_random/core.c > > +++ b/drivers/char/hw_random/core.c > > @@ -225,17 +225,18 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, > > goto out; > > } > > > > - if (mutex_lock_interruptible(&reading_mutex)) { > > - err = -ERESTARTSYS; > > - goto out_put; > > - } > > if (!data_avail) { > > + if (mutex_lock_interruptible(&reading_mutex)) { > > + err = -ERESTARTSYS; > > + goto out_put; > > + } > > bytes_read = rng_get_data(rng, rng_buffer, > > rng_buffer_size(), > > !(filp->f_flags & O_NONBLOCK)); > > + mutex_unlock(&reading_mutex); > > if (bytes_read < 0) { > > err = bytes_read; > > - goto out_unlock_reading; > > + goto out_put; > > } > > data_avail = bytes_read; > > } > > Does this change anything at all? Please explain why it was holding > this lock for 143 seconds in the first place. If it's doing it in > rng_get_data, then your change has zero effect. Reduce the scope of critical zone protection. The original critical zone contains a too large range, especially like copy_to_user() should not be included in the critical zone. > > > @@ -501,7 +499,10 @@ static int hwrng_fillfn(void *unused) > > rng = get_current_rng(); > > if (IS_ERR(rng) || !rng) > > break; > > - mutex_lock(&reading_mutex); > > + if (mutex_lock_interruptible(&reading_mutex)) { > > + put_rng(rng); > > + return -ERESTARTSYS; > > + } > > No this is just the symptom. The real problem is why is the driver > spending 143 seconds in rng_get_data? In the second version of the patch, I have removed the fix in hwrng_fillfn(). But for some reason, the V2 patch did not appear in the mailing list. I think it was due to consuming too much time while executing copy_to_user() that resulted in 143s. So, I narrowed down the scope of the critical area and moved the code copy_to_user() out of the critical area. Edward
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 420f155d251f..7323ddc958ce 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -225,17 +225,18 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, goto out; } - if (mutex_lock_interruptible(&reading_mutex)) { - err = -ERESTARTSYS; - goto out_put; - } if (!data_avail) { + if (mutex_lock_interruptible(&reading_mutex)) { + err = -ERESTARTSYS; + goto out_put; + } bytes_read = rng_get_data(rng, rng_buffer, rng_buffer_size(), !(filp->f_flags & O_NONBLOCK)); + mutex_unlock(&reading_mutex); if (bytes_read < 0) { err = bytes_read; - goto out_unlock_reading; + goto out_put; } data_avail = bytes_read; } @@ -243,7 +244,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, if (!data_avail) { if (filp->f_flags & O_NONBLOCK) { err = -EAGAIN; - goto out_unlock_reading; + goto out_put; } } else { len = data_avail; @@ -255,14 +256,13 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, if (copy_to_user(buf + ret, rng_buffer + data_avail, len)) { err = -EFAULT; - goto out_unlock_reading; + goto out_put; } size -= len; ret += len; } - mutex_unlock(&reading_mutex); put_rng(rng); if (need_resched()) @@ -276,8 +276,6 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, out: return ret ? : err; -out_unlock_reading: - mutex_unlock(&reading_mutex); out_put: put_rng(rng); goto out; @@ -501,7 +499,10 @@ static int hwrng_fillfn(void *unused) rng = get_current_rng(); if (IS_ERR(rng) || !rng) break; - mutex_lock(&reading_mutex); + if (mutex_lock_interruptible(&reading_mutex)) { + put_rng(rng); + return -ERESTARTSYS; + } rc = rng_get_data(rng, rng_fillbuf, rng_buffer_size(), 1); if (current_quality != rng->quality)
[Syz repo] INFO: task hwrng:749 blocked for more than 143 seconds. Not tainted 6.7.0-rc2-syzkaller #0 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:hwrng state:D stack:29040 pid:749 tgid:749 ppid:2 flags:0x00004000 Call Trace: <TASK> context_switch kernel/sched/core.c:5376 [inline] __schedule+0xedb/0x5af0 kernel/sched/core.c:6688 __schedule_loop kernel/sched/core.c:6763 [inline] schedule+0xe9/0x270 kernel/sched/core.c:6778 schedule_preempt_disabled+0x13/0x20 kernel/sched/core.c:6835 __mutex_lock_common kernel/locking/mutex.c:679 [inline] __mutex_lock+0x5b9/0x9d0 kernel/locking/mutex.c:747 hwrng_fillfn+0x145/0x430 drivers/char/hw_random/core.c:504 kthread+0x2c6/0x3a0 kernel/kthread.c:388 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242 </TASK> ... RIP: 0010:rep_movs_alternative+0x57/0x70 arch/x86/lib/copy_user_64.S:80 Code: 00 66 90 48 8b 06 48 89 07 48 83 c6 08 48 83 c7 08 83 e9 08 74 df 83 f9 08 73 e8 eb c9 eb 01 c3 48 89 c8 48 c1 e9 03 83 e0 07 <f3> 48 a5 89 c1 85 c9 75 b3 c3 48 8d 0c c8 eb ac 66 0f 1f 84 00 00 RSP: 0018:ffffc90004427bb0 EFLAGS: 00050246 RAX: 0000000000000000 RBX: 0000000000000040 RCX: 0000000000000008 RDX: ffffed1028a4ab48 RSI: ffff888145255a00 RDI: 0000000020019980 RBP: 0000000020019980 R08: 0000000000000000 R09: ffffed1028a4ab47 R10: ffff888145255a3f R11: 0000000000000001 R12: ffff888145255a00 R13: 00000000200199c0 R14: 0000000000000000 R15: dffffc0000000000 copy_user_generic arch/x86/include/asm/uaccess_64.h:112 [inline] raw_copy_to_user arch/x86/include/asm/uaccess_64.h:133 [inline] _copy_to_user lib/usercopy.c:41 [inline] _copy_to_user+0xa8/0xb0 lib/usercopy.c:34 copy_to_user include/linux/uaccess.h:191 [inline] rng_dev_read+0x184/0x580 drivers/char/hw_random/core.c:255 do_loop_readv_writev fs/read_write.c:755 [inline] do_loop_readv_writev fs/read_write.c:743 [inline] do_iter_read+0x567/0x830 fs/read_write.c:797 vfs_readv+0x12d/0x1a0 fs/read_write.c:915 do_preadv fs/read_write.c:1007 [inline] __do_sys_preadv fs/read_write.c:1057 [inline] __se_sys_preadv fs/read_write.c:1052 [inline] __x64_sys_preadv+0x228/0x300 fs/read_write.c:1052 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b [Analysis] The lock reading_mutex in rng_dev_read() has been occupied for too long, causing the thread callback function hwrng_fillfn() to wait for a timeout. [Fix] Move code that does not require this lock protection out of the critical area. Reported-and-tested-by: syzbot+c52ab18308964d248092@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis <eadavis@qq.com> --- drivers/char/hw_random/core.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-)