Message ID | 20220831172024.1613208-2-svenva@chromium.org |
---|---|
State | Superseded |
Headers | show |
Series | [v1,1/2] random: make add_hwgenerator_randomness() more flexible | expand |
Any feedback on this patch, good/bad, any suggestions? Herbert, this patch should fix hw_random/core suspend/resume issues, without having to freeze the kthread - which should avoid the freeze regression problems that popped up. See earlier kthread freeze attempt here: https://lore.kernel.org/all/4a45b3e0-ed3a-61d3-bfc6-957c7ba631bb@maciej.szmigiero.name/T/#m2c37e2c176c4efc362116b57493749664b960f45 I was hoping you could take a look. On Wed, Aug 31, 2022 at 1:20 PM Sven van Ashbrook <svenva@chromium.org> wrote: > > The hwrng fill function runs as a normal kthread. This thread > doesn't get frozen by the PM, i.e. it will keep running during, > or in, system suspend. It may call the client driver's > data_present()/data_read() functions during, or in, suspend; > which may generate errors or warnings. For example, if the > client driver uses an i2c bus, the following warning may be > intermittently generated: > > i2c: Transfer while suspended > > Fix by converting the delay polled kthread into an ordered work > queue running a single, self-rearming delayed_work. Make the > workqueue WQ_FREEZABLE, so the PM will drain any work items > before going into suspend. This prevents client drivers from > being accessed during, or in, suspend. > > Tested on a Chromebook containing an cr50 tpm over i2c. The test > consists of 31000 suspend/resume cycles. Occasional > "i2c: Transfer while suspended" warnings are seen. After applying > this patch, these warnings disappear. > > This patch also does not appear to cause any regressions on the > ChromeOS test queues. 
> > Signed-off-by: Sven van Ashbrook <svenva@chromium.org> > --- > > drivers/char/hw_random/core.c | 95 +++++++++++++++++++---------------- > 1 file changed, 51 insertions(+), 44 deletions(-) > > diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c > index 3675122c6cce..ee85ca97d215 100644 > --- a/drivers/char/hw_random/core.c > +++ b/drivers/char/hw_random/core.c > @@ -17,7 +17,7 @@ > #include <linux/hw_random.h> > #include <linux/random.h> > #include <linux/kernel.h> > -#include <linux/kthread.h> > +#include <linux/workqueue.h> > #include <linux/sched/signal.h> > #include <linux/miscdevice.h> > #include <linux/module.h> > @@ -28,14 +28,17 @@ > > #define RNG_MODULE_NAME "hw_random" > > -static struct hwrng *current_rng; > /* the current rng has been explicitly chosen by user via sysfs */ > static int cur_rng_set_by_user; > -static struct task_struct *hwrng_fill; > +static struct workqueue_struct *hwrng_wq; > +static struct delayed_work hwrng_fill_dwork; > +static size_t entropy_credit; > +/* Protects rng_list, current_rng, is_hwrng_wq_running */ > +static DEFINE_MUTEX(rng_mutex); > /* list of registered rngs */ > static LIST_HEAD(rng_list); > -/* Protects rng_list and current_rng */ > -static DEFINE_MUTEX(rng_mutex); > +static struct hwrng *current_rng; > +static bool is_hwrng_wq_running; > /* Protects rng read functions, data_avail, rng_buffer and rng_fillbuf */ > static DEFINE_MUTEX(reading_mutex); > static int data_avail; > @@ -488,37 +491,29 @@ static int __init register_miscdev(void) > return misc_register(&rng_miscdev); > } > > -static int hwrng_fillfn(void *unused) > +static void hwrng_fillfn(struct work_struct *unused) > { > - size_t entropy, entropy_credit = 0; /* in 1/1024 of a bit */ > + unsigned short quality; > unsigned long delay; > + struct hwrng *rng; > + size_t entropy; /* in 1/1024 of a bit */ > long rc; > > - while (!kthread_should_stop()) { > - unsigned short quality; > - struct hwrng *rng; > - > - rng = get_current_rng(); 
> - if (IS_ERR(rng) || !rng) > - break; > - mutex_lock(&reading_mutex); > - rc = rng_get_data(rng, rng_fillbuf, > - rng_buffer_size(), 1); > - if (current_quality != rng->quality) > - rng->quality = current_quality; /* obsolete */ > - quality = rng->quality; > - mutex_unlock(&reading_mutex); > - put_rng(rng); > - > - if (!quality) > - break; > + rng = get_current_rng(); > + if (IS_ERR(rng) || !rng) > + return; > + mutex_lock(&reading_mutex); > + rc = rng_get_data(rng, rng_fillbuf, rng_buffer_size(), 1); > + if (current_quality != rng->quality) > + rng->quality = current_quality; /* obsolete */ > + quality = rng->quality; > + mutex_unlock(&reading_mutex); > + put_rng(rng); > > - if (rc <= 0) { > - pr_warn("hwrng: no data available\n"); > - msleep_interruptible(10000); > - continue; > - } > + if (!quality) > + return; > > + if (rc > 0) { > /* If we cannot credit at least one bit of entropy, > * keep track of the remainder for the next iteration > */ > @@ -529,11 +524,11 @@ static int hwrng_fillfn(void *unused) > /* Outside lock, sure, but y'know: randomness. 
*/ > delay = add_hwgenerator_randomness((void *)rng_fillbuf, rc, > entropy >> 10); > - if (delay > 0) > - schedule_timeout_interruptible(delay); > + } else { > + pr_warn("hwrng: no data available\n"); > + delay = 10 * HZ; > } > - hwrng_fill = NULL; > - return 0; > + mod_delayed_work(hwrng_wq, &hwrng_fill_dwork, delay); > } > > static void hwrng_manage_rngd(struct hwrng *rng) > @@ -541,14 +536,12 @@ static void hwrng_manage_rngd(struct hwrng *rng) > if (WARN_ON(!mutex_is_locked(&rng_mutex))) > return; > > - if (rng->quality == 0 && hwrng_fill) > - kthread_stop(hwrng_fill); > - if (rng->quality > 0 && !hwrng_fill) { > - hwrng_fill = kthread_run(hwrng_fillfn, NULL, "hwrng"); > - if (IS_ERR(hwrng_fill)) { > - pr_err("hwrng_fill thread creation failed\n"); > - hwrng_fill = NULL; > - } > + if (rng->quality == 0 && is_hwrng_wq_running) { > + cancel_delayed_work(&hwrng_fill_dwork); > + is_hwrng_wq_running = false; > + } else if (rng->quality > 0 && !is_hwrng_wq_running) { > + mod_delayed_work(hwrng_wq, &hwrng_fill_dwork, 0); > + is_hwrng_wq_running = true; > } > } > > @@ -631,8 +624,7 @@ void hwrng_unregister(struct hwrng *rng) > new_rng = get_current_rng_nolock(); > if (list_empty(&rng_list)) { > mutex_unlock(&rng_mutex); > - if (hwrng_fill) > - kthread_stop(hwrng_fill); > + cancel_delayed_work_sync(&hwrng_fill_dwork); > } else > mutex_unlock(&rng_mutex); > > @@ -703,17 +695,32 @@ static int __init hwrng_modinit(void) > return -ENOMEM; > } > > + /* ordered wq to mimic delay-polled kthread behaviour */ > + hwrng_wq = alloc_ordered_workqueue("hwrng", > + WQ_FREEZABLE | /* prevent work from running during suspend/resume */ > + WQ_MEM_RECLAIM /* client drivers may need memory reclaim */ > + ); > + if (!hwrng_wq) { > + kfree(rng_fillbuf); > + kfree(rng_buffer); > + return -ENOMEM; > + } > + > ret = register_miscdev(); > if (ret) { > + destroy_workqueue(hwrng_wq); > kfree(rng_fillbuf); > kfree(rng_buffer); > } > > + INIT_DELAYED_WORK(&hwrng_fill_dwork, hwrng_fillfn); > + > 
return ret; > } > > static void __exit hwrng_modexit(void) > { > + destroy_workqueue(hwrng_wq); > mutex_lock(&rng_mutex); > BUG_ON(current_rng); > kfree(rng_buffer); > -- > 2.37.2.672.g94769d06f0-goog >
On Wed, Aug 31, 2022 at 05:20:24PM +0000, Sven van Ashbrook wrote: > The hwrng fill function runs as a normal kthread. This thread > doesn't get frozen by the PM, i.e. it will keep running during, > or in, system suspend. It may call the client driver's > data_present()/data_read() functions during, or in, suspend; > which may generate errors or warnings. For example, if the > client driver uses an i2c bus, the following warning may be > intermittently generated: > > i2c: Transfer while suspended > > Fix by converting the delay polled kthread into an ordered work > queue running a single, self-rearming delayed_work. Make the > workqueue WQ_FREEZABLE, so the PM will drain any work items > before going into suspend. This prevents client drivers from > being accessed during, or in, suspend. > > Tested on a Chromebook containing an cr50 tpm over i2c. The test > consists of 31000 suspend/resume cycles. Occasional > "i2c: Transfer while suspended" warnings are seen. After applying > this patch, these warnings disappear. > > This patch also does not appear to cause any regressions on the > ChromeOS test queues. > > Signed-off-by: Sven van Ashbrook <svenva@chromium.org> The general concept seems to be fine. > - if (rc <= 0) { > - pr_warn("hwrng: no data available\n"); > - msleep_interruptible(10000); > - continue; > - } > + if (!quality) > + return; > > + if (rc > 0) { > /* If we cannot credit at least one bit of entropy, > * keep track of the remainder for the next iteration > */ You need to refresh your patch-set against the latest tree. This function has changed quite a bit. 
> @@ -541,14 +536,12 @@ static void hwrng_manage_rngd(struct hwrng *rng) > if (WARN_ON(!mutex_is_locked(&rng_mutex))) > return; > > - if (rng->quality == 0 && hwrng_fill) > - kthread_stop(hwrng_fill); > - if (rng->quality > 0 && !hwrng_fill) { > - hwrng_fill = kthread_run(hwrng_fillfn, NULL, "hwrng"); > - if (IS_ERR(hwrng_fill)) { > - pr_err("hwrng_fill thread creation failed\n"); > - hwrng_fill = NULL; > - } > + if (rng->quality == 0 && is_hwrng_wq_running) { > + cancel_delayed_work(&hwrng_fill_dwork); > + is_hwrng_wq_running = false; > + } else if (rng->quality > 0 && !is_hwrng_wq_running) { > + mod_delayed_work(hwrng_wq, &hwrng_fill_dwork, 0); > + is_hwrng_wq_running = true; > } > } > > @@ -631,8 +624,7 @@ void hwrng_unregister(struct hwrng *rng) > new_rng = get_current_rng_nolock(); > if (list_empty(&rng_list)) { > mutex_unlock(&rng_mutex); > - if (hwrng_fill) > - kthread_stop(hwrng_fill); > + cancel_delayed_work_sync(&hwrng_fill_dwork); What if hwrng_manage_rngd races against hwrng_unregister? Thanks,
Hi Herbert, On Wed, Sep 7, 2022 at 2:29 AM Herbert Xu <herbert@gondor.apana.org.au> wrote: > > The general concept seems to be fine. Thank you kindly for the fast review. I plan to get a v2 out in the next few days.
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 3675122c6cce..ee85ca97d215 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -17,7 +17,7 @@ #include <linux/hw_random.h> #include <linux/random.h> #include <linux/kernel.h> -#include <linux/kthread.h> +#include <linux/workqueue.h> #include <linux/sched/signal.h> #include <linux/miscdevice.h> #include <linux/module.h> @@ -28,14 +28,17 @@ #define RNG_MODULE_NAME "hw_random" -static struct hwrng *current_rng; /* the current rng has been explicitly chosen by user via sysfs */ static int cur_rng_set_by_user; -static struct task_struct *hwrng_fill; +static struct workqueue_struct *hwrng_wq; +static struct delayed_work hwrng_fill_dwork; +static size_t entropy_credit; +/* Protects rng_list, current_rng, is_hwrng_wq_running */ +static DEFINE_MUTEX(rng_mutex); /* list of registered rngs */ static LIST_HEAD(rng_list); -/* Protects rng_list and current_rng */ -static DEFINE_MUTEX(rng_mutex); +static struct hwrng *current_rng; +static bool is_hwrng_wq_running; /* Protects rng read functions, data_avail, rng_buffer and rng_fillbuf */ static DEFINE_MUTEX(reading_mutex); static int data_avail; @@ -488,37 +491,29 @@ static int __init register_miscdev(void) return misc_register(&rng_miscdev); } -static int hwrng_fillfn(void *unused) +static void hwrng_fillfn(struct work_struct *unused) { - size_t entropy, entropy_credit = 0; /* in 1/1024 of a bit */ + unsigned short quality; unsigned long delay; + struct hwrng *rng; + size_t entropy; /* in 1/1024 of a bit */ long rc; - while (!kthread_should_stop()) { - unsigned short quality; - struct hwrng *rng; - - rng = get_current_rng(); - if (IS_ERR(rng) || !rng) - break; - mutex_lock(&reading_mutex); - rc = rng_get_data(rng, rng_fillbuf, - rng_buffer_size(), 1); - if (current_quality != rng->quality) - rng->quality = current_quality; /* obsolete */ - quality = rng->quality; - mutex_unlock(&reading_mutex); - put_rng(rng); - - if 
(!quality) - break; + rng = get_current_rng(); + if (IS_ERR(rng) || !rng) + return; + mutex_lock(&reading_mutex); + rc = rng_get_data(rng, rng_fillbuf, rng_buffer_size(), 1); + if (current_quality != rng->quality) + rng->quality = current_quality; /* obsolete */ + quality = rng->quality; + mutex_unlock(&reading_mutex); + put_rng(rng); - if (rc <= 0) { - pr_warn("hwrng: no data available\n"); - msleep_interruptible(10000); - continue; - } + if (!quality) + return; + if (rc > 0) { /* If we cannot credit at least one bit of entropy, * keep track of the remainder for the next iteration */ @@ -529,11 +524,11 @@ static int hwrng_fillfn(void *unused) /* Outside lock, sure, but y'know: randomness. */ delay = add_hwgenerator_randomness((void *)rng_fillbuf, rc, entropy >> 10); - if (delay > 0) - schedule_timeout_interruptible(delay); + } else { + pr_warn("hwrng: no data available\n"); + delay = 10 * HZ; } - hwrng_fill = NULL; - return 0; + mod_delayed_work(hwrng_wq, &hwrng_fill_dwork, delay); } static void hwrng_manage_rngd(struct hwrng *rng) @@ -541,14 +536,12 @@ static void hwrng_manage_rngd(struct hwrng *rng) if (WARN_ON(!mutex_is_locked(&rng_mutex))) return; - if (rng->quality == 0 && hwrng_fill) - kthread_stop(hwrng_fill); - if (rng->quality > 0 && !hwrng_fill) { - hwrng_fill = kthread_run(hwrng_fillfn, NULL, "hwrng"); - if (IS_ERR(hwrng_fill)) { - pr_err("hwrng_fill thread creation failed\n"); - hwrng_fill = NULL; - } + if (rng->quality == 0 && is_hwrng_wq_running) { + cancel_delayed_work(&hwrng_fill_dwork); + is_hwrng_wq_running = false; + } else if (rng->quality > 0 && !is_hwrng_wq_running) { + mod_delayed_work(hwrng_wq, &hwrng_fill_dwork, 0); + is_hwrng_wq_running = true; } } @@ -631,8 +624,7 @@ void hwrng_unregister(struct hwrng *rng) new_rng = get_current_rng_nolock(); if (list_empty(&rng_list)) { mutex_unlock(&rng_mutex); - if (hwrng_fill) - kthread_stop(hwrng_fill); + cancel_delayed_work_sync(&hwrng_fill_dwork); } else mutex_unlock(&rng_mutex); @@ -703,17 
+695,32 @@ static int __init hwrng_modinit(void) return -ENOMEM; } + /* ordered wq to mimic delay-polled kthread behaviour */ + hwrng_wq = alloc_ordered_workqueue("hwrng", + WQ_FREEZABLE | /* prevent work from running during suspend/resume */ + WQ_MEM_RECLAIM /* client drivers may need memory reclaim */ + ); + if (!hwrng_wq) { + kfree(rng_fillbuf); + kfree(rng_buffer); + return -ENOMEM; + } + ret = register_miscdev(); if (ret) { + destroy_workqueue(hwrng_wq); kfree(rng_fillbuf); kfree(rng_buffer); } + INIT_DELAYED_WORK(&hwrng_fill_dwork, hwrng_fillfn); + return ret; } static void __exit hwrng_modexit(void) { + destroy_workqueue(hwrng_wq); mutex_lock(&rng_mutex); BUG_ON(current_rng); kfree(rng_buffer);
The hwrng fill function runs as a normal kthread. This thread doesn't get frozen by the PM, i.e. it will keep running during, or in, system suspend. It may call the client driver's data_present()/data_read() functions during, or in, suspend, which may generate errors or warnings. For example, if the client driver uses an i2c bus, the following warning may be intermittently generated: i2c: Transfer while suspended Fix by converting the delay-polled kthread into an ordered work queue running a single, self-rearming delayed_work. Make the workqueue WQ_FREEZABLE, so the PM will drain any work items before going into suspend. This prevents client drivers from being accessed during, or in, suspend. Tested on a Chromebook containing a cr50 TPM over i2c. The test consists of 31000 suspend/resume cycles. Without this patch, occasional "i2c: Transfer while suspended" warnings are seen. After applying this patch, these warnings disappear. This patch also does not appear to cause any regressions on the ChromeOS test queues. Signed-off-by: Sven van Ashbrook <svenva@chromium.org> --- drivers/char/hw_random/core.c | 95 +++++++++++++++++++---------------- 1 file changed, 51 insertions(+), 44 deletions(-)