Message ID | 20240202-fix-device-links-overlays-v1-1-f9fd1404c8e2@analog.com |
---|---|
State | Superseded |
Headers | show |
Series | fix DT overlays when device links are released | expand |
On Fri, Feb 2, 2024 at 1:18 PM Nuno Sa via B4 Relay <devnull+nuno.sa.analog.com@kernel.org> wrote: > > From: Nuno Sa <nuno.sa@analog.com> > > Let's use a dedicated queue for devlinks since releasing a link happens > asynchronously but some code paths, like DT overlays, have some > expectations regarding the of_node when being removed (the refcount must > be 1). Given how devlinks are released that cannot be assured. Hence, add a > dedicated queue so that it's easy to sync against devlinks removal. Thanks for following my suggestion! > While at it, make sure to explicitly include <linux/workqueue.h>. > > Signed-off-by: Nuno Sa <nuno.sa@analog.com> > --- > drivers/base/core.c | 33 +++++++++++++++++++++++++++++---- > include/linux/fwnode.h | 1 + > 2 files changed, 30 insertions(+), 4 deletions(-) > > diff --git a/drivers/base/core.c b/drivers/base/core.c > index 14d46af40f9a..06e7766b5227 100644 > --- a/drivers/base/core.c > +++ b/drivers/base/core.c > @@ -31,6 +31,7 @@ > #include <linux/swiotlb.h> > #include <linux/sysfs.h> > #include <linux/dma-map-ops.h> /* for dma_default_coherent */ > +#include <linux/workqueue.h> > > #include "base.h" > #include "physical_location.h" > @@ -44,6 +45,7 @@ static bool fw_devlink_is_permissive(void); > static void __fw_devlink_link_to_consumers(struct device *dev); > static bool fw_devlink_drv_reg_done; > static bool fw_devlink_best_effort; > +static struct workqueue_struct *devlink_release_queue __ro_after_init; > > /** > * __fwnode_link_add - Create a link between two fwnode_handles. 
> @@ -235,6 +237,11 @@ static void __fw_devlink_pickup_dangling_consumers(struct fwnode_handle *fwnode, > __fw_devlink_pickup_dangling_consumers(child, new_sup); > } > > +void fwnode_links_flush_queue(void) > +{ > + flush_workqueue(devlink_release_queue); > +} > + > static DEFINE_MUTEX(device_links_lock); > DEFINE_STATIC_SRCU(device_links_srcu); > > @@ -531,9 +538,10 @@ static void devlink_dev_release(struct device *dev) > * It may take a while to complete this work because of the SRCU > * synchronization in device_link_release_fn() and if the consumer or > * supplier devices get deleted when it runs, so put it into the "long" > - * workqueue. > + * devlink workqueue. > + * > */ > - queue_work(system_long_wq, &link->rm_work); > + queue_work(devlink_release_queue, &link->rm_work); > } > > static struct class devlink_class = { > @@ -636,10 +644,27 @@ static int __init devlink_class_init(void) > return ret; > > ret = class_interface_register(&devlink_class_intf); > - if (ret) > + if (ret) { > + class_unregister(&devlink_class); > + return ret; > + } > + > + /* > + * Using a dedicated queue for devlinks since releasing a link happens > + * asynchronously but some code paths, like DT overlays, have some > + * expectations regarding the of_node when being removed (the refcount > + * must be 1). Given how devlinks are released that cannot be assured. > + * Hence, add a dedicated queue so that it's easy to sync against > + * devlinks removal. > + */ > + devlink_release_queue = alloc_workqueue("devlink_release", 0, 0); > + if (!devlink_release_queue) { > + class_interface_unregister(&devlink_class_intf); > class_unregister(&devlink_class); This is a bit drastic. I think that device links can still work if devlink_release_queue is NULL, just devlink_dev_release() needs to check it and release synchronously if it is NULL. Apart from this LGTM. 
> + return -ENODEV; > + } > > - return ret; > + return 0; > } > postcore_initcall(devlink_class_init); > > diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h > index 2a72f55d26eb..017b170e9903 100644 > --- a/include/linux/fwnode.h > +++ b/include/linux/fwnode.h > @@ -213,5 +213,6 @@ extern bool fw_devlink_is_strict(void); > int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup); > void fwnode_links_purge(struct fwnode_handle *fwnode); > void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode); > +void fwnode_links_flush_queue(void); > > #endif > > --
On Fri, 2024-02-02 at 16:59 +0100, Rafael J. Wysocki wrote: > On Fri, Feb 2, 2024 at 1:18 PM Nuno Sa via B4 Relay > <devnull+nuno.sa.analog.com@kernel.org> wrote: > > > > From: Nuno Sa <nuno.sa@analog.com> > > > > Let's use a dedicated queue for devlinks since releasing a link happens > > asynchronously but some code paths, like DT overlays, have some > > expectations regarding the of_node when being removed (the refcount must > > be 1). Given how devlinks are released that cannot be assured. Hence, add a > > dedicated queue so that it's easy to sync against devlinks removal. > > Thanks for following my suggestion! > > > While at it, make sure to explicitly include <linux/workqueue.h>. > > > > Signed-off-by: Nuno Sa <nuno.sa@analog.com> > > --- > > drivers/base/core.c | 33 +++++++++++++++++++++++++++++---- > > include/linux/fwnode.h | 1 + > > 2 files changed, 30 insertions(+), 4 deletions(-) > > > > diff --git a/drivers/base/core.c b/drivers/base/core.c > > index 14d46af40f9a..06e7766b5227 100644 > > --- a/drivers/base/core.c > > +++ b/drivers/base/core.c > > @@ -31,6 +31,7 @@ > > #include <linux/swiotlb.h> > > #include <linux/sysfs.h> > > #include <linux/dma-map-ops.h> /* for dma_default_coherent */ > > +#include <linux/workqueue.h> > > > > #include "base.h" > > #include "physical_location.h" > > @@ -44,6 +45,7 @@ static bool fw_devlink_is_permissive(void); > > static void __fw_devlink_link_to_consumers(struct device *dev); > > static bool fw_devlink_drv_reg_done; > > static bool fw_devlink_best_effort; > > +static struct workqueue_struct *devlink_release_queue __ro_after_init; > > > > /** > > * __fwnode_link_add - Create a link between two fwnode_handles. 
> > @@ -235,6 +237,11 @@ static void __fw_devlink_pickup_dangling_consumers(struct > > fwnode_handle *fwnode, > > __fw_devlink_pickup_dangling_consumers(child, new_sup); > > } > > > > +void fwnode_links_flush_queue(void) > > +{ > > + flush_workqueue(devlink_release_queue); > > +} > > + > > static DEFINE_MUTEX(device_links_lock); > > DEFINE_STATIC_SRCU(device_links_srcu); > > > > @@ -531,9 +538,10 @@ static void devlink_dev_release(struct device *dev) > > * It may take a while to complete this work because of the SRCU > > * synchronization in device_link_release_fn() and if the consumer or > > * supplier devices get deleted when it runs, so put it into the "long" > > - * workqueue. > > + * devlink workqueue. > > + * > > */ > > - queue_work(system_long_wq, &link->rm_work); > > + queue_work(devlink_release_queue, &link->rm_work); > > } > > > > static struct class devlink_class = { > > @@ -636,10 +644,27 @@ static int __init devlink_class_init(void) > > return ret; > > > > ret = class_interface_register(&devlink_class_intf); > > - if (ret) > > + if (ret) { > > + class_unregister(&devlink_class); > > + return ret; > > + } > > + > > + /* > > + * Using a dedicated queue for devlinks since releasing a link happens > > + * asynchronously but some code paths, like DT overlays, have some > > + * expectations regarding the of_node when being removed (the refcount > > + * must be 1). Given how devlinks are released that cannot be assured. > > + * Hence, add a dedicated queue so that it's easy to sync against > > + * devlinks removal. > > + */ > > + devlink_release_queue = alloc_workqueue("devlink_release", 0, 0); > > + if (!devlink_release_queue) { > > + class_interface_unregister(&devlink_class_intf); > > class_unregister(&devlink_class); > > This is a bit drastic. > > I think that device links can still work if devlink_release_queue is > NULL, just devlink_dev_release() needs to check it and release > synchronously if it is NULL. > Agreed, I'll do that way. 
It will then always remove the links synchronously (which is different from before), but I guess that failing to allocate the queue is rather unlikely. - Nuno Sá
On Mon, Feb 5, 2024 at 9:29 AM Nuno Sá <noname.nuno@gmail.com> wrote: > > On Fri, 2024-02-02 at 16:59 +0100, Rafael J. Wysocki wrote: > > On Fri, Feb 2, 2024 at 1:18 PM Nuno Sa via B4 Relay > > <devnull+nuno.sa.analog.com@kernel.org> wrote: > > > > > > From: Nuno Sa <nuno.sa@analog.com> > > > > > > Let's use a dedicated queue for devlinks since releasing a link happens > > > asynchronously but some code paths, like DT overlays, have some > > > expectations regarding the of_node when being removed (the refcount must > > > be 1). Given how devlinks are released that cannot be assured. Hence, add a > > > dedicated queue so that it's easy to sync against devlinks removal. > > > > Thanks for following my suggestion! > > > > > While at it, make sure to explicitly include <linux/workqueue.h>. > > > > > > Signed-off-by: Nuno Sa <nuno.sa@analog.com> > > > --- > > > drivers/base/core.c | 33 +++++++++++++++++++++++++++++---- > > > include/linux/fwnode.h | 1 + > > > 2 files changed, 30 insertions(+), 4 deletions(-) > > > > > > diff --git a/drivers/base/core.c b/drivers/base/core.c > > > index 14d46af40f9a..06e7766b5227 100644 > > > --- a/drivers/base/core.c > > > +++ b/drivers/base/core.c > > > @@ -31,6 +31,7 @@ > > > #include <linux/swiotlb.h> > > > #include <linux/sysfs.h> > > > #include <linux/dma-map-ops.h> /* for dma_default_coherent */ > > > +#include <linux/workqueue.h> > > > > > > #include "base.h" > > > #include "physical_location.h" > > > @@ -44,6 +45,7 @@ static bool fw_devlink_is_permissive(void); > > > static void __fw_devlink_link_to_consumers(struct device *dev); > > > static bool fw_devlink_drv_reg_done; > > > static bool fw_devlink_best_effort; > > > +static struct workqueue_struct *devlink_release_queue __ro_after_init; > > > > > > /** > > > * __fwnode_link_add - Create a link between two fwnode_handles. 
> > > @@ -235,6 +237,11 @@ static void __fw_devlink_pickup_dangling_consumers(struct > > > fwnode_handle *fwnode, > > > __fw_devlink_pickup_dangling_consumers(child, new_sup); > > > } > > > > > > +void fwnode_links_flush_queue(void) > > > +{ > > > + flush_workqueue(devlink_release_queue); > > > +} > > > + > > > static DEFINE_MUTEX(device_links_lock); > > > DEFINE_STATIC_SRCU(device_links_srcu); > > > > > > @@ -531,9 +538,10 @@ static void devlink_dev_release(struct device *dev) > > > * It may take a while to complete this work because of the SRCU > > > * synchronization in device_link_release_fn() and if the consumer or > > > * supplier devices get deleted when it runs, so put it into the "long" > > > - * workqueue. > > > + * devlink workqueue. > > > + * > > > */ > > > - queue_work(system_long_wq, &link->rm_work); > > > + queue_work(devlink_release_queue, &link->rm_work); > > > } > > > > > > static struct class devlink_class = { > > > @@ -636,10 +644,27 @@ static int __init devlink_class_init(void) > > > return ret; > > > > > > ret = class_interface_register(&devlink_class_intf); > > > - if (ret) > > > + if (ret) { > > > + class_unregister(&devlink_class); > > > + return ret; > > > + } > > > + > > > + /* > > > + * Using a dedicated queue for devlinks since releasing a link happens > > > + * asynchronously but some code paths, like DT overlays, have some > > > + * expectations regarding the of_node when being removed (the refcount > > > + * must be 1). Given how devlinks are released that cannot be assured. > > > + * Hence, add a dedicated queue so that it's easy to sync against > > > + * devlinks removal. > > > + */ > > > + devlink_release_queue = alloc_workqueue("devlink_release", 0, 0); > > > + if (!devlink_release_queue) { > > > + class_interface_unregister(&devlink_class_intf); > > > class_unregister(&devlink_class); > > > > This is a bit drastic. 
> > > > I think that device links can still work if devlink_release_queue is > > NULL, just devlink_dev_release() needs to check it and release > > synchronously if it is NULL. > > > > Agreed, I'll do that way. It will always synchronously remove the links (which is > different than before) but I guess that failing in allocating the queue is rather > unlikely. Right.
diff --git a/drivers/base/core.c b/drivers/base/core.c index 14d46af40f9a..06e7766b5227 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -31,6 +31,7 @@ #include <linux/swiotlb.h> #include <linux/sysfs.h> #include <linux/dma-map-ops.h> /* for dma_default_coherent */ +#include <linux/workqueue.h> #include "base.h" #include "physical_location.h" @@ -44,6 +45,7 @@ static bool fw_devlink_is_permissive(void); static void __fw_devlink_link_to_consumers(struct device *dev); static bool fw_devlink_drv_reg_done; static bool fw_devlink_best_effort; +static struct workqueue_struct *devlink_release_queue __ro_after_init; /** * __fwnode_link_add - Create a link between two fwnode_handles. @@ -235,6 +237,11 @@ static void __fw_devlink_pickup_dangling_consumers(struct fwnode_handle *fwnode, __fw_devlink_pickup_dangling_consumers(child, new_sup); } +void fwnode_links_flush_queue(void) +{ + flush_workqueue(devlink_release_queue); +} + static DEFINE_MUTEX(device_links_lock); DEFINE_STATIC_SRCU(device_links_srcu); @@ -531,9 +538,10 @@ static void devlink_dev_release(struct device *dev) * It may take a while to complete this work because of the SRCU * synchronization in device_link_release_fn() and if the consumer or * supplier devices get deleted when it runs, so put it into the "long" - * workqueue. + * devlink workqueue. + * */ - queue_work(system_long_wq, &link->rm_work); + queue_work(devlink_release_queue, &link->rm_work); } static struct class devlink_class = { @@ -636,10 +644,27 @@ static int __init devlink_class_init(void) return ret; ret = class_interface_register(&devlink_class_intf); - if (ret) + if (ret) { + class_unregister(&devlink_class); + return ret; + } + + /* + * Using a dedicated queue for devlinks since releasing a link happens + * asynchronously but some code paths, like DT overlays, have some + * expectations regarding the of_node when being removed (the refcount + * must be 1). Given how devlinks are released that cannot be assured. 
+ * Hence, add a dedicated queue so that it's easy to sync against + * devlinks removal. + */ + devlink_release_queue = alloc_workqueue("devlink_release", 0, 0); + if (!devlink_release_queue) { + class_interface_unregister(&devlink_class_intf); class_unregister(&devlink_class); + return -ENODEV; + } - return ret; + return 0; } postcore_initcall(devlink_class_init); diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 2a72f55d26eb..017b170e9903 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -213,5 +213,6 @@ extern bool fw_devlink_is_strict(void); int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup); void fwnode_links_purge(struct fwnode_handle *fwnode); void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode); +void fwnode_links_flush_queue(void); #endif