@@ -18,6 +18,13 @@
#include <linux/pm_domain.h>
#include <linux/pm_runtime.h>
+/*
+ * These are purposely not in any headers since we don't want anyone to use
+ * them except us.
+ */
+void __clk_prepare_lock(void);
+void __clk_prepare_unlock(void);
+
#ifdef CONFIG_PM_CLK
enum pce_status {
@@ -108,6 +115,10 @@ static int pm_clk_op_lock(struct pm_subsys_data *psd, unsigned long *flags,
/* we must switch to the mutex */
spin_unlock_irqrestore(&psd->lock, *flags);
+
+ /* Manually grab the prepare lock to avoid ABBA w/ psd->clock_mutex */
+ __clk_prepare_lock();
+
mutex_lock(&psd->clock_mutex);
/*
@@ -118,6 +129,8 @@ static int pm_clk_op_lock(struct pm_subsys_data *psd, unsigned long *flags,
return 0;
mutex_unlock(&psd->clock_mutex);
+ __clk_prepare_unlock();
+
goto try_again;
}
@@ -132,6 +145,7 @@ static void pm_clk_op_unlock(struct pm_subsys_data *psd, unsigned long *flags)
{
if (psd->clock_op_might_sleep) {
mutex_unlock(&psd->clock_mutex);
+ __clk_prepare_unlock();
} else {
/* the __acquire is there to work around sparse limitations */
__acquire(&psd->lock);
@@ -149,6 +149,18 @@ static void clk_prepare_unlock(void)
mutex_unlock(&prepare_lock);
}
+/* Non-static wrapper of lock function for drivers/base/power/clock_ops.c */
+void __clk_prepare_lock(void)
+{
+ clk_prepare_lock();
+}
+
+/* Non-static wrapper of unlock function for drivers/base/power/clock_ops.c */
+void __clk_prepare_unlock(void)
+{
+ clk_prepare_unlock();
+}
+
static unsigned long clk_enable_lock(void)
__acquires(enable_lock)
{
If you get the exact right boot sequence / timing / hardware setup,
lockdep can complain at bootup about an unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&psd->clock_mutex);
                               lock(prepare_lock);
                               lock(&psd->clock_mutex);
  lock(prepare_lock);

The case of &psd->clock_mutex being grabbed before the prepare lock is
illustrated by this stack crawl:

  clk_prepare_lock()
  clk_unprepare()
  clk_disable_unprepare()
  pm_clk_suspend()
  pm_clk_runtime_suspend()

The critical things to note are:
- pm_clk_suspend(), in some cases, will grab and hold
  "&psd->clock_mutex" first before calling the clock prepare/unprepare
  functions.
- pm_clk_suspend() can be called in cases where the clock prepare mutex
  isn't already held.

The reverse case, where the prepare lock is held first and then the
&psd->clock_mutex is illustrated by:

  pm_clk_op_lock()
  pm_clk_resume()
  pm_generic_runtime_resume()
  ...
  pm_runtime_resume_and_get()
  clk_pm_runtime_get()
  __clk_core_init()
  __clk_register()
  clk_hw_register()

In the above:
- __clk_core_init() grabs the prepare lock and holds it while calling
  clk_pm_runtime_get().
- pm_clk_op_lock() grabs &psd->clock_mutex.

Presumably nobody noticed this before because it only happens in one
specific case of the pm_clk code. It's also possible that it was
unnoticed before because the clock prepare lock is a funny little
animal and allows the lock to be acquired more than once in a given
callchain but lockdep doesn't really track this.

Let's fix this lockdep error by just consistently grabbing the clock
prepare lock before &psd->clock_mutex. This means adding a private
interface since the clock's prepare lock isn't exposed. We'll add it in
a way to discourage anyone else from using it. We're special since
"pm_clk" is core code and already quite intertwined with the clock core
anyway.

NOTE: I haven't done any analysis about whether what lockdep complains
about is really a feasible lockup. I merely confirmed that, indeed, we
have an ABBA situation and it seems like we should fix it.

Signed-off-by: Douglas Anderson <dianders@chromium.org>
---
As you can tell at just a glance, this is a terrible hack. I'm mostly
throwing it out into the world to try to start a conversation. I'd be
very happy for someone else to send a better patch or to tell me of a
better way to solve this. I've still tried to at least make it a
landable patch, however, in case nobody has any better ideas and we
want to land this.

I'll note that this lockdep warning showed up on my system at the same
time as a deadlock. Unfortunately, to the best of my ability to debug,
the two are unrelated. Fixing this lockdep warning doesn't fix the
deadlock and fixing the deadlock doesn't make lockdep happy. If you
want to see my fix for the deadlock, feel free to peruse:

https://lore.kernel.org/r/20220922154354.2486595-1-dianders@chromium.org

NOTE: the problem was found and debugging was done on a downstream
kernel (chromeos-5.15), not pure upstream. I see no reason why this
won't apply to upstream, but if you have some belief that this problem
is already solved or can't happen upstream then please yell and tell me
that I should abandon this patch.

If need be, I can also split this into two patches. I figured I'd do
that on-demand if someone actually told me that would be useful to them
and that they were considering landing this patch.

 drivers/base/power/clock_ops.c | 14 ++++++++++++++
 drivers/clk/clk.c              | 12 ++++++++++++
 2 files changed, 26 insertions(+)