diff mbox series

[v3] i2c: lpi2c: cache peripheral clock rate

Message ID 20230310130815.562418-1-alexander.sverdlin@siemens.com
State New
Headers show
Series [v3] i2c: lpi2c: cache peripheral clock rate | expand

Commit Message

A. Sverdlin March 10, 2023, 1:08 p.m. UTC
From: Alexander Sverdlin <alexander.sverdlin@siemens.com>

One of the reasons to do it is to save some CPU cycles on cpu_freq_get()
under mutex. The second reason if the (false-positive) lockdep splat caused
by the recursive feature of the "prepare_lock" (one clock instance is I2C
peripheral clock and another is pcf85063 RTC as clock provider):

======================================================
WARNING: possible circular locking dependency detected
5.15.71+... #1 Tainted: G           O
------------------------------------------------------
fs-value/2332 is trying to acquire lock:
ffff8000096cae08 (prepare_lock){+.+.}-{3:3}, at: clk_prepare_lock

but task is already holding lock:
ffff000011021100 (i2c_register_adapter){+.+.}-{3:3}, at: i2c_adapter_lock_bus

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (i2c_register_adapter){+.+.}-{3:3}:
       lock_acquire
       rt_mutex_lock_nested
       i2c_adapter_lock_bus
       i2c_transfer
       regmap_i2c_read
       _regmap_raw_read
       _regmap_bus_read
       _regmap_read
       regmap_read
       pcf85063_probe
       i2c_device_probe
       really_probe
       __driver_probe_device
       driver_probe_device
       __device_attach_driver
       bus_for_each_drv
       __device_attach
       device_initial_probe
       bus_probe_device
       device_add
       device_register
       i2c_new_client_device
       of_i2c_register_devices
       i2c_register_adapter
       __i2c_add_numbered_adapter
       i2c_add_adapter
       lpi2c_imx_probe
       platform_probe
       really_probe
       __driver_probe_device
       driver_probe_device
       __device_attach_driver
       bus_for_each_drv
       __device_attach
       device_initial_probe
       bus_probe_device
       deferred_probe_work_func
       process_one_work
       worker_thread
       kthread
       ret_from_fork

-> #1 (rtc_pcf85063:560:(&config->regmap)->lock){+.+.}-{3:3}:
       lock_acquire
       __mutex_lock
       mutex_lock_nested
       regmap_lock_mutex
       regmap_read
       pcf85063_clkout_recalc_rate
       __clk_register
       devm_clk_register
       pcf85063_probe
       i2c_device_probe
       really_probe
       __driver_probe_device
       driver_probe_device
       __device_attach_driver
       bus_for_each_drv
       __device_attach
       device_initial_probe
       bus_probe_device
       device_add
       device_register
       i2c_new_client_device
       of_i2c_register_devices
       i2c_register_adapter
       __i2c_add_numbered_adapter
       i2c_add_adapter
       lpi2c_imx_probe
       platform_probe
       really_probe
       __driver_probe_device
       driver_probe_device
       __device_attach_driver
       bus_for_each_drv
       __device_attach
       device_initial_probe
       bus_probe_device
       deferred_probe_work_func
       process_one_work
       worker_thread
       kthread
       ret_from_fork

-> #0 (prepare_lock){+.+.}-{3:3}:
       __lock_acquire
       lock_acquire.part.0
       lock_acquire
       __mutex_lock
       mutex_lock_nested
       clk_prepare_lock
       clk_get_rate
       lpi2c_imx_xfer
       __i2c_transfer
       i2c_transfer
       regmap_i2c_read
       _regmap_raw_read
       regmap_raw_read
       regmap_bulk_read
       at24_read
       nvmem_reg_read
       bin_attr_nvmem_read
       sysfs_kf_bin_read
       kernfs_fop_read_iter
       new_sync_read
       vfs_read
       ksys_read
       __arm64_sys_read
       invoke_syscall
       ...

other info that might help us debug this:

Chain exists of:
  prepare_lock --> rtc_pcf85063:560:(&config->regmap)->lock --> i2c_register_adapter

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(i2c_register_adapter);
                               lock(rtc_pcf85063:560:(&config->regmap)->lock);
                               lock(i2c_register_adapter);
  lock(prepare_lock);

 *** DEADLOCK ***

4 locks held by .../2332:
 #0: ffff0000146eb288 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_read_iter
 #1: ffff000010fe4400 (kn->active#72){.+.+}-{0:0}, at: kernfs_fop_read_iter
 #2: ffff0000110168e8 (&at24->lock){+.+.}-{3:3}, at: at24_read
 #3: ffff000011021100 (i2c_register_adapter){+.+.}-{3:3}, at: i2c_adapter_lock_bus

stack backtrace:
CPU: 1 PID: 2332 Comm: ... Tainted: G           O      5.15.71+... #1
Hardware name: ... (DT)
Call trace:
 dump_backtrace
 show_stack
 dump_stack_lvl
 dump_stack
 print_circular_bug
 check_noncircular
 __lock_acquire
 lock_acquire.part.0
 lock_acquire
 __mutex_lock
 mutex_lock_nested
 clk_prepare_lock
 clk_get_rate
 lpi2c_imx_xfer
 __i2c_transfer
 i2c_transfer
 regmap_i2c_read
 _regmap_raw_read
 regmap_raw_read
 regmap_bulk_read
 at24_read
 nvmem_reg_read
 bin_attr_nvmem_read
 sysfs_kf_bin_read
 kernfs_fop_read_iter
 new_sync_read
 vfs_read
 ksys_read
 __arm64_sys_read
 invoke_syscall
 ...

Fixes: a55fa9d0e42e ("i2c: imx-lpi2c: add low power i2c bus driver")
Signed-off-by: Alexander Sverdlin <alexander.sverdlin@siemens.com>
---
 drivers/i2c/busses/i2c-imx-lpi2c.c | 33 +++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

Changelog:
v3: fixed build error reported by kernel test robot <lkp@intel.com>
    https://lore.kernel.org/oe-kbuild-all/202303102010.pAv56wKs-lkp@intel.com/
v2: added clk_notifier as Alexander suggested

Comments

Alexander Stein April 21, 2023, 1:48 p.m. UTC | #1
Hi,

sorry for the delay.

Am Freitag, 10. März 2023, 14:08:15 CEST schrieb A. Sverdlin:
> From: Alexander Sverdlin <alexander.sverdlin@siemens.com>
> 
> One of the reasons to do it is to save some CPU cycles on cpu_freq_get()
> under mutex. The second reason if the (false-positive) lockdep splat caused
> by the recursive feature of the "prepare_lock" (one clock instance is I2C
> peripheral clock and another is pcf85063 RTC as clock provider):
> 
> ======================================================
> WARNING: possible circular locking dependency detected
> 5.15.71+... #1 Tainted: G           O
> ------------------------------------------------------
> fs-value/2332 is trying to acquire lock:
> ffff8000096cae08 (prepare_lock){+.+.}-{3:3}, at: clk_prepare_lock
> 
> but task is already holding lock:
> ffff000011021100 (i2c_register_adapter){+.+.}-{3:3}, at:
> i2c_adapter_lock_bus
> 
> which lock already depends on the new lock.
> 
> the existing dependency chain (in reverse order) is:
> 
> -> #2 (i2c_register_adapter){+.+.}-{3:3}:
>        lock_acquire
>        rt_mutex_lock_nested
>        i2c_adapter_lock_bus
>        i2c_transfer
>        regmap_i2c_read
>        _regmap_raw_read
>        _regmap_bus_read
>        _regmap_read
>        regmap_read
>        pcf85063_probe
>        i2c_device_probe
>        really_probe
>        __driver_probe_device
>        driver_probe_device
>        __device_attach_driver
>        bus_for_each_drv
>        __device_attach
>        device_initial_probe
>        bus_probe_device
>        device_add
>        device_register
>        i2c_new_client_device
>        of_i2c_register_devices
>        i2c_register_adapter
>        __i2c_add_numbered_adapter
>        i2c_add_adapter
>        lpi2c_imx_probe
>        platform_probe
>        really_probe
>        __driver_probe_device
>        driver_probe_device
>        __device_attach_driver
>        bus_for_each_drv
>        __device_attach
>        device_initial_probe
>        bus_probe_device
>        deferred_probe_work_func
>        process_one_work
>        worker_thread
>        kthread
>        ret_from_fork
> 
> -> #1 (rtc_pcf85063:560:(&config->regmap)->lock){+.+.}-{3:3}:
>        lock_acquire
>        __mutex_lock
>        mutex_lock_nested
>        regmap_lock_mutex
>        regmap_read
>        pcf85063_clkout_recalc_rate
>        __clk_register
>        devm_clk_register
>        pcf85063_probe
>        i2c_device_probe
>        really_probe
>        __driver_probe_device
>        driver_probe_device
>        __device_attach_driver
>        bus_for_each_drv
>        __device_attach
>        device_initial_probe
>        bus_probe_device
>        device_add
>        device_register
>        i2c_new_client_device
>        of_i2c_register_devices
>        i2c_register_adapter
>        __i2c_add_numbered_adapter
>        i2c_add_adapter
>        lpi2c_imx_probe
>        platform_probe
>        really_probe
>        __driver_probe_device
>        driver_probe_device
>        __device_attach_driver
>        bus_for_each_drv
>        __device_attach
>        device_initial_probe
>        bus_probe_device
>        deferred_probe_work_func
>        process_one_work
>        worker_thread
>        kthread
>        ret_from_fork
> 
> -> #0 (prepare_lock){+.+.}-{3:3}:
>        __lock_acquire
>        lock_acquire.part.0
>        lock_acquire
>        __mutex_lock
>        mutex_lock_nested
>        clk_prepare_lock
>        clk_get_rate
>        lpi2c_imx_xfer
>        __i2c_transfer
>        i2c_transfer
>        regmap_i2c_read
>        _regmap_raw_read
>        regmap_raw_read
>        regmap_bulk_read
>        at24_read
>        nvmem_reg_read
>        bin_attr_nvmem_read
>        sysfs_kf_bin_read
>        kernfs_fop_read_iter
>        new_sync_read
>        vfs_read
>        ksys_read
>        __arm64_sys_read
>        invoke_syscall
>        ...
> 
> other info that might help us debug this:
> 
> Chain exists of:
>   prepare_lock --> rtc_pcf85063:560:(&config->regmap)->lock -->
> i2c_register_adapter
> 
>  Possible unsafe locking scenario:
> 
>        CPU0                    CPU1
>        ----                    ----
>   lock(i2c_register_adapter);
>                               
> lock(rtc_pcf85063:560:(&config->regmap)->lock); lock(i2c_register_adapter);
>   lock(prepare_lock);
> 
>  *** DEADLOCK ***
> 
> 4 locks held by .../2332:
>  #0: ffff0000146eb288 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_read_iter
>  #1: ffff000010fe4400 (kn->active#72){.+.+}-{0:0}, at: kernfs_fop_read_iter
>  #2: ffff0000110168e8 (&at24->lock){+.+.}-{3:3}, at: at24_read
>  #3: ffff000011021100 (i2c_register_adapter){+.+.}-{3:3}, at:
> i2c_adapter_lock_bus
> 
> stack backtrace:
> CPU: 1 PID: 2332 Comm: ... Tainted: G           O      5.15.71+... #1
> Hardware name: ... (DT)
> Call trace:
>  dump_backtrace
>  show_stack
>  dump_stack_lvl
>  dump_stack
>  print_circular_bug
>  check_noncircular
>  __lock_acquire
>  lock_acquire.part.0
>  lock_acquire
>  __mutex_lock
>  mutex_lock_nested
>  clk_prepare_lock
>  clk_get_rate
>  lpi2c_imx_xfer
>  __i2c_transfer
>  i2c_transfer
>  regmap_i2c_read
>  _regmap_raw_read
>  regmap_raw_read
>  regmap_bulk_read
>  at24_read
>  nvmem_reg_read
>  bin_attr_nvmem_read
>  sysfs_kf_bin_read
>  kernfs_fop_read_iter
>  new_sync_read
>  vfs_read
>  ksys_read
>  __arm64_sys_read
>  invoke_syscall
>  ...
> 
> Fixes: a55fa9d0e42e ("i2c: imx-lpi2c: add low power i2c bus driver")
> Signed-off-by: Alexander Sverdlin <alexander.sverdlin@siemens.com>
> ---
>  drivers/i2c/busses/i2c-imx-lpi2c.c | 33 +++++++++++++++++++++++++++---
>  1 file changed, 30 insertions(+), 3 deletions(-)
> 
> Changelog:
> v3: fixed build error reported by kernel test robot <lkp@intel.com>
>    
> https://lore.kernel.org/oe-kbuild-all/202303102010.pAv56wKs-lkp@intel.com/
> v2: added clk_notifier as Alexander suggested
> 
> diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c
> b/drivers/i2c/busses/i2c-imx-lpi2c.c index 188f2a36d2fd6..5f1d1d4e018bd
> 100644
> --- a/drivers/i2c/busses/i2c-imx-lpi2c.c
> +++ b/drivers/i2c/busses/i2c-imx-lpi2c.c
> @@ -100,6 +100,8 @@ struct lpi2c_imx_struct {
>  	__u8			*rx_buf;
>  	__u8			*tx_buf;
>  	struct completion	complete;
> +	struct notifier_block	clk_change_nb;
> +	unsigned int		rate_per;
>  	unsigned int		msglen;
>  	unsigned int		delivered;
>  	unsigned int		block_data;
> @@ -198,24 +200,37 @@ static void lpi2c_imx_stop(struct lpi2c_imx_struct
> *lpi2c_imx) } while (1);
>  }
> 
> +static int lpi2c_imx_clk_change_cb(struct notifier_block *nb,
> +				   unsigned long action, void *data)
> +{
> +	struct clk_notifier_data *ndata = data;
> +	struct lpi2c_imx_struct *lpi2c_imx = container_of(nb,
> +							  struct 
lpi2c_imx_struct,
> +							  
clk_change_nb);
> +
> +	if (action & POST_RATE_CHANGE)
> +		lpi2c_imx->rate_per = ndata->new_rate;
> +
> +	return NOTIFY_OK;
> +}
> +
>  /* CLKLO = I2C_CLK_RATIO * CLKHI, SETHOLD = CLKHI, DATAVD = CLKHI/2 */
>  static int lpi2c_imx_config(struct lpi2c_imx_struct *lpi2c_imx)
>  {
>  	u8 prescale, filt, sethold, clkhi, clklo, datavd;
> -	unsigned int clk_rate, clk_cycle;
> +	unsigned int clk_cycle;
>  	enum lpi2c_imx_pincfg pincfg;
>  	unsigned int temp;
> 
>  	lpi2c_imx_set_mode(lpi2c_imx);
> 
> -	clk_rate = clk_get_rate(lpi2c_imx->clks[0].clk);
>  	if (lpi2c_imx->mode == HS || lpi2c_imx->mode == ULTRA_FAST)
>  		filt = 0;
>  	else
>  		filt = 2;
> 
>  	for (prescale = 0; prescale <= 7; prescale++) {
> -		clk_cycle = clk_rate / ((1 << prescale) * lpi2c_imx-
>bitrate)
> +		clk_cycle = lpi2c_imx->rate_per / ((1 << prescale) * 
lpi2c_imx->bitrate)
>  			    - 3 - (filt >> 1);
>  		clkhi = (clk_cycle + I2C_CLK_RATIO) / (I2C_CLK_RATIO + 1);
>  		clklo = clk_cycle - clkhi;
> @@ -588,6 +603,18 @@ static int lpi2c_imx_probe(struct platform_device
> *pdev) if (ret)
>  		return ret;
> 
> +	lpi2c_imx->clk_change_nb.notifier_call = lpi2c_imx_clk_change_cb;
> +	ret = devm_clk_notifier_register(&pdev->dev, lpi2c_imx->clks[0].clk,
> +					 &lpi2c_imx->clk_change_nb);
> +	if (ret)
> +		return dev_err_probe(&pdev->dev, ret,
> +				     "can't register peripheral clock 
notifier\n");
> +	lpi2c_imx->rate_per = clk_get_rate(lpi2c_imx->clks[0].clk);
> +	if (!lpi2c_imx->rate_per) {
> +		dev_err(&pdev->dev, "can't get I2C peripheral clock 
rate\n");
> +		return -EINVAL;
> +	}
> +

I would switch the order of the calls to devm_clk_notifier_register() and 
clk_get_rate(). AFAICS this looks like a possible lost update. The notifier 
might change rate_per before the (old) value from clk_get_rate is actually 
assigned.

With this change you fix the following call chain (from a kernel log)

[    4.562396]        clk_prepare_lock+0x48/0x9c
[    4.566740]        clk_get_rate+0x1c/0x74
[    4.570736]        lpi2c_imx_config+0x4c/0x190
[    4.575167]        lpi2c_imx_master_enable+0x54/0x128
[    4.580205]        lpi2c_imx_xfer+0x2c/0x3c8

Unfortuantely this was just hiding another call path from lpi2c_imx_xfer to 
clk_bulk_prepare. Here is my kernel log:

[   22.264508] ======================================================
[   22.270697] WARNING: possible circular locking dependency detected
[   22.276893] 6.3.0-rc7-next-20230420+ #6 Not tainted
[   22.281782] ------------------------------------------------------
[   22.287968] kworker/2:3/93 is trying to acquire lock:
[   22.293034] ffff800009d38bf8 (prepare_lock){+.+.}-{3:3}, at: 
clk_prepare_lock+0x48/0x9c
[   22.301095] 
[   22.301095] but task is already holding lock:
[   22.306942] ffff0000039ab100 (i2c_register_adapter){+.+.}-{3:3}, at: 
i2c_adapter_lock_bus+0x20/0x2c
[   22.316046] 
[   22.316046] which lock already depends on the new lock.
[   22.316046] 
[   22.324238] 
[   22.324238] the existing dependency chain (in reverse order) is:
[   22.331727] 
[   22.331727] -> #2 (i2c_register_adapter){+.+.}-{3:3}:
[   22.338295]        __lock_acquire+0x370/0x6e8
[   22.342674]        lock_acquire.part.0+0xcc/0x208
[   22.347399]        lock_acquire+0x94/0x14c
[   22.351509]        rt_mutex_lock_nested+0x5c/0x98
[   22.356235]        i2c_adapter_lock_bus+0x20/0x2c
[   22.360961]        i2c_transfer+0x80/0x114
[   22.365069]        regmap_i2c_read+0x5c/0xa0
[   22.369362]        _regmap_raw_read+0x110/0x2dc
[   22.373905]        _regmap_bus_read+0x40/0x74
[   22.378274]        _regmap_read+0x74/0x248
[   22.382393]        regmap_read+0x48/0x70
[   22.386337]        pcf85063_probe+0xf0/0x4f4
[   22.390640]        i2c_device_probe+0x100/0x2d4
[   22.395206]        call_driver_probe+0x28/0x15c
[   22.399750]        really_probe+0x180/0x320
[   22.403946]        __driver_probe_device+0x84/0x144
[   22.408837]        driver_probe_device+0x38/0x150
[   22.413554]        __device_attach_driver+0xcc/0x194
[   22.418532]        bus_for_each_drv+0x80/0xdc
[   22.422910]        __device_attach+0xa8/0x1f8
[   22.426545] systemd-journald[128]: Oldest entry in /var/log/journal/
e4579cc7db6747028247b9b859e132d6/system.joural is older than the configured 
file retention duration (1month), suggesting rotation.
[   22.427278]        device_initial_probe+0x10/0x18
[   22.427290]        bus_probe_device+0xa4/0xa8
[   22.427299]        device_add+0x3b0/0x508
[   22.427311]        device_register+0x1c/0x28
[   22.427323]        i2c_new_client_device+0x1c8/0x2bc
[   22.427333]        of_i2c_register_device+0xb4/0xdc
[   22.427344]        of_i2c_register_devices+0x80/0x154
[   22.456329] systemd-journald[128]: /var/log/journal/
e4579cc7db6747028247b9b859e132d6/system.journal: Journal heaer limits reached 
or header out-of-date, rotating.
[   22.458176]        i2c_register_adapter+0x184/0x4c8
[   22.458202]        __i2c_add_numbered_adapter+0x5c/0xa4
[   22.502097]        i2c_add_adapter+0xa0/0xcc
[   22.502122]        lpi2c_imx_probe+0x23c/0x350
[   22.502135]        platform_probe+0x64/0xfc
[   22.502146]        call_driver_probe+0x28/0x15c
[   22.502155]        really_probe+0x180/0x320
[   22.502164]        __driver_probe_device+0x84/0x144
[   22.502173]        driver_probe_device+0x38/0x150
[   22.502182]        __device_attach_driver+0xcc/0x194
[   22.502191]        bus_for_each_drv+0x80/0xdc
[   22.502204]        __device_attach+0xa8/0x1f8
[   22.502212]        device_initial_probe+0x10/0x18
[   22.502222]        bus_probe_device+0xa4/0xa8
[   22.502230]        deferred_probe_work_func+0xa0/0xf0
[   22.502239]        process_one_work+0x284/0x5b0
[   22.502252]        worker_thread+0x68/0x39c
[   22.502263]        kthread+0x104/0x108
[   22.502274]        ret_from_fork+0x10/0x20
[   22.502286] 
[   22.502286] -> #1 (rtc_pcf85063:594:(&config->regmap)->lock){+.+.}-{3:3}:
[   22.502310]        __lock_acquire+0x370/0x6e8
[   22.502322]        lock_acquire.part.0+0xcc/0x208
[   22.502332]        lock_acquire+0x94/0x14c
[   22.502341]        __mutex_lock+0x9c/0x838
[   22.502353]        mutex_lock_nested+0x20/0x28
[   22.502364]        regmap_lock_mutex+0x10/0x18
[   22.502377]        regmap_read+0x38/0x70
[   22.502389]        pcf85063_clkout_recalc_rate+0x2c/0x78
[   22.502403]        __clk_core_init+0x46c/0x4e0
[   22.502417]        __clk_register+0x160/0x23c
[   22.502429]        devm_clk_register+0x58/0xb4
[   22.502441]        pcf85063_probe+0x238/0x4f4
[   22.502451]        i2c_device_probe+0x100/0x2d4
[   22.502465]        call_driver_probe+0x28/0x15c
[   22.502473]        really_probe+0x180/0x320
[   22.502482]        __driver_probe_device+0x84/0x144
[   22.502491]        driver_probe_device+0x38/0x150
[   22.502500]        __device_attach_driver+0xcc/0x194
[   22.502509]        bus_for_each_drv+0x80/0xdc
[   22.502521]        __device_attach+0xa8/0x1f8
[   22.502530]        device_initial_probe+0x10/0x18
[   22.502539]        bus_probe_device+0xa4/0xa8
[   22.502548]        device_add+0x3b0/0x508
[   22.502559]        device_register+0x1c/0x28
[   22.502570]        i2c_new_client_device+0x1c8/0x2bc
[   22.502580]        of_i2c_register_device+0xb4/0xdc
[  OK  ] Started Network Time Synchronization[   22.502589]        
of_i2c_register_devices+0x80/0x154
.[   22.502599]        i2c_register_adapter+0x184/0x4c8

[   22.502607]        __i2c_add_numbered_adapter+0x5c/0xa4
[   22.502616]        i2c_add_adapter+0xa0/0xcc
[   22.502624]        lpi2c_imx_probe+0x23c/0x350
[   22.502636]        platform_probe+0x64/0xfc
[   22.502646]        call_driver_probe+0x28/0x15c
[   22.502655]        really_probe+0x180/0x320
[   22.502663]        __driver_probe_device+0x84/0x144
[   22.502672]        driver_probe_device+0x38/0x150
[   22.502681]        __device_attach_driver+0xcc/0x194
[   22.502690]        bus_for_each_drv+0x80/0xdc
[   22.502702]        __device_attach+0xa8/0x1f8
[   22.502711]        device_initial_probe+0x10/0x18
[   22.502720]        bus_probe_device+0xa4/0xa8
[   22.502729]        deferred_probe_work_func+0xa0/0xf0
[   22.502738]        process_one_work+0x284/0x5b0
[   22.502749]        worker_thread+0x68/0x39c
[   22.502760]        kthread+0x104/0x108
[   22.502770]        ret_from_fork+0x10/0x20
[   22.502779] 
[   22.502779] -> #0 (prepare_lock){+.+.}-{3:3}:
[   22.502799]        check_prev_add+0xb0/0xc80
[   22.502809]        validate_chain+0x444/0x510
[   22.502818]        __lock_acquire+0x370/0x6e8
[   22.502827]        lock_acquire.part.0+0xcc/0x208
[   22.502837]        lock_acquire+0x94/0x14c
[   22.502846]        __mutex_lock+0x9c/0x838
[   22.502857]        mutex_lock_nested+0x20/0x28
[   22.502867]        clk_prepare_lock+0x48/0x9c
[   22.502878]        clk_prepare+0x1c/0x3c
[   22.502890]        clk_bulk_prepare+0x48/0xcc
[   22.502899]        lpi2c_runtime_resume+0x30/0x84
[   22.502910]        pm_generic_runtime_resume+0x28/0x3c
[   22.502923]        __genpd_runtime_resume+0x2c/0xa0
[   22.502935]        genpd_runtime_resume+0xbc/0x308
[   22.502944]        __rpm_callback+0x44/0x19c
[   22.502953]        rpm_callback+0x64/0x70
[   22.502962]        rpm_resume+0x438/0x6d8
[   22.502970]        __pm_runtime_resume+0x54/0xb0
[   22.502978]        lpi2c_imx_master_enable+0x24/0x128
[   22.502989]        lpi2c_imx_xfer+0x2c/0x3c8
[   22.502999]        __i2c_transfer+0xe4/0x5a0
[   22.503008]        i2c_transfer+0x90/0x114
[   22.503016]        i2c_transfer_buffer_flags+0x58/0x84
[   22.503025]        regmap_i2c_write+0x1c/0x4c
[   22.503035]        _regmap_raw_write_impl+0x7dc/0x944
[   22.503044]        _regmap_bus_raw_write+0x5c/0x74
[   22.503052]        _regmap_write+0x64/0x238
[   22.503060]        _regmap_update_bits+0x100/0x11c
[   22.503069]        regmap_update_bits_base+0x60/0x90
[   22.503077]        pca953x_gpio_set_value+0x74/0x90
[   22.503088]        gpiod_set_raw_value_commit+0x6c/0x7c
[   22.503098]        gpiod_set_value_nocheck+0x68/0x70
[   22.503107]        gpiod_set_value_cansleep+0x3c/0xa8
[   22.503116]        gpio_led_set_blocking+0x54/0x7c
[   22.503128]        set_brightness_delayed+0x90/0xd8
[   22.503138]        process_one_work+0x284/0x5b0
[   22.503149]        worker_thread+0x68/0x39c
[   22.503160]        kthread+0x104/0x108
[   22.503169]        ret_from_fork+0x10/0x20
[   22.503179] 
[   22.503179] other info that might help us debug this:
[   22.503179] 
[   22.503183] Chain exists of:
[   22.503183]   prepare_lock --> rtc_pcf85063:594:(&config->regmap)->lock --> 
i2c_register_adapter
[   22.503183] 
[   22.503207]  Possible unsafe locking scenario:
[   22.503207] 
[   22.503210]        CPU0                    CPU1
[   22.503213]        ----                    ----
[   22.503216]   lock(i2c_register_adapter);
[   22.503225]                                lock(rtc_pcf85063:594:(&config-
>regmap)->lock);
[   22.503235]                                lock(i2c_register_adapter);
[   22.503244]   lock(prepare_lock);
[   22.503253] 
[   22.503253]  *** DEADLOCK ***

Now lpi2c_runtime_resume will call into clk_prepare() which also calls 
clk_prepare_lock() (identical to clk_get_rate).

Best regards,
Alexader

>  	pm_runtime_set_autosuspend_delay(&pdev->dev, I2C_PM_TIMEOUT);
>  	pm_runtime_use_autosuspend(&pdev->dev);
>  	pm_runtime_get_noresume(&pdev->dev);
Marc Kleine-Budde April 21, 2023, 1:59 p.m. UTC | #2
On 21.04.2023 15:48:59, Alexander Stein wrote:
[...]
> Now lpi2c_runtime_resume will call into clk_prepare() which also calls 
> clk_prepare_lock() (identical to clk_get_rate).

IIRC this is a general problem^w limitation of the clock framework,
clock providers cannot use clocks themselves in certain callback, e.g.
set_rate.

Marc
A. Sverdlin April 21, 2023, 2:10 p.m. UTC | #3
Hello Alexander,

thanks for looking into that!

On Fri, 2023-04-21 at 15:48 +0200, Alexander Stein wrote:
> Unfortuantely this was just hiding another call path from
> lpi2c_imx_xfer to 
> clk_bulk_prepare. Here is my kernel log:

Yes, soon after publishing the patch I reproduced your log as well,
that's why I didn't follow up on the original patch, as it indeed
doesn't solve all the LOCKDEP splats with lpi2c involved and I didn't
have time to look into this yet.

> [   22.264508] ======================================================
> [   22.270697] WARNING: possible circular locking dependency detected
> [   22.276893] 6.3.0-rc7-next-20230420+ #6 Not tainted
> [   22.281782] ------------------------------------------------------
> [   22.287968] kworker/2:3/93 is trying to acquire lock:
> [   22.293034] ffff800009d38bf8 (prepare_lock){+.+.}-{3:3}, at: 
> clk_prepare_lock+0x48/0x9c
> [   22.301095] 
> [   22.301095] but task is already holding lock:
> [   22.306942] ffff0000039ab100 (i2c_register_adapter){+.+.}-{3:3},
> at: 
> i2c_adapter_lock_bus+0x20/0x2c
> [   22.316046] 
> [   22.316046] which lock already depends on the new lock.
> [   22.316046] 
> [   22.324238] 
> [   22.324238] the existing dependency chain (in reverse order) is:
> [   22.331727] 
> [   22.331727] -> #2 (i2c_register_adapter){+.+.}-{3:3}:
> [   22.338295]        __lock_acquire+0x370/0x6e8
> [   22.342674]        lock_acquire.part.0+0xcc/0x208
> [   22.347399]        lock_acquire+0x94/0x14c
> [   22.351509]        rt_mutex_lock_nested+0x5c/0x98
> [   22.356235]        i2c_adapter_lock_bus+0x20/0x2c
> [   22.360961]        i2c_transfer+0x80/0x114
> [   22.365069]        regmap_i2c_read+0x5c/0xa0
> [   22.369362]        _regmap_raw_read+0x110/0x2dc
> [   22.373905]        _regmap_bus_read+0x40/0x74
> [   22.378274]        _regmap_read+0x74/0x248
> [   22.382393]        regmap_read+0x48/0x70
> [   22.386337]        pcf85063_probe+0xf0/0x4f4
> [   22.390640]        i2c_device_probe+0x100/0x2d4
> [   22.395206]        call_driver_probe+0x28/0x15c
> [   22.399750]        really_probe+0x180/0x320
> [   22.403946]        __driver_probe_device+0x84/0x144
> [   22.408837]        driver_probe_device+0x38/0x150
> [   22.413554]        __device_attach_driver+0xcc/0x194
> [   22.418532]        bus_for_each_drv+0x80/0xdc
> [   22.422910]        __device_attach+0xa8/0x1f8
> [   22.426545] systemd-journald[128]: Oldest entry in
> /var/log/journal/
> e4579cc7db6747028247b9b859e132d6/system.joural is older than the
> configured 
> file retention duration (1month), suggesting rotation.
> [   22.427278]        device_initial_probe+0x10/0x18
> [   22.427290]        bus_probe_device+0xa4/0xa8
> [   22.427299]        device_add+0x3b0/0x508
> [   22.427311]        device_register+0x1c/0x28
> [   22.427323]        i2c_new_client_device+0x1c8/0x2bc
> [   22.427333]        of_i2c_register_device+0xb4/0xdc
> [   22.427344]        of_i2c_register_devices+0x80/0x154
> [   22.456329] systemd-journald[128]: /var/log/journal/
> e4579cc7db6747028247b9b859e132d6/system.journal: Journal heaer limits
> reached 
> or header out-of-date, rotating.
> [   22.458176]        i2c_register_adapter+0x184/0x4c8
> [   22.458202]        __i2c_add_numbered_adapter+0x5c/0xa4
> [   22.502097]        i2c_add_adapter+0xa0/0xcc
> [   22.502122]        lpi2c_imx_probe+0x23c/0x350
> [   22.502135]        platform_probe+0x64/0xfc
> [   22.502146]        call_driver_probe+0x28/0x15c
> [   22.502155]        really_probe+0x180/0x320
> [   22.502164]        __driver_probe_device+0x84/0x144
> [   22.502173]        driver_probe_device+0x38/0x150
> [   22.502182]        __device_attach_driver+0xcc/0x194
> [   22.502191]        bus_for_each_drv+0x80/0xdc
> [   22.502204]        __device_attach+0xa8/0x1f8
> [   22.502212]        device_initial_probe+0x10/0x18
> [   22.502222]        bus_probe_device+0xa4/0xa8
> [   22.502230]        deferred_probe_work_func+0xa0/0xf0
> [   22.502239]        process_one_work+0x284/0x5b0
> [   22.502252]        worker_thread+0x68/0x39c
> [   22.502263]        kthread+0x104/0x108
> [   22.502274]        ret_from_fork+0x10/0x20
> [   22.502286] 
> [   22.502286] -> #1 (rtc_pcf85063:594:(&config->regmap)-
> >lock){+.+.}-{3:3}:
> [   22.502310]        __lock_acquire+0x370/0x6e8
> [   22.502322]        lock_acquire.part.0+0xcc/0x208
> [   22.502332]        lock_acquire+0x94/0x14c
> [   22.502341]        __mutex_lock+0x9c/0x838
> [   22.502353]        mutex_lock_nested+0x20/0x28
> [   22.502364]        regmap_lock_mutex+0x10/0x18
> [   22.502377]        regmap_read+0x38/0x70
> [   22.502389]        pcf85063_clkout_recalc_rate+0x2c/0x78
> [   22.502403]        __clk_core_init+0x46c/0x4e0
> [   22.502417]        __clk_register+0x160/0x23c
> [   22.502429]        devm_clk_register+0x58/0xb4
> [   22.502441]        pcf85063_probe+0x238/0x4f4
> [   22.502451]        i2c_device_probe+0x100/0x2d4
> [   22.502465]        call_driver_probe+0x28/0x15c
> [   22.502473]        really_probe+0x180/0x320
> [   22.502482]        __driver_probe_device+0x84/0x144
> [   22.502491]        driver_probe_device+0x38/0x150
> [   22.502500]        __device_attach_driver+0xcc/0x194
> [   22.502509]        bus_for_each_drv+0x80/0xdc
> [   22.502521]        __device_attach+0xa8/0x1f8
> [   22.502530]        device_initial_probe+0x10/0x18
> [   22.502539]        bus_probe_device+0xa4/0xa8
> [   22.502548]        device_add+0x3b0/0x508
> [   22.502559]        device_register+0x1c/0x28
> [   22.502570]        i2c_new_client_device+0x1c8/0x2bc
> [   22.502580]        of_i2c_register_device+0xb4/0xdc
> [  OK  ] Started Network Time Synchronization[   22.502589]        
> of_i2c_register_devices+0x80/0x154
> .[   22.502599]        i2c_register_adapter+0x184/0x4c8
> 
> [   22.502607]        __i2c_add_numbered_adapter+0x5c/0xa4
> [   22.502616]        i2c_add_adapter+0xa0/0xcc
> [   22.502624]        lpi2c_imx_probe+0x23c/0x350
> [   22.502636]        platform_probe+0x64/0xfc
> [   22.502646]        call_driver_probe+0x28/0x15c
> [   22.502655]        really_probe+0x180/0x320
> [   22.502663]        __driver_probe_device+0x84/0x144
> [   22.502672]        driver_probe_device+0x38/0x150
> [   22.502681]        __device_attach_driver+0xcc/0x194
> [   22.502690]        bus_for_each_drv+0x80/0xdc
> [   22.502702]        __device_attach+0xa8/0x1f8
> [   22.502711]        device_initial_probe+0x10/0x18
> [   22.502720]        bus_probe_device+0xa4/0xa8
> [   22.502729]        deferred_probe_work_func+0xa0/0xf0
> [   22.502738]        process_one_work+0x284/0x5b0
> [   22.502749]        worker_thread+0x68/0x39c
> [   22.502760]        kthread+0x104/0x108
> [   22.502770]        ret_from_fork+0x10/0x20
> [   22.502779] 
> [   22.502779] -> #0 (prepare_lock){+.+.}-{3:3}:
> [   22.502799]        check_prev_add+0xb0/0xc80
> [   22.502809]        validate_chain+0x444/0x510
> [   22.502818]        __lock_acquire+0x370/0x6e8
> [   22.502827]        lock_acquire.part.0+0xcc/0x208
> [   22.502837]        lock_acquire+0x94/0x14c
> [   22.502846]        __mutex_lock+0x9c/0x838
> [   22.502857]        mutex_lock_nested+0x20/0x28
> [   22.502867]        clk_prepare_lock+0x48/0x9c
> [   22.502878]        clk_prepare+0x1c/0x3c
> [   22.502890]        clk_bulk_prepare+0x48/0xcc
> [   22.502899]        lpi2c_runtime_resume+0x30/0x84
> [   22.502910]        pm_generic_runtime_resume+0x28/0x3c
> [   22.502923]        __genpd_runtime_resume+0x2c/0xa0
> [   22.502935]        genpd_runtime_resume+0xbc/0x308
> [   22.502944]        __rpm_callback+0x44/0x19c
> [   22.502953]        rpm_callback+0x64/0x70
> [   22.502962]        rpm_resume+0x438/0x6d8
> [   22.502970]        __pm_runtime_resume+0x54/0xb0
> [   22.502978]        lpi2c_imx_master_enable+0x24/0x128
> [   22.502989]        lpi2c_imx_xfer+0x2c/0x3c8
> [   22.502999]        __i2c_transfer+0xe4/0x5a0
> [   22.503008]        i2c_transfer+0x90/0x114
> [   22.503016]        i2c_transfer_buffer_flags+0x58/0x84
> [   22.503025]        regmap_i2c_write+0x1c/0x4c
> [   22.503035]        _regmap_raw_write_impl+0x7dc/0x944
> [   22.503044]        _regmap_bus_raw_write+0x5c/0x74
> [   22.503052]        _regmap_write+0x64/0x238
> [   22.503060]        _regmap_update_bits+0x100/0x11c
> [   22.503069]        regmap_update_bits_base+0x60/0x90
> [   22.503077]        pca953x_gpio_set_value+0x74/0x90
> [   22.503088]        gpiod_set_raw_value_commit+0x6c/0x7c
> [   22.503098]        gpiod_set_value_nocheck+0x68/0x70
> [   22.503107]        gpiod_set_value_cansleep+0x3c/0xa8
> [   22.503116]        gpio_led_set_blocking+0x54/0x7c
> [   22.503128]        set_brightness_delayed+0x90/0xd8
> [   22.503138]        process_one_work+0x284/0x5b0
> [   22.503149]        worker_thread+0x68/0x39c
> [   22.503160]        kthread+0x104/0x108
> [   22.503169]        ret_from_fork+0x10/0x20
> [   22.503179] 
> [   22.503179] other info that might help us debug this:
> [   22.503179] 
> [   22.503183] Chain exists of:
> [   22.503183]   prepare_lock --> rtc_pcf85063:594:(&config->regmap)-
> >lock --> 
> i2c_register_adapter
> [   22.503183] 
> [   22.503207]  Possible unsafe locking scenario:
> [   22.503207] 
> [   22.503210]        CPU0                    CPU1
> [   22.503213]        ----                    ----
> [   22.503216]   lock(i2c_register_adapter);
> [   22.503225]                               
> lock(rtc_pcf85063:594:(&config-
> > regmap)->lock);
> [   22.503235]                               
> lock(i2c_register_adapter);
> [   22.503244]   lock(prepare_lock);
> [   22.503253] 
> [   22.503253]  *** DEADLOCK ***
> 
> Now lpi2c_runtime_resume will call into clk_prepare() which also
> calls 
> clk_prepare_lock() (identical to clk_get_rate).
Alexander Stein April 24, 2023, 7:03 a.m. UTC | #4
Hi Marc,

Am Freitag, 21. April 2023, 15:59:52 CEST schrieb Marc Kleine-Budde:
> * PGP Signed by an unknown key
> 
> On 21.04.2023 15:48:59, Alexander Stein wrote:
> [...]
> 
> > Now lpi2c_runtime_resume will call into clk_prepare() which also calls
> > clk_prepare_lock() (identical to clk_get_rate).
> 
> IIRC this is a general problem^w limitation of the clock framework,
> clock providers cannot use clocks themselves in certain callback, e.g.
> set_rate.

Well, that's essentially impossible when this clock provider is attached via 
i2c. i2c transfers potentially need to change or prepare clocks.

One problem is that during clock registration (__clk_core_init) in the call to 
clk_ops.recalc_rate an i2c transfer is issued (via regmap). This is the 
inverse lock order to a regular transfer runtime resuming the i2c master and 
preparing the clocks.

While looking at this, is_prepared is also affected by this. It's reading a 
register, thus issuing a i2c transfer. But this function can also be called 
from within clk_unprepare_unused_subtree, which holds clk_prepare_lock as 
well.

This might be avoided by using regcache, but I'm not really sure this catches 
every case.

Best regards,
Alexander
Alexander Stein April 24, 2023, 8:08 a.m. UTC | #5
Am Freitag, 10. März 2023, 14:08:15 CEST schrieb A. Sverdlin:
> From: Alexander Sverdlin <alexander.sverdlin@siemens.com>
> 
> One of the reasons to do it is to save some CPU cycles on cpu_freq_get()
> under mutex. The second reason if the (false-positive) lockdep splat caused
> by the recursive feature of the "prepare_lock" (one clock instance is I2C
> peripheral clock and another is pcf85063 RTC as clock provider):
> 
> ======================================================
> WARNING: possible circular locking dependency detected
> 5.15.71+... #1 Tainted: G           O
> ------------------------------------------------------
> fs-value/2332 is trying to acquire lock:
> ffff8000096cae08 (prepare_lock){+.+.}-{3:3}, at: clk_prepare_lock
> 
> but task is already holding lock:
> ffff000011021100 (i2c_register_adapter){+.+.}-{3:3}, at:
> i2c_adapter_lock_bus
> 
> which lock already depends on the new lock.
> 
> the existing dependency chain (in reverse order) is:
> 
> -> #2 (i2c_register_adapter){+.+.}-{3:3}:
>        lock_acquire
>        rt_mutex_lock_nested
>        i2c_adapter_lock_bus
>        i2c_transfer
>        regmap_i2c_read
>        _regmap_raw_read
>        _regmap_bus_read
>        _regmap_read
>        regmap_read
>        pcf85063_probe
>        i2c_device_probe
>        really_probe
>        __driver_probe_device
>        driver_probe_device
>        __device_attach_driver
>        bus_for_each_drv
>        __device_attach
>        device_initial_probe
>        bus_probe_device
>        device_add
>        device_register
>        i2c_new_client_device
>        of_i2c_register_devices
>        i2c_register_adapter
>        __i2c_add_numbered_adapter
>        i2c_add_adapter
>        lpi2c_imx_probe
>        platform_probe
>        really_probe
>        __driver_probe_device
>        driver_probe_device
>        __device_attach_driver
>        bus_for_each_drv
>        __device_attach
>        device_initial_probe
>        bus_probe_device
>        deferred_probe_work_func
>        process_one_work
>        worker_thread
>        kthread
>        ret_from_fork
> 
> -> #1 (rtc_pcf85063:560:(&config->regmap)->lock){+.+.}-{3:3}:
>        lock_acquire
>        __mutex_lock
>        mutex_lock_nested
>        regmap_lock_mutex
>        regmap_read
>        pcf85063_clkout_recalc_rate
>        __clk_register
>        devm_clk_register
>        pcf85063_probe
>        i2c_device_probe
>        really_probe
>        __driver_probe_device
>        driver_probe_device
>        __device_attach_driver
>        bus_for_each_drv
>        __device_attach
>        device_initial_probe
>        bus_probe_device
>        device_add
>        device_register
>        i2c_new_client_device
>        of_i2c_register_devices
>        i2c_register_adapter
>        __i2c_add_numbered_adapter
>        i2c_add_adapter
>        lpi2c_imx_probe
>        platform_probe
>        really_probe
>        __driver_probe_device
>        driver_probe_device
>        __device_attach_driver
>        bus_for_each_drv
>        __device_attach
>        device_initial_probe
>        bus_probe_device
>        deferred_probe_work_func
>        process_one_work
>        worker_thread
>        kthread
>        ret_from_fork
> 
> -> #0 (prepare_lock){+.+.}-{3:3}:
>        __lock_acquire
>        lock_acquire.part.0
>        lock_acquire
>        __mutex_lock
>        mutex_lock_nested
>        clk_prepare_lock
>        clk_get_rate
>        lpi2c_imx_xfer
>        __i2c_transfer
>        i2c_transfer
>        regmap_i2c_read
>        _regmap_raw_read
>        regmap_raw_read
>        regmap_bulk_read
>        at24_read
>        nvmem_reg_read
>        bin_attr_nvmem_read
>        sysfs_kf_bin_read
>        kernfs_fop_read_iter
>        new_sync_read
>        vfs_read
>        ksys_read
>        __arm64_sys_read
>        invoke_syscall
>        ...
> 
> other info that might help us debug this:
> 
> Chain exists of:
>   prepare_lock --> rtc_pcf85063:560:(&config->regmap)->lock -->
> i2c_register_adapter
> 
>  Possible unsafe locking scenario:
> 
>        CPU0                    CPU1
>        ----                    ----
>   lock(i2c_register_adapter);
>                               
> lock(rtc_pcf85063:560:(&config->regmap)->lock); lock(i2c_register_adapter);
>   lock(prepare_lock);
> 
>  *** DEADLOCK ***
> 
> 4 locks held by .../2332:
>  #0: ffff0000146eb288 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_read_iter
>  #1: ffff000010fe4400 (kn->active#72){.+.+}-{0:0}, at: kernfs_fop_read_iter
>  #2: ffff0000110168e8 (&at24->lock){+.+.}-{3:3}, at: at24_read
>  #3: ffff000011021100 (i2c_register_adapter){+.+.}-{3:3}, at:
> i2c_adapter_lock_bus
> 
> stack backtrace:
> CPU: 1 PID: 2332 Comm: ... Tainted: G           O      5.15.71+... #1
> Hardware name: ... (DT)
> Call trace:
>  dump_backtrace
>  show_stack
>  dump_stack_lvl
>  dump_stack
>  print_circular_bug
>  check_noncircular
>  __lock_acquire
>  lock_acquire.part.0
>  lock_acquire
>  __mutex_lock
>  mutex_lock_nested
>  clk_prepare_lock
>  clk_get_rate
>  lpi2c_imx_xfer
>  __i2c_transfer
>  i2c_transfer
>  regmap_i2c_read
>  _regmap_raw_read
>  regmap_raw_read
>  regmap_bulk_read
>  at24_read
>  nvmem_reg_read
>  bin_attr_nvmem_read
>  sysfs_kf_bin_read
>  kernfs_fop_read_iter
>  new_sync_read
>  vfs_read
>  ksys_read
>  __arm64_sys_read
>  invoke_syscall
>  ...
> 
> Fixes: a55fa9d0e42e ("i2c: imx-lpi2c: add low power i2c bus driver")
> Signed-off-by: Alexander Sverdlin <alexander.sverdlin@siemens.com>

Reviewed-by: Alexander Stein <alexander.stein@ew.tq-group.com>

Note: [1] needs to be applied as well in order to get rid of all the lockdep 
errors.

[1] https://lore.kernel.org/linux-i2c/20230424080627.20564-1-alexander.stein@ew.tq-group.com/T/#u

> ---
>  drivers/i2c/busses/i2c-imx-lpi2c.c | 33 +++++++++++++++++++++++++++---
>  1 file changed, 30 insertions(+), 3 deletions(-)
> 
> Changelog:
> v3: fixed build error reported by kernel test robot <lkp@intel.com>
>    
> https://lore.kernel.org/oe-kbuild-all/202303102010.pAv56wKs-lkp@intel.com/
> v2: added clk_notifier as Alexander suggested
> 
> diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c
> b/drivers/i2c/busses/i2c-imx-lpi2c.c index 188f2a36d2fd6..5f1d1d4e018bd
> 100644
> --- a/drivers/i2c/busses/i2c-imx-lpi2c.c
> +++ b/drivers/i2c/busses/i2c-imx-lpi2c.c
> @@ -100,6 +100,8 @@ struct lpi2c_imx_struct {
>  	__u8			*rx_buf;
>  	__u8			*tx_buf;
>  	struct completion	complete;
> +	struct notifier_block	clk_change_nb;
> +	unsigned int		rate_per;
>  	unsigned int		msglen;
>  	unsigned int		delivered;
>  	unsigned int		block_data;
> @@ -198,24 +200,37 @@ static void lpi2c_imx_stop(struct lpi2c_imx_struct
> *lpi2c_imx) } while (1);
>  }
> 
> +static int lpi2c_imx_clk_change_cb(struct notifier_block *nb,
> +				   unsigned long action, void *data)
> +{
> +	struct clk_notifier_data *ndata = data;
> +	struct lpi2c_imx_struct *lpi2c_imx = container_of(nb,
> +							  struct 
lpi2c_imx_struct,
> +							  
clk_change_nb);
> +
> +	if (action & POST_RATE_CHANGE)
> +		lpi2c_imx->rate_per = ndata->new_rate;
> +
> +	return NOTIFY_OK;
> +}
> +
>  /* CLKLO = I2C_CLK_RATIO * CLKHI, SETHOLD = CLKHI, DATAVD = CLKHI/2 */
>  static int lpi2c_imx_config(struct lpi2c_imx_struct *lpi2c_imx)
>  {
>  	u8 prescale, filt, sethold, clkhi, clklo, datavd;
> -	unsigned int clk_rate, clk_cycle;
> +	unsigned int clk_cycle;
>  	enum lpi2c_imx_pincfg pincfg;
>  	unsigned int temp;
> 
>  	lpi2c_imx_set_mode(lpi2c_imx);
> 
> -	clk_rate = clk_get_rate(lpi2c_imx->clks[0].clk);
>  	if (lpi2c_imx->mode == HS || lpi2c_imx->mode == ULTRA_FAST)
>  		filt = 0;
>  	else
>  		filt = 2;
> 
>  	for (prescale = 0; prescale <= 7; prescale++) {
> -		clk_cycle = clk_rate / ((1 << prescale) * lpi2c_imx-
>bitrate)
> +		clk_cycle = lpi2c_imx->rate_per / ((1 << prescale) * 
lpi2c_imx->bitrate)
>  			    - 3 - (filt >> 1);
>  		clkhi = (clk_cycle + I2C_CLK_RATIO) / (I2C_CLK_RATIO + 1);
>  		clklo = clk_cycle - clkhi;
> @@ -588,6 +603,18 @@ static int lpi2c_imx_probe(struct platform_device
> *pdev) if (ret)
>  		return ret;
> 
> +	lpi2c_imx->clk_change_nb.notifier_call = lpi2c_imx_clk_change_cb;
> +	ret = devm_clk_notifier_register(&pdev->dev, lpi2c_imx->clks[0].clk,
> +					 &lpi2c_imx->clk_change_nb);
> +	if (ret)
> +		return dev_err_probe(&pdev->dev, ret,
> +				     "can't register peripheral clock 
notifier\n");
> +	lpi2c_imx->rate_per = clk_get_rate(lpi2c_imx->clks[0].clk);
> +	if (!lpi2c_imx->rate_per) {
> +		dev_err(&pdev->dev, "can't get I2C peripheral clock 
rate\n");
> +		return -EINVAL;
> +	}
> +
>  	pm_runtime_set_autosuspend_delay(&pdev->dev, I2C_PM_TIMEOUT);
>  	pm_runtime_use_autosuspend(&pdev->dev);
>  	pm_runtime_get_noresume(&pdev->dev);
Wolfram Sang April 30, 2023, 7:05 a.m. UTC | #6
> > IIRC this is a general problem^w limitation of the clock framework,
> > clock providers cannot use clocks themselves in certain callback, e.g.
> > set_rate.
> 
> Well, that's essentially impossible when this clock provider is attached via 
> i2c. i2c transfers potentially need to change or prepare clocks.

So, as I get it, this is not a specific lpi2c problem but affecting any
I2C controller driver which uses get_rate() to setup a transfer to a
remote I2C clock provider? And this lockdep warning is a false-positive?
Alexander Stein May 2, 2023, 6:50 a.m. UTC | #7
Hello Wolfgang,

Am Sonntag, 30. April 2023, 09:05:55 CEST schrieb Wolfram Sang:
> * PGP Signed by an unknown key
> 
> > > IIRC this is a general problem^w limitation of the clock framework,
> > > clock providers cannot use clocks themselves in certain callback, e.g.
> > > set_rate.
> > 
> > Well, that's essentially impossible when this clock provider is attached
> > via i2c. i2c transfers potentially need to change or prepare clocks.
> 
> So, as I get it, this is not a specific lpi2c problem but affecting any
> I2C controller driver which uses get_rate() to setup a transfer to a
> remote I2C clock provider? And this lockdep warning is a false-positive?

Yes, IMHO this could potentially occur on every I2C controller driver, if a 
clock provider while holding the clk_prepare_lock, e.g. during registration, 
issues an i2c transfer.
I'm not so sure if this is a false-positive, but more like trying to do a 
nested lock.

Best regards
Alexander
Marc Kleine-Budde May 2, 2023, 7:03 a.m. UTC | #8
On 02.05.2023 08:50:06, Alexander Stein wrote:
> Hello Wolfgang,
> 
> Am Sonntag, 30. April 2023, 09:05:55 CEST schrieb Wolfram Sang:
> > * PGP Signed by an unknown key
> > 
> > > > IIRC this is a general problem^w limitation of the clock framework,
> > > > clock providers cannot use clocks themselves in certain callback, e.g.
> > > > set_rate.
> > > 
> > > Well, that's essentially impossible when this clock provider is attached
> > > via i2c. i2c transfers potentially need to change or prepare clocks.
> > 
> > So, as I get it, this is not a specific lpi2c problem but affecting any
> > I2C controller driver which uses get_rate() to setup a transfer to a
> > remote I2C clock provider? And this lockdep warning is a false-positive?
> 
> Yes, IMHO this could potentially occur on every I2C controller driver, if a 
> clock provider while holding the clk_prepare_lock, e.g. during registration, 
> issues an i2c transfer.
> I'm not so sure if this is a false-positive, but more like trying to do a 
> nested lock.

It's a general limitation of the clock framework. The clk_prepare_lock()
is taken by clk_prepare() and by clk_get_rate(). So if you implement a
clock provider driver that uses I2C, which might use clk_get_rate() it
might deadlock. However there's a workaround for nested I2C calls:
533ddeb1e86f "(clk: allow reentrant calls into the clk framework,
2013-03-28)". This doesn't work for SPI, as it's using a worker thread.

| https://elixir.bootlin.com/linux/v6.3/source/drivers/clk/clk.c#L128

regards,
Marc
Alexander Stein May 15, 2023, 9:11 a.m. UTC | #9
Hi,

Am Freitag, 21. April 2023, 15:48:59 CEST schrieb Alexander Stein:
> Hi,
> 
> sorry for the delay.
> 
> Am Freitag, 10. März 2023, 14:08:15 CEST schrieb A. Sverdlin:
> > From: Alexander Sverdlin <alexander.sverdlin@siemens.com>
> > 
> > One of the reasons to do it is to save some CPU cycles on cpu_freq_get()
> > under mutex. The second reason if the (false-positive) lockdep splat
> > caused
> > by the recursive feature of the "prepare_lock" (one clock instance is I2C
> > peripheral clock and another is pcf85063 RTC as clock provider):
> > 
> > ======================================================
> > WARNING: possible circular locking dependency detected
> > 5.15.71+... #1 Tainted: G           O
> > ------------------------------------------------------
> > fs-value/2332 is trying to acquire lock:
> > ffff8000096cae08 (prepare_lock){+.+.}-{3:3}, at: clk_prepare_lock
> > 
> > but task is already holding lock:
> > ffff000011021100 (i2c_register_adapter){+.+.}-{3:3}, at:
> > i2c_adapter_lock_bus
> > 
> > which lock already depends on the new lock.
> > 
> > the existing dependency chain (in reverse order) is:
> > 
> > -> #2 (i2c_register_adapter){+.+.}-{3:3}:
> >        lock_acquire
> >        rt_mutex_lock_nested
> >        i2c_adapter_lock_bus
> >        i2c_transfer
> >        regmap_i2c_read
> >        _regmap_raw_read
> >        _regmap_bus_read
> >        _regmap_read
> >        regmap_read
> >        pcf85063_probe
> >        i2c_device_probe
> >        really_probe
> >        __driver_probe_device
> >        driver_probe_device
> >        __device_attach_driver
> >        bus_for_each_drv
> >        __device_attach
> >        device_initial_probe
> >        bus_probe_device
> >        device_add
> >        device_register
> >        i2c_new_client_device
> >        of_i2c_register_devices
> >        i2c_register_adapter
> >        __i2c_add_numbered_adapter
> >        i2c_add_adapter
> >        lpi2c_imx_probe
> >        platform_probe
> >        really_probe
> >        __driver_probe_device
> >        driver_probe_device
> >        __device_attach_driver
> >        bus_for_each_drv
> >        __device_attach
> >        device_initial_probe
> >        bus_probe_device
> >        deferred_probe_work_func
> >        process_one_work
> >        worker_thread
> >        kthread
> >        ret_from_fork
> > 
> > -> #1 (rtc_pcf85063:560:(&config->regmap)->lock){+.+.}-{3:3}:
> >        lock_acquire
> >        __mutex_lock
> >        mutex_lock_nested
> >        regmap_lock_mutex
> >        regmap_read
> >        pcf85063_clkout_recalc_rate
> >        __clk_register
> >        devm_clk_register
> >        pcf85063_probe
> >        i2c_device_probe
> >        really_probe
> >        __driver_probe_device
> >        driver_probe_device
> >        __device_attach_driver
> >        bus_for_each_drv
> >        __device_attach
> >        device_initial_probe
> >        bus_probe_device
> >        device_add
> >        device_register
> >        i2c_new_client_device
> >        of_i2c_register_devices
> >        i2c_register_adapter
> >        __i2c_add_numbered_adapter
> >        i2c_add_adapter
> >        lpi2c_imx_probe
> >        platform_probe
> >        really_probe
> >        __driver_probe_device
> >        driver_probe_device
> >        __device_attach_driver
> >        bus_for_each_drv
> >        __device_attach
> >        device_initial_probe
> >        bus_probe_device
> >        deferred_probe_work_func
> >        process_one_work
> >        worker_thread
> >        kthread
> >        ret_from_fork
> > 
> > -> #0 (prepare_lock){+.+.}-{3:3}:
> >        __lock_acquire
> >        lock_acquire.part.0
> >        lock_acquire
> >        __mutex_lock
> >        mutex_lock_nested
> >        clk_prepare_lock
> >        clk_get_rate
> >        lpi2c_imx_xfer
> >        __i2c_transfer
> >        i2c_transfer
> >        regmap_i2c_read
> >        _regmap_raw_read
> >        regmap_raw_read
> >        regmap_bulk_read
> >        at24_read
> >        nvmem_reg_read
> >        bin_attr_nvmem_read
> >        sysfs_kf_bin_read
> >        kernfs_fop_read_iter
> >        new_sync_read
> >        vfs_read
> >        ksys_read
> >        __arm64_sys_read
> >        invoke_syscall
> >        ...
> > 
> > other info that might help us debug this:
> > 
> > Chain exists of:
> >   prepare_lock --> rtc_pcf85063:560:(&config->regmap)->lock -->
> > 
> > i2c_register_adapter
> > 
> >  Possible unsafe locking scenario:
> >        CPU0                    CPU1
> >        ----                    ----
> >   
> >   lock(i2c_register_adapter);
> > 
> > lock(rtc_pcf85063:560:(&config->regmap)->lock);
> > lock(i2c_register_adapter);
> > 
> >   lock(prepare_lock);
> >  
> >  *** DEADLOCK ***
> > 
> > 4 locks held by .../2332:
> >  #0: ffff0000146eb288 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_read_iter
> >  #1: ffff000010fe4400 (kn->active#72){.+.+}-{0:0}, at:
> >  kernfs_fop_read_iter
> >  #2: ffff0000110168e8 (&at24->lock){+.+.}-{3:3}, at: at24_read
> > 
> >  #3: ffff000011021100 (i2c_register_adapter){+.+.}-{3:3}, at:
> > i2c_adapter_lock_bus
> > 
> > stack backtrace:
> > CPU: 1 PID: 2332 Comm: ... Tainted: G           O      5.15.71+... #1
> > Hardware name: ... (DT)
> > 
> > Call trace:
> >  dump_backtrace
> >  show_stack
> >  dump_stack_lvl
> >  dump_stack
> >  print_circular_bug
> >  check_noncircular
> >  __lock_acquire
> >  lock_acquire.part.0
> >  lock_acquire
> >  __mutex_lock
> >  mutex_lock_nested
> >  clk_prepare_lock
> >  clk_get_rate
> >  lpi2c_imx_xfer
> >  __i2c_transfer
> >  i2c_transfer
> >  regmap_i2c_read
> >  _regmap_raw_read
> >  regmap_raw_read
> >  regmap_bulk_read
> >  at24_read
> >  nvmem_reg_read
> >  bin_attr_nvmem_read
> >  sysfs_kf_bin_read
> >  kernfs_fop_read_iter
> >  new_sync_read
> >  vfs_read
> >  ksys_read
> >  __arm64_sys_read
> >  invoke_syscall
> >  ...
> > 
> > Fixes: a55fa9d0e42e ("i2c: imx-lpi2c: add low power i2c bus driver")
> > Signed-off-by: Alexander Sverdlin <alexander.sverdlin@siemens.com>
> > ---
> > 
> >  drivers/i2c/busses/i2c-imx-lpi2c.c | 33 +++++++++++++++++++++++++++---
> >  1 file changed, 30 insertions(+), 3 deletions(-)
> > 
> > Changelog:
> > v3: fixed build error reported by kernel test robot <lkp@intel.com>
> > 
> > https://lore.kernel.org/oe-kbuild-all/202303102010.pAv56wKs-lkp@intel.com/
> > v2: added clk_notifier as Alexander suggested
> > 
> > diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c
> > b/drivers/i2c/busses/i2c-imx-lpi2c.c index 188f2a36d2fd6..5f1d1d4e018bd
> > 100644
> > --- a/drivers/i2c/busses/i2c-imx-lpi2c.c
> > +++ b/drivers/i2c/busses/i2c-imx-lpi2c.c
> > @@ -100,6 +100,8 @@ struct lpi2c_imx_struct {
> > 
> >  	__u8			*rx_buf;
> >  	__u8			*tx_buf;
> >  	struct completion	complete;
> > 
> > +	struct notifier_block	clk_change_nb;
> > +	unsigned int		rate_per;
> > 
> >  	unsigned int		msglen;
> >  	unsigned int		delivered;
> >  	unsigned int		block_data;
> > 
> > @@ -198,24 +200,37 @@ static void lpi2c_imx_stop(struct lpi2c_imx_struct
> > *lpi2c_imx) } while (1);
> > 
> >  }
> > 
> > +static int lpi2c_imx_clk_change_cb(struct notifier_block *nb,
> > +				   unsigned long action, void *data)
> > +{
> > +	struct clk_notifier_data *ndata = data;
> > +	struct lpi2c_imx_struct *lpi2c_imx = container_of(nb,
> > +							  struct
> 
> lpi2c_imx_struct,
> 
> > +
> 
> clk_change_nb);
> 
> > +
> > +	if (action & POST_RATE_CHANGE)
> > +		lpi2c_imx->rate_per = ndata->new_rate;
> > +
> > +	return NOTIFY_OK;
> > +}
> > +
> > 
> >  /* CLKLO = I2C_CLK_RATIO * CLKHI, SETHOLD = CLKHI, DATAVD = CLKHI/2 */
> >  static int lpi2c_imx_config(struct lpi2c_imx_struct *lpi2c_imx)
> >  {
> >  
> >  	u8 prescale, filt, sethold, clkhi, clklo, datavd;
> > 
> > -	unsigned int clk_rate, clk_cycle;
> > +	unsigned int clk_cycle;
> > 
> >  	enum lpi2c_imx_pincfg pincfg;
> >  	unsigned int temp;
> >  	
> >  	lpi2c_imx_set_mode(lpi2c_imx);
> > 
> > -	clk_rate = clk_get_rate(lpi2c_imx->clks[0].clk);
> > 
> >  	if (lpi2c_imx->mode == HS || lpi2c_imx->mode == ULTRA_FAST)
> >  	
> >  		filt = 0;
> >  	
> >  	else
> >  	
> >  		filt = 2;
> >  	
> >  	for (prescale = 0; prescale <= 7; prescale++) {
> > 
> > -		clk_cycle = clk_rate / ((1 << prescale) * lpi2c_imx-
> >
> >bitrate)
> >
> > +		clk_cycle = lpi2c_imx->rate_per / ((1 << prescale) *
> 
> lpi2c_imx->bitrate)
> 
> >  			    - 3 - (filt >> 1);
> >  		
> >  		clkhi = (clk_cycle + I2C_CLK_RATIO) / (I2C_CLK_RATIO + 1);
> >  		clklo = clk_cycle - clkhi;
> > 
> > @@ -588,6 +603,18 @@ static int lpi2c_imx_probe(struct platform_device
> > *pdev) if (ret)
> > 
> >  		return ret;
> > 
> > +	lpi2c_imx->clk_change_nb.notifier_call = lpi2c_imx_clk_change_cb;
> > +	ret = devm_clk_notifier_register(&pdev->dev, lpi2c_imx->clks[0].clk,
> > +					 &lpi2c_imx->clk_change_nb);
> > +	if (ret)
> > +		return dev_err_probe(&pdev->dev, ret,
> > +				     "can't register peripheral clock
> 
> notifier\n");
> 
> > +	lpi2c_imx->rate_per = clk_get_rate(lpi2c_imx->clks[0].clk);
> > +	if (!lpi2c_imx->rate_per) {
> > +		dev_err(&pdev->dev, "can't get I2C peripheral clock
> 
> rate\n");
> 
> > +		return -EINVAL;
> > +	}
> > +
> 
> I would switch the order of the calls to devm_clk_notifier_register() and
> clk_get_rate(). AFAICS this looks like a possible lost update. The notifier
> might change rate_per before the (old) value from clk_get_rate is actually
> assigned.
> 
> With this change you fix the following call chain (from a kernel log)
> 
> [    4.562396]        clk_prepare_lock+0x48/0x9c
> [    4.566740]        clk_get_rate+0x1c/0x74
> [    4.570736]        lpi2c_imx_config+0x4c/0x190
> [    4.575167]        lpi2c_imx_master_enable+0x54/0x128
> [    4.580205]        lpi2c_imx_xfer+0x2c/0x3c8
> 
> Unfortuantely this was just hiding another call path from lpi2c_imx_xfer to
> clk_bulk_prepare. Here is my kernel log:
> 
> [   22.264508] ======================================================
> [   22.270697] WARNING: possible circular locking dependency detected
> [   22.276893] 6.3.0-rc7-next-20230420+ #6 Not tainted
> [   22.281782] ------------------------------------------------------
> [   22.287968] kworker/2:3/93 is trying to acquire lock:
> [   22.293034] ffff800009d38bf8 (prepare_lock){+.+.}-{3:3}, at:
> clk_prepare_lock+0x48/0x9c
> [   22.301095]
> [   22.301095] but task is already holding lock:
> [   22.306942] ffff0000039ab100 (i2c_register_adapter){+.+.}-{3:3}, at:
> i2c_adapter_lock_bus+0x20/0x2c
> [   22.316046]
> [   22.316046] which lock already depends on the new lock.
> [   22.316046]
> [   22.324238]
> [   22.324238] the existing dependency chain (in reverse order) is:
> [   22.331727]
> [   22.331727] -> #2 (i2c_register_adapter){+.+.}-{3:3}:
> [   22.338295]        __lock_acquire+0x370/0x6e8
> [   22.342674]        lock_acquire.part.0+0xcc/0x208
> [   22.347399]        lock_acquire+0x94/0x14c
> [   22.351509]        rt_mutex_lock_nested+0x5c/0x98
> [   22.356235]        i2c_adapter_lock_bus+0x20/0x2c
> [   22.360961]        i2c_transfer+0x80/0x114
> [   22.365069]        regmap_i2c_read+0x5c/0xa0
> [   22.369362]        _regmap_raw_read+0x110/0x2dc
> [   22.373905]        _regmap_bus_read+0x40/0x74
> [   22.378274]        _regmap_read+0x74/0x248
> [   22.382393]        regmap_read+0x48/0x70
> [   22.386337]        pcf85063_probe+0xf0/0x4f4
> [   22.390640]        i2c_device_probe+0x100/0x2d4
> [   22.395206]        call_driver_probe+0x28/0x15c
> [   22.399750]        really_probe+0x180/0x320
> [   22.403946]        __driver_probe_device+0x84/0x144
> [   22.408837]        driver_probe_device+0x38/0x150
> [   22.413554]        __device_attach_driver+0xcc/0x194
> [   22.418532]        bus_for_each_drv+0x80/0xdc
> [   22.422910]        __device_attach+0xa8/0x1f8
> [   22.426545] systemd-journald[128]: Oldest entry in /var/log/journal/
> e4579cc7db6747028247b9b859e132d6/system.joural is older than the configured
> file retention duration (1month), suggesting rotation.
> [   22.427278]        device_initial_probe+0x10/0x18
> [   22.427290]        bus_probe_device+0xa4/0xa8
> [   22.427299]        device_add+0x3b0/0x508
> [   22.427311]        device_register+0x1c/0x28
> [   22.427323]        i2c_new_client_device+0x1c8/0x2bc
> [   22.427333]        of_i2c_register_device+0xb4/0xdc
> [   22.427344]        of_i2c_register_devices+0x80/0x154
> [   22.456329] systemd-journald[128]: /var/log/journal/
> e4579cc7db6747028247b9b859e132d6/system.journal: Journal heaer limits
> reached or header out-of-date, rotating.
> [   22.458176]        i2c_register_adapter+0x184/0x4c8
> [   22.458202]        __i2c_add_numbered_adapter+0x5c/0xa4
> [   22.502097]        i2c_add_adapter+0xa0/0xcc
> [   22.502122]        lpi2c_imx_probe+0x23c/0x350
> [   22.502135]        platform_probe+0x64/0xfc
> [   22.502146]        call_driver_probe+0x28/0x15c
> [   22.502155]        really_probe+0x180/0x320
> [   22.502164]        __driver_probe_device+0x84/0x144
> [   22.502173]        driver_probe_device+0x38/0x150
> [   22.502182]        __device_attach_driver+0xcc/0x194
> [   22.502191]        bus_for_each_drv+0x80/0xdc
> [   22.502204]        __device_attach+0xa8/0x1f8
> [   22.502212]        device_initial_probe+0x10/0x18
> [   22.502222]        bus_probe_device+0xa4/0xa8
> [   22.502230]        deferred_probe_work_func+0xa0/0xf0
> [   22.502239]        process_one_work+0x284/0x5b0
> [   22.502252]        worker_thread+0x68/0x39c
> [   22.502263]        kthread+0x104/0x108
> [   22.502274]        ret_from_fork+0x10/0x20
> [   22.502286]
> [   22.502286] -> #1 (rtc_pcf85063:594:(&config->regmap)->lock){+.+.}-{3:3}:
> [   22.502310]        __lock_acquire+0x370/0x6e8
> [   22.502322]        lock_acquire.part.0+0xcc/0x208
> [   22.502332]        lock_acquire+0x94/0x14c
> [   22.502341]        __mutex_lock+0x9c/0x838
> [   22.502353]        mutex_lock_nested+0x20/0x28
> [   22.502364]        regmap_lock_mutex+0x10/0x18
> [   22.502377]        regmap_read+0x38/0x70
> [   22.502389]        pcf85063_clkout_recalc_rate+0x2c/0x78
> [   22.502403]        __clk_core_init+0x46c/0x4e0
> [   22.502417]        __clk_register+0x160/0x23c
> [   22.502429]        devm_clk_register+0x58/0xb4
> [   22.502441]        pcf85063_probe+0x238/0x4f4
> [   22.502451]        i2c_device_probe+0x100/0x2d4
> [   22.502465]        call_driver_probe+0x28/0x15c
> [   22.502473]        really_probe+0x180/0x320
> [   22.502482]        __driver_probe_device+0x84/0x144
> [   22.502491]        driver_probe_device+0x38/0x150
> [   22.502500]        __device_attach_driver+0xcc/0x194
> [   22.502509]        bus_for_each_drv+0x80/0xdc
> [   22.502521]        __device_attach+0xa8/0x1f8
> [   22.502530]        device_initial_probe+0x10/0x18
> [   22.502539]        bus_probe_device+0xa4/0xa8
> [   22.502548]        device_add+0x3b0/0x508
> [   22.502559]        device_register+0x1c/0x28
> [   22.502570]        i2c_new_client_device+0x1c8/0x2bc
> [   22.502580]        of_i2c_register_device+0xb4/0xdc
> [  OK  ] Started Network Time Synchronization[   22.502589]
> of_i2c_register_devices+0x80/0x154
> .[   22.502599]        i2c_register_adapter+0x184/0x4c8
> 
> [   22.502607]        __i2c_add_numbered_adapter+0x5c/0xa4
> [   22.502616]        i2c_add_adapter+0xa0/0xcc
> [   22.502624]        lpi2c_imx_probe+0x23c/0x350
> [   22.502636]        platform_probe+0x64/0xfc
> [   22.502646]        call_driver_probe+0x28/0x15c
> [   22.502655]        really_probe+0x180/0x320
> [   22.502663]        __driver_probe_device+0x84/0x144
> [   22.502672]        driver_probe_device+0x38/0x150
> [   22.502681]        __device_attach_driver+0xcc/0x194
> [   22.502690]        bus_for_each_drv+0x80/0xdc
> [   22.502702]        __device_attach+0xa8/0x1f8
> [   22.502711]        device_initial_probe+0x10/0x18
> [   22.502720]        bus_probe_device+0xa4/0xa8
> [   22.502729]        deferred_probe_work_func+0xa0/0xf0
> [   22.502738]        process_one_work+0x284/0x5b0
> [   22.502749]        worker_thread+0x68/0x39c
> [   22.502760]        kthread+0x104/0x108
> [   22.502770]        ret_from_fork+0x10/0x20
> [   22.502779]
> [   22.502779] -> #0 (prepare_lock){+.+.}-{3:3}:
> [   22.502799]        check_prev_add+0xb0/0xc80
> [   22.502809]        validate_chain+0x444/0x510
> [   22.502818]        __lock_acquire+0x370/0x6e8
> [   22.502827]        lock_acquire.part.0+0xcc/0x208
> [   22.502837]        lock_acquire+0x94/0x14c
> [   22.502846]        __mutex_lock+0x9c/0x838
> [   22.502857]        mutex_lock_nested+0x20/0x28
> [   22.502867]        clk_prepare_lock+0x48/0x9c
> [   22.502878]        clk_prepare+0x1c/0x3c
> [   22.502890]        clk_bulk_prepare+0x48/0xcc
> [   22.502899]        lpi2c_runtime_resume+0x30/0x84
> [   22.502910]        pm_generic_runtime_resume+0x28/0x3c
> [   22.502923]        __genpd_runtime_resume+0x2c/0xa0
> [   22.502935]        genpd_runtime_resume+0xbc/0x308
> [   22.502944]        __rpm_callback+0x44/0x19c
> [   22.502953]        rpm_callback+0x64/0x70
> [   22.502962]        rpm_resume+0x438/0x6d8
> [   22.502970]        __pm_runtime_resume+0x54/0xb0
> [   22.502978]        lpi2c_imx_master_enable+0x24/0x128
> [   22.502989]        lpi2c_imx_xfer+0x2c/0x3c8
> [   22.502999]        __i2c_transfer+0xe4/0x5a0
> [   22.503008]        i2c_transfer+0x90/0x114
> [   22.503016]        i2c_transfer_buffer_flags+0x58/0x84
> [   22.503025]        regmap_i2c_write+0x1c/0x4c
> [   22.503035]        _regmap_raw_write_impl+0x7dc/0x944
> [   22.503044]        _regmap_bus_raw_write+0x5c/0x74
> [   22.503052]        _regmap_write+0x64/0x238
> [   22.503060]        _regmap_update_bits+0x100/0x11c
> [   22.503069]        regmap_update_bits_base+0x60/0x90
> [   22.503077]        pca953x_gpio_set_value+0x74/0x90
> [   22.503088]        gpiod_set_raw_value_commit+0x6c/0x7c
> [   22.503098]        gpiod_set_value_nocheck+0x68/0x70
> [   22.503107]        gpiod_set_value_cansleep+0x3c/0xa8
> [   22.503116]        gpio_led_set_blocking+0x54/0x7c
> [   22.503128]        set_brightness_delayed+0x90/0xd8
> [   22.503138]        process_one_work+0x284/0x5b0
> [   22.503149]        worker_thread+0x68/0x39c
> [   22.503160]        kthread+0x104/0x108
> [   22.503169]        ret_from_fork+0x10/0x20
> [   22.503179]
> [   22.503179] other info that might help us debug this:
> [   22.503179]
> [   22.503183] Chain exists of:
> [   22.503183]   prepare_lock --> rtc_pcf85063:594:(&config->regmap)->lock
> --> i2c_register_adapter
> [   22.503183]
> [   22.503207]  Possible unsafe locking scenario:
> [   22.503207]
> [   22.503210]        CPU0                    CPU1
> [   22.503213]        ----                    ----
> [   22.503216]   lock(i2c_register_adapter);
> [   22.503225]                               
> lock(rtc_pcf85063:594:(&config-
> >regmap)->lock);
> 
> [   22.503235]                                lock(i2c_register_adapter);
> [   22.503244]   lock(prepare_lock);
> [   22.503253]
> [   22.503253]  *** DEADLOCK ***
> 
> Now lpi2c_runtime_resume will call into clk_prepare() which also calls
> clk_prepare_lock() (identical to clk_get_rate).
> 
> Best regards,
> Alexader
> 
> >  	pm_runtime_set_autosuspend_delay(&pdev->dev, I2C_PM_TIMEOUT);
> >  	pm_runtime_use_autosuspend(&pdev->dev);
> >  	pm_runtime_get_noresume(&pdev->dev);

With commit fa39065833db ("i2c: imx-lpi2c: avoid taking clk_prepare mutex in 
PM callbacks") in place, the additional deadlock warning regarding runtime 
resume is gone.
So only the ordering issue needs to be addressed.

Best regards,
Alexander
A. Sverdlin May 15, 2023, 12:04 p.m. UTC | #10
Hello Alexander!

On Mon, 2023-05-15 at 11:11 +0200, Alexander Stein wrote:
> > [   22.503179] other info that might help us debug this:
> > [   22.503179]
> > [   22.503183] Chain exists of:
> > [   22.503183]   prepare_lock --> rtc_pcf85063:594:(&config-
> > >regmap)->lock
> > --> i2c_register_adapter
> > [   22.503183]
> > [   22.503207]  Possible unsafe locking scenario:
> > [   22.503207]
> > [   22.503210]        CPU0                    CPU1
> > [   22.503213]        ----                    ----
> > [   22.503216]   lock(i2c_register_adapter);
> > [   22.503225]                               
> > lock(rtc_pcf85063:594:(&config-
> > > regmap)->lock);
> > 
> > [   22.503235]                               
> > lock(i2c_register_adapter);
> > [   22.503244]   lock(prepare_lock);
> > [   22.503253]
> > [   22.503253]  *** DEADLOCK ***
> > 
> > Now lpi2c_runtime_resume will call into clk_prepare() which also
> > calls
> > clk_prepare_lock() (identical to clk_get_rate).
> > 
> > Best regards,
> > Alexader
> > 
> > >         pm_runtime_set_autosuspend_delay(&pdev->dev,
> > > I2C_PM_TIMEOUT);
> > >         pm_runtime_use_autosuspend(&pdev->dev);
> > >         pm_runtime_get_noresume(&pdev->dev);
> 
> With commit fa39065833db ("i2c: imx-lpi2c: avoid taking clk_prepare
> mutex in 
> PM callbacks") in place, the additional deadlock warning regarding
> runtime 
> resume is gone.
> So only the ordering issue needs to be addressed.

Unfortunately with both your (backported) and my patches applied I've
got a 3rd warning:

======================================================
WARNING: possible circular locking dependency detected
5.15.71+git8e43aee #1 Tainted: G           O     
------------------------------------------------------
xxxxxxx/2238 is trying to acquire lock:
ffff0000040bda40 (&desc->request_mutex){+.+.}-{3:3}, at:
__setup_irq+0xbc/0x8bc

but task is already holding lock:
ffff000010ef1100 (i2c_register_adapter){+.+.}-{3:3}, at:
i2c_adapter_lock_bus+0x2c/0x3c

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #5 (i2c_register_adapter){+.+.}-{3:3}:
       lock_acquire+0x68/0x84
       rt_mutex_lock_nested+0x88/0xe0
       i2c_adapter_lock_bus+0x2c/0x3c
       i2c_transfer+0x58/0x130
       regmap_i2c_read+0x64/0xb0
       _regmap_raw_read+0x114/0x440
       _regmap_bus_read+0x4c/0x84
       _regmap_read+0x6c/0x270
       regmap_read+0x54/0x80
       pcf85063_probe+0xec/0x4cc
       i2c_device_probe+0x10c/0x350
       really_probe+0xc4/0x470
       __driver_probe_device+0x11c/0x190
       driver_probe_device+0x48/0x110
       __device_attach_driver+0xc4/0x160
       bus_for_each_drv+0x80/0xe0
       __device_attach+0xb0/0x1f0
       device_initial_probe+0x1c/0x2c
       bus_probe_device+0xa4/0xb0
       device_add+0x398/0x8ac
       device_register+0x28/0x40
       i2c_new_client_device+0x144/0x290
       of_i2c_register_devices+0x18c/0x230
       i2c_register_adapter+0x1dc/0x6b0
       __i2c_add_numbered_adapter+0x68/0xbc
       i2c_add_adapter+0xb0/0xe0
       lpi2c_imx_probe+0x3a4/0x670
       platform_probe+0x70/0xec
       really_probe+0xc4/0x470
       __driver_probe_device+0x11c/0x190
       driver_probe_device+0x48/0x110
       __device_attach_driver+0xc4/0x160
       bus_for_each_drv+0x80/0xe0
       __device_attach+0xb0/0x1f0
       device_initial_probe+0x1c/0x2c
       bus_probe_device+0xa4/0xb0
       deferred_probe_work_func+0xb8/0x110
       process_one_work+0x27c/0x6c4
       worker_thread+0x7c/0x450
       kthread+0x168/0x17c
       ret_from_fork+0x10/0x20

-> #4 (rtc_pcf85063:560:(&config->regmap)->lock){+.+.}-{3:3}:
       lock_acquire+0x68/0x84
       __mutex_lock+0xa8/0x4d0
       mutex_lock_nested+0x48/0x54
       regmap_lock_mutex+0x1c/0x30
       regmap_read+0x44/0x80
       pcf85063_clkout_recalc_rate+0x34/0x80
       __clk_register+0x520/0x880
       devm_clk_register+0x64/0xc4
       pcf85063_probe+0x24c/0x4cc
       i2c_device_probe+0x10c/0x350
       really_probe+0xc4/0x470
       __driver_probe_device+0x11c/0x190
       driver_probe_device+0x48/0x110
       __device_attach_driver+0xc4/0x160
       bus_for_each_drv+0x80/0xe0
       __device_attach+0xb0/0x1f0
       device_initial_probe+0x1c/0x2c
       bus_probe_device+0xa4/0xb0
       device_add+0x398/0x8ac
       device_register+0x28/0x40
       i2c_new_client_device+0x144/0x290
       of_i2c_register_devices+0x18c/0x230
       i2c_register_adapter+0x1dc/0x6b0
       __i2c_add_numbered_adapter+0x68/0xbc
       i2c_add_adapter+0xb0/0xe0
       lpi2c_imx_probe+0x3a4/0x670
       platform_probe+0x70/0xec
       really_probe+0xc4/0x470
       __driver_probe_device+0x11c/0x190
       driver_probe_device+0x48/0x110
       __device_attach_driver+0xc4/0x160
       bus_for_each_drv+0x80/0xe0
       __device_attach+0xb0/0x1f0
       device_initial_probe+0x1c/0x2c
       bus_probe_device+0xa4/0xb0
       deferred_probe_work_func+0xb8/0x110
       process_one_work+0x27c/0x6c4
       worker_thread+0x7c/0x450
       kthread+0x168/0x17c
       ret_from_fork+0x10/0x20

-> #3 (prepare_lock){+.+.}-{3:3}:
       lock_acquire+0x68/0x84
       __mutex_lock+0xa8/0x4d0
       mutex_lock_nested+0x48/0x54
       clk_prepare_lock+0x50/0xb0
       clk_prepare+0x28/0x50
       fec_runtime_resume+0x3c/0xe0
       pm_generic_runtime_resume+0x34/0x50
       __genpd_runtime_resume+0x38/0x90
       genpd_runtime_resume+0xa0/0x304
       __rpm_callback+0x50/0x1b0
       rpm_callback+0x40/0x80
       rpm_resume+0x448/0x6e0
       __pm_runtime_resume+0x50/0xc0
       fec_enet_mdio_read+0x48/0x190
       __mdiobus_read+0x48/0xb0
       mdiobus_read_nested+0x48/0x70
       mv88e6xxx_smi_dual_direct_read+0x2c/0x50
       mv88e6xxx_read+0x64/0xf0
       mv88e6xxx_g1_read+0x28/0x34
       mv88e6xxx_g1_irq_thread_work+0x50/0x17c
       mv88e6xxx_irq_poll+0x28/0x50
       kthread_worker_fn+0x100/0x480
       kthread+0x168/0x17c
       ret_from_fork+0x10/0x20

-> #2 (&bus->mdio_lock/2){+.+.}-{3:3}:
       lock_acquire+0x68/0x84
       __mutex_lock+0xa8/0x4d0
       mutex_lock_nested+0x48/0x54
       mdiobus_read_nested+0x38/0x70
       mv88e6xxx_smi_dual_direct_read+0x2c/0x50
       mv88e6xxx_read+0x64/0xf0
       mv88e6xxx_port_read+0x24/0x30
       mv88e6xxx_probe+0x250/0x7f0
       mdio_probe+0x3c/0x80
       really_probe+0xc4/0x470
       __driver_probe_device+0x11c/0x190
       driver_probe_device+0x48/0x110
       __device_attach_driver+0xc4/0x160
       bus_for_each_drv+0x80/0xe0
       __device_attach+0xb0/0x1f0
       device_initial_probe+0x1c/0x2c
       bus_probe_device+0xa4/0xb0
       deferred_probe_work_func+0xb8/0x110
       process_one_work+0x27c/0x6c4
       worker_thread+0x7c/0x450
       kthread+0x168/0x17c
       ret_from_fork+0x10/0x20

-> #1 (&chip->reg_lock){+.+.}-{3:3}:
       lock_acquire+0x68/0x84
       __mutex_lock+0xa8/0x4d0
       mutex_lock_nested+0x48/0x54
       mv88e6xxx_g1_irq_bus_lock+0x24/0x30
       __setup_irq+0x6e0/0x8bc
       request_threaded_irq+0xf4/0x1b4
       mv88e6xxx_g2_irq_setup+0x1f0/0x2b4
       mv88e6xxx_probe+0x480/0x7f0
       mdio_probe+0x3c/0x80
       really_probe+0xc4/0x470
       __driver_probe_device+0x11c/0x190
       driver_probe_device+0x48/0x110
       __device_attach_driver+0xc4/0x160
       bus_for_each_drv+0x80/0xe0
       __device_attach+0xb0/0x1f0
       device_initial_probe+0x1c/0x2c
       bus_probe_device+0xa4/0xb0
       deferred_probe_work_func+0xb8/0x110
       process_one_work+0x27c/0x6c4
       worker_thread+0x7c/0x450
       kthread+0x168/0x17c
       ret_from_fork+0x10/0x20

-> #0 (&desc->request_mutex){+.+.}-{3:3}:
       __lock_acquire+0x12a4/0x20a0
       lock_acquire.part.0+0xe0/0x230
       lock_acquire+0x68/0x84
       __mutex_lock+0xa8/0x4d0
       mutex_lock_nested+0x48/0x54
       __setup_irq+0xbc/0x8bc
       request_threaded_irq+0xf4/0x1b4
       devm_request_threaded_irq+0x88/0x104
       lpi2c_runtime_resume+0x70/0xe4
       pm_generic_runtime_resume+0x34/0x50
       __genpd_runtime_resume+0x38/0x90
       genpd_runtime_resume+0xa0/0x304
       __rpm_callback+0x50/0x1b0
       rpm_callback+0x74/0x80
       rpm_resume+0x448/0x6e0
       __pm_runtime_resume+0x50/0xc0
       lpi2c_imx_xfer+0x60/0xa5c
       __i2c_transfer+0x174/0xa80
       i2c_transfer+0x68/0x130
       regmap_i2c_read+0x64/0xb0
       _regmap_raw_read+0x114/0x440
       regmap_raw_read+0x19c/0x28c
       regmap_bulk_read+0x1b8/0x244
       at24_read+0x14c/0x2c4
       nvmem_reg_read+0x2c/0x54
       bin_attr_nvmem_read+0x8c/0xbc
       sysfs_kf_bin_read+0x74/0x94
       kernfs_fop_read_iter+0xb0/0x1d0
       new_sync_read+0xf0/0x184
       vfs_read+0x154/0x1f0
       ksys_read+0x70/0x100
       __arm64_sys_read+0x24/0x30
       invoke_syscall+0x50/0x120
       el0_svc_common.constprop.0+0x68/0x124
       do_el0_svc+0x30/0x9c
       el0_svc+0x54/0x110
       el0t_64_sync_handler+0xa4/0x130
       el0t_64_sync+0x1a0/0x1a4

other info that might help us debug this:

Chain exists of:
  &desc->request_mutex --> rtc_pcf85063:560:(&config->regmap)->lock -->
i2c_register_adapter

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(i2c_register_adapter);
                               lock(rtc_pcf85063:560:(&config->regmap)-
>lock);
                               lock(i2c_register_adapter);
  lock(&desc->request_mutex);

 *** DEADLOCK ***

4 locks held by xxxxxxx/2238:
 #0: ffff000015a64488 (&of->mutex){+.+.}-{3:3}, at:
kernfs_fop_read_iter+0x74/0x1d0
 #1: ffff0000119e2400 (kn->active#58){.+.+}-{0:0}, at:
kernfs_fop_read_iter+0x7c/0x1d0
 #2: ffff0000119a26e8 (&at24->lock){+.+.}-{3:3}, at:
at24_read+0x8c/0x2c4
 #3: ffff000010ef1100 (i2c_register_adapter){+.+.}-{3:3}, at:
i2c_adapter_lock_bus+0x2c/0x3c

stack backtrace:
CPU: 1 PID: 2238 Comm: xxxxxxx Tainted: G           O     
5.15.71+git8e43aee #1
Hardware name: Siemens PXC5.E24 (DT)
Call trace:
 dump_backtrace+0x0/0x1d4
 show_stack+0x20/0x2c
 dump_stack_lvl+0x8c/0xb8
 dump_stack+0x18/0x34
 print_circular_bug+0x1f8/0x200
 check_noncircular+0x130/0x144
 __lock_acquire+0x12a4/0x20a0
 lock_acquire.part.0+0xe0/0x230
 lock_acquire+0x68/0x84
 __mutex_lock+0xa8/0x4d0
 mutex_lock_nested+0x48/0x54
 __setup_irq+0xbc/0x8bc
 request_threaded_irq+0xf4/0x1b4
 devm_request_threaded_irq+0x88/0x104
 lpi2c_runtime_resume+0x70/0xe4
 pm_generic_runtime_resume+0x34/0x50
 __genpd_runtime_resume+0x38/0x90
 genpd_runtime_resume+0xa0/0x304
 __rpm_callback+0x50/0x1b0
 rpm_callback+0x74/0x80
 rpm_resume+0x448/0x6e0
 __pm_runtime_resume+0x50/0xc0
 lpi2c_imx_xfer+0x60/0xa5c
 __i2c_transfer+0x174/0xa80
 i2c_transfer+0x68/0x130
 regmap_i2c_read+0x64/0xb0
 _regmap_raw_read+0x114/0x440
 regmap_raw_read+0x19c/0x28c
 regmap_bulk_read+0x1b8/0x244
 at24_read+0x14c/0x2c4
 nvmem_reg_read+0x2c/0x54
 bin_attr_nvmem_read+0x8c/0xbc
 sysfs_kf_bin_read+0x74/0x94
 kernfs_fop_read_iter+0xb0/0x1d0
 new_sync_read+0xf0/0x184
 vfs_read+0x154/0x1f0
 ksys_read+0x70/0x100
 __arm64_sys_read+0x24/0x30
 invoke_syscall+0x50/0x120
 el0_svc_common.constprop.0+0x68/0x124
 do_el0_svc+0x30/0x9c
 el0_svc+0x54/0x110
 el0t_64_sync_handler+0xa4/0x130
 el0t_64_sync+0x1a0/0x1a4

So this doesn't look like an end of the story yet, unfortunately.
Alexander Stein May 15, 2023, 12:32 p.m. UTC | #11
Hi Alexander,

Am Montag, 15. Mai 2023, 14:04:10 CEST schrieb Sverdlin, Alexander:
> Hello Alexander!
> 
> On Mon, 2023-05-15 at 11:11 +0200, Alexander Stein wrote:
> 
> > > [   22.503179] other info that might help us debug this:
> > > [   22.503179]
> > > [   22.503183] Chain exists of:
> > > [   22.503183]   prepare_lock --> rtc_pcf85063:594:(&config-
> > > 
> > > >regmap)->lock
> > > 
> > > --> i2c_register_adapter
> > > [   22.503183]
> > > [   22.503207]  Possible unsafe locking scenario:
> > > [   22.503207]
> > > [   22.503210]        CPU0                    CPU1
> > > [   22.503213]        ----                    ----
> > > [   22.503216]   lock(i2c_register_adapter);
> > > [   22.503225]                               
> > > lock(rtc_pcf85063:594:(&config-
> > > 
> > > > regmap)->lock);
> > > 
> > > 
> > > [   22.503235]                               
> > > lock(i2c_register_adapter);
> > > [   22.503244]   lock(prepare_lock);
> > > [   22.503253]
> > > [   22.503253]  *** DEADLOCK ***
> > > 
> > > Now lpi2c_runtime_resume will call into clk_prepare() which also
> > > calls
> > > clk_prepare_lock() (identical to clk_get_rate).
> > > 
> > > Best regards,
> > > Alexader
> > > 
> > > 
> > > >         pm_runtime_set_autosuspend_delay(&pdev->dev,
> > > > I2C_PM_TIMEOUT);
> > > >         pm_runtime_use_autosuspend(&pdev->dev);
> > > >         pm_runtime_get_noresume(&pdev->dev);
> > 
> > 
> > With commit fa39065833db ("i2c: imx-lpi2c: avoid taking clk_prepare
> > mutex in 
> > PM callbacks") in place, the additional deadlock warning regarding
> > runtime 
> > resume is gone.
> > So only the ordering issue needs to be addressed.
> 
> 
> Unfortunately with both your (backported) and my patches applied I've
> got a 3rd warning:
> 
> ======================================================
> WARNING: possible circular locking dependency detected
> 5.15.71+git8e43aee #1 Tainted: G           O     
> ------------------------------------------------------
> xxxxxxx/2238 is trying to acquire lock:
> ffff0000040bda40 (&desc->request_mutex){+.+.}-{3:3}, at:
> __setup_irq+0xbc/0x8bc
> 
> but task is already holding lock:
> ffff000010ef1100 (i2c_register_adapter){+.+.}-{3:3}, at:
> i2c_adapter_lock_bus+0x2c/0x3c
> 
> which lock already depends on the new lock.
> 
> 
> the existing dependency chain (in reverse order) is:
> 
> -> #5 (i2c_register_adapter){+.+.}-{3:3}:
>        lock_acquire+0x68/0x84
>        rt_mutex_lock_nested+0x88/0xe0
>        i2c_adapter_lock_bus+0x2c/0x3c
>        i2c_transfer+0x58/0x130
>        regmap_i2c_read+0x64/0xb0
>        _regmap_raw_read+0x114/0x440
>        _regmap_bus_read+0x4c/0x84
>        _regmap_read+0x6c/0x270
>        regmap_read+0x54/0x80
>        pcf85063_probe+0xec/0x4cc
>        i2c_device_probe+0x10c/0x350
>        really_probe+0xc4/0x470
>        __driver_probe_device+0x11c/0x190
>        driver_probe_device+0x48/0x110
>        __device_attach_driver+0xc4/0x160
>        bus_for_each_drv+0x80/0xe0
>        __device_attach+0xb0/0x1f0
>        device_initial_probe+0x1c/0x2c
>        bus_probe_device+0xa4/0xb0
>        device_add+0x398/0x8ac
>        device_register+0x28/0x40
>        i2c_new_client_device+0x144/0x290
>        of_i2c_register_devices+0x18c/0x230
>        i2c_register_adapter+0x1dc/0x6b0
>        __i2c_add_numbered_adapter+0x68/0xbc
>        i2c_add_adapter+0xb0/0xe0
>        lpi2c_imx_probe+0x3a4/0x670
>        platform_probe+0x70/0xec
>        really_probe+0xc4/0x470
>        __driver_probe_device+0x11c/0x190
>        driver_probe_device+0x48/0x110
>        __device_attach_driver+0xc4/0x160
>        bus_for_each_drv+0x80/0xe0
>        __device_attach+0xb0/0x1f0
>        device_initial_probe+0x1c/0x2c
>        bus_probe_device+0xa4/0xb0
>        deferred_probe_work_func+0xb8/0x110
>        process_one_work+0x27c/0x6c4
>        worker_thread+0x7c/0x450
>        kthread+0x168/0x17c
>        ret_from_fork+0x10/0x20
> 
> -> #4 (rtc_pcf85063:560:(&config->regmap)->lock){+.+.}-{3:3}:
>        lock_acquire+0x68/0x84
>        __mutex_lock+0xa8/0x4d0
>        mutex_lock_nested+0x48/0x54
>        regmap_lock_mutex+0x1c/0x30
>        regmap_read+0x44/0x80
>        pcf85063_clkout_recalc_rate+0x34/0x80
>        __clk_register+0x520/0x880
>        devm_clk_register+0x64/0xc4
>        pcf85063_probe+0x24c/0x4cc
>        i2c_device_probe+0x10c/0x350
>        really_probe+0xc4/0x470
>        __driver_probe_device+0x11c/0x190
>        driver_probe_device+0x48/0x110
>        __device_attach_driver+0xc4/0x160
>        bus_for_each_drv+0x80/0xe0
>        __device_attach+0xb0/0x1f0
>        device_initial_probe+0x1c/0x2c
>        bus_probe_device+0xa4/0xb0
>        device_add+0x398/0x8ac
>        device_register+0x28/0x40
>        i2c_new_client_device+0x144/0x290
>        of_i2c_register_devices+0x18c/0x230
>        i2c_register_adapter+0x1dc/0x6b0
>        __i2c_add_numbered_adapter+0x68/0xbc
>        i2c_add_adapter+0xb0/0xe0
>        lpi2c_imx_probe+0x3a4/0x670
>        platform_probe+0x70/0xec
>        really_probe+0xc4/0x470
>        __driver_probe_device+0x11c/0x190
>        driver_probe_device+0x48/0x110
>        __device_attach_driver+0xc4/0x160
>        bus_for_each_drv+0x80/0xe0
>        __device_attach+0xb0/0x1f0
>        device_initial_probe+0x1c/0x2c
>        bus_probe_device+0xa4/0xb0
>        deferred_probe_work_func+0xb8/0x110
>        process_one_work+0x27c/0x6c4
>        worker_thread+0x7c/0x450
>        kthread+0x168/0x17c
>        ret_from_fork+0x10/0x20
> 
> -> #3 (prepare_lock){+.+.}-{3:3}:
>        lock_acquire+0x68/0x84
>        __mutex_lock+0xa8/0x4d0
>        mutex_lock_nested+0x48/0x54
>        clk_prepare_lock+0x50/0xb0
>        clk_prepare+0x28/0x50
>        fec_runtime_resume+0x3c/0xe0
>        pm_generic_runtime_resume+0x34/0x50
>        __genpd_runtime_resume+0x38/0x90
>        genpd_runtime_resume+0xa0/0x304
>        __rpm_callback+0x50/0x1b0
>        rpm_callback+0x40/0x80
>        rpm_resume+0x448/0x6e0
>        __pm_runtime_resume+0x50/0xc0
>        fec_enet_mdio_read+0x48/0x190
>        __mdiobus_read+0x48/0xb0
>        mdiobus_read_nested+0x48/0x70
>        mv88e6xxx_smi_dual_direct_read+0x2c/0x50
>        mv88e6xxx_read+0x64/0xf0
>        mv88e6xxx_g1_read+0x28/0x34
>        mv88e6xxx_g1_irq_thread_work+0x50/0x17c
>        mv88e6xxx_irq_poll+0x28/0x50
>        kthread_worker_fn+0x100/0x480
>        kthread+0x168/0x17c
>        ret_from_fork+0x10/0x20
> 
> -> #2 (&bus->mdio_lock/2){+.+.}-{3:3}:
>        lock_acquire+0x68/0x84
>        __mutex_lock+0xa8/0x4d0
>        mutex_lock_nested+0x48/0x54
>        mdiobus_read_nested+0x38/0x70
>        mv88e6xxx_smi_dual_direct_read+0x2c/0x50
>        mv88e6xxx_read+0x64/0xf0
>        mv88e6xxx_port_read+0x24/0x30
>        mv88e6xxx_probe+0x250/0x7f0
>        mdio_probe+0x3c/0x80
>        really_probe+0xc4/0x470
>        __driver_probe_device+0x11c/0x190
>        driver_probe_device+0x48/0x110
>        __device_attach_driver+0xc4/0x160
>        bus_for_each_drv+0x80/0xe0
>        __device_attach+0xb0/0x1f0
>        device_initial_probe+0x1c/0x2c
>        bus_probe_device+0xa4/0xb0
>        deferred_probe_work_func+0xb8/0x110
>        process_one_work+0x27c/0x6c4
>        worker_thread+0x7c/0x450
>        kthread+0x168/0x17c
>        ret_from_fork+0x10/0x20
> 
> -> #1 (&chip->reg_lock){+.+.}-{3:3}:
>        lock_acquire+0x68/0x84
>        __mutex_lock+0xa8/0x4d0
>        mutex_lock_nested+0x48/0x54
>        mv88e6xxx_g1_irq_bus_lock+0x24/0x30
>        __setup_irq+0x6e0/0x8bc
>        request_threaded_irq+0xf4/0x1b4
>        mv88e6xxx_g2_irq_setup+0x1f0/0x2b4
>        mv88e6xxx_probe+0x480/0x7f0
>        mdio_probe+0x3c/0x80
>        really_probe+0xc4/0x470
>        __driver_probe_device+0x11c/0x190
>        driver_probe_device+0x48/0x110
>        __device_attach_driver+0xc4/0x160
>        bus_for_each_drv+0x80/0xe0
>        __device_attach+0xb0/0x1f0
>        device_initial_probe+0x1c/0x2c
>        bus_probe_device+0xa4/0xb0
>        deferred_probe_work_func+0xb8/0x110
>        process_one_work+0x27c/0x6c4
>        worker_thread+0x7c/0x450
>        kthread+0x168/0x17c
>        ret_from_fork+0x10/0x20
> 
> -> #0 (&desc->request_mutex){+.+.}-{3:3}:
>        __lock_acquire+0x12a4/0x20a0
>        lock_acquire.part.0+0xe0/0x230
>        lock_acquire+0x68/0x84
>        __mutex_lock+0xa8/0x4d0
>        mutex_lock_nested+0x48/0x54
>        __setup_irq+0xbc/0x8bc
>        request_threaded_irq+0xf4/0x1b4
>        devm_request_threaded_irq+0x88/0x104
>        lpi2c_runtime_resume+0x70/0xe4

This looks like the NXP downstream kernel, requesting the IRQ from within 
runtime resume.

>        pm_generic_runtime_resume+0x34/0x50
>        __genpd_runtime_resume+0x38/0x90
>        genpd_runtime_resume+0xa0/0x304
>        __rpm_callback+0x50/0x1b0
>        rpm_callback+0x74/0x80
>        rpm_resume+0x448/0x6e0
>        __pm_runtime_resume+0x50/0xc0
>        lpi2c_imx_xfer+0x60/0xa5c
>        __i2c_transfer+0x174/0xa80
>        i2c_transfer+0x68/0x130
>        regmap_i2c_read+0x64/0xb0
>        _regmap_raw_read+0x114/0x440
>        regmap_raw_read+0x19c/0x28c
>        regmap_bulk_read+0x1b8/0x244
>        at24_read+0x14c/0x2c4
>        nvmem_reg_read+0x2c/0x54
>        bin_attr_nvmem_read+0x8c/0xbc
>        sysfs_kf_bin_read+0x74/0x94
>        kernfs_fop_read_iter+0xb0/0x1d0
>        new_sync_read+0xf0/0x184
>        vfs_read+0x154/0x1f0
>        ksys_read+0x70/0x100
>        __arm64_sys_read+0x24/0x30
>        invoke_syscall+0x50/0x120
>        el0_svc_common.constprop.0+0x68/0x124
>        do_el0_svc+0x30/0x9c
>        el0_svc+0x54/0x110
>        el0t_64_sync_handler+0xa4/0x130
>        el0t_64_sync+0x1a0/0x1a4
> 
> other info that might help us debug this:
> 
> Chain exists of:
>   &desc->request_mutex --> rtc_pcf85063:560:(&config->regmap)->lock -->
> i2c_register_adapter
> 
>  Possible unsafe locking scenario:
> 
>        CPU0                    CPU1
>        ----                    ----
>   lock(i2c_register_adapter);
>                                lock(rtc_pcf85063:560:(&config->regmap)-
> 
> >lock);
> 
>                                lock(i2c_register_adapter);
>   lock(&desc->request_mutex);
> 
>  *** DEADLOCK ***
> 
> 4 locks held by xxxxxxx/2238:
>  #0: ffff000015a64488 (&of->mutex){+.+.}-{3:3}, at:
> kernfs_fop_read_iter+0x74/0x1d0
>  #1: ffff0000119e2400 (kn->active#58){.+.+}-{0:0}, at:
> kernfs_fop_read_iter+0x7c/0x1d0
>  #2: ffff0000119a26e8 (&at24->lock){+.+.}-{3:3}, at:
> at24_read+0x8c/0x2c4
>  #3: ffff000010ef1100 (i2c_register_adapter){+.+.}-{3:3}, at:
> i2c_adapter_lock_bus+0x2c/0x3c
> 
> stack backtrace:
> CPU: 1 PID: 2238 Comm: xxxxxxx Tainted: G           O     
> 5.15.71+git8e43aee #1
> Hardware name: Siemens PXC5.E24 (DT)
> Call trace:
>  dump_backtrace+0x0/0x1d4
>  show_stack+0x20/0x2c
>  dump_stack_lvl+0x8c/0xb8
>  dump_stack+0x18/0x34
>  print_circular_bug+0x1f8/0x200
>  check_noncircular+0x130/0x144
>  __lock_acquire+0x12a4/0x20a0
>  lock_acquire.part.0+0xe0/0x230
>  lock_acquire+0x68/0x84
>  __mutex_lock+0xa8/0x4d0
>  mutex_lock_nested+0x48/0x54
>  __setup_irq+0xbc/0x8bc
>  request_threaded_irq+0xf4/0x1b4
>  devm_request_threaded_irq+0x88/0x104
>  lpi2c_runtime_resume+0x70/0xe4

It seems your (possible) deadlock is triggered along devm_request_irq() during 
resume.
As mainline just enables the clocks, there is nothing we can do here. Your 
patch still is sensible, can you send a new version with the review addressed? 
Thanks.

Best regards,
Alexander

>  pm_generic_runtime_resume+0x34/0x50
>  __genpd_runtime_resume+0x38/0x90
>  genpd_runtime_resume+0xa0/0x304
>  __rpm_callback+0x50/0x1b0
>  rpm_callback+0x74/0x80
>  rpm_resume+0x448/0x6e0
>  __pm_runtime_resume+0x50/0xc0
>  lpi2c_imx_xfer+0x60/0xa5c
>  __i2c_transfer+0x174/0xa80
>  i2c_transfer+0x68/0x130
>  regmap_i2c_read+0x64/0xb0
>  _regmap_raw_read+0x114/0x440
>  regmap_raw_read+0x19c/0x28c
>  regmap_bulk_read+0x1b8/0x244
>  at24_read+0x14c/0x2c4
>  nvmem_reg_read+0x2c/0x54
>  bin_attr_nvmem_read+0x8c/0xbc
>  sysfs_kf_bin_read+0x74/0x94
>  kernfs_fop_read_iter+0xb0/0x1d0
>  new_sync_read+0xf0/0x184
>  vfs_read+0x154/0x1f0
>  ksys_read+0x70/0x100
>  __arm64_sys_read+0x24/0x30
>  invoke_syscall+0x50/0x120
>  el0_svc_common.constprop.0+0x68/0x124
>  do_el0_svc+0x30/0x9c
>  el0_svc+0x54/0x110
>  el0t_64_sync_handler+0xa4/0x130
>  el0t_64_sync+0x1a0/0x1a4
> 
> So this doesn't look like an end of the story yet, unfortunately.
> 
> -- 
> Alexander Sverdlin
> Siemens AG
> www.siemens.com
A. Sverdlin May 15, 2023, 12:37 p.m. UTC | #12
Hello Alexander!

On Mon, 2023-05-15 at 14:32 +0200, Alexander Stein wrote:
> > stack backtrace:
> > CPU: 1 PID: 2238 Comm: xxxxxxx Tainted: G           O     
> > 5.15.71+git8e43aee #1
> > Hardware name: Siemens PXC5.E24 (DT)
> > Call trace:
> >   dump_backtrace+0x0/0x1d4
> >   show_stack+0x20/0x2c
> >   dump_stack_lvl+0x8c/0xb8
> >   dump_stack+0x18/0x34
> >   print_circular_bug+0x1f8/0x200
> >   check_noncircular+0x130/0x144
> >   __lock_acquire+0x12a4/0x20a0
> >   lock_acquire.part.0+0xe0/0x230
> >   lock_acquire+0x68/0x84
> >   __mutex_lock+0xa8/0x4d0
> >   mutex_lock_nested+0x48/0x54
> >   __setup_irq+0xbc/0x8bc
> >   request_threaded_irq+0xf4/0x1b4
> >   devm_request_threaded_irq+0x88/0x104
> >   lpi2c_runtime_resume+0x70/0xe4
> 
> It seems your (possible) deadlock is triggered along
> devm_request_irq() during 
> resume.

You are right! Thank you for looking into this!

> As mainline just enables the clocks, there is nothing we can do here.
> Your 
> patch still is sensible, can you send a new version with the review
> addressed? 

Sure, I'll do!
A. Sverdlin May 15, 2023, 1:11 p.m. UTC | #13
Hello Alexander,

On Fri, 2023-04-21 at 15:48 +0200, Alexander Stein wrote:
> > +       lpi2c_imx->clk_change_nb.notifier_call =
> > lpi2c_imx_clk_change_cb;
> > +       ret = devm_clk_notifier_register(&pdev->dev, lpi2c_imx-
> > >clks[0].clk,
> > +                                        &lpi2c_imx-
> > >clk_change_nb);
> > +       if (ret)
> > +               return dev_err_probe(&pdev->dev, ret,
> > +                                    "can't register peripheral
> > clock 
> notifier\n");
> > +       lpi2c_imx->rate_per = clk_get_rate(lpi2c_imx->clks[0].clk);
> > +       if (!lpi2c_imx->rate_per) {
> > +               dev_err(&pdev->dev, "can't get I2C peripheral clock
> rate\n");
> > +               return -EINVAL;
> > +       }
> > +
> 
> I would switch the order of the calls to devm_clk_notifier_register()
> and 
> clk_get_rate(). AFAICS this looks like a possible lost update. The
> notifier 
> might change rate_per before the (old) value from clk_get_rate is
> actually 
> assigned.

just swapping would not be enough I believe, in the case update event
happens between clk_get_rate() and devm_clk_notifier_register(). I'll
think about it...
diff mbox series

Patch

diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c
index 188f2a36d2fd6..5f1d1d4e018bd 100644
--- a/drivers/i2c/busses/i2c-imx-lpi2c.c
+++ b/drivers/i2c/busses/i2c-imx-lpi2c.c
@@ -100,6 +100,8 @@  struct lpi2c_imx_struct {
 	__u8			*rx_buf;
 	__u8			*tx_buf;
 	struct completion	complete;
+	struct notifier_block	clk_change_nb;
+	unsigned int		rate_per;
 	unsigned int		msglen;
 	unsigned int		delivered;
 	unsigned int		block_data;
@@ -198,24 +200,37 @@  static void lpi2c_imx_stop(struct lpi2c_imx_struct *lpi2c_imx)
 	} while (1);
 }
 
+static int lpi2c_imx_clk_change_cb(struct notifier_block *nb,
+				   unsigned long action, void *data)
+{
+	struct clk_notifier_data *ndata = data;
+	struct lpi2c_imx_struct *lpi2c_imx = container_of(nb,
+							  struct lpi2c_imx_struct,
+							  clk_change_nb);
+
+	if (action & POST_RATE_CHANGE)
+		lpi2c_imx->rate_per = ndata->new_rate;
+
+	return NOTIFY_OK;
+}
+
 /* CLKLO = I2C_CLK_RATIO * CLKHI, SETHOLD = CLKHI, DATAVD = CLKHI/2 */
 static int lpi2c_imx_config(struct lpi2c_imx_struct *lpi2c_imx)
 {
 	u8 prescale, filt, sethold, clkhi, clklo, datavd;
-	unsigned int clk_rate, clk_cycle;
+	unsigned int clk_cycle;
 	enum lpi2c_imx_pincfg pincfg;
 	unsigned int temp;
 
 	lpi2c_imx_set_mode(lpi2c_imx);
 
-	clk_rate = clk_get_rate(lpi2c_imx->clks[0].clk);
 	if (lpi2c_imx->mode == HS || lpi2c_imx->mode == ULTRA_FAST)
 		filt = 0;
 	else
 		filt = 2;
 
 	for (prescale = 0; prescale <= 7; prescale++) {
-		clk_cycle = clk_rate / ((1 << prescale) * lpi2c_imx->bitrate)
+		clk_cycle = lpi2c_imx->rate_per / ((1 << prescale) * lpi2c_imx->bitrate)
 			    - 3 - (filt >> 1);
 		clkhi = (clk_cycle + I2C_CLK_RATIO) / (I2C_CLK_RATIO + 1);
 		clklo = clk_cycle - clkhi;
@@ -588,6 +603,18 @@  static int lpi2c_imx_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
+	lpi2c_imx->clk_change_nb.notifier_call = lpi2c_imx_clk_change_cb;
+	ret = devm_clk_notifier_register(&pdev->dev, lpi2c_imx->clks[0].clk,
+					 &lpi2c_imx->clk_change_nb);
+	if (ret)
+		return dev_err_probe(&pdev->dev, ret,
+				     "can't register peripheral clock notifier\n");
+	lpi2c_imx->rate_per = clk_get_rate(lpi2c_imx->clks[0].clk);
+	if (!lpi2c_imx->rate_per) {
+		dev_err(&pdev->dev, "can't get I2C peripheral clock rate\n");
+		return -EINVAL;
+	}
+
 	pm_runtime_set_autosuspend_delay(&pdev->dev, I2C_PM_TIMEOUT);
 	pm_runtime_use_autosuspend(&pdev->dev);
 	pm_runtime_get_noresume(&pdev->dev);