diff mbox series

[v2,10/16] drm/exynos: implement a drm bridge

Message ID 20200911135413.3654800-11-m.tretter@pengutronix.de
State New
Headers show
Series drm/exynos: Convert driver to drm bridge | expand

Commit Message

Michael Tretter Sept. 11, 2020, 1:54 p.m. UTC
Make the exynos_dsi driver a full drm bridge that can be found and used
from other drivers.

Other drivers can only attach to the bridge, if a mipi dsi device
already attached to the bridge. This allows to defer the probe of the
display pipe until the downstream bridges are available, too.

Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
---
v2:
- move attach of out_bridge to bridge_attach
- add bridge_detach
- don't cleanup encoder if create_connector failed
---
 drivers/gpu/drm/exynos/exynos_drm_dsi.c | 103 +++++++++++++++++-------
 1 file changed, 75 insertions(+), 28 deletions(-)

Comments

Marek Szyprowski Sept. 14, 2020, 8:29 a.m. UTC | #1
Hi Michael,

On 11.09.2020 15:54, Michael Tretter wrote:
> Make the exynos_dsi driver a full drm bridge that can be found and used
> from other drivers.
>
> Other drivers can only attach to the bridge, if a mipi dsi device
> already attached to the bridge. This allows to defer the probe of the
> display pipe until the downstream bridges are available, too.
>
> Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>

This one (and the whole series applied) still fails on Exynos boards:

[drm] Exynos DRM: using 11c00000.fimd device for DMA mapping operations
exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
OF: graph: no port node found in /soc/dsi@11c80000
8<--- cut here ---
Unable to handle kernel NULL pointer dereference at virtual address 00000084
pgd = (ptrval)
[00000084] *pgd=00000000
Internal error: Oops: 5 [#1] PREEMPT SMP ARM
Modules linked in:
CPU: 1 PID: 1 Comm: swapper/0 Not tainted 
5.9.0-rc4-next-20200911-00010-g417dc70d70ec #1608
Hardware name: Samsung Exynos (Flattened Device Tree)
PC is at drm_bridge_attach+0x18/0x164
LR is at exynos_dsi_bind+0x88/0xa8
pc : [<c0628c08>]    lr : [<c064d560>]    psr: 20000013
sp : ef0dfca8  ip : 00000002  fp : c13190e0
r10: 00000000  r9 : ee46d580  r8 : c13190e0
r7 : ee438800  r6 : 00000018  r5 : ef253810  r4 : ef39e840
r3 : 00000000  r2 : 00000018  r1 : ef39e888  r0 : ef39e840
Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
Control: 10c5387d  Table: 4000404a  DAC: 00000051
Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
Stack: (0xef0dfca8 to 0xef0e0000)
...
[<c0628c08>] (drm_bridge_attach) from [<c064d560>] 
(exynos_dsi_bind+0x88/0xa8)
[<c064d560>] (exynos_dsi_bind) from [<c066a800>] 
(component_bind_all+0xfc/0x290)
[<c066a800>] (component_bind_all) from [<c0649dc0>] 
(exynos_drm_bind+0xe4/0x19c)
[<c0649dc0>] (exynos_drm_bind) from [<c066ad74>] 
(try_to_bring_up_master+0x1e4/0x2c4)
[<c066ad74>] (try_to_bring_up_master) from [<c066b2b4>] 
(component_master_add_with_match+0xd4/0x108)
[<c066b2b4>] (component_master_add_with_match) from [<c0649ae8>] 
(exynos_drm_platform_probe+0xe4/0x110)
[<c0649ae8>] (exynos_drm_platform_probe) from [<c0674e6c>] 
(platform_drv_probe+0x6c/0xa4)
[<c0674e6c>] (platform_drv_probe) from [<c067242c>] 
(really_probe+0x200/0x4fc)
[<c067242c>] (really_probe) from [<c06728f0>] 
(driver_probe_device+0x78/0x1fc)
[<c06728f0>] (driver_probe_device) from [<c0672cd8>] 
(device_driver_attach+0x58/0x60)
[<c0672cd8>] (device_driver_attach) from [<c0672dbc>] 
(__driver_attach+0xdc/0x174)
[<c0672dbc>] (__driver_attach) from [<c06701b4>] 
(bus_for_each_dev+0x68/0xb4)
[<c06701b4>] (bus_for_each_dev) from [<c06714e8>] 
(bus_add_driver+0x158/0x214)
[<c06714e8>] (bus_add_driver) from [<c0673c1c>] (driver_register+0x78/0x110)
[<c0673c1c>] (driver_register) from [<c0649ca8>] 
(exynos_drm_init+0xe4/0x118)
[<c0649ca8>] (exynos_drm_init) from [<c0102484>] 
(do_one_initcall+0x8c/0x42c)
[<c0102484>] (do_one_initcall) from [<c11011c0>] 
(kernel_init_freeable+0x190/0x1dc)
[<c11011c0>] (kernel_init_freeable) from [<c0af7880>] 
(kernel_init+0x8/0x118)
[<c0af7880>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
Exception stack(0xef0dffb0 to 0xef0dfff8)
...
---[ end trace ee27f313f9ed9da1 ]---

# arm-linux-gnueabi-addr2line -e vmlinux c0628c08
drivers/gpu/drm/drm_bridge.c:184 (discriminator 1)

I will try to debug it a bit more today.

> ---
> v2:
> - move attach of out_bridge to bridge_attach
> - add bridge_detach
> - don't cleanup encoder if create_connector failed
> ---
>   drivers/gpu/drm/exynos/exynos_drm_dsi.c | 103 +++++++++++++++++-------
>   1 file changed, 75 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
> index 2d75f9877dc0..5e7c1a99a3ee 100644
> --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
> +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
> @@ -266,6 +266,7 @@ struct exynos_dsi_driver_data {
>   
>   struct exynos_dsi {
>   	struct drm_encoder encoder;
> +	struct drm_bridge bridge;
>   	struct mipi_dsi_host dsi_host;
>   	struct drm_connector connector;
>   	struct drm_panel *panel;
> @@ -1602,6 +1603,60 @@ static const struct drm_encoder_helper_funcs exynos_dsi_encoder_helper_funcs = {
>   	.disable = exynos_dsi_disable,
>   };
>   
> +static int exynos_dsi_bridge_attach(struct drm_bridge *bridge,
> +				    enum drm_bridge_attach_flags flags)
> +{
> +	struct exynos_dsi *dsi = bridge->driver_private;
> +	struct drm_encoder *encoder = bridge->encoder;
> +	int ret;
> +
> +	if (!dsi->out_bridge && !dsi->panel)
> +		return -EPROBE_DEFER;
> +
> +	if (dsi->out_bridge) {
> +		ret = drm_bridge_attach(encoder, dsi->out_bridge,
> +					bridge, flags);
> +		if (ret)
> +			return ret;
> +		list_splice_init(&encoder->bridge_chain, &dsi->bridge_chain);
> +	} else {
> +		ret = exynos_dsi_create_connector(encoder);
> +		if (ret)
> +			return ret;
> +
> +		if (dsi->panel) {
> +			dsi->connector.status = connector_status_connected;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static void exynos_dsi_bridge_detach(struct drm_bridge *bridge)
> +{
> +	struct exynos_dsi *dsi = bridge->driver_private;
> +	struct drm_encoder *encoder = bridge->encoder;
> +	struct drm_device *drm = encoder->dev;
> +
> +	if (dsi->panel) {
> +		mutex_lock(&drm->mode_config.mutex);
> +		exynos_dsi_disable(&dsi->encoder);
> +		dsi->panel = NULL;
> +		dsi->connector.status = connector_status_disconnected;
> +		mutex_unlock(&drm->mode_config.mutex);
> +	} else {
> +		if (dsi->out_bridge->funcs->detach)
> +			dsi->out_bridge->funcs->detach(dsi->out_bridge);
> +		dsi->out_bridge = NULL;
> +		INIT_LIST_HEAD(&dsi->bridge_chain);
> +	}
> +}
> +
> +static const struct drm_bridge_funcs exynos_dsi_bridge_funcs = {
> +	.attach = exynos_dsi_bridge_attach,
> +	.detach = exynos_dsi_bridge_detach,
> +};
> +
>   MODULE_DEVICE_TABLE(of, exynos_dsi_of_match);
>   
>   static int exynos_dsi_host_attach(struct mipi_dsi_host *host,
> @@ -1609,25 +1664,12 @@ static int exynos_dsi_host_attach(struct mipi_dsi_host *host,
>   {
>   	struct exynos_dsi *dsi = host_to_dsi(host);
>   	const struct exynos_dsi_host_ops *ops = dsi->driver_data->host_ops;
> -	struct drm_encoder *encoder = &dsi->encoder;
>   	struct drm_bridge *out_bridge;
>   
> -	out_bridge  = of_drm_find_bridge(device->dev.of_node);
> +	out_bridge = of_drm_find_bridge(device->dev.of_node);
>   	if (out_bridge) {
> -		drm_bridge_attach(encoder, out_bridge, NULL, 0);
>   		dsi->out_bridge = out_bridge;
> -		list_splice_init(&encoder->bridge_chain, &dsi->bridge_chain);
>   	} else {
> -		int ret = exynos_dsi_create_connector(encoder);
> -
> -		if (ret) {
> -			DRM_DEV_ERROR(dsi->dev,
> -				      "failed to create connector ret = %d\n",
> -				      ret);
> -			drm_encoder_cleanup(encoder);
> -			return ret;
> -		}
> -
>   		dsi->panel = of_drm_find_panel(device->dev.of_node);
>   		if (IS_ERR(dsi->panel))
>   			dsi->panel = NULL;
> @@ -1662,20 +1704,6 @@ static int exynos_dsi_host_detach(struct mipi_dsi_host *host,
>   {
>   	struct exynos_dsi *dsi = host_to_dsi(host);
>   	const struct exynos_dsi_host_ops *ops = dsi->driver_data->host_ops;
> -	struct drm_device *drm = dsi->encoder.dev;
> -
> -	if (dsi->panel) {
> -		mutex_lock(&drm->mode_config.mutex);
> -		exynos_dsi_disable(&dsi->encoder);
> -		dsi->panel = NULL;
> -		dsi->connector.status = connector_status_disconnected;
> -		mutex_unlock(&drm->mode_config.mutex);
> -	} else {
> -		if (dsi->out_bridge->funcs->detach)
> -			dsi->out_bridge->funcs->detach(dsi->out_bridge);
> -		dsi->out_bridge = NULL;
> -		INIT_LIST_HEAD(&dsi->bridge_chain);
> -	}
>   
>   	if (ops && ops->detach)
>   		ops->detach(dsi->dsi_host.dev, device);
> @@ -1786,7 +1814,15 @@ static int exynos_dsi_bind(struct device *dev, struct device *master,
>   		of_node_put(in_bridge_node);
>   	}
>   
> +	ret = drm_bridge_attach(encoder, &dsi->bridge, in_bridge, 0);
> +	if (ret)
> +		goto err;
> +
>   	return 0;
> +
> +err:
> +	drm_encoder_cleanup(encoder);
> +	return ret;
>   }
>   
>   static void exynos_dsi_unbind(struct device *dev, struct device *master,
> @@ -1796,6 +1832,8 @@ static void exynos_dsi_unbind(struct device *dev, struct device *master,
>   	struct drm_encoder *encoder = &dsi->encoder;
>   
>   	exynos_dsi_disable(encoder);
> +
> +	drm_encoder_cleanup(encoder);
>   }
>   
>   static const struct component_ops exynos_dsi_component_ops = {
> @@ -1806,6 +1844,7 @@ static const struct component_ops exynos_dsi_component_ops = {
>   static struct exynos_dsi *__exynos_dsi_probe(struct platform_device *pdev)
>   {
>   	struct device *dev = &pdev->dev;
> +	struct drm_bridge *bridge;
>   	struct resource *res;
>   	struct exynos_dsi *dsi;
>   	int ret, i;
> @@ -1894,11 +1933,19 @@ static struct exynos_dsi *__exynos_dsi_probe(struct platform_device *pdev)
>   	if (ret)
>   		return ERR_PTR(ret);
>   
> +	bridge = &dsi->bridge;
> +	bridge->driver_private = dsi;
> +	bridge->funcs = &exynos_dsi_bridge_funcs;
> +	bridge->of_node = dev->of_node;
> +	drm_bridge_add(bridge);
> +
>   	return dsi;
>   }
>   
>   static void __exynos_dsi_remove(struct exynos_dsi *dsi)
>   {
> +	drm_bridge_remove(&dsi->bridge);
> +
>   	mipi_dsi_host_unregister(&dsi->dsi_host);
>   }
>   

Best regards
Michael Tretter Sept. 14, 2020, 8:01 p.m. UTC | #2
Hi,

On Mon, 14 Sep 2020 14:31:19 +0200, Marek Szyprowski wrote:
> On 14.09.2020 10:29, Marek Szyprowski wrote:

> > On 11.09.2020 15:54, Michael Tretter wrote:

> >> Make the exynos_dsi driver a full drm bridge that can be found and used

> >> from other drivers.

> >>

> >> Other drivers can only attach to the bridge, if a mipi dsi device

> >> already attached to the bridge. This allows to defer the probe of the

> >> display pipe until the downstream bridges are available, too.

> >>

> >> Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>

> > This one (and the whole series applied) still fails on Exynos boards:

> >

> > [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping operations

> > exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)

> > OF: graph: no port node found in /soc/dsi@11c80000

> > 8<--- cut here ---

> > Unable to handle kernel NULL pointer dereference at virtual address 00000084

> > pgd = (ptrval)

> > [00000084] *pgd=00000000

> > Internal error: Oops: 5 [#1] PREEMPT SMP ARM

> > Modules linked in:

> > CPU: 1 PID: 1 Comm: swapper/0 Not tainted

> > 5.9.0-rc4-next-20200911-00010-g417dc70d70ec #1608

> > Hardware name: Samsung Exynos (Flattened Device Tree)

> > PC is at drm_bridge_attach+0x18/0x164

> > LR is at exynos_dsi_bind+0x88/0xa8

> > pc : [<c0628c08>]    lr : [<c064d560>]    psr: 20000013

> > sp : ef0dfca8  ip : 00000002  fp : c13190e0

> > r10: 00000000  r9 : ee46d580  r8 : c13190e0

> > r7 : ee438800  r6 : 00000018  r5 : ef253810  r4 : ef39e840

> > r3 : 00000000  r2 : 00000018  r1 : ef39e888  r0 : ef39e840

> > Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none

> > Control: 10c5387d  Table: 4000404a  DAC: 00000051

> > Process swapper/0 (pid: 1, stack limit = 0x(ptrval))

> > Stack: (0xef0dfca8 to 0xef0e0000)

> > ...

> > [<c0628c08>] (drm_bridge_attach) from [<c064d560>]

> > (exynos_dsi_bind+0x88/0xa8)

> > [<c064d560>] (exynos_dsi_bind) from [<c066a800>]

> > (component_bind_all+0xfc/0x290)

> > [<c066a800>] (component_bind_all) from [<c0649dc0>]

> > (exynos_drm_bind+0xe4/0x19c)

> > [<c0649dc0>] (exynos_drm_bind) from [<c066ad74>]

> > (try_to_bring_up_master+0x1e4/0x2c4)

> > [<c066ad74>] (try_to_bring_up_master) from [<c066b2b4>]

> > (component_master_add_with_match+0xd4/0x108)

> > [<c066b2b4>] (component_master_add_with_match) from [<c0649ae8>]

> > (exynos_drm_platform_probe+0xe4/0x110)

> > [<c0649ae8>] (exynos_drm_platform_probe) from [<c0674e6c>]

> > (platform_drv_probe+0x6c/0xa4)

> > [<c0674e6c>] (platform_drv_probe) from [<c067242c>]

> > (really_probe+0x200/0x4fc)

> > [<c067242c>] (really_probe) from [<c06728f0>]

> > (driver_probe_device+0x78/0x1fc)

> > [<c06728f0>] (driver_probe_device) from [<c0672cd8>]

> > (device_driver_attach+0x58/0x60)

> > [<c0672cd8>] (device_driver_attach) from [<c0672dbc>]

> > (__driver_attach+0xdc/0x174)

> > [<c0672dbc>] (__driver_attach) from [<c06701b4>]

> > (bus_for_each_dev+0x68/0xb4)

> > [<c06701b4>] (bus_for_each_dev) from [<c06714e8>]

> > (bus_add_driver+0x158/0x214)

> > [<c06714e8>] (bus_add_driver) from [<c0673c1c>] (driver_register+0x78/0x110)

> > [<c0673c1c>] (driver_register) from [<c0649ca8>]

> > (exynos_drm_init+0xe4/0x118)

> > [<c0649ca8>] (exynos_drm_init) from [<c0102484>]

> > (do_one_initcall+0x8c/0x42c)

> > [<c0102484>] (do_one_initcall) from [<c11011c0>]

> > (kernel_init_freeable+0x190/0x1dc)

> > [<c11011c0>] (kernel_init_freeable) from [<c0af7880>]

> > (kernel_init+0x8/0x118)

> > [<c0af7880>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)

> > Exception stack(0xef0dffb0 to 0xef0dfff8)

> > ...

> > ---[ end trace ee27f313f9ed9da1 ]---

> >

> > # arm-linux-gnueabi-addr2line -e vmlinux c0628c08

> > drivers/gpu/drm/drm_bridge.c:184 (discriminator 1)

> >

> > I will try to debug it a bit more today.

> 

> The above crash has been caused by lack of in_bridge initialization to 

> NULL in exynos_dsi_bind() in this patch. However, fixing it reveals 

> another issue:

> 

> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping operations

> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)

> OF: graph: no port node found in /soc/dsi@11c80000

> 8<--- cut here ---

> Unable to handle kernel NULL pointer dereference at virtual address 00000280

> pgd = (ptrval)

> [00000280] *pgd=00000000

> Internal error: Oops: 5 [#1] PREEMPT SMP ARM

> Modules linked in:

> CPU: 0 PID: 1 Comm: swapper/0 Not tainted 

> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec-dirty #1613

> Hardware name: Samsung Exynos (Flattened Device Tree)

> PC is at __mutex_lock+0x54/0xb18

> LR is at lock_is_held_type+0x80/0x138

> pc : [<c0afc920>]    lr : [<c0af63e8>]    psr: 60000013

> sp : ef0dfd30  ip : 33937b74  fp : c13193c8

> r10: c1208eec  r9 : 00000000  r8 : ee45f808

> r7 : c19561a4  r6 : 00000000  r5 : 00000000  r4 : 0000024c

> r3 : 00000000  r2 : 00204140  r1 : c124f13c  r0 : 00000000

> Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none

> Control: 10c5387d  Table: 4000404a  DAC: 00000051

> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))

> Stack: (0xef0dfd30 to 0xef0e0000)

> ...

> [<c0afc920>] (__mutex_lock) from [<c0afd400>] (mutex_lock_nested+0x1c/0x24)

> [<c0afd400>] (mutex_lock_nested) from [<c064d4b8>] 

> (__exynos_dsi_host_attach+0x20/0x6c)

> [<c064d4b8>] (__exynos_dsi_host_attach) from [<c064d914>] 

> (exynos_dsi_host_attach+0x70/0x194)

> [<c064d914>] (exynos_dsi_host_attach) from [<c0656b64>] 

> (s6e8aa0_probe+0x1b0/0x218)

> [<c0656b64>] (s6e8aa0_probe) from [<c0672530>] (really_probe+0x200/0x4fc)

> [<c0672530>] (really_probe) from [<c06729f4>] 

> (driver_probe_device+0x78/0x1fc)

> [<c06729f4>] (driver_probe_device) from [<c0672ddc>] 

> (device_driver_attach+0x58/0x60)

> [<c0672ddc>] (device_driver_attach) from [<c0672ec0>] 

> (__driver_attach+0xdc/0x174)

> [<c0672ec0>] (__driver_attach) from [<c06702b8>] 

> (bus_for_each_dev+0x68/0xb4)

> [<c06702b8>] (bus_for_each_dev) from [<c06715ec>] 

> (bus_add_driver+0x158/0x214)

> [<c06715ec>] (bus_add_driver) from [<c0673d20>] (driver_register+0x78/0x110)

> [<c0673d20>] (driver_register) from [<c0102484>] 

> (do_one_initcall+0x8c/0x42c)

> [<c0102484>] (do_one_initcall) from [<c11011c0>] 

> (kernel_init_freeable+0x190/0x1dc)

> [<c11011c0>] (kernel_init_freeable) from [<c0af7988>] 

> (kernel_init+0x8/0x118)

> [<c0af7988>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)

> Exception stack(0xef0dffb0 to 0xef0dfff8)

> ...

> ---[ end trace c06e996ec2e8234d ]---

> 

> This means that dsi->encoder.dev is not initialized in 

> __exynos_dsi_host_attach().

> 

> This happens, because drm_bridge_attach() in exynos_dsi_bind() returned 

> earlier -517 (deferred probe), what causes cleanup of encoder and 

> release of all drm resources.

> 

> Then however, the panel tries to register itself and 

> exynos_dsi_host_attach() tries to access the released encoder (which is 

> zeroed in drm_encoder_release) and rest of resources, what causes failure.

> 

> It looks that something is missing. Maybe mipi host has to be registered 

> later, when bridge is ready? I have no idea how it is handled before 

> this patch. Andrzej, could you comment it a bit?


I intentionally changed the order, because if another bridge follows in the
pipeline, the probe of the drm driver has to be deferred until some bridge
provides a connector. The next bridge registers itself via the host_attach
function and the deferral is ensured via the bind for the bind/unbind API or
the bridge_attach function otherwise.

On the other hand, the bridge does not have an encoder until the mipi device
has been attached.

As a solution, the exynos dsi driver must initialize the encoder in
exynos_dsi_probe instead of in exynos_dsi_bind and access the encoder via
exynos_dsi instead of the bridge.

Can you try to move everything except samsung_dsim_bind from exynos_dsi_bind
to exynos_dsi_probe (respectively for unbind) and report if it fixes the
crash.

Michael
Andrzej Hajda Sept. 14, 2020, 9:19 p.m. UTC | #3
Hi Marek, Michael,

On 14.09.2020 22:01, Michael Tretter wrote:
> Hi,
>
> On Mon, 14 Sep 2020 14:31:19 +0200, Marek Szyprowski wrote:
>> On 14.09.2020 10:29, Marek Szyprowski wrote:
>>> On 11.09.2020 15:54, Michael Tretter wrote:
>>>> Make the exynos_dsi driver a full drm bridge that can be found and used
>>>> from other drivers.
>>>>
>>>> Other drivers can only attach to the bridge, if a mipi dsi device
>>>> already attached to the bridge. This allows to defer the probe of the
>>>> display pipe until the downstream bridges are available, too.
>>>>
>>>> Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
>>> This one (and the whole series applied) still fails on Exynos boards:
>>>
>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping operations
>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
>>> OF: graph: no port node found in /soc/dsi@11c80000
>>> 8<--- cut here ---
>>> Unable to handle kernel NULL pointer dereference at virtual address 00000084
>>> pgd = (ptrval)
>>> [00000084] *pgd=00000000
>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
>>> Modules linked in:
>>> CPU: 1 PID: 1 Comm: swapper/0 Not tainted
>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec #1608
>>> Hardware name: Samsung Exynos (Flattened Device Tree)
>>> PC is at drm_bridge_attach+0x18/0x164
>>> LR is at exynos_dsi_bind+0x88/0xa8
>>> pc : [<c0628c08>]    lr : [<c064d560>]    psr: 20000013
>>> sp : ef0dfca8  ip : 00000002  fp : c13190e0
>>> r10: 00000000  r9 : ee46d580  r8 : c13190e0
>>> r7 : ee438800  r6 : 00000018  r5 : ef253810  r4 : ef39e840
>>> r3 : 00000000  r2 : 00000018  r1 : ef39e888  r0 : ef39e840
>>> Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
>>> Stack: (0xef0dfca8 to 0xef0e0000)
>>> ...
>>> [<c0628c08>] (drm_bridge_attach) from [<c064d560>]
>>> (exynos_dsi_bind+0x88/0xa8)
>>> [<c064d560>] (exynos_dsi_bind) from [<c066a800>]
>>> (component_bind_all+0xfc/0x290)
>>> [<c066a800>] (component_bind_all) from [<c0649dc0>]
>>> (exynos_drm_bind+0xe4/0x19c)
>>> [<c0649dc0>] (exynos_drm_bind) from [<c066ad74>]
>>> (try_to_bring_up_master+0x1e4/0x2c4)
>>> [<c066ad74>] (try_to_bring_up_master) from [<c066b2b4>]
>>> (component_master_add_with_match+0xd4/0x108)
>>> [<c066b2b4>] (component_master_add_with_match) from [<c0649ae8>]
>>> (exynos_drm_platform_probe+0xe4/0x110)
>>> [<c0649ae8>] (exynos_drm_platform_probe) from [<c0674e6c>]
>>> (platform_drv_probe+0x6c/0xa4)
>>> [<c0674e6c>] (platform_drv_probe) from [<c067242c>]
>>> (really_probe+0x200/0x4fc)
>>> [<c067242c>] (really_probe) from [<c06728f0>]
>>> (driver_probe_device+0x78/0x1fc)
>>> [<c06728f0>] (driver_probe_device) from [<c0672cd8>]
>>> (device_driver_attach+0x58/0x60)
>>> [<c0672cd8>] (device_driver_attach) from [<c0672dbc>]
>>> (__driver_attach+0xdc/0x174)
>>> [<c0672dbc>] (__driver_attach) from [<c06701b4>]
>>> (bus_for_each_dev+0x68/0xb4)
>>> [<c06701b4>] (bus_for_each_dev) from [<c06714e8>]
>>> (bus_add_driver+0x158/0x214)
>>> [<c06714e8>] (bus_add_driver) from [<c0673c1c>] (driver_register+0x78/0x110)
>>> [<c0673c1c>] (driver_register) from [<c0649ca8>]
>>> (exynos_drm_init+0xe4/0x118)
>>> [<c0649ca8>] (exynos_drm_init) from [<c0102484>]
>>> (do_one_initcall+0x8c/0x42c)
>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
>>> (kernel_init_freeable+0x190/0x1dc)
>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7880>]
>>> (kernel_init+0x8/0x118)
>>> [<c0af7880>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
>>> Exception stack(0xef0dffb0 to 0xef0dfff8)
>>> ...
>>> ---[ end trace ee27f313f9ed9da1 ]---
>>>
>>> # arm-linux-gnueabi-addr2line -e vmlinux c0628c08
>>> drivers/gpu/drm/drm_bridge.c:184 (discriminator 1)
>>>
>>> I will try to debug it a bit more today.
>> The above crash has been caused by lack of in_bridge initialization to
>> NULL in exynos_dsi_bind() in this patch. However, fixing it reveals
>> another issue:
>>
>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping operations
>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
>> OF: graph: no port node found in /soc/dsi@11c80000
>> 8<--- cut here ---
>> Unable to handle kernel NULL pointer dereference at virtual address 00000280
>> pgd = (ptrval)
>> [00000280] *pgd=00000000
>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
>> Modules linked in:
>> CPU: 0 PID: 1 Comm: swapper/0 Not tainted
>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec-dirty #1613
>> Hardware name: Samsung Exynos (Flattened Device Tree)
>> PC is at __mutex_lock+0x54/0xb18
>> LR is at lock_is_held_type+0x80/0x138
>> pc : [<c0afc920>]    lr : [<c0af63e8>]    psr: 60000013
>> sp : ef0dfd30  ip : 33937b74  fp : c13193c8
>> r10: c1208eec  r9 : 00000000  r8 : ee45f808
>> r7 : c19561a4  r6 : 00000000  r5 : 00000000  r4 : 0000024c
>> r3 : 00000000  r2 : 00204140  r1 : c124f13c  r0 : 00000000
>> Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
>> Stack: (0xef0dfd30 to 0xef0e0000)
>> ...
>> [<c0afc920>] (__mutex_lock) from [<c0afd400>] (mutex_lock_nested+0x1c/0x24)
>> [<c0afd400>] (mutex_lock_nested) from [<c064d4b8>]
>> (__exynos_dsi_host_attach+0x20/0x6c)
>> [<c064d4b8>] (__exynos_dsi_host_attach) from [<c064d914>]
>> (exynos_dsi_host_attach+0x70/0x194)
>> [<c064d914>] (exynos_dsi_host_attach) from [<c0656b64>]
>> (s6e8aa0_probe+0x1b0/0x218)
>> [<c0656b64>] (s6e8aa0_probe) from [<c0672530>] (really_probe+0x200/0x4fc)
>> [<c0672530>] (really_probe) from [<c06729f4>]
>> (driver_probe_device+0x78/0x1fc)
>> [<c06729f4>] (driver_probe_device) from [<c0672ddc>]
>> (device_driver_attach+0x58/0x60)
>> [<c0672ddc>] (device_driver_attach) from [<c0672ec0>]
>> (__driver_attach+0xdc/0x174)
>> [<c0672ec0>] (__driver_attach) from [<c06702b8>]
>> (bus_for_each_dev+0x68/0xb4)
>> [<c06702b8>] (bus_for_each_dev) from [<c06715ec>]
>> (bus_add_driver+0x158/0x214)
>> [<c06715ec>] (bus_add_driver) from [<c0673d20>] (driver_register+0x78/0x110)
>> [<c0673d20>] (driver_register) from [<c0102484>]
>> (do_one_initcall+0x8c/0x42c)
>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
>> (kernel_init_freeable+0x190/0x1dc)
>> [<c11011c0>] (kernel_init_freeable) from [<c0af7988>]
>> (kernel_init+0x8/0x118)
>> [<c0af7988>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
>> Exception stack(0xef0dffb0 to 0xef0dfff8)
>> ...
>> ---[ end trace c06e996ec2e8234d ]---
>>
>> This means that dsi->encoder.dev is not initialized in
>> __exynos_dsi_host_attach().
>>
>> This happens, because drm_bridge_attach() in exynos_dsi_bind() returned
>> earlier -517 (deferred probe), what causes cleanup of encoder and
>> release of all drm resources.
>>
>> Then however, the panel tries to register itself and
>> exynos_dsi_host_attach() tries to access the released encoder (which is
>> zeroed in drm_encoder_release) and rest of resources, what causes failure.
>>
>> It looks that something is missing. Maybe mipi host has to be registered
>> later, when bridge is ready? I have no idea how it is handled before
>> this patch. Andrzej, could you comment it a bit?
> I intentionally changed the order, because if another bridge follows in the
> pipeline, the probe of the drm driver has to be deferred until some bridge
> provides a connector. The next bridge registers itself via the host_attach
> function and the deferral is ensured via the bind for the bind/unbind API or
> the bridge_attach function otherwise.
>
> On the other hand, the bridge does not have an encoder until the mipi device
> has been attached.
>
> As a solution, the exynos dsi driver must initialize the encoder in
> exynos_dsi_probe instead of in exynos_dsi_bind and access the encoder via
> exynos_dsi instead of the bridge.
>
> Can you try to move everything except samsung_dsim_bind from exynos_dsi_bind
> to exynos_dsi_probe (respectively for unbind) and report if it fixes the
> crash.


The original behaviour is that encoder (exynos_dsi) is registered 
regardless of sink presence (initially panel, later also bridge) - it 
avoids multiple issues with deferred probe, device driver bind/unbind 
and module load/unload. Appearance or disappearance of sink is reported 
to host nicely via DSI attach/detach callbacks - and it is reflected in 
drm world as change state of the connector.

Registering DSI host in bind and unregistering in unbind assures that if 
mipi_dsi device is attached/detached the drm device is always present - 
it makes device/driver binding race free and allows to avoid additional 
locking.

Moving DSI host registration to probe changes everything, for sure it 
breaks the nice feature of DSI attach/detach callbacks and apparently 
can cause different issues depending on device bind order.

I will try to look at the patches tomorrow and maybe I can find more 
constructive comments :)


Regards

Andrzej


>
> Michael
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://protect2.fireeye.com/v1/url?k=4f0be936-129547ac-4f0a6279-0cc47a336fae-e9aecfc5418740e8&q=1&e=1d4b0871-5b85-47f3-9506-79c768643aee&u=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel
Michael Tretter Feb. 1, 2021, 4:33 p.m. UTC | #4
On Tue, 15 Sep 2020 21:40:40 +0200, Andrzej Hajda wrote:
> W dniu 14.09.2020 o 23:19, Andrzej Hajda pisze:

> > On 14.09.2020 22:01, Michael Tretter wrote:

> >> On Mon, 14 Sep 2020 14:31:19 +0200, Marek Szyprowski wrote:

> >>> On 14.09.2020 10:29, Marek Szyprowski wrote:

> >>>> On 11.09.2020 15:54, Michael Tretter wrote:

> >>>>> Make the exynos_dsi driver a full drm bridge that can be found and 

> >>>>> used

> >>>>> from other drivers.

> >>>>>

> >>>>> Other drivers can only attach to the bridge, if a mipi dsi device

> >>>>> already attached to the bridge. This allows to defer the probe of the

> >>>>> display pipe until the downstream bridges are available, too.

> >>>>>

> >>>>> Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>

> >>>> This one (and the whole series applied) still fails on Exynos boards:

> >>>>

> >>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping 

> >>>> operations

> >>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)

> >>>> OF: graph: no port node found in /soc/dsi@11c80000

> >>>> 8<--- cut here ---

> >>>> Unable to handle kernel NULL pointer dereference at virtual address 

> >>>> 00000084

> >>>> pgd = (ptrval)

> >>>> [00000084] *pgd=00000000

> >>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM

> >>>> Modules linked in:

> >>>> CPU: 1 PID: 1 Comm: swapper/0 Not tainted

> >>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec #1608

> >>>> Hardware name: Samsung Exynos (Flattened Device Tree)

> >>>> PC is at drm_bridge_attach+0x18/0x164

> >>>> LR is at exynos_dsi_bind+0x88/0xa8

> >>>> pc : [<c0628c08>]    lr : [<c064d560>]    psr: 20000013

> >>>> sp : ef0dfca8  ip : 00000002  fp : c13190e0

> >>>> r10: 00000000  r9 : ee46d580  r8 : c13190e0

> >>>> r7 : ee438800  r6 : 00000018  r5 : ef253810  r4 : ef39e840

> >>>> r3 : 00000000  r2 : 00000018  r1 : ef39e888  r0 : ef39e840

> >>>> Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none

> >>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051

> >>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))

> >>>> Stack: (0xef0dfca8 to 0xef0e0000)

> >>>> ...

> >>>> [<c0628c08>] (drm_bridge_attach) from [<c064d560>]

> >>>> (exynos_dsi_bind+0x88/0xa8)

> >>>> [<c064d560>] (exynos_dsi_bind) from [<c066a800>]

> >>>> (component_bind_all+0xfc/0x290)

> >>>> [<c066a800>] (component_bind_all) from [<c0649dc0>]

> >>>> (exynos_drm_bind+0xe4/0x19c)

> >>>> [<c0649dc0>] (exynos_drm_bind) from [<c066ad74>]

> >>>> (try_to_bring_up_master+0x1e4/0x2c4)

> >>>> [<c066ad74>] (try_to_bring_up_master) from [<c066b2b4>]

> >>>> (component_master_add_with_match+0xd4/0x108)

> >>>> [<c066b2b4>] (component_master_add_with_match) from [<c0649ae8>]

> >>>> (exynos_drm_platform_probe+0xe4/0x110)

> >>>> [<c0649ae8>] (exynos_drm_platform_probe) from [<c0674e6c>]

> >>>> (platform_drv_probe+0x6c/0xa4)

> >>>> [<c0674e6c>] (platform_drv_probe) from [<c067242c>]

> >>>> (really_probe+0x200/0x4fc)

> >>>> [<c067242c>] (really_probe) from [<c06728f0>]

> >>>> (driver_probe_device+0x78/0x1fc)

> >>>> [<c06728f0>] (driver_probe_device) from [<c0672cd8>]

> >>>> (device_driver_attach+0x58/0x60)

> >>>> [<c0672cd8>] (device_driver_attach) from [<c0672dbc>]

> >>>> (__driver_attach+0xdc/0x174)

> >>>> [<c0672dbc>] (__driver_attach) from [<c06701b4>]

> >>>> (bus_for_each_dev+0x68/0xb4)

> >>>> [<c06701b4>] (bus_for_each_dev) from [<c06714e8>]

> >>>> (bus_add_driver+0x158/0x214)

> >>>> [<c06714e8>] (bus_add_driver) from [<c0673c1c>] 

> >>>> (driver_register+0x78/0x110)

> >>>> [<c0673c1c>] (driver_register) from [<c0649ca8>]

> >>>> (exynos_drm_init+0xe4/0x118)

> >>>> [<c0649ca8>] (exynos_drm_init) from [<c0102484>]

> >>>> (do_one_initcall+0x8c/0x42c)

> >>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]

> >>>> (kernel_init_freeable+0x190/0x1dc)

> >>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7880>]

> >>>> (kernel_init+0x8/0x118)

> >>>> [<c0af7880>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)

> >>>> Exception stack(0xef0dffb0 to 0xef0dfff8)

> >>>> ...

> >>>> ---[ end trace ee27f313f9ed9da1 ]---

> >>>>

> >>>> # arm-linux-gnueabi-addr2line -e vmlinux c0628c08

> >>>> drivers/gpu/drm/drm_bridge.c:184 (discriminator 1)

> >>>>

> >>>> I will try to debug it a bit more today.

> >>> The above crash has been caused by lack of in_bridge initialization to

> >>> NULL in exynos_dsi_bind() in this patch. However, fixing it reveals

> >>> another issue:

> >>>

> >>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping operations

> >>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)

> >>> OF: graph: no port node found in /soc/dsi@11c80000

> >>> 8<--- cut here ---

> >>> Unable to handle kernel NULL pointer dereference at virtual address 

> >>> 00000280

> >>> pgd = (ptrval)

> >>> [00000280] *pgd=00000000

> >>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM

> >>> Modules linked in:

> >>> CPU: 0 PID: 1 Comm: swapper/0 Not tainted

> >>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec-dirty #1613

> >>> Hardware name: Samsung Exynos (Flattened Device Tree)

> >>> PC is at __mutex_lock+0x54/0xb18

> >>> LR is at lock_is_held_type+0x80/0x138

> >>> pc : [<c0afc920>]    lr : [<c0af63e8>]    psr: 60000013

> >>> sp : ef0dfd30  ip : 33937b74  fp : c13193c8

> >>> r10: c1208eec  r9 : 00000000  r8 : ee45f808

> >>> r7 : c19561a4  r6 : 00000000  r5 : 00000000  r4 : 0000024c

> >>> r3 : 00000000  r2 : 00204140  r1 : c124f13c  r0 : 00000000

> >>> Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none

> >>> Control: 10c5387d  Table: 4000404a  DAC: 00000051

> >>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))

> >>> Stack: (0xef0dfd30 to 0xef0e0000)

> >>> ...

> >>> [<c0afc920>] (__mutex_lock) from [<c0afd400>] 

> >>> (mutex_lock_nested+0x1c/0x24)

> >>> [<c0afd400>] (mutex_lock_nested) from [<c064d4b8>]

> >>> (__exynos_dsi_host_attach+0x20/0x6c)

> >>> [<c064d4b8>] (__exynos_dsi_host_attach) from [<c064d914>]

> >>> (exynos_dsi_host_attach+0x70/0x194)

> >>> [<c064d914>] (exynos_dsi_host_attach) from [<c0656b64>]

> >>> (s6e8aa0_probe+0x1b0/0x218)

> >>> [<c0656b64>] (s6e8aa0_probe) from [<c0672530>] 

> >>> (really_probe+0x200/0x4fc)

> >>> [<c0672530>] (really_probe) from [<c06729f4>]

> >>> (driver_probe_device+0x78/0x1fc)

> >>> [<c06729f4>] (driver_probe_device) from [<c0672ddc>]

> >>> (device_driver_attach+0x58/0x60)

> >>> [<c0672ddc>] (device_driver_attach) from [<c0672ec0>]

> >>> (__driver_attach+0xdc/0x174)

> >>> [<c0672ec0>] (__driver_attach) from [<c06702b8>]

> >>> (bus_for_each_dev+0x68/0xb4)

> >>> [<c06702b8>] (bus_for_each_dev) from [<c06715ec>]

> >>> (bus_add_driver+0x158/0x214)

> >>> [<c06715ec>] (bus_add_driver) from [<c0673d20>] 

> >>> (driver_register+0x78/0x110)

> >>> [<c0673d20>] (driver_register) from [<c0102484>]

> >>> (do_one_initcall+0x8c/0x42c)

> >>> [<c0102484>] (do_one_initcall) from [<c11011c0>]

> >>> (kernel_init_freeable+0x190/0x1dc)

> >>> [<c11011c0>] (kernel_init_freeable) from [<c0af7988>]

> >>> (kernel_init+0x8/0x118)

> >>> [<c0af7988>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)

> >>> Exception stack(0xef0dffb0 to 0xef0dfff8)

> >>> ...

> >>> ---[ end trace c06e996ec2e8234d ]---

> >>>

> >>> This means that dsi->encoder.dev is not initialized in

> >>> __exynos_dsi_host_attach().

> >>>

> >>> This happens, because drm_bridge_attach() in exynos_dsi_bind() returned

> >>> earlier -517 (deferred probe), what causes cleanup of encoder and

> >>> release of all drm resources.

> >>>

> >>> Then however, the panel tries to register itself and

> >>> exynos_dsi_host_attach() tries to access the released encoder (which is

> >>> zeroed in drm_encoder_release) and rest of resources, what causes 

> >>> failure.

> >>>

> >>> It looks that something is missing. Maybe mipi host has to be 

> >>> registered

> >>> later, when bridge is ready? I have no idea how it is handled before

> >>> this patch. Andrzej, could you comment it a bit?

> >> I intentionally changed the order, because if another bridge follows 

> >> in the

> >> pipeline, the probe of the drm driver has to be deferred until some 

> >> bridge

> >> provides a connector. The next bridge registers itself via the 

> >> host_attach

> >> function and the deferral is ensured via the bind for the bind/unbind 

> >> API or

> >> the bridge_attach function otherwise.

> >>

> >> On the other hand, the bridge does not have an encoder until the mipi 

> >> device

> >> has been attached.

> >>

> >> As a solution, the exynos dsi driver must initialize the encoder in

> >> exynos_dsi_probe instead of in exynos_dsi_bind and access the encoder 

> >> via

> >> exynos_dsi instead of the bridge.

> >>

> >> Can you try to move everything except samsung_dsim_bind from 

> >> exynos_dsi_bind

> >> to exynos_dsi_probe (respectively for unbind) and report if it fixes the

> >> crash.

> >

> >

> > The original behaviour is that encoder (exynos_dsi) is registered 

> > regardless of sink presence (initially panel, later also bridge) - it 

> > avoids multiple issues with deferred probe, device driver bind/unbind 

> > and module load/unload. Appearance or disappearance of sink is 

> > reported to host nicely via DSI attach/detach callbacks - and it is 

> > reflected in drm world as change state of the connector.

> >

> > Registering DSI host in bind and unregistering in unbind assures that 

> > if mipi_dsi device is attached/detached the drm device is always 

> > present - it makes device/driver binding race free and allows to avoid 

> > additional locking.

> >

> > Moving DSI host registration to probe changes everything, for sure it 

> > breaks the nice feature of DSI attach/detach callbacks and apparently 

> > can cause different issues depending on device bind order.

> >

> > I will try to look at the patches tomorrow and maybe I can find more 

> > constructive comments :)

> 

> 

> As I said yesterday, exynos_dsi driver uses dsi host attach/detach 

> callbacks to control appearance/disappearance of downstream device. It 

> allows to:

> 

> 1. Safely bind/unbind different device drivers at any time and at any 

> order, without killing exynos_drm and/or crashing system.

> 

> 2. Avoid issues with late drm init - on some platforms exynos_drm device 

> appeared too late, due to deferred probe, and resulted in black screen 

> in userspace.

> 

> 

> Now if we want to convert exynos_dsi to drm_bridge I see following options:

> 

> A. Forgot about callbacks and make the exynos_drm to defer probing until 

> exynos_dsi bridge is available, probably it will cause later exynos_drm 

> appearance, thus probably black screen on some targets. So for sure it 

> will be suboptimal. Making it bridge unbind safe would be another 

> problem, but most developers do not care about it so why should we? :)

> 

> B. Try to mimic current behaviour - exynos_dsi register bridge ASAP, 

> even if downstream devices are not yet attached, on attach/detach notify 

> drm about it via connector status change, for this dsi_host registration 

> should be performed from drm_bridge attach, I guess.

> 

> 

> Option A is more standard, but is unsafe and causes other issues.

> 

> Option B keeps current behaviour.


Maybe we can have both, but I am not sure, if I am missing something:

I still prefer option A for the samsung-dsim driver, because it is more
standard, simpler and avoids issues with encoders, connectors or handling
hotplug.

The idea is to use two bridges in the exynos-dsi driver: One bridge in the
samsung-dsim driver which implements option A and defers probing of the drm
driver until the next bridge is attached. And a second bridge in the
exynos_dsi that attaches to the first bridge (thus, allowing the exynos_drm
device to appear) and implements the hotplug handling for notifying drm via
connector status change.

The driver for the i.MX8M would use the samsung-dsim bridge without an
additional bridge.

This allows the samsung-dsim driver to expose the standard behavior while the
exynos_dsi may stick to the existing behavior for the exynos_drm driver.

I hope this makes sense and does not sound too crazy. It might be difficult to
get the probing and mipi host/device registration correct, but I will try, if
this can work.

Michael
Michael Tretter Feb. 3, 2021, 8:31 p.m. UTC | #5
On Mon, 01 Feb 2021 17:33:14 +0100, Michael Tretter wrote:
> On Tue, 15 Sep 2020 21:40:40 +0200, Andrzej Hajda wrote:
> > W dniu 14.09.2020 o 23:19, Andrzej Hajda pisze:
> > > On 14.09.2020 22:01, Michael Tretter wrote:
> > >> On Mon, 14 Sep 2020 14:31:19 +0200, Marek Szyprowski wrote:
> > >>> On 14.09.2020 10:29, Marek Szyprowski wrote:
> > >>>> On 11.09.2020 15:54, Michael Tretter wrote:
> > >>>>> Make the exynos_dsi driver a full drm bridge that can be found and 
> > >>>>> used
> > >>>>> from other drivers.
> > >>>>>
> > >>>>> Other drivers can only attach to the bridge, if a mipi dsi device
> > >>>>> already attached to the bridge. This allows to defer the probe of the
> > >>>>> display pipe until the downstream bridges are available, too.
> > >>>>>
> > >>>>> Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
> > >>>> This one (and the whole series applied) still fails on Exynos boards:
> > >>>>
> > >>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping 
> > >>>> operations
> > >>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
> > >>>> OF: graph: no port node found in /soc/dsi@11c80000
> > >>>> 8<--- cut here ---
> > >>>> Unable to handle kernel NULL pointer dereference at virtual address 
> > >>>> 00000084
> > >>>> pgd = (ptrval)
> > >>>> [00000084] *pgd=00000000
> > >>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
> > >>>> Modules linked in:
> > >>>> CPU: 1 PID: 1 Comm: swapper/0 Not tainted
> > >>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec #1608
> > >>>> Hardware name: Samsung Exynos (Flattened Device Tree)
> > >>>> PC is at drm_bridge_attach+0x18/0x164
> > >>>> LR is at exynos_dsi_bind+0x88/0xa8
> > >>>> pc : [<c0628c08>]    lr : [<c064d560>]    psr: 20000013
> > >>>> sp : ef0dfca8  ip : 00000002  fp : c13190e0
> > >>>> r10: 00000000  r9 : ee46d580  r8 : c13190e0
> > >>>> r7 : ee438800  r6 : 00000018  r5 : ef253810  r4 : ef39e840
> > >>>> r3 : 00000000  r2 : 00000018  r1 : ef39e888  r0 : ef39e840
> > >>>> Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> > >>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
> > >>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
> > >>>> Stack: (0xef0dfca8 to 0xef0e0000)
> > >>>> ...
> > >>>> [<c0628c08>] (drm_bridge_attach) from [<c064d560>]
> > >>>> (exynos_dsi_bind+0x88/0xa8)
> > >>>> [<c064d560>] (exynos_dsi_bind) from [<c066a800>]
> > >>>> (component_bind_all+0xfc/0x290)
> > >>>> [<c066a800>] (component_bind_all) from [<c0649dc0>]
> > >>>> (exynos_drm_bind+0xe4/0x19c)
> > >>>> [<c0649dc0>] (exynos_drm_bind) from [<c066ad74>]
> > >>>> (try_to_bring_up_master+0x1e4/0x2c4)
> > >>>> [<c066ad74>] (try_to_bring_up_master) from [<c066b2b4>]
> > >>>> (component_master_add_with_match+0xd4/0x108)
> > >>>> [<c066b2b4>] (component_master_add_with_match) from [<c0649ae8>]
> > >>>> (exynos_drm_platform_probe+0xe4/0x110)
> > >>>> [<c0649ae8>] (exynos_drm_platform_probe) from [<c0674e6c>]
> > >>>> (platform_drv_probe+0x6c/0xa4)
> > >>>> [<c0674e6c>] (platform_drv_probe) from [<c067242c>]
> > >>>> (really_probe+0x200/0x4fc)
> > >>>> [<c067242c>] (really_probe) from [<c06728f0>]
> > >>>> (driver_probe_device+0x78/0x1fc)
> > >>>> [<c06728f0>] (driver_probe_device) from [<c0672cd8>]
> > >>>> (device_driver_attach+0x58/0x60)
> > >>>> [<c0672cd8>] (device_driver_attach) from [<c0672dbc>]
> > >>>> (__driver_attach+0xdc/0x174)
> > >>>> [<c0672dbc>] (__driver_attach) from [<c06701b4>]
> > >>>> (bus_for_each_dev+0x68/0xb4)
> > >>>> [<c06701b4>] (bus_for_each_dev) from [<c06714e8>]
> > >>>> (bus_add_driver+0x158/0x214)
> > >>>> [<c06714e8>] (bus_add_driver) from [<c0673c1c>] 
> > >>>> (driver_register+0x78/0x110)
> > >>>> [<c0673c1c>] (driver_register) from [<c0649ca8>]
> > >>>> (exynos_drm_init+0xe4/0x118)
> > >>>> [<c0649ca8>] (exynos_drm_init) from [<c0102484>]
> > >>>> (do_one_initcall+0x8c/0x42c)
> > >>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
> > >>>> (kernel_init_freeable+0x190/0x1dc)
> > >>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7880>]
> > >>>> (kernel_init+0x8/0x118)
> > >>>> [<c0af7880>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
> > >>>> Exception stack(0xef0dffb0 to 0xef0dfff8)
> > >>>> ...
> > >>>> ---[ end trace ee27f313f9ed9da1 ]---
> > >>>>
> > >>>> # arm-linux-gnueabi-addr2line -e vmlinux c0628c08
> > >>>> drivers/gpu/drm/drm_bridge.c:184 (discriminator 1)
> > >>>>
> > >>>> I will try to debug it a bit more today.
> > >>> The above crash has been caused by lack of in_bridge initialization to
> > >>> NULL in exynos_dsi_bind() in this patch. However, fixing it reveals
> > >>> another issue:
> > >>>
> > >>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping operations
> > >>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
> > >>> OF: graph: no port node found in /soc/dsi@11c80000
> > >>> 8<--- cut here ---
> > >>> Unable to handle kernel NULL pointer dereference at virtual address 
> > >>> 00000280
> > >>> pgd = (ptrval)
> > >>> [00000280] *pgd=00000000
> > >>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
> > >>> Modules linked in:
> > >>> CPU: 0 PID: 1 Comm: swapper/0 Not tainted
> > >>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec-dirty #1613
> > >>> Hardware name: Samsung Exynos (Flattened Device Tree)
> > >>> PC is at __mutex_lock+0x54/0xb18
> > >>> LR is at lock_is_held_type+0x80/0x138
> > >>> pc : [<c0afc920>]    lr : [<c0af63e8>]    psr: 60000013
> > >>> sp : ef0dfd30  ip : 33937b74  fp : c13193c8
> > >>> r10: c1208eec  r9 : 00000000  r8 : ee45f808
> > >>> r7 : c19561a4  r6 : 00000000  r5 : 00000000  r4 : 0000024c
> > >>> r3 : 00000000  r2 : 00204140  r1 : c124f13c  r0 : 00000000
> > >>> Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> > >>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
> > >>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
> > >>> Stack: (0xef0dfd30 to 0xef0e0000)
> > >>> ...
> > >>> [<c0afc920>] (__mutex_lock) from [<c0afd400>] 
> > >>> (mutex_lock_nested+0x1c/0x24)
> > >>> [<c0afd400>] (mutex_lock_nested) from [<c064d4b8>]
> > >>> (__exynos_dsi_host_attach+0x20/0x6c)
> > >>> [<c064d4b8>] (__exynos_dsi_host_attach) from [<c064d914>]
> > >>> (exynos_dsi_host_attach+0x70/0x194)
> > >>> [<c064d914>] (exynos_dsi_host_attach) from [<c0656b64>]
> > >>> (s6e8aa0_probe+0x1b0/0x218)
> > >>> [<c0656b64>] (s6e8aa0_probe) from [<c0672530>] 
> > >>> (really_probe+0x200/0x4fc)
> > >>> [<c0672530>] (really_probe) from [<c06729f4>]
> > >>> (driver_probe_device+0x78/0x1fc)
> > >>> [<c06729f4>] (driver_probe_device) from [<c0672ddc>]
> > >>> (device_driver_attach+0x58/0x60)
> > >>> [<c0672ddc>] (device_driver_attach) from [<c0672ec0>]
> > >>> (__driver_attach+0xdc/0x174)
> > >>> [<c0672ec0>] (__driver_attach) from [<c06702b8>]
> > >>> (bus_for_each_dev+0x68/0xb4)
> > >>> [<c06702b8>] (bus_for_each_dev) from [<c06715ec>]
> > >>> (bus_add_driver+0x158/0x214)
> > >>> [<c06715ec>] (bus_add_driver) from [<c0673d20>] 
> > >>> (driver_register+0x78/0x110)
> > >>> [<c0673d20>] (driver_register) from [<c0102484>]
> > >>> (do_one_initcall+0x8c/0x42c)
> > >>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
> > >>> (kernel_init_freeable+0x190/0x1dc)
> > >>> [<c11011c0>] (kernel_init_freeable) from [<c0af7988>]
> > >>> (kernel_init+0x8/0x118)
> > >>> [<c0af7988>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
> > >>> Exception stack(0xef0dffb0 to 0xef0dfff8)
> > >>> ...
> > >>> ---[ end trace c06e996ec2e8234d ]---
> > >>>
> > >>> This means that dsi->encoder.dev is not initialized in
> > >>> __exynos_dsi_host_attach().
> > >>>
> > >>> This happens, because drm_bridge_attach() in exynos_dsi_bind() returned
> > >>> earlier -517 (deferred probe), what causes cleanup of encoder and
> > >>> release of all drm resources.
> > >>>
> > >>> Then however, the panel tries to register itself and
> > >>> exynos_dsi_host_attach() tries to access the released encoder (which is
> > >>> zeroed in drm_encoder_release) and rest of resources, what causes 
> > >>> failure.
> > >>>
> > >>> It looks that something is missing. Maybe mipi host has to be 
> > >>> registered
> > >>> later, when bridge is ready? I have no idea how it is handled before
> > >>> this patch. Andrzej, could you comment it a bit?
> > >> I intentionally changed the order, because if another bridge follows 
> > >> in the
> > >> pipeline, the probe of the drm driver has to be deferred until some 
> > >> bridge
> > >> provides a connector. The next bridge registers itself via the 
> > >> host_attach
> > >> function and the deferral is ensured via the bind for the bind/unbind 
> > >> API or
> > >> the bridge_attach function otherwise.
> > >>
> > >> On the other hand, the bridge does not have an encoder until the mipi 
> > >> device
> > >> has been attached.
> > >>
> > >> As a solution, the exynos dsi driver must initialize the encoder in
> > >> exynos_dsi_probe instead of in exynos_dsi_bind and access the encoder 
> > >> via
> > >> exynos_dsi instead of the bridge.
> > >>
> > >> Can you try to move everything except samsung_dsim_bind from 
> > >> exynos_dsi_bind
> > >> to exynos_dsi_probe (respectively for unbind) and report if it fixes the
> > >> crash.
> > >
> > >
> > > The original behaviour is that encoder (exynos_dsi) is registered 
> > > regardless of sink presence (initially panel, later also bridge) - it 
> > > avoids multiple issues with deferred probe, device driver bind/unbind 
> > > and module load/unload. Appearance or disappearance of sink is 
> > > reported to host nicely via DSI attach/detach callbacks - and it is 
> > > reflected in drm world as change state of the connector.
> > >
> > > Registering DSI host in bind and unregistering in unbind assures that 
> > > if mipi_dsi device is attached/detached the drm device is always 
> > > present - it makes device/driver binding race free and allows to avoid 
> > > additional locking.
> > >
> > > Moving DSI host registration to probe changes everything, for sure it 
> > > breaks the nice feature of DSI attach/detach callbacks and apparently 
> > > can cause different issues depending on device bind order.
> > >
> > > I will try to look at the patches tomorrow and maybe I can find more 
> > > constructive comments :)
> > 
> > 
> > As I said yesterday, exynos_dsi driver uses dsi host attach/detach 
> > callbacks to control appearance/disappearance of downstream device. It 
> > allows to:
> > 
> > 1. Safely bind/unbind different device drivers at any time and at any 
> > order, without killing exynos_drm and/or crashing system.
> > 
> > 2. Avoid issues with late drm init - on some platforms exynos_drm device 
> > appeared too late, due to deferred probe, and resulted in black screen 
> > in userspace.
> > 
> > 
> > Now if we want to convert exynos_dsi to drm_bridge I see following options:
> > 
> > A. Forgot about callbacks and make the exynos_drm to defer probing until 
> > exynos_dsi bridge is available, probably it will cause later exynos_drm 
> > appearance, thus probably black screen on some targets. So for sure it 
> > will be suboptimal. Making it bridge unbind safe would be another 
> > problem, but most developers do not care about it so why should we? :)
> > 
> > B. Try to mimic current behaviour - exynos_dsi register bridge ASAP, 
> > even if downstream devices are not yet attached, on attach/detach notify 
> > drm about it via connector status change, for this dsi_host registration 
> > should be performed from drm_bridge attach, I guess.
> > 
> > 
> > Option A is more standard, but is unsafe and causes other issues.
> > 
> > Option B keeps current behaviour.
> 
> Maybe we can have both, but I am not sure, if I am missing something:
> 
> I still prefer option A for the samsung-dsim driver, because it is more
> standard, simpler and avoids issues with encoders, connectors or handling
> hotplug.
> 
> The idea is to use two bridges in the exynos-dsi driver: One bridge in the
> samsung-dsim driver which implements option A and defers probing of the drm
> driver until the next bridge is attached. And a second bridge in the
> exynos_dsi that attaches to the first bridge (thus, allowing the exynos_drm
> device to appear) and implements the hotplug handling for notifying drm via
> connector status change.
> 
> The driver for the i.MX8M would use the samsung-dsim bridge without an
> additional bridge.
> 
> This allows the samsung-dsim driver to expose the standard behavior while the
> exynos_dsi may stick to the existing behavior for the exynos_drm driver.
> 
> I hope this makes sense and does not sound too crazy. It might be difficult to
> get the probing and mipi host/device registration correct, but I will try, if
> this can work.

Adding two bridges for being able to support hotplugging adds many special
cases to the bridge driver and still requires more custom API to correctly add
the second bridge. I don't think that this a viable path to go.

This leaves us with:

Option A) Standard drm_bridge behavior, which is currently implemented, but
incompatible with the currently expected behavior of exynos_drm.

Option B) Creating the drm device without all bridges being attached, which
would work with the exynos_drm driver, but breaks for the standard drm_bridge
behavior, especially, if the encoder/connector is created at the beginning of
the pipeline and passed downwards when the bridges are attached.

Option C) Extracting only low level register accesses into shared code, adding
a custom interface and implementing the drm_bridge handling in the platform
specific code.

None of the options really convinces me.

Michael
Daniel Vetter Feb. 4, 2021, 10:17 a.m. UTC | #6
On Wed, Feb 3, 2021 at 9:32 PM Michael Tretter <m.tretter@pengutronix.de> wrote:
>
> On Mon, 01 Feb 2021 17:33:14 +0100, Michael Tretter wrote:
> > On Tue, 15 Sep 2020 21:40:40 +0200, Andrzej Hajda wrote:
> > > W dniu 14.09.2020 o 23:19, Andrzej Hajda pisze:
> > > > On 14.09.2020 22:01, Michael Tretter wrote:
> > > >> On Mon, 14 Sep 2020 14:31:19 +0200, Marek Szyprowski wrote:
> > > >>> On 14.09.2020 10:29, Marek Szyprowski wrote:
> > > >>>> On 11.09.2020 15:54, Michael Tretter wrote:
> > > >>>>> Make the exynos_dsi driver a full drm bridge that can be found and
> > > >>>>> used
> > > >>>>> from other drivers.
> > > >>>>>
> > > >>>>> Other drivers can only attach to the bridge, if a mipi dsi device
> > > >>>>> already attached to the bridge. This allows to defer the probe of the
> > > >>>>> display pipe until the downstream bridges are available, too.
> > > >>>>>
> > > >>>>> Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
> > > >>>> This one (and the whole series applied) still fails on Exynos boards:
> > > >>>>
> > > >>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping
> > > >>>> operations
> > > >>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
> > > >>>> OF: graph: no port node found in /soc/dsi@11c80000
> > > >>>> 8<--- cut here ---
> > > >>>> Unable to handle kernel NULL pointer dereference at virtual address
> > > >>>> 00000084
> > > >>>> pgd = (ptrval)
> > > >>>> [00000084] *pgd=00000000
> > > >>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
> > > >>>> Modules linked in:
> > > >>>> CPU: 1 PID: 1 Comm: swapper/0 Not tainted
> > > >>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec #1608
> > > >>>> Hardware name: Samsung Exynos (Flattened Device Tree)
> > > >>>> PC is at drm_bridge_attach+0x18/0x164
> > > >>>> LR is at exynos_dsi_bind+0x88/0xa8
> > > >>>> pc : [<c0628c08>]    lr : [<c064d560>]    psr: 20000013
> > > >>>> sp : ef0dfca8  ip : 00000002  fp : c13190e0
> > > >>>> r10: 00000000  r9 : ee46d580  r8 : c13190e0
> > > >>>> r7 : ee438800  r6 : 00000018  r5 : ef253810  r4 : ef39e840
> > > >>>> r3 : 00000000  r2 : 00000018  r1 : ef39e888  r0 : ef39e840
> > > >>>> Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> > > >>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
> > > >>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
> > > >>>> Stack: (0xef0dfca8 to 0xef0e0000)
> > > >>>> ...
> > > >>>> [<c0628c08>] (drm_bridge_attach) from [<c064d560>]
> > > >>>> (exynos_dsi_bind+0x88/0xa8)
> > > >>>> [<c064d560>] (exynos_dsi_bind) from [<c066a800>]
> > > >>>> (component_bind_all+0xfc/0x290)
> > > >>>> [<c066a800>] (component_bind_all) from [<c0649dc0>]
> > > >>>> (exynos_drm_bind+0xe4/0x19c)
> > > >>>> [<c0649dc0>] (exynos_drm_bind) from [<c066ad74>]
> > > >>>> (try_to_bring_up_master+0x1e4/0x2c4)
> > > >>>> [<c066ad74>] (try_to_bring_up_master) from [<c066b2b4>]
> > > >>>> (component_master_add_with_match+0xd4/0x108)
> > > >>>> [<c066b2b4>] (component_master_add_with_match) from [<c0649ae8>]
> > > >>>> (exynos_drm_platform_probe+0xe4/0x110)
> > > >>>> [<c0649ae8>] (exynos_drm_platform_probe) from [<c0674e6c>]
> > > >>>> (platform_drv_probe+0x6c/0xa4)
> > > >>>> [<c0674e6c>] (platform_drv_probe) from [<c067242c>]
> > > >>>> (really_probe+0x200/0x4fc)
> > > >>>> [<c067242c>] (really_probe) from [<c06728f0>]
> > > >>>> (driver_probe_device+0x78/0x1fc)
> > > >>>> [<c06728f0>] (driver_probe_device) from [<c0672cd8>]
> > > >>>> (device_driver_attach+0x58/0x60)
> > > >>>> [<c0672cd8>] (device_driver_attach) from [<c0672dbc>]
> > > >>>> (__driver_attach+0xdc/0x174)
> > > >>>> [<c0672dbc>] (__driver_attach) from [<c06701b4>]
> > > >>>> (bus_for_each_dev+0x68/0xb4)
> > > >>>> [<c06701b4>] (bus_for_each_dev) from [<c06714e8>]
> > > >>>> (bus_add_driver+0x158/0x214)
> > > >>>> [<c06714e8>] (bus_add_driver) from [<c0673c1c>]
> > > >>>> (driver_register+0x78/0x110)
> > > >>>> [<c0673c1c>] (driver_register) from [<c0649ca8>]
> > > >>>> (exynos_drm_init+0xe4/0x118)
> > > >>>> [<c0649ca8>] (exynos_drm_init) from [<c0102484>]
> > > >>>> (do_one_initcall+0x8c/0x42c)
> > > >>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
> > > >>>> (kernel_init_freeable+0x190/0x1dc)
> > > >>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7880>]
> > > >>>> (kernel_init+0x8/0x118)
> > > >>>> [<c0af7880>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
> > > >>>> Exception stack(0xef0dffb0 to 0xef0dfff8)
> > > >>>> ...
> > > >>>> ---[ end trace ee27f313f9ed9da1 ]---
> > > >>>>
> > > >>>> # arm-linux-gnueabi-addr2line -e vmlinux c0628c08
> > > >>>> drivers/gpu/drm/drm_bridge.c:184 (discriminator 1)
> > > >>>>
> > > >>>> I will try to debug it a bit more today.
> > > >>> The above crash has been caused by lack of in_bridge initialization to
> > > >>> NULL in exynos_dsi_bind() in this patch. However, fixing it reveals
> > > >>> another issue:
> > > >>>
> > > >>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping operations
> > > >>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
> > > >>> OF: graph: no port node found in /soc/dsi@11c80000
> > > >>> 8<--- cut here ---
> > > >>> Unable to handle kernel NULL pointer dereference at virtual address
> > > >>> 00000280
> > > >>> pgd = (ptrval)
> > > >>> [00000280] *pgd=00000000
> > > >>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
> > > >>> Modules linked in:
> > > >>> CPU: 0 PID: 1 Comm: swapper/0 Not tainted
> > > >>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec-dirty #1613
> > > >>> Hardware name: Samsung Exynos (Flattened Device Tree)
> > > >>> PC is at __mutex_lock+0x54/0xb18
> > > >>> LR is at lock_is_held_type+0x80/0x138
> > > >>> pc : [<c0afc920>]    lr : [<c0af63e8>]    psr: 60000013
> > > >>> sp : ef0dfd30  ip : 33937b74  fp : c13193c8
> > > >>> r10: c1208eec  r9 : 00000000  r8 : ee45f808
> > > >>> r7 : c19561a4  r6 : 00000000  r5 : 00000000  r4 : 0000024c
> > > >>> r3 : 00000000  r2 : 00204140  r1 : c124f13c  r0 : 00000000
> > > >>> Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> > > >>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
> > > >>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
> > > >>> Stack: (0xef0dfd30 to 0xef0e0000)
> > > >>> ...
> > > >>> [<c0afc920>] (__mutex_lock) from [<c0afd400>]
> > > >>> (mutex_lock_nested+0x1c/0x24)
> > > >>> [<c0afd400>] (mutex_lock_nested) from [<c064d4b8>]
> > > >>> (__exynos_dsi_host_attach+0x20/0x6c)
> > > >>> [<c064d4b8>] (__exynos_dsi_host_attach) from [<c064d914>]
> > > >>> (exynos_dsi_host_attach+0x70/0x194)
> > > >>> [<c064d914>] (exynos_dsi_host_attach) from [<c0656b64>]
> > > >>> (s6e8aa0_probe+0x1b0/0x218)
> > > >>> [<c0656b64>] (s6e8aa0_probe) from [<c0672530>]
> > > >>> (really_probe+0x200/0x4fc)
> > > >>> [<c0672530>] (really_probe) from [<c06729f4>]
> > > >>> (driver_probe_device+0x78/0x1fc)
> > > >>> [<c06729f4>] (driver_probe_device) from [<c0672ddc>]
> > > >>> (device_driver_attach+0x58/0x60)
> > > >>> [<c0672ddc>] (device_driver_attach) from [<c0672ec0>]
> > > >>> (__driver_attach+0xdc/0x174)
> > > >>> [<c0672ec0>] (__driver_attach) from [<c06702b8>]
> > > >>> (bus_for_each_dev+0x68/0xb4)
> > > >>> [<c06702b8>] (bus_for_each_dev) from [<c06715ec>]
> > > >>> (bus_add_driver+0x158/0x214)
> > > >>> [<c06715ec>] (bus_add_driver) from [<c0673d20>]
> > > >>> (driver_register+0x78/0x110)
> > > >>> [<c0673d20>] (driver_register) from [<c0102484>]
> > > >>> (do_one_initcall+0x8c/0x42c)
> > > >>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
> > > >>> (kernel_init_freeable+0x190/0x1dc)
> > > >>> [<c11011c0>] (kernel_init_freeable) from [<c0af7988>]
> > > >>> (kernel_init+0x8/0x118)
> > > >>> [<c0af7988>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
> > > >>> Exception stack(0xef0dffb0 to 0xef0dfff8)
> > > >>> ...
> > > >>> ---[ end trace c06e996ec2e8234d ]---
> > > >>>
> > > >>> This means that dsi->encoder.dev is not initialized in
> > > >>> __exynos_dsi_host_attach().
> > > >>>
> > > >>> This happens, because drm_bridge_attach() in exynos_dsi_bind() returned
> > > >>> earlier -517 (deferred probe), what causes cleanup of encoder and
> > > >>> release of all drm resources.
> > > >>>
> > > >>> Then however, the panel tries to register itself and
> > > >>> exynos_dsi_host_attach() tries to access the released encoder (which is
> > > >>> zeroed in drm_encoder_release) and rest of resources, what causes
> > > >>> failure.
> > > >>>
> > > >>> It looks that something is missing. Maybe mipi host has to be
> > > >>> registered
> > > >>> later, when bridge is ready? I have no idea how it is handled before
> > > >>> this patch. Andrzej, could you comment it a bit?
> > > >> I intentionally changed the order, because if another bridge follows
> > > >> in the
> > > >> pipeline, the probe of the drm driver has to be deferred until some
> > > >> bridge
> > > >> provides a connector. The next bridge registers itself via the
> > > >> host_attach
> > > >> function and the deferral is ensured via the bind for the bind/unbind
> > > >> API or
> > > >> the bridge_attach function otherwise.
> > > >>
> > > >> On the other hand, the bridge does not have an encoder until the mipi
> > > >> device
> > > >> has been attached.
> > > >>
> > > >> As a solution, the exynos dsi driver must initialize the encoder in
> > > >> exynos_dsi_probe instead of in exynos_dsi_bind and access the encoder
> > > >> via
> > > >> exynos_dsi instead of the bridge.
> > > >>
> > > >> Can you try to move everything except samsung_dsim_bind from
> > > >> exynos_dsi_bind
> > > >> to exynos_dsi_probe (respectively for unbind) and report if it fixes the
> > > >> crash.
> > > >
> > > >
> > > > The original behaviour is that encoder (exynos_dsi) is registered
> > > > regardless of sink presence (initially panel, later also bridge) - it
> > > > avoids multiple issues with deferred probe, device driver bind/unbind
> > > > and module load/unload. Appearance or disappearance of sink is
> > > > reported to host nicely via DSI attach/detach callbacks - and it is
> > > > reflected in drm world as change state of the connector.
> > > >
> > > > Registering DSI host in bind and unregistering in unbind assures that
> > > > if mipi_dsi device is attached/detached the drm device is always
> > > > present - it makes device/driver binding race free and allows to avoid
> > > > additional locking.
> > > >
> > > > Moving DSI host registration to probe changes everything, for sure it
> > > > breaks the nice feature of DSI attach/detach callbacks and apparently
> > > > can cause different issues depending on device bind order.
> > > >
> > > > I will try to look at the patches tomorrow and maybe I can find more
> > > > constructive comments :)
> > >
> > >
> > > As I said yesterday, exynos_dsi driver uses dsi host attach/detach
> > > callbacks to control appearance/disappearance of downstream device. It
> > > allows to:
> > >
> > > 1. Safely bind/unbind different device drivers at any time and at any
> > > order, without killing exynos_drm and/or crashing system.
> > >
> > > 2. Avoid issues with late drm init - on some platforms exynos_drm device
> > > appeared too late, due to deferred probe, and resulted in black screen
> > > in userspace.
> > >
> > >
> > > Now if we want to convert exynos_dsi to drm_bridge I see following options:
> > >
> > > A. Forgot about callbacks and make the exynos_drm to defer probing until
> > > exynos_dsi bridge is available, probably it will cause later exynos_drm
> > > appearance, thus probably black screen on some targets. So for sure it
> > > will be suboptimal. Making it bridge unbind safe would be another
> > > problem, but most developers do not care about it so why should we? :)
> > >
> > > B. Try to mimic current behaviour - exynos_dsi register bridge ASAP,
> > > even if downstream devices are not yet attached, on attach/detach notify
> > > drm about it via connector status change, for this dsi_host registration
> > > should be performed from drm_bridge attach, I guess.
> > >
> > >
> > > Option A is more standard, but is unsafe and causes other issues.
> > >
> > > Option B keeps current behaviour.
> >
> > Maybe we can have both, but I am not sure, if I am missing something:
> >
> > I still prefer option A for the samsung-dsim driver, because it is more
> > standard, simpler and avoids issues with encoders, connectors or handling
> > hotplug.
> >
> > The idea is to use two bridges in the exynos-dsi driver: One bridge in the
> > samsung-dsim driver which implements option A and defers probing of the drm
> > driver until the next bridge is attached. And a second bridge in the
> > exynos_dsi that attaches to the first bridge (thus, allowing the exynos_drm
> > device to appear) and implements the hotplug handling for notifying drm via
> > connector status change.
> >
> > The driver for the i.MX8M would use the samsung-dsim bridge without an
> > additional bridge.
> >
> > This allows the samsung-dsim driver to expose the standard behavior while the
> > exynos_dsi may stick to the existing behavior for the exynos_drm driver.
> >
> > I hope this makes sense and does not sound too crazy. It might be difficult to
> > get the probing and mipi host/device registration correct, but I will try, if
> > this can work.
>
> Adding two bridges for being able to support hotplugging adds many special
> cases to the bridge driver and still requires more custom API to correctly add
> the second bridge. I don't think that this a viable path to go.

Just jumping in here: You cannot hotplug/hotremove anything from a
drm_device after drm_dev_register has been called, except
drm_connector. I didn't dig into details here so not sure whether you
want to late-bind your bridge after drm_dev_register is called or not,
so might just be fyi and not relevant to the discussion.
-Daniel

>
> This leaves us with:
>
> Option A) Standard drm_bridge behavior, which is currently implemented, but
> incompatible with the currently expected behavior of exynos_drm.
>
> Option B) Creating the drm device without all bridges being attached, which
> would work with the exynos_drm driver, but breaks for the standard drm_bridge
> behavior, especially, if the encoder/connector is created at the beginning of
> the pipeline and passed downwards when the bridges are attached.
>
> Option C) Extracting only low level register accesses into shared code, adding
> a custom interface and implementing the drm_bridge handling in the platform
> specific code.
>
> None of the options really convinces me.
>
> Michael
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Michael Tretter Feb. 4, 2021, 10:56 a.m. UTC | #7
On Thu, 04 Feb 2021 11:17:49 +0100, Daniel Vetter wrote:
> On Wed, Feb 3, 2021 at 9:32 PM Michael Tretter <m.tretter@pengutronix.de> wrote:
> >
> > On Mon, 01 Feb 2021 17:33:14 +0100, Michael Tretter wrote:
> > > On Tue, 15 Sep 2020 21:40:40 +0200, Andrzej Hajda wrote:
> > > > W dniu 14.09.2020 o 23:19, Andrzej Hajda pisze:
> > > > > On 14.09.2020 22:01, Michael Tretter wrote:
> > > > >> On Mon, 14 Sep 2020 14:31:19 +0200, Marek Szyprowski wrote:
> > > > >>> On 14.09.2020 10:29, Marek Szyprowski wrote:
> > > > >>>> On 11.09.2020 15:54, Michael Tretter wrote:
> > > > >>>>> Make the exynos_dsi driver a full drm bridge that can be found and
> > > > >>>>> used
> > > > >>>>> from other drivers.
> > > > >>>>>
> > > > >>>>> Other drivers can only attach to the bridge, if a mipi dsi device
> > > > >>>>> already attached to the bridge. This allows to defer the probe of the
> > > > >>>>> display pipe until the downstream bridges are available, too.
> > > > >>>>>
> > > > >>>>> Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
> > > > >>>> This one (and the whole series applied) still fails on Exynos boards:
> > > > >>>>
> > > > >>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping
> > > > >>>> operations
> > > > >>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
> > > > >>>> OF: graph: no port node found in /soc/dsi@11c80000
> > > > >>>> 8<--- cut here ---
> > > > >>>> Unable to handle kernel NULL pointer dereference at virtual address
> > > > >>>> 00000084
> > > > >>>> pgd = (ptrval)
> > > > >>>> [00000084] *pgd=00000000
> > > > >>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
> > > > >>>> Modules linked in:
> > > > >>>> CPU: 1 PID: 1 Comm: swapper/0 Not tainted
> > > > >>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec #1608
> > > > >>>> Hardware name: Samsung Exynos (Flattened Device Tree)
> > > > >>>> PC is at drm_bridge_attach+0x18/0x164
> > > > >>>> LR is at exynos_dsi_bind+0x88/0xa8
> > > > >>>> pc : [<c0628c08>]    lr : [<c064d560>]    psr: 20000013
> > > > >>>> sp : ef0dfca8  ip : 00000002  fp : c13190e0
> > > > >>>> r10: 00000000  r9 : ee46d580  r8 : c13190e0
> > > > >>>> r7 : ee438800  r6 : 00000018  r5 : ef253810  r4 : ef39e840
> > > > >>>> r3 : 00000000  r2 : 00000018  r1 : ef39e888  r0 : ef39e840
> > > > >>>> Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> > > > >>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
> > > > >>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
> > > > >>>> Stack: (0xef0dfca8 to 0xef0e0000)
> > > > >>>> ...
> > > > >>>> [<c0628c08>] (drm_bridge_attach) from [<c064d560>]
> > > > >>>> (exynos_dsi_bind+0x88/0xa8)
> > > > >>>> [<c064d560>] (exynos_dsi_bind) from [<c066a800>]
> > > > >>>> (component_bind_all+0xfc/0x290)
> > > > >>>> [<c066a800>] (component_bind_all) from [<c0649dc0>]
> > > > >>>> (exynos_drm_bind+0xe4/0x19c)
> > > > >>>> [<c0649dc0>] (exynos_drm_bind) from [<c066ad74>]
> > > > >>>> (try_to_bring_up_master+0x1e4/0x2c4)
> > > > >>>> [<c066ad74>] (try_to_bring_up_master) from [<c066b2b4>]
> > > > >>>> (component_master_add_with_match+0xd4/0x108)
> > > > >>>> [<c066b2b4>] (component_master_add_with_match) from [<c0649ae8>]
> > > > >>>> (exynos_drm_platform_probe+0xe4/0x110)
> > > > >>>> [<c0649ae8>] (exynos_drm_platform_probe) from [<c0674e6c>]
> > > > >>>> (platform_drv_probe+0x6c/0xa4)
> > > > >>>> [<c0674e6c>] (platform_drv_probe) from [<c067242c>]
> > > > >>>> (really_probe+0x200/0x4fc)
> > > > >>>> [<c067242c>] (really_probe) from [<c06728f0>]
> > > > >>>> (driver_probe_device+0x78/0x1fc)
> > > > >>>> [<c06728f0>] (driver_probe_device) from [<c0672cd8>]
> > > > >>>> (device_driver_attach+0x58/0x60)
> > > > >>>> [<c0672cd8>] (device_driver_attach) from [<c0672dbc>]
> > > > >>>> (__driver_attach+0xdc/0x174)
> > > > >>>> [<c0672dbc>] (__driver_attach) from [<c06701b4>]
> > > > >>>> (bus_for_each_dev+0x68/0xb4)
> > > > >>>> [<c06701b4>] (bus_for_each_dev) from [<c06714e8>]
> > > > >>>> (bus_add_driver+0x158/0x214)
> > > > >>>> [<c06714e8>] (bus_add_driver) from [<c0673c1c>]
> > > > >>>> (driver_register+0x78/0x110)
> > > > >>>> [<c0673c1c>] (driver_register) from [<c0649ca8>]
> > > > >>>> (exynos_drm_init+0xe4/0x118)
> > > > >>>> [<c0649ca8>] (exynos_drm_init) from [<c0102484>]
> > > > >>>> (do_one_initcall+0x8c/0x42c)
> > > > >>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
> > > > >>>> (kernel_init_freeable+0x190/0x1dc)
> > > > >>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7880>]
> > > > >>>> (kernel_init+0x8/0x118)
> > > > >>>> [<c0af7880>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
> > > > >>>> Exception stack(0xef0dffb0 to 0xef0dfff8)
> > > > >>>> ...
> > > > >>>> ---[ end trace ee27f313f9ed9da1 ]---
> > > > >>>>
> > > > >>>> # arm-linux-gnueabi-addr2line -e vmlinux c0628c08
> > > > >>>> drivers/gpu/drm/drm_bridge.c:184 (discriminator 1)
> > > > >>>>
> > > > >>>> I will try to debug it a bit more today.
> > > > >>> The above crash has been caused by lack of in_bridge initialization to
> > > > >>> NULL in exynos_dsi_bind() in this patch. However, fixing it reveals
> > > > >>> another issue:
> > > > >>>
> > > > >>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping operations
> > > > >>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
> > > > >>> OF: graph: no port node found in /soc/dsi@11c80000
> > > > >>> 8<--- cut here ---
> > > > >>> Unable to handle kernel NULL pointer dereference at virtual address
> > > > >>> 00000280
> > > > >>> pgd = (ptrval)
> > > > >>> [00000280] *pgd=00000000
> > > > >>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
> > > > >>> Modules linked in:
> > > > >>> CPU: 0 PID: 1 Comm: swapper/0 Not tainted
> > > > >>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec-dirty #1613
> > > > >>> Hardware name: Samsung Exynos (Flattened Device Tree)
> > > > >>> PC is at __mutex_lock+0x54/0xb18
> > > > >>> LR is at lock_is_held_type+0x80/0x138
> > > > >>> pc : [<c0afc920>]    lr : [<c0af63e8>]    psr: 60000013
> > > > >>> sp : ef0dfd30  ip : 33937b74  fp : c13193c8
> > > > >>> r10: c1208eec  r9 : 00000000  r8 : ee45f808
> > > > >>> r7 : c19561a4  r6 : 00000000  r5 : 00000000  r4 : 0000024c
> > > > >>> r3 : 00000000  r2 : 00204140  r1 : c124f13c  r0 : 00000000
> > > > >>> Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> > > > >>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
> > > > >>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
> > > > >>> Stack: (0xef0dfd30 to 0xef0e0000)
> > > > >>> ...
> > > > >>> [<c0afc920>] (__mutex_lock) from [<c0afd400>]
> > > > >>> (mutex_lock_nested+0x1c/0x24)
> > > > >>> [<c0afd400>] (mutex_lock_nested) from [<c064d4b8>]
> > > > >>> (__exynos_dsi_host_attach+0x20/0x6c)
> > > > >>> [<c064d4b8>] (__exynos_dsi_host_attach) from [<c064d914>]
> > > > >>> (exynos_dsi_host_attach+0x70/0x194)
> > > > >>> [<c064d914>] (exynos_dsi_host_attach) from [<c0656b64>]
> > > > >>> (s6e8aa0_probe+0x1b0/0x218)
> > > > >>> [<c0656b64>] (s6e8aa0_probe) from [<c0672530>]
> > > > >>> (really_probe+0x200/0x4fc)
> > > > >>> [<c0672530>] (really_probe) from [<c06729f4>]
> > > > >>> (driver_probe_device+0x78/0x1fc)
> > > > >>> [<c06729f4>] (driver_probe_device) from [<c0672ddc>]
> > > > >>> (device_driver_attach+0x58/0x60)
> > > > >>> [<c0672ddc>] (device_driver_attach) from [<c0672ec0>]
> > > > >>> (__driver_attach+0xdc/0x174)
> > > > >>> [<c0672ec0>] (__driver_attach) from [<c06702b8>]
> > > > >>> (bus_for_each_dev+0x68/0xb4)
> > > > >>> [<c06702b8>] (bus_for_each_dev) from [<c06715ec>]
> > > > >>> (bus_add_driver+0x158/0x214)
> > > > >>> [<c06715ec>] (bus_add_driver) from [<c0673d20>]
> > > > >>> (driver_register+0x78/0x110)
> > > > >>> [<c0673d20>] (driver_register) from [<c0102484>]
> > > > >>> (do_one_initcall+0x8c/0x42c)
> > > > >>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
> > > > >>> (kernel_init_freeable+0x190/0x1dc)
> > > > >>> [<c11011c0>] (kernel_init_freeable) from [<c0af7988>]
> > > > >>> (kernel_init+0x8/0x118)
> > > > >>> [<c0af7988>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
> > > > >>> Exception stack(0xef0dffb0 to 0xef0dfff8)
> > > > >>> ...
> > > > >>> ---[ end trace c06e996ec2e8234d ]---
> > > > >>>
> > > > >>> This means that dsi->encoder.dev is not initialized in
> > > > >>> __exynos_dsi_host_attach().
> > > > >>>
> > > > >>> This happens, because drm_bridge_attach() in exynos_dsi_bind() returned
> > > > >>> earlier -517 (deferred probe), what causes cleanup of encoder and
> > > > >>> release of all drm resources.
> > > > >>>
> > > > >>> Then however, the panel tries to register itself and
> > > > >>> exynos_dsi_host_attach() tries to access the released encoder (which is
> > > > >>> zeroed in drm_encoder_release) and rest of resources, what causes
> > > > >>> failure.
> > > > >>>
> > > > >>> It looks that something is missing. Maybe mipi host has to be
> > > > >>> registered
> > > > >>> later, when bridge is ready? I have no idea how it is handled before
> > > > >>> this patch. Andrzej, could you comment it a bit?
> > > > >> I intentionally changed the order, because if another bridge follows
> > > > >> in the
> > > > >> pipeline, the probe of the drm driver has to be deferred until some
> > > > >> bridge
> > > > >> provides a connector. The next bridge registers itself via the
> > > > >> host_attach
> > > > >> function and the deferral is ensured via the bind for the bind/unbind
> > > > >> API or
> > > > >> the bridge_attach function otherwise.
> > > > >>
> > > > >> On the other hand, the bridge does not have an encoder until the mipi
> > > > >> device
> > > > >> has been attached.
> > > > >>
> > > > >> As a solution, the exynos dsi driver must initialize the encoder in
> > > > >> exynos_dsi_probe instead of in exynos_dsi_bind and access the encoder
> > > > >> via
> > > > >> exynos_dsi instead of the bridge.
> > > > >>
> > > > >> Can you try to move everything except samsung_dsim_bind from
> > > > >> exynos_dsi_bind
> > > > >> to exynos_dsi_probe (respectively for unbind) and report if it fixes the
> > > > >> crash.
> > > > >
> > > > >
> > > > > The original behaviour is that encoder (exynos_dsi) is registered
> > > > > regardless of sink presence (initially panel, later also bridge) - it
> > > > > avoids multiple issues with deferred probe, device driver bind/unbind
> > > > > and module load/unload. Appearance or disappearance of sink is
> > > > > reported to host nicely via DSI attach/detach callbacks - and it is
> > > > > reflected in drm world as change state of the connector.
> > > > >
> > > > > Registering DSI host in bind and unregistering in unbind assures that
> > > > > if mipi_dsi device is attached/detached the drm device is always
> > > > > present - it makes device/driver binding race free and allows to avoid
> > > > > additional locking.
> > > > >
> > > > > Moving DSI host registration to probe changes everything, for sure it
> > > > > breaks the nice feature of DSI attach/detach callbacks and apparently
> > > > > can cause different issues depending on device bind order.
> > > > >
> > > > > I will try to look at the patches tomorrow and maybe I can find more
> > > > > constructive comments :)
> > > >
> > > >
> > > > As I said yesterday, exynos_dsi driver uses dsi host attach/detach
> > > > callbacks to control appearance/disappearance of downstream device. It
> > > > allows to:
> > > >
> > > > 1. Safely bind/unbind different device drivers at any time and at any
> > > > order, without killing exynos_drm and/or crashing system.
> > > >
> > > > 2. Avoid issues with late drm init - on some platforms exynos_drm device
> > > > appeared too late, due to deferred probe, and resulted in black screen
> > > > in userspace.
> > > >
> > > >
> > > > Now if we want to convert exynos_dsi to drm_bridge I see following options:
> > > >
> > > > A. Forgot about callbacks and make the exynos_drm to defer probing until
> > > > exynos_dsi bridge is available, probably it will cause later exynos_drm
> > > > appearance, thus probably black screen on some targets. So for sure it
> > > > will be suboptimal. Making it bridge unbind safe would be another
> > > > problem, but most developers do not care about it so why should we? :)
> > > >
> > > > B. Try to mimic current behaviour - exynos_dsi register bridge ASAP,
> > > > even if downstream devices are not yet attached, on attach/detach notify
> > > > drm about it via connector status change, for this dsi_host registration
> > > > should be performed from drm_bridge attach, I guess.
> > > >
> > > >
> > > > Option A is more standard, but is unsafe and causes other issues.
> > > >
> > > > Option B keeps current behaviour.
> > >
> > > Maybe we can have both, but I am not sure, if I am missing something:
> > >
> > > I still prefer option A for the samsung-dsim driver, because it is more
> > > standard, simpler and avoids issues with encoders, connectors or handling
> > > hotplug.
> > >
> > > The idea is to use two bridges in the exynos-dsi driver: One bridge in the
> > > samsung-dsim driver which implements option A and defers probing of the drm
> > > driver until the next bridge is attached. And a second bridge in the
> > > exynos_dsi that attaches to the first bridge (thus, allowing the exynos_drm
> > > device to appear) and implements the hotplug handling for notifying drm via
> > > connector status change.
> > >
> > > The driver for the i.MX8M would use the samsung-dsim bridge without an
> > > additional bridge.
> > >
> > > This allows the samsung-dsim driver to expose the standard behavior while the
> > > exynos_dsi may stick to the existing behavior for the exynos_drm driver.
> > >
> > > I hope this makes sense and does not sound too crazy. It might be difficult to
> > > get the probing and mipi host/device registration correct, but I will try, if
> > > this can work.
> >
> > Adding two bridges for being able to support hotplugging adds many special
> > cases to the bridge driver and still requires more custom API to correctly add
> > the second bridge. I don't think that this a viable path to go.
> 
> Just jumping in here: You cannot hotplug/hotremove anything from a
> drm_device after drm_dev_register has been called, except
> drm_connector. I didn't dig into details here so not sure whether you
> want to late-bind your bridge after drm_dev_register is called or not,
> so might just be fyi and not relevant to the discussion.

Thanks. AFAIC that is exactly what is currently implemented in the exynos_drm
driver (i.e. Option B)

exynos_dsi_bind configures the encoder and registers a DSI host. Afterwards,
exynos_drm_bind (as component_master_ops) calls drm_dev_register. Later, a DSI
device might attach to the DSI host and call exynos_dsi_host_attach. In
exynos_dsi_host_attach, the driver finds the drm_bridge for the DSI device and
attaches this bridge to the encoder _after_ drm_dev_register has been called.
This is invalid behavior, right?

Michael

> -Daniel
> 
> >
> > This leaves us with:
> >
> > Option A) Standard drm_bridge behavior, which is currently implemented, but
> > incompatible with the currently expected behavior of exynos_drm.
> >
> > Option B) Creating the drm device without all bridges being attached, which
> > would work with the exynos_drm driver, but breaks for the standard drm_bridge
> > behavior, especially, if the encoder/connector is created at the beginning of
> > the pipeline and passed downwards when the bridges are attached.
> >
> > Option C) Extracting only low level register accesses into shared code, adding
> > a custom interface and implementing the drm_bridge handling in the platform
> > specific code.
> >
> > None of the options really convinces me.
> >
> > Michael
> > _______________________________________________
> > dri-devel mailing list
> > dri-devel@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/dri-devel
Daniel Vetter Feb. 4, 2021, 4:05 p.m. UTC | #8
On Thu, Feb 04, 2021 at 11:56:32AM +0100, Michael Tretter wrote:
> On Thu, 04 Feb 2021 11:17:49 +0100, Daniel Vetter wrote:
> > On Wed, Feb 3, 2021 at 9:32 PM Michael Tretter <m.tretter@pengutronix.de> wrote:
> > >
> > > On Mon, 01 Feb 2021 17:33:14 +0100, Michael Tretter wrote:
> > > > On Tue, 15 Sep 2020 21:40:40 +0200, Andrzej Hajda wrote:
> > > > > W dniu 14.09.2020 o 23:19, Andrzej Hajda pisze:
> > > > > > On 14.09.2020 22:01, Michael Tretter wrote:
> > > > > >> On Mon, 14 Sep 2020 14:31:19 +0200, Marek Szyprowski wrote:
> > > > > >>> On 14.09.2020 10:29, Marek Szyprowski wrote:
> > > > > >>>> On 11.09.2020 15:54, Michael Tretter wrote:
> > > > > >>>>> Make the exynos_dsi driver a full drm bridge that can be found and
> > > > > >>>>> used
> > > > > >>>>> from other drivers.
> > > > > >>>>>
> > > > > >>>>> Other drivers can only attach to the bridge, if a mipi dsi device
> > > > > >>>>> already attached to the bridge. This allows to defer the probe of the
> > > > > >>>>> display pipe until the downstream bridges are available, too.
> > > > > >>>>>
> > > > > >>>>> Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
> > > > > >>>> This one (and the whole series applied) still fails on Exynos boards:
> > > > > >>>>
> > > > > >>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping
> > > > > >>>> operations
> > > > > >>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
> > > > > >>>> OF: graph: no port node found in /soc/dsi@11c80000
> > > > > >>>> 8<--- cut here ---
> > > > > >>>> Unable to handle kernel NULL pointer dereference at virtual address
> > > > > >>>> 00000084
> > > > > >>>> pgd = (ptrval)
> > > > > >>>> [00000084] *pgd=00000000
> > > > > >>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
> > > > > >>>> Modules linked in:
> > > > > >>>> CPU: 1 PID: 1 Comm: swapper/0 Not tainted
> > > > > >>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec #1608
> > > > > >>>> Hardware name: Samsung Exynos (Flattened Device Tree)
> > > > > >>>> PC is at drm_bridge_attach+0x18/0x164
> > > > > >>>> LR is at exynos_dsi_bind+0x88/0xa8
> > > > > >>>> pc : [<c0628c08>]    lr : [<c064d560>]    psr: 20000013
> > > > > >>>> sp : ef0dfca8  ip : 00000002  fp : c13190e0
> > > > > >>>> r10: 00000000  r9 : ee46d580  r8 : c13190e0
> > > > > >>>> r7 : ee438800  r6 : 00000018  r5 : ef253810  r4 : ef39e840
> > > > > >>>> r3 : 00000000  r2 : 00000018  r1 : ef39e888  r0 : ef39e840
> > > > > >>>> Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> > > > > >>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
> > > > > >>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
> > > > > >>>> Stack: (0xef0dfca8 to 0xef0e0000)
> > > > > >>>> ...
> > > > > >>>> [<c0628c08>] (drm_bridge_attach) from [<c064d560>]
> > > > > >>>> (exynos_dsi_bind+0x88/0xa8)
> > > > > >>>> [<c064d560>] (exynos_dsi_bind) from [<c066a800>]
> > > > > >>>> (component_bind_all+0xfc/0x290)
> > > > > >>>> [<c066a800>] (component_bind_all) from [<c0649dc0>]
> > > > > >>>> (exynos_drm_bind+0xe4/0x19c)
> > > > > >>>> [<c0649dc0>] (exynos_drm_bind) from [<c066ad74>]
> > > > > >>>> (try_to_bring_up_master+0x1e4/0x2c4)
> > > > > >>>> [<c066ad74>] (try_to_bring_up_master) from [<c066b2b4>]
> > > > > >>>> (component_master_add_with_match+0xd4/0x108)
> > > > > >>>> [<c066b2b4>] (component_master_add_with_match) from [<c0649ae8>]
> > > > > >>>> (exynos_drm_platform_probe+0xe4/0x110)
> > > > > >>>> [<c0649ae8>] (exynos_drm_platform_probe) from [<c0674e6c>]
> > > > > >>>> (platform_drv_probe+0x6c/0xa4)
> > > > > >>>> [<c0674e6c>] (platform_drv_probe) from [<c067242c>]
> > > > > >>>> (really_probe+0x200/0x4fc)
> > > > > >>>> [<c067242c>] (really_probe) from [<c06728f0>]
> > > > > >>>> (driver_probe_device+0x78/0x1fc)
> > > > > >>>> [<c06728f0>] (driver_probe_device) from [<c0672cd8>]
> > > > > >>>> (device_driver_attach+0x58/0x60)
> > > > > >>>> [<c0672cd8>] (device_driver_attach) from [<c0672dbc>]
> > > > > >>>> (__driver_attach+0xdc/0x174)
> > > > > >>>> [<c0672dbc>] (__driver_attach) from [<c06701b4>]
> > > > > >>>> (bus_for_each_dev+0x68/0xb4)
> > > > > >>>> [<c06701b4>] (bus_for_each_dev) from [<c06714e8>]
> > > > > >>>> (bus_add_driver+0x158/0x214)
> > > > > >>>> [<c06714e8>] (bus_add_driver) from [<c0673c1c>]
> > > > > >>>> (driver_register+0x78/0x110)
> > > > > >>>> [<c0673c1c>] (driver_register) from [<c0649ca8>]
> > > > > >>>> (exynos_drm_init+0xe4/0x118)
> > > > > >>>> [<c0649ca8>] (exynos_drm_init) from [<c0102484>]
> > > > > >>>> (do_one_initcall+0x8c/0x42c)
> > > > > >>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
> > > > > >>>> (kernel_init_freeable+0x190/0x1dc)
> > > > > >>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7880>]
> > > > > >>>> (kernel_init+0x8/0x118)
> > > > > >>>> [<c0af7880>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
> > > > > >>>> Exception stack(0xef0dffb0 to 0xef0dfff8)
> > > > > >>>> ...
> > > > > >>>> ---[ end trace ee27f313f9ed9da1 ]---
> > > > > >>>>
> > > > > >>>> # arm-linux-gnueabi-addr2line -e vmlinux c0628c08
> > > > > >>>> drivers/gpu/drm/drm_bridge.c:184 (discriminator 1)
> > > > > >>>>
> > > > > >>>> I will try to debug it a bit more today.
> > > > > >>> The above crash has been caused by lack of in_bridge initialization to
> > > > > >>> NULL in exynos_dsi_bind() in this patch. However, fixing it reveals
> > > > > >>> another issue:
> > > > > >>>
> > > > > >>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping operations
> > > > > >>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
> > > > > >>> OF: graph: no port node found in /soc/dsi@11c80000
> > > > > >>> 8<--- cut here ---
> > > > > >>> Unable to handle kernel NULL pointer dereference at virtual address
> > > > > >>> 00000280
> > > > > >>> pgd = (ptrval)
> > > > > >>> [00000280] *pgd=00000000
> > > > > >>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
> > > > > >>> Modules linked in:
> > > > > >>> CPU: 0 PID: 1 Comm: swapper/0 Not tainted
> > > > > >>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec-dirty #1613
> > > > > >>> Hardware name: Samsung Exynos (Flattened Device Tree)
> > > > > >>> PC is at __mutex_lock+0x54/0xb18
> > > > > >>> LR is at lock_is_held_type+0x80/0x138
> > > > > >>> pc : [<c0afc920>]    lr : [<c0af63e8>]    psr: 60000013
> > > > > >>> sp : ef0dfd30  ip : 33937b74  fp : c13193c8
> > > > > >>> r10: c1208eec  r9 : 00000000  r8 : ee45f808
> > > > > >>> r7 : c19561a4  r6 : 00000000  r5 : 00000000  r4 : 0000024c
> > > > > >>> r3 : 00000000  r2 : 00204140  r1 : c124f13c  r0 : 00000000
> > > > > >>> Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> > > > > >>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
> > > > > >>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
> > > > > >>> Stack: (0xef0dfd30 to 0xef0e0000)
> > > > > >>> ...
> > > > > >>> [<c0afc920>] (__mutex_lock) from [<c0afd400>]
> > > > > >>> (mutex_lock_nested+0x1c/0x24)
> > > > > >>> [<c0afd400>] (mutex_lock_nested) from [<c064d4b8>]
> > > > > >>> (__exynos_dsi_host_attach+0x20/0x6c)
> > > > > >>> [<c064d4b8>] (__exynos_dsi_host_attach) from [<c064d914>]
> > > > > >>> (exynos_dsi_host_attach+0x70/0x194)
> > > > > >>> [<c064d914>] (exynos_dsi_host_attach) from [<c0656b64>]
> > > > > >>> (s6e8aa0_probe+0x1b0/0x218)
> > > > > >>> [<c0656b64>] (s6e8aa0_probe) from [<c0672530>]
> > > > > >>> (really_probe+0x200/0x4fc)
> > > > > >>> [<c0672530>] (really_probe) from [<c06729f4>]
> > > > > >>> (driver_probe_device+0x78/0x1fc)
> > > > > >>> [<c06729f4>] (driver_probe_device) from [<c0672ddc>]
> > > > > >>> (device_driver_attach+0x58/0x60)
> > > > > >>> [<c0672ddc>] (device_driver_attach) from [<c0672ec0>]
> > > > > >>> (__driver_attach+0xdc/0x174)
> > > > > >>> [<c0672ec0>] (__driver_attach) from [<c06702b8>]
> > > > > >>> (bus_for_each_dev+0x68/0xb4)
> > > > > >>> [<c06702b8>] (bus_for_each_dev) from [<c06715ec>]
> > > > > >>> (bus_add_driver+0x158/0x214)
> > > > > >>> [<c06715ec>] (bus_add_driver) from [<c0673d20>]
> > > > > >>> (driver_register+0x78/0x110)
> > > > > >>> [<c0673d20>] (driver_register) from [<c0102484>]
> > > > > >>> (do_one_initcall+0x8c/0x42c)
> > > > > >>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
> > > > > >>> (kernel_init_freeable+0x190/0x1dc)
> > > > > >>> [<c11011c0>] (kernel_init_freeable) from [<c0af7988>]
> > > > > >>> (kernel_init+0x8/0x118)
> > > > > >>> [<c0af7988>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
> > > > > >>> Exception stack(0xef0dffb0 to 0xef0dfff8)
> > > > > >>> ...
> > > > > >>> ---[ end trace c06e996ec2e8234d ]---
> > > > > >>>
> > > > > >>> This means that dsi->encoder.dev is not initialized in
> > > > > >>> __exynos_dsi_host_attach().
> > > > > >>>
> > > > > >>> This happens, because drm_bridge_attach() in exynos_dsi_bind() returned
> > > > > >>> earlier -517 (deferred probe), what causes cleanup of encoder and
> > > > > >>> release of all drm resources.
> > > > > >>>
> > > > > >>> Then however, the panel tries to register itself and
> > > > > >>> exynos_dsi_host_attach() tries to access the released encoder (which is
> > > > > >>> zeroed in drm_encoder_release) and rest of resources, what causes
> > > > > >>> failure.
> > > > > >>>
> > > > > >>> It looks that something is missing. Maybe mipi host has to be
> > > > > >>> registered
> > > > > >>> later, when bridge is ready? I have no idea how it is handled before
> > > > > >>> this patch. Andrzej, could you comment it a bit?
> > > > > >> I intentionally changed the order, because if another bridge follows
> > > > > >> in the
> > > > > >> pipeline, the probe of the drm driver has to be deferred until some
> > > > > >> bridge
> > > > > >> provides a connector. The next bridge registers itself via the
> > > > > >> host_attach
> > > > > >> function and the deferral is ensured via the bind for the bind/unbind
> > > > > >> API or
> > > > > >> the bridge_attach function otherwise.
> > > > > >>
> > > > > >> On the other hand, the bridge does not have an encoder until the mipi
> > > > > >> device
> > > > > >> has been attached.
> > > > > >>
> > > > > >> As a solution, the exynos dsi driver must initialize the encoder in
> > > > > >> exynos_dsi_probe instead of in exynos_dsi_bind and access the encoder
> > > > > >> via
> > > > > >> exynos_dsi instead of the bridge.
> > > > > >>
> > > > > >> Can you try to move everything except samsung_dsim_bind from
> > > > > >> exynos_dsi_bind
> > > > > >> to exynos_dsi_probe (respectively for unbind) and report if it fixes the
> > > > > >> crash.
> > > > > >
> > > > > >
> > > > > > The original behaviour is that encoder (exynos_dsi) is registered
> > > > > > regardless of sink presence (initially panel, later also bridge) - it
> > > > > > avoids multiple issues with deferred probe, device driver bind/unbind
> > > > > > and module load/unload. Appearance or disappearance of sink is
> > > > > > reported to host nicely via DSI attach/detach callbacks - and it is
> > > > > > reflected in drm world as change state of the connector.
> > > > > >
> > > > > > Registering DSI host in bind and unregistering in unbind assures that
> > > > > > if mipi_dsi device is attached/detached the drm device is always
> > > > > > present - it makes device/driver binding race free and allows to avoid
> > > > > > additional locking.
> > > > > >
> > > > > > Moving DSI host registration to probe changes everything, for sure it
> > > > > > breaks the nice feature of DSI attach/detach callbacks and apparently
> > > > > > can cause different issues depending on device bind order.
> > > > > >
> > > > > > I will try to look at the patches tomorrow and maybe I can find more
> > > > > > constructive comments :)
> > > > >
> > > > >
> > > > > As I said yesterday, exynos_dsi driver uses dsi host attach/detach
> > > > > callbacks to control appearance/disappearance of downstream device. It
> > > > > allows to:
> > > > >
> > > > > 1. Safely bind/unbind different device drivers at any time and at any
> > > > > order, without killing exynos_drm and/or crashing system.
> > > > >
> > > > > 2. Avoid issues with late drm init - on some platforms exynos_drm device
> > > > > appeared too late, due to deferred probe, and resulted in black screen
> > > > > in userspace.
> > > > >
> > > > >
> > > > > Now if we want to convert exynos_dsi to drm_bridge I see following options:
> > > > >
> > > > > A. Forgot about callbacks and make the exynos_drm to defer probing until
> > > > > exynos_dsi bridge is available, probably it will cause later exynos_drm
> > > > > appearance, thus probably black screen on some targets. So for sure it
> > > > > will be suboptimal. Making it bridge unbind safe would be another
> > > > > problem, but most developers do not care about it so why should we? :)
> > > > >
> > > > > B. Try to mimic current behaviour - exynos_dsi register bridge ASAP,
> > > > > even if downstream devices are not yet attached, on attach/detach notify
> > > > > drm about it via connector status change, for this dsi_host registration
> > > > > should be performed from drm_bridge attach, I guess.
> > > > >
> > > > >
> > > > > Option A is more standard, but is unsafe and causes other issues.
> > > > >
> > > > > Option B keeps current behaviour.
> > > >
> > > > Maybe we can have both, but I am not sure, if I am missing something:
> > > >
> > > > I still prefer option A for the samsung-dsim driver, because it is more
> > > > standard, simpler and avoids issues with encoders, connectors or handling
> > > > hotplug.
> > > >
> > > > The idea is to use two bridges in the exynos-dsi driver: One bridge in the
> > > > samsung-dsim driver which implements option A and defers probing of the drm
> > > > driver until the next bridge is attached. And a second bridge in the
> > > > exynos_dsi that attaches to the first bridge (thus, allowing the exynos_drm
> > > > device to appear) and implements the hotplug handling for notifying drm via
> > > > connector status change.
> > > >
> > > > The driver for the i.MX8M would use the samsung-dsim bridge without an
> > > > additional bridge.
> > > >
> > > > This allows the samsung-dsim driver to expose the standard behavior while the
> > > > exynos_dsi may stick to the existing behavior for the exynos_drm driver.
> > > >
> > > > I hope this makes sense and does not sound too crazy. It might be difficult to
> > > > get the probing and mipi host/device registration correct, but I will try, if
> > > > this can work.
> > >
> > > Adding two bridges for being able to support hotplugging adds many special
> > > cases to the bridge driver and still requires more custom API to correctly add
> > > the second bridge. I don't think that this a viable path to go.
> > 
> > Just jumping in here: You cannot hotplug/hotremove anything from a
> > drm_device after drm_dev_register has been called, except
> > drm_connector. I didn't dig into details here so not sure whether you
> > want to late-bind your bridge after drm_dev_register is called or not,
> > so might just be fyi and not relevant to the discussion.
> 
> Thanks. AFAIC that is exactly what is currently implemented in the exynos_drm
> driver (i.e. Option B)
> 
> exynos_dsi_bind configures the encoder and registers a DSI host. Afterwards,
> exynos_drm_bind (as component_master_ops) calls drm_dev_register. Later, a DSI
> device might attach to the DSI host and call exynos_dsi_host_attach. In
> exynos_dsi_host_attach, the driver finds the drm_bridge for the DSI device and
> attaches this bridge to the encoder _after_ drm_dev_register has been called.
> This is invalid behavior, right?

Definitely not supported, I don't think we have the right locks in place
to make sure this works.

Now if your _only_ adding a drm_bridge (and not an encoder or anything
like that), and you are adding the drm_connector correctly (like a
hotplugged DP MST sink), then that would at least work from a uapi pov.
Because drm_bridge isn't exposed as an uapi object.

But yeah, as-is, don't :-)

The solution here is a bunch of EPROBE_DEFER handling until all your
bridges are loaded, with or without the assistance of component.c
framework. Only then call drm_dev_register.
-Daniel

> 
> Michael
> 
> > -Daniel
> > 
> > >
> > > This leaves us with:
> > >
> > > Option A) Standard drm_bridge behavior, which is currently implemented, but
> > > incompatible with the currently expected behavior of exynos_drm.
> > >
> > > Option B) Creating the drm device without all bridges being attached, which
> > > would work with the exynos_drm driver, but breaks for the standard drm_bridge
> > > behavior, especially, if the encoder/connector is created at the beginning of
> > > the pipeline and passed downwards when the bridges are attached.
> > >
> > > Option C) Extracting only low level register accesses into shared code, adding
> > > a custom interface and implementing the drm_bridge handling in the platform
> > > specific code.
> > >
> > > None of the options really convinces me.
> > >
> > > Michael
> > > _______________________________________________
> > > dri-devel mailing list
> > > dri-devel@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
Andrzej Hajda Feb. 4, 2021, 4:28 p.m. UTC | #9
W dniu 04.02.2021 o 17:05, Daniel Vetter pisze:
> On Thu, Feb 04, 2021 at 11:56:32AM +0100, Michael Tretter wrote:
>> On Thu, 04 Feb 2021 11:17:49 +0100, Daniel Vetter wrote:
>>> On Wed, Feb 3, 2021 at 9:32 PM Michael Tretter <m.tretter@pengutronix.de> wrote:
>>>> On Mon, 01 Feb 2021 17:33:14 +0100, Michael Tretter wrote:
>>>>> On Tue, 15 Sep 2020 21:40:40 +0200, Andrzej Hajda wrote:
>>>>>> W dniu 14.09.2020 o 23:19, Andrzej Hajda pisze:
>>>>>>> On 14.09.2020 22:01, Michael Tretter wrote:
>>>>>>>> On Mon, 14 Sep 2020 14:31:19 +0200, Marek Szyprowski wrote:
>>>>>>>>> On 14.09.2020 10:29, Marek Szyprowski wrote:
>>>>>>>>>> On 11.09.2020 15:54, Michael Tretter wrote:
>>>>>>>>>>> Make the exynos_dsi driver a full drm bridge that can be found and
>>>>>>>>>>> used
>>>>>>>>>>> from other drivers.
>>>>>>>>>>>
>>>>>>>>>>> Other drivers can only attach to the bridge, if a mipi dsi device
>>>>>>>>>>> already attached to the bridge. This allows to defer the probe of the
>>>>>>>>>>> display pipe until the downstream bridges are available, too.
>>>>>>>>>>>
>>>>>>>>>>> Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
>>>>>>>>>> This one (and the whole series applied) still fails on Exynos boards:
>>>>>>>>>>
>>>>>>>>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping
>>>>>>>>>> operations
>>>>>>>>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
>>>>>>>>>> OF: graph: no port node found in /soc/dsi@11c80000
>>>>>>>>>> 8<--- cut here ---
>>>>>>>>>> Unable to handle kernel NULL pointer dereference at virtual address
>>>>>>>>>> 00000084
>>>>>>>>>> pgd = (ptrval)
>>>>>>>>>> [00000084] *pgd=00000000
>>>>>>>>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
>>>>>>>>>> Modules linked in:
>>>>>>>>>> CPU: 1 PID: 1 Comm: swapper/0 Not tainted
>>>>>>>>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec #1608
>>>>>>>>>> Hardware name: Samsung Exynos (Flattened Device Tree)
>>>>>>>>>> PC is at drm_bridge_attach+0x18/0x164
>>>>>>>>>> LR is at exynos_dsi_bind+0x88/0xa8
>>>>>>>>>> pc : [<c0628c08>]    lr : [<c064d560>]    psr: 20000013
>>>>>>>>>> sp : ef0dfca8  ip : 00000002  fp : c13190e0
>>>>>>>>>> r10: 00000000  r9 : ee46d580  r8 : c13190e0
>>>>>>>>>> r7 : ee438800  r6 : 00000018  r5 : ef253810  r4 : ef39e840
>>>>>>>>>> r3 : 00000000  r2 : 00000018  r1 : ef39e888  r0 : ef39e840
>>>>>>>>>> Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
>>>>>>>>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
>>>>>>>>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
>>>>>>>>>> Stack: (0xef0dfca8 to 0xef0e0000)
>>>>>>>>>> ...
>>>>>>>>>> [<c0628c08>] (drm_bridge_attach) from [<c064d560>]
>>>>>>>>>> (exynos_dsi_bind+0x88/0xa8)
>>>>>>>>>> [<c064d560>] (exynos_dsi_bind) from [<c066a800>]
>>>>>>>>>> (component_bind_all+0xfc/0x290)
>>>>>>>>>> [<c066a800>] (component_bind_all) from [<c0649dc0>]
>>>>>>>>>> (exynos_drm_bind+0xe4/0x19c)
>>>>>>>>>> [<c0649dc0>] (exynos_drm_bind) from [<c066ad74>]
>>>>>>>>>> (try_to_bring_up_master+0x1e4/0x2c4)
>>>>>>>>>> [<c066ad74>] (try_to_bring_up_master) from [<c066b2b4>]
>>>>>>>>>> (component_master_add_with_match+0xd4/0x108)
>>>>>>>>>> [<c066b2b4>] (component_master_add_with_match) from [<c0649ae8>]
>>>>>>>>>> (exynos_drm_platform_probe+0xe4/0x110)
>>>>>>>>>> [<c0649ae8>] (exynos_drm_platform_probe) from [<c0674e6c>]
>>>>>>>>>> (platform_drv_probe+0x6c/0xa4)
>>>>>>>>>> [<c0674e6c>] (platform_drv_probe) from [<c067242c>]
>>>>>>>>>> (really_probe+0x200/0x4fc)
>>>>>>>>>> [<c067242c>] (really_probe) from [<c06728f0>]
>>>>>>>>>> (driver_probe_device+0x78/0x1fc)
>>>>>>>>>> [<c06728f0>] (driver_probe_device) from [<c0672cd8>]
>>>>>>>>>> (device_driver_attach+0x58/0x60)
>>>>>>>>>> [<c0672cd8>] (device_driver_attach) from [<c0672dbc>]
>>>>>>>>>> (__driver_attach+0xdc/0x174)
>>>>>>>>>> [<c0672dbc>] (__driver_attach) from [<c06701b4>]
>>>>>>>>>> (bus_for_each_dev+0x68/0xb4)
>>>>>>>>>> [<c06701b4>] (bus_for_each_dev) from [<c06714e8>]
>>>>>>>>>> (bus_add_driver+0x158/0x214)
>>>>>>>>>> [<c06714e8>] (bus_add_driver) from [<c0673c1c>]
>>>>>>>>>> (driver_register+0x78/0x110)
>>>>>>>>>> [<c0673c1c>] (driver_register) from [<c0649ca8>]
>>>>>>>>>> (exynos_drm_init+0xe4/0x118)
>>>>>>>>>> [<c0649ca8>] (exynos_drm_init) from [<c0102484>]
>>>>>>>>>> (do_one_initcall+0x8c/0x42c)
>>>>>>>>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
>>>>>>>>>> (kernel_init_freeable+0x190/0x1dc)
>>>>>>>>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7880>]
>>>>>>>>>> (kernel_init+0x8/0x118)
>>>>>>>>>> [<c0af7880>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
>>>>>>>>>> Exception stack(0xef0dffb0 to 0xef0dfff8)
>>>>>>>>>> ...
>>>>>>>>>> ---[ end trace ee27f313f9ed9da1 ]---
>>>>>>>>>>
>>>>>>>>>> # arm-linux-gnueabi-addr2line -e vmlinux c0628c08
>>>>>>>>>> drivers/gpu/drm/drm_bridge.c:184 (discriminator 1)
>>>>>>>>>>
>>>>>>>>>> I will try to debug it a bit more today.
>>>>>>>>> The above crash has been caused by lack of in_bridge initialization to
>>>>>>>>> NULL in exynos_dsi_bind() in this patch. However, fixing it reveals
>>>>>>>>> another issue:
>>>>>>>>>
>>>>>>>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping operations
>>>>>>>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
>>>>>>>>> OF: graph: no port node found in /soc/dsi@11c80000
>>>>>>>>> 8<--- cut here ---
>>>>>>>>> Unable to handle kernel NULL pointer dereference at virtual address
>>>>>>>>> 00000280
>>>>>>>>> pgd = (ptrval)
>>>>>>>>> [00000280] *pgd=00000000
>>>>>>>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
>>>>>>>>> Modules linked in:
>>>>>>>>> CPU: 0 PID: 1 Comm: swapper/0 Not tainted
>>>>>>>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec-dirty #1613
>>>>>>>>> Hardware name: Samsung Exynos (Flattened Device Tree)
>>>>>>>>> PC is at __mutex_lock+0x54/0xb18
>>>>>>>>> LR is at lock_is_held_type+0x80/0x138
>>>>>>>>> pc : [<c0afc920>]    lr : [<c0af63e8>]    psr: 60000013
>>>>>>>>> sp : ef0dfd30  ip : 33937b74  fp : c13193c8
>>>>>>>>> r10: c1208eec  r9 : 00000000  r8 : ee45f808
>>>>>>>>> r7 : c19561a4  r6 : 00000000  r5 : 00000000  r4 : 0000024c
>>>>>>>>> r3 : 00000000  r2 : 00204140  r1 : c124f13c  r0 : 00000000
>>>>>>>>> Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
>>>>>>>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
>>>>>>>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
>>>>>>>>> Stack: (0xef0dfd30 to 0xef0e0000)
>>>>>>>>> ...
>>>>>>>>> [<c0afc920>] (__mutex_lock) from [<c0afd400>]
>>>>>>>>> (mutex_lock_nested+0x1c/0x24)
>>>>>>>>> [<c0afd400>] (mutex_lock_nested) from [<c064d4b8>]
>>>>>>>>> (__exynos_dsi_host_attach+0x20/0x6c)
>>>>>>>>> [<c064d4b8>] (__exynos_dsi_host_attach) from [<c064d914>]
>>>>>>>>> (exynos_dsi_host_attach+0x70/0x194)
>>>>>>>>> [<c064d914>] (exynos_dsi_host_attach) from [<c0656b64>]
>>>>>>>>> (s6e8aa0_probe+0x1b0/0x218)
>>>>>>>>> [<c0656b64>] (s6e8aa0_probe) from [<c0672530>]
>>>>>>>>> (really_probe+0x200/0x4fc)
>>>>>>>>> [<c0672530>] (really_probe) from [<c06729f4>]
>>>>>>>>> (driver_probe_device+0x78/0x1fc)
>>>>>>>>> [<c06729f4>] (driver_probe_device) from [<c0672ddc>]
>>>>>>>>> (device_driver_attach+0x58/0x60)
>>>>>>>>> [<c0672ddc>] (device_driver_attach) from [<c0672ec0>]
>>>>>>>>> (__driver_attach+0xdc/0x174)
>>>>>>>>> [<c0672ec0>] (__driver_attach) from [<c06702b8>]
>>>>>>>>> (bus_for_each_dev+0x68/0xb4)
>>>>>>>>> [<c06702b8>] (bus_for_each_dev) from [<c06715ec>]
>>>>>>>>> (bus_add_driver+0x158/0x214)
>>>>>>>>> [<c06715ec>] (bus_add_driver) from [<c0673d20>]
>>>>>>>>> (driver_register+0x78/0x110)
>>>>>>>>> [<c0673d20>] (driver_register) from [<c0102484>]
>>>>>>>>> (do_one_initcall+0x8c/0x42c)
>>>>>>>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
>>>>>>>>> (kernel_init_freeable+0x190/0x1dc)
>>>>>>>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7988>]
>>>>>>>>> (kernel_init+0x8/0x118)
>>>>>>>>> [<c0af7988>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
>>>>>>>>> Exception stack(0xef0dffb0 to 0xef0dfff8)
>>>>>>>>> ...
>>>>>>>>> ---[ end trace c06e996ec2e8234d ]---
>>>>>>>>>
>>>>>>>>> This means that dsi->encoder.dev is not initialized in
>>>>>>>>> __exynos_dsi_host_attach().
>>>>>>>>>
>>>>>>>>> This happens, because drm_bridge_attach() in exynos_dsi_bind() returned
>>>>>>>>> earlier -517 (deferred probe), what causes cleanup of encoder and
>>>>>>>>> release of all drm resources.
>>>>>>>>>
>>>>>>>>> Then however, the panel tries to register itself and
>>>>>>>>> exynos_dsi_host_attach() tries to access the released encoder (which is
>>>>>>>>> zeroed in drm_encoder_release) and rest of resources, what causes
>>>>>>>>> failure.
>>>>>>>>>
>>>>>>>>> It looks that something is missing. Maybe mipi host has to be
>>>>>>>>> registered
>>>>>>>>> later, when bridge is ready? I have no idea how it is handled before
>>>>>>>>> this patch. Andrzej, could you comment it a bit?
>>>>>>>> I intentionally changed the order, because if another bridge follows
>>>>>>>> in the
>>>>>>>> pipeline, the probe of the drm driver has to be deferred until some
>>>>>>>> bridge
>>>>>>>> provides a connector. The next bridge registers itself via the
>>>>>>>> host_attach
>>>>>>>> function and the deferral is ensured via the bind for the bind/unbind
>>>>>>>> API or
>>>>>>>> the bridge_attach function otherwise.
>>>>>>>>
>>>>>>>> On the other hand, the bridge does not have an encoder until the mipi
>>>>>>>> device
>>>>>>>> has been attached.
>>>>>>>>
>>>>>>>> As a solution, the exynos dsi driver must initialize the encoder in
>>>>>>>> exynos_dsi_probe instead of in exynos_dsi_bind and access the encoder
>>>>>>>> via
>>>>>>>> exynos_dsi instead of the bridge.
>>>>>>>>
>>>>>>>> Can you try to move everything except samsung_dsim_bind from
>>>>>>>> exynos_dsi_bind
>>>>>>>> to exynos_dsi_probe (respectively for unbind) and report if it fixes the
>>>>>>>> crash.
>>>>>>>
>>>>>>> The original behaviour is that encoder (exynos_dsi) is registered
>>>>>>> regardless of sink presence (initially panel, later also bridge) - it
>>>>>>> avoids multiple issues with deferred probe, device driver bind/unbind
>>>>>>> and module load/unload. Appearance or disappearance of sink is
>>>>>>> reported to host nicely via DSI attach/detach callbacks - and it is
>>>>>>> reflected in drm world as change state of the connector.
>>>>>>>
>>>>>>> Registering DSI host in bind and unregistering in unbind assures that
>>>>>>> if mipi_dsi device is attached/detached the drm device is always
>>>>>>> present - it makes device/driver binding race free and allows to avoid
>>>>>>> additional locking.
>>>>>>>
>>>>>>> Moving DSI host registration to probe changes everything, for sure it
>>>>>>> breaks the nice feature of DSI attach/detach callbacks and apparently
>>>>>>> can cause different issues depending on device bind order.
>>>>>>>
>>>>>>> I will try to look at the patches tomorrow and maybe I can find more
>>>>>>> constructive comments :)
>>>>>>
>>>>>> As I said yesterday, exynos_dsi driver uses dsi host attach/detach
>>>>>> callbacks to control appearance/disappearance of downstream device. It
>>>>>> allows to:
>>>>>>
>>>>>> 1. Safely bind/unbind different device drivers at any time and at any
>>>>>> order, without killing exynos_drm and/or crashing system.
>>>>>>
>>>>>> 2. Avoid issues with late drm init - on some platforms exynos_drm device
>>>>>> appeared too late, due to deferred probe, and resulted in black screen
>>>>>> in userspace.
>>>>>>
>>>>>>
>>>>>> Now if we want to convert exynos_dsi to drm_bridge I see following options:
>>>>>>
>>>>>> A. Forgot about callbacks and make the exynos_drm to defer probing until
>>>>>> exynos_dsi bridge is available, probably it will cause later exynos_drm
>>>>>> appearance, thus probably black screen on some targets. So for sure it
>>>>>> will be suboptimal. Making it bridge unbind safe would be another
>>>>>> problem, but most developers do not care about it so why should we? :)
>>>>>>
>>>>>> B. Try to mimic current behaviour - exynos_dsi register bridge ASAP,
>>>>>> even if downstream devices are not yet attached, on attach/detach notify
>>>>>> drm about it via connector status change, for this dsi_host registration
>>>>>> should be performed from drm_bridge attach, I guess.
>>>>>>
>>>>>>
>>>>>> Option A is more standard, but is unsafe and causes other issues.
>>>>>>
>>>>>> Option B keeps current behaviour.
>>>>> Maybe we can have both, but I am not sure, if I am missing something:
>>>>>
>>>>> I still prefer option A for the samsung-dsim driver, because it is more
>>>>> standard, simpler and avoids issues with encoders, connectors or handling
>>>>> hotplug.
>>>>>
>>>>> The idea is to use two bridges in the exynos-dsi driver: One bridge in the
>>>>> samsung-dsim driver which implements option A and defers probing of the drm
>>>>> driver until the next bridge is attached. And a second bridge in the
>>>>> exynos_dsi that attaches to the first bridge (thus, allowing the exynos_drm
>>>>> device to appear) and implements the hotplug handling for notifying drm via
>>>>> connector status change.
>>>>>
>>>>> The driver for the i.MX8M would use the samsung-dsim bridge without an
>>>>> additional bridge.
>>>>>
>>>>> This allows the samsung-dsim driver to expose the standard behavior while the
>>>>> exynos_dsi may stick to the existing behavior for the exynos_drm driver.
>>>>>
>>>>> I hope this makes sense and does not sound too crazy. It might be difficult to
>>>>> get the probing and mipi host/device registration correct, but I will try, if
>>>>> this can work.
>>>> Adding two bridges for being able to support hotplugging adds many special
>>>> cases to the bridge driver and still requires more custom API to correctly add
>>>> the second bridge. I don't think that this a viable path to go.
>>> Just jumping in here: You cannot hotplug/hotremove anything from a
>>> drm_device after drm_dev_register has been called, except
>>> drm_connector. I didn't dig into details here so not sure whether you
>>> want to late-bind your bridge after drm_dev_register is called or not,
>>> so might just be fyi and not relevant to the discussion.
>> Thanks. AFAIC that is exactly what is currently implemented in the exynos_drm
>> driver (i.e. Option B)
>>
>> exynos_dsi_bind configures the encoder and registers a DSI host. Afterwards,
>> exynos_drm_bind (as component_master_ops) calls drm_dev_register. Later, a DSI
>> device might attach to the DSI host and call exynos_dsi_host_attach. In
>> exynos_dsi_host_attach, the driver finds the drm_bridge for the DSI device and
>> attaches this bridge to the encoder _after_ drm_dev_register has been called.
>> This is invalid behavior, right?
> Definitely not supported, I don't think we have the right locks in place
> to make sure this works.
>
> Now if your _only_ adding a drm_bridge (and not an encoder or anything
> like that), and you are adding the drm_connector correctly (like a
> hotplugged DP MST sink), then that would at least work from a uapi pov.
> Because drm_bridge isn't exposed as an uapi object.
>
> But yeah, as-is, don't :-)
>
> The solution here is a bunch of EPROBE_DEFER handling until all your
> bridges are loaded, with or without the assistance of component.c
> framework. Only then call drm_dev_register.
> -Daniel


I have impression we have similar conversation already.

As you stated drm_bridge and drm_panel are not exposed to userspace so 
there shouldn't be problem with them from uapi PoV.

On the other side drm_panel or drm_bridge are not used until pipeline 
enters connected state (at least they were not some time ago :) ). The 
issue is that bridge exposes drm_connector, but as you stated (again :) 
) connectors can be hotplugged, so in theory it should work. Practical 
tests shows that it also works, but bugs can be still there.

Bunch of EPROBE_DEFER was very slow (as a result userspace timeouted and 
decided there is no display), and does not handle unbinding/re-binding 
drivers.


Regards

Andrzej


>
>> Michael
>>
>>> -Daniel
>>>
>>>> This leaves us with:
>>>>
>>>> Option A) Standard drm_bridge behavior, which is currently implemented, but
>>>> incompatible with the currently expected behavior of exynos_drm.
>>>>
>>>> Option B) Creating the drm device without all bridges being attached, which
>>>> would work with the exynos_drm driver, but breaks for the standard drm_bridge
>>>> behavior, especially, if the encoder/connector is created at the beginning of
>>>> the pipeline and passed downwards when the bridges are attached.
>>>>
>>>> Option C) Extracting only low level register accesses into shared code, adding
>>>> a custom interface and implementing the drm_bridge handling in the platform
>>>> specific code.
>>>>
>>>> None of the options really convinces me.
>>>>
>>>> Michael
>>>> _______________________________________________
>>>> dri-devel mailing list
>>>> dri-devel@lists.freedesktop.org
>>>> https://protect2.fireeye.com/v1/url?k=9b91fda0-c40ac4d5-9b9076ef-0cc47a31ce4e-a7f88871f6fd42d4&q=1&e=f2cfade9-8ecf-4697-88b3-69d93ed38361&u=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel
Daniel Vetter Feb. 4, 2021, 5:19 p.m. UTC | #10
On Thu, Feb 4, 2021 at 5:28 PM Andrzej Hajda <a.hajda@samsung.com> wrote:
>
>
> W dniu 04.02.2021 o 17:05, Daniel Vetter pisze:
> > On Thu, Feb 04, 2021 at 11:56:32AM +0100, Michael Tretter wrote:
> >> On Thu, 04 Feb 2021 11:17:49 +0100, Daniel Vetter wrote:
> >>> On Wed, Feb 3, 2021 at 9:32 PM Michael Tretter <m.tretter@pengutronix.de> wrote:
> >>>> On Mon, 01 Feb 2021 17:33:14 +0100, Michael Tretter wrote:
> >>>>> On Tue, 15 Sep 2020 21:40:40 +0200, Andrzej Hajda wrote:
> >>>>>> W dniu 14.09.2020 o 23:19, Andrzej Hajda pisze:
> >>>>>>> On 14.09.2020 22:01, Michael Tretter wrote:
> >>>>>>>> On Mon, 14 Sep 2020 14:31:19 +0200, Marek Szyprowski wrote:
> >>>>>>>>> On 14.09.2020 10:29, Marek Szyprowski wrote:
> >>>>>>>>>> On 11.09.2020 15:54, Michael Tretter wrote:
> >>>>>>>>>>> Make the exynos_dsi driver a full drm bridge that can be found and
> >>>>>>>>>>> used
> >>>>>>>>>>> from other drivers.
> >>>>>>>>>>>
> >>>>>>>>>>> Other drivers can only attach to the bridge, if a mipi dsi device
> >>>>>>>>>>> already attached to the bridge. This allows to defer the probe of the
> >>>>>>>>>>> display pipe until the downstream bridges are available, too.
> >>>>>>>>>>>
> >>>>>>>>>>> Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
> >>>>>>>>>> This one (and the whole series applied) still fails on Exynos boards:
> >>>>>>>>>>
> >>>>>>>>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping
> >>>>>>>>>> operations
> >>>>>>>>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
> >>>>>>>>>> OF: graph: no port node found in /soc/dsi@11c80000
> >>>>>>>>>> 8<--- cut here ---
> >>>>>>>>>> Unable to handle kernel NULL pointer dereference at virtual address
> >>>>>>>>>> 00000084
> >>>>>>>>>> pgd = (ptrval)
> >>>>>>>>>> [00000084] *pgd=00000000
> >>>>>>>>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
> >>>>>>>>>> Modules linked in:
> >>>>>>>>>> CPU: 1 PID: 1 Comm: swapper/0 Not tainted
> >>>>>>>>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec #1608
> >>>>>>>>>> Hardware name: Samsung Exynos (Flattened Device Tree)
> >>>>>>>>>> PC is at drm_bridge_attach+0x18/0x164
> >>>>>>>>>> LR is at exynos_dsi_bind+0x88/0xa8
> >>>>>>>>>> pc : [<c0628c08>]    lr : [<c064d560>]    psr: 20000013
> >>>>>>>>>> sp : ef0dfca8  ip : 00000002  fp : c13190e0
> >>>>>>>>>> r10: 00000000  r9 : ee46d580  r8 : c13190e0
> >>>>>>>>>> r7 : ee438800  r6 : 00000018  r5 : ef253810  r4 : ef39e840
> >>>>>>>>>> r3 : 00000000  r2 : 00000018  r1 : ef39e888  r0 : ef39e840
> >>>>>>>>>> Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> >>>>>>>>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
> >>>>>>>>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
> >>>>>>>>>> Stack: (0xef0dfca8 to 0xef0e0000)
> >>>>>>>>>> ...
> >>>>>>>>>> [<c0628c08>] (drm_bridge_attach) from [<c064d560>]
> >>>>>>>>>> (exynos_dsi_bind+0x88/0xa8)
> >>>>>>>>>> [<c064d560>] (exynos_dsi_bind) from [<c066a800>]
> >>>>>>>>>> (component_bind_all+0xfc/0x290)
> >>>>>>>>>> [<c066a800>] (component_bind_all) from [<c0649dc0>]
> >>>>>>>>>> (exynos_drm_bind+0xe4/0x19c)
> >>>>>>>>>> [<c0649dc0>] (exynos_drm_bind) from [<c066ad74>]
> >>>>>>>>>> (try_to_bring_up_master+0x1e4/0x2c4)
> >>>>>>>>>> [<c066ad74>] (try_to_bring_up_master) from [<c066b2b4>]
> >>>>>>>>>> (component_master_add_with_match+0xd4/0x108)
> >>>>>>>>>> [<c066b2b4>] (component_master_add_with_match) from [<c0649ae8>]
> >>>>>>>>>> (exynos_drm_platform_probe+0xe4/0x110)
> >>>>>>>>>> [<c0649ae8>] (exynos_drm_platform_probe) from [<c0674e6c>]
> >>>>>>>>>> (platform_drv_probe+0x6c/0xa4)
> >>>>>>>>>> [<c0674e6c>] (platform_drv_probe) from [<c067242c>]
> >>>>>>>>>> (really_probe+0x200/0x4fc)
> >>>>>>>>>> [<c067242c>] (really_probe) from [<c06728f0>]
> >>>>>>>>>> (driver_probe_device+0x78/0x1fc)
> >>>>>>>>>> [<c06728f0>] (driver_probe_device) from [<c0672cd8>]
> >>>>>>>>>> (device_driver_attach+0x58/0x60)
> >>>>>>>>>> [<c0672cd8>] (device_driver_attach) from [<c0672dbc>]
> >>>>>>>>>> (__driver_attach+0xdc/0x174)
> >>>>>>>>>> [<c0672dbc>] (__driver_attach) from [<c06701b4>]
> >>>>>>>>>> (bus_for_each_dev+0x68/0xb4)
> >>>>>>>>>> [<c06701b4>] (bus_for_each_dev) from [<c06714e8>]
> >>>>>>>>>> (bus_add_driver+0x158/0x214)
> >>>>>>>>>> [<c06714e8>] (bus_add_driver) from [<c0673c1c>]
> >>>>>>>>>> (driver_register+0x78/0x110)
> >>>>>>>>>> [<c0673c1c>] (driver_register) from [<c0649ca8>]
> >>>>>>>>>> (exynos_drm_init+0xe4/0x118)
> >>>>>>>>>> [<c0649ca8>] (exynos_drm_init) from [<c0102484>]
> >>>>>>>>>> (do_one_initcall+0x8c/0x42c)
> >>>>>>>>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
> >>>>>>>>>> (kernel_init_freeable+0x190/0x1dc)
> >>>>>>>>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7880>]
> >>>>>>>>>> (kernel_init+0x8/0x118)
> >>>>>>>>>> [<c0af7880>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
> >>>>>>>>>> Exception stack(0xef0dffb0 to 0xef0dfff8)
> >>>>>>>>>> ...
> >>>>>>>>>> ---[ end trace ee27f313f9ed9da1 ]---
> >>>>>>>>>>
> >>>>>>>>>> # arm-linux-gnueabi-addr2line -e vmlinux c0628c08
> >>>>>>>>>> drivers/gpu/drm/drm_bridge.c:184 (discriminator 1)
> >>>>>>>>>>
> >>>>>>>>>> I will try to debug it a bit more today.
> >>>>>>>>> The above crash has been caused by lack of in_bridge initialization to
> >>>>>>>>> NULL in exynos_dsi_bind() in this patch. However, fixing it reveals
> >>>>>>>>> another issue:
> >>>>>>>>>
> >>>>>>>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping operations
> >>>>>>>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
> >>>>>>>>> OF: graph: no port node found in /soc/dsi@11c80000
> >>>>>>>>> 8<--- cut here ---
> >>>>>>>>> Unable to handle kernel NULL pointer dereference at virtual address
> >>>>>>>>> 00000280
> >>>>>>>>> pgd = (ptrval)
> >>>>>>>>> [00000280] *pgd=00000000
> >>>>>>>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
> >>>>>>>>> Modules linked in:
> >>>>>>>>> CPU: 0 PID: 1 Comm: swapper/0 Not tainted
> >>>>>>>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec-dirty #1613
> >>>>>>>>> Hardware name: Samsung Exynos (Flattened Device Tree)
> >>>>>>>>> PC is at __mutex_lock+0x54/0xb18
> >>>>>>>>> LR is at lock_is_held_type+0x80/0x138
> >>>>>>>>> pc : [<c0afc920>]    lr : [<c0af63e8>]    psr: 60000013
> >>>>>>>>> sp : ef0dfd30  ip : 33937b74  fp : c13193c8
> >>>>>>>>> r10: c1208eec  r9 : 00000000  r8 : ee45f808
> >>>>>>>>> r7 : c19561a4  r6 : 00000000  r5 : 00000000  r4 : 0000024c
> >>>>>>>>> r3 : 00000000  r2 : 00204140  r1 : c124f13c  r0 : 00000000
> >>>>>>>>> Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> >>>>>>>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
> >>>>>>>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
> >>>>>>>>> Stack: (0xef0dfd30 to 0xef0e0000)
> >>>>>>>>> ...
> >>>>>>>>> [<c0afc920>] (__mutex_lock) from [<c0afd400>]
> >>>>>>>>> (mutex_lock_nested+0x1c/0x24)
> >>>>>>>>> [<c0afd400>] (mutex_lock_nested) from [<c064d4b8>]
> >>>>>>>>> (__exynos_dsi_host_attach+0x20/0x6c)
> >>>>>>>>> [<c064d4b8>] (__exynos_dsi_host_attach) from [<c064d914>]
> >>>>>>>>> (exynos_dsi_host_attach+0x70/0x194)
> >>>>>>>>> [<c064d914>] (exynos_dsi_host_attach) from [<c0656b64>]
> >>>>>>>>> (s6e8aa0_probe+0x1b0/0x218)
> >>>>>>>>> [<c0656b64>] (s6e8aa0_probe) from [<c0672530>]
> >>>>>>>>> (really_probe+0x200/0x4fc)
> >>>>>>>>> [<c0672530>] (really_probe) from [<c06729f4>]
> >>>>>>>>> (driver_probe_device+0x78/0x1fc)
> >>>>>>>>> [<c06729f4>] (driver_probe_device) from [<c0672ddc>]
> >>>>>>>>> (device_driver_attach+0x58/0x60)
> >>>>>>>>> [<c0672ddc>] (device_driver_attach) from [<c0672ec0>]
> >>>>>>>>> (__driver_attach+0xdc/0x174)
> >>>>>>>>> [<c0672ec0>] (__driver_attach) from [<c06702b8>]
> >>>>>>>>> (bus_for_each_dev+0x68/0xb4)
> >>>>>>>>> [<c06702b8>] (bus_for_each_dev) from [<c06715ec>]
> >>>>>>>>> (bus_add_driver+0x158/0x214)
> >>>>>>>>> [<c06715ec>] (bus_add_driver) from [<c0673d20>]
> >>>>>>>>> (driver_register+0x78/0x110)
> >>>>>>>>> [<c0673d20>] (driver_register) from [<c0102484>]
> >>>>>>>>> (do_one_initcall+0x8c/0x42c)
> >>>>>>>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
> >>>>>>>>> (kernel_init_freeable+0x190/0x1dc)
> >>>>>>>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7988>]
> >>>>>>>>> (kernel_init+0x8/0x118)
> >>>>>>>>> [<c0af7988>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
> >>>>>>>>> Exception stack(0xef0dffb0 to 0xef0dfff8)
> >>>>>>>>> ...
> >>>>>>>>> ---[ end trace c06e996ec2e8234d ]---
> >>>>>>>>>
> >>>>>>>>> This means that dsi->encoder.dev is not initialized in
> >>>>>>>>> __exynos_dsi_host_attach().
> >>>>>>>>>
> >>>>>>>>> This happens, because drm_bridge_attach() in exynos_dsi_bind() returned
> >>>>>>>>> earlier -517 (deferred probe), what causes cleanup of encoder and
> >>>>>>>>> release of all drm resources.
> >>>>>>>>>
> >>>>>>>>> Then however, the panel tries to register itself and
> >>>>>>>>> exynos_dsi_host_attach() tries to access the released encoder (which is
> >>>>>>>>> zeroed in drm_encoder_release) and rest of resources, what causes
> >>>>>>>>> failure.
> >>>>>>>>>
> >>>>>>>>> It looks that something is missing. Maybe mipi host has to be
> >>>>>>>>> registered
> >>>>>>>>> later, when bridge is ready? I have no idea how it is handled before
> >>>>>>>>> this patch. Andrzej, could you comment it a bit?
> >>>>>>>> I intentionally changed the order, because if another bridge follows
> >>>>>>>> in the
> >>>>>>>> pipeline, the probe of the drm driver has to be deferred until some
> >>>>>>>> bridge
> >>>>>>>> provides a connector. The next bridge registers itself via the
> >>>>>>>> host_attach
> >>>>>>>> function and the deferral is ensured via the bind for the bind/unbind
> >>>>>>>> API or
> >>>>>>>> the bridge_attach function otherwise.
> >>>>>>>>
> >>>>>>>> On the other hand, the bridge does not have an encoder until the mipi
> >>>>>>>> device
> >>>>>>>> has been attached.
> >>>>>>>>
> >>>>>>>> As a solution, the exynos dsi driver must initialize the encoder in
> >>>>>>>> exynos_dsi_probe instead of in exynos_dsi_bind and access the encoder
> >>>>>>>> via
> >>>>>>>> exynos_dsi instead of the bridge.
> >>>>>>>>
> >>>>>>>> Can you try to move everything except samsung_dsim_bind from
> >>>>>>>> exynos_dsi_bind
> >>>>>>>> to exynos_dsi_probe (respectively for unbind) and report if it fixes the
> >>>>>>>> crash.
> >>>>>>>
> >>>>>>> The original behaviour is that encoder (exynos_dsi) is registered
> >>>>>>> regardless of sink presence (initially panel, later also bridge) - it
> >>>>>>> avoids multiple issues with deferred probe, device driver bind/unbind
> >>>>>>> and module load/unload. Appearance or disappearance of sink is
> >>>>>>> reported to host nicely via DSI attach/detach callbacks - and it is
> >>>>>>> reflected in drm world as change state of the connector.
> >>>>>>>
> >>>>>>> Registering DSI host in bind and unregistering in unbind assures that
> >>>>>>> if mipi_dsi device is attached/detached the drm device is always
> >>>>>>> present - it makes device/driver binding race free and allows to avoid
> >>>>>>> additional locking.
> >>>>>>>
> >>>>>>> Moving DSI host registration to probe changes everything, for sure it
> >>>>>>> breaks the nice feature of DSI attach/detach callbacks and apparently
> >>>>>>> can cause different issues depending on device bind order.
> >>>>>>>
> >>>>>>> I will try to look at the patches tomorrow and maybe I can find more
> >>>>>>> constructive comments :)
> >>>>>>
> >>>>>> As I said yesterday, exynos_dsi driver uses dsi host attach/detach
> >>>>>> callbacks to control appearance/disappearance of downstream device. It
> >>>>>> allows to:
> >>>>>>
> >>>>>> 1. Safely bind/unbind different device drivers at any time and at any
> >>>>>> order, without killing exynos_drm and/or crashing system.
> >>>>>>
> >>>>>> 2. Avoid issues with late drm init - on some platforms exynos_drm device
> >>>>>> appeared too late, due to deferred probe, and resulted in black screen
> >>>>>> in userspace.
> >>>>>>
> >>>>>>
> >>>>>> Now if we want to convert exynos_dsi to drm_bridge I see following options:
> >>>>>>
> >>>>>> A. Forgot about callbacks and make the exynos_drm to defer probing until
> >>>>>> exynos_dsi bridge is available, probably it will cause later exynos_drm
> >>>>>> appearance, thus probably black screen on some targets. So for sure it
> >>>>>> will be suboptimal. Making it bridge unbind safe would be another
> >>>>>> problem, but most developers do not care about it so why should we? :)
> >>>>>>
> >>>>>> B. Try to mimic current behaviour - exynos_dsi register bridge ASAP,
> >>>>>> even if downstream devices are not yet attached, on attach/detach notify
> >>>>>> drm about it via connector status change, for this dsi_host registration
> >>>>>> should be performed from drm_bridge attach, I guess.
> >>>>>>
> >>>>>>
> >>>>>> Option A is more standard, but is unsafe and causes other issues.
> >>>>>>
> >>>>>> Option B keeps current behaviour.
> >>>>> Maybe we can have both, but I am not sure, if I am missing something:
> >>>>>
> >>>>> I still prefer option A for the samsung-dsim driver, because it is more
> >>>>> standard, simpler and avoids issues with encoders, connectors or handling
> >>>>> hotplug.
> >>>>>
> >>>>> The idea is to use two bridges in the exynos-dsi driver: One bridge in the
> >>>>> samsung-dsim driver which implements option A and defers probing of the drm
> >>>>> driver until the next bridge is attached. And a second bridge in the
> >>>>> exynos_dsi that attaches to the first bridge (thus, allowing the exynos_drm
> >>>>> device to appear) and implements the hotplug handling for notifying drm via
> >>>>> connector status change.
> >>>>>
> >>>>> The driver for the i.MX8M would use the samsung-dsim bridge without an
> >>>>> additional bridge.
> >>>>>
> >>>>> This allows the samsung-dsim driver to expose the standard behavior while the
> >>>>> exynos_dsi may stick to the existing behavior for the exynos_drm driver.
> >>>>>
> >>>>> I hope this makes sense and does not sound too crazy. It might be difficult to
> >>>>> get the probing and mipi host/device registration correct, but I will try, if
> >>>>> this can work.
> >>>> Adding two bridges for being able to support hotplugging adds many special
> >>>> cases to the bridge driver and still requires more custom API to correctly add
> >>>> the second bridge. I don't think that this a viable path to go.
> >>> Just jumping in here: You cannot hotplug/hotremove anything from a
> >>> drm_device after drm_dev_register has been called, except
> >>> drm_connector. I didn't dig into details here so not sure whether you
> >>> want to late-bind your bridge after drm_dev_register is called or not,
> >>> so might just be fyi and not relevant to the discussion.
> >> Thanks. AFAIC that is exactly what is currently implemented in the exynos_drm
> >> driver (i.e. Option B)
> >>
> >> exynos_dsi_bind configures the encoder and registers a DSI host. Afterwards,
> >> exynos_drm_bind (as component_master_ops) calls drm_dev_register. Later, a DSI
> >> device might attach to the DSI host and call exynos_dsi_host_attach. In
> >> exynos_dsi_host_attach, the driver finds the drm_bridge for the DSI device and
> >> attaches this bridge to the encoder _after_ drm_dev_register has been called.
> >> This is invalid behavior, right?
> > Definitely not supported, I don't think we have the right locks in place
> > to make sure this works.
> >
> > Now if your _only_ adding a drm_bridge (and not an encoder or anything
> > like that), and you are adding the drm_connector correctly (like a
> > hotplugged DP MST sink), then that would at least work from a uapi pov.
> > Because drm_bridge isn't exposed as an uapi object.
> >
> > But yeah, as-is, don't :-)
> >
> > The solution here is a bunch of EPROBE_DEFER handling until all your
> > bridges are loaded, with or without the assistance of component.c
> > framework. Only then call drm_dev_register.
> > -Daniel
>
>
> I have impression we have similar conversation already.
>
> As you stated drm_bridge and drm_panel are not exposed to userspace so
> there shouldn't be problem with them from uapi PoV.
>
> On the other side drm_panel or drm_bridge are not used until pipeline
> enters connected state (at least they were not some time ago :) ). The
> issue is that bridge exposes drm_connector, but as you stated (again :)
> ) connectors can be hotplugged, so in theory it should work. Practical
> tests shows that it also works, but bugs can be still there.
>
> Bunch of EPROBE_DEFER was very slow (as a result userspace timeouted and
> decided there is no display), and does not handle unbinding/re-binding
> drivers.

Rebinding drivers should be fixed now, with a bunch of fixes in driver
core. If not, we need to fix this more.

Also, EPROBE_DEFER is how this is supposed to work. If it's too slow,
we need to fix EPROBE_DEFER (there's ideas for pre-sorting that never
seem to go anywhere), not paper over it with bad architecture in
drivers.

There's also a bit the issue that most userspace handles panels in a
special way, and if they don't find the panel at first, that doesn't
work. Stuff like "which is the main screen" on laptops.

So yeah please fix this properly.
-Daniel

>
>
> Regards
>
> Andrzej
>
>
> >
> >> Michael
> >>
> >>> -Daniel
> >>>
> >>>> This leaves us with:
> >>>>
> >>>> Option A) Standard drm_bridge behavior, which is currently implemented, but
> >>>> incompatible with the currently expected behavior of exynos_drm.
> >>>>
> >>>> Option B) Creating the drm device without all bridges being attached, which
> >>>> would work with the exynos_drm driver, but breaks for the standard drm_bridge
> >>>> behavior, especially, if the encoder/connector is created at the beginning of
> >>>> the pipeline and passed downwards when the bridges are attached.
> >>>>
> >>>> Option C) Extracting only low level register accesses into shared code, adding
> >>>> a custom interface and implementing the drm_bridge handling in the platform
> >>>> specific code.
> >>>>
> >>>> None of the options really convinces me.
> >>>>
> >>>> Michael
> >>>> _______________________________________________
> >>>> dri-devel mailing list
> >>>> dri-devel@lists.freedesktop.org
> >>>> https://protect2.fireeye.com/v1/url?k=9b91fda0-c40ac4d5-9b9076ef-0cc47a31ce4e-a7f88871f6fd42d4&q=1&e=f2cfade9-8ecf-4697-88b3-69d93ed38361&u=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel
Laurent Pinchart Feb. 4, 2021, 5:26 p.m. UTC | #11
Hi Daniel,

On Thu, Feb 04, 2021 at 06:19:22PM +0100, Daniel Vetter wrote:
> On Thu, Feb 4, 2021 at 5:28 PM Andrzej Hajda wrote:
> > W dniu 04.02.2021 o 17:05, Daniel Vetter pisze:
> > > On Thu, Feb 04, 2021 at 11:56:32AM +0100, Michael Tretter wrote:
> > >> On Thu, 04 Feb 2021 11:17:49 +0100, Daniel Vetter wrote:
> > >>> On Wed, Feb 3, 2021 at 9:32 PM Michael Tretter wrote:
> > >>>> On Mon, 01 Feb 2021 17:33:14 +0100, Michael Tretter wrote:
> > >>>>> On Tue, 15 Sep 2020 21:40:40 +0200, Andrzej Hajda wrote:
> > >>>>>> W dniu 14.09.2020 o 23:19, Andrzej Hajda pisze:
> > >>>>>>> On 14.09.2020 22:01, Michael Tretter wrote:
> > >>>>>>>> On Mon, 14 Sep 2020 14:31:19 +0200, Marek Szyprowski wrote:
> > >>>>>>>>> On 14.09.2020 10:29, Marek Szyprowski wrote:
> > >>>>>>>>>> On 11.09.2020 15:54, Michael Tretter wrote:
> > >>>>>>>>>>> Make the exynos_dsi driver a full drm bridge that can be found and
> > >>>>>>>>>>> used
> > >>>>>>>>>>> from other drivers.
> > >>>>>>>>>>>
> > >>>>>>>>>>> Other drivers can only attach to the bridge, if a mipi dsi device
> > >>>>>>>>>>> already attached to the bridge. This allows to defer the probe of the
> > >>>>>>>>>>> display pipe until the downstream bridges are available, too.
> > >>>>>>>>>>>
> > >>>>>>>>>>> Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
> > >>>>>>>>>> This one (and the whole series applied) still fails on Exynos boards:
> > >>>>>>>>>>
> > >>>>>>>>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping
> > >>>>>>>>>> operations
> > >>>>>>>>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
> > >>>>>>>>>> OF: graph: no port node found in /soc/dsi@11c80000
> > >>>>>>>>>> 8<--- cut here ---
> > >>>>>>>>>> Unable to handle kernel NULL pointer dereference at virtual address
> > >>>>>>>>>> 00000084
> > >>>>>>>>>> pgd = (ptrval)
> > >>>>>>>>>> [00000084] *pgd=00000000
> > >>>>>>>>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
> > >>>>>>>>>> Modules linked in:
> > >>>>>>>>>> CPU: 1 PID: 1 Comm: swapper/0 Not tainted
> > >>>>>>>>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec #1608
> > >>>>>>>>>> Hardware name: Samsung Exynos (Flattened Device Tree)
> > >>>>>>>>>> PC is at drm_bridge_attach+0x18/0x164
> > >>>>>>>>>> LR is at exynos_dsi_bind+0x88/0xa8
> > >>>>>>>>>> pc : [<c0628c08>]    lr : [<c064d560>]    psr: 20000013
> > >>>>>>>>>> sp : ef0dfca8  ip : 00000002  fp : c13190e0
> > >>>>>>>>>> r10: 00000000  r9 : ee46d580  r8 : c13190e0
> > >>>>>>>>>> r7 : ee438800  r6 : 00000018  r5 : ef253810  r4 : ef39e840
> > >>>>>>>>>> r3 : 00000000  r2 : 00000018  r1 : ef39e888  r0 : ef39e840
> > >>>>>>>>>> Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> > >>>>>>>>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
> > >>>>>>>>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
> > >>>>>>>>>> Stack: (0xef0dfca8 to 0xef0e0000)
> > >>>>>>>>>> ...
> > >>>>>>>>>> [<c0628c08>] (drm_bridge_attach) from [<c064d560>]
> > >>>>>>>>>> (exynos_dsi_bind+0x88/0xa8)
> > >>>>>>>>>> [<c064d560>] (exynos_dsi_bind) from [<c066a800>]
> > >>>>>>>>>> (component_bind_all+0xfc/0x290)
> > >>>>>>>>>> [<c066a800>] (component_bind_all) from [<c0649dc0>]
> > >>>>>>>>>> (exynos_drm_bind+0xe4/0x19c)
> > >>>>>>>>>> [<c0649dc0>] (exynos_drm_bind) from [<c066ad74>]
> > >>>>>>>>>> (try_to_bring_up_master+0x1e4/0x2c4)
> > >>>>>>>>>> [<c066ad74>] (try_to_bring_up_master) from [<c066b2b4>]
> > >>>>>>>>>> (component_master_add_with_match+0xd4/0x108)
> > >>>>>>>>>> [<c066b2b4>] (component_master_add_with_match) from [<c0649ae8>]
> > >>>>>>>>>> (exynos_drm_platform_probe+0xe4/0x110)
> > >>>>>>>>>> [<c0649ae8>] (exynos_drm_platform_probe) from [<c0674e6c>]
> > >>>>>>>>>> (platform_drv_probe+0x6c/0xa4)
> > >>>>>>>>>> [<c0674e6c>] (platform_drv_probe) from [<c067242c>]
> > >>>>>>>>>> (really_probe+0x200/0x4fc)
> > >>>>>>>>>> [<c067242c>] (really_probe) from [<c06728f0>]
> > >>>>>>>>>> (driver_probe_device+0x78/0x1fc)
> > >>>>>>>>>> [<c06728f0>] (driver_probe_device) from [<c0672cd8>]
> > >>>>>>>>>> (device_driver_attach+0x58/0x60)
> > >>>>>>>>>> [<c0672cd8>] (device_driver_attach) from [<c0672dbc>]
> > >>>>>>>>>> (__driver_attach+0xdc/0x174)
> > >>>>>>>>>> [<c0672dbc>] (__driver_attach) from [<c06701b4>]
> > >>>>>>>>>> (bus_for_each_dev+0x68/0xb4)
> > >>>>>>>>>> [<c06701b4>] (bus_for_each_dev) from [<c06714e8>]
> > >>>>>>>>>> (bus_add_driver+0x158/0x214)
> > >>>>>>>>>> [<c06714e8>] (bus_add_driver) from [<c0673c1c>]
> > >>>>>>>>>> (driver_register+0x78/0x110)
> > >>>>>>>>>> [<c0673c1c>] (driver_register) from [<c0649ca8>]
> > >>>>>>>>>> (exynos_drm_init+0xe4/0x118)
> > >>>>>>>>>> [<c0649ca8>] (exynos_drm_init) from [<c0102484>]
> > >>>>>>>>>> (do_one_initcall+0x8c/0x42c)
> > >>>>>>>>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
> > >>>>>>>>>> (kernel_init_freeable+0x190/0x1dc)
> > >>>>>>>>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7880>]
> > >>>>>>>>>> (kernel_init+0x8/0x118)
> > >>>>>>>>>> [<c0af7880>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
> > >>>>>>>>>> Exception stack(0xef0dffb0 to 0xef0dfff8)
> > >>>>>>>>>> ...
> > >>>>>>>>>> ---[ end trace ee27f313f9ed9da1 ]---
> > >>>>>>>>>>
> > >>>>>>>>>> # arm-linux-gnueabi-addr2line -e vmlinux c0628c08
> > >>>>>>>>>> drivers/gpu/drm/drm_bridge.c:184 (discriminator 1)
> > >>>>>>>>>>
> > >>>>>>>>>> I will try to debug it a bit more today.
> > >>>>>>>>> The above crash has been caused by lack of in_bridge initialization to
> > >>>>>>>>> NULL in exynos_dsi_bind() in this patch. However, fixing it reveals
> > >>>>>>>>> another issue:
> > >>>>>>>>>
> > >>>>>>>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping operations
> > >>>>>>>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
> > >>>>>>>>> OF: graph: no port node found in /soc/dsi@11c80000
> > >>>>>>>>> 8<--- cut here ---
> > >>>>>>>>> Unable to handle kernel NULL pointer dereference at virtual address
> > >>>>>>>>> 00000280
> > >>>>>>>>> pgd = (ptrval)
> > >>>>>>>>> [00000280] *pgd=00000000
> > >>>>>>>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
> > >>>>>>>>> Modules linked in:
> > >>>>>>>>> CPU: 0 PID: 1 Comm: swapper/0 Not tainted
> > >>>>>>>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec-dirty #1613
> > >>>>>>>>> Hardware name: Samsung Exynos (Flattened Device Tree)
> > >>>>>>>>> PC is at __mutex_lock+0x54/0xb18
> > >>>>>>>>> LR is at lock_is_held_type+0x80/0x138
> > >>>>>>>>> pc : [<c0afc920>]    lr : [<c0af63e8>]    psr: 60000013
> > >>>>>>>>> sp : ef0dfd30  ip : 33937b74  fp : c13193c8
> > >>>>>>>>> r10: c1208eec  r9 : 00000000  r8 : ee45f808
> > >>>>>>>>> r7 : c19561a4  r6 : 00000000  r5 : 00000000  r4 : 0000024c
> > >>>>>>>>> r3 : 00000000  r2 : 00204140  r1 : c124f13c  r0 : 00000000
> > >>>>>>>>> Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> > >>>>>>>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
> > >>>>>>>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
> > >>>>>>>>> Stack: (0xef0dfd30 to 0xef0e0000)
> > >>>>>>>>> ...
> > >>>>>>>>> [<c0afc920>] (__mutex_lock) from [<c0afd400>]
> > >>>>>>>>> (mutex_lock_nested+0x1c/0x24)
> > >>>>>>>>> [<c0afd400>] (mutex_lock_nested) from [<c064d4b8>]
> > >>>>>>>>> (__exynos_dsi_host_attach+0x20/0x6c)
> > >>>>>>>>> [<c064d4b8>] (__exynos_dsi_host_attach) from [<c064d914>]
> > >>>>>>>>> (exynos_dsi_host_attach+0x70/0x194)
> > >>>>>>>>> [<c064d914>] (exynos_dsi_host_attach) from [<c0656b64>]
> > >>>>>>>>> (s6e8aa0_probe+0x1b0/0x218)
> > >>>>>>>>> [<c0656b64>] (s6e8aa0_probe) from [<c0672530>]
> > >>>>>>>>> (really_probe+0x200/0x4fc)
> > >>>>>>>>> [<c0672530>] (really_probe) from [<c06729f4>]
> > >>>>>>>>> (driver_probe_device+0x78/0x1fc)
> > >>>>>>>>> [<c06729f4>] (driver_probe_device) from [<c0672ddc>]
> > >>>>>>>>> (device_driver_attach+0x58/0x60)
> > >>>>>>>>> [<c0672ddc>] (device_driver_attach) from [<c0672ec0>]
> > >>>>>>>>> (__driver_attach+0xdc/0x174)
> > >>>>>>>>> [<c0672ec0>] (__driver_attach) from [<c06702b8>]
> > >>>>>>>>> (bus_for_each_dev+0x68/0xb4)
> > >>>>>>>>> [<c06702b8>] (bus_for_each_dev) from [<c06715ec>]
> > >>>>>>>>> (bus_add_driver+0x158/0x214)
> > >>>>>>>>> [<c06715ec>] (bus_add_driver) from [<c0673d20>]
> > >>>>>>>>> (driver_register+0x78/0x110)
> > >>>>>>>>> [<c0673d20>] (driver_register) from [<c0102484>]
> > >>>>>>>>> (do_one_initcall+0x8c/0x42c)
> > >>>>>>>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
> > >>>>>>>>> (kernel_init_freeable+0x190/0x1dc)
> > >>>>>>>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7988>]
> > >>>>>>>>> (kernel_init+0x8/0x118)
> > >>>>>>>>> [<c0af7988>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
> > >>>>>>>>> Exception stack(0xef0dffb0 to 0xef0dfff8)
> > >>>>>>>>> ...
> > >>>>>>>>> ---[ end trace c06e996ec2e8234d ]---
> > >>>>>>>>>
> > >>>>>>>>> This means that dsi->encoder.dev is not initialized in
> > >>>>>>>>> __exynos_dsi_host_attach().
> > >>>>>>>>>
> > >>>>>>>>> This happens, because drm_bridge_attach() in exynos_dsi_bind() returned
> > >>>>>>>>> earlier -517 (deferred probe), what causes cleanup of encoder and
> > >>>>>>>>> release of all drm resources.
> > >>>>>>>>>
> > >>>>>>>>> Then however, the panel tries to register itself and
> > >>>>>>>>> exynos_dsi_host_attach() tries to access the released encoder (which is
> > >>>>>>>>> zeroed in drm_encoder_release) and rest of resources, what causes
> > >>>>>>>>> failure.
> > >>>>>>>>>
> > >>>>>>>>> It looks that something is missing. Maybe mipi host has to be
> > >>>>>>>>> registered
> > >>>>>>>>> later, when bridge is ready? I have no idea how it is handled before
> > >>>>>>>>> this patch. Andrzej, could you comment it a bit?
> > >>>>>>>> I intentionally changed the order, because if another bridge follows
> > >>>>>>>> in the
> > >>>>>>>> pipeline, the probe of the drm driver has to be deferred until some
> > >>>>>>>> bridge
> > >>>>>>>> provides a connector. The next bridge registers itself via the
> > >>>>>>>> host_attach
> > >>>>>>>> function and the deferral is ensured via the bind for the bind/unbind
> > >>>>>>>> API or
> > >>>>>>>> the bridge_attach function otherwise.
> > >>>>>>>>
> > >>>>>>>> On the other hand, the bridge does not have an encoder until the mipi
> > >>>>>>>> device
> > >>>>>>>> has been attached.
> > >>>>>>>>
> > >>>>>>>> As a solution, the exynos dsi driver must initialize the encoder in
> > >>>>>>>> exynos_dsi_probe instead of in exynos_dsi_bind and access the encoder
> > >>>>>>>> via
> > >>>>>>>> exynos_dsi instead of the bridge.
> > >>>>>>>>
> > >>>>>>>> Can you try to move everything except samsung_dsim_bind from
> > >>>>>>>> exynos_dsi_bind
> > >>>>>>>> to exynos_dsi_probe (respectively for unbind) and report if it fixes the
> > >>>>>>>> crash.
> > >>>>>>>
> > >>>>>>> The original behaviour is that encoder (exynos_dsi) is registered
> > >>>>>>> regardless of sink presence (initially panel, later also bridge) - it
> > >>>>>>> avoids multiple issues with deferred probe, device driver bind/unbind
> > >>>>>>> and module load/unload. Appearance or disappearance of sink is
> > >>>>>>> reported to host nicely via DSI attach/detach callbacks - and it is
> > >>>>>>> reflected in drm world as change state of the connector.
> > >>>>>>>
> > >>>>>>> Registering DSI host in bind and unregistering in unbind assures that
> > >>>>>>> if mipi_dsi device is attached/detached the drm device is always
> > >>>>>>> present - it makes device/driver binding race free and allows to avoid
> > >>>>>>> additional locking.
> > >>>>>>>
> > >>>>>>> Moving DSI host registration to probe changes everything, for sure it
> > >>>>>>> breaks the nice feature of DSI attach/detach callbacks and apparently
> > >>>>>>> can cause different issues depending on device bind order.
> > >>>>>>>
> > >>>>>>> I will try to look at the patches tomorrow and maybe I can find more
> > >>>>>>> constructive comments :)
> > >>>>>>
> > >>>>>> As I said yesterday, exynos_dsi driver uses dsi host attach/detach
> > >>>>>> callbacks to control appearance/disappearance of downstream device. It
> > >>>>>> allows to:
> > >>>>>>
> > >>>>>> 1. Safely bind/unbind different device drivers at any time and at any
> > >>>>>> order, without killing exynos_drm and/or crashing system.
> > >>>>>>
> > >>>>>> 2. Avoid issues with late drm init - on some platforms exynos_drm device
> > >>>>>> appeared too late, due to deferred probe, and resulted in black screen
> > >>>>>> in userspace.
> > >>>>>>
> > >>>>>>
> > >>>>>> Now if we want to convert exynos_dsi to drm_bridge I see following options:
> > >>>>>>
> > >>>>>> A. Forgot about callbacks and make the exynos_drm to defer probing until
> > >>>>>> exynos_dsi bridge is available, probably it will cause later exynos_drm
> > >>>>>> appearance, thus probably black screen on some targets. So for sure it
> > >>>>>> will be suboptimal. Making it bridge unbind safe would be another
> > >>>>>> problem, but most developers do not care about it so why should we? :)
> > >>>>>>
> > >>>>>> B. Try to mimic current behaviour - exynos_dsi register bridge ASAP,
> > >>>>>> even if downstream devices are not yet attached, on attach/detach notify
> > >>>>>> drm about it via connector status change, for this dsi_host registration
> > >>>>>> should be performed from drm_bridge attach, I guess.
> > >>>>>>
> > >>>>>>
> > >>>>>> Option A is more standard, but is unsafe and causes other issues.
> > >>>>>>
> > >>>>>> Option B keeps current behaviour.
> > >>>>> Maybe we can have both, but I am not sure, if I am missing something:
> > >>>>>
> > >>>>> I still prefer option A for the samsung-dsim driver, because it is more
> > >>>>> standard, simpler and avoids issues with encoders, connectors or handling
> > >>>>> hotplug.
> > >>>>>
> > >>>>> The idea is to use two bridges in the exynos-dsi driver: One bridge in the
> > >>>>> samsung-dsim driver which implements option A and defers probing of the drm
> > >>>>> driver until the next bridge is attached. And a second bridge in the
> > >>>>> exynos_dsi that attaches to the first bridge (thus, allowing the exynos_drm
> > >>>>> device to appear) and implements the hotplug handling for notifying drm via
> > >>>>> connector status change.
> > >>>>>
> > >>>>> The driver for the i.MX8M would use the samsung-dsim bridge without an
> > >>>>> additional bridge.
> > >>>>>
> > >>>>> This allows the samsung-dsim driver to expose the standard behavior while the
> > >>>>> exynos_dsi may stick to the existing behavior for the exynos_drm driver.
> > >>>>>
> > >>>>> I hope this makes sense and does not sound too crazy. It might be difficult to
> > >>>>> get the probing and mipi host/device registration correct, but I will try, if
> > >>>>> this can work.
> > >>>> Adding two bridges for being able to support hotplugging adds many special
> > >>>> cases to the bridge driver and still requires more custom API to correctly add
> > >>>> the second bridge. I don't think that this a viable path to go.
> > >>> Just jumping in here: You cannot hotplug/hotremove anything from a
> > >>> drm_device after drm_dev_register has been called, except
> > >>> drm_connector. I didn't dig into details here so not sure whether you
> > >>> want to late-bind your bridge after drm_dev_register is called or not,
> > >>> so might just be fyi and not relevant to the discussion.
> > >> Thanks. AFAIC that is exactly what is currently implemented in the exynos_drm
> > >> driver (i.e. Option B)
> > >>
> > >> exynos_dsi_bind configures the encoder and registers a DSI host. Afterwards,
> > >> exynos_drm_bind (as component_master_ops) calls drm_dev_register. Later, a DSI
> > >> device might attach to the DSI host and call exynos_dsi_host_attach. In
> > >> exynos_dsi_host_attach, the driver finds the drm_bridge for the DSI device and
> > >> attaches this bridge to the encoder _after_ drm_dev_register has been called.
> > >> This is invalid behavior, right?
> > > Definitely not supported, I don't think we have the right locks in place
> > > to make sure this works.
> > >
> > > Now if your _only_ adding a drm_bridge (and not an encoder or anything
> > > like that), and you are adding the drm_connector correctly (like a
> > > hotplugged DP MST sink), then that would at least work from a uapi pov.
> > > Because drm_bridge isn't exposed as an uapi object.
> > >
> > > But yeah, as-is, don't :-)
> > >
> > > The solution here is a bunch of EPROBE_DEFER handling until all your
> > > bridges are loaded, with or without the assistance of component.c
> > > framework. Only then call drm_dev_register.
> >
> > I have impression we have similar conversation already.
> >
> > As you stated drm_bridge and drm_panel are not exposed to userspace so
> > there shouldn't be problem with them from uapi PoV.
> >
> > On the other side drm_panel or drm_bridge are not used until pipeline
> > enters connected state (at least they were not some time ago :) ). The
> > issue is that bridge exposes drm_connector, but as you stated (again :)
> > ) connectors can be hotplugged, so in theory it should work. Practical
> > tests shows that it also works, but bugs can be still there.
> >
> > Bunch of EPROBE_DEFER was very slow (as a result userspace timeouted and
> > decided there is no display), and does not handle unbinding/re-binding
> > drivers.
> 
> Rebinding drivers should be fixed now, with a bunch of fixes in driver
> core. If not, we need to fix this more.
> 
> Also, EPROBE_DEFER is how this is supposed to work. If it's too slow,
> we need to fix EPROBE_DEFER (there's ideas for pre-sorting that never
> seem to go anywhere), not paper over it with bad architecture in
> drivers.

I've heard this argument multiple times, but it sounds more like an
attempt to ignore the problem and hope it will fall on someone else's
plate :-) Improvement in the probe deferral mechanism are certainly an
option to explore, but as far as I can tell nobody has proven that this
mechanism is or will be able to solve all problems related to probe
ordering dependencies. I wouldn't rule out the need for different
solutions for some of the issues.

> There's also a bit the issue that most userspace handles panels in a
> special way, and if they don't find the panel at first, that doesn't
> work. Stuff like "which is the main screen" on laptops.
> 
> So yeah please fix this properly.
>
> > >>>> This leaves us with:
> > >>>>
> > >>>> Option A) Standard drm_bridge behavior, which is currently implemented, but
> > >>>> incompatible with the currently expected behavior of exynos_drm.
> > >>>>
> > >>>> Option B) Creating the drm device without all bridges being attached, which
> > >>>> would work with the exynos_drm driver, but breaks for the standard drm_bridge
> > >>>> behavior, especially, if the encoder/connector is created at the beginning of
> > >>>> the pipeline and passed downwards when the bridges are attached.
> > >>>>
> > >>>> Option C) Extracting only low level register accesses into shared code, adding
> > >>>> a custom interface and implementing the drm_bridge handling in the platform
> > >>>> specific code.
> > >>>>
> > >>>> None of the options really convinces me.
Daniel Vetter Feb. 4, 2021, 5:46 p.m. UTC | #12
On Thu, Feb 4, 2021 at 6:26 PM Laurent Pinchart
<laurent.pinchart@ideasonboard.com> wrote:
>
> Hi Daniel,
>
> On Thu, Feb 04, 2021 at 06:19:22PM +0100, Daniel Vetter wrote:
> > On Thu, Feb 4, 2021 at 5:28 PM Andrzej Hajda wrote:
> > > W dniu 04.02.2021 o 17:05, Daniel Vetter pisze:
> > > > On Thu, Feb 04, 2021 at 11:56:32AM +0100, Michael Tretter wrote:
> > > >> On Thu, 04 Feb 2021 11:17:49 +0100, Daniel Vetter wrote:
> > > >>> On Wed, Feb 3, 2021 at 9:32 PM Michael Tretter wrote:
> > > >>>> On Mon, 01 Feb 2021 17:33:14 +0100, Michael Tretter wrote:
> > > >>>>> On Tue, 15 Sep 2020 21:40:40 +0200, Andrzej Hajda wrote:
> > > >>>>>> W dniu 14.09.2020 o 23:19, Andrzej Hajda pisze:
> > > >>>>>>> On 14.09.2020 22:01, Michael Tretter wrote:
> > > >>>>>>>> On Mon, 14 Sep 2020 14:31:19 +0200, Marek Szyprowski wrote:
> > > >>>>>>>>> On 14.09.2020 10:29, Marek Szyprowski wrote:
> > > >>>>>>>>>> On 11.09.2020 15:54, Michael Tretter wrote:
> > > >>>>>>>>>>> Make the exynos_dsi driver a full drm bridge that can be found and
> > > >>>>>>>>>>> used
> > > >>>>>>>>>>> from other drivers.
> > > >>>>>>>>>>>
> > > >>>>>>>>>>> Other drivers can only attach to the bridge, if a mipi dsi device
> > > >>>>>>>>>>> already attached to the bridge. This allows to defer the probe of the
> > > >>>>>>>>>>> display pipe until the downstream bridges are available, too.
> > > >>>>>>>>>>>
> > > >>>>>>>>>>> Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
> > > >>>>>>>>>> This one (and the whole series applied) still fails on Exynos boards:
> > > >>>>>>>>>>
> > > >>>>>>>>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping
> > > >>>>>>>>>> operations
> > > >>>>>>>>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
> > > >>>>>>>>>> OF: graph: no port node found in /soc/dsi@11c80000
> > > >>>>>>>>>> 8<--- cut here ---
> > > >>>>>>>>>> Unable to handle kernel NULL pointer dereference at virtual address
> > > >>>>>>>>>> 00000084
> > > >>>>>>>>>> pgd = (ptrval)
> > > >>>>>>>>>> [00000084] *pgd=00000000
> > > >>>>>>>>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
> > > >>>>>>>>>> Modules linked in:
> > > >>>>>>>>>> CPU: 1 PID: 1 Comm: swapper/0 Not tainted
> > > >>>>>>>>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec #1608
> > > >>>>>>>>>> Hardware name: Samsung Exynos (Flattened Device Tree)
> > > >>>>>>>>>> PC is at drm_bridge_attach+0x18/0x164
> > > >>>>>>>>>> LR is at exynos_dsi_bind+0x88/0xa8
> > > >>>>>>>>>> pc : [<c0628c08>]    lr : [<c064d560>]    psr: 20000013
> > > >>>>>>>>>> sp : ef0dfca8  ip : 00000002  fp : c13190e0
> > > >>>>>>>>>> r10: 00000000  r9 : ee46d580  r8 : c13190e0
> > > >>>>>>>>>> r7 : ee438800  r6 : 00000018  r5 : ef253810  r4 : ef39e840
> > > >>>>>>>>>> r3 : 00000000  r2 : 00000018  r1 : ef39e888  r0 : ef39e840
> > > >>>>>>>>>> Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> > > >>>>>>>>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
> > > >>>>>>>>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
> > > >>>>>>>>>> Stack: (0xef0dfca8 to 0xef0e0000)
> > > >>>>>>>>>> ...
> > > >>>>>>>>>> [<c0628c08>] (drm_bridge_attach) from [<c064d560>]
> > > >>>>>>>>>> (exynos_dsi_bind+0x88/0xa8)
> > > >>>>>>>>>> [<c064d560>] (exynos_dsi_bind) from [<c066a800>]
> > > >>>>>>>>>> (component_bind_all+0xfc/0x290)
> > > >>>>>>>>>> [<c066a800>] (component_bind_all) from [<c0649dc0>]
> > > >>>>>>>>>> (exynos_drm_bind+0xe4/0x19c)
> > > >>>>>>>>>> [<c0649dc0>] (exynos_drm_bind) from [<c066ad74>]
> > > >>>>>>>>>> (try_to_bring_up_master+0x1e4/0x2c4)
> > > >>>>>>>>>> [<c066ad74>] (try_to_bring_up_master) from [<c066b2b4>]
> > > >>>>>>>>>> (component_master_add_with_match+0xd4/0x108)
> > > >>>>>>>>>> [<c066b2b4>] (component_master_add_with_match) from [<c0649ae8>]
> > > >>>>>>>>>> (exynos_drm_platform_probe+0xe4/0x110)
> > > >>>>>>>>>> [<c0649ae8>] (exynos_drm_platform_probe) from [<c0674e6c>]
> > > >>>>>>>>>> (platform_drv_probe+0x6c/0xa4)
> > > >>>>>>>>>> [<c0674e6c>] (platform_drv_probe) from [<c067242c>]
> > > >>>>>>>>>> (really_probe+0x200/0x4fc)
> > > >>>>>>>>>> [<c067242c>] (really_probe) from [<c06728f0>]
> > > >>>>>>>>>> (driver_probe_device+0x78/0x1fc)
> > > >>>>>>>>>> [<c06728f0>] (driver_probe_device) from [<c0672cd8>]
> > > >>>>>>>>>> (device_driver_attach+0x58/0x60)
> > > >>>>>>>>>> [<c0672cd8>] (device_driver_attach) from [<c0672dbc>]
> > > >>>>>>>>>> (__driver_attach+0xdc/0x174)
> > > >>>>>>>>>> [<c0672dbc>] (__driver_attach) from [<c06701b4>]
> > > >>>>>>>>>> (bus_for_each_dev+0x68/0xb4)
> > > >>>>>>>>>> [<c06701b4>] (bus_for_each_dev) from [<c06714e8>]
> > > >>>>>>>>>> (bus_add_driver+0x158/0x214)
> > > >>>>>>>>>> [<c06714e8>] (bus_add_driver) from [<c0673c1c>]
> > > >>>>>>>>>> (driver_register+0x78/0x110)
> > > >>>>>>>>>> [<c0673c1c>] (driver_register) from [<c0649ca8>]
> > > >>>>>>>>>> (exynos_drm_init+0xe4/0x118)
> > > >>>>>>>>>> [<c0649ca8>] (exynos_drm_init) from [<c0102484>]
> > > >>>>>>>>>> (do_one_initcall+0x8c/0x42c)
> > > >>>>>>>>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
> > > >>>>>>>>>> (kernel_init_freeable+0x190/0x1dc)
> > > >>>>>>>>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7880>]
> > > >>>>>>>>>> (kernel_init+0x8/0x118)
> > > >>>>>>>>>> [<c0af7880>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
> > > >>>>>>>>>> Exception stack(0xef0dffb0 to 0xef0dfff8)
> > > >>>>>>>>>> ...
> > > >>>>>>>>>> ---[ end trace ee27f313f9ed9da1 ]---
> > > >>>>>>>>>>
> > > >>>>>>>>>> # arm-linux-gnueabi-addr2line -e vmlinux c0628c08
> > > >>>>>>>>>> drivers/gpu/drm/drm_bridge.c:184 (discriminator 1)
> > > >>>>>>>>>>
> > > >>>>>>>>>> I will try to debug it a bit more today.
> > > >>>>>>>>> The above crash has been caused by lack of in_bridge initialization to
> > > >>>>>>>>> NULL in exynos_dsi_bind() in this patch. However, fixing it reveals
> > > >>>>>>>>> another issue:
> > > >>>>>>>>>
> > > >>>>>>>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping operations
> > > >>>>>>>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
> > > >>>>>>>>> OF: graph: no port node found in /soc/dsi@11c80000
> > > >>>>>>>>> 8<--- cut here ---
> > > >>>>>>>>> Unable to handle kernel NULL pointer dereference at virtual address
> > > >>>>>>>>> 00000280
> > > >>>>>>>>> pgd = (ptrval)
> > > >>>>>>>>> [00000280] *pgd=00000000
> > > >>>>>>>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
> > > >>>>>>>>> Modules linked in:
> > > >>>>>>>>> CPU: 0 PID: 1 Comm: swapper/0 Not tainted
> > > >>>>>>>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec-dirty #1613
> > > >>>>>>>>> Hardware name: Samsung Exynos (Flattened Device Tree)
> > > >>>>>>>>> PC is at __mutex_lock+0x54/0xb18
> > > >>>>>>>>> LR is at lock_is_held_type+0x80/0x138
> > > >>>>>>>>> pc : [<c0afc920>]    lr : [<c0af63e8>]    psr: 60000013
> > > >>>>>>>>> sp : ef0dfd30  ip : 33937b74  fp : c13193c8
> > > >>>>>>>>> r10: c1208eec  r9 : 00000000  r8 : ee45f808
> > > >>>>>>>>> r7 : c19561a4  r6 : 00000000  r5 : 00000000  r4 : 0000024c
> > > >>>>>>>>> r3 : 00000000  r2 : 00204140  r1 : c124f13c  r0 : 00000000
> > > >>>>>>>>> Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> > > >>>>>>>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
> > > >>>>>>>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
> > > >>>>>>>>> Stack: (0xef0dfd30 to 0xef0e0000)
> > > >>>>>>>>> ...
> > > >>>>>>>>> [<c0afc920>] (__mutex_lock) from [<c0afd400>]
> > > >>>>>>>>> (mutex_lock_nested+0x1c/0x24)
> > > >>>>>>>>> [<c0afd400>] (mutex_lock_nested) from [<c064d4b8>]
> > > >>>>>>>>> (__exynos_dsi_host_attach+0x20/0x6c)
> > > >>>>>>>>> [<c064d4b8>] (__exynos_dsi_host_attach) from [<c064d914>]
> > > >>>>>>>>> (exynos_dsi_host_attach+0x70/0x194)
> > > >>>>>>>>> [<c064d914>] (exynos_dsi_host_attach) from [<c0656b64>]
> > > >>>>>>>>> (s6e8aa0_probe+0x1b0/0x218)
> > > >>>>>>>>> [<c0656b64>] (s6e8aa0_probe) from [<c0672530>]
> > > >>>>>>>>> (really_probe+0x200/0x4fc)
> > > >>>>>>>>> [<c0672530>] (really_probe) from [<c06729f4>]
> > > >>>>>>>>> (driver_probe_device+0x78/0x1fc)
> > > >>>>>>>>> [<c06729f4>] (driver_probe_device) from [<c0672ddc>]
> > > >>>>>>>>> (device_driver_attach+0x58/0x60)
> > > >>>>>>>>> [<c0672ddc>] (device_driver_attach) from [<c0672ec0>]
> > > >>>>>>>>> (__driver_attach+0xdc/0x174)
> > > >>>>>>>>> [<c0672ec0>] (__driver_attach) from [<c06702b8>]
> > > >>>>>>>>> (bus_for_each_dev+0x68/0xb4)
> > > >>>>>>>>> [<c06702b8>] (bus_for_each_dev) from [<c06715ec>]
> > > >>>>>>>>> (bus_add_driver+0x158/0x214)
> > > >>>>>>>>> [<c06715ec>] (bus_add_driver) from [<c0673d20>]
> > > >>>>>>>>> (driver_register+0x78/0x110)
> > > >>>>>>>>> [<c0673d20>] (driver_register) from [<c0102484>]
> > > >>>>>>>>> (do_one_initcall+0x8c/0x42c)
> > > >>>>>>>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
> > > >>>>>>>>> (kernel_init_freeable+0x190/0x1dc)
> > > >>>>>>>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7988>]
> > > >>>>>>>>> (kernel_init+0x8/0x118)
> > > >>>>>>>>> [<c0af7988>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
> > > >>>>>>>>> Exception stack(0xef0dffb0 to 0xef0dfff8)
> > > >>>>>>>>> ...
> > > >>>>>>>>> ---[ end trace c06e996ec2e8234d ]---
> > > >>>>>>>>>
> > > >>>>>>>>> This means that dsi->encoder.dev is not initialized in
> > > >>>>>>>>> __exynos_dsi_host_attach().
> > > >>>>>>>>>
> > > >>>>>>>>> This happens, because drm_bridge_attach() in exynos_dsi_bind() returned
> > > >>>>>>>>> earlier -517 (deferred probe), what causes cleanup of encoder and
> > > >>>>>>>>> release of all drm resources.
> > > >>>>>>>>>
> > > >>>>>>>>> Then however, the panel tries to register itself and
> > > >>>>>>>>> exynos_dsi_host_attach() tries to access the released encoder (which is
> > > >>>>>>>>> zeroed in drm_encoder_release) and rest of resources, what causes
> > > >>>>>>>>> failure.
> > > >>>>>>>>>
> > > >>>>>>>>> It looks that something is missing. Maybe mipi host has to be
> > > >>>>>>>>> registered
> > > >>>>>>>>> later, when bridge is ready? I have no idea how it is handled before
> > > >>>>>>>>> this patch. Andrzej, could you comment it a bit?
> > > >>>>>>>> I intentionally changed the order, because if another bridge follows
> > > >>>>>>>> in the
> > > >>>>>>>> pipeline, the probe of the drm driver has to be deferred until some
> > > >>>>>>>> bridge
> > > >>>>>>>> provides a connector. The next bridge registers itself via the
> > > >>>>>>>> host_attach
> > > >>>>>>>> function and the deferral is ensured via the bind for the bind/unbind
> > > >>>>>>>> API or
> > > >>>>>>>> the bridge_attach function otherwise.
> > > >>>>>>>>
> > > >>>>>>>> On the other hand, the bridge does not have an encoder until the mipi
> > > >>>>>>>> device
> > > >>>>>>>> has been attached.
> > > >>>>>>>>
> > > >>>>>>>> As a solution, the exynos dsi driver must initialize the encoder in
> > > >>>>>>>> exynos_dsi_probe instead of in exynos_dsi_bind and access the encoder
> > > >>>>>>>> via
> > > >>>>>>>> exynos_dsi instead of the bridge.
> > > >>>>>>>>
> > > >>>>>>>> Can you try to move everything except samsung_dsim_bind from
> > > >>>>>>>> exynos_dsi_bind
> > > >>>>>>>> to exynos_dsi_probe (respectively for unbind) and report if it fixes the
> > > >>>>>>>> crash.
> > > >>>>>>>
> > > >>>>>>> The original behaviour is that encoder (exynos_dsi) is registered
> > > >>>>>>> regardless of sink presence (initially panel, later also bridge) - it
> > > >>>>>>> avoids multiple issues with deferred probe, device driver bind/unbind
> > > >>>>>>> and module load/unload. Appearance or disappearance of sink is
> > > >>>>>>> reported to host nicely via DSI attach/detach callbacks - and it is
> > > >>>>>>> reflected in drm world as change state of the connector.
> > > >>>>>>>
> > > >>>>>>> Registering DSI host in bind and unregistering in unbind assures that
> > > >>>>>>> if mipi_dsi device is attached/detached the drm device is always
> > > >>>>>>> present - it makes device/driver binding race free and allows to avoid
> > > >>>>>>> additional locking.
> > > >>>>>>>
> > > >>>>>>> Moving DSI host registration to probe changes everything, for sure it
> > > >>>>>>> breaks the nice feature of DSI attach/detach callbacks and apparently
> > > >>>>>>> can cause different issues depending on device bind order.
> > > >>>>>>>
> > > >>>>>>> I will try to look at the patches tomorrow and maybe I can find more
> > > >>>>>>> constructive comments :)
> > > >>>>>>
> > > >>>>>> As I said yesterday, exynos_dsi driver uses dsi host attach/detach
> > > >>>>>> callbacks to control appearance/disappearance of downstream device. It
> > > >>>>>> allows to:
> > > >>>>>>
> > > >>>>>> 1. Safely bind/unbind different device drivers at any time and at any
> > > >>>>>> order, without killing exynos_drm and/or crashing system.
> > > >>>>>>
> > > >>>>>> 2. Avoid issues with late drm init - on some platforms exynos_drm device
> > > >>>>>> appeared too late, due to deferred probe, and resulted in black screen
> > > >>>>>> in userspace.
> > > >>>>>>
> > > >>>>>>
> > > >>>>>> Now if we want to convert exynos_dsi to drm_bridge I see following options:
> > > >>>>>>
> > > >>>>>> A. Forgot about callbacks and make the exynos_drm to defer probing until
> > > >>>>>> exynos_dsi bridge is available, probably it will cause later exynos_drm
> > > >>>>>> appearance, thus probably black screen on some targets. So for sure it
> > > >>>>>> will be suboptimal. Making it bridge unbind safe would be another
> > > >>>>>> problem, but most developers do not care about it so why should we? :)
> > > >>>>>>
> > > >>>>>> B. Try to mimic current behaviour - exynos_dsi register bridge ASAP,
> > > >>>>>> even if downstream devices are not yet attached, on attach/detach notify
> > > >>>>>> drm about it via connector status change, for this dsi_host registration
> > > >>>>>> should be performed from drm_bridge attach, I guess.
> > > >>>>>>
> > > >>>>>>
> > > >>>>>> Option A is more standard, but is unsafe and causes other issues.
> > > >>>>>>
> > > >>>>>> Option B keeps current behaviour.
> > > >>>>> Maybe we can have both, but I am not sure, if I am missing something:
> > > >>>>>
> > > >>>>> I still prefer option A for the samsung-dsim driver, because it is more
> > > >>>>> standard, simpler and avoids issues with encoders, connectors or handling
> > > >>>>> hotplug.
> > > >>>>>
> > > >>>>> The idea is to use two bridges in the exynos-dsi driver: One bridge in the
> > > >>>>> samsung-dsim driver which implements option A and defers probing of the drm
> > > >>>>> driver until the next bridge is attached. And a second bridge in the
> > > >>>>> exynos_dsi that attaches to the first bridge (thus, allowing the exynos_drm
> > > >>>>> device to appear) and implements the hotplug handling for notifying drm via
> > > >>>>> connector status change.
> > > >>>>>
> > > >>>>> The driver for the i.MX8M would use the samsung-dsim bridge without an
> > > >>>>> additional bridge.
> > > >>>>>
> > > >>>>> This allows the samsung-dsim driver to expose the standard behavior while the
> > > >>>>> exynos_dsi may stick to the existing behavior for the exynos_drm driver.
> > > >>>>>
> > > >>>>> I hope this makes sense and does not sound too crazy. It might be difficult to
> > > >>>>> get the probing and mipi host/device registration correct, but I will try, if
> > > >>>>> this can work.
> > > >>>> Adding two bridges for being able to support hotplugging adds many special
> > > >>>> cases to the bridge driver and still requires more custom API to correctly add
> > > >>>> the second bridge. I don't think that this a viable path to go.
> > > >>> Just jumping in here: You cannot hotplug/hotremove anything from a
> > > >>> drm_device after drm_dev_register has been called, except
> > > >>> drm_connector. I didn't dig into details here so not sure whether you
> > > >>> want to late-bind your bridge after drm_dev_register is called or not,
> > > >>> so might just be fyi and not relevant to the discussion.
> > > >> Thanks. AFAIC that is exactly what is currently implemented in the exynos_drm
> > > >> driver (i.e. Option B)
> > > >>
> > > >> exynos_dsi_bind configures the encoder and registers a DSI host. Afterwards,
> > > >> exynos_drm_bind (as component_master_ops) calls drm_dev_register. Later, a DSI
> > > >> device might attach to the DSI host and call exynos_dsi_host_attach. In
> > > >> exynos_dsi_host_attach, the driver finds the drm_bridge for the DSI device and
> > > >> attaches this bridge to the encoder _after_ drm_dev_register has been called.
> > > >> This is invalid behavior, right?
> > > > Definitely not supported, I don't think we have the right locks in place
> > > > to make sure this works.
> > > >
> > > > Now if your _only_ adding a drm_bridge (and not an encoder or anything
> > > > like that), and you are adding the drm_connector correctly (like a
> > > > hotplugged DP MST sink), then that would at least work from a uapi pov.
> > > > Because drm_bridge isn't exposed as an uapi object.
> > > >
> > > > But yeah, as-is, don't :-)
> > > >
> > > > The solution here is a bunch of EPROBE_DEFER handling until all your
> > > > bridges are loaded, with or without the assistance of component.c
> > > > framework. Only then call drm_dev_register.
> > >
> > > I have impression we have similar conversation already.
> > >
> > > As you stated drm_bridge and drm_panel are not exposed to userspace so
> > > there shouldn't be problem with them from uapi PoV.
> > >
> > > On the other side drm_panel or drm_bridge are not used until pipeline
> > > enters connected state (at least they were not some time ago :) ). The
> > > issue is that bridge exposes drm_connector, but as you stated (again :)
> > > ) connectors can be hotplugged, so in theory it should work. Practical
> > > tests shows that it also works, but bugs can be still there.
> > >
> > > Bunch of EPROBE_DEFER was very slow (as a result userspace timeouted and
> > > decided there is no display), and does not handle unbinding/re-binding
> > > drivers.
> >
> > Rebinding drivers should be fixed now, with a bunch of fixes in driver
> > core. If not, we need to fix this more.
> >
> > Also, EPROBE_DEFER is how this is supposed to work. If it's too slow,
> > we need to fix EPROBE_DEFER (there's ideas for pre-sorting that never
> > seem to go anywhere), not paper over it with bad architecture in
> > drivers.
>
> I've heard this argument multiple times, but it sounds more like an
> attempt to ignore the problem and hope it will fall on someone else's
> plate :-) Improvement in the probe deferral mechanism are certainly an
> option to explore, but as far as I can tell nobody has proven that this
> mechanism is or will be able to solve all problems related to probe
> ordering dependencies. I wouldn't rule out the need for different
> solutions for some of the issues.

Then build another one. But adding hotplug for stuff that is there,
and shouldn't be hotplugged, just because it's easier on driver
writers and harder on userspace isn't really a good approach.
-Daniel

> > There's also a bit the issue that most userspace handles panels in a
> > special way, and if they don't find the panel at first, that doesn't
> > work. Stuff like "which is the main screen" on laptops.
> >
> > So yeah please fix this properly.
> >
> > > >>>> This leaves us with:
> > > >>>>
> > > >>>> Option A) Standard drm_bridge behavior, which is currently implemented, but
> > > >>>> incompatible with the currently expected behavior of exynos_drm.
> > > >>>>
> > > >>>> Option B) Creating the drm device without all bridges being attached, which
> > > >>>> would work with the exynos_drm driver, but breaks for the standard drm_bridge
> > > >>>> behavior, especially, if the encoder/connector is created at the beginning of
> > > >>>> the pipeline and passed downwards when the bridges are attached.
> > > >>>>
> > > >>>> Option C) Extracting only low level register accesses into shared code, adding
> > > >>>> a custom interface and implementing the drm_bridge handling in the platform
> > > >>>> specific code.
> > > >>>>
> > > >>>> None of the options really convinces me.
>
> --
> Regards,
>
> Laurent Pinchart
Frieder Schrempf Feb. 10, 2021, 9:10 a.m. UTC | #13
On 04.02.21 18:46, Daniel Vetter wrote:
> On Thu, Feb 4, 2021 at 6:26 PM Laurent Pinchart
> <laurent.pinchart@ideasonboard.com> wrote:
>>
>> Hi Daniel,
>>
>> On Thu, Feb 04, 2021 at 06:19:22PM +0100, Daniel Vetter wrote:
>>> On Thu, Feb 4, 2021 at 5:28 PM Andrzej Hajda wrote:
>>>> W dniu 04.02.2021 o 17:05, Daniel Vetter pisze:
>>>>> On Thu, Feb 04, 2021 at 11:56:32AM +0100, Michael Tretter wrote:
>>>>>> On Thu, 04 Feb 2021 11:17:49 +0100, Daniel Vetter wrote:
>>>>>>> On Wed, Feb 3, 2021 at 9:32 PM Michael Tretter wrote:
>>>>>>>> On Mon, 01 Feb 2021 17:33:14 +0100, Michael Tretter wrote:
>>>>>>>>> On Tue, 15 Sep 2020 21:40:40 +0200, Andrzej Hajda wrote:
>>>>>>>>>> W dniu 14.09.2020 o 23:19, Andrzej Hajda pisze:
>>>>>>>>>>> On 14.09.2020 22:01, Michael Tretter wrote:
>>>>>>>>>>>> On Mon, 14 Sep 2020 14:31:19 +0200, Marek Szyprowski wrote:
>>>>>>>>>>>>> On 14.09.2020 10:29, Marek Szyprowski wrote:
>>>>>>>>>>>>>> On 11.09.2020 15:54, Michael Tretter wrote:
>>>>>>>>>>>>>>> Make the exynos_dsi driver a full drm bridge that can be found and
>>>>>>>>>>>>>>> used
>>>>>>>>>>>>>>> from other drivers.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Other drivers can only attach to the bridge, if a mipi dsi device
>>>>>>>>>>>>>>> already attached to the bridge. This allows to defer the probe of the
>>>>>>>>>>>>>>> display pipe until the downstream bridges are available, too.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
>>>>>>>>>>>>>> This one (and the whole series applied) still fails on Exynos boards:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping
>>>>>>>>>>>>>> operations
>>>>>>>>>>>>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
>>>>>>>>>>>>>> OF: graph: no port node found in /soc/dsi@11c80000
>>>>>>>>>>>>>> 8<--- cut here ---
>>>>>>>>>>>>>> Unable to handle kernel NULL pointer dereference at virtual address
>>>>>>>>>>>>>> 00000084
>>>>>>>>>>>>>> pgd = (ptrval)
>>>>>>>>>>>>>> [00000084] *pgd=00000000
>>>>>>>>>>>>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
>>>>>>>>>>>>>> Modules linked in:
>>>>>>>>>>>>>> CPU: 1 PID: 1 Comm: swapper/0 Not tainted
>>>>>>>>>>>>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec #1608
>>>>>>>>>>>>>> Hardware name: Samsung Exynos (Flattened Device Tree)
>>>>>>>>>>>>>> PC is at drm_bridge_attach+0x18/0x164
>>>>>>>>>>>>>> LR is at exynos_dsi_bind+0x88/0xa8
>>>>>>>>>>>>>> pc : [<c0628c08>]    lr : [<c064d560>]    psr: 20000013
>>>>>>>>>>>>>> sp : ef0dfca8  ip : 00000002  fp : c13190e0
>>>>>>>>>>>>>> r10: 00000000  r9 : ee46d580  r8 : c13190e0
>>>>>>>>>>>>>> r7 : ee438800  r6 : 00000018  r5 : ef253810  r4 : ef39e840
>>>>>>>>>>>>>> r3 : 00000000  r2 : 00000018  r1 : ef39e888  r0 : ef39e840
>>>>>>>>>>>>>> Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
>>>>>>>>>>>>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
>>>>>>>>>>>>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
>>>>>>>>>>>>>> Stack: (0xef0dfca8 to 0xef0e0000)
>>>>>>>>>>>>>> ...
>>>>>>>>>>>>>> [<c0628c08>] (drm_bridge_attach) from [<c064d560>]
>>>>>>>>>>>>>> (exynos_dsi_bind+0x88/0xa8)
>>>>>>>>>>>>>> [<c064d560>] (exynos_dsi_bind) from [<c066a800>]
>>>>>>>>>>>>>> (component_bind_all+0xfc/0x290)
>>>>>>>>>>>>>> [<c066a800>] (component_bind_all) from [<c0649dc0>]
>>>>>>>>>>>>>> (exynos_drm_bind+0xe4/0x19c)
>>>>>>>>>>>>>> [<c0649dc0>] (exynos_drm_bind) from [<c066ad74>]
>>>>>>>>>>>>>> (try_to_bring_up_master+0x1e4/0x2c4)
>>>>>>>>>>>>>> [<c066ad74>] (try_to_bring_up_master) from [<c066b2b4>]
>>>>>>>>>>>>>> (component_master_add_with_match+0xd4/0x108)
>>>>>>>>>>>>>> [<c066b2b4>] (component_master_add_with_match) from [<c0649ae8>]
>>>>>>>>>>>>>> (exynos_drm_platform_probe+0xe4/0x110)
>>>>>>>>>>>>>> [<c0649ae8>] (exynos_drm_platform_probe) from [<c0674e6c>]
>>>>>>>>>>>>>> (platform_drv_probe+0x6c/0xa4)
>>>>>>>>>>>>>> [<c0674e6c>] (platform_drv_probe) from [<c067242c>]
>>>>>>>>>>>>>> (really_probe+0x200/0x4fc)
>>>>>>>>>>>>>> [<c067242c>] (really_probe) from [<c06728f0>]
>>>>>>>>>>>>>> (driver_probe_device+0x78/0x1fc)
>>>>>>>>>>>>>> [<c06728f0>] (driver_probe_device) from [<c0672cd8>]
>>>>>>>>>>>>>> (device_driver_attach+0x58/0x60)
>>>>>>>>>>>>>> [<c0672cd8>] (device_driver_attach) from [<c0672dbc>]
>>>>>>>>>>>>>> (__driver_attach+0xdc/0x174)
>>>>>>>>>>>>>> [<c0672dbc>] (__driver_attach) from [<c06701b4>]
>>>>>>>>>>>>>> (bus_for_each_dev+0x68/0xb4)
>>>>>>>>>>>>>> [<c06701b4>] (bus_for_each_dev) from [<c06714e8>]
>>>>>>>>>>>>>> (bus_add_driver+0x158/0x214)
>>>>>>>>>>>>>> [<c06714e8>] (bus_add_driver) from [<c0673c1c>]
>>>>>>>>>>>>>> (driver_register+0x78/0x110)
>>>>>>>>>>>>>> [<c0673c1c>] (driver_register) from [<c0649ca8>]
>>>>>>>>>>>>>> (exynos_drm_init+0xe4/0x118)
>>>>>>>>>>>>>> [<c0649ca8>] (exynos_drm_init) from [<c0102484>]
>>>>>>>>>>>>>> (do_one_initcall+0x8c/0x42c)
>>>>>>>>>>>>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
>>>>>>>>>>>>>> (kernel_init_freeable+0x190/0x1dc)
>>>>>>>>>>>>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7880>]
>>>>>>>>>>>>>> (kernel_init+0x8/0x118)
>>>>>>>>>>>>>> [<c0af7880>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
>>>>>>>>>>>>>> Exception stack(0xef0dffb0 to 0xef0dfff8)
>>>>>>>>>>>>>> ...
>>>>>>>>>>>>>> ---[ end trace ee27f313f9ed9da1 ]---
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> # arm-linux-gnueabi-addr2line -e vmlinux c0628c08
>>>>>>>>>>>>>> drivers/gpu/drm/drm_bridge.c:184 (discriminator 1)
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> I will try to debug it a bit more today.
>>>>>>>>>>>>> The above crash has been caused by lack of in_bridge initialization to
>>>>>>>>>>>>> NULL in exynos_dsi_bind() in this patch. However, fixing it reveals
>>>>>>>>>>>>> another issue:
>>>>>>>>>>>>>
>>>>>>>>>>>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping operations
>>>>>>>>>>>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
>>>>>>>>>>>>> OF: graph: no port node found in /soc/dsi@11c80000
>>>>>>>>>>>>> 8<--- cut here ---
>>>>>>>>>>>>> Unable to handle kernel NULL pointer dereference at virtual address
>>>>>>>>>>>>> 00000280
>>>>>>>>>>>>> pgd = (ptrval)
>>>>>>>>>>>>> [00000280] *pgd=00000000
>>>>>>>>>>>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
>>>>>>>>>>>>> Modules linked in:
>>>>>>>>>>>>> CPU: 0 PID: 1 Comm: swapper/0 Not tainted
>>>>>>>>>>>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec-dirty #1613
>>>>>>>>>>>>> Hardware name: Samsung Exynos (Flattened Device Tree)
>>>>>>>>>>>>> PC is at __mutex_lock+0x54/0xb18
>>>>>>>>>>>>> LR is at lock_is_held_type+0x80/0x138
>>>>>>>>>>>>> pc : [<c0afc920>]    lr : [<c0af63e8>]    psr: 60000013
>>>>>>>>>>>>> sp : ef0dfd30  ip : 33937b74  fp : c13193c8
>>>>>>>>>>>>> r10: c1208eec  r9 : 00000000  r8 : ee45f808
>>>>>>>>>>>>> r7 : c19561a4  r6 : 00000000  r5 : 00000000  r4 : 0000024c
>>>>>>>>>>>>> r3 : 00000000  r2 : 00204140  r1 : c124f13c  r0 : 00000000
>>>>>>>>>>>>> Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
>>>>>>>>>>>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
>>>>>>>>>>>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
>>>>>>>>>>>>> Stack: (0xef0dfd30 to 0xef0e0000)
>>>>>>>>>>>>> ...
>>>>>>>>>>>>> [<c0afc920>] (__mutex_lock) from [<c0afd400>]
>>>>>>>>>>>>> (mutex_lock_nested+0x1c/0x24)
>>>>>>>>>>>>> [<c0afd400>] (mutex_lock_nested) from [<c064d4b8>]
>>>>>>>>>>>>> (__exynos_dsi_host_attach+0x20/0x6c)
>>>>>>>>>>>>> [<c064d4b8>] (__exynos_dsi_host_attach) from [<c064d914>]
>>>>>>>>>>>>> (exynos_dsi_host_attach+0x70/0x194)
>>>>>>>>>>>>> [<c064d914>] (exynos_dsi_host_attach) from [<c0656b64>]
>>>>>>>>>>>>> (s6e8aa0_probe+0x1b0/0x218)
>>>>>>>>>>>>> [<c0656b64>] (s6e8aa0_probe) from [<c0672530>]
>>>>>>>>>>>>> (really_probe+0x200/0x4fc)
>>>>>>>>>>>>> [<c0672530>] (really_probe) from [<c06729f4>]
>>>>>>>>>>>>> (driver_probe_device+0x78/0x1fc)
>>>>>>>>>>>>> [<c06729f4>] (driver_probe_device) from [<c0672ddc>]
>>>>>>>>>>>>> (device_driver_attach+0x58/0x60)
>>>>>>>>>>>>> [<c0672ddc>] (device_driver_attach) from [<c0672ec0>]
>>>>>>>>>>>>> (__driver_attach+0xdc/0x174)
>>>>>>>>>>>>> [<c0672ec0>] (__driver_attach) from [<c06702b8>]
>>>>>>>>>>>>> (bus_for_each_dev+0x68/0xb4)
>>>>>>>>>>>>> [<c06702b8>] (bus_for_each_dev) from [<c06715ec>]
>>>>>>>>>>>>> (bus_add_driver+0x158/0x214)
>>>>>>>>>>>>> [<c06715ec>] (bus_add_driver) from [<c0673d20>]
>>>>>>>>>>>>> (driver_register+0x78/0x110)
>>>>>>>>>>>>> [<c0673d20>] (driver_register) from [<c0102484>]
>>>>>>>>>>>>> (do_one_initcall+0x8c/0x42c)
>>>>>>>>>>>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
>>>>>>>>>>>>> (kernel_init_freeable+0x190/0x1dc)
>>>>>>>>>>>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7988>]
>>>>>>>>>>>>> (kernel_init+0x8/0x118)
>>>>>>>>>>>>> [<c0af7988>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
>>>>>>>>>>>>> Exception stack(0xef0dffb0 to 0xef0dfff8)
>>>>>>>>>>>>> ...
>>>>>>>>>>>>> ---[ end trace c06e996ec2e8234d ]---
>>>>>>>>>>>>>
>>>>>>>>>>>>> This means that dsi->encoder.dev is not initialized in
>>>>>>>>>>>>> __exynos_dsi_host_attach().
>>>>>>>>>>>>>
>>>>>>>>>>>>> This happens, because drm_bridge_attach() in exynos_dsi_bind() returned
>>>>>>>>>>>>> earlier -517 (deferred probe), what causes cleanup of encoder and
>>>>>>>>>>>>> release of all drm resources.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Then however, the panel tries to register itself and
>>>>>>>>>>>>> exynos_dsi_host_attach() tries to access the released encoder (which is
>>>>>>>>>>>>> zeroed in drm_encoder_release) and rest of resources, what causes
>>>>>>>>>>>>> failure.
>>>>>>>>>>>>>
>>>>>>>>>>>>> It looks that something is missing. Maybe mipi host has to be
>>>>>>>>>>>>> registered
>>>>>>>>>>>>> later, when bridge is ready? I have no idea how it is handled before
>>>>>>>>>>>>> this patch. Andrzej, could you comment it a bit?
>>>>>>>>>>>> I intentionally changed the order, because if another bridge follows
>>>>>>>>>>>> in the
>>>>>>>>>>>> pipeline, the probe of the drm driver has to be deferred until some
>>>>>>>>>>>> bridge
>>>>>>>>>>>> provides a connector. The next bridge registers itself via the
>>>>>>>>>>>> host_attach
>>>>>>>>>>>> function and the deferral is ensured via the bind for the bind/unbind
>>>>>>>>>>>> API or
>>>>>>>>>>>> the bridge_attach function otherwise.
>>>>>>>>>>>>
>>>>>>>>>>>> On the other hand, the bridge does not have an encoder until the mipi
>>>>>>>>>>>> device
>>>>>>>>>>>> has been attached.
>>>>>>>>>>>>
>>>>>>>>>>>> As a solution, the exynos dsi driver must initialize the encoder in
>>>>>>>>>>>> exynos_dsi_probe instead of in exynos_dsi_bind and access the encoder
>>>>>>>>>>>> via
>>>>>>>>>>>> exynos_dsi instead of the bridge.
>>>>>>>>>>>>
>>>>>>>>>>>> Can you try to move everything except samsung_dsim_bind from
>>>>>>>>>>>> exynos_dsi_bind
>>>>>>>>>>>> to exynos_dsi_probe (respectively for unbind) and report if it fixes the
>>>>>>>>>>>> crash.
>>>>>>>>>>>
>>>>>>>>>>> The original behaviour is that encoder (exynos_dsi) is registered
>>>>>>>>>>> regardless of sink presence (initially panel, later also bridge) - it
>>>>>>>>>>> avoids multiple issues with deferred probe, device driver bind/unbind
>>>>>>>>>>> and module load/unload. Appearance or disappearance of sink is
>>>>>>>>>>> reported to host nicely via DSI attach/detach callbacks - and it is
>>>>>>>>>>> reflected in drm world as change state of the connector.
>>>>>>>>>>>
>>>>>>>>>>> Registering DSI host in bind and unregistering in unbind assures that
>>>>>>>>>>> if mipi_dsi device is attached/detached the drm device is always
>>>>>>>>>>> present - it makes device/driver binding race free and allows to avoid
>>>>>>>>>>> additional locking.
>>>>>>>>>>>
>>>>>>>>>>> Moving DSI host registration to probe changes everything, for sure it
>>>>>>>>>>> breaks the nice feature of DSI attach/detach callbacks and apparently
>>>>>>>>>>> can cause different issues depending on device bind order.
>>>>>>>>>>>
>>>>>>>>>>> I will try to look at the patches tomorrow and maybe I can find more
>>>>>>>>>>> constructive comments :)
>>>>>>>>>>
>>>>>>>>>> As I said yesterday, exynos_dsi driver uses dsi host attach/detach
>>>>>>>>>> callbacks to control appearance/disappearance of downstream device. It
>>>>>>>>>> allows to:
>>>>>>>>>>
>>>>>>>>>> 1. Safely bind/unbind different device drivers at any time and at any
>>>>>>>>>> order, without killing exynos_drm and/or crashing system.
>>>>>>>>>>
>>>>>>>>>> 2. Avoid issues with late drm init - on some platforms exynos_drm device
>>>>>>>>>> appeared too late, due to deferred probe, and resulted in black screen
>>>>>>>>>> in userspace.
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Now if we want to convert exynos_dsi to drm_bridge I see following options:
>>>>>>>>>>
>>>>>>>>>> A. Forgot about callbacks and make the exynos_drm to defer probing until
>>>>>>>>>> exynos_dsi bridge is available, probably it will cause later exynos_drm
>>>>>>>>>> appearance, thus probably black screen on some targets. So for sure it
>>>>>>>>>> will be suboptimal. Making it bridge unbind safe would be another
>>>>>>>>>> problem, but most developers do not care about it so why should we? :)
>>>>>>>>>>
>>>>>>>>>> B. Try to mimic current behaviour - exynos_dsi register bridge ASAP,
>>>>>>>>>> even if downstream devices are not yet attached, on attach/detach notify
>>>>>>>>>> drm about it via connector status change, for this dsi_host registration
>>>>>>>>>> should be performed from drm_bridge attach, I guess.
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Option A is more standard, but is unsafe and causes other issues.
>>>>>>>>>>
>>>>>>>>>> Option B keeps current behaviour.
>>>>>>>>> Maybe we can have both, but I am not sure, if I am missing something:
>>>>>>>>>
>>>>>>>>> I still prefer option A for the samsung-dsim driver, because it is more
>>>>>>>>> standard, simpler and avoids issues with encoders, connectors or handling
>>>>>>>>> hotplug.
>>>>>>>>>
>>>>>>>>> The idea is to use two bridges in the exynos-dsi driver: One bridge in the
>>>>>>>>> samsung-dsim driver which implements option A and defers probing of the drm
>>>>>>>>> driver until the next bridge is attached. And a second bridge in the
>>>>>>>>> exynos_dsi that attaches to the first bridge (thus, allowing the exynos_drm
>>>>>>>>> device to appear) and implements the hotplug handling for notifying drm via
>>>>>>>>> connector status change.
>>>>>>>>>
>>>>>>>>> The driver for the i.MX8M would use the samsung-dsim bridge without an
>>>>>>>>> additional bridge.
>>>>>>>>>
>>>>>>>>> This allows the samsung-dsim driver to expose the standard behavior while the
>>>>>>>>> exynos_dsi may stick to the existing behavior for the exynos_drm driver.
>>>>>>>>>
>>>>>>>>> I hope this makes sense and does not sound too crazy. It might be difficult to
>>>>>>>>> get the probing and mipi host/device registration correct, but I will try, if
>>>>>>>>> this can work.
>>>>>>>> Adding two bridges for being able to support hotplugging adds many special
>>>>>>>> cases to the bridge driver and still requires more custom API to correctly add
>>>>>>>> the second bridge. I don't think that this a viable path to go.
>>>>>>> Just jumping in here: You cannot hotplug/hotremove anything from a
>>>>>>> drm_device after drm_dev_register has been called, except
>>>>>>> drm_connector. I didn't dig into details here so not sure whether you
>>>>>>> want to late-bind your bridge after drm_dev_register is called or not,
>>>>>>> so might just be fyi and not relevant to the discussion.
>>>>>> Thanks. AFAIC that is exactly what is currently implemented in the exynos_drm
>>>>>> driver (i.e. Option B)
>>>>>>
>>>>>> exynos_dsi_bind configures the encoder and registers a DSI host. Afterwards,
>>>>>> exynos_drm_bind (as component_master_ops) calls drm_dev_register. Later, a DSI
>>>>>> device might attach to the DSI host and call exynos_dsi_host_attach. In
>>>>>> exynos_dsi_host_attach, the driver finds the drm_bridge for the DSI device and
>>>>>> attaches this bridge to the encoder _after_ drm_dev_register has been called.
>>>>>> This is invalid behavior, right?
>>>>> Definitely not supported, I don't think we have the right locks in place
>>>>> to make sure this works.
>>>>>
>>>>> Now if your _only_ adding a drm_bridge (and not an encoder or anything
>>>>> like that), and you are adding the drm_connector correctly (like a
>>>>> hotplugged DP MST sink), then that would at least work from a uapi pov.
>>>>> Because drm_bridge isn't exposed as an uapi object.
>>>>>
>>>>> But yeah, as-is, don't :-)
>>>>>
>>>>> The solution here is a bunch of EPROBE_DEFER handling until all your
>>>>> bridges are loaded, with or without the assistance of component.c
>>>>> framework. Only then call drm_dev_register.
>>>>
>>>> I have impression we have similar conversation already.
>>>>
>>>> As you stated drm_bridge and drm_panel are not exposed to userspace so
>>>> there shouldn't be problem with them from uapi PoV.
>>>>
>>>> On the other side drm_panel or drm_bridge are not used until pipeline
>>>> enters connected state (at least they were not some time ago :) ). The
>>>> issue is that bridge exposes drm_connector, but as you stated (again :)
>>>> ) connectors can be hotplugged, so in theory it should work. Practical
>>>> tests shows that it also works, but bugs can be still there.
>>>>
>>>> Bunch of EPROBE_DEFER was very slow (as a result userspace timeouted and
>>>> decided there is no display), and does not handle unbinding/re-binding
>>>> drivers.
>>>
>>> Rebinding drivers should be fixed now, with a bunch of fixes in driver
>>> core. If not, we need to fix this more.
>>>
>>> Also, EPROBE_DEFER is how this is supposed to work. If it's too slow,
>>> we need to fix EPROBE_DEFER (there's ideas for pre-sorting that never
>>> seem to go anywhere), not paper over it with bad architecture in
>>> drivers.
>>
>> I've heard this argument multiple times, but it sounds more like an
>> attempt to ignore the problem and hope it will fall on someone else's
>> plate :-) Improvement in the probe deferral mechanism are certainly an
>> option to explore, but as far as I can tell nobody has proven that this
>> mechanism is or will be able to solve all problems related to probe
>> ordering dependencies. I wouldn't rule out the need for different
>> solutions for some of the issues.
> 
> Then build another one. But adding hotplug for stuff that is there,
> and shouldn't be hotplugged, just because it's easier on driver
> writers and harder on userspace isn't really a good approach.
> -Daniel

I think it is quite clear that replacing or reworking the deferral 
mechanism is out of scope for this discussion, which is why I would like 
to come back to the original issue and sum this up as far as I 
understand it (which is not really far when it comes to the details):

We have the existing exynos driver that avoids the standard deferral 
mechanism in favor of something that works but Daniel describes as 
"definitely not supported".

We have a proposal from Michael for converting the driver to the 
standard drm_bridge behavior and more work from Michael and Marek based 
on this to implement the platform specific parts for i.MX8MM.

 From the i.MX8MM POV this approach already received some testing and 
looks good as far as I can judge. Upstreaming this solution is blocked 
because of objections from the Samsung maintainers.

Sorry if I'm being blunt or naive, but where to go from here?
Michael Tretter Feb. 18, 2021, 8:04 a.m. UTC | #14
On Wed, 10 Feb 2021 10:10:37 +0100, Frieder Schrempf wrote:
> On 04.02.21 18:46, Daniel Vetter wrote:
> > On Thu, Feb 4, 2021 at 6:26 PM Laurent Pinchart <laurent.pinchart@ideasonboard.com> wrote:
> > > On Thu, Feb 04, 2021 at 06:19:22PM +0100, Daniel Vetter wrote:
> > > > On Thu, Feb 4, 2021 at 5:28 PM Andrzej Hajda wrote:
> > > > > W dniu 04.02.2021 o 17:05, Daniel Vetter pisze:
> > > > > > On Thu, Feb 04, 2021 at 11:56:32AM +0100, Michael Tretter wrote:
> > > > > > > On Thu, 04 Feb 2021 11:17:49 +0100, Daniel Vetter wrote:
> > > > > > > > On Wed, Feb 3, 2021 at 9:32 PM Michael Tretter wrote:
> > > > > > > > > On Mon, 01 Feb 2021 17:33:14 +0100, Michael Tretter wrote:
> > > > > > > > > > On Tue, 15 Sep 2020 21:40:40 +0200, Andrzej Hajda wrote:
> > > > > > > > > > > W dniu 14.09.2020 o 23:19, Andrzej Hajda pisze:
> > > > > > > > > > > > On 14.09.2020 22:01, Michael Tretter wrote:
> > > > > > > > > > > > > On Mon, 14 Sep 2020 14:31:19 +0200, Marek Szyprowski wrote:
> > > > > > > > > > > > > > On 14.09.2020 10:29, Marek Szyprowski wrote:
> > > > > > > > > > > > > > > On 11.09.2020 15:54, Michael Tretter wrote:
> > > > > > > > > > > > > > > > Make the exynos_dsi driver a full drm bridge that can be found and
> > > > > > > > > > > > > > > > used
> > > > > > > > > > > > > > > > from other drivers.
> > > > > > > > > > > > > > > > 
> > > > > > > > > > > > > > > > Other drivers can only attach to the bridge, if a mipi dsi device
> > > > > > > > > > > > > > > > already attached to the bridge. This allows to defer the probe of the
> > > > > > > > > > > > > > > > display pipe until the downstream bridges are available, too.
> > > > > > > > > > > > > > > > 
> > > > > > > > > > > > > > > > Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
> > > > > > > > > > > > > > > This one (and the whole series applied) still fails on Exynos boards:
> > > > > > > > > > > > > > > 
> > > > > > > > > > > > > > > [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping
> > > > > > > > > > > > > > > operations
> > > > > > > > > > > > > > > exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
> > > > > > > > > > > > > > > OF: graph: no port node found in /soc/dsi@11c80000
> > > > > > > > > > > > > > > 8<--- cut here ---
> > > > > > > > > > > > > > > Unable to handle kernel NULL pointer dereference at virtual address
> > > > > > > > > > > > > > > 00000084
> > > > > > > > > > > > > > > pgd = (ptrval)
> > > > > > > > > > > > > > > [00000084] *pgd=00000000
> > > > > > > > > > > > > > > Internal error: Oops: 5 [#1] PREEMPT SMP ARM
> > > > > > > > > > > > > > > Modules linked in:
> > > > > > > > > > > > > > > CPU: 1 PID: 1 Comm: swapper/0 Not tainted
> > > > > > > > > > > > > > > 5.9.0-rc4-next-20200911-00010-g417dc70d70ec #1608
> > > > > > > > > > > > > > > Hardware name: Samsung Exynos (Flattened Device Tree)
> > > > > > > > > > > > > > > PC is at drm_bridge_attach+0x18/0x164
> > > > > > > > > > > > > > > LR is at exynos_dsi_bind+0x88/0xa8
> > > > > > > > > > > > > > > pc : [<c0628c08>]    lr : [<c064d560>]    psr: 20000013
> > > > > > > > > > > > > > > sp : ef0dfca8  ip : 00000002  fp : c13190e0
> > > > > > > > > > > > > > > r10: 00000000  r9 : ee46d580  r8 : c13190e0
> > > > > > > > > > > > > > > r7 : ee438800  r6 : 00000018  r5 : ef253810  r4 : ef39e840
> > > > > > > > > > > > > > > r3 : 00000000  r2 : 00000018  r1 : ef39e888  r0 : ef39e840
> > > > > > > > > > > > > > > Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> > > > > > > > > > > > > > > Control: 10c5387d  Table: 4000404a  DAC: 00000051
> > > > > > > > > > > > > > > Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
> > > > > > > > > > > > > > > Stack: (0xef0dfca8 to 0xef0e0000)
> > > > > > > > > > > > > > > ...
> > > > > > > > > > > > > > > [<c0628c08>] (drm_bridge_attach) from [<c064d560>]
> > > > > > > > > > > > > > > (exynos_dsi_bind+0x88/0xa8)
> > > > > > > > > > > > > > > [<c064d560>] (exynos_dsi_bind) from [<c066a800>]
> > > > > > > > > > > > > > > (component_bind_all+0xfc/0x290)
> > > > > > > > > > > > > > > [<c066a800>] (component_bind_all) from [<c0649dc0>]
> > > > > > > > > > > > > > > (exynos_drm_bind+0xe4/0x19c)
> > > > > > > > > > > > > > > [<c0649dc0>] (exynos_drm_bind) from [<c066ad74>]
> > > > > > > > > > > > > > > (try_to_bring_up_master+0x1e4/0x2c4)
> > > > > > > > > > > > > > > [<c066ad74>] (try_to_bring_up_master) from [<c066b2b4>]
> > > > > > > > > > > > > > > (component_master_add_with_match+0xd4/0x108)
> > > > > > > > > > > > > > > [<c066b2b4>] (component_master_add_with_match) from [<c0649ae8>]
> > > > > > > > > > > > > > > (exynos_drm_platform_probe+0xe4/0x110)
> > > > > > > > > > > > > > > [<c0649ae8>] (exynos_drm_platform_probe) from [<c0674e6c>]
> > > > > > > > > > > > > > > (platform_drv_probe+0x6c/0xa4)
> > > > > > > > > > > > > > > [<c0674e6c>] (platform_drv_probe) from [<c067242c>]
> > > > > > > > > > > > > > > (really_probe+0x200/0x4fc)
> > > > > > > > > > > > > > > [<c067242c>] (really_probe) from [<c06728f0>]
> > > > > > > > > > > > > > > (driver_probe_device+0x78/0x1fc)
> > > > > > > > > > > > > > > [<c06728f0>] (driver_probe_device) from [<c0672cd8>]
> > > > > > > > > > > > > > > (device_driver_attach+0x58/0x60)
> > > > > > > > > > > > > > > [<c0672cd8>] (device_driver_attach) from [<c0672dbc>]
> > > > > > > > > > > > > > > (__driver_attach+0xdc/0x174)
> > > > > > > > > > > > > > > [<c0672dbc>] (__driver_attach) from [<c06701b4>]
> > > > > > > > > > > > > > > (bus_for_each_dev+0x68/0xb4)
> > > > > > > > > > > > > > > [<c06701b4>] (bus_for_each_dev) from [<c06714e8>]
> > > > > > > > > > > > > > > (bus_add_driver+0x158/0x214)
> > > > > > > > > > > > > > > [<c06714e8>] (bus_add_driver) from [<c0673c1c>]
> > > > > > > > > > > > > > > (driver_register+0x78/0x110)
> > > > > > > > > > > > > > > [<c0673c1c>] (driver_register) from [<c0649ca8>]
> > > > > > > > > > > > > > > (exynos_drm_init+0xe4/0x118)
> > > > > > > > > > > > > > > [<c0649ca8>] (exynos_drm_init) from [<c0102484>]
> > > > > > > > > > > > > > > (do_one_initcall+0x8c/0x42c)
> > > > > > > > > > > > > > > [<c0102484>] (do_one_initcall) from [<c11011c0>]
> > > > > > > > > > > > > > > (kernel_init_freeable+0x190/0x1dc)
> > > > > > > > > > > > > > > [<c11011c0>] (kernel_init_freeable) from [<c0af7880>]
> > > > > > > > > > > > > > > (kernel_init+0x8/0x118)
> > > > > > > > > > > > > > > [<c0af7880>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
> > > > > > > > > > > > > > > Exception stack(0xef0dffb0 to 0xef0dfff8)
> > > > > > > > > > > > > > > ...
> > > > > > > > > > > > > > > ---[ end trace ee27f313f9ed9da1 ]---
> > > > > > > > > > > > > > > 
> > > > > > > > > > > > > > > # arm-linux-gnueabi-addr2line -e vmlinux c0628c08
> > > > > > > > > > > > > > > drivers/gpu/drm/drm_bridge.c:184 (discriminator 1)
> > > > > > > > > > > > > > > 
> > > > > > > > > > > > > > > I will try to debug it a bit more today.
> > > > > > > > > > > > > > The above crash has been caused by lack of in_bridge initialization to
> > > > > > > > > > > > > > NULL in exynos_dsi_bind() in this patch. However, fixing it reveals
> > > > > > > > > > > > > > another issue:
> > > > > > > > > > > > > > 
> > > > > > > > > > > > > > [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping operations
> > > > > > > > > > > > > > exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
> > > > > > > > > > > > > > OF: graph: no port node found in /soc/dsi@11c80000
> > > > > > > > > > > > > > 8<--- cut here ---
> > > > > > > > > > > > > > Unable to handle kernel NULL pointer dereference at virtual address
> > > > > > > > > > > > > > 00000280
> > > > > > > > > > > > > > pgd = (ptrval)
> > > > > > > > > > > > > > [00000280] *pgd=00000000
> > > > > > > > > > > > > > Internal error: Oops: 5 [#1] PREEMPT SMP ARM
> > > > > > > > > > > > > > Modules linked in:
> > > > > > > > > > > > > > CPU: 0 PID: 1 Comm: swapper/0 Not tainted
> > > > > > > > > > > > > > 5.9.0-rc4-next-20200911-00010-g417dc70d70ec-dirty #1613
> > > > > > > > > > > > > > Hardware name: Samsung Exynos (Flattened Device Tree)
> > > > > > > > > > > > > > PC is at __mutex_lock+0x54/0xb18
> > > > > > > > > > > > > > LR is at lock_is_held_type+0x80/0x138
> > > > > > > > > > > > > > pc : [<c0afc920>]    lr : [<c0af63e8>]    psr: 60000013
> > > > > > > > > > > > > > sp : ef0dfd30  ip : 33937b74  fp : c13193c8
> > > > > > > > > > > > > > r10: c1208eec  r9 : 00000000  r8 : ee45f808
> > > > > > > > > > > > > > r7 : c19561a4  r6 : 00000000  r5 : 00000000  r4 : 0000024c
> > > > > > > > > > > > > > r3 : 00000000  r2 : 00204140  r1 : c124f13c  r0 : 00000000
> > > > > > > > > > > > > > Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> > > > > > > > > > > > > > Control: 10c5387d  Table: 4000404a  DAC: 00000051
> > > > > > > > > > > > > > Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
> > > > > > > > > > > > > > Stack: (0xef0dfd30 to 0xef0e0000)
> > > > > > > > > > > > > > ...
> > > > > > > > > > > > > > [<c0afc920>] (__mutex_lock) from [<c0afd400>]
> > > > > > > > > > > > > > (mutex_lock_nested+0x1c/0x24)
> > > > > > > > > > > > > > [<c0afd400>] (mutex_lock_nested) from [<c064d4b8>]
> > > > > > > > > > > > > > (__exynos_dsi_host_attach+0x20/0x6c)
> > > > > > > > > > > > > > [<c064d4b8>] (__exynos_dsi_host_attach) from [<c064d914>]
> > > > > > > > > > > > > > (exynos_dsi_host_attach+0x70/0x194)
> > > > > > > > > > > > > > [<c064d914>] (exynos_dsi_host_attach) from [<c0656b64>]
> > > > > > > > > > > > > > (s6e8aa0_probe+0x1b0/0x218)
> > > > > > > > > > > > > > [<c0656b64>] (s6e8aa0_probe) from [<c0672530>]
> > > > > > > > > > > > > > (really_probe+0x200/0x4fc)
> > > > > > > > > > > > > > [<c0672530>] (really_probe) from [<c06729f4>]
> > > > > > > > > > > > > > (driver_probe_device+0x78/0x1fc)
> > > > > > > > > > > > > > [<c06729f4>] (driver_probe_device) from [<c0672ddc>]
> > > > > > > > > > > > > > (device_driver_attach+0x58/0x60)
> > > > > > > > > > > > > > [<c0672ddc>] (device_driver_attach) from [<c0672ec0>]
> > > > > > > > > > > > > > (__driver_attach+0xdc/0x174)
> > > > > > > > > > > > > > [<c0672ec0>] (__driver_attach) from [<c06702b8>]
> > > > > > > > > > > > > > (bus_for_each_dev+0x68/0xb4)
> > > > > > > > > > > > > > [<c06702b8>] (bus_for_each_dev) from [<c06715ec>]
> > > > > > > > > > > > > > (bus_add_driver+0x158/0x214)
> > > > > > > > > > > > > > [<c06715ec>] (bus_add_driver) from [<c0673d20>]
> > > > > > > > > > > > > > (driver_register+0x78/0x110)
> > > > > > > > > > > > > > [<c0673d20>] (driver_register) from [<c0102484>]
> > > > > > > > > > > > > > (do_one_initcall+0x8c/0x42c)
> > > > > > > > > > > > > > [<c0102484>] (do_one_initcall) from [<c11011c0>]
> > > > > > > > > > > > > > (kernel_init_freeable+0x190/0x1dc)
> > > > > > > > > > > > > > [<c11011c0>] (kernel_init_freeable) from [<c0af7988>]
> > > > > > > > > > > > > > (kernel_init+0x8/0x118)
> > > > > > > > > > > > > > [<c0af7988>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
> > > > > > > > > > > > > > Exception stack(0xef0dffb0 to 0xef0dfff8)
> > > > > > > > > > > > > > ...
> > > > > > > > > > > > > > ---[ end trace c06e996ec2e8234d ]---
> > > > > > > > > > > > > > 
> > > > > > > > > > > > > > This means that dsi->encoder.dev is not initialized in
> > > > > > > > > > > > > > __exynos_dsi_host_attach().
> > > > > > > > > > > > > > 
> > > > > > > > > > > > > > This happens, because drm_bridge_attach() in exynos_dsi_bind() returned
> > > > > > > > > > > > > > earlier -517 (deferred probe), what causes cleanup of encoder and
> > > > > > > > > > > > > > release of all drm resources.
> > > > > > > > > > > > > > 
> > > > > > > > > > > > > > Then however, the panel tries to register itself and
> > > > > > > > > > > > > > exynos_dsi_host_attach() tries to access the released encoder (which is
> > > > > > > > > > > > > > zeroed in drm_encoder_release) and rest of resources, what causes
> > > > > > > > > > > > > > failure.
> > > > > > > > > > > > > > 
> > > > > > > > > > > > > > It looks that something is missing. Maybe mipi host has to be
> > > > > > > > > > > > > > registered
> > > > > > > > > > > > > > later, when bridge is ready? I have no idea how it is handled before
> > > > > > > > > > > > > > this patch. Andrzej, could you comment it a bit?
> > > > > > > > > > > > > I intentionally changed the order, because if another bridge follows
> > > > > > > > > > > > > in the
> > > > > > > > > > > > > pipeline, the probe of the drm driver has to be deferred until some
> > > > > > > > > > > > > bridge
> > > > > > > > > > > > > provides a connector. The next bridge registers itself via the
> > > > > > > > > > > > > host_attach
> > > > > > > > > > > > > function and the deferral is ensured via the bind for the bind/unbind
> > > > > > > > > > > > > API or
> > > > > > > > > > > > > the bridge_attach function otherwise.
> > > > > > > > > > > > > 
> > > > > > > > > > > > > On the other hand, the bridge does not have an encoder until the mipi
> > > > > > > > > > > > > device
> > > > > > > > > > > > > has been attached.
> > > > > > > > > > > > > 
> > > > > > > > > > > > > As a solution, the exynos dsi driver must initialize the encoder in
> > > > > > > > > > > > > exynos_dsi_probe instead of in exynos_dsi_bind and access the encoder
> > > > > > > > > > > > > via
> > > > > > > > > > > > > exynos_dsi instead of the bridge.
> > > > > > > > > > > > > 
> > > > > > > > > > > > > Can you try to move everything except samsung_dsim_bind from
> > > > > > > > > > > > > exynos_dsi_bind
> > > > > > > > > > > > > to exynos_dsi_probe (respectively for unbind) and report if it fixes the
> > > > > > > > > > > > > crash.
> > > > > > > > > > > > 
> > > > > > > > > > > > The original behaviour is that encoder (exynos_dsi) is registered
> > > > > > > > > > > > regardless of sink presence (initially panel, later also bridge) - it
> > > > > > > > > > > > avoids multiple issues with deferred probe, device driver bind/unbind
> > > > > > > > > > > > and module load/unload. Appearance or disappearance of sink is
> > > > > > > > > > > > reported to host nicely via DSI attach/detach callbacks - and it is
> > > > > > > > > > > > reflected in drm world as change state of the connector.
> > > > > > > > > > > > 
> > > > > > > > > > > > Registering DSI host in bind and unregistering in unbind assures that
> > > > > > > > > > > > if mipi_dsi device is attached/detached the drm device is always
> > > > > > > > > > > > present - it makes device/driver binding race free and allows to avoid
> > > > > > > > > > > > additional locking.
> > > > > > > > > > > > 
> > > > > > > > > > > > Moving DSI host registration to probe changes everything, for sure it
> > > > > > > > > > > > breaks the nice feature of DSI attach/detach callbacks and apparently
> > > > > > > > > > > > can cause different issues depending on device bind order.
> > > > > > > > > > > > 
> > > > > > > > > > > > I will try to look at the patches tomorrow and maybe I can find more
> > > > > > > > > > > > constructive comments :)
> > > > > > > > > > > 
> > > > > > > > > > > As I said yesterday, exynos_dsi driver uses dsi host attach/detach
> > > > > > > > > > > callbacks to control appearance/disappearance of downstream device. It
> > > > > > > > > > > allows to:
> > > > > > > > > > > 
> > > > > > > > > > > 1. Safely bind/unbind different device drivers at any time and at any
> > > > > > > > > > > order, without killing exynos_drm and/or crashing system.
> > > > > > > > > > > 
> > > > > > > > > > > 2. Avoid issues with late drm init - on some platforms exynos_drm device
> > > > > > > > > > > appeared too late, due to deferred probe, and resulted in black screen
> > > > > > > > > > > in userspace.
> > > > > > > > > > > 
> > > > > > > > > > > 
> > > > > > > > > > > Now if we want to convert exynos_dsi to drm_bridge I see following options:
> > > > > > > > > > > 
> > > > > > > > > > > A. Forgot about callbacks and make the exynos_drm to defer probing until
> > > > > > > > > > > exynos_dsi bridge is available, probably it will cause later exynos_drm
> > > > > > > > > > > appearance, thus probably black screen on some targets. So for sure it
> > > > > > > > > > > will be suboptimal. Making it bridge unbind safe would be another
> > > > > > > > > > > problem, but most developers do not care about it so why should we? :)
> > > > > > > > > > > 
> > > > > > > > > > > B. Try to mimic current behaviour - exynos_dsi register bridge ASAP,
> > > > > > > > > > > even if downstream devices are not yet attached, on attach/detach notify
> > > > > > > > > > > drm about it via connector status change, for this dsi_host registration
> > > > > > > > > > > should be performed from drm_bridge attach, I guess.
> > > > > > > > > > > 
> > > > > > > > > > > 
> > > > > > > > > > > Option A is more standard, but is unsafe and causes other issues.
> > > > > > > > > > > 
> > > > > > > > > > > Option B keeps current behaviour.
> > > > > > > > > > Maybe we can have both, but I am not sure, if I am missing something:
> > > > > > > > > > 
> > > > > > > > > > I still prefer option A for the samsung-dsim driver, because it is more
> > > > > > > > > > standard, simpler and avoids issues with encoders, connectors or handling
> > > > > > > > > > hotplug.
> > > > > > > > > > 
> > > > > > > > > > The idea is to use two bridges in the exynos-dsi driver: One bridge in the
> > > > > > > > > > samsung-dsim driver which implements option A and defers probing of the drm
> > > > > > > > > > driver until the next bridge is attached. And a second bridge in the
> > > > > > > > > > exynos_dsi that attaches to the first bridge (thus, allowing the exynos_drm
> > > > > > > > > > device to appear) and implements the hotplug handling for notifying drm via
> > > > > > > > > > connector status change.
> > > > > > > > > > 
> > > > > > > > > > The driver for the i.MX8M would use the samsung-dsim bridge without an
> > > > > > > > > > additional bridge.
> > > > > > > > > > 
> > > > > > > > > > This allows the samsung-dsim driver to expose the standard behavior while the
> > > > > > > > > > exynos_dsi may stick to the existing behavior for the exynos_drm driver.
> > > > > > > > > > 
> > > > > > > > > > I hope this makes sense and does not sound too crazy. It might be difficult to
> > > > > > > > > > get the probing and mipi host/device registration correct, but I will try, if
> > > > > > > > > > this can work.
> > > > > > > > > Adding two bridges for being able to support hotplugging adds many special
> > > > > > > > > cases to the bridge driver and still requires more custom API to correctly add
> > > > > > > > > the second bridge. I don't think that this a viable path to go.
> > > > > > > > Just jumping in here: You cannot hotplug/hotremove anything from a
> > > > > > > > drm_device after drm_dev_register has been called, except
> > > > > > > > drm_connector. I didn't dig into details here so not sure whether you
> > > > > > > > want to late-bind your bridge after drm_dev_register is called or not,
> > > > > > > > so might just be fyi and not relevant to the discussion.
> > > > > > > Thanks. AFAIC that is exactly what is currently implemented in the exynos_drm
> > > > > > > driver (i.e. Option B)
> > > > > > > 
> > > > > > > exynos_dsi_bind configures the encoder and registers a DSI host. Afterwards,
> > > > > > > exynos_drm_bind (as component_master_ops) calls drm_dev_register. Later, a DSI
> > > > > > > device might attach to the DSI host and call exynos_dsi_host_attach. In
> > > > > > > exynos_dsi_host_attach, the driver finds the drm_bridge for the DSI device and
> > > > > > > attaches this bridge to the encoder _after_ drm_dev_register has been called.
> > > > > > > This is invalid behavior, right?
> > > > > > Definitely not supported, I don't think we have the right locks in place
> > > > > > to make sure this works.
> > > > > > 
> > > > > > Now if your _only_ adding a drm_bridge (and not an encoder or anything
> > > > > > like that), and you are adding the drm_connector correctly (like a
> > > > > > hotplugged DP MST sink), then that would at least work from a uapi pov.
> > > > > > Because drm_bridge isn't exposed as an uapi object.
> > > > > > 
> > > > > > But yeah, as-is, don't :-)
> > > > > > 
> > > > > > The solution here is a bunch of EPROBE_DEFER handling until all your
> > > > > > bridges are loaded, with or without the assistance of component.c
> > > > > > framework. Only then call drm_dev_register.
> > > > > 
> > > > > I have impression we have similar conversation already.
> > > > > 
> > > > > As you stated drm_bridge and drm_panel are not exposed to userspace so
> > > > > there shouldn't be problem with them from uapi PoV.
> > > > > 
> > > > > On the other side drm_panel or drm_bridge are not used until pipeline
> > > > > enters connected state (at least they were not some time ago :) ). The
> > > > > issue is that bridge exposes drm_connector, but as you stated (again :)
> > > > > ) connectors can be hotplugged, so in theory it should work. Practical
> > > > > tests shows that it also works, but bugs can be still there.
> > > > > 
> > > > > Bunch of EPROBE_DEFER was very slow (as a result userspace timeouted and
> > > > > decided there is no display), and does not handle unbinding/re-binding
> > > > > drivers.
> > > > 
> > > > Rebinding drivers should be fixed now, with a bunch of fixes in driver
> > > > core. If not, we need to fix this more.
> > > > 
> > > > Also, EPROBE_DEFER is how this is supposed to work. If it's too slow,
> > > > we need to fix EPROBE_DEFER (there's ideas for pre-sorting that never
> > > > seem to go anywhere), not paper over it with bad architecture in
> > > > drivers.
> > > 
> > > I've heard this argument multiple times, but it sounds more like an
> > > attempt to ignore the problem and hope it will fall on someone else's
> > > plate :-) Improvement in the probe deferral mechanism are certainly an
> > > option to explore, but as far as I can tell nobody has proven that this
> > > mechanism is or will be able to solve all problems related to probe
> > > ordering dependencies. I wouldn't rule out the need for different
> > > solutions for some of the issues.
> > 
> > Then build another one. But adding hotplug for stuff that is there,
> > and shouldn't be hotplugged, just because it's easier on driver
> > writers and harder on userspace isn't really a good approach.
> > -Daniel
> 
> I think it is quite clear that replacing or reworking the deferral mechanism
> is out of scope for this discussion, which is why I would like to come back
> to the original issue and sum this up as far as I understand it (which is
> not really far when it comes to the details):
> 
> We have the existing exynos driver that avoids the standard deferral
> mechanism in favor of something that works but Daniel describes as
> "definitely not supported".
> 
> We have a proposal from Michael for converting the driver to the standard
> drm_bridge behavior and more work from Michael and Marek based on this to
> implement the platform specific parts for i.MX8MM.
> 
> From the i.MX8MM POV this approach already received some testing and looks
> good as far as I can judge. Upstreaming this solution is blocked because of
> objections from the Samsung maintainers.
> 
> Sorry if I'm being blunt or naive, but where to go from here?
> 

Maybe some more information by the Samsung maintainers would help:

If I understand correctly, the main reason for the non-standard behavior is a
userspace application that runs into a timeout if the drm-device does not
appear in time. Correct? Is there something we can do about that?

The other reason is the convenience of binding and unbinding a bridge driver,
while the drm device is kept available. Correct? Is this used in development,
testing, or production?

Is there anything else that prevents the exynos drm from switching to the
standard behavior?

Would a exynos drm specific wrapper, which uses a standard bridge driver but
exposes the non-standard behavior, be acceptable? (Unfortunately, my first try
on something like that felt really awkward and didn't really work.)

Michael
Andrzej Hajda Feb. 18, 2021, 4:02 p.m. UTC | #15
Hi Michael,

W dniu 18.02.2021 o 09:04, Michael Tretter pisze:
> On Wed, 10 Feb 2021 10:10:37 +0100, Frieder Schrempf wrote:
>> On 04.02.21 18:46, Daniel Vetter wrote:
>>> On Thu, Feb 4, 2021 at 6:26 PM Laurent Pinchart <laurent.pinchart@ideasonboard.com> wrote:
>>>> On Thu, Feb 04, 2021 at 06:19:22PM +0100, Daniel Vetter wrote:
>>>>> On Thu, Feb 4, 2021 at 5:28 PM Andrzej Hajda wrote:
>>>>>> W dniu 04.02.2021 o 17:05, Daniel Vetter pisze:
>>>>>>> On Thu, Feb 04, 2021 at 11:56:32AM +0100, Michael Tretter wrote:
>>>>>>>> On Thu, 04 Feb 2021 11:17:49 +0100, Daniel Vetter wrote:
>>>>>>>>> On Wed, Feb 3, 2021 at 9:32 PM Michael Tretter wrote:
>>>>>>>>>> On Mon, 01 Feb 2021 17:33:14 +0100, Michael Tretter wrote:
>>>>>>>>>>> On Tue, 15 Sep 2020 21:40:40 +0200, Andrzej Hajda wrote:
>>>>>>>>>>>> W dniu 14.09.2020 o 23:19, Andrzej Hajda pisze:
>>>>>>>>>>>>> On 14.09.2020 22:01, Michael Tretter wrote:
>>>>>>>>>>>>>> On Mon, 14 Sep 2020 14:31:19 +0200, Marek Szyprowski wrote:
>>>>>>>>>>>>>>> On 14.09.2020 10:29, Marek Szyprowski wrote:
>>>>>>>>>>>>>>>> On 11.09.2020 15:54, Michael Tretter wrote:
>>>>>>>>>>>>>>>>> Make the exynos_dsi driver a full drm bridge that can be found and
>>>>>>>>>>>>>>>>> used
>>>>>>>>>>>>>>>>> from other drivers.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Other drivers can only attach to the bridge, if a mipi dsi device
>>>>>>>>>>>>>>>>> already attached to the bridge. This allows to defer the probe of the
>>>>>>>>>>>>>>>>> display pipe until the downstream bridges are available, too.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
>>>>>>>>>>>>>>>> This one (and the whole series applied) still fails on Exynos boards:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping
>>>>>>>>>>>>>>>> operations
>>>>>>>>>>>>>>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
>>>>>>>>>>>>>>>> OF: graph: no port node found in /soc/dsi@11c80000
>>>>>>>>>>>>>>>> 8<--- cut here ---
>>>>>>>>>>>>>>>> Unable to handle kernel NULL pointer dereference at virtual address
>>>>>>>>>>>>>>>> 00000084
>>>>>>>>>>>>>>>> pgd = (ptrval)
>>>>>>>>>>>>>>>> [00000084] *pgd=00000000
>>>>>>>>>>>>>>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
>>>>>>>>>>>>>>>> Modules linked in:
>>>>>>>>>>>>>>>> CPU: 1 PID: 1 Comm: swapper/0 Not tainted
>>>>>>>>>>>>>>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec #1608
>>>>>>>>>>>>>>>> Hardware name: Samsung Exynos (Flattened Device Tree)
>>>>>>>>>>>>>>>> PC is at drm_bridge_attach+0x18/0x164
>>>>>>>>>>>>>>>> LR is at exynos_dsi_bind+0x88/0xa8
>>>>>>>>>>>>>>>> pc : [<c0628c08>]    lr : [<c064d560>]    psr: 20000013
>>>>>>>>>>>>>>>> sp : ef0dfca8  ip : 00000002  fp : c13190e0
>>>>>>>>>>>>>>>> r10: 00000000  r9 : ee46d580  r8 : c13190e0
>>>>>>>>>>>>>>>> r7 : ee438800  r6 : 00000018  r5 : ef253810  r4 : ef39e840
>>>>>>>>>>>>>>>> r3 : 00000000  r2 : 00000018  r1 : ef39e888  r0 : ef39e840
>>>>>>>>>>>>>>>> Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
>>>>>>>>>>>>>>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
>>>>>>>>>>>>>>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
>>>>>>>>>>>>>>>> Stack: (0xef0dfca8 to 0xef0e0000)
>>>>>>>>>>>>>>>> ...
>>>>>>>>>>>>>>>> [<c0628c08>] (drm_bridge_attach) from [<c064d560>]
>>>>>>>>>>>>>>>> (exynos_dsi_bind+0x88/0xa8)
>>>>>>>>>>>>>>>> [<c064d560>] (exynos_dsi_bind) from [<c066a800>]
>>>>>>>>>>>>>>>> (component_bind_all+0xfc/0x290)
>>>>>>>>>>>>>>>> [<c066a800>] (component_bind_all) from [<c0649dc0>]
>>>>>>>>>>>>>>>> (exynos_drm_bind+0xe4/0x19c)
>>>>>>>>>>>>>>>> [<c0649dc0>] (exynos_drm_bind) from [<c066ad74>]
>>>>>>>>>>>>>>>> (try_to_bring_up_master+0x1e4/0x2c4)
>>>>>>>>>>>>>>>> [<c066ad74>] (try_to_bring_up_master) from [<c066b2b4>]
>>>>>>>>>>>>>>>> (component_master_add_with_match+0xd4/0x108)
>>>>>>>>>>>>>>>> [<c066b2b4>] (component_master_add_with_match) from [<c0649ae8>]
>>>>>>>>>>>>>>>> (exynos_drm_platform_probe+0xe4/0x110)
>>>>>>>>>>>>>>>> [<c0649ae8>] (exynos_drm_platform_probe) from [<c0674e6c>]
>>>>>>>>>>>>>>>> (platform_drv_probe+0x6c/0xa4)
>>>>>>>>>>>>>>>> [<c0674e6c>] (platform_drv_probe) from [<c067242c>]
>>>>>>>>>>>>>>>> (really_probe+0x200/0x4fc)
>>>>>>>>>>>>>>>> [<c067242c>] (really_probe) from [<c06728f0>]
>>>>>>>>>>>>>>>> (driver_probe_device+0x78/0x1fc)
>>>>>>>>>>>>>>>> [<c06728f0>] (driver_probe_device) from [<c0672cd8>]
>>>>>>>>>>>>>>>> (device_driver_attach+0x58/0x60)
>>>>>>>>>>>>>>>> [<c0672cd8>] (device_driver_attach) from [<c0672dbc>]
>>>>>>>>>>>>>>>> (__driver_attach+0xdc/0x174)
>>>>>>>>>>>>>>>> [<c0672dbc>] (__driver_attach) from [<c06701b4>]
>>>>>>>>>>>>>>>> (bus_for_each_dev+0x68/0xb4)
>>>>>>>>>>>>>>>> [<c06701b4>] (bus_for_each_dev) from [<c06714e8>]
>>>>>>>>>>>>>>>> (bus_add_driver+0x158/0x214)
>>>>>>>>>>>>>>>> [<c06714e8>] (bus_add_driver) from [<c0673c1c>]
>>>>>>>>>>>>>>>> (driver_register+0x78/0x110)
>>>>>>>>>>>>>>>> [<c0673c1c>] (driver_register) from [<c0649ca8>]
>>>>>>>>>>>>>>>> (exynos_drm_init+0xe4/0x118)
>>>>>>>>>>>>>>>> [<c0649ca8>] (exynos_drm_init) from [<c0102484>]
>>>>>>>>>>>>>>>> (do_one_initcall+0x8c/0x42c)
>>>>>>>>>>>>>>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
>>>>>>>>>>>>>>>> (kernel_init_freeable+0x190/0x1dc)
>>>>>>>>>>>>>>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7880>]
>>>>>>>>>>>>>>>> (kernel_init+0x8/0x118)
>>>>>>>>>>>>>>>> [<c0af7880>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
>>>>>>>>>>>>>>>> Exception stack(0xef0dffb0 to 0xef0dfff8)
>>>>>>>>>>>>>>>> ...
>>>>>>>>>>>>>>>> ---[ end trace ee27f313f9ed9da1 ]---
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> # arm-linux-gnueabi-addr2line -e vmlinux c0628c08
>>>>>>>>>>>>>>>> drivers/gpu/drm/drm_bridge.c:184 (discriminator 1)
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> I will try to debug it a bit more today.
>>>>>>>>>>>>>>> The above crash has been caused by lack of in_bridge initialization to
>>>>>>>>>>>>>>> NULL in exynos_dsi_bind() in this patch. However, fixing it reveals
>>>>>>>>>>>>>>> another issue:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping operations
>>>>>>>>>>>>>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
>>>>>>>>>>>>>>> OF: graph: no port node found in /soc/dsi@11c80000
>>>>>>>>>>>>>>> 8<--- cut here ---
>>>>>>>>>>>>>>> Unable to handle kernel NULL pointer dereference at virtual address
>>>>>>>>>>>>>>> 00000280
>>>>>>>>>>>>>>> pgd = (ptrval)
>>>>>>>>>>>>>>> [00000280] *pgd=00000000
>>>>>>>>>>>>>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
>>>>>>>>>>>>>>> Modules linked in:
>>>>>>>>>>>>>>> CPU: 0 PID: 1 Comm: swapper/0 Not tainted
>>>>>>>>>>>>>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec-dirty #1613
>>>>>>>>>>>>>>> Hardware name: Samsung Exynos (Flattened Device Tree)
>>>>>>>>>>>>>>> PC is at __mutex_lock+0x54/0xb18
>>>>>>>>>>>>>>> LR is at lock_is_held_type+0x80/0x138
>>>>>>>>>>>>>>> pc : [<c0afc920>]    lr : [<c0af63e8>]    psr: 60000013
>>>>>>>>>>>>>>> sp : ef0dfd30  ip : 33937b74  fp : c13193c8
>>>>>>>>>>>>>>> r10: c1208eec  r9 : 00000000  r8 : ee45f808
>>>>>>>>>>>>>>> r7 : c19561a4  r6 : 00000000  r5 : 00000000  r4 : 0000024c
>>>>>>>>>>>>>>> r3 : 00000000  r2 : 00204140  r1 : c124f13c  r0 : 00000000
>>>>>>>>>>>>>>> Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
>>>>>>>>>>>>>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
>>>>>>>>>>>>>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
>>>>>>>>>>>>>>> Stack: (0xef0dfd30 to 0xef0e0000)
>>>>>>>>>>>>>>> ...
>>>>>>>>>>>>>>> [<c0afc920>] (__mutex_lock) from [<c0afd400>]
>>>>>>>>>>>>>>> (mutex_lock_nested+0x1c/0x24)
>>>>>>>>>>>>>>> [<c0afd400>] (mutex_lock_nested) from [<c064d4b8>]
>>>>>>>>>>>>>>> (__exynos_dsi_host_attach+0x20/0x6c)
>>>>>>>>>>>>>>> [<c064d4b8>] (__exynos_dsi_host_attach) from [<c064d914>]
>>>>>>>>>>>>>>> (exynos_dsi_host_attach+0x70/0x194)
>>>>>>>>>>>>>>> [<c064d914>] (exynos_dsi_host_attach) from [<c0656b64>]
>>>>>>>>>>>>>>> (s6e8aa0_probe+0x1b0/0x218)
>>>>>>>>>>>>>>> [<c0656b64>] (s6e8aa0_probe) from [<c0672530>]
>>>>>>>>>>>>>>> (really_probe+0x200/0x4fc)
>>>>>>>>>>>>>>> [<c0672530>] (really_probe) from [<c06729f4>]
>>>>>>>>>>>>>>> (driver_probe_device+0x78/0x1fc)
>>>>>>>>>>>>>>> [<c06729f4>] (driver_probe_device) from [<c0672ddc>]
>>>>>>>>>>>>>>> (device_driver_attach+0x58/0x60)
>>>>>>>>>>>>>>> [<c0672ddc>] (device_driver_attach) from [<c0672ec0>]
>>>>>>>>>>>>>>> (__driver_attach+0xdc/0x174)
>>>>>>>>>>>>>>> [<c0672ec0>] (__driver_attach) from [<c06702b8>]
>>>>>>>>>>>>>>> (bus_for_each_dev+0x68/0xb4)
>>>>>>>>>>>>>>> [<c06702b8>] (bus_for_each_dev) from [<c06715ec>]
>>>>>>>>>>>>>>> (bus_add_driver+0x158/0x214)
>>>>>>>>>>>>>>> [<c06715ec>] (bus_add_driver) from [<c0673d20>]
>>>>>>>>>>>>>>> (driver_register+0x78/0x110)
>>>>>>>>>>>>>>> [<c0673d20>] (driver_register) from [<c0102484>]
>>>>>>>>>>>>>>> (do_one_initcall+0x8c/0x42c)
>>>>>>>>>>>>>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
>>>>>>>>>>>>>>> (kernel_init_freeable+0x190/0x1dc)
>>>>>>>>>>>>>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7988>]
>>>>>>>>>>>>>>> (kernel_init+0x8/0x118)
>>>>>>>>>>>>>>> [<c0af7988>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
>>>>>>>>>>>>>>> Exception stack(0xef0dffb0 to 0xef0dfff8)
>>>>>>>>>>>>>>> ...
>>>>>>>>>>>>>>> ---[ end trace c06e996ec2e8234d ]---
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> This means that dsi->encoder.dev is not initialized in
>>>>>>>>>>>>>>> __exynos_dsi_host_attach().
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> This happens, because drm_bridge_attach() in exynos_dsi_bind() returned
>>>>>>>>>>>>>>> earlier -517 (deferred probe), what causes cleanup of encoder and
>>>>>>>>>>>>>>> release of all drm resources.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Then however, the panel tries to register itself and
>>>>>>>>>>>>>>> exynos_dsi_host_attach() tries to access the released encoder (which is
>>>>>>>>>>>>>>> zeroed in drm_encoder_release) and rest of resources, what causes
>>>>>>>>>>>>>>> failure.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> It looks that something is missing. Maybe mipi host has to be
>>>>>>>>>>>>>>> registered
>>>>>>>>>>>>>>> later, when bridge is ready? I have no idea how it is handled before
>>>>>>>>>>>>>>> this patch. Andrzej, could you comment it a bit?
>>>>>>>>>>>>>> I intentionally changed the order, because if another bridge follows
>>>>>>>>>>>>>> in the
>>>>>>>>>>>>>> pipeline, the probe of the drm driver has to be deferred until some
>>>>>>>>>>>>>> bridge
>>>>>>>>>>>>>> provides a connector. The next bridge registers itself via the
>>>>>>>>>>>>>> host_attach
>>>>>>>>>>>>>> function and the deferral is ensured via the bind for the bind/unbind
>>>>>>>>>>>>>> API or
>>>>>>>>>>>>>> the bridge_attach function otherwise.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> On the other hand, the bridge does not have an encoder until the mipi
>>>>>>>>>>>>>> device
>>>>>>>>>>>>>> has been attached.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> As a solution, the exynos dsi driver must initialize the encoder in
>>>>>>>>>>>>>> exynos_dsi_probe instead of in exynos_dsi_bind and access the encoder
>>>>>>>>>>>>>> via
>>>>>>>>>>>>>> exynos_dsi instead of the bridge.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Can you try to move everything except samsung_dsim_bind from
>>>>>>>>>>>>>> exynos_dsi_bind
>>>>>>>>>>>>>> to exynos_dsi_probe (respectively for unbind) and report if it fixes the
>>>>>>>>>>>>>> crash.
>>>>>>>>>>>>> The original behaviour is that encoder (exynos_dsi) is registered
>>>>>>>>>>>>> regardless of sink presence (initially panel, later also bridge) - it
>>>>>>>>>>>>> avoids multiple issues with deferred probe, device driver bind/unbind
>>>>>>>>>>>>> and module load/unload. Appearance or disappearance of sink is
>>>>>>>>>>>>> reported to host nicely via DSI attach/detach callbacks - and it is
>>>>>>>>>>>>> reflected in drm world as change state of the connector.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Registering DSI host in bind and unregistering in unbind assures that
>>>>>>>>>>>>> if mipi_dsi device is attached/detached the drm device is always
>>>>>>>>>>>>> present - it makes device/driver binding race free and allows to avoid
>>>>>>>>>>>>> additional locking.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Moving DSI host registration to probe changes everything, for sure it
>>>>>>>>>>>>> breaks the nice feature of DSI attach/detach callbacks and apparently
>>>>>>>>>>>>> can cause different issues depending on device bind order.
>>>>>>>>>>>>>
>>>>>>>>>>>>> I will try to look at the patches tomorrow and maybe I can find more
>>>>>>>>>>>>> constructive comments :)
>>>>>>>>>>>> As I said yesterday, exynos_dsi driver uses dsi host attach/detach
>>>>>>>>>>>> callbacks to control appearance/disappearance of downstream device. It
>>>>>>>>>>>> allows to:
>>>>>>>>>>>>
>>>>>>>>>>>> 1. Safely bind/unbind different device drivers at any time and at any
>>>>>>>>>>>> order, without killing exynos_drm and/or crashing system.
>>>>>>>>>>>>
>>>>>>>>>>>> 2. Avoid issues with late drm init - on some platforms exynos_drm device
>>>>>>>>>>>> appeared too late, due to deferred probe, and resulted in black screen
>>>>>>>>>>>> in userspace.
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> Now if we want to convert exynos_dsi to drm_bridge I see following options:
>>>>>>>>>>>>
>>>>>>>>>>>> A. Forgot about callbacks and make the exynos_drm to defer probing until
>>>>>>>>>>>> exynos_dsi bridge is available, probably it will cause later exynos_drm
>>>>>>>>>>>> appearance, thus probably black screen on some targets. So for sure it
>>>>>>>>>>>> will be suboptimal. Making it bridge unbind safe would be another
>>>>>>>>>>>> problem, but most developers do not care about it so why should we? :)
>>>>>>>>>>>>
>>>>>>>>>>>> B. Try to mimic current behaviour - exynos_dsi register bridge ASAP,
>>>>>>>>>>>> even if downstream devices are not yet attached, on attach/detach notify
>>>>>>>>>>>> drm about it via connector status change, for this dsi_host registration
>>>>>>>>>>>> should be performed from drm_bridge attach, I guess.
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> Option A is more standard, but is unsafe and causes other issues.
>>>>>>>>>>>>
>>>>>>>>>>>> Option B keeps current behaviour.
>>>>>>>>>>> Maybe we can have both, but I am not sure, if I am missing something:
>>>>>>>>>>>
>>>>>>>>>>> I still prefer option A for the samsung-dsim driver, because it is more
>>>>>>>>>>> standard, simpler and avoids issues with encoders, connectors or handling
>>>>>>>>>>> hotplug.
>>>>>>>>>>>
>>>>>>>>>>> The idea is to use two bridges in the exynos-dsi driver: One bridge in the
>>>>>>>>>>> samsung-dsim driver which implements option A and defers probing of the drm
>>>>>>>>>>> driver until the next bridge is attached. And a second bridge in the
>>>>>>>>>>> exynos_dsi that attaches to the first bridge (thus, allowing the exynos_drm
>>>>>>>>>>> device to appear) and implements the hotplug handling for notifying drm via
>>>>>>>>>>> connector status change.
>>>>>>>>>>>
>>>>>>>>>>> The driver for the i.MX8M would use the samsung-dsim bridge without an
>>>>>>>>>>> additional bridge.
>>>>>>>>>>>
>>>>>>>>>>> This allows the samsung-dsim driver to expose the standard behavior while the
>>>>>>>>>>> exynos_dsi may stick to the existing behavior for the exynos_drm driver.
>>>>>>>>>>>
>>>>>>>>>>> I hope this makes sense and does not sound too crazy. It might be difficult to
>>>>>>>>>>> get the probing and mipi host/device registration correct, but I will try, if
>>>>>>>>>>> this can work.
>>>>>>>>>> Adding two bridges for being able to support hotplugging adds many special
>>>>>>>>>> cases to the bridge driver and still requires more custom API to correctly add
>>>>>>>>>> the second bridge. I don't think that this a viable path to go.
>>>>>>>>> Just jumping in here: You cannot hotplug/hotremove anything from a
>>>>>>>>> drm_device after drm_dev_register has been called, except
>>>>>>>>> drm_connector. I didn't dig into details here so not sure whether you
>>>>>>>>> want to late-bind your bridge after drm_dev_register is called or not,
>>>>>>>>> so might just be fyi and not relevant to the discussion.
>>>>>>>> Thanks. AFAIC that is exactly what is currently implemented in the exynos_drm
>>>>>>>> driver (i.e. Option B)
>>>>>>>>
>>>>>>>> exynos_dsi_bind configures the encoder and registers a DSI host. Afterwards,
>>>>>>>> exynos_drm_bind (as component_master_ops) calls drm_dev_register. Later, a DSI
>>>>>>>> device might attach to the DSI host and call exynos_dsi_host_attach. In
>>>>>>>> exynos_dsi_host_attach, the driver finds the drm_bridge for the DSI device and
>>>>>>>> attaches this bridge to the encoder _after_ drm_dev_register has been called.
>>>>>>>> This is invalid behavior, right?
>>>>>>> Definitely not supported, I don't think we have the right locks in place
>>>>>>> to make sure this works.
>>>>>>>
>>>>>>> Now if your _only_ adding a drm_bridge (and not an encoder or anything
>>>>>>> like that), and you are adding the drm_connector correctly (like a
>>>>>>> hotplugged DP MST sink), then that would at least work from a uapi pov.
>>>>>>> Because drm_bridge isn't exposed as an uapi object.
>>>>>>>
>>>>>>> But yeah, as-is, don't :-)
>>>>>>>
>>>>>>> The solution here is a bunch of EPROBE_DEFER handling until all your
>>>>>>> bridges are loaded, with or without the assistance of component.c
>>>>>>> framework. Only then call drm_dev_register.
>>>>>> I have impression we have similar conversation already.
>>>>>>
>>>>>> As you stated drm_bridge and drm_panel are not exposed to userspace so
>>>>>> there shouldn't be problem with them from uapi PoV.
>>>>>>
>>>>>> On the other side drm_panel or drm_bridge are not used until pipeline
>>>>>> enters connected state (at least they were not some time ago :) ). The
>>>>>> issue is that bridge exposes drm_connector, but as you stated (again :)
>>>>>> ) connectors can be hotplugged, so in theory it should work. Practical
>>>>>> tests shows that it also works, but bugs can be still there.
>>>>>>
>>>>>> Bunch of EPROBE_DEFER was very slow (as a result userspace timeouted and
>>>>>> decided there is no display), and does not handle unbinding/re-binding
>>>>>> drivers.
>>>>> Rebinding drivers should be fixed now, with a bunch of fixes in driver
>>>>> core. If not, we need to fix this more.
>>>>>
>>>>> Also, EPROBE_DEFER is how this is supposed to work. If it's too slow,
>>>>> we need to fix EPROBE_DEFER (there's ideas for pre-sorting that never
>>>>> seem to go anywhere), not paper over it with bad architecture in
>>>>> drivers.
>>>> I've heard this argument multiple times, but it sounds more like an
>>>> attempt to ignore the problem and hope it will fall on someone else's
>>>> plate :-) Improvement in the probe deferral mechanism are certainly an
>>>> option to explore, but as far as I can tell nobody has proven that this
>>>> mechanism is or will be able to solve all problems related to probe
>>>> ordering dependencies. I wouldn't rule out the need for different
>>>> solutions for some of the issues.
>>> Then build another one. But adding hotplug for stuff that is there,
>>> and shouldn't be hotplugged, just because it's easier on driver
>>> writers and harder on userspace isn't really a good approach.
>>> -Daniel
>> I think it is quite clear that replacing or reworking the deferral mechanism
>> is out of scope for this discussion, which is why I would like to come back
>> to the original issue and sum this up as far as I understand it (which is
>> not really far when it comes to the details):
>>
>> We have the existing exynos driver that avoids the standard deferral
>> mechanism in favor of something that works but Daniel describes as
>> "definitely not supported".
>>
>> We have a proposal from Michael for converting the driver to the standard
>> drm_bridge behavior and more work from Michael and Marek based on this to
>> implement the platform specific parts for i.MX8MM.
>>
>>  From the i.MX8MM POV this approach already received some testing and looks
>> good as far as I can judge. Upstreaming this solution is blocked because of
>> objections from the Samsung maintainers.
>>
>> Sorry if I'm being blunt or naive, but where to go from here?
>>
> Maybe some more information by the Samsung maintainers would help:
>
> If I understand correctly, the main reason for the non-standard behavior is a
> userspace application that runs into a timeout if the drm-device does not
> appear in time. Correct? Is there something we can do about that?
>
> The other reason is the convenience of binding and unbinding a bridge driver,
> while the drm device is kept available. Correct? Is this used in development,
> testing, or production?
>
> Is there anything else that prevents the exynos drm from switching to the
> standard behavior?
>
> Would a exynos drm specific wrapper, which uses a standard bridge driver but
> exposes the non-standard behavior, be acceptable? (Unfortunately, my first try
> on something like that felt really awkward and didn't really work.)

Even if we drop this 'non-standard' behaviour, your task will be still 
quite difficult to fulfil - you are trying to completely rewrite core 
component of Exynos display pipeline without hardware to test.

ExynosDSI is used in almost all Exynos platforms supported mainline (ls 
-1 arch/arm*/boot/dts/exynos*.dts | wc shows 35). It has different hw 
versions (4 compatibles) and is used in different configurations (video 
mode, command mode, with hw/sw trigger, connected to panels/bridges) and 
for sure with big heritage, since it was one of the 1st DSI drivers.

Rewriting such driver is challenging, even with access to hw.

So maybe it would be better to move common parts in your and exynos 
driver to 'shared library' and use it in both drivers - this way you 
have bigger chances to avoid traps.


Regards

Andrzej


>
> Michael
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://protect2.fireeye.com/v1/url?k=08365e03-57ad66fe-0837d54c-000babff317b-bdb6592fde86cf24&q=1&e=db1ed315-991b-408c-827a-4b6cbf2b4e3e&u=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel
>
Daniel Vetter Feb. 23, 2021, 12:07 p.m. UTC | #16
On Thu, Feb 18, 2021 at 5:02 PM Andrzej Hajda <a.hajda@samsung.com> wrote:
>
> Hi Michael,
>
> W dniu 18.02.2021 o 09:04, Michael Tretter pisze:
> > On Wed, 10 Feb 2021 10:10:37 +0100, Frieder Schrempf wrote:
> >> On 04.02.21 18:46, Daniel Vetter wrote:
> >>> On Thu, Feb 4, 2021 at 6:26 PM Laurent Pinchart <laurent.pinchart@ideasonboard.com> wrote:
> >>>> On Thu, Feb 04, 2021 at 06:19:22PM +0100, Daniel Vetter wrote:
> >>>>> On Thu, Feb 4, 2021 at 5:28 PM Andrzej Hajda wrote:
> >>>>>> W dniu 04.02.2021 o 17:05, Daniel Vetter pisze:
> >>>>>>> On Thu, Feb 04, 2021 at 11:56:32AM +0100, Michael Tretter wrote:
> >>>>>>>> On Thu, 04 Feb 2021 11:17:49 +0100, Daniel Vetter wrote:
> >>>>>>>>> On Wed, Feb 3, 2021 at 9:32 PM Michael Tretter wrote:
> >>>>>>>>>> On Mon, 01 Feb 2021 17:33:14 +0100, Michael Tretter wrote:
> >>>>>>>>>>> On Tue, 15 Sep 2020 21:40:40 +0200, Andrzej Hajda wrote:
> >>>>>>>>>>>> W dniu 14.09.2020 o 23:19, Andrzej Hajda pisze:
> >>>>>>>>>>>>> On 14.09.2020 22:01, Michael Tretter wrote:
> >>>>>>>>>>>>>> On Mon, 14 Sep 2020 14:31:19 +0200, Marek Szyprowski wrote:
> >>>>>>>>>>>>>>> On 14.09.2020 10:29, Marek Szyprowski wrote:
> >>>>>>>>>>>>>>>> On 11.09.2020 15:54, Michael Tretter wrote:
> >>>>>>>>>>>>>>>>> Make the exynos_dsi driver a full drm bridge that can be found and
> >>>>>>>>>>>>>>>>> used
> >>>>>>>>>>>>>>>>> from other drivers.
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>> Other drivers can only attach to the bridge, if a mipi dsi device
> >>>>>>>>>>>>>>>>> already attached to the bridge. This allows to defer the probe of the
> >>>>>>>>>>>>>>>>> display pipe until the downstream bridges are available, too.
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>> Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
> >>>>>>>>>>>>>>>> This one (and the whole series applied) still fails on Exynos boards:
> >>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping
> >>>>>>>>>>>>>>>> operations
> >>>>>>>>>>>>>>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
> >>>>>>>>>>>>>>>> OF: graph: no port node found in /soc/dsi@11c80000
> >>>>>>>>>>>>>>>> 8<--- cut here ---
> >>>>>>>>>>>>>>>> Unable to handle kernel NULL pointer dereference at virtual address
> >>>>>>>>>>>>>>>> 00000084
> >>>>>>>>>>>>>>>> pgd = (ptrval)
> >>>>>>>>>>>>>>>> [00000084] *pgd=00000000
> >>>>>>>>>>>>>>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
> >>>>>>>>>>>>>>>> Modules linked in:
> >>>>>>>>>>>>>>>> CPU: 1 PID: 1 Comm: swapper/0 Not tainted
> >>>>>>>>>>>>>>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec #1608
> >>>>>>>>>>>>>>>> Hardware name: Samsung Exynos (Flattened Device Tree)
> >>>>>>>>>>>>>>>> PC is at drm_bridge_attach+0x18/0x164
> >>>>>>>>>>>>>>>> LR is at exynos_dsi_bind+0x88/0xa8
> >>>>>>>>>>>>>>>> pc : [<c0628c08>]    lr : [<c064d560>]    psr: 20000013
> >>>>>>>>>>>>>>>> sp : ef0dfca8  ip : 00000002  fp : c13190e0
> >>>>>>>>>>>>>>>> r10: 00000000  r9 : ee46d580  r8 : c13190e0
> >>>>>>>>>>>>>>>> r7 : ee438800  r6 : 00000018  r5 : ef253810  r4 : ef39e840
> >>>>>>>>>>>>>>>> r3 : 00000000  r2 : 00000018  r1 : ef39e888  r0 : ef39e840
> >>>>>>>>>>>>>>>> Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> >>>>>>>>>>>>>>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
> >>>>>>>>>>>>>>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
> >>>>>>>>>>>>>>>> Stack: (0xef0dfca8 to 0xef0e0000)
> >>>>>>>>>>>>>>>> ...
> >>>>>>>>>>>>>>>> [<c0628c08>] (drm_bridge_attach) from [<c064d560>]
> >>>>>>>>>>>>>>>> (exynos_dsi_bind+0x88/0xa8)
> >>>>>>>>>>>>>>>> [<c064d560>] (exynos_dsi_bind) from [<c066a800>]
> >>>>>>>>>>>>>>>> (component_bind_all+0xfc/0x290)
> >>>>>>>>>>>>>>>> [<c066a800>] (component_bind_all) from [<c0649dc0>]
> >>>>>>>>>>>>>>>> (exynos_drm_bind+0xe4/0x19c)
> >>>>>>>>>>>>>>>> [<c0649dc0>] (exynos_drm_bind) from [<c066ad74>]
> >>>>>>>>>>>>>>>> (try_to_bring_up_master+0x1e4/0x2c4)
> >>>>>>>>>>>>>>>> [<c066ad74>] (try_to_bring_up_master) from [<c066b2b4>]
> >>>>>>>>>>>>>>>> (component_master_add_with_match+0xd4/0x108)
> >>>>>>>>>>>>>>>> [<c066b2b4>] (component_master_add_with_match) from [<c0649ae8>]
> >>>>>>>>>>>>>>>> (exynos_drm_platform_probe+0xe4/0x110)
> >>>>>>>>>>>>>>>> [<c0649ae8>] (exynos_drm_platform_probe) from [<c0674e6c>]
> >>>>>>>>>>>>>>>> (platform_drv_probe+0x6c/0xa4)
> >>>>>>>>>>>>>>>> [<c0674e6c>] (platform_drv_probe) from [<c067242c>]
> >>>>>>>>>>>>>>>> (really_probe+0x200/0x4fc)
> >>>>>>>>>>>>>>>> [<c067242c>] (really_probe) from [<c06728f0>]
> >>>>>>>>>>>>>>>> (driver_probe_device+0x78/0x1fc)
> >>>>>>>>>>>>>>>> [<c06728f0>] (driver_probe_device) from [<c0672cd8>]
> >>>>>>>>>>>>>>>> (device_driver_attach+0x58/0x60)
> >>>>>>>>>>>>>>>> [<c0672cd8>] (device_driver_attach) from [<c0672dbc>]
> >>>>>>>>>>>>>>>> (__driver_attach+0xdc/0x174)
> >>>>>>>>>>>>>>>> [<c0672dbc>] (__driver_attach) from [<c06701b4>]
> >>>>>>>>>>>>>>>> (bus_for_each_dev+0x68/0xb4)
> >>>>>>>>>>>>>>>> [<c06701b4>] (bus_for_each_dev) from [<c06714e8>]
> >>>>>>>>>>>>>>>> (bus_add_driver+0x158/0x214)
> >>>>>>>>>>>>>>>> [<c06714e8>] (bus_add_driver) from [<c0673c1c>]
> >>>>>>>>>>>>>>>> (driver_register+0x78/0x110)
> >>>>>>>>>>>>>>>> [<c0673c1c>] (driver_register) from [<c0649ca8>]
> >>>>>>>>>>>>>>>> (exynos_drm_init+0xe4/0x118)
> >>>>>>>>>>>>>>>> [<c0649ca8>] (exynos_drm_init) from [<c0102484>]
> >>>>>>>>>>>>>>>> (do_one_initcall+0x8c/0x42c)
> >>>>>>>>>>>>>>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
> >>>>>>>>>>>>>>>> (kernel_init_freeable+0x190/0x1dc)
> >>>>>>>>>>>>>>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7880>]
> >>>>>>>>>>>>>>>> (kernel_init+0x8/0x118)
> >>>>>>>>>>>>>>>> [<c0af7880>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
> >>>>>>>>>>>>>>>> Exception stack(0xef0dffb0 to 0xef0dfff8)
> >>>>>>>>>>>>>>>> ...
> >>>>>>>>>>>>>>>> ---[ end trace ee27f313f9ed9da1 ]---
> >>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>> # arm-linux-gnueabi-addr2line -e vmlinux c0628c08
> >>>>>>>>>>>>>>>> drivers/gpu/drm/drm_bridge.c:184 (discriminator 1)
> >>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>> I will try to debug it a bit more today.
> >>>>>>>>>>>>>>> The above crash has been caused by lack of in_bridge initialization to
> >>>>>>>>>>>>>>> NULL in exynos_dsi_bind() in this patch. However, fixing it reveals
> >>>>>>>>>>>>>>> another issue:
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping operations
> >>>>>>>>>>>>>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
> >>>>>>>>>>>>>>> OF: graph: no port node found in /soc/dsi@11c80000
> >>>>>>>>>>>>>>> 8<--- cut here ---
> >>>>>>>>>>>>>>> Unable to handle kernel NULL pointer dereference at virtual address
> >>>>>>>>>>>>>>> 00000280
> >>>>>>>>>>>>>>> pgd = (ptrval)
> >>>>>>>>>>>>>>> [00000280] *pgd=00000000
> >>>>>>>>>>>>>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
> >>>>>>>>>>>>>>> Modules linked in:
> >>>>>>>>>>>>>>> CPU: 0 PID: 1 Comm: swapper/0 Not tainted
> >>>>>>>>>>>>>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec-dirty #1613
> >>>>>>>>>>>>>>> Hardware name: Samsung Exynos (Flattened Device Tree)
> >>>>>>>>>>>>>>> PC is at __mutex_lock+0x54/0xb18
> >>>>>>>>>>>>>>> LR is at lock_is_held_type+0x80/0x138
> >>>>>>>>>>>>>>> pc : [<c0afc920>]    lr : [<c0af63e8>]    psr: 60000013
> >>>>>>>>>>>>>>> sp : ef0dfd30  ip : 33937b74  fp : c13193c8
> >>>>>>>>>>>>>>> r10: c1208eec  r9 : 00000000  r8 : ee45f808
> >>>>>>>>>>>>>>> r7 : c19561a4  r6 : 00000000  r5 : 00000000  r4 : 0000024c
> >>>>>>>>>>>>>>> r3 : 00000000  r2 : 00204140  r1 : c124f13c  r0 : 00000000
> >>>>>>>>>>>>>>> Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> >>>>>>>>>>>>>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051
> >>>>>>>>>>>>>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))
> >>>>>>>>>>>>>>> Stack: (0xef0dfd30 to 0xef0e0000)
> >>>>>>>>>>>>>>> ...
> >>>>>>>>>>>>>>> [<c0afc920>] (__mutex_lock) from [<c0afd400>]
> >>>>>>>>>>>>>>> (mutex_lock_nested+0x1c/0x24)
> >>>>>>>>>>>>>>> [<c0afd400>] (mutex_lock_nested) from [<c064d4b8>]
> >>>>>>>>>>>>>>> (__exynos_dsi_host_attach+0x20/0x6c)
> >>>>>>>>>>>>>>> [<c064d4b8>] (__exynos_dsi_host_attach) from [<c064d914>]
> >>>>>>>>>>>>>>> (exynos_dsi_host_attach+0x70/0x194)
> >>>>>>>>>>>>>>> [<c064d914>] (exynos_dsi_host_attach) from [<c0656b64>]
> >>>>>>>>>>>>>>> (s6e8aa0_probe+0x1b0/0x218)
> >>>>>>>>>>>>>>> [<c0656b64>] (s6e8aa0_probe) from [<c0672530>]
> >>>>>>>>>>>>>>> (really_probe+0x200/0x4fc)
> >>>>>>>>>>>>>>> [<c0672530>] (really_probe) from [<c06729f4>]
> >>>>>>>>>>>>>>> (driver_probe_device+0x78/0x1fc)
> >>>>>>>>>>>>>>> [<c06729f4>] (driver_probe_device) from [<c0672ddc>]
> >>>>>>>>>>>>>>> (device_driver_attach+0x58/0x60)
> >>>>>>>>>>>>>>> [<c0672ddc>] (device_driver_attach) from [<c0672ec0>]
> >>>>>>>>>>>>>>> (__driver_attach+0xdc/0x174)
> >>>>>>>>>>>>>>> [<c0672ec0>] (__driver_attach) from [<c06702b8>]
> >>>>>>>>>>>>>>> (bus_for_each_dev+0x68/0xb4)
> >>>>>>>>>>>>>>> [<c06702b8>] (bus_for_each_dev) from [<c06715ec>]
> >>>>>>>>>>>>>>> (bus_add_driver+0x158/0x214)
> >>>>>>>>>>>>>>> [<c06715ec>] (bus_add_driver) from [<c0673d20>]
> >>>>>>>>>>>>>>> (driver_register+0x78/0x110)
> >>>>>>>>>>>>>>> [<c0673d20>] (driver_register) from [<c0102484>]
> >>>>>>>>>>>>>>> (do_one_initcall+0x8c/0x42c)
> >>>>>>>>>>>>>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]
> >>>>>>>>>>>>>>> (kernel_init_freeable+0x190/0x1dc)
> >>>>>>>>>>>>>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7988>]
> >>>>>>>>>>>>>>> (kernel_init+0x8/0x118)
> >>>>>>>>>>>>>>> [<c0af7988>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)
> >>>>>>>>>>>>>>> Exception stack(0xef0dffb0 to 0xef0dfff8)
> >>>>>>>>>>>>>>> ...
> >>>>>>>>>>>>>>> ---[ end trace c06e996ec2e8234d ]---
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>> This means that dsi->encoder.dev is not initialized in
> >>>>>>>>>>>>>>> __exynos_dsi_host_attach().
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>> This happens, because drm_bridge_attach() in exynos_dsi_bind() returned
> >>>>>>>>>>>>>>> earlier -517 (deferred probe), what causes cleanup of encoder and
> >>>>>>>>>>>>>>> release of all drm resources.
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>> Then however, the panel tries to register itself and
> >>>>>>>>>>>>>>> exynos_dsi_host_attach() tries to access the released encoder (which is
> >>>>>>>>>>>>>>> zeroed in drm_encoder_release) and rest of resources, what causes
> >>>>>>>>>>>>>>> failure.
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>> It looks that something is missing. Maybe mipi host has to be
> >>>>>>>>>>>>>>> registered
> >>>>>>>>>>>>>>> later, when bridge is ready? I have no idea how it is handled before
> >>>>>>>>>>>>>>> this patch. Andrzej, could you comment it a bit?
> >>>>>>>>>>>>>> I intentionally changed the order, because if another bridge follows
> >>>>>>>>>>>>>> in the
> >>>>>>>>>>>>>> pipeline, the probe of the drm driver has to be deferred until some
> >>>>>>>>>>>>>> bridge
> >>>>>>>>>>>>>> provides a connector. The next bridge registers itself via the
> >>>>>>>>>>>>>> host_attach
> >>>>>>>>>>>>>> function and the deferral is ensured via the bind for the bind/unbind
> >>>>>>>>>>>>>> API or
> >>>>>>>>>>>>>> the bridge_attach function otherwise.
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>> On the other hand, the bridge does not have an encoder until the mipi
> >>>>>>>>>>>>>> device
> >>>>>>>>>>>>>> has been attached.
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>> As a solution, the exynos dsi driver must initialize the encoder in
> >>>>>>>>>>>>>> exynos_dsi_probe instead of in exynos_dsi_bind and access the encoder
> >>>>>>>>>>>>>> via
> >>>>>>>>>>>>>> exynos_dsi instead of the bridge.
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>> Can you try to move everything except samsung_dsim_bind from
> >>>>>>>>>>>>>> exynos_dsi_bind
> >>>>>>>>>>>>>> to exynos_dsi_probe (respectively for unbind) and report if it fixes the
> >>>>>>>>>>>>>> crash.
> >>>>>>>>>>>>> The original behaviour is that encoder (exynos_dsi) is registered
> >>>>>>>>>>>>> regardless of sink presence (initially panel, later also bridge) - it
> >>>>>>>>>>>>> avoids multiple issues with deferred probe, device driver bind/unbind
> >>>>>>>>>>>>> and module load/unload. Appearance or disappearance of sink is
> >>>>>>>>>>>>> reported to host nicely via DSI attach/detach callbacks - and it is
> >>>>>>>>>>>>> reflected in drm world as change state of the connector.
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> Registering DSI host in bind and unregistering in unbind assures that
> >>>>>>>>>>>>> if mipi_dsi device is attached/detached the drm device is always
> >>>>>>>>>>>>> present - it makes device/driver binding race free and allows to avoid
> >>>>>>>>>>>>> additional locking.
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> Moving DSI host registration to probe changes everything, for sure it
> >>>>>>>>>>>>> breaks the nice feature of DSI attach/detach callbacks and apparently
> >>>>>>>>>>>>> can cause different issues depending on device bind order.
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> I will try to look at the patches tomorrow and maybe I can find more
> >>>>>>>>>>>>> constructive comments :)
> >>>>>>>>>>>> As I said yesterday, exynos_dsi driver uses dsi host attach/detach
> >>>>>>>>>>>> callbacks to control appearance/disappearance of downstream device. It
> >>>>>>>>>>>> allows to:
> >>>>>>>>>>>>
> >>>>>>>>>>>> 1. Safely bind/unbind different device drivers at any time and at any
> >>>>>>>>>>>> order, without killing exynos_drm and/or crashing system.
> >>>>>>>>>>>>
> >>>>>>>>>>>> 2. Avoid issues with late drm init - on some platforms exynos_drm device
> >>>>>>>>>>>> appeared too late, due to deferred probe, and resulted in black screen
> >>>>>>>>>>>> in userspace.
> >>>>>>>>>>>>
> >>>>>>>>>>>>
> >>>>>>>>>>>> Now if we want to convert exynos_dsi to drm_bridge I see following options:
> >>>>>>>>>>>>
> >>>>>>>>>>>> A. Forgot about callbacks and make the exynos_drm to defer probing until
> >>>>>>>>>>>> exynos_dsi bridge is available, probably it will cause later exynos_drm
> >>>>>>>>>>>> appearance, thus probably black screen on some targets. So for sure it
> >>>>>>>>>>>> will be suboptimal. Making it bridge unbind safe would be another
> >>>>>>>>>>>> problem, but most developers do not care about it so why should we? :)
> >>>>>>>>>>>>
> >>>>>>>>>>>> B. Try to mimic current behaviour - exynos_dsi register bridge ASAP,
> >>>>>>>>>>>> even if downstream devices are not yet attached, on attach/detach notify
> >>>>>>>>>>>> drm about it via connector status change, for this dsi_host registration
> >>>>>>>>>>>> should be performed from drm_bridge attach, I guess.
> >>>>>>>>>>>>
> >>>>>>>>>>>>
> >>>>>>>>>>>> Option A is more standard, but is unsafe and causes other issues.
> >>>>>>>>>>>>
> >>>>>>>>>>>> Option B keeps current behaviour.
> >>>>>>>>>>> Maybe we can have both, but I am not sure, if I am missing something:
> >>>>>>>>>>>
> >>>>>>>>>>> I still prefer option A for the samsung-dsim driver, because it is more
> >>>>>>>>>>> standard, simpler and avoids issues with encoders, connectors or handling
> >>>>>>>>>>> hotplug.
> >>>>>>>>>>>
> >>>>>>>>>>> The idea is to use two bridges in the exynos-dsi driver: One bridge in the
> >>>>>>>>>>> samsung-dsim driver which implements option A and defers probing of the drm
> >>>>>>>>>>> driver until the next bridge is attached. And a second bridge in the
> >>>>>>>>>>> exynos_dsi that attaches to the first bridge (thus, allowing the exynos_drm
> >>>>>>>>>>> device to appear) and implements the hotplug handling for notifying drm via
> >>>>>>>>>>> connector status change.
> >>>>>>>>>>>
> >>>>>>>>>>> The driver for the i.MX8M would use the samsung-dsim bridge without an
> >>>>>>>>>>> additional bridge.
> >>>>>>>>>>>
> >>>>>>>>>>> This allows the samsung-dsim driver to expose the standard behavior while the
> >>>>>>>>>>> exynos_dsi may stick to the existing behavior for the exynos_drm driver.
> >>>>>>>>>>>
> >>>>>>>>>>> I hope this makes sense and does not sound too crazy. It might be difficult to
> >>>>>>>>>>> get the probing and mipi host/device registration correct, but I will try, if
> >>>>>>>>>>> this can work.
> >>>>>>>>>> Adding two bridges for being able to support hotplugging adds many special
> >>>>>>>>>> cases to the bridge driver and still requires more custom API to correctly add
> >>>>>>>>>> the second bridge. I don't think that this a viable path to go.
> >>>>>>>>> Just jumping in here: You cannot hotplug/hotremove anything from a
> >>>>>>>>> drm_device after drm_dev_register has been called, except
> >>>>>>>>> drm_connector. I didn't dig into details here so not sure whether you
> >>>>>>>>> want to late-bind your bridge after drm_dev_register is called or not,
> >>>>>>>>> so might just be fyi and not relevant to the discussion.
> >>>>>>>> Thanks. AFAIC that is exactly what is currently implemented in the exynos_drm
> >>>>>>>> driver (i.e. Option B)
> >>>>>>>>
> >>>>>>>> exynos_dsi_bind configures the encoder and registers a DSI host. Afterwards,
> >>>>>>>> exynos_drm_bind (as component_master_ops) calls drm_dev_register. Later, a DSI
> >>>>>>>> device might attach to the DSI host and call exynos_dsi_host_attach. In
> >>>>>>>> exynos_dsi_host_attach, the driver finds the drm_bridge for the DSI device and
> >>>>>>>> attaches this bridge to the encoder _after_ drm_dev_register has been called.
> >>>>>>>> This is invalid behavior, right?
> >>>>>>> Definitely not supported, I don't think we have the right locks in place
> >>>>>>> to make sure this works.
> >>>>>>>
> >>>>>>> Now if your _only_ adding a drm_bridge (and not an encoder or anything
> >>>>>>> like that), and you are adding the drm_connector correctly (like a
> >>>>>>> hotplugged DP MST sink), then that would at least work from a uapi pov.
> >>>>>>> Because drm_bridge isn't exposed as an uapi object.
> >>>>>>>
> >>>>>>> But yeah, as-is, don't :-)
> >>>>>>>
> >>>>>>> The solution here is a bunch of EPROBE_DEFER handling until all your
> >>>>>>> bridges are loaded, with or without the assistance of component.c
> >>>>>>> framework. Only then call drm_dev_register.
> >>>>>> I have impression we have similar conversation already.
> >>>>>>
> >>>>>> As you stated drm_bridge and drm_panel are not exposed to userspace so
> >>>>>> there shouldn't be problem with them from uapi PoV.
> >>>>>>
> >>>>>> On the other side drm_panel or drm_bridge are not used until pipeline
> >>>>>> enters connected state (at least they were not some time ago :) ). The
> >>>>>> issue is that bridge exposes drm_connector, but as you stated (again :)
> >>>>>> ) connectors can be hotplugged, so in theory it should work. Practical
> >>>>>> tests shows that it also works, but bugs can be still there.
> >>>>>>
> >>>>>> Bunch of EPROBE_DEFER was very slow (as a result userspace timeouted and
> >>>>>> decided there is no display), and does not handle unbinding/re-binding
> >>>>>> drivers.
> >>>>> Rebinding drivers should be fixed now, with a bunch of fixes in driver
> >>>>> core. If not, we need to fix this more.
> >>>>>
> >>>>> Also, EPROBE_DEFER is how this is supposed to work. If it's too slow,
> >>>>> we need to fix EPROBE_DEFER (there's ideas for pre-sorting that never
> >>>>> seem to go anywhere), not paper over it with bad architecture in
> >>>>> drivers.
> >>>> I've heard this argument multiple times, but it sounds more like an
> >>>> attempt to ignore the problem and hope it will fall on someone else's
> >>>> plate :-) Improvement in the probe deferral mechanism are certainly an
> >>>> option to explore, but as far as I can tell nobody has proven that this
> >>>> mechanism is or will be able to solve all problems related to probe
> >>>> ordering dependencies. I wouldn't rule out the need for different
> >>>> solutions for some of the issues.
> >>> Then build another one. But adding hotplug for stuff that is there,
> >>> and shouldn't be hotplugged, just because it's easier on driver
> >>> writers and harder on userspace isn't really a good approach.
> >>> -Daniel
> >> I think it is quite clear that replacing or reworking the deferral mechanism
> >> is out of scope for this discussion, which is why I would like to come back
> >> to the original issue and sum this up as far as I understand it (which is
> >> not really far when it comes to the details):
> >>
> >> We have the existing exynos driver that avoids the standard deferral
> >> mechanism in favor of something that works but Daniel describes as
> >> "definitely not supported".
> >>
> >> We have a proposal from Michael for converting the driver to the standard
> >> drm_bridge behavior and more work from Michael and Marek based on this to
> >> implement the platform specific parts for i.MX8MM.
> >>
> >>  From the i.MX8MM POV this approach already received some testing and looks
> >> good as far as I can judge. Upstreaming this solution is blocked because of
> >> objections from the Samsung maintainers.
> >>
> >> Sorry if I'm being blunt or naive, but where to go from here?
> >>
> > Maybe some more information by the Samsung maintainers would help:
> >
> > If I understand correctly, the main reason for the non-standard behavior is a
> > userspace application that runs into a timeout if the drm-device does not
> > appear in time. Correct? Is there something we can do about that?
> >
> > The other reason is the convenience of binding and unbinding a bridge driver,
> > while the drm device is kept available. Correct? Is this used in development,
> > testing, or production?
> >
> > Is there anything else that prevents the exynos drm from switching to the
> > standard behavior?
> >
> > Would a exynos drm specific wrapper, which uses a standard bridge driver but
> > exposes the non-standard behavior, be acceptable? (Unfortunately, my first try
> > on something like that felt really awkward and didn't really work.)
>
> Even if we drop this 'non-standard' behaviour, your task will be still
> quite difficult to fulfil - you are trying to completely rewrite core
> component of Exynos display pipeline without hardware to test.
>
> ExynosDSI is used in almost all Exynos platforms supported mainline (ls
> -1 arch/arm*/boot/dts/exynos*.dts | wc shows 35). It has different hw
> versions (4 compatibles) and is used in different configurations (video
> mode, command mode, with hw/sw trigger, connected to panels/bridges) and
> for sure with big heritage, since it was one of the 1st DSI drivers.
>
> Rewriting such driver is challenging, even with access to hw.
>
> So maybe it would be better to move common parts in your and exynos
> driver to 'shared library' and use it in both drivers - this way you
> have bigger chances to avoid traps.

If exynos really can't be fixed up in a reasonable way, then I think
sharing code doesn't make much sense - you drag the new driver down
with the old one that's just hanging in there the wrong way round. For
that case just copypaste the exynos code into a new clean drm_bridge
driver, and done.

That would also mean that new exynos support in drm/exynos would need
to be stalled until this is sorted out (at least for new platforms),
since continuing the old way really doesn't sound so great. Wouldn't
be the first time we just end up with a driver fork because the old
one has too much heritage and is too hard to change.

Note that this can also be done within one driver codebase, e.g.
nouveau has still legacy modeset code for nv04-nv4x, and atomic from
nv50+ going forward.

Should be possible to find a pragmatic solution here going forward,
despite tons of hw and heritage. If we use existing hard to retest hw
support to stop new driver submissions from doing the right thing,
that's a clear failure, we need a better approach here.
-Daniel

>
>
> Regards
>
> Andrzej
>
>
> >
> > Michael
> > _______________________________________________
> > dri-devel mailing list
> > dri-devel@lists.freedesktop.org
> > https://protect2.fireeye.com/v1/url?k=08365e03-57ad66fe-0837d54c-000babff317b-bdb6592fde86cf24&q=1&e=db1ed315-991b-408c-827a-4b6cbf2b4e3e&u=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel
> >
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Frieder Schrempf April 20, 2021, 11:42 a.m. UTC | #17
On 23.02.21 13:07, Daniel Vetter wrote:
> On Thu, Feb 18, 2021 at 5:02 PM Andrzej Hajda <a.hajda@samsung.com> wrote:

>>

>> Hi Michael,

>>

>> W dniu 18.02.2021 o 09:04, Michael Tretter pisze:

>>> On Wed, 10 Feb 2021 10:10:37 +0100, Frieder Schrempf wrote:

>>>> On 04.02.21 18:46, Daniel Vetter wrote:

>>>>> On Thu, Feb 4, 2021 at 6:26 PM Laurent Pinchart <laurent.pinchart@ideasonboard.com> wrote:

>>>>>> On Thu, Feb 04, 2021 at 06:19:22PM +0100, Daniel Vetter wrote:

>>>>>>> On Thu, Feb 4, 2021 at 5:28 PM Andrzej Hajda wrote:

>>>>>>>> W dniu 04.02.2021 o 17:05, Daniel Vetter pisze:

>>>>>>>>> On Thu, Feb 04, 2021 at 11:56:32AM +0100, Michael Tretter wrote:

>>>>>>>>>> On Thu, 04 Feb 2021 11:17:49 +0100, Daniel Vetter wrote:

>>>>>>>>>>> On Wed, Feb 3, 2021 at 9:32 PM Michael Tretter wrote:

>>>>>>>>>>>> On Mon, 01 Feb 2021 17:33:14 +0100, Michael Tretter wrote:

>>>>>>>>>>>>> On Tue, 15 Sep 2020 21:40:40 +0200, Andrzej Hajda wrote:

>>>>>>>>>>>>>> W dniu 14.09.2020 o 23:19, Andrzej Hajda pisze:

>>>>>>>>>>>>>>> On 14.09.2020 22:01, Michael Tretter wrote:

>>>>>>>>>>>>>>>> On Mon, 14 Sep 2020 14:31:19 +0200, Marek Szyprowski wrote:

>>>>>>>>>>>>>>>>> On 14.09.2020 10:29, Marek Szyprowski wrote:

>>>>>>>>>>>>>>>>>> On 11.09.2020 15:54, Michael Tretter wrote:

>>>>>>>>>>>>>>>>>>> Make the exynos_dsi driver a full drm bridge that can be found and

>>>>>>>>>>>>>>>>>>> used

>>>>>>>>>>>>>>>>>>> from other drivers.

>>>>>>>>>>>>>>>>>>>

>>>>>>>>>>>>>>>>>>> Other drivers can only attach to the bridge, if a mipi dsi device

>>>>>>>>>>>>>>>>>>> already attached to the bridge. This allows to defer the probe of the

>>>>>>>>>>>>>>>>>>> display pipe until the downstream bridges are available, too.

>>>>>>>>>>>>>>>>>>>

>>>>>>>>>>>>>>>>>>> Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>

>>>>>>>>>>>>>>>>>> This one (and the whole series applied) still fails on Exynos boards:

>>>>>>>>>>>>>>>>>>

>>>>>>>>>>>>>>>>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping

>>>>>>>>>>>>>>>>>> operations

>>>>>>>>>>>>>>>>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)

>>>>>>>>>>>>>>>>>> OF: graph: no port node found in /soc/dsi@11c80000

>>>>>>>>>>>>>>>>>> 8<--- cut here ---

>>>>>>>>>>>>>>>>>> Unable to handle kernel NULL pointer dereference at virtual address

>>>>>>>>>>>>>>>>>> 00000084

>>>>>>>>>>>>>>>>>> pgd = (ptrval)

>>>>>>>>>>>>>>>>>> [00000084] *pgd=00000000

>>>>>>>>>>>>>>>>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM

>>>>>>>>>>>>>>>>>> Modules linked in:

>>>>>>>>>>>>>>>>>> CPU: 1 PID: 1 Comm: swapper/0 Not tainted

>>>>>>>>>>>>>>>>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec #1608

>>>>>>>>>>>>>>>>>> Hardware name: Samsung Exynos (Flattened Device Tree)

>>>>>>>>>>>>>>>>>> PC is at drm_bridge_attach+0x18/0x164

>>>>>>>>>>>>>>>>>> LR is at exynos_dsi_bind+0x88/0xa8

>>>>>>>>>>>>>>>>>> pc : [<c0628c08>]    lr : [<c064d560>]    psr: 20000013

>>>>>>>>>>>>>>>>>> sp : ef0dfca8  ip : 00000002  fp : c13190e0

>>>>>>>>>>>>>>>>>> r10: 00000000  r9 : ee46d580  r8 : c13190e0

>>>>>>>>>>>>>>>>>> r7 : ee438800  r6 : 00000018  r5 : ef253810  r4 : ef39e840

>>>>>>>>>>>>>>>>>> r3 : 00000000  r2 : 00000018  r1 : ef39e888  r0 : ef39e840

>>>>>>>>>>>>>>>>>> Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none

>>>>>>>>>>>>>>>>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051

>>>>>>>>>>>>>>>>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))

>>>>>>>>>>>>>>>>>> Stack: (0xef0dfca8 to 0xef0e0000)

>>>>>>>>>>>>>>>>>> ...

>>>>>>>>>>>>>>>>>> [<c0628c08>] (drm_bridge_attach) from [<c064d560>]

>>>>>>>>>>>>>>>>>> (exynos_dsi_bind+0x88/0xa8)

>>>>>>>>>>>>>>>>>> [<c064d560>] (exynos_dsi_bind) from [<c066a800>]

>>>>>>>>>>>>>>>>>> (component_bind_all+0xfc/0x290)

>>>>>>>>>>>>>>>>>> [<c066a800>] (component_bind_all) from [<c0649dc0>]

>>>>>>>>>>>>>>>>>> (exynos_drm_bind+0xe4/0x19c)

>>>>>>>>>>>>>>>>>> [<c0649dc0>] (exynos_drm_bind) from [<c066ad74>]

>>>>>>>>>>>>>>>>>> (try_to_bring_up_master+0x1e4/0x2c4)

>>>>>>>>>>>>>>>>>> [<c066ad74>] (try_to_bring_up_master) from [<c066b2b4>]

>>>>>>>>>>>>>>>>>> (component_master_add_with_match+0xd4/0x108)

>>>>>>>>>>>>>>>>>> [<c066b2b4>] (component_master_add_with_match) from [<c0649ae8>]

>>>>>>>>>>>>>>>>>> (exynos_drm_platform_probe+0xe4/0x110)

>>>>>>>>>>>>>>>>>> [<c0649ae8>] (exynos_drm_platform_probe) from [<c0674e6c>]

>>>>>>>>>>>>>>>>>> (platform_drv_probe+0x6c/0xa4)

>>>>>>>>>>>>>>>>>> [<c0674e6c>] (platform_drv_probe) from [<c067242c>]

>>>>>>>>>>>>>>>>>> (really_probe+0x200/0x4fc)

>>>>>>>>>>>>>>>>>> [<c067242c>] (really_probe) from [<c06728f0>]

>>>>>>>>>>>>>>>>>> (driver_probe_device+0x78/0x1fc)

>>>>>>>>>>>>>>>>>> [<c06728f0>] (driver_probe_device) from [<c0672cd8>]

>>>>>>>>>>>>>>>>>> (device_driver_attach+0x58/0x60)

>>>>>>>>>>>>>>>>>> [<c0672cd8>] (device_driver_attach) from [<c0672dbc>]

>>>>>>>>>>>>>>>>>> (__driver_attach+0xdc/0x174)

>>>>>>>>>>>>>>>>>> [<c0672dbc>] (__driver_attach) from [<c06701b4>]

>>>>>>>>>>>>>>>>>> (bus_for_each_dev+0x68/0xb4)

>>>>>>>>>>>>>>>>>> [<c06701b4>] (bus_for_each_dev) from [<c06714e8>]

>>>>>>>>>>>>>>>>>> (bus_add_driver+0x158/0x214)

>>>>>>>>>>>>>>>>>> [<c06714e8>] (bus_add_driver) from [<c0673c1c>]

>>>>>>>>>>>>>>>>>> (driver_register+0x78/0x110)

>>>>>>>>>>>>>>>>>> [<c0673c1c>] (driver_register) from [<c0649ca8>]

>>>>>>>>>>>>>>>>>> (exynos_drm_init+0xe4/0x118)

>>>>>>>>>>>>>>>>>> [<c0649ca8>] (exynos_drm_init) from [<c0102484>]

>>>>>>>>>>>>>>>>>> (do_one_initcall+0x8c/0x42c)

>>>>>>>>>>>>>>>>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]

>>>>>>>>>>>>>>>>>> (kernel_init_freeable+0x190/0x1dc)

>>>>>>>>>>>>>>>>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7880>]

>>>>>>>>>>>>>>>>>> (kernel_init+0x8/0x118)

>>>>>>>>>>>>>>>>>> [<c0af7880>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)

>>>>>>>>>>>>>>>>>> Exception stack(0xef0dffb0 to 0xef0dfff8)

>>>>>>>>>>>>>>>>>> ...

>>>>>>>>>>>>>>>>>> ---[ end trace ee27f313f9ed9da1 ]---

>>>>>>>>>>>>>>>>>>

>>>>>>>>>>>>>>>>>> # arm-linux-gnueabi-addr2line -e vmlinux c0628c08

>>>>>>>>>>>>>>>>>> drivers/gpu/drm/drm_bridge.c:184 (discriminator 1)

>>>>>>>>>>>>>>>>>>

>>>>>>>>>>>>>>>>>> I will try to debug it a bit more today.

>>>>>>>>>>>>>>>>> The above crash has been caused by lack of in_bridge initialization to

>>>>>>>>>>>>>>>>> NULL in exynos_dsi_bind() in this patch. However, fixing it reveals

>>>>>>>>>>>>>>>>> another issue:

>>>>>>>>>>>>>>>>>

>>>>>>>>>>>>>>>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping operations

>>>>>>>>>>>>>>>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)

>>>>>>>>>>>>>>>>> OF: graph: no port node found in /soc/dsi@11c80000

>>>>>>>>>>>>>>>>> 8<--- cut here ---

>>>>>>>>>>>>>>>>> Unable to handle kernel NULL pointer dereference at virtual address

>>>>>>>>>>>>>>>>> 00000280

>>>>>>>>>>>>>>>>> pgd = (ptrval)

>>>>>>>>>>>>>>>>> [00000280] *pgd=00000000

>>>>>>>>>>>>>>>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM

>>>>>>>>>>>>>>>>> Modules linked in:

>>>>>>>>>>>>>>>>> CPU: 0 PID: 1 Comm: swapper/0 Not tainted

>>>>>>>>>>>>>>>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec-dirty #1613

>>>>>>>>>>>>>>>>> Hardware name: Samsung Exynos (Flattened Device Tree)

>>>>>>>>>>>>>>>>> PC is at __mutex_lock+0x54/0xb18

>>>>>>>>>>>>>>>>> LR is at lock_is_held_type+0x80/0x138

>>>>>>>>>>>>>>>>> pc : [<c0afc920>]    lr : [<c0af63e8>]    psr: 60000013

>>>>>>>>>>>>>>>>> sp : ef0dfd30  ip : 33937b74  fp : c13193c8

>>>>>>>>>>>>>>>>> r10: c1208eec  r9 : 00000000  r8 : ee45f808

>>>>>>>>>>>>>>>>> r7 : c19561a4  r6 : 00000000  r5 : 00000000  r4 : 0000024c

>>>>>>>>>>>>>>>>> r3 : 00000000  r2 : 00204140  r1 : c124f13c  r0 : 00000000

>>>>>>>>>>>>>>>>> Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none

>>>>>>>>>>>>>>>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051

>>>>>>>>>>>>>>>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))

>>>>>>>>>>>>>>>>> Stack: (0xef0dfd30 to 0xef0e0000)

>>>>>>>>>>>>>>>>> ...

>>>>>>>>>>>>>>>>> [<c0afc920>] (__mutex_lock) from [<c0afd400>]

>>>>>>>>>>>>>>>>> (mutex_lock_nested+0x1c/0x24)

>>>>>>>>>>>>>>>>> [<c0afd400>] (mutex_lock_nested) from [<c064d4b8>]

>>>>>>>>>>>>>>>>> (__exynos_dsi_host_attach+0x20/0x6c)

>>>>>>>>>>>>>>>>> [<c064d4b8>] (__exynos_dsi_host_attach) from [<c064d914>]

>>>>>>>>>>>>>>>>> (exynos_dsi_host_attach+0x70/0x194)

>>>>>>>>>>>>>>>>> [<c064d914>] (exynos_dsi_host_attach) from [<c0656b64>]

>>>>>>>>>>>>>>>>> (s6e8aa0_probe+0x1b0/0x218)

>>>>>>>>>>>>>>>>> [<c0656b64>] (s6e8aa0_probe) from [<c0672530>]

>>>>>>>>>>>>>>>>> (really_probe+0x200/0x4fc)

>>>>>>>>>>>>>>>>> [<c0672530>] (really_probe) from [<c06729f4>]

>>>>>>>>>>>>>>>>> (driver_probe_device+0x78/0x1fc)

>>>>>>>>>>>>>>>>> [<c06729f4>] (driver_probe_device) from [<c0672ddc>]

>>>>>>>>>>>>>>>>> (device_driver_attach+0x58/0x60)

>>>>>>>>>>>>>>>>> [<c0672ddc>] (device_driver_attach) from [<c0672ec0>]

>>>>>>>>>>>>>>>>> (__driver_attach+0xdc/0x174)

>>>>>>>>>>>>>>>>> [<c0672ec0>] (__driver_attach) from [<c06702b8>]

>>>>>>>>>>>>>>>>> (bus_for_each_dev+0x68/0xb4)

>>>>>>>>>>>>>>>>> [<c06702b8>] (bus_for_each_dev) from [<c06715ec>]

>>>>>>>>>>>>>>>>> (bus_add_driver+0x158/0x214)

>>>>>>>>>>>>>>>>> [<c06715ec>] (bus_add_driver) from [<c0673d20>]

>>>>>>>>>>>>>>>>> (driver_register+0x78/0x110)

>>>>>>>>>>>>>>>>> [<c0673d20>] (driver_register) from [<c0102484>]

>>>>>>>>>>>>>>>>> (do_one_initcall+0x8c/0x42c)

>>>>>>>>>>>>>>>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]

>>>>>>>>>>>>>>>>> (kernel_init_freeable+0x190/0x1dc)

>>>>>>>>>>>>>>>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7988>]

>>>>>>>>>>>>>>>>> (kernel_init+0x8/0x118)

>>>>>>>>>>>>>>>>> [<c0af7988>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)

>>>>>>>>>>>>>>>>> Exception stack(0xef0dffb0 to 0xef0dfff8)

>>>>>>>>>>>>>>>>> ...

>>>>>>>>>>>>>>>>> ---[ end trace c06e996ec2e8234d ]---

>>>>>>>>>>>>>>>>>

>>>>>>>>>>>>>>>>> This means that dsi->encoder.dev is not initialized in

>>>>>>>>>>>>>>>>> __exynos_dsi_host_attach().

>>>>>>>>>>>>>>>>>

>>>>>>>>>>>>>>>>> This happens, because drm_bridge_attach() in exynos_dsi_bind() returned

>>>>>>>>>>>>>>>>> earlier -517 (deferred probe), what causes cleanup of encoder and

>>>>>>>>>>>>>>>>> release of all drm resources.

>>>>>>>>>>>>>>>>>

>>>>>>>>>>>>>>>>> Then however, the panel tries to register itself and

>>>>>>>>>>>>>>>>> exynos_dsi_host_attach() tries to access the released encoder (which is

>>>>>>>>>>>>>>>>> zeroed in drm_encoder_release) and rest of resources, what causes

>>>>>>>>>>>>>>>>> failure.

>>>>>>>>>>>>>>>>>

>>>>>>>>>>>>>>>>> It looks that something is missing. Maybe mipi host has to be

>>>>>>>>>>>>>>>>> registered

>>>>>>>>>>>>>>>>> later, when bridge is ready? I have no idea how it is handled before

>>>>>>>>>>>>>>>>> this patch. Andrzej, could you comment it a bit?

>>>>>>>>>>>>>>>> I intentionally changed the order, because if another bridge follows

>>>>>>>>>>>>>>>> in the

>>>>>>>>>>>>>>>> pipeline, the probe of the drm driver has to be deferred until some

>>>>>>>>>>>>>>>> bridge

>>>>>>>>>>>>>>>> provides a connector. The next bridge registers itself via the

>>>>>>>>>>>>>>>> host_attach

>>>>>>>>>>>>>>>> function and the deferral is ensured via the bind for the bind/unbind

>>>>>>>>>>>>>>>> API or

>>>>>>>>>>>>>>>> the bridge_attach function otherwise.

>>>>>>>>>>>>>>>>

>>>>>>>>>>>>>>>> On the other hand, the bridge does not have an encoder until the mipi

>>>>>>>>>>>>>>>> device

>>>>>>>>>>>>>>>> has been attached.

>>>>>>>>>>>>>>>>

>>>>>>>>>>>>>>>> As a solution, the exynos dsi driver must initialize the encoder in

>>>>>>>>>>>>>>>> exynos_dsi_probe instead of in exynos_dsi_bind and access the encoder

>>>>>>>>>>>>>>>> via

>>>>>>>>>>>>>>>> exynos_dsi instead of the bridge.

>>>>>>>>>>>>>>>>

>>>>>>>>>>>>>>>> Can you try to move everything except samsung_dsim_bind from

>>>>>>>>>>>>>>>> exynos_dsi_bind

>>>>>>>>>>>>>>>> to exynos_dsi_probe (respectively for unbind) and report if it fixes the

>>>>>>>>>>>>>>>> crash.

>>>>>>>>>>>>>>> The original behaviour is that encoder (exynos_dsi) is registered

>>>>>>>>>>>>>>> regardless of sink presence (initially panel, later also bridge) - it

>>>>>>>>>>>>>>> avoids multiple issues with deferred probe, device driver bind/unbind

>>>>>>>>>>>>>>> and module load/unload. Appearance or disappearance of sink is

>>>>>>>>>>>>>>> reported to host nicely via DSI attach/detach callbacks - and it is

>>>>>>>>>>>>>>> reflected in drm world as change state of the connector.

>>>>>>>>>>>>>>>

>>>>>>>>>>>>>>> Registering DSI host in bind and unregistering in unbind assures that

>>>>>>>>>>>>>>> if mipi_dsi device is attached/detached the drm device is always

>>>>>>>>>>>>>>> present - it makes device/driver binding race free and allows to avoid

>>>>>>>>>>>>>>> additional locking.

>>>>>>>>>>>>>>>

>>>>>>>>>>>>>>> Moving DSI host registration to probe changes everything, for sure it

>>>>>>>>>>>>>>> breaks the nice feature of DSI attach/detach callbacks and apparently

>>>>>>>>>>>>>>> can cause different issues depending on device bind order.

>>>>>>>>>>>>>>>

>>>>>>>>>>>>>>> I will try to look at the patches tomorrow and maybe I can find more

>>>>>>>>>>>>>>> constructive comments :)

>>>>>>>>>>>>>> As I said yesterday, exynos_dsi driver uses dsi host attach/detach

>>>>>>>>>>>>>> callbacks to control appearance/disappearance of downstream device. It

>>>>>>>>>>>>>> allows to:

>>>>>>>>>>>>>>

>>>>>>>>>>>>>> 1. Safely bind/unbind different device drivers at any time and at any

>>>>>>>>>>>>>> order, without killing exynos_drm and/or crashing system.

>>>>>>>>>>>>>>

>>>>>>>>>>>>>> 2. Avoid issues with late drm init - on some platforms exynos_drm device

>>>>>>>>>>>>>> appeared too late, due to deferred probe, and resulted in black screen

>>>>>>>>>>>>>> in userspace.

>>>>>>>>>>>>>>

>>>>>>>>>>>>>>

>>>>>>>>>>>>>> Now if we want to convert exynos_dsi to drm_bridge I see following options:

>>>>>>>>>>>>>>

>>>>>>>>>>>>>> A. Forgot about callbacks and make the exynos_drm to defer probing until

>>>>>>>>>>>>>> exynos_dsi bridge is available, probably it will cause later exynos_drm

>>>>>>>>>>>>>> appearance, thus probably black screen on some targets. So for sure it

>>>>>>>>>>>>>> will be suboptimal. Making it bridge unbind safe would be another

>>>>>>>>>>>>>> problem, but most developers do not care about it so why should we? :)

>>>>>>>>>>>>>>

>>>>>>>>>>>>>> B. Try to mimic current behaviour - exynos_dsi register bridge ASAP,

>>>>>>>>>>>>>> even if downstream devices are not yet attached, on attach/detach notify

>>>>>>>>>>>>>> drm about it via connector status change, for this dsi_host registration

>>>>>>>>>>>>>> should be performed from drm_bridge attach, I guess.

>>>>>>>>>>>>>>

>>>>>>>>>>>>>>

>>>>>>>>>>>>>> Option A is more standard, but is unsafe and causes other issues.

>>>>>>>>>>>>>>

>>>>>>>>>>>>>> Option B keeps current behaviour.

>>>>>>>>>>>>> Maybe we can have both, but I am not sure, if I am missing something:

>>>>>>>>>>>>>

>>>>>>>>>>>>> I still prefer option A for the samsung-dsim driver, because it is more

>>>>>>>>>>>>> standard, simpler and avoids issues with encoders, connectors or handling

>>>>>>>>>>>>> hotplug.

>>>>>>>>>>>>>

>>>>>>>>>>>>> The idea is to use two bridges in the exynos-dsi driver: One bridge in the

>>>>>>>>>>>>> samsung-dsim driver which implements option A and defers probing of the drm

>>>>>>>>>>>>> driver until the next bridge is attached. And a second bridge in the

>>>>>>>>>>>>> exynos_dsi that attaches to the first bridge (thus, allowing the exynos_drm

>>>>>>>>>>>>> device to appear) and implements the hotplug handling for notifying drm via

>>>>>>>>>>>>> connector status change.

>>>>>>>>>>>>>

>>>>>>>>>>>>> The driver for the i.MX8M would use the samsung-dsim bridge without an

>>>>>>>>>>>>> additional bridge.

>>>>>>>>>>>>>

>>>>>>>>>>>>> This allows the samsung-dsim driver to expose the standard behavior while the

>>>>>>>>>>>>> exynos_dsi may stick to the existing behavior for the exynos_drm driver.

>>>>>>>>>>>>>

>>>>>>>>>>>>> I hope this makes sense and does not sound too crazy. It might be difficult to

>>>>>>>>>>>>> get the probing and mipi host/device registration correct, but I will try, if

>>>>>>>>>>>>> this can work.

>>>>>>>>>>>> Adding two bridges for being able to support hotplugging adds many special

>>>>>>>>>>>> cases to the bridge driver and still requires more custom API to correctly add

>>>>>>>>>>>> the second bridge. I don't think that this a viable path to go.

>>>>>>>>>>> Just jumping in here: You cannot hotplug/hotremove anything from a

>>>>>>>>>>> drm_device after drm_dev_register has been called, except

>>>>>>>>>>> drm_connector. I didn't dig into details here so not sure whether you

>>>>>>>>>>> want to late-bind your bridge after drm_dev_register is called or not,

>>>>>>>>>>> so might just be fyi and not relevant to the discussion.

>>>>>>>>>> Thanks. AFAIC that is exactly what is currently implemented in the exynos_drm

>>>>>>>>>> driver (i.e. Option B)

>>>>>>>>>>

>>>>>>>>>> exynos_dsi_bind configures the encoder and registers a DSI host. Afterwards,

>>>>>>>>>> exynos_drm_bind (as component_master_ops) calls drm_dev_register. Later, a DSI

>>>>>>>>>> device might attach to the DSI host and call exynos_dsi_host_attach. In

>>>>>>>>>> exynos_dsi_host_attach, the driver finds the drm_bridge for the DSI device and

>>>>>>>>>> attaches this bridge to the encoder _after_ drm_dev_register has been called.

>>>>>>>>>> This is invalid behavior, right?

>>>>>>>>> Definitely not supported, I don't think we have the right locks in place

>>>>>>>>> to make sure this works.

>>>>>>>>>

>>>>>>>>> Now if your _only_ adding a drm_bridge (and not an encoder or anything

>>>>>>>>> like that), and you are adding the drm_connector correctly (like a

>>>>>>>>> hotplugged DP MST sink), then that would at least work from a uapi pov.

>>>>>>>>> Because drm_bridge isn't exposed as an uapi object.

>>>>>>>>>

>>>>>>>>> But yeah, as-is, don't :-)

>>>>>>>>>

>>>>>>>>> The solution here is a bunch of EPROBE_DEFER handling until all your

>>>>>>>>> bridges are loaded, with or without the assistance of component.c

>>>>>>>>> framework. Only then call drm_dev_register.

>>>>>>>> I have impression we have similar conversation already.

>>>>>>>>

>>>>>>>> As you stated drm_bridge and drm_panel are not exposed to userspace so

>>>>>>>> there shouldn't be problem with them from uapi PoV.

>>>>>>>>

>>>>>>>> On the other side drm_panel or drm_bridge are not used until pipeline

>>>>>>>> enters connected state (at least they were not some time ago :) ). The

>>>>>>>> issue is that bridge exposes drm_connector, but as you stated (again :)

>>>>>>>> ) connectors can be hotplugged, so in theory it should work. Practical

>>>>>>>> tests shows that it also works, but bugs can be still there.

>>>>>>>>

>>>>>>>> Bunch of EPROBE_DEFER was very slow (as a result userspace timeouted and

>>>>>>>> decided there is no display), and does not handle unbinding/re-binding

>>>>>>>> drivers.

>>>>>>> Rebinding drivers should be fixed now, with a bunch of fixes in driver

>>>>>>> core. If not, we need to fix this more.

>>>>>>>

>>>>>>> Also, EPROBE_DEFER is how this is supposed to work. If it's too slow,

>>>>>>> we need to fix EPROBE_DEFER (there's ideas for pre-sorting that never

>>>>>>> seem to go anywhere), not paper over it with bad architecture in

>>>>>>> drivers.

>>>>>> I've heard this argument multiple times, but it sounds more like an

>>>>>> attempt to ignore the problem and hope it will fall on someone else's

>>>>>> plate :-) Improvement in the probe deferral mechanism are certainly an

>>>>>> option to explore, but as far as I can tell nobody has proven that this

>>>>>> mechanism is or will be able to solve all problems related to probe

>>>>>> ordering dependencies. I wouldn't rule out the need for different

>>>>>> solutions for some of the issues.

>>>>> Then build another one. But adding hotplug for stuff that is there,

>>>>> and shouldn't be hotplugged, just because it's easier on driver

>>>>> writers and harder on userspace isn't really a good approach.

>>>>> -Daniel

>>>> I think it is quite clear that replacing or reworking the deferral mechanism

>>>> is out of scope for this discussion, which is why I would like to come back

>>>> to the original issue and sum this up as far as I understand it (which is

>>>> not really far when it comes to the details):

>>>>

>>>> We have the existing exynos driver that avoids the standard deferral

>>>> mechanism in favor of something that works but Daniel describes as

>>>> "definitely not supported".

>>>>

>>>> We have a proposal from Michael for converting the driver to the standard

>>>> drm_bridge behavior and more work from Michael and Marek based on this to

>>>> implement the platform specific parts for i.MX8MM.

>>>>

>>>>   From the i.MX8MM POV this approach already received some testing and looks

>>>> good as far as I can judge. Upstreaming this solution is blocked because of

>>>> objections from the Samsung maintainers.

>>>>

>>>> Sorry if I'm being blunt or naive, but where to go from here?

>>>>

>>> Maybe some more information by the Samsung maintainers would help:

>>>

>>> If I understand correctly, the main reason for the non-standard behavior is a

>>> userspace application that runs into a timeout if the drm-device does not

>>> appear in time. Correct? Is there something we can do about that?

>>>

>>> The other reason is the convenience of binding and unbinding a bridge driver,

>>> while the drm device is kept available. Correct? Is this used in development,

>>> testing, or production?

>>>

>>> Is there anything else that prevents the exynos drm from switching to the

>>> standard behavior?

>>>

>>> Would a exynos drm specific wrapper, which uses a standard bridge driver but

>>> exposes the non-standard behavior, be acceptable? (Unfortunately, my first try

>>> on something like that felt really awkward and didn't really work.)

>>

>> Even if we drop this 'non-standard' behaviour, your task will be still

>> quite difficult to fulfil - you are trying to completely rewrite core

>> component of Exynos display pipeline without hardware to test.

>>

>> ExynosDSI is used in almost all Exynos platforms supported mainline (ls

>> -1 arch/arm*/boot/dts/exynos*.dts | wc shows 35). It has different hw

>> versions (4 compatibles) and is used in different configurations (video

>> mode, command mode, with hw/sw trigger, connected to panels/bridges) and

>> for sure with big heritage, since it was one of the 1st DSI drivers.

>>

>> Rewriting such driver is challenging, even with access to hw.

>>

>> So maybe it would be better to move common parts in your and exynos

>> driver to 'shared library' and use it in both drivers - this way you

>> have bigger chances to avoid traps.

> 

> If exynos really can't be fixed up in a reasonable way, then I think

> sharing code doesn't make much sense - you drag the new driver down

> with the old one that's just hanging in there the wrong way round. For

> that case just copypaste the exynos code into a new clean drm_bridge

> driver, and done.

> 

> That would also mean that new exynos support in drm/exynos would need

> to be stalled until this is sorted out (at least for new platforms),

> since continuing the old way really doesn't sound so great. Wouldn't

> be the first time we just end up with a driver fork because the old

> one has too much heritage and is too hard to change.

> 

> Note that this can also be done within one driver codebase, e.g.

> nouveau has still legacy modeset code for nv04-nv4x, and atomic from

> nv50+ going forward.

> 

> Should be possible to find a pragmatic solution here going forward,

> despite tons of hw and heritage. If we use existing hard to retest hw

> support to stop new driver submissions from doing the right thing,

> that's a clear failure, we need a better approach here.

> -Daniel

> 


Right, and I just wanted to add that there seems to be a similar (maybe 
less complex?) situation for the CSIS CSI controller. In that case we 
already have two separate drivers for pretty much the same hardware in 
the media subsystem, media/platform/exynos4-is/mipi-csis.c for the 
exynos and staging/media/imx/imx7-mipi-csis.c for the imx.

I don't know the history for this, but it just came to my mind that this 
case is related and it might be interesting for the scope of this 
discussion.
Laurent Pinchart April 20, 2021, 2:27 p.m. UTC | #18
Hi Frieder,

On Tue, Apr 20, 2021 at 01:42:05PM +0200, Frieder Schrempf wrote:
> On 23.02.21 13:07, Daniel Vetter wrote:

> > On Thu, Feb 18, 2021 at 5:02 PM Andrzej Hajda <a.hajda@samsung.com> wrote:

> >> W dniu 18.02.2021 o 09:04, Michael Tretter pisze:

> >>> On Wed, 10 Feb 2021 10:10:37 +0100, Frieder Schrempf wrote:

> >>>> On 04.02.21 18:46, Daniel Vetter wrote:

> >>>>> On Thu, Feb 4, 2021 at 6:26 PM Laurent Pinchart <laurent.pinchart@ideasonboard.com> wrote:

> >>>>>> On Thu, Feb 04, 2021 at 06:19:22PM +0100, Daniel Vetter wrote:

> >>>>>>> On Thu, Feb 4, 2021 at 5:28 PM Andrzej Hajda wrote:

> >>>>>>>> W dniu 04.02.2021 o 17:05, Daniel Vetter pisze:

> >>>>>>>>> On Thu, Feb 04, 2021 at 11:56:32AM +0100, Michael Tretter wrote:

> >>>>>>>>>> On Thu, 04 Feb 2021 11:17:49 +0100, Daniel Vetter wrote:

> >>>>>>>>>>> On Wed, Feb 3, 2021 at 9:32 PM Michael Tretter wrote:

> >>>>>>>>>>>> On Mon, 01 Feb 2021 17:33:14 +0100, Michael Tretter wrote:

> >>>>>>>>>>>>> On Tue, 15 Sep 2020 21:40:40 +0200, Andrzej Hajda wrote:

> >>>>>>>>>>>>>> W dniu 14.09.2020 o 23:19, Andrzej Hajda pisze:

> >>>>>>>>>>>>>>> On 14.09.2020 22:01, Michael Tretter wrote:

> >>>>>>>>>>>>>>>> On Mon, 14 Sep 2020 14:31:19 +0200, Marek Szyprowski wrote:

> >>>>>>>>>>>>>>>>> On 14.09.2020 10:29, Marek Szyprowski wrote:

> >>>>>>>>>>>>>>>>>> On 11.09.2020 15:54, Michael Tretter wrote:

> >>>>>>>>>>>>>>>>>>> Make the exynos_dsi driver a full drm bridge that can be found and

> >>>>>>>>>>>>>>>>>>> used

> >>>>>>>>>>>>>>>>>>> from other drivers.

> >>>>>>>>>>>>>>>>>>>

> >>>>>>>>>>>>>>>>>>> Other drivers can only attach to the bridge, if a mipi dsi device

> >>>>>>>>>>>>>>>>>>> already attached to the bridge. This allows to defer the probe of the

> >>>>>>>>>>>>>>>>>>> display pipe until the downstream bridges are available, too.

> >>>>>>>>>>>>>>>>>>>

> >>>>>>>>>>>>>>>>>>> Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>

> >>>>>>>>>>>>>>>>>> This one (and the whole series applied) still fails on Exynos boards:

> >>>>>>>>>>>>>>>>>>

> >>>>>>>>>>>>>>>>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping

> >>>>>>>>>>>>>>>>>> operations

> >>>>>>>>>>>>>>>>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)

> >>>>>>>>>>>>>>>>>> OF: graph: no port node found in /soc/dsi@11c80000

> >>>>>>>>>>>>>>>>>> 8<--- cut here ---

> >>>>>>>>>>>>>>>>>> Unable to handle kernel NULL pointer dereference at virtual address

> >>>>>>>>>>>>>>>>>> 00000084

> >>>>>>>>>>>>>>>>>> pgd = (ptrval)

> >>>>>>>>>>>>>>>>>> [00000084] *pgd=00000000

> >>>>>>>>>>>>>>>>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM

> >>>>>>>>>>>>>>>>>> Modules linked in:

> >>>>>>>>>>>>>>>>>> CPU: 1 PID: 1 Comm: swapper/0 Not tainted

> >>>>>>>>>>>>>>>>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec #1608

> >>>>>>>>>>>>>>>>>> Hardware name: Samsung Exynos (Flattened Device Tree)

> >>>>>>>>>>>>>>>>>> PC is at drm_bridge_attach+0x18/0x164

> >>>>>>>>>>>>>>>>>> LR is at exynos_dsi_bind+0x88/0xa8

> >>>>>>>>>>>>>>>>>> pc : [<c0628c08>]    lr : [<c064d560>]    psr: 20000013

> >>>>>>>>>>>>>>>>>> sp : ef0dfca8  ip : 00000002  fp : c13190e0

> >>>>>>>>>>>>>>>>>> r10: 00000000  r9 : ee46d580  r8 : c13190e0

> >>>>>>>>>>>>>>>>>> r7 : ee438800  r6 : 00000018  r5 : ef253810  r4 : ef39e840

> >>>>>>>>>>>>>>>>>> r3 : 00000000  r2 : 00000018  r1 : ef39e888  r0 : ef39e840

> >>>>>>>>>>>>>>>>>> Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none

> >>>>>>>>>>>>>>>>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051

> >>>>>>>>>>>>>>>>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))

> >>>>>>>>>>>>>>>>>> Stack: (0xef0dfca8 to 0xef0e0000)

> >>>>>>>>>>>>>>>>>> ...

> >>>>>>>>>>>>>>>>>> [<c0628c08>] (drm_bridge_attach) from [<c064d560>]

> >>>>>>>>>>>>>>>>>> (exynos_dsi_bind+0x88/0xa8)

> >>>>>>>>>>>>>>>>>> [<c064d560>] (exynos_dsi_bind) from [<c066a800>]

> >>>>>>>>>>>>>>>>>> (component_bind_all+0xfc/0x290)

> >>>>>>>>>>>>>>>>>> [<c066a800>] (component_bind_all) from [<c0649dc0>]

> >>>>>>>>>>>>>>>>>> (exynos_drm_bind+0xe4/0x19c)

> >>>>>>>>>>>>>>>>>> [<c0649dc0>] (exynos_drm_bind) from [<c066ad74>]

> >>>>>>>>>>>>>>>>>> (try_to_bring_up_master+0x1e4/0x2c4)

> >>>>>>>>>>>>>>>>>> [<c066ad74>] (try_to_bring_up_master) from [<c066b2b4>]

> >>>>>>>>>>>>>>>>>> (component_master_add_with_match+0xd4/0x108)

> >>>>>>>>>>>>>>>>>> [<c066b2b4>] (component_master_add_with_match) from [<c0649ae8>]

> >>>>>>>>>>>>>>>>>> (exynos_drm_platform_probe+0xe4/0x110)

> >>>>>>>>>>>>>>>>>> [<c0649ae8>] (exynos_drm_platform_probe) from [<c0674e6c>]

> >>>>>>>>>>>>>>>>>> (platform_drv_probe+0x6c/0xa4)

> >>>>>>>>>>>>>>>>>> [<c0674e6c>] (platform_drv_probe) from [<c067242c>]

> >>>>>>>>>>>>>>>>>> (really_probe+0x200/0x4fc)

> >>>>>>>>>>>>>>>>>> [<c067242c>] (really_probe) from [<c06728f0>]

> >>>>>>>>>>>>>>>>>> (driver_probe_device+0x78/0x1fc)

> >>>>>>>>>>>>>>>>>> [<c06728f0>] (driver_probe_device) from [<c0672cd8>]

> >>>>>>>>>>>>>>>>>> (device_driver_attach+0x58/0x60)

> >>>>>>>>>>>>>>>>>> [<c0672cd8>] (device_driver_attach) from [<c0672dbc>]

> >>>>>>>>>>>>>>>>>> (__driver_attach+0xdc/0x174)

> >>>>>>>>>>>>>>>>>> [<c0672dbc>] (__driver_attach) from [<c06701b4>]

> >>>>>>>>>>>>>>>>>> (bus_for_each_dev+0x68/0xb4)

> >>>>>>>>>>>>>>>>>> [<c06701b4>] (bus_for_each_dev) from [<c06714e8>]

> >>>>>>>>>>>>>>>>>> (bus_add_driver+0x158/0x214)

> >>>>>>>>>>>>>>>>>> [<c06714e8>] (bus_add_driver) from [<c0673c1c>]

> >>>>>>>>>>>>>>>>>> (driver_register+0x78/0x110)

> >>>>>>>>>>>>>>>>>> [<c0673c1c>] (driver_register) from [<c0649ca8>]

> >>>>>>>>>>>>>>>>>> (exynos_drm_init+0xe4/0x118)

> >>>>>>>>>>>>>>>>>> [<c0649ca8>] (exynos_drm_init) from [<c0102484>]

> >>>>>>>>>>>>>>>>>> (do_one_initcall+0x8c/0x42c)

> >>>>>>>>>>>>>>>>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]

> >>>>>>>>>>>>>>>>>> (kernel_init_freeable+0x190/0x1dc)

> >>>>>>>>>>>>>>>>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7880>]

> >>>>>>>>>>>>>>>>>> (kernel_init+0x8/0x118)

> >>>>>>>>>>>>>>>>>> [<c0af7880>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)

> >>>>>>>>>>>>>>>>>> Exception stack(0xef0dffb0 to 0xef0dfff8)

> >>>>>>>>>>>>>>>>>> ...

> >>>>>>>>>>>>>>>>>> ---[ end trace ee27f313f9ed9da1 ]---

> >>>>>>>>>>>>>>>>>>

> >>>>>>>>>>>>>>>>>> # arm-linux-gnueabi-addr2line -e vmlinux c0628c08

> >>>>>>>>>>>>>>>>>> drivers/gpu/drm/drm_bridge.c:184 (discriminator 1)

> >>>>>>>>>>>>>>>>>>

> >>>>>>>>>>>>>>>>>> I will try to debug it a bit more today.

> >>>>>>>>>>>>>>>>> The above crash has been caused by lack of in_bridge initialization to

> >>>>>>>>>>>>>>>>> NULL in exynos_dsi_bind() in this patch. However, fixing it reveals

> >>>>>>>>>>>>>>>>> another issue:

> >>>>>>>>>>>>>>>>>

> >>>>>>>>>>>>>>>>> [drm] Exynos DRM: using 11c00000.fimd device for DMA mapping operations

> >>>>>>>>>>>>>>>>> exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)

> >>>>>>>>>>>>>>>>> OF: graph: no port node found in /soc/dsi@11c80000

> >>>>>>>>>>>>>>>>> 8<--- cut here ---

> >>>>>>>>>>>>>>>>> Unable to handle kernel NULL pointer dereference at virtual address

> >>>>>>>>>>>>>>>>> 00000280

> >>>>>>>>>>>>>>>>> pgd = (ptrval)

> >>>>>>>>>>>>>>>>> [00000280] *pgd=00000000

> >>>>>>>>>>>>>>>>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM

> >>>>>>>>>>>>>>>>> Modules linked in:

> >>>>>>>>>>>>>>>>> CPU: 0 PID: 1 Comm: swapper/0 Not tainted

> >>>>>>>>>>>>>>>>> 5.9.0-rc4-next-20200911-00010-g417dc70d70ec-dirty #1613

> >>>>>>>>>>>>>>>>> Hardware name: Samsung Exynos (Flattened Device Tree)

> >>>>>>>>>>>>>>>>> PC is at __mutex_lock+0x54/0xb18

> >>>>>>>>>>>>>>>>> LR is at lock_is_held_type+0x80/0x138

> >>>>>>>>>>>>>>>>> pc : [<c0afc920>]    lr : [<c0af63e8>]    psr: 60000013

> >>>>>>>>>>>>>>>>> sp : ef0dfd30  ip : 33937b74  fp : c13193c8

> >>>>>>>>>>>>>>>>> r10: c1208eec  r9 : 00000000  r8 : ee45f808

> >>>>>>>>>>>>>>>>> r7 : c19561a4  r6 : 00000000  r5 : 00000000  r4 : 0000024c

> >>>>>>>>>>>>>>>>> r3 : 00000000  r2 : 00204140  r1 : c124f13c  r0 : 00000000

> >>>>>>>>>>>>>>>>> Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none

> >>>>>>>>>>>>>>>>> Control: 10c5387d  Table: 4000404a  DAC: 00000051

> >>>>>>>>>>>>>>>>> Process swapper/0 (pid: 1, stack limit = 0x(ptrval))

> >>>>>>>>>>>>>>>>> Stack: (0xef0dfd30 to 0xef0e0000)

> >>>>>>>>>>>>>>>>> ...

> >>>>>>>>>>>>>>>>> [<c0afc920>] (__mutex_lock) from [<c0afd400>]

> >>>>>>>>>>>>>>>>> (mutex_lock_nested+0x1c/0x24)

> >>>>>>>>>>>>>>>>> [<c0afd400>] (mutex_lock_nested) from [<c064d4b8>]

> >>>>>>>>>>>>>>>>> (__exynos_dsi_host_attach+0x20/0x6c)

> >>>>>>>>>>>>>>>>> [<c064d4b8>] (__exynos_dsi_host_attach) from [<c064d914>]

> >>>>>>>>>>>>>>>>> (exynos_dsi_host_attach+0x70/0x194)

> >>>>>>>>>>>>>>>>> [<c064d914>] (exynos_dsi_host_attach) from [<c0656b64>]

> >>>>>>>>>>>>>>>>> (s6e8aa0_probe+0x1b0/0x218)

> >>>>>>>>>>>>>>>>> [<c0656b64>] (s6e8aa0_probe) from [<c0672530>]

> >>>>>>>>>>>>>>>>> (really_probe+0x200/0x4fc)

> >>>>>>>>>>>>>>>>> [<c0672530>] (really_probe) from [<c06729f4>]

> >>>>>>>>>>>>>>>>> (driver_probe_device+0x78/0x1fc)

> >>>>>>>>>>>>>>>>> [<c06729f4>] (driver_probe_device) from [<c0672ddc>]

> >>>>>>>>>>>>>>>>> (device_driver_attach+0x58/0x60)

> >>>>>>>>>>>>>>>>> [<c0672ddc>] (device_driver_attach) from [<c0672ec0>]

> >>>>>>>>>>>>>>>>> (__driver_attach+0xdc/0x174)

> >>>>>>>>>>>>>>>>> [<c0672ec0>] (__driver_attach) from [<c06702b8>]

> >>>>>>>>>>>>>>>>> (bus_for_each_dev+0x68/0xb4)

> >>>>>>>>>>>>>>>>> [<c06702b8>] (bus_for_each_dev) from [<c06715ec>]

> >>>>>>>>>>>>>>>>> (bus_add_driver+0x158/0x214)

> >>>>>>>>>>>>>>>>> [<c06715ec>] (bus_add_driver) from [<c0673d20>]

> >>>>>>>>>>>>>>>>> (driver_register+0x78/0x110)

> >>>>>>>>>>>>>>>>> [<c0673d20>] (driver_register) from [<c0102484>]

> >>>>>>>>>>>>>>>>> (do_one_initcall+0x8c/0x42c)

> >>>>>>>>>>>>>>>>> [<c0102484>] (do_one_initcall) from [<c11011c0>]

> >>>>>>>>>>>>>>>>> (kernel_init_freeable+0x190/0x1dc)

> >>>>>>>>>>>>>>>>> [<c11011c0>] (kernel_init_freeable) from [<c0af7988>]

> >>>>>>>>>>>>>>>>> (kernel_init+0x8/0x118)

> >>>>>>>>>>>>>>>>> [<c0af7988>] (kernel_init) from [<c0100114>] (ret_from_fork+0x14/0x20)

> >>>>>>>>>>>>>>>>> Exception stack(0xef0dffb0 to 0xef0dfff8)

> >>>>>>>>>>>>>>>>> ...

> >>>>>>>>>>>>>>>>> ---[ end trace c06e996ec2e8234d ]---

> >>>>>>>>>>>>>>>>>

> >>>>>>>>>>>>>>>>> This means that dsi->encoder.dev is not initialized in

> >>>>>>>>>>>>>>>>> __exynos_dsi_host_attach().

> >>>>>>>>>>>>>>>>>

> >>>>>>>>>>>>>>>>> This happens, because drm_bridge_attach() in exynos_dsi_bind() returned

> >>>>>>>>>>>>>>>>> earlier -517 (deferred probe), what causes cleanup of encoder and

> >>>>>>>>>>>>>>>>> release of all drm resources.

> >>>>>>>>>>>>>>>>>

> >>>>>>>>>>>>>>>>> Then however, the panel tries to register itself and

> >>>>>>>>>>>>>>>>> exynos_dsi_host_attach() tries to access the released encoder (which is

> >>>>>>>>>>>>>>>>> zeroed in drm_encoder_release) and rest of resources, what causes

> >>>>>>>>>>>>>>>>> failure.

> >>>>>>>>>>>>>>>>>

> >>>>>>>>>>>>>>>>> It looks that something is missing. Maybe mipi host has to be

> >>>>>>>>>>>>>>>>> registered

> >>>>>>>>>>>>>>>>> later, when bridge is ready? I have no idea how it is handled before

> >>>>>>>>>>>>>>>>> this patch. Andrzej, could you comment it a bit?

> >>>>>>>>>>>>>>>> I intentionally changed the order, because if another bridge follows

> >>>>>>>>>>>>>>>> in the

> >>>>>>>>>>>>>>>> pipeline, the probe of the drm driver has to be deferred until some

> >>>>>>>>>>>>>>>> bridge

> >>>>>>>>>>>>>>>> provides a connector. The next bridge registers itself via the

> >>>>>>>>>>>>>>>> host_attach

> >>>>>>>>>>>>>>>> function and the deferral is ensured via the bind for the bind/unbind

> >>>>>>>>>>>>>>>> API or

> >>>>>>>>>>>>>>>> the bridge_attach function otherwise.

> >>>>>>>>>>>>>>>>

> >>>>>>>>>>>>>>>> On the other hand, the bridge does not have an encoder until the mipi

> >>>>>>>>>>>>>>>> device

> >>>>>>>>>>>>>>>> has been attached.

> >>>>>>>>>>>>>>>>

> >>>>>>>>>>>>>>>> As a solution, the exynos dsi driver must initialize the encoder in

> >>>>>>>>>>>>>>>> exynos_dsi_probe instead of in exynos_dsi_bind and access the encoder

> >>>>>>>>>>>>>>>> via

> >>>>>>>>>>>>>>>> exynos_dsi instead of the bridge.

> >>>>>>>>>>>>>>>>

> >>>>>>>>>>>>>>>> Can you try to move everything except samsung_dsim_bind from

> >>>>>>>>>>>>>>>> exynos_dsi_bind

> >>>>>>>>>>>>>>>> to exynos_dsi_probe (respectively for unbind) and report if it fixes the

> >>>>>>>>>>>>>>>> crash.

> >>>>>>>>>>>>>>> The original behaviour is that encoder (exynos_dsi) is registered

> >>>>>>>>>>>>>>> regardless of sink presence (initially panel, later also bridge) - it

> >>>>>>>>>>>>>>> avoids multiple issues with deferred probe, device driver bind/unbind

> >>>>>>>>>>>>>>> and module load/unload. Appearance or disappearance of sink is

> >>>>>>>>>>>>>>> reported to host nicely via DSI attach/detach callbacks - and it is

> >>>>>>>>>>>>>>> reflected in drm world as change state of the connector.

> >>>>>>>>>>>>>>>

> >>>>>>>>>>>>>>> Registering DSI host in bind and unregistering in unbind assures that

> >>>>>>>>>>>>>>> if mipi_dsi device is attached/detached the drm device is always

> >>>>>>>>>>>>>>> present - it makes device/driver binding race free and allows to avoid

> >>>>>>>>>>>>>>> additional locking.

> >>>>>>>>>>>>>>>

> >>>>>>>>>>>>>>> Moving DSI host registration to probe changes everything, for sure it

> >>>>>>>>>>>>>>> breaks the nice feature of DSI attach/detach callbacks and apparently

> >>>>>>>>>>>>>>> can cause different issues depending on device bind order.

> >>>>>>>>>>>>>>>

> >>>>>>>>>>>>>>> I will try to look at the patches tomorrow and maybe I can find more

> >>>>>>>>>>>>>>> constructive comments :)

> >>>>>>>>>>>>>> As I said yesterday, exynos_dsi driver uses dsi host attach/detach

> >>>>>>>>>>>>>> callbacks to control appearance/disappearance of downstream device. It

> >>>>>>>>>>>>>> allows to:

> >>>>>>>>>>>>>>

> >>>>>>>>>>>>>> 1. Safely bind/unbind different device drivers at any time and at any

> >>>>>>>>>>>>>> order, without killing exynos_drm and/or crashing system.

> >>>>>>>>>>>>>>

> >>>>>>>>>>>>>> 2. Avoid issues with late drm init - on some platforms exynos_drm device

> >>>>>>>>>>>>>> appeared too late, due to deferred probe, and resulted in black screen

> >>>>>>>>>>>>>> in userspace.

> >>>>>>>>>>>>>>

> >>>>>>>>>>>>>>

> >>>>>>>>>>>>>> Now if we want to convert exynos_dsi to drm_bridge I see following options:

> >>>>>>>>>>>>>>

> >>>>>>>>>>>>>> A. Forgot about callbacks and make the exynos_drm to defer probing until

> >>>>>>>>>>>>>> exynos_dsi bridge is available, probably it will cause later exynos_drm

> >>>>>>>>>>>>>> appearance, thus probably black screen on some targets. So for sure it

> >>>>>>>>>>>>>> will be suboptimal. Making it bridge unbind safe would be another

> >>>>>>>>>>>>>> problem, but most developers do not care about it so why should we? :)

> >>>>>>>>>>>>>>

> >>>>>>>>>>>>>> B. Try to mimic current behaviour - exynos_dsi register bridge ASAP,

> >>>>>>>>>>>>>> even if downstream devices are not yet attached, on attach/detach notify

> >>>>>>>>>>>>>> drm about it via connector status change, for this dsi_host registration

> >>>>>>>>>>>>>> should be performed from drm_bridge attach, I guess.

> >>>>>>>>>>>>>>

> >>>>>>>>>>>>>>

> >>>>>>>>>>>>>> Option A is more standard, but is unsafe and causes other issues.

> >>>>>>>>>>>>>>

> >>>>>>>>>>>>>> Option B keeps current behaviour.

> >>>>>>>>>>>>> Maybe we can have both, but I am not sure, if I am missing something:

> >>>>>>>>>>>>>

> >>>>>>>>>>>>> I still prefer option A for the samsung-dsim driver, because it is more

> >>>>>>>>>>>>> standard, simpler and avoids issues with encoders, connectors or handling

> >>>>>>>>>>>>> hotplug.

> >>>>>>>>>>>>>

> >>>>>>>>>>>>> The idea is to use two bridges in the exynos-dsi driver: One bridge in the

> >>>>>>>>>>>>> samsung-dsim driver which implements option A and defers probing of the drm

> >>>>>>>>>>>>> driver until the next bridge is attached. And a second bridge in the

> >>>>>>>>>>>>> exynos_dsi that attaches to the first bridge (thus, allowing the exynos_drm

> >>>>>>>>>>>>> device to appear) and implements the hotplug handling for notifying drm via

> >>>>>>>>>>>>> connector status change.

> >>>>>>>>>>>>>

> >>>>>>>>>>>>> The driver for the i.MX8M would use the samsung-dsim bridge without an

> >>>>>>>>>>>>> additional bridge.

> >>>>>>>>>>>>>

> >>>>>>>>>>>>> This allows the samsung-dsim driver to expose the standard behavior while the

> >>>>>>>>>>>>> exynos_dsi may stick to the existing behavior for the exynos_drm driver.

> >>>>>>>>>>>>>

> >>>>>>>>>>>>> I hope this makes sense and does not sound too crazy. It might be difficult to

> >>>>>>>>>>>>> get the probing and mipi host/device registration correct, but I will try, if

> >>>>>>>>>>>>> this can work.

> >>>>>>>>>>>> Adding two bridges for being able to support hotplugging adds many special

> >>>>>>>>>>>> cases to the bridge driver and still requires more custom API to correctly add

> >>>>>>>>>>>> the second bridge. I don't think that this a viable path to go.

> >>>>>>>>>>> Just jumping in here: You cannot hotplug/hotremove anything from a

> >>>>>>>>>>> drm_device after drm_dev_register has been called, except

> >>>>>>>>>>> drm_connector. I didn't dig into details here so not sure whether you

> >>>>>>>>>>> want to late-bind your bridge after drm_dev_register is called or not,

> >>>>>>>>>>> so might just be fyi and not relevant to the discussion.

> >>>>>>>>>> Thanks. AFAIC that is exactly what is currently implemented in the exynos_drm

> >>>>>>>>>> driver (i.e. Option B)

> >>>>>>>>>>

> >>>>>>>>>> exynos_dsi_bind configures the encoder and registers a DSI host. Afterwards,

> >>>>>>>>>> exynos_drm_bind (as component_master_ops) calls drm_dev_register. Later, a DSI

> >>>>>>>>>> device might attach to the DSI host and call exynos_dsi_host_attach. In

> >>>>>>>>>> exynos_dsi_host_attach, the driver finds the drm_bridge for the DSI device and

> >>>>>>>>>> attaches this bridge to the encoder _after_ drm_dev_register has been called.

> >>>>>>>>>> This is invalid behavior, right?

> >>>>>>>>> Definitely not supported, I don't think we have the right locks in place

> >>>>>>>>> to make sure this works.

> >>>>>>>>>

> >>>>>>>>> Now if your _only_ adding a drm_bridge (and not an encoder or anything

> >>>>>>>>> like that), and you are adding the drm_connector correctly (like a

> >>>>>>>>> hotplugged DP MST sink), then that would at least work from a uapi pov.

> >>>>>>>>> Because drm_bridge isn't exposed as an uapi object.

> >>>>>>>>>

> >>>>>>>>> But yeah, as-is, don't :-)

> >>>>>>>>>

> >>>>>>>>> The solution here is a bunch of EPROBE_DEFER handling until all your

> >>>>>>>>> bridges are loaded, with or without the assistance of component.c

> >>>>>>>>> framework. Only then call drm_dev_register.

> >>>>>>>> I have impression we have similar conversation already.

> >>>>>>>>

> >>>>>>>> As you stated drm_bridge and drm_panel are not exposed to userspace so

> >>>>>>>> there shouldn't be problem with them from uapi PoV.

> >>>>>>>>

> >>>>>>>> On the other side drm_panel or drm_bridge are not used until pipeline

> >>>>>>>> enters connected state (at least they were not some time ago :) ). The

> >>>>>>>> issue is that bridge exposes drm_connector, but as you stated (again :)

> >>>>>>>> ) connectors can be hotplugged, so in theory it should work. Practical

> >>>>>>>> tests shows that it also works, but bugs can be still there.

> >>>>>>>>

> >>>>>>>> Bunch of EPROBE_DEFER was very slow (as a result userspace timeouted and

> >>>>>>>> decided there is no display), and does not handle unbinding/re-binding

> >>>>>>>> drivers.

> >>>>>>> Rebinding drivers should be fixed now, with a bunch of fixes in driver

> >>>>>>> core. If not, we need to fix this more.

> >>>>>>>

> >>>>>>> Also, EPROBE_DEFER is how this is supposed to work. If it's too slow,

> >>>>>>> we need to fix EPROBE_DEFER (there's ideas for pre-sorting that never

> >>>>>>> seem to go anywhere), not paper over it with bad architecture in

> >>>>>>> drivers.

> >>>>>> I've heard this argument multiple times, but it sounds more like an

> >>>>>> attempt to ignore the problem and hope it will fall on someone else's

> >>>>>> plate :-) Improvement in the probe deferral mechanism are certainly an

> >>>>>> option to explore, but as far as I can tell nobody has proven that this

> >>>>>> mechanism is or will be able to solve all problems related to probe

> >>>>>> ordering dependencies. I wouldn't rule out the need for different

> >>>>>> solutions for some of the issues.

> >>>>> Then build another one. But adding hotplug for stuff that is there,

> >>>>> and shouldn't be hotplugged, just because it's easier on driver

> >>>>> writers and harder on userspace isn't really a good approach.

> >>>>> -Daniel

> >>>> I think it is quite clear that replacing or reworking the deferral mechanism

> >>>> is out of scope for this discussion, which is why I would like to come back

> >>>> to the original issue and sum this up as far as I understand it (which is

> >>>> not really far when it comes to the details):

> >>>>

> >>>> We have the existing exynos driver that avoids the standard deferral

> >>>> mechanism in favor of something that works but Daniel describes as

> >>>> "definitely not supported".

> >>>>

> >>>> We have a proposal from Michael for converting the driver to the standard

> >>>> drm_bridge behavior and more work from Michael and Marek based on this to

> >>>> implement the platform specific parts for i.MX8MM.

> >>>>

> >>>>   From the i.MX8MM POV this approach already received some testing and looks

> >>>> good as far as I can judge. Upstreaming this solution is blocked because of

> >>>> objections from the Samsung maintainers.

> >>>>

> >>>> Sorry if I'm being blunt or naive, but where to go from here?

> >>>>

> >>> Maybe some more information by the Samsung maintainers would help:

> >>>

> >>> If I understand correctly, the main reason for the non-standard behavior is a

> >>> userspace application that runs into a timeout if the drm-device does not

> >>> appear in time. Correct? Is there something we can do about that?

> >>>

> >>> The other reason is the convenience of binding and unbinding a bridge driver,

> >>> while the drm device is kept available. Correct? Is this used in development,

> >>> testing, or production?

> >>>

> >>> Is there anything else that prevents the exynos drm from switching to the

> >>> standard behavior?

> >>>

> >>> Would a exynos drm specific wrapper, which uses a standard bridge driver but

> >>> exposes the non-standard behavior, be acceptable? (Unfortunately, my first try

> >>> on something like that felt really awkward and didn't really work.)

> >>

> >> Even if we drop this 'non-standard' behaviour, your task will be still

> >> quite difficult to fulfil - you are trying to completely rewrite core

> >> component of Exynos display pipeline without hardware to test.

> >>

> >> ExynosDSI is used in almost all Exynos platforms supported mainline (ls

> >> -1 arch/arm*/boot/dts/exynos*.dts | wc shows 35). It has different hw

> >> versions (4 compatibles) and is used in different configurations (video

> >> mode, command mode, with hw/sw trigger, connected to panels/bridges) and

> >> for sure with big heritage, since it was one of the 1st DSI drivers.

> >>

> >> Rewriting such driver is challenging, even with access to hw.

> >>

> >> So maybe it would be better to move common parts in your and exynos

> >> driver to 'shared library' and use it in both drivers - this way you

> >> have bigger chances to avoid traps.

> > 

> > If exynos really can't be fixed up in a reasonable way, then I think

> > sharing code doesn't make much sense - you drag the new driver down

> > with the old one that's just hanging in there the wrong way round. For

> > that case just copypaste the exynos code into a new clean drm_bridge

> > driver, and done.

> > 

> > That would also mean that new exynos support in drm/exynos would need

> > to be stalled until this is sorted out (at least for new platforms),

> > since continuing the old way really doesn't sound so great. Wouldn't

> > be the first time we just end up with a driver fork because the old

> > one has too much heritage and is too hard to change.

> > 

> > Note that this can also be done within one driver codebase, e.g.

> > nouveau has still legacy modeset code for nv04-nv4x, and atomic from

> > nv50+ going forward.

> > 

> > Should be possible to find a pragmatic solution here going forward,

> > despite tons of hw and heritage. If we use existing hard to retest hw

> > support to stop new driver submissions from doing the right thing,

> > that's a clear failure, we need a better approach here.

> > -Daniel

> 

> Right, and I just wanted to add that there seems to be a similar (maybe 

> less complex?) situation for the CSIS CSI controller. In that case we 

> already have two separate drivers for pretty much the same hardware in 

> the media subsystem, media/platform/exynos4-is/mipi-csis.c for the 

> exynos and staging/media/imx/imx7-mipi-csis.c for the imx.


And we would have at least a third on in
staging/media/imx/imx8-mipi-csi2-sam.c if we followed the NXP BSP :-)
I've added support for i.MX8 to the imx7-mipi-csis driver recently, and
I'm half-tempted to merge it with the
media/platform/exynos4-is/mipi-csis.c driver at some point. Lack of
Exynos test hardware and documentation, as well as of time, will likely
prevent that from happening, but if someone wanted to give it a go, it
would be nice.

> I don't know the history for this, but it just came to my mind that this 

> case is related and it might be interesting for the scope of this 

> discussion.


I think staging/media/imx/imx7-mipi-csis.c was developed in the NXP BSP,
and we merged it upstream without realizing it was the same IP core as
media/platform/exynos4-is/mipi-csis.c.

-- 
Regards,

Laurent Pinchart
diff mbox series

Patch

diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index 2d75f9877dc0..5e7c1a99a3ee 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -266,6 +266,7 @@  struct exynos_dsi_driver_data {
 
 struct exynos_dsi {
 	struct drm_encoder encoder;
+	struct drm_bridge bridge;
 	struct mipi_dsi_host dsi_host;
 	struct drm_connector connector;
 	struct drm_panel *panel;
@@ -1602,6 +1603,60 @@  static const struct drm_encoder_helper_funcs exynos_dsi_encoder_helper_funcs = {
 	.disable = exynos_dsi_disable,
 };
 
+static int exynos_dsi_bridge_attach(struct drm_bridge *bridge,
+				    enum drm_bridge_attach_flags flags)
+{
+	struct exynos_dsi *dsi = bridge->driver_private;
+	struct drm_encoder *encoder = bridge->encoder;
+	int ret;
+
+	if (!dsi->out_bridge && !dsi->panel)
+		return -EPROBE_DEFER;
+
+	if (dsi->out_bridge) {
+		ret = drm_bridge_attach(encoder, dsi->out_bridge,
+					bridge, flags);
+		if (ret)
+			return ret;
+		list_splice_init(&encoder->bridge_chain, &dsi->bridge_chain);
+	} else {
+		ret = exynos_dsi_create_connector(encoder);
+		if (ret)
+			return ret;
+
+		if (dsi->panel) {
+			dsi->connector.status = connector_status_connected;
+		}
+	}
+
+	return 0;
+}
+
+static void exynos_dsi_bridge_detach(struct drm_bridge *bridge)
+{
+	struct exynos_dsi *dsi = bridge->driver_private;
+	struct drm_encoder *encoder = bridge->encoder;
+	struct drm_device *drm = encoder->dev;
+
+	if (dsi->panel) {
+		mutex_lock(&drm->mode_config.mutex);
+		exynos_dsi_disable(&dsi->encoder);
+		dsi->panel = NULL;
+		dsi->connector.status = connector_status_disconnected;
+		mutex_unlock(&drm->mode_config.mutex);
+	} else {
+		if (dsi->out_bridge->funcs->detach)
+			dsi->out_bridge->funcs->detach(dsi->out_bridge);
+		dsi->out_bridge = NULL;
+		INIT_LIST_HEAD(&dsi->bridge_chain);
+	}
+}
+
+static const struct drm_bridge_funcs exynos_dsi_bridge_funcs = {
+	.attach = exynos_dsi_bridge_attach,
+	.detach = exynos_dsi_bridge_detach,
+};
+
 MODULE_DEVICE_TABLE(of, exynos_dsi_of_match);
 
 static int exynos_dsi_host_attach(struct mipi_dsi_host *host,
@@ -1609,25 +1664,12 @@  static int exynos_dsi_host_attach(struct mipi_dsi_host *host,
 {
 	struct exynos_dsi *dsi = host_to_dsi(host);
 	const struct exynos_dsi_host_ops *ops = dsi->driver_data->host_ops;
-	struct drm_encoder *encoder = &dsi->encoder;
 	struct drm_bridge *out_bridge;
 
-	out_bridge  = of_drm_find_bridge(device->dev.of_node);
+	out_bridge = of_drm_find_bridge(device->dev.of_node);
 	if (out_bridge) {
-		drm_bridge_attach(encoder, out_bridge, NULL, 0);
 		dsi->out_bridge = out_bridge;
-		list_splice_init(&encoder->bridge_chain, &dsi->bridge_chain);
 	} else {
-		int ret = exynos_dsi_create_connector(encoder);
-
-		if (ret) {
-			DRM_DEV_ERROR(dsi->dev,
-				      "failed to create connector ret = %d\n",
-				      ret);
-			drm_encoder_cleanup(encoder);
-			return ret;
-		}
-
 		dsi->panel = of_drm_find_panel(device->dev.of_node);
 		if (IS_ERR(dsi->panel))
 			dsi->panel = NULL;
@@ -1662,20 +1704,6 @@  static int exynos_dsi_host_detach(struct mipi_dsi_host *host,
 {
 	struct exynos_dsi *dsi = host_to_dsi(host);
 	const struct exynos_dsi_host_ops *ops = dsi->driver_data->host_ops;
-	struct drm_device *drm = dsi->encoder.dev;
-
-	if (dsi->panel) {
-		mutex_lock(&drm->mode_config.mutex);
-		exynos_dsi_disable(&dsi->encoder);
-		dsi->panel = NULL;
-		dsi->connector.status = connector_status_disconnected;
-		mutex_unlock(&drm->mode_config.mutex);
-	} else {
-		if (dsi->out_bridge->funcs->detach)
-			dsi->out_bridge->funcs->detach(dsi->out_bridge);
-		dsi->out_bridge = NULL;
-		INIT_LIST_HEAD(&dsi->bridge_chain);
-	}
 
 	if (ops && ops->detach)
 		ops->detach(dsi->dsi_host.dev, device);
@@ -1786,7 +1814,15 @@  static int exynos_dsi_bind(struct device *dev, struct device *master,
 		of_node_put(in_bridge_node);
 	}
 
+	ret = drm_bridge_attach(encoder, &dsi->bridge, in_bridge, 0);
+	if (ret)
+		goto err;
+
 	return 0;
+
+err:
+	drm_encoder_cleanup(encoder);
+	return ret;
 }
 
 static void exynos_dsi_unbind(struct device *dev, struct device *master,
@@ -1796,6 +1832,8 @@  static void exynos_dsi_unbind(struct device *dev, struct device *master,
 	struct drm_encoder *encoder = &dsi->encoder;
 
 	exynos_dsi_disable(encoder);
+
+	drm_encoder_cleanup(encoder);
 }
 
 static const struct component_ops exynos_dsi_component_ops = {
@@ -1806,6 +1844,7 @@  static const struct component_ops exynos_dsi_component_ops = {
 static struct exynos_dsi *__exynos_dsi_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
+	struct drm_bridge *bridge;
 	struct resource *res;
 	struct exynos_dsi *dsi;
 	int ret, i;
@@ -1894,11 +1933,19 @@  static struct exynos_dsi *__exynos_dsi_probe(struct platform_device *pdev)
 	if (ret)
 		return ERR_PTR(ret);
 
+	bridge = &dsi->bridge;
+	bridge->driver_private = dsi;
+	bridge->funcs = &exynos_dsi_bridge_funcs;
+	bridge->of_node = dev->of_node;
+	drm_bridge_add(bridge);
+
 	return dsi;
 }
 
 static void __exynos_dsi_remove(struct exynos_dsi *dsi)
 {
+	drm_bridge_remove(&dsi->bridge);
+
 	mipi_dsi_host_unregister(&dsi->dsi_host);
 }