diff mbox series

[3/3] drm/msm/dsi: Enable DATABUS_WIDEN for DSI command mode

Message ID 20230525-add-widebus-support-v1-3-c7069f2efca1@quicinc.com
State New
Headers show
Series [1/3] drm/msm/dpu: Add DPU_INTF_DATABUS_WIDEN feature flag for DPU >= 5.0 | expand

Commit Message

Jessica Zhang June 14, 2023, 1:57 a.m. UTC
DSI 6G v2.5.x+ supports a data-bus widen mode that allows DSI to send
48 bits of compressed data per pclk instead of 24.

For all chipsets that support this mode, enable it whenever DSC is
enabled as recommend by the hardware programming guide.

Only enable this for command mode as we are currently unable to validate
it for video mode.

Signed-off-by: Jessica Zhang <quic_jesszhan@quicinc.com>
---

Note: The dsi.xml.h changes were generated using the headergen2 script in
envytools [1], but the changes to the copyright and rules-ng-ng source file
paths were dropped.

[1] https://github.com/freedreno/envytools/

 drivers/gpu/drm/msm/dsi/dsi.xml.h  |  1 +
 drivers/gpu/drm/msm/dsi/dsi_host.c | 19 ++++++++++++++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)


--
2.40.1

Comments

Abhinav Kumar June 22, 2023, 11:04 p.m. UTC | #1
On 6/14/2023 3:03 AM, Marijn Suijten wrote:
> On 2023-06-14 10:49:31, Dmitry Baryshkov wrote:
>> On 14/06/2023 04:57, Jessica Zhang wrote:
>>> DSI 6G v2.5.x+ supports a data-bus widen mode that allows DSI to send
>>> 48 bits of compressed data per pclk instead of 24.
>>>
>>> For all chipsets that support this mode, enable it whenever DSC is
>>> enabled as recommend by the hardware programming guide.
>>>
>>> Only enable this for command mode as we are currently unable to validate
>>> it for video mode.
>>>
>>> Signed-off-by: Jessica Zhang <quic_jesszhan@quicinc.com>
>>> ---
>>>
>>> Note: The dsi.xml.h changes were generated using the headergen2 script in
>>> envytools [1], but the changes to the copyright and rules-ng-ng source file
>>> paths were dropped.
>>>
>>> [1] https://github.com/freedreno/envytools/
>>>
>>>    drivers/gpu/drm/msm/dsi/dsi.xml.h  |  1 +
>>>    drivers/gpu/drm/msm/dsi/dsi_host.c | 19 ++++++++++++++++++-
>>>    2 files changed, 19 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/msm/dsi/dsi.xml.h b/drivers/gpu/drm/msm/dsi/dsi.xml.h
>>> index a4a154601114..2a7d980e12c3 100644
>>> --- a/drivers/gpu/drm/msm/dsi/dsi.xml.h
>>> +++ b/drivers/gpu/drm/msm/dsi/dsi.xml.h
>>> @@ -664,6 +664,7 @@ static inline uint32_t DSI_CMD_MODE_MDP_CTRL2_INPUT_RGB_SWAP(enum dsi_rgb_swap v
>>>    	return ((val) << DSI_CMD_MODE_MDP_CTRL2_INPUT_RGB_SWAP__SHIFT) & DSI_CMD_MODE_MDP_CTRL2_INPUT_RGB_SWAP__MASK;
>>>    }
>>>    #define DSI_CMD_MODE_MDP_CTRL2_BURST_MODE			0x00010000
>>> +#define DSI_CMD_MODE_MDP_CTRL2_DATABUS_WIDEN			0x00100000
>>>
>>>    #define REG_DSI_CMD_MODE_MDP_STREAM2_CTRL			0x000001b8
>>>    #define DSI_CMD_MODE_MDP_STREAM2_CTRL_DATA_TYPE__MASK		0x0000003f
>>> diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c
>>> index 5d7b4409e4e9..1da5238e7105 100644
>>> --- a/drivers/gpu/drm/msm/dsi/dsi_host.c
>>> +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
>>> @@ -927,6 +927,9 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
>>>    	u32 hdisplay = mode->hdisplay;
>>>    	u32 wc;
>>>    	int ret;
>>> +	bool widebus_supported = msm_host->cfg_hnd->major == MSM_DSI_VER_MAJOR_6G &&
>>> +			msm_host->cfg_hnd->minor >= MSM_DSI_6G_VER_MINOR_V2_5_0;
>>> +
>>>
>>>    	DBG("");
>>>
>>> @@ -973,8 +976,15 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
>>>    		 *
>>>    		 * hdisplay will be divided by 3 here to account for the fact
>>>    		 * that DPU sends 3 bytes per pclk cycle to DSI.
>>> +		 *
>>> +		 * If widebus is supported, set DATABUS_WIDEN register and divide hdisplay by 6
>>> +		 * instead of 3
>>
>> This is useless, it is already obvious from the code below. Instead
>> there should be something like "wide bus extends that to 6 bytes per
>> pclk cycle"
> 
> Yes please.  In general, don't paraphrase the code, but explain _why_ it
> is doing what it does.  Saying that the widebus feature doubles the
> bandwidth per pclk tick is much more clear than "divide by 6 instead of
> 3" - we can read that from the code.
> 
> Overall comments have been very good so far (such as the original "to
> account for the fact that DPU sends 3 bytes per pclk cycle"), though!
> 
>>>    		 */
>>> -		hdisplay = DIV_ROUND_UP(msm_dsc_get_bytes_per_line(msm_host->dsc), 3);
>>> +		if (!(msm_host->mode_flags & MIPI_DSI_MODE_VIDEO) && widebus_supported)
>>> +			hdisplay = DIV_ROUND_UP(msm_dsc_get_bytes_per_line(msm_host->dsc), 6);
>>> +		else
>>> +			hdisplay = DIV_ROUND_UP(msm_dsc_get_bytes_per_line(msm_host->dsc), 3);
>>> +
>>>    		h_total += hdisplay;
>>>    		ha_end = ha_start + hdisplay;
>>>    	}
>>> @@ -1027,6 +1037,13 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
>>>    		dsi_write(msm_host, REG_DSI_CMD_MDP_STREAM0_TOTAL,
>>>    			DSI_CMD_MDP_STREAM0_TOTAL_H_TOTAL(hdisplay) |
>>>    			DSI_CMD_MDP_STREAM0_TOTAL_V_TOTAL(mode->vdisplay));
>>> +
>>> +		if (msm_host->dsc && widebus_supported) {
>>> +			u32 mdp_ctrl2 = dsi_read(msm_host, REG_DSI_CMD_MODE_MDP_CTRL2);
>>> +
>>> +			mdp_ctrl2 |= DSI_CMD_MODE_MDP_CTRL2_DATABUS_WIDEN;
>>> +			dsi_write(msm_host, REG_DSI_CMD_MODE_MDP_CTRL2, mdp_ctrl2);
>>
>> Is widebus applicable only to the CMD mode, or video mode can employ it too?
> 
> The patch description states that it was not tested on video-mode yet,
> so I assume it will.  But this should also be highlighted with a comment
> (e.g. /* XXX: Allow for video-mode once tested/fixed */), _especially_
> on the check for MIPI_DSI_MODE_VIDEO above.
>

Sure, we can leave a comment.


> If I understand this correctly DSC is not working for video mode at all
> on these setups, right?  Or no-one was able to test it?  I'm inclined to
> request dropping these artifical guards to have as little friction as
> possible when someone starts enabling and testing this - and less
> patches removing artificial bounds in the future.
> 

Noone was able to test it. Like I have said before, we dont have or have 
not brought up any DSI DSC panel with video mode. DP will be the first 
to validate the video mode path for DSC so even that time we cannot test 
DSI with DSC on video mode.

I think we should find a panel which supports cmd and video mode ( I 
believe one of the HDKs does have that ) and bring that one up first to 
validate this.

I believe we should keep this checks with the comment you have 
suggested. If someone tests it and then removes it, I am comfortable 
with that.

Till then, I would rather guard that configuration.

> - Marijn
Abhinav Kumar June 23, 2023, 12:01 a.m. UTC | #2
On 6/14/2023 2:56 AM, Marijn Suijten wrote:
> On 2023-06-13 18:57:13, Jessica Zhang wrote:
>> DSI 6G v2.5.x+ supports a data-bus widen mode that allows DSI to send
>> 48 bits of compressed data per pclk instead of 24.
>>
>> For all chipsets that support this mode, enable it whenever DSC is
>> enabled as recommend by the hardware programming guide.
>>
>> Only enable this for command mode as we are currently unable to validate
>> it for video mode.
>>
>> Signed-off-by: Jessica Zhang <quic_jesszhan@quicinc.com>
>> ---
>>
>> Note: The dsi.xml.h changes were generated using the headergen2 script in
>> envytools [1], but the changes to the copyright and rules-ng-ng source file
>> paths were dropped.
>>
>> [1] https://github.com/freedreno/envytools/
> 
> More interesting would be a link to the Mesa MR upstreaming this
> bitfield to dsi.xml [2] (which I have not found on my own yet).
> 
> [2]: https://gitlab.freedesktop.org/mesa/mesa/-/blame/main/src/freedreno/registers/dsi/dsi.xml
> 

Thats because we havent submitted a MR yet for this on mesa.

Generally, our team does not have legal permissions yet for mesa MRs 
other than mesa drm because we got permissions for the modetest.

Rob/Dmitry, can one of you pls help with the corresponding mesa MR for this?

The xml file change was autogenerated so this patch can go in.

>>   drivers/gpu/drm/msm/dsi/dsi.xml.h  |  1 +
>>   drivers/gpu/drm/msm/dsi/dsi_host.c | 19 ++++++++++++++++++-
>>   2 files changed, 19 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/msm/dsi/dsi.xml.h b/drivers/gpu/drm/msm/dsi/dsi.xml.h
>> index a4a154601114..2a7d980e12c3 100644
>> --- a/drivers/gpu/drm/msm/dsi/dsi.xml.h
>> +++ b/drivers/gpu/drm/msm/dsi/dsi.xml.h
>> @@ -664,6 +664,7 @@ static inline uint32_t DSI_CMD_MODE_MDP_CTRL2_INPUT_RGB_SWAP(enum dsi_rgb_swap v
>>   	return ((val) << DSI_CMD_MODE_MDP_CTRL2_INPUT_RGB_SWAP__SHIFT) & DSI_CMD_MODE_MDP_CTRL2_INPUT_RGB_SWAP__MASK;
>>   }
>>   #define DSI_CMD_MODE_MDP_CTRL2_BURST_MODE			0x00010000
>> +#define DSI_CMD_MODE_MDP_CTRL2_DATABUS_WIDEN			0x00100000
>>
>>   #define REG_DSI_CMD_MODE_MDP_STREAM2_CTRL			0x000001b8
>>   #define DSI_CMD_MODE_MDP_STREAM2_CTRL_DATA_TYPE__MASK		0x0000003f
>> diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c
>> index 5d7b4409e4e9..1da5238e7105 100644
>> --- a/drivers/gpu/drm/msm/dsi/dsi_host.c
>> +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
>> @@ -927,6 +927,9 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
>>   	u32 hdisplay = mode->hdisplay;
>>   	u32 wc;
>>   	int ret;
>> +	bool widebus_supported = msm_host->cfg_hnd->major == MSM_DSI_VER_MAJOR_6G &&
>> +			msm_host->cfg_hnd->minor >= MSM_DSI_6G_VER_MINOR_V2_5_0;
>> +
>>
>>   	DBG("");
>>
>> @@ -973,8 +976,15 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
>>   		 *
>>   		 * hdisplay will be divided by 3 here to account for the fact
>>   		 * that DPU sends 3 bytes per pclk cycle to DSI.
>> +		 *
>> +		 * If widebus is supported, set DATABUS_WIDEN register and divide hdisplay by 6
>> +		 * instead of 3
> 
> So this should allow us to divide pclk by 2, or have much lower latency?
> Otherwise it'll tick enough times to transmit the data twice.
> 
> Note that I brought up the exact same concerns when you used the 3:1
> ratio from dsi_bpp / dsc_bpp in your pclk reduction patch (instad of the
> number of bits/bytes that DPU sends to DSI per pclk), but no-one has
> replied to my inquiry yet.
> 

Ideally yes, we could have done pclk/2 on uncompressed pixels but we are 
not going to add support for widebus on DSI without DSC as that is not 
recommended in our docs.

So this cannot be done.

We tried our best to respond and explain to all your queries both on the 
bug and the patch but i guess it just kept coming :)

I am going to try one more time to explain it in the documentation change.

>>   		 */
>> -		hdisplay = DIV_ROUND_UP(msm_dsc_get_bytes_per_line(msm_host->dsc), 3);
>> +		if (!(msm_host->mode_flags & MIPI_DSI_MODE_VIDEO) && widebus_supported)
>> +			hdisplay = DIV_ROUND_UP(msm_dsc_get_bytes_per_line(msm_host->dsc), 6);
>> +		else
>> +			hdisplay = DIV_ROUND_UP(msm_dsc_get_bytes_per_line(msm_host->dsc), 3);
> 
> Nit: I wonder if this is more concise when written as:
> 
>      u32 bytes_per_pclk;
>      ...
>      if (!video && widebus)
>          bytes_per_pclk = 6;
>      else
>          bytes_per_pclk = 3;
> 
>      hdisplay = DIV_ROUND_UP(msm_dsc_get_bytes_per_line(msm_host->dsc),
>                              bytes_per_pclk);
> 
> That is less duplication, **and** the value becomes self-documenting!
> 

Sure, no concerns with making this change.

>> +
>>   		h_total += hdisplay;
>>   		ha_end = ha_start + hdisplay;
>>   	}
>> @@ -1027,6 +1037,13 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
>>   		dsi_write(msm_host, REG_DSI_CMD_MDP_STREAM0_TOTAL,
>>   			DSI_CMD_MDP_STREAM0_TOTAL_H_TOTAL(hdisplay) |
>>   			DSI_CMD_MDP_STREAM0_TOTAL_V_TOTAL(mode->vdisplay));
>> +
>> +		if (msm_host->dsc && widebus_supported) {
> 
> Can we also support widebus for uncompressed streams (sending 2 pixels
> of bpp=24 per pclk), and if so, is that something you want to add in the
> future (a comment would be nice)?

No, we cannot support widebus on uncompressed streams on DSI so we wont 
be adding that.

> 
>> +			u32 mdp_ctrl2 = dsi_read(msm_host, REG_DSI_CMD_MODE_MDP_CTRL2);
>> +
>> +			mdp_ctrl2 |= DSI_CMD_MODE_MDP_CTRL2_DATABUS_WIDEN;
>> +			dsi_write(msm_host, REG_DSI_CMD_MODE_MDP_CTRL2, mdp_ctrl2);
>> +		}
> 
> Same comment as on your BURST_MODE patch (which this'll conflict with):
> does this belong to the timing setup or is it better moved to
> dsi_ctrl_config?
> 
> - Marijn
> 
>>   	}
>>   }
>>
>>
>> --
>> 2.40.1
>>
Marijn Suijten June 23, 2023, 7:19 a.m. UTC | #3
On 2023-06-22 17:01:34, Abhinav Kumar wrote:
<snip>
> > More interesting would be a link to the Mesa MR upstreaming this
> > bitfield to dsi.xml [2] (which I have not found on my own yet).
> > 
> > [2]: https://gitlab.freedesktop.org/mesa/mesa/-/blame/main/src/freedreno/registers/dsi/dsi.xml
> > 
> 
> Thats because we havent submitted a MR yet for this on mesa.
> 
> Generally, our team does not have legal permissions yet for mesa MRs 
> other than mesa drm because we got permissions for the modetest.
> 
> Rob/Dmitry, can one of you pls help with the corresponding mesa MR for this?

Thanks!

> The xml file change was autogenerated so this patch can go in.
<snip>
> >>   		 *
> >>   		 * hdisplay will be divided by 3 here to account for the fact
> >>   		 * that DPU sends 3 bytes per pclk cycle to DSI.
> >> +		 *
> >> +		 * If widebus is supported, set DATABUS_WIDEN register and divide hdisplay by 6
> >> +		 * instead of 3
> > 
> > So this should allow us to divide pclk by 2, or have much lower latency?
> > Otherwise it'll tick enough times to transmit the data twice.
> > 
> > Note that I brought up the exact same concerns when you used the 3:1
> > ratio from dsi_bpp / dsc_bpp in your pclk reduction patch (instad of the
> > number of bits/bytes that DPU sends to DSI per pclk), but no-one has
> > replied to my inquiry yet.
> > 
> 
> Ideally yes, we could have done pclk/2 on uncompressed pixels but we are 
> not going to add support for widebus on DSI without DSC as that is not 
> recommended in our docs.

No-one here mentioned uncompressed pixels?

None of this code suddenly makes DPU send twice as many pixels/bytes to
the DSI, yet we are enabling a feature that makes the bus twice as wide,
so the clock can be halved *for comressed pixels*?

> So this cannot be done.
> 
> We tried our best to respond and explain to all your queries both on the 
> bug and the patch but i guess it just kept coming :)

Then send less patches!  As long as there is activity on the mailing
list there'll always be questions going back and forth, and I don't
think that's unreasonable.

Unless you want to push patches into mainline without questioning.

> I am going to try one more time to explain it in the documentation change.

Would love to hear, why, for compressed streams, the bus is twice as
wide yet pclk is not reduced to account for that.

<snip>
> > Can we also support widebus for uncompressed streams (sending 2 pixels
> > of bpp=24 per pclk), and if so, is that something you want to add in the
> > future (a comment would be nice)?
> 
> No, we cannot support widebus on uncompressed streams on DSI so we wont 
> be adding that.

And here we start talking about uncompressed pixels *separately*.  Okay,
if it is not supported (e.g. widebus is specific to / reserved for DSC)
it of course makes no sense to add it.

- Marijn
Marijn Suijten June 23, 2023, 8:14 p.m. UTC | #4
On 2023-06-23 10:29:51, Abhinav Kumar wrote:
<snip>
> The concept is quite simple
> 
> one pixel per clock for uncompresssed without widebubus
> 
> 2 pixels per clock for uncompressed with widebus (only enabled for DP 
> not DSI)
> 
> 3 bytes worth of data for compressed without widebus
> 
> 6 bytes worth of data for compressed with widebus
> 
> When compression happens, we cannot quantify with pixels as the boundary 
> is not defined with respect to bytes.
> 
> You brought up uncompressed in your below comment so I assumed your 
> question of /2 was about uncompressed too.

No clue where things are going wrong, but you either avoid or
misunderstand the question.

(Talking exclusively about compressed data here!)

pclk is determined based on the number of bytes.

When widebus is enabled, we transfer twice as many bytes per pclk cycle.

Can pclk be reduced by a factor two, as that should still be enough to
transfer the same amount of bytes when widebus is enabled?

> >> We tried our best to respond and explain to all your queries both on the
> >> bug and the patch but i guess it just kept coming :)
> > 
> > Then send less patches!  As long as there is activity on the mailing
> > list there'll always be questions going back and forth, and I don't
> > think that's unreasonable.
> > 
> > Unless you want to push patches into mainline without questioning.
> > 
> 
> the comments were bordering the line of becoming irrelevant to the 
> patches like discussing video mode on a command mode patch when we had 
> explained many many times that we did not validate them.

You(r team) came up with irrelevant video-mode checks in these patches,
and you keep bringing up topics that I did not mention (such as
suddently talking about uncompressed formats above).  Stop pretending
there's any nefarious intent here unless you intend to push external
contributors away.

> I dont want to add more comments to this. Lets stop discussing this and 
> focus more on this patch.

Perhaps if you answer the question?

- Marijn
Abhinav Kumar June 23, 2023, 8:34 p.m. UTC | #5
On 6/23/2023 1:14 PM, Marijn Suijten wrote:
> On 2023-06-23 10:29:51, Abhinav Kumar wrote:
> <snip>
>> The concept is quite simple
>>
>> one pixel per clock for uncompresssed without widebubus
>>
>> 2 pixels per clock for uncompressed with widebus (only enabled for DP
>> not DSI)
>>
>> 3 bytes worth of data for compressed without widebus
>>
>> 6 bytes worth of data for compressed with widebus
>>
>> When compression happens, we cannot quantify with pixels as the boundary
>> is not defined with respect to bytes.
>>
>> You brought up uncompressed in your below comment so I assumed your
>> question of /2 was about uncompressed too.
> 
> No clue where things are going wrong, but you either avoid or
> misunderstand the question.
> 
> (Talking exclusively about compressed data here!)
> 
> pclk is determined based on the number of bytes.
> 
> When widebus is enabled, we transfer twice as many bytes per pclk cycle.
> 
> Can pclk be reduced by a factor two, as that should still be enough to
> transfer the same amount of bytes when widebus is enabled?
> 

I dont know where the misunderstanding is too.

I already did answer that pclk can be /2 for uncompressed.

But for compressed it will be divided by the compression ration.

What is still missing here. Please clarify.

>>>> We tried our best to respond and explain to all your queries both on the
>>>> bug and the patch but i guess it just kept coming :)
>>>
>>> Then send less patches!  As long as there is activity on the mailing
>>> list there'll always be questions going back and forth, and I don't
>>> think that's unreasonable.
>>>
>>> Unless you want to push patches into mainline without questioning.
>>>
>>
>> the comments were bordering the line of becoming irrelevant to the
>> patches like discussing video mode on a command mode patch when we had
>> explained many many times that we did not validate them.
> 
> You(r team) came up with irrelevant video-mode checks in these patches,
> and you keep bringing up topics that I did not mention (such as
> suddently talking about uncompressed formats above).  Stop pretending
> there's any nefarious intent here unless you intend to push external
> contributors away.
> 

There is no nefarious intent. If it was there we would not have spent 2 
weeks to answer every question on the bug and explaining every math 
about it despite calling our patches hacks.

So either something is still missing in our answers or the questions.

So please ask the question whatever has not been answered one more time.

>> I dont want to add more comments to this. Lets stop discussing this and
>> focus more on this patch.
> 
> Perhaps if you answer the question?
> 
> - Marijn
Marijn Suijten June 23, 2023, 9:33 p.m. UTC | #6
On 2023-06-23 13:34:06, Abhinav Kumar wrote:
> 
> 
> On 6/23/2023 1:14 PM, Marijn Suijten wrote:
> > On 2023-06-23 10:29:51, Abhinav Kumar wrote:
> > <snip>
> >> The concept is quite simple
> >>
> >> one pixel per clock for uncompresssed without widebubus
> >>
> >> 2 pixels per clock for uncompressed with widebus (only enabled for DP
> >> not DSI)
> >>
> >> 3 bytes worth of data for compressed without widebus
> >>
> >> 6 bytes worth of data for compressed with widebus
> >>
> >> When compression happens, we cannot quantify with pixels as the boundary
> >> is not defined with respect to bytes.
> >>
> >> You brought up uncompressed in your below comment so I assumed your
> >> question of /2 was about uncompressed too.
> > 
> > No clue where things are going wrong, but you either avoid or
> > misunderstand the question.
> > 
> > (Talking exclusively about compressed data here!)
> > 
> > pclk is determined based on the number of bytes.
> > 
> > When widebus is enabled, we transfer twice as many bytes per pclk cycle.
> > 
> > Can pclk be reduced by a factor two, as that should still be enough to
> > transfer the same amount of bytes when widebus is enabled?
> > 
> 
> I dont know where the misunderstanding is too.
> 
> I already did answer that pclk can be /2 for uncompressed.

Except that my question is about compressed.

> But for compressed it will be divided by the compression ration.

The question here is "why exactly"?  I am looking for the argument that
justifies pclk being twice as high for the number of bytes we need to
send.

Is that answer: pclk is not only used for the bus between DPU and DSI?

If the answer to that question is yes, then I'd ask what the advantage
is of widebus.

<snip>

Let's leave the rest for what it is.

- Marijn
Abhinav Kumar June 23, 2023, 9:47 p.m. UTC | #7
On 6/23/2023 2:33 PM, Marijn Suijten wrote:
> On 2023-06-23 13:34:06, Abhinav Kumar wrote:
>>
>>
>> On 6/23/2023 1:14 PM, Marijn Suijten wrote:
>>> On 2023-06-23 10:29:51, Abhinav Kumar wrote:
>>> <snip>
>>>> The concept is quite simple
>>>>
>>>> one pixel per clock for uncompresssed without widebubus
>>>>
>>>> 2 pixels per clock for uncompressed with widebus (only enabled for DP
>>>> not DSI)
>>>>
>>>> 3 bytes worth of data for compressed without widebus
>>>>
>>>> 6 bytes worth of data for compressed with widebus
>>>>
>>>> When compression happens, we cannot quantify with pixels as the boundary
>>>> is not defined with respect to bytes.
>>>>
>>>> You brought up uncompressed in your below comment so I assumed your
>>>> question of /2 was about uncompressed too.
>>>
>>> No clue where things are going wrong, but you either avoid or
>>> misunderstand the question.
>>>
>>> (Talking exclusively about compressed data here!)
>>>
>>> pclk is determined based on the number of bytes.
>>>
>>> When widebus is enabled, we transfer twice as many bytes per pclk cycle.
>>>
>>> Can pclk be reduced by a factor two, as that should still be enough to
>>> transfer the same amount of bytes when widebus is enabled?
>>>
>>
>> I dont know where the misunderstanding is too.
>>
>> I already did answer that pclk can be /2 for uncompressed.
> 
> Except that my question is about compressed.
> 
>> But for compressed it will be divided by the compression ration.
> 
> The question here is "why exactly"?  I am looking for the argument that
> justifies pclk being twice as high for the number of bytes we need to
> send.
> 

Ok I think I finally got your question. So you are asking that "Why 
cannot we half the pclk even for the compressed case?"

So pclk / = comp_ratio and then
further on top of that
pclk /= 2 for widebus?

Is that your question?

If so pls see below

> Is that answer: pclk is not only used for the bus between DPU and DSI?
> 
> If the answer to that question is yes, then I'd ask what the advantage
> is of widebus.
> 

pclk is only used between DPU and DSI.

let me explain the purpose of widebus.

If we take an input of 30bpp, then without widebus DPU can only send 24 
bits out.

So the compression ratio which we can achieve would only be 2.4 and not 
3 (24/10)

But with widebus, it can output 48bits and now since your source bpp is 
only 30bits we can send it out fully and get the 3:1 compression ratio.

So widebus actually makes it possible to get the full compression in the 
30bpp case. That would be the benefit of widebus for DSI.

Hope i was able to explain it now.

> <snip>
> 
> Let's leave the rest for what it is.
> 
> - Marijn
diff mbox series

Patch

diff --git a/drivers/gpu/drm/msm/dsi/dsi.xml.h b/drivers/gpu/drm/msm/dsi/dsi.xml.h
index a4a154601114..2a7d980e12c3 100644
--- a/drivers/gpu/drm/msm/dsi/dsi.xml.h
+++ b/drivers/gpu/drm/msm/dsi/dsi.xml.h
@@ -664,6 +664,7 @@  static inline uint32_t DSI_CMD_MODE_MDP_CTRL2_INPUT_RGB_SWAP(enum dsi_rgb_swap v
 	return ((val) << DSI_CMD_MODE_MDP_CTRL2_INPUT_RGB_SWAP__SHIFT) & DSI_CMD_MODE_MDP_CTRL2_INPUT_RGB_SWAP__MASK;
 }
 #define DSI_CMD_MODE_MDP_CTRL2_BURST_MODE			0x00010000
+#define DSI_CMD_MODE_MDP_CTRL2_DATABUS_WIDEN			0x00100000

 #define REG_DSI_CMD_MODE_MDP_STREAM2_CTRL			0x000001b8
 #define DSI_CMD_MODE_MDP_STREAM2_CTRL_DATA_TYPE__MASK		0x0000003f
diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c
index 5d7b4409e4e9..1da5238e7105 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_host.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
@@ -927,6 +927,9 @@  static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
 	u32 hdisplay = mode->hdisplay;
 	u32 wc;
 	int ret;
+	bool widebus_supported = msm_host->cfg_hnd->major == MSM_DSI_VER_MAJOR_6G &&
+			msm_host->cfg_hnd->minor >= MSM_DSI_6G_VER_MINOR_V2_5_0;
+

 	DBG("");

@@ -973,8 +976,15 @@  static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
 		 *
 		 * hdisplay will be divided by 3 here to account for the fact
 		 * that DPU sends 3 bytes per pclk cycle to DSI.
+		 *
+		 * If widebus is supported, set DATABUS_WIDEN register and divide hdisplay by 6
+		 * instead of 3
 		 */
-		hdisplay = DIV_ROUND_UP(msm_dsc_get_bytes_per_line(msm_host->dsc), 3);
+		if (!(msm_host->mode_flags & MIPI_DSI_MODE_VIDEO) && widebus_supported)
+			hdisplay = DIV_ROUND_UP(msm_dsc_get_bytes_per_line(msm_host->dsc), 6);
+		else
+			hdisplay = DIV_ROUND_UP(msm_dsc_get_bytes_per_line(msm_host->dsc), 3);
+
 		h_total += hdisplay;
 		ha_end = ha_start + hdisplay;
 	}
@@ -1027,6 +1037,13 @@  static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
 		dsi_write(msm_host, REG_DSI_CMD_MDP_STREAM0_TOTAL,
 			DSI_CMD_MDP_STREAM0_TOTAL_H_TOTAL(hdisplay) |
 			DSI_CMD_MDP_STREAM0_TOTAL_V_TOTAL(mode->vdisplay));
+
+		if (msm_host->dsc && widebus_supported) {
+			u32 mdp_ctrl2 = dsi_read(msm_host, REG_DSI_CMD_MODE_MDP_CTRL2);
+
+			mdp_ctrl2 |= DSI_CMD_MODE_MDP_CTRL2_DATABUS_WIDEN;
+			dsi_write(msm_host, REG_DSI_CMD_MODE_MDP_CTRL2, mdp_ctrl2);
+		}
 	}
 }