diff mbox series

[v5,5/8] scsi: ufs: core: Enable multi-level gear scaling

Message ID 20250213080008.2984807-6-quic_ziqichen@quicinc.com
State Superseded
Headers show
Series Support Multi-frequency scale for UFS | expand

Commit Message

Ziqi Chen Feb. 13, 2025, 8 a.m. UTC
From: Can Guo <quic_cang@quicinc.com>

With OPP V2 enabled, devfreq can scale clocks amongst multiple frequency
plans. However, the gear speed is only toggled between min and max during
clock scaling. Enable multi-level gear scaling by mapping clock frequencies
to gear speeds, so that when devfreq scales clock frequencies we can put
the UFS link at the appropriate gear speeds accordingly.

Signed-off-by: Can Guo <quic_cang@quicinc.com>
Co-developed-by: Ziqi Chen <quic_ziqichen@quicinc.com>
Signed-off-by: Ziqi Chen <quic_ziqichen@quicinc.com>
Reviewed-by: Bean Huo <beanhuo@micron.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Neil Armstrong <neil.armstrong@linaro.org>
---

v1 -> v2:
Rename the lable "do_pmc" to "config_pwr_mode".

v2 -> v3:
Use assignment instead memcpy() in function ufshcd_scale_gear().

v3 -> v4:
Typo fixed for commit message.

v4 -> v5:
Change the data type of "new_gear" from 'int' to 'u32'.
---
 drivers/ufs/core/ufshcd.c | 44 ++++++++++++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 10 deletions(-)

Comments

Peter Wang (王信友) Feb. 14, 2025, 5:55 a.m. UTC | #1
On Thu, 2025-02-13 at 16:00 +0800, Ziqi Chen wrote:
> 
> External email : Please do not click links or open attachments until
> you have verified the sender or the content.
> 
> 
> From: Can Guo <quic_cang@quicinc.com>
> 
> With OPP V2 enabled, devfreq can scale clocks amongst multiple
> frequency
> plans. However, the gear speed is only toggled between min and max
> during
> clock scaling. Enable multi-level gear scaling by mapping clock
> frequencies
> to gear speeds, so that when devfreq scales clock frequencies we can
> put
> the UFS link at the appropriate gear speeds accordingly.
> 
> Signed-off-by: Can Guo <quic_cang@quicinc.com>
> Co-developed-by: Ziqi Chen <quic_ziqichen@quicinc.com>
> Signed-off-by: Ziqi Chen <quic_ziqichen@quicinc.com>
> Reviewed-by: Bean Huo <beanhuo@micron.com>
> Reviewed-by: Bart Van Assche <bvanassche@acm.org>
> Tested-by: Neil Armstrong <neil.armstrong@linaro.org>
> ---
> 
> v1 -> v2:
> Rename the lable "do_pmc" to "config_pwr_mode".
> 
> v2 -> v3:
> Use assignment instead memcpy() in function ufshcd_scale_gear().
> 
> v3 -> v4:
> Typo fixed for commit message.
> 
> v4 -> v5:
> Change the data type of "new_gear" from 'int' to 'u32'.
> ---
> 

Reviewed-by: Peter Wang <peter.wang@mediatek.com>
Neil Armstrong April 24, 2025, 3:35 p.m. UTC | #2
Hi,

On 13/02/2025 09:00, Ziqi Chen wrote:
> From: Can Guo <quic_cang@quicinc.com>
> 
> With OPP V2 enabled, devfreq can scale clocks amongst multiple frequency
> plans. However, the gear speed is only toggled between min and max during
> clock scaling. Enable multi-level gear scaling by mapping clock frequencies
> to gear speeds, so that when devfreq scales clock frequencies we can put
> the UFS link at the appropriate gear speeds accordingly.
> 
> Signed-off-by: Can Guo <quic_cang@quicinc.com>
> Co-developed-by: Ziqi Chen <quic_ziqichen@quicinc.com>
> Signed-off-by: Ziqi Chen <quic_ziqichen@quicinc.com>
> Reviewed-by: Bean Huo <beanhuo@micron.com>
> Reviewed-by: Bart Van Assche <bvanassche@acm.org>
> Tested-by: Neil Armstrong <neil.armstrong@linaro.org>
> ---
> 
> v1 -> v2:
> Rename the lable "do_pmc" to "config_pwr_mode".
> 
> v2 -> v3:
> Use assignment instead memcpy() in function ufshcd_scale_gear().
> 
> v3 -> v4:
> Typo fixed for commit message.
> 
> v4 -> v5:
> Change the data type of "new_gear" from 'int' to 'u32'.
> ---
>   drivers/ufs/core/ufshcd.c | 44 ++++++++++++++++++++++++++++++---------
>   1 file changed, 34 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
> index 8d295cc827cc..9908c0d6a1e1 100644
> --- a/drivers/ufs/core/ufshcd.c
> +++ b/drivers/ufs/core/ufshcd.c
> @@ -1308,16 +1308,26 @@ static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
>   /**
>    * ufshcd_scale_gear - scale up/down UFS gear
>    * @hba: per adapter instance
> + * @target_gear: target gear to scale to
>    * @scale_up: True for scaling up gear and false for scaling down
>    *
>    * Return: 0 for success; -EBUSY if scaling can't happen at this time;
>    * non-zero for any other errors.
>    */
> -static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up)
> +static int ufshcd_scale_gear(struct ufs_hba *hba, u32 target_gear, bool scale_up)
>   {
>   	int ret = 0;
>   	struct ufs_pa_layer_attr new_pwr_info;
>   
> +	if (target_gear) {
> +		new_pwr_info = hba->pwr_info;
> +		new_pwr_info.gear_tx = target_gear;
> +		new_pwr_info.gear_rx = target_gear;
> +
> +		goto config_pwr_mode;
> +	}
> +
> +	/* Legacy gear scaling, in case vops_freq_to_gear_speed() is not implemented */
>   	if (scale_up) {
>   		memcpy(&new_pwr_info, &hba->clk_scaling.saved_pwr_info,
>   		       sizeof(struct ufs_pa_layer_attr));
> @@ -1338,6 +1348,7 @@ static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up)
>   		}
>   	}
>   
> +config_pwr_mode:
>   	/* check if the power mode needs to be changed or not? */
>   	ret = ufshcd_config_pwr_mode(hba, &new_pwr_info);
>   	if (ret)
> @@ -1408,15 +1419,19 @@ static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err, bool sc
>   static int ufshcd_devfreq_scale(struct ufs_hba *hba, unsigned long freq,
>   				bool scale_up)
>   {
> +	u32 old_gear = hba->pwr_info.gear_rx;
> +	u32 new_gear = 0;
>   	int ret = 0;
>   
> +	new_gear = ufshcd_vops_freq_to_gear_speed(hba, freq);
> +
>   	ret = ufshcd_clock_scaling_prepare(hba, 1 * USEC_PER_SEC);
>   	if (ret)
>   		return ret;
>   
>   	/* scale down the gear before scaling down clocks */
>   	if (!scale_up) {
> -		ret = ufshcd_scale_gear(hba, false);
> +		ret = ufshcd_scale_gear(hba, new_gear, false);
>   		if (ret)
>   			goto out_unprepare;
>   	}
> @@ -1424,13 +1439,13 @@ static int ufshcd_devfreq_scale(struct ufs_hba *hba, unsigned long freq,
>   	ret = ufshcd_scale_clks(hba, freq, scale_up);
>   	if (ret) {
>   		if (!scale_up)
> -			ufshcd_scale_gear(hba, true);
> +			ufshcd_scale_gear(hba, old_gear, true);
>   		goto out_unprepare;
>   	}
>   
>   	/* scale up the gear after scaling up clocks */
>   	if (scale_up) {
> -		ret = ufshcd_scale_gear(hba, true);
> +		ret = ufshcd_scale_gear(hba, new_gear, true);
>   		if (ret) {
>   			ufshcd_scale_clks(hba, hba->devfreq->previous_freq,
>   					  false);
> @@ -1723,6 +1738,8 @@ static ssize_t ufshcd_clkscale_enable_store(struct device *dev,
>   		struct device_attribute *attr, const char *buf, size_t count)
>   {
>   	struct ufs_hba *hba = dev_get_drvdata(dev);
> +	struct ufs_clk_info *clki;
> +	unsigned long freq;
>   	u32 value;
>   	int err = 0;
>   
> @@ -1746,14 +1763,21 @@ static ssize_t ufshcd_clkscale_enable_store(struct device *dev,
>   
>   	if (value) {
>   		ufshcd_resume_clkscaling(hba);
> -	} else {
> -		ufshcd_suspend_clkscaling(hba);
> -		err = ufshcd_devfreq_scale(hba, ULONG_MAX, true);
> -		if (err)
> -			dev_err(hba->dev, "%s: failed to scale clocks up %d\n",
> -					__func__, err);
> +		goto out_rel;
>   	}
>   
> +	clki = list_first_entry(&hba->clk_list_head, struct ufs_clk_info, list);
> +	freq = clki->max_freq;
> +
> +	ufshcd_suspend_clkscaling(hba);
> +	err = ufshcd_devfreq_scale(hba, freq, true);
> +	if (err)
> +		dev_err(hba->dev, "%s: failed to scale clocks up %d\n",
> +				__func__, err);
> +	else
> +		hba->clk_scaling.target_freq = freq;
> +
> +out_rel:
>   	ufshcd_release(hba);
>   	ufshcd_rpm_put_sync(hba);
>   out:

This change causes UFS crash on the RB3gen2, after UFS successfully probe and scan:

[    9.383728] ufshcd-qcom 1d84000.ufshc: pwr ctrl cmd 0x2 with mode 0x11 completion timeout
[    9.393272] ufshcd-qcom 1d84000.ufshc: UFS Host state=1
[    9.403126] msm_dpu ae01000.display-controller: [drm:adreno_request_fw [msm]] *ERROR* failed to load a660_sqe.fw
[    9.408484] ufshcd-qcom 1d84000.ufshc: 0 outstanding reqs, tasks=0x0
[    9.425577] ufshcd-qcom 1d84000.ufshc: saved_err=0x0, saved_uic_err=0x0
[    9.432433] ufshcd-qcom 1d84000.ufshc: Device power mode=1, UIC link state=1
[    9.439716] ufshcd-qcom 1d84000.ufshc: PM in progress=0, sys. suspended=0
[    9.446742] ufshcd-qcom 1d84000.ufshc: Auto BKOPS=0, Host self-block=0
[    9.453468] ufshcd-qcom 1d84000.ufshc: Clk gate=1
...
[   10.529259] ufshcd-qcom 1d84000.ufshc: ufshcd_change_power_mode: power mode change failed -110
[   10.538102] ufshcd-qcom 1d84000.ufshc: ufshcd_scale_gear: failed err -110, old gear: (tx 1 rx 1), new gear: (tx 4 rx 4)
[   10.542841] WARNING: CPU: 0 PID: 274 at drivers/ufs/core/ufshcd.c:5504 ufshcd_sl_intr+0x64c/0x6a4
...

CI Run sample: https://git.codelinaro.org/linaro/qcomlt/ci/staging/cdba-tester/-/jobs/233352#L1479

Bisect run log:
# bad: [0af2f6be1b4281385b618cb86ad946eded089ac8] Linux 6.15-rc1
# good: [38fec10eb60d687e30c8c6b5420d86e8149f7557] Linux 6.14
git bisect start 'v6.15-rc1' 'v6.14'
# bad: [fd71def6d9abc5ae362fb9995d46049b7b0ed391] Merge tag 'for-6.15-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
git bisect bad fd71def6d9abc5ae362fb9995d46049b7b0ed391
# good: [fb1ceb29b27cda91af35851ebab01f298d82162e] Merge tag 'platform-drivers-x86-v6.15-1' of git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86
git bisect good fb1ceb29b27cda91af35851ebab01f298d82162e
# good: [1952e19c02ae8ea0c663d30b19be14344b543068] Merge tag 'wireless-next-2025-03-20' of https://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next
git bisect good 1952e19c02ae8ea0c663d30b19be14344b543068
# bad: [1a9239bb4253f9076b5b4b2a1a4e8d7defd77a95] Merge tag 'net-next-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
git bisect bad 1a9239bb4253f9076b5b4b2a1a4e8d7defd77a95
# good: [22093997ac9220d3c606313efbf4ce564962d095] Merge tag 'ata-6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/libata/linux
git bisect good 22093997ac9220d3c606313efbf4ce564962d095
# bad: [336b4dae6dfecc9aa53a3a68c71b9c1c1d466388] Merge tag 'iommu-updates-v6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux
git bisect bad 336b4dae6dfecc9aa53a3a68c71b9c1c1d466388
# bad: [0711f1966a523d77d4c5f00776a7bd073d56251a] scsi: mpt3sas: Fix buffer overflow in mpt3sas_send_mctp_passthru_req()
git bisect bad 0711f1966a523d77d4c5f00776a7bd073d56251a
# good: [369552fd03f296261023872b8fc983d1fc55c8e9] Merge patch series "mpt3sas driver udpates"
git bisect good 369552fd03f296261023872b8fc983d1fc55c8e9
# bad: [42273e893157501ae119ea5459f3a7d2420c56d6] Merge patch series "scsi: scsi_debug: Add more tape support"
git bisect bad 42273e893157501ae119ea5459f3a7d2420c56d6
# bad: [7e72900272b61c11f2fd4020d4f186124d0d171b] Merge patch series "Support Multi-frequency scale for UFS"
git bisect bad 7e72900272b61c11f2fd4020d4f186124d0d171b
# good: [c02fe9e222d16bed8c270608c42f77bc62562ac3] scsi: ufs: qcom: Implement the freq_to_gear_speed() vop
git bisect good c02fe9e222d16bed8c270608c42f77bc62562ac3
# bad: [eff26ad4c34fc78303c14be749e10ca61c4d211f] scsi: ufs: core: Check if scaling up is required when disable clkscale
git bisect bad eff26ad4c34fc78303c14be749e10ca61c4d211f
# bad: [129b44c27c8a51cb74b2f68fde57f2a2e7f5696b] scsi: ufs: core: Enable multi-level gear scaling
git bisect bad 129b44c27c8a51cb74b2f68fde57f2a2e7f5696b
# first bad commit: [129b44c27c8a51cb74b2f68fde57f2a2e7f5696b] scsi: ufs: core: Enable multi-level gear scaling

#regzbot introduced 129b44c27c8a51cb74b2f68fde57f2a2e7f5696b

Thanks,
Neil
Ziqi Chen April 25, 2025, 7:29 a.m. UTC | #3
Hi Neil,

We analyzed this issue today , I don't think it is related to this patch:

[    9.383728] ufshcd-qcom 1d84000.ufshc: pwr ctrl cmd 0x2 with mode 
0x11 completion timeout
...
...
...
[    9.588545] host_regs: 00000090: 00000002 15710000 00000002 00000003

The timeout error log shows the mode 0x11 is the correct argument3 value 
of this pwr mode DME_SET cmd.

int ufshcd_uic_change_pwr_mode(struct ufs_hba *hba, u8 mode)
{
     struct uic_command uic_cmd = {
         .command = UIC_CMD_DME_SET,
         .argument1 = UIC_ARG_MIB(PA_PWRMODE),
         .argument3 = mode,
     };
...
...
  dev_err(hba->dev,
         "uic cmd 0x%x with arg3 0x%x completion timeout\n",
          uic_cmd->command, uic_cmd->argument3);


This prints means that we gave correct argument3 here.

But from the host register dump , we can see the argument3 written to 
register (address 0x***9C) is '0x00000003' which is a invalid value.

And according to the UFSHCI JEDEC, the return value of ConfigResultCode 
regiser (address 0x***0x98) is 0x00000002 also told us the value written 
to argument3 register is a INVALID_MIB_ATTRIBUTE_VALUE, this is the 
reason causes this timeout issue.

" 02h INVALID_MIB_ATTRIBUTE_VALUE "

However, though we don't know the final root cause, we can know there is 
mistake occur during writing register. The argument3 value is 0x11, but 
after writing to register , the readback value from register is 0x3... 
Anyway , this patch didn't touch the path of register writing.

Are you easy to reproduce this issue ? How many instances do you 
observed ? We never received similar report from our internal UFS test 
team and stabitily test team. If it is easy to reproduce , you can add 
readback prints at the place where after writing register to check it.

ufshcd_dispatch_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd)
{
     lockdep_assert_held(&hba->uic_cmd_mutex);

     WARN_ON(hba->active_uic_cmd);

     hba->active_uic_cmd = uic_cmd;

     /* Write Args */
     ufshcd_writel(hba, uic_cmd->argument1, REG_UIC_COMMAND_ARG_1);
     ufshcd_writel(hba, uic_cmd->argument2, REG_UIC_COMMAND_ARG_2);
     ufshcd_writel(hba, uic_cmd->argument3, REG_UIC_COMMAND_ARG_3);

-> you can add print here:
   pr_err ("READ BACK argument3 from register 0x%x: 0x%x",
       REG_UIC_COMMAND_ARG_3, ufshcd_readl(hba, REG_UIC_COMMAND_ARG_3);



Best Regards,
Ziqi

On 4/24/2025 11:35 PM, Neil Armstrong wrote:
> Hi,
> 
> On 13/02/2025 09:00, Ziqi Chen wrote:
>> From: Can Guo <quic_cang@quicinc.com>
>>
>> With OPP V2 enabled, devfreq can scale clocks amongst multiple frequency
>> plans. However, the gear speed is only toggled between min and max during
>> clock scaling. Enable multi-level gear scaling by mapping clock 
>> frequencies
>> to gear speeds, so that when devfreq scales clock frequencies we can put
>> the UFS link at the appropriate gear speeds accordingly.
>>
>> Signed-off-by: Can Guo <quic_cang@quicinc.com>
>> Co-developed-by: Ziqi Chen <quic_ziqichen@quicinc.com>
>> Signed-off-by: Ziqi Chen <quic_ziqichen@quicinc.com>
>> Reviewed-by: Bean Huo <beanhuo@micron.com>
>> Reviewed-by: Bart Van Assche <bvanassche@acm.org>
>> Tested-by: Neil Armstrong <neil.armstrong@linaro.org>
>> ---
>>
>> v1 -> v2:
>> Rename the lable "do_pmc" to "config_pwr_mode".
>>
>> v2 -> v3:
>> Use assignment instead memcpy() in function ufshcd_scale_gear().
>>
>> v3 -> v4:
>> Typo fixed for commit message.
>>
>> v4 -> v5:
>> Change the data type of "new_gear" from 'int' to 'u32'.
>> ---
>>   drivers/ufs/core/ufshcd.c | 44 ++++++++++++++++++++++++++++++---------
>>   1 file changed, 34 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
>> index 8d295cc827cc..9908c0d6a1e1 100644
>> --- a/drivers/ufs/core/ufshcd.c
>> +++ b/drivers/ufs/core/ufshcd.c
>> @@ -1308,16 +1308,26 @@ static int ufshcd_wait_for_doorbell_clr(struct 
>> ufs_hba *hba,
>>   /**
>>    * ufshcd_scale_gear - scale up/down UFS gear
>>    * @hba: per adapter instance
>> + * @target_gear: target gear to scale to
>>    * @scale_up: True for scaling up gear and false for scaling down
>>    *
>>    * Return: 0 for success; -EBUSY if scaling can't happen at this time;
>>    * non-zero for any other errors.
>>    */
>> -static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up)
>> +static int ufshcd_scale_gear(struct ufs_hba *hba, u32 target_gear, 
>> bool scale_up)
>>   {
>>       int ret = 0;
>>       struct ufs_pa_layer_attr new_pwr_info;
>> +    if (target_gear) {
>> +        new_pwr_info = hba->pwr_info;
>> +        new_pwr_info.gear_tx = target_gear;
>> +        new_pwr_info.gear_rx = target_gear;
>> +
>> +        goto config_pwr_mode;
>> +    }
>> +
>> +    /* Legacy gear scaling, in case vops_freq_to_gear_speed() is not 
>> implemented */
>>       if (scale_up) {
>>           memcpy(&new_pwr_info, &hba->clk_scaling.saved_pwr_info,
>>                  sizeof(struct ufs_pa_layer_attr));
>> @@ -1338,6 +1348,7 @@ static int ufshcd_scale_gear(struct ufs_hba 
>> *hba, bool scale_up)
>>           }
>>       }
>> +config_pwr_mode:
>>       /* check if the power mode needs to be changed or not? */
>>       ret = ufshcd_config_pwr_mode(hba, &new_pwr_info);
>>       if (ret)
>> @@ -1408,15 +1419,19 @@ static void 
>> ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err, bool sc
>>   static int ufshcd_devfreq_scale(struct ufs_hba *hba, unsigned long 
>> freq,
>>                   bool scale_up)
>>   {
>> +    u32 old_gear = hba->pwr_info.gear_rx;
>> +    u32 new_gear = 0;
>>       int ret = 0;
>> +    new_gear = ufshcd_vops_freq_to_gear_speed(hba, freq);
>> +
>>       ret = ufshcd_clock_scaling_prepare(hba, 1 * USEC_PER_SEC);
>>       if (ret)
>>           return ret;
>>       /* scale down the gear before scaling down clocks */
>>       if (!scale_up) {
>> -        ret = ufshcd_scale_gear(hba, false);
>> +        ret = ufshcd_scale_gear(hba, new_gear, false);
>>           if (ret)
>>               goto out_unprepare;
>>       }
>> @@ -1424,13 +1439,13 @@ static int ufshcd_devfreq_scale(struct ufs_hba 
>> *hba, unsigned long freq,
>>       ret = ufshcd_scale_clks(hba, freq, scale_up);
>>       if (ret) {
>>           if (!scale_up)
>> -            ufshcd_scale_gear(hba, true);
>> +            ufshcd_scale_gear(hba, old_gear, true);
>>           goto out_unprepare;
>>       }
>>       /* scale up the gear after scaling up clocks */
>>       if (scale_up) {
>> -        ret = ufshcd_scale_gear(hba, true);
>> +        ret = ufshcd_scale_gear(hba, new_gear, true);
>>           if (ret) {
>>               ufshcd_scale_clks(hba, hba->devfreq->previous_freq,
>>                         false);
>> @@ -1723,6 +1738,8 @@ static ssize_t 
>> ufshcd_clkscale_enable_store(struct device *dev,
>>           struct device_attribute *attr, const char *buf, size_t count)
>>   {
>>       struct ufs_hba *hba = dev_get_drvdata(dev);
>> +    struct ufs_clk_info *clki;
>> +    unsigned long freq;
>>       u32 value;
>>       int err = 0;
>> @@ -1746,14 +1763,21 @@ static ssize_t 
>> ufshcd_clkscale_enable_store(struct device *dev,
>>       if (value) {
>>           ufshcd_resume_clkscaling(hba);
>> -    } else {
>> -        ufshcd_suspend_clkscaling(hba);
>> -        err = ufshcd_devfreq_scale(hba, ULONG_MAX, true);
>> -        if (err)
>> -            dev_err(hba->dev, "%s: failed to scale clocks up %d\n",
>> -                    __func__, err);
>> +        goto out_rel;
>>       }
>> +    clki = list_first_entry(&hba->clk_list_head, struct ufs_clk_info, 
>> list);
>> +    freq = clki->max_freq;
>> +
>> +    ufshcd_suspend_clkscaling(hba);
>> +    err = ufshcd_devfreq_scale(hba, freq, true);
>> +    if (err)
>> +        dev_err(hba->dev, "%s: failed to scale clocks up %d\n",
>> +                __func__, err);
>> +    else
>> +        hba->clk_scaling.target_freq = freq;
>> +
>> +out_rel:
>>       ufshcd_release(hba);
>>       ufshcd_rpm_put_sync(hba);
>>   out:
> 
> This change causes UFS crash on the RB3gen2, after UFS successfully 
> probe and scan:
> 
> [    9.383728] ufshcd-qcom 1d84000.ufshc: pwr ctrl cmd 0x2 with mode 
> 0x11 completion timeout
> [    9.393272] ufshcd-qcom 1d84000.ufshc: UFS Host state=1
> [    9.403126] msm_dpu ae01000.display-controller: 
> [drm:adreno_request_fw [msm]] *ERROR* failed to load a660_sqe.fw
> [    9.408484] ufshcd-qcom 1d84000.ufshc: 0 outstanding reqs, tasks=0x0
> [    9.425577] ufshcd-qcom 1d84000.ufshc: saved_err=0x0, saved_uic_err=0x0
> [    9.432433] ufshcd-qcom 1d84000.ufshc: Device power mode=1, UIC link 
> state=1
> [    9.439716] ufshcd-qcom 1d84000.ufshc: PM in progress=0, sys. 
> suspended=0
> [    9.446742] ufshcd-qcom 1d84000.ufshc: Auto BKOPS=0, Host self-block=0
> [    9.453468] ufshcd-qcom 1d84000.ufshc: Clk gate=1
> ...
> [   10.529259] ufshcd-qcom 1d84000.ufshc: ufshcd_change_power_mode: 
> power mode change failed -110
> [   10.538102] ufshcd-qcom 1d84000.ufshc: ufshcd_scale_gear: failed err 
> -110, old gear: (tx 1 rx 1), new gear: (tx 4 rx 4)
> [   10.542841] WARNING: CPU: 0 PID: 274 at drivers/ufs/core/ 
> ufshcd.c:5504 ufshcd_sl_intr+0x64c/0x6a4
> ...
> 
> CI Run sample: https://git.codelinaro.org/linaro/qcomlt/ci/staging/cdba- 
> tester/-/jobs/233352#L1479
> 
> Bisect run log:
> # bad: [0af2f6be1b4281385b618cb86ad946eded089ac8] Linux 6.15-rc1
> # good: [38fec10eb60d687e30c8c6b5420d86e8149f7557] Linux 6.14
> git bisect start 'v6.15-rc1' 'v6.14'
> # bad: [fd71def6d9abc5ae362fb9995d46049b7b0ed391] Merge tag 'for-6.15- 
> tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
> git bisect bad fd71def6d9abc5ae362fb9995d46049b7b0ed391
> # good: [fb1ceb29b27cda91af35851ebab01f298d82162e] Merge tag 'platform- 
> drivers-x86-v6.15-1' of git://git.kernel.org/pub/scm/linux/kernel/git/ 
> pdx86/platform-drivers-x86
> git bisect good fb1ceb29b27cda91af35851ebab01f298d82162e
> # good: [1952e19c02ae8ea0c663d30b19be14344b543068] Merge tag 'wireless- 
> next-2025-03-20' of https://git.kernel.org/pub/scm/linux/kernel/git/ 
> wireless/wireless-next
> git bisect good 1952e19c02ae8ea0c663d30b19be14344b543068
> # bad: [1a9239bb4253f9076b5b4b2a1a4e8d7defd77a95] Merge tag 'net- 
> next-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
> git bisect bad 1a9239bb4253f9076b5b4b2a1a4e8d7defd77a95
> # good: [22093997ac9220d3c606313efbf4ce564962d095] Merge tag 'ata-6.15- 
> rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/libata/linux
> git bisect good 22093997ac9220d3c606313efbf4ce564962d095
> # bad: [336b4dae6dfecc9aa53a3a68c71b9c1c1d466388] Merge tag 'iommu- 
> updates-v6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux
> git bisect bad 336b4dae6dfecc9aa53a3a68c71b9c1c1d466388
> # bad: [0711f1966a523d77d4c5f00776a7bd073d56251a] scsi: mpt3sas: Fix 
> buffer overflow in mpt3sas_send_mctp_passthru_req()
> git bisect bad 0711f1966a523d77d4c5f00776a7bd073d56251a
> # good: [369552fd03f296261023872b8fc983d1fc55c8e9] Merge patch series 
> "mpt3sas driver udpates"
> git bisect good 369552fd03f296261023872b8fc983d1fc55c8e9
> # bad: [42273e893157501ae119ea5459f3a7d2420c56d6] Merge patch series 
> "scsi: scsi_debug: Add more tape support"
> git bisect bad 42273e893157501ae119ea5459f3a7d2420c56d6
> # bad: [7e72900272b61c11f2fd4020d4f186124d0d171b] Merge patch series 
> "Support Multi-frequency scale for UFS"
> git bisect bad 7e72900272b61c11f2fd4020d4f186124d0d171b
> # good: [c02fe9e222d16bed8c270608c42f77bc62562ac3] scsi: ufs: qcom: 
> Implement the freq_to_gear_speed() vop
> git bisect good c02fe9e222d16bed8c270608c42f77bc62562ac3
> # bad: [eff26ad4c34fc78303c14be749e10ca61c4d211f] scsi: ufs: core: Check 
> if scaling up is required when disable clkscale
> git bisect bad eff26ad4c34fc78303c14be749e10ca61c4d211f
> # bad: [129b44c27c8a51cb74b2f68fde57f2a2e7f5696b] scsi: ufs: core: 
> Enable multi-level gear scaling
> git bisect bad 129b44c27c8a51cb74b2f68fde57f2a2e7f5696b
> # first bad commit: [129b44c27c8a51cb74b2f68fde57f2a2e7f5696b] scsi: 
> ufs: core: Enable multi-level gear scaling
> 
> #regzbot introduced 129b44c27c8a51cb74b2f68fde57f2a2e7f5696b
> 
> Thanks,
> Neil
>
Neil Armstrong April 25, 2025, 7:43 a.m. UTC | #4
Hi,

On 25/04/2025 09:29, Ziqi Chen wrote:
> Hi Neil,
> 
> We analyzed this issue today , I don't think it is related to this patch:

The bisect point to the patch where the issue appears, it may be another one of this patchset.

> 
> [    9.383728] ufshcd-qcom 1d84000.ufshc: pwr ctrl cmd 0x2 with mode 0x11 completion timeout
> ...
> ...
> ...
> [    9.588545] host_regs: 00000090: 00000002 15710000 00000002 00000003
> 
> The timeout error log shows the mode 0x11 is the correct argument3 value of this pwr mode DME_SET cmd.
> 
> int ufshcd_uic_change_pwr_mode(struct ufs_hba *hba, u8 mode)
> {
>      struct uic_command uic_cmd = {
>          .command = UIC_CMD_DME_SET,
>          .argument1 = UIC_ARG_MIB(PA_PWRMODE),
>          .argument3 = mode,
>      };
> ...
> ...
>   dev_err(hba->dev,
>          "uic cmd 0x%x with arg3 0x%x completion timeout\n",
>           uic_cmd->command, uic_cmd->argument3);
> 
> 
> This prints means that we gave correct argument3 here.
> 
> But from the host register dump , we can see the argument3 written to register (address 0x***9C) is '0x00000003' which is a invalid value.
> 
> And according to the UFSHCI JEDEC, the return value of ConfigResultCode regiser (address 0x***0x98) is 0x00000002 also told us the value written to argument3 register is a INVALID_MIB_ATTRIBUTE_VALUE, this is the reason causes this timeout issue.
> 
> " 02h INVALID_MIB_ATTRIBUTE_VALUE "
> 
> However, though we don't know the final root cause, we can know there is mistake occur during writing register. The argument3 value is 0x11, but after writing to register , the readback value from register is 0x3... Anyway , this patch didn't touch the path of register writing.
> 
> Are you easy to reproduce this issue ? How many instances do you observed ? We never received similar report from our internal UFS test team and stabitily test team. If it is easy to reproduce , you can add readback prints at the place where after writing register to check it.

The issue is easy to reproduce, just boot the rb3gen2 with vanilla v6.15-rc1 and default defconfig.

> 
> ufshcd_dispatch_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd)
> {
>      lockdep_assert_held(&hba->uic_cmd_mutex);
> 
>      WARN_ON(hba->active_uic_cmd);
> 
>      hba->active_uic_cmd = uic_cmd;
> 
>      /* Write Args */
>      ufshcd_writel(hba, uic_cmd->argument1, REG_UIC_COMMAND_ARG_1);
>      ufshcd_writel(hba, uic_cmd->argument2, REG_UIC_COMMAND_ARG_2);
>      ufshcd_writel(hba, uic_cmd->argument3, REG_UIC_COMMAND_ARG_3);
> 
> -> you can add print here:
>    pr_err ("READ BACK argument3 from register 0x%x: 0x%x",
>        REG_UIC_COMMAND_ARG_3, ufshcd_readl(hba, REG_UIC_COMMAND_ARG_3);

I've run the kernel with:
=====================><=================================
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 0534390c2a35..232328ff6365 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -2494,6 +2494,14 @@ ufshcd_dispatch_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd)
         ufshcd_writel(hba, uic_cmd->argument2, REG_UIC_COMMAND_ARG_2);
         ufshcd_writel(hba, uic_cmd->argument3, REG_UIC_COMMAND_ARG_3);

+       {
+               uint32_t reg3 = ufshcd_readl(hba, REG_UIC_COMMAND_ARG_3);
+
+               if (reg3 != uic_cmd->argument3)
+                       pr_err("different READ BACK argument3 from register 0x% != 0x%x",
+                               uic_cmd->argument3, reg3);
+       }
+
         ufshcd_add_uic_command_trace(hba, uic_cmd, UFS_CMD_SEND);

         /* Write UIC Cmd */
=====================><=================================

And this print never appears.

But the log clearly shows the kernel tries to scale back from gear1 to gear4:

 >> [   10.538102] ufshcd-qcom 1d84000.ufshc: ufshcd_scale_gear: failed err -110, old gear: (tx 1 rx 1), new gear: (tx 4 rx 4)

did you validate the gear scaling on the sc7280 with the mainline UFS & PHY drivers ?

Neil

> 
> 
> 
> Best Regards,
> Ziqi
> 
> On 4/24/2025 11:35 PM, Neil Armstrong wrote:
>> Hi,
>>
>> On 13/02/2025 09:00, Ziqi Chen wrote:
>>> From: Can Guo <quic_cang@quicinc.com>
>>>
>>> With OPP V2 enabled, devfreq can scale clocks amongst multiple frequency
>>> plans. However, the gear speed is only toggled between min and max during
>>> clock scaling. Enable multi-level gear scaling by mapping clock frequencies
>>> to gear speeds, so that when devfreq scales clock frequencies we can put
>>> the UFS link at the appropriate gear speeds accordingly.
>>>
>>> Signed-off-by: Can Guo <quic_cang@quicinc.com>
>>> Co-developed-by: Ziqi Chen <quic_ziqichen@quicinc.com>
>>> Signed-off-by: Ziqi Chen <quic_ziqichen@quicinc.com>
>>> Reviewed-by: Bean Huo <beanhuo@micron.com>
>>> Reviewed-by: Bart Van Assche <bvanassche@acm.org>
>>> Tested-by: Neil Armstrong <neil.armstrong@linaro.org>
>>> ---
>>>
>>> v1 -> v2:
>>> Rename the lable "do_pmc" to "config_pwr_mode".
>>>
>>> v2 -> v3:
>>> Use assignment instead memcpy() in function ufshcd_scale_gear().
>>>
>>> v3 -> v4:
>>> Typo fixed for commit message.
>>>
>>> v4 -> v5:
>>> Change the data type of "new_gear" from 'int' to 'u32'.
>>> ---
>>>   drivers/ufs/core/ufshcd.c | 44 ++++++++++++++++++++++++++++++---------
>>>   1 file changed, 34 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
>>> index 8d295cc827cc..9908c0d6a1e1 100644
>>> --- a/drivers/ufs/core/ufshcd.c
>>> +++ b/drivers/ufs/core/ufshcd.c
>>> @@ -1308,16 +1308,26 @@ static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
>>>   /**
>>>    * ufshcd_scale_gear - scale up/down UFS gear
>>>    * @hba: per adapter instance
>>> + * @target_gear: target gear to scale to
>>>    * @scale_up: True for scaling up gear and false for scaling down
>>>    *
>>>    * Return: 0 for success; -EBUSY if scaling can't happen at this time;
>>>    * non-zero for any other errors.
>>>    */
>>> -static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up)
>>> +static int ufshcd_scale_gear(struct ufs_hba *hba, u32 target_gear, bool scale_up)
>>>   {
>>>       int ret = 0;
>>>       struct ufs_pa_layer_attr new_pwr_info;
>>> +    if (target_gear) {
>>> +        new_pwr_info = hba->pwr_info;
>>> +        new_pwr_info.gear_tx = target_gear;
>>> +        new_pwr_info.gear_rx = target_gear;
>>> +
>>> +        goto config_pwr_mode;
>>> +    }
>>> +
>>> +    /* Legacy gear scaling, in case vops_freq_to_gear_speed() is not implemented */
>>>       if (scale_up) {
>>>           memcpy(&new_pwr_info, &hba->clk_scaling.saved_pwr_info,
>>>                  sizeof(struct ufs_pa_layer_attr));
>>> @@ -1338,6 +1348,7 @@ static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up)
>>>           }
>>>       }
>>> +config_pwr_mode:
>>>       /* check if the power mode needs to be changed or not? */
>>>       ret = ufshcd_config_pwr_mode(hba, &new_pwr_info);
>>>       if (ret)
>>> @@ -1408,15 +1419,19 @@ static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err, bool sc
>>>   static int ufshcd_devfreq_scale(struct ufs_hba *hba, unsigned long freq,
>>>                   bool scale_up)
>>>   {
>>> +    u32 old_gear = hba->pwr_info.gear_rx;
>>> +    u32 new_gear = 0;
>>>       int ret = 0;
>>> +    new_gear = ufshcd_vops_freq_to_gear_speed(hba, freq);
>>> +
>>>       ret = ufshcd_clock_scaling_prepare(hba, 1 * USEC_PER_SEC);
>>>       if (ret)
>>>           return ret;
>>>       /* scale down the gear before scaling down clocks */
>>>       if (!scale_up) {
>>> -        ret = ufshcd_scale_gear(hba, false);
>>> +        ret = ufshcd_scale_gear(hba, new_gear, false);
>>>           if (ret)
>>>               goto out_unprepare;
>>>       }
>>> @@ -1424,13 +1439,13 @@ static int ufshcd_devfreq_scale(struct ufs_hba *hba, unsigned long freq,
>>>       ret = ufshcd_scale_clks(hba, freq, scale_up);
>>>       if (ret) {
>>>           if (!scale_up)
>>> -            ufshcd_scale_gear(hba, true);
>>> +            ufshcd_scale_gear(hba, old_gear, true);
>>>           goto out_unprepare;
>>>       }
>>>       /* scale up the gear after scaling up clocks */
>>>       if (scale_up) {
>>> -        ret = ufshcd_scale_gear(hba, true);
>>> +        ret = ufshcd_scale_gear(hba, new_gear, true);
>>>           if (ret) {
>>>               ufshcd_scale_clks(hba, hba->devfreq->previous_freq,
>>>                         false);
>>> @@ -1723,6 +1738,8 @@ static ssize_t ufshcd_clkscale_enable_store(struct device *dev,
>>>           struct device_attribute *attr, const char *buf, size_t count)
>>>   {
>>>       struct ufs_hba *hba = dev_get_drvdata(dev);
>>> +    struct ufs_clk_info *clki;
>>> +    unsigned long freq;
>>>       u32 value;
>>>       int err = 0;
>>> @@ -1746,14 +1763,21 @@ static ssize_t ufshcd_clkscale_enable_store(struct device *dev,
>>>       if (value) {
>>>           ufshcd_resume_clkscaling(hba);
>>> -    } else {
>>> -        ufshcd_suspend_clkscaling(hba);
>>> -        err = ufshcd_devfreq_scale(hba, ULONG_MAX, true);
>>> -        if (err)
>>> -            dev_err(hba->dev, "%s: failed to scale clocks up %d\n",
>>> -                    __func__, err);
>>> +        goto out_rel;
>>>       }
>>> +    clki = list_first_entry(&hba->clk_list_head, struct ufs_clk_info, list);
>>> +    freq = clki->max_freq;
>>> +
>>> +    ufshcd_suspend_clkscaling(hba);
>>> +    err = ufshcd_devfreq_scale(hba, freq, true);
>>> +    if (err)
>>> +        dev_err(hba->dev, "%s: failed to scale clocks up %d\n",
>>> +                __func__, err);
>>> +    else
>>> +        hba->clk_scaling.target_freq = freq;
>>> +
>>> +out_rel:
>>>       ufshcd_release(hba);
>>>       ufshcd_rpm_put_sync(hba);
>>>   out:
>>
>> This change causes UFS crash on the RB3gen2, after UFS successfully probe and scan:
>>
>> [    9.383728] ufshcd-qcom 1d84000.ufshc: pwr ctrl cmd 0x2 with mode 0x11 completion timeout
>> [    9.393272] ufshcd-qcom 1d84000.ufshc: UFS Host state=1
>> [    9.403126] msm_dpu ae01000.display-controller: [drm:adreno_request_fw [msm]] *ERROR* failed to load a660_sqe.fw
>> [    9.408484] ufshcd-qcom 1d84000.ufshc: 0 outstanding reqs, tasks=0x0
>> [    9.425577] ufshcd-qcom 1d84000.ufshc: saved_err=0x0, saved_uic_err=0x0
>> [    9.432433] ufshcd-qcom 1d84000.ufshc: Device power mode=1, UIC link state=1
>> [    9.439716] ufshcd-qcom 1d84000.ufshc: PM in progress=0, sys. suspended=0
>> [    9.446742] ufshcd-qcom 1d84000.ufshc: Auto BKOPS=0, Host self-block=0
>> [    9.453468] ufshcd-qcom 1d84000.ufshc: Clk gate=1
>> ...
>> [   10.529259] ufshcd-qcom 1d84000.ufshc: ufshcd_change_power_mode: power mode change failed -110
>> [   10.538102] ufshcd-qcom 1d84000.ufshc: ufshcd_scale_gear: failed err -110, old gear: (tx 1 rx 1), new gear: (tx 4 rx 4)
>> [   10.542841] WARNING: CPU: 0 PID: 274 at drivers/ufs/core/ ufshcd.c:5504 ufshcd_sl_intr+0x64c/0x6a4
>> ...
>>
>> CI Run sample: https://git.codelinaro.org/linaro/qcomlt/ci/staging/cdba- tester/-/jobs/233352#L1479
>>
>> Bisect run log:
>> # bad: [0af2f6be1b4281385b618cb86ad946eded089ac8] Linux 6.15-rc1
>> # good: [38fec10eb60d687e30c8c6b5420d86e8149f7557] Linux 6.14
>> git bisect start 'v6.15-rc1' 'v6.14'
>> # bad: [fd71def6d9abc5ae362fb9995d46049b7b0ed391] Merge tag 'for-6.15- tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
>> git bisect bad fd71def6d9abc5ae362fb9995d46049b7b0ed391
>> # good: [fb1ceb29b27cda91af35851ebab01f298d82162e] Merge tag 'platform- drivers-x86-v6.15-1' of git://git.kernel.org/pub/scm/linux/kernel/git/ pdx86/platform-drivers-x86
>> git bisect good fb1ceb29b27cda91af35851ebab01f298d82162e
>> # good: [1952e19c02ae8ea0c663d30b19be14344b543068] Merge tag 'wireless- next-2025-03-20' of https://git.kernel.org/pub/scm/linux/kernel/git/ wireless/wireless-next
>> git bisect good 1952e19c02ae8ea0c663d30b19be14344b543068
>> # bad: [1a9239bb4253f9076b5b4b2a1a4e8d7defd77a95] Merge tag 'net- next-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
>> git bisect bad 1a9239bb4253f9076b5b4b2a1a4e8d7defd77a95
>> # good: [22093997ac9220d3c606313efbf4ce564962d095] Merge tag 'ata-6.15- rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/libata/linux
>> git bisect good 22093997ac9220d3c606313efbf4ce564962d095
>> # bad: [336b4dae6dfecc9aa53a3a68c71b9c1c1d466388] Merge tag 'iommu- updates-v6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux
>> git bisect bad 336b4dae6dfecc9aa53a3a68c71b9c1c1d466388
>> # bad: [0711f1966a523d77d4c5f00776a7bd073d56251a] scsi: mpt3sas: Fix buffer overflow in mpt3sas_send_mctp_passthru_req()
>> git bisect bad 0711f1966a523d77d4c5f00776a7bd073d56251a
>> # good: [369552fd03f296261023872b8fc983d1fc55c8e9] Merge patch series "mpt3sas driver udpates"
>> git bisect good 369552fd03f296261023872b8fc983d1fc55c8e9
>> # bad: [42273e893157501ae119ea5459f3a7d2420c56d6] Merge patch series "scsi: scsi_debug: Add more tape support"
>> git bisect bad 42273e893157501ae119ea5459f3a7d2420c56d6
>> # bad: [7e72900272b61c11f2fd4020d4f186124d0d171b] Merge patch series "Support Multi-frequency scale for UFS"
>> git bisect bad 7e72900272b61c11f2fd4020d4f186124d0d171b
>> # good: [c02fe9e222d16bed8c270608c42f77bc62562ac3] scsi: ufs: qcom: Implement the freq_to_gear_speed() vop
>> git bisect good c02fe9e222d16bed8c270608c42f77bc62562ac3
>> # bad: [eff26ad4c34fc78303c14be749e10ca61c4d211f] scsi: ufs: core: Check if scaling up is required when disable clkscale
>> git bisect bad eff26ad4c34fc78303c14be749e10ca61c4d211f
>> # bad: [129b44c27c8a51cb74b2f68fde57f2a2e7f5696b] scsi: ufs: core: Enable multi-level gear scaling
>> git bisect bad 129b44c27c8a51cb74b2f68fde57f2a2e7f5696b
>> # first bad commit: [129b44c27c8a51cb74b2f68fde57f2a2e7f5696b] scsi: ufs: core: Enable multi-level gear scaling
>>
>> #regzbot introduced 129b44c27c8a51cb74b2f68fde57f2a2e7f5696b
>>
>> Thanks,
>> Neil
>>
>
Ziqi Chen April 29, 2025, 10:23 a.m. UTC | #5
Hi Neil,

Thanks for report~
I got one rb3gen2 device and be able to reproduce this issue today.
I made a change to fix this corner case. It already been verifed from
my side. Could also help double check attached patch whether can fix 
this issue from your side?

Thanks,
Ziqi

On 4/25/2025 3:43 PM, neil.armstrong@linaro.org wrote:
> Hi,
> 
> On 25/04/2025 09:29, Ziqi Chen wrote:
>> Hi Neil,
>>
>> We analyzed this issue today , I don't think it is related to this patch:
> 
> The bisect point to the patch where the issue appears, it may be another 
> one of this patchset.
> 
>>
>> [    9.383728] ufshcd-qcom 1d84000.ufshc: pwr ctrl cmd 0x2 with mode 
>> 0x11 completion timeout
>> ...
>> ...
>> ...
>> [    9.588545] host_regs: 00000090: 00000002 15710000 00000002 00000003
>>
>> The timeout error log shows the mode 0x11 is the correct argument3 
>> value of this pwr mode DME_SET cmd.
>>
>> int ufshcd_uic_change_pwr_mode(struct ufs_hba *hba, u8 mode)
>> {
>>      struct uic_command uic_cmd = {
>>          .command = UIC_CMD_DME_SET,
>>          .argument1 = UIC_ARG_MIB(PA_PWRMODE),
>>          .argument3 = mode,
>>      };
>> ...
>> ...
>>   dev_err(hba->dev,
>>          "uic cmd 0x%x with arg3 0x%x completion timeout\n",
>>           uic_cmd->command, uic_cmd->argument3);
>>
>>
>> This prints means that we gave correct argument3 here.
>>
>> But from the host register dump , we can see the argument3 written to 
>> register (address 0x***9C) is '0x00000003' which is a invalid value.
>>
>> And according to the UFSHCI JEDEC, the return value of 
>> ConfigResultCode regiser (address 0x***0x98) is 0x00000002 also told 
>> us the value written to argument3 register is a 
>> INVALID_MIB_ATTRIBUTE_VALUE, this is the reason causes this timeout 
>> issue.
>>
>> " 02h INVALID_MIB_ATTRIBUTE_VALUE "
>>
>> However, though we don't know the final root cause, we can know there 
>> is mistake occur during writing register. The argument3 value is 0x11, 
>> but after writing to register , the readback value from register is 
>> 0x3... Anyway , this patch didn't touch the path of register writing.
>>
>> Are you easy to reproduce this issue ? How many instances do you 
>> observed ? We never received similar report from our internal UFS test 
>> team and stabitily test team. If it is easy to reproduce , you can add 
>> readback prints at the place where after writing register to check it.
> 
> The issue is easy to reproduce, just boot the rb3gen2 with vanilla 
> v6.15-rc1 and default defconfig.
> 
>>
>> ufshcd_dispatch_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd)
>> {
>>      lockdep_assert_held(&hba->uic_cmd_mutex);
>>
>>      WARN_ON(hba->active_uic_cmd);
>>
>>      hba->active_uic_cmd = uic_cmd;
>>
>>      /* Write Args */
>>      ufshcd_writel(hba, uic_cmd->argument1, REG_UIC_COMMAND_ARG_1);
>>      ufshcd_writel(hba, uic_cmd->argument2, REG_UIC_COMMAND_ARG_2);
>>      ufshcd_writel(hba, uic_cmd->argument3, REG_UIC_COMMAND_ARG_3);
>>
>> -> you can add print here:
>>    pr_err ("READ BACK argument3 from register 0x%x: 0x%x",
>>        REG_UIC_COMMAND_ARG_3, ufshcd_readl(hba, REG_UIC_COMMAND_ARG_3);
> 
> I've run the kernel with:
> =====================><=================================
> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
> index 0534390c2a35..232328ff6365 100644
> --- a/drivers/ufs/core/ufshcd.c
> +++ b/drivers/ufs/core/ufshcd.c
> @@ -2494,6 +2494,14 @@ ufshcd_dispatch_uic_cmd(struct ufs_hba *hba, 
> struct uic_command *uic_cmd)
>          ufshcd_writel(hba, uic_cmd->argument2, REG_UIC_COMMAND_ARG_2);
>          ufshcd_writel(hba, uic_cmd->argument3, REG_UIC_COMMAND_ARG_3);
> 
> +       {
> +               uint32_t reg3 = ufshcd_readl(hba, REG_UIC_COMMAND_ARG_3);
> +
> +               if (reg3 != uic_cmd->argument3)
> +                       pr_err("different READ BACK argument3 from 
> register 0x% != 0x%x",
> +                               uic_cmd->argument3, reg3);
> +       }
> +
>          ufshcd_add_uic_command_trace(hba, uic_cmd, UFS_CMD_SEND);
> 
>          /* Write UIC Cmd */
> =====================><=================================
> 
> And this print never appears.
> 
> But the log clearly shows the kernel tries to scale back from gear1 to 
> gear4:
> 
>  >> [   10.538102] ufshcd-qcom 1d84000.ufshc: ufshcd_scale_gear: failed 
> err -110, old gear: (tx 1 rx 1), new gear: (tx 4 rx 4)
> 
> did you validate the gear scaling on the sc7280 with the mainline UFS & 
> PHY drivers ?
> 
> Neil
> 
>>
>>
>>
>> Best Regards,
>> Ziqi
>>
>> On 4/24/2025 11:35 PM, Neil Armstrong wrote:
>>> Hi,
>>>
>>> On 13/02/2025 09:00, Ziqi Chen wrote:
>>>> From: Can Guo <quic_cang@quicinc.com>
>>>>
>>>> With OPP V2 enabled, devfreq can scale clocks amongst multiple 
>>>> frequency
>>>> plans. However, the gear speed is only toggled between min and max 
>>>> during
>>>> clock scaling. Enable multi-level gear scaling by mapping clock 
>>>> frequencies
>>>> to gear speeds, so that when devfreq scales clock frequencies we can 
>>>> put
>>>> the UFS link at the appropriate gear speeds accordingly.
>>>>
>>>> Signed-off-by: Can Guo <quic_cang@quicinc.com>
>>>> Co-developed-by: Ziqi Chen <quic_ziqichen@quicinc.com>
>>>> Signed-off-by: Ziqi Chen <quic_ziqichen@quicinc.com>
>>>> Reviewed-by: Bean Huo <beanhuo@micron.com>
>>>> Reviewed-by: Bart Van Assche <bvanassche@acm.org>
>>>> Tested-by: Neil Armstrong <neil.armstrong@linaro.org>
>>>> ---
>>>>
>>>> v1 -> v2:
>>>> Rename the lable "do_pmc" to "config_pwr_mode".
>>>>
>>>> v2 -> v3:
>>>> Use assignment instead memcpy() in function ufshcd_scale_gear().
>>>>
>>>> v3 -> v4:
>>>> Typo fixed for commit message.
>>>>
>>>> v4 -> v5:
>>>> Change the data type of "new_gear" from 'int' to 'u32'.
>>>> ---
>>>>   drivers/ufs/core/ufshcd.c | 44 +++++++++++++++++++++++++++++ 
>>>> +---------
>>>>   1 file changed, 34 insertions(+), 10 deletions(-)
>>>>
>>>> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
>>>> index 8d295cc827cc..9908c0d6a1e1 100644
>>>> --- a/drivers/ufs/core/ufshcd.c
>>>> +++ b/drivers/ufs/core/ufshcd.c
>>>> @@ -1308,16 +1308,26 @@ static int 
>>>> ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
>>>>   /**
>>>>    * ufshcd_scale_gear - scale up/down UFS gear
>>>>    * @hba: per adapter instance
>>>> + * @target_gear: target gear to scale to
>>>>    * @scale_up: True for scaling up gear and false for scaling down
>>>>    *
>>>>    * Return: 0 for success; -EBUSY if scaling can't happen at this 
>>>> time;
>>>>    * non-zero for any other errors.
>>>>    */
>>>> -static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up)
>>>> +static int ufshcd_scale_gear(struct ufs_hba *hba, u32 target_gear, 
>>>> bool scale_up)
>>>>   {
>>>>       int ret = 0;
>>>>       struct ufs_pa_layer_attr new_pwr_info;
>>>> +    if (target_gear) {
>>>> +        new_pwr_info = hba->pwr_info;
>>>> +        new_pwr_info.gear_tx = target_gear;
>>>> +        new_pwr_info.gear_rx = target_gear;
>>>> +
>>>> +        goto config_pwr_mode;
>>>> +    }
>>>> +
>>>> +    /* Legacy gear scaling, in case vops_freq_to_gear_speed() is 
>>>> not implemented */
>>>>       if (scale_up) {
>>>>           memcpy(&new_pwr_info, &hba->clk_scaling.saved_pwr_info,
>>>>                  sizeof(struct ufs_pa_layer_attr));
>>>> @@ -1338,6 +1348,7 @@ static int ufshcd_scale_gear(struct ufs_hba 
>>>> *hba, bool scale_up)
>>>>           }
>>>>       }
>>>> +config_pwr_mode:
>>>>       /* check if the power mode needs to be changed or not? */
>>>>       ret = ufshcd_config_pwr_mode(hba, &new_pwr_info);
>>>>       if (ret)
>>>> @@ -1408,15 +1419,19 @@ static void 
>>>> ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err, bool sc
>>>>   static int ufshcd_devfreq_scale(struct ufs_hba *hba, unsigned long 
>>>> freq,
>>>>                   bool scale_up)
>>>>   {
>>>> +    u32 old_gear = hba->pwr_info.gear_rx;
>>>> +    u32 new_gear = 0;
>>>>       int ret = 0;
>>>> +    new_gear = ufshcd_vops_freq_to_gear_speed(hba, freq);
>>>> +
>>>>       ret = ufshcd_clock_scaling_prepare(hba, 1 * USEC_PER_SEC);
>>>>       if (ret)
>>>>           return ret;
>>>>       /* scale down the gear before scaling down clocks */
>>>>       if (!scale_up) {
>>>> -        ret = ufshcd_scale_gear(hba, false);
>>>> +        ret = ufshcd_scale_gear(hba, new_gear, false);
>>>>           if (ret)
>>>>               goto out_unprepare;
>>>>       }
>>>> @@ -1424,13 +1439,13 @@ static int ufshcd_devfreq_scale(struct 
>>>> ufs_hba *hba, unsigned long freq,
>>>>       ret = ufshcd_scale_clks(hba, freq, scale_up);
>>>>       if (ret) {
>>>>           if (!scale_up)
>>>> -            ufshcd_scale_gear(hba, true);
>>>> +            ufshcd_scale_gear(hba, old_gear, true);
>>>>           goto out_unprepare;
>>>>       }
>>>>       /* scale up the gear after scaling up clocks */
>>>>       if (scale_up) {
>>>> -        ret = ufshcd_scale_gear(hba, true);
>>>> +        ret = ufshcd_scale_gear(hba, new_gear, true);
>>>>           if (ret) {
>>>>               ufshcd_scale_clks(hba, hba->devfreq->previous_freq,
>>>>                         false);
>>>> @@ -1723,6 +1738,8 @@ static ssize_t 
>>>> ufshcd_clkscale_enable_store(struct device *dev,
>>>>           struct device_attribute *attr, const char *buf, size_t count)
>>>>   {
>>>>       struct ufs_hba *hba = dev_get_drvdata(dev);
>>>> +    struct ufs_clk_info *clki;
>>>> +    unsigned long freq;
>>>>       u32 value;
>>>>       int err = 0;
>>>> @@ -1746,14 +1763,21 @@ static ssize_t 
>>>> ufshcd_clkscale_enable_store(struct device *dev,
>>>>       if (value) {
>>>>           ufshcd_resume_clkscaling(hba);
>>>> -    } else {
>>>> -        ufshcd_suspend_clkscaling(hba);
>>>> -        err = ufshcd_devfreq_scale(hba, ULONG_MAX, true);
>>>> -        if (err)
>>>> -            dev_err(hba->dev, "%s: failed to scale clocks up %d\n",
>>>> -                    __func__, err);
>>>> +        goto out_rel;
>>>>       }
>>>> +    clki = list_first_entry(&hba->clk_list_head, struct 
>>>> ufs_clk_info, list);
>>>> +    freq = clki->max_freq;
>>>> +
>>>> +    ufshcd_suspend_clkscaling(hba);
>>>> +    err = ufshcd_devfreq_scale(hba, freq, true);
>>>> +    if (err)
>>>> +        dev_err(hba->dev, "%s: failed to scale clocks up %d\n",
>>>> +                __func__, err);
>>>> +    else
>>>> +        hba->clk_scaling.target_freq = freq;
>>>> +
>>>> +out_rel:
>>>>       ufshcd_release(hba);
>>>>       ufshcd_rpm_put_sync(hba);
>>>>   out:
>>>
>>> This change causes UFS crash on the RB3gen2, after UFS successfully 
>>> probe and scan:
>>>
>>> [    9.383728] ufshcd-qcom 1d84000.ufshc: pwr ctrl cmd 0x2 with mode 
>>> 0x11 completion timeout
>>> [    9.393272] ufshcd-qcom 1d84000.ufshc: UFS Host state=1
>>> [    9.403126] msm_dpu ae01000.display-controller: 
>>> [drm:adreno_request_fw [msm]] *ERROR* failed to load a660_sqe.fw
>>> [    9.408484] ufshcd-qcom 1d84000.ufshc: 0 outstanding reqs, tasks=0x0
>>> [    9.425577] ufshcd-qcom 1d84000.ufshc: saved_err=0x0, 
>>> saved_uic_err=0x0
>>> [    9.432433] ufshcd-qcom 1d84000.ufshc: Device power mode=1, UIC 
>>> link state=1
>>> [    9.439716] ufshcd-qcom 1d84000.ufshc: PM in progress=0, sys. 
>>> suspended=0
>>> [    9.446742] ufshcd-qcom 1d84000.ufshc: Auto BKOPS=0, Host self- 
>>> block=0
>>> [    9.453468] ufshcd-qcom 1d84000.ufshc: Clk gate=1
>>> ...
>>> [   10.529259] ufshcd-qcom 1d84000.ufshc: ufshcd_change_power_mode: 
>>> power mode change failed -110
>>> [   10.538102] ufshcd-qcom 1d84000.ufshc: ufshcd_scale_gear: failed 
>>> err -110, old gear: (tx 1 rx 1), new gear: (tx 4 rx 4)
>>> [   10.542841] WARNING: CPU: 0 PID: 274 at drivers/ufs/core/ 
>>> ufshcd.c:5504 ufshcd_sl_intr+0x64c/0x6a4
>>> ...
>>>
>>> CI Run sample: https://git.codelinaro.org/linaro/qcomlt/ci/staging/ 
>>> cdba- tester/-/jobs/233352#L1479
>>>
>>> Bisect run log:
>>> # bad: [0af2f6be1b4281385b618cb86ad946eded089ac8] Linux 6.15-rc1
>>> # good: [38fec10eb60d687e30c8c6b5420d86e8149f7557] Linux 6.14
>>> git bisect start 'v6.15-rc1' 'v6.14'
>>> # bad: [fd71def6d9abc5ae362fb9995d46049b7b0ed391] Merge tag 
>>> 'for-6.15- tag' of git://git.kernel.org/pub/scm/linux/kernel/git/ 
>>> kdave/linux
>>> git bisect bad fd71def6d9abc5ae362fb9995d46049b7b0ed391
>>> # good: [fb1ceb29b27cda91af35851ebab01f298d82162e] Merge tag 
>>> 'platform- drivers-x86-v6.15-1' of git://git.kernel.org/pub/scm/ 
>>> linux/kernel/git/ pdx86/platform-drivers-x86
>>> git bisect good fb1ceb29b27cda91af35851ebab01f298d82162e
>>> # good: [1952e19c02ae8ea0c663d30b19be14344b543068] Merge tag 
>>> 'wireless- next-2025-03-20' of https://git.kernel.org/pub/scm/linux/ 
>>> kernel/git/ wireless/wireless-next
>>> git bisect good 1952e19c02ae8ea0c663d30b19be14344b543068
>>> # bad: [1a9239bb4253f9076b5b4b2a1a4e8d7defd77a95] Merge tag 'net- 
>>> next-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/ 
>>> net-next
>>> git bisect bad 1a9239bb4253f9076b5b4b2a1a4e8d7defd77a95
>>> # good: [22093997ac9220d3c606313efbf4ce564962d095] Merge tag 
>>> 'ata-6.15- rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ 
>>> libata/linux
>>> git bisect good 22093997ac9220d3c606313efbf4ce564962d095
>>> # bad: [336b4dae6dfecc9aa53a3a68c71b9c1c1d466388] Merge tag 'iommu- 
>>> updates-v6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/ 
>>> iommu/linux
>>> git bisect bad 336b4dae6dfecc9aa53a3a68c71b9c1c1d466388
>>> # bad: [0711f1966a523d77d4c5f00776a7bd073d56251a] scsi: mpt3sas: Fix 
>>> buffer overflow in mpt3sas_send_mctp_passthru_req()
>>> git bisect bad 0711f1966a523d77d4c5f00776a7bd073d56251a
>>> # good: [369552fd03f296261023872b8fc983d1fc55c8e9] Merge patch series 
>>> "mpt3sas driver udpates"
>>> git bisect good 369552fd03f296261023872b8fc983d1fc55c8e9
>>> # bad: [42273e893157501ae119ea5459f3a7d2420c56d6] Merge patch series 
>>> "scsi: scsi_debug: Add more tape support"
>>> git bisect bad 42273e893157501ae119ea5459f3a7d2420c56d6
>>> # bad: [7e72900272b61c11f2fd4020d4f186124d0d171b] Merge patch series 
>>> "Support Multi-frequency scale for UFS"
>>> git bisect bad 7e72900272b61c11f2fd4020d4f186124d0d171b
>>> # good: [c02fe9e222d16bed8c270608c42f77bc62562ac3] scsi: ufs: qcom: 
>>> Implement the freq_to_gear_speed() vop
>>> git bisect good c02fe9e222d16bed8c270608c42f77bc62562ac3
>>> # bad: [eff26ad4c34fc78303c14be749e10ca61c4d211f] scsi: ufs: core: 
>>> Check if scaling up is required when disable clkscale
>>> git bisect bad eff26ad4c34fc78303c14be749e10ca61c4d211f
>>> # bad: [129b44c27c8a51cb74b2f68fde57f2a2e7f5696b] scsi: ufs: core: 
>>> Enable multi-level gear scaling
>>> git bisect bad 129b44c27c8a51cb74b2f68fde57f2a2e7f5696b
>>> # first bad commit: [129b44c27c8a51cb74b2f68fde57f2a2e7f5696b] scsi: 
>>> ufs: core: Enable multi-level gear scaling
>>>
>>> #regzbot introduced 129b44c27c8a51cb74b2f68fde57f2a2e7f5696b
>>>
>>> Thanks,
>>> Neil
>>>
>>
>
From be2f444c5cc30cccd410c24ebf4b5d33dc3a2b1d Mon Sep 17 00:00:00 2001
From: Ziqi Chen <quic_ziqichen@quicinc.com>
Date: Tue, 29 Apr 2025 18:08:44 +0800
Subject: [PATCH] scsi: ufs: qcom: check negotiatory max gear before return
 freq matched gear

Signed-off-by: Ziqi Chen <quic_ziqichen@quicinc.com>
---
 drivers/ufs/host/ufs-qcom.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c
index 1b37449fbffc..864be1e25c44 100644
--- a/drivers/ufs/host/ufs-qcom.c
+++ b/drivers/ufs/host/ufs-qcom.c
@@ -1903,9 +1903,11 @@ static u32 ufs_qcom_freq_to_gear_speed(struct ufs_hba *hba, unsigned long freq)
 		break;
 	default:
 		dev_err(hba->dev, "%s: Unsupported clock freq : %lu\n", __func__, freq);
-		break;
+		return gear;
 	}
 
+	gear = hba->max_pwr_info.is_valid ? min_t(u32, gear, hba->max_pwr_info.info.gear_rx) : gear;
+
 	return gear;
 }
Neil Armstrong April 29, 2025, 12:25 p.m. UTC | #6
Hi,

On 29/04/2025 12:23, Ziqi Chen wrote:
> Hi Neil,
> 
> Thanks for report~
> I got one rb3gen2 device and be able to reproduce this issue today.
> I made a change to fix this corner case. It already been verifed from
> my side. Could also help double check attached patch whether can fix this issue from your side?

This also fixes the issue on my side

Please add:

Tested-by: Neil Armstrong <neil.armstrong@linaro.org> # on RB3Gen2

If you send it on the lists.

Thanks,
Neil


> 
> Thanks,
> Ziqi
> 
> On 4/25/2025 3:43 PM, neil.armstrong@linaro.org wrote:
>> Hi,
>>
>> On 25/04/2025 09:29, Ziqi Chen wrote:
>>> Hi Neil,
>>>
>>> We analyzed this issue today , I don't think it is related to this patch:
>>
>> The bisect point to the patch where the issue appears, it may be another one of this patchset.
>>
>>>
>>> [    9.383728] ufshcd-qcom 1d84000.ufshc: pwr ctrl cmd 0x2 with mode 0x11 completion timeout
>>> ...
>>> ...
>>> ...
>>> [    9.588545] host_regs: 00000090: 00000002 15710000 00000002 00000003
>>>
>>> The timeout error log shows the mode 0x11 is the correct argument3 value of this pwr mode DME_SET cmd.
>>>
>>> int ufshcd_uic_change_pwr_mode(struct ufs_hba *hba, u8 mode)
>>> {
>>>      struct uic_command uic_cmd = {
>>>          .command = UIC_CMD_DME_SET,
>>>          .argument1 = UIC_ARG_MIB(PA_PWRMODE),
>>>          .argument3 = mode,
>>>      };
>>> ...
>>> ...
>>>   dev_err(hba->dev,
>>>          "uic cmd 0x%x with arg3 0x%x completion timeout\n",
>>>           uic_cmd->command, uic_cmd->argument3);
>>>
>>>
>>> This prints means that we gave correct argument3 here.
>>>
>>> But from the host register dump , we can see the argument3 written to register (address 0x***9C) is '0x00000003' which is a invalid value.
>>>
>>> And according to the UFSHCI JEDEC, the return value of ConfigResultCode regiser (address 0x***0x98) is 0x00000002 also told us the value written to argument3 register is a INVALID_MIB_ATTRIBUTE_VALUE, this is the reason causes this timeout issue.
>>>
>>> " 02h INVALID_MIB_ATTRIBUTE_VALUE "
>>>
>>> However, though we don't know the final root cause, we can know there is mistake occur during writing register. The argument3 value is 0x11, but after writing to register , the readback value from register is 0x3... Anyway , this patch didn't touch the path of register writing.
>>>
>>> Are you easy to reproduce this issue ? How many instances do you observed ? We never received similar report from our internal UFS test team and stabitily test team. If it is easy to reproduce , you can add readback prints at the place where after writing register to check it.
>>
>> The issue is easy to reproduce, just boot the rb3gen2 with vanilla v6.15-rc1 and default defconfig.
>>
>>>
>>> ufshcd_dispatch_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd)
>>> {
>>>      lockdep_assert_held(&hba->uic_cmd_mutex);
>>>
>>>      WARN_ON(hba->active_uic_cmd);
>>>
>>>      hba->active_uic_cmd = uic_cmd;
>>>
>>>      /* Write Args */
>>>      ufshcd_writel(hba, uic_cmd->argument1, REG_UIC_COMMAND_ARG_1);
>>>      ufshcd_writel(hba, uic_cmd->argument2, REG_UIC_COMMAND_ARG_2);
>>>      ufshcd_writel(hba, uic_cmd->argument3, REG_UIC_COMMAND_ARG_3);
>>>
>>> -> you can add print here:
>>>    pr_err ("READ BACK argument3 from register 0x%x: 0x%x",
>>>        REG_UIC_COMMAND_ARG_3, ufshcd_readl(hba, REG_UIC_COMMAND_ARG_3);
>>
>> I've run the kernel with:
>> =====================><=================================
>> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
>> index 0534390c2a35..232328ff6365 100644
>> --- a/drivers/ufs/core/ufshcd.c
>> +++ b/drivers/ufs/core/ufshcd.c
>> @@ -2494,6 +2494,14 @@ ufshcd_dispatch_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd)
>>          ufshcd_writel(hba, uic_cmd->argument2, REG_UIC_COMMAND_ARG_2);
>>          ufshcd_writel(hba, uic_cmd->argument3, REG_UIC_COMMAND_ARG_3);
>>
>> +       {
>> +               uint32_t reg3 = ufshcd_readl(hba, REG_UIC_COMMAND_ARG_3);
>> +
>> +               if (reg3 != uic_cmd->argument3)
>> +                       pr_err("different READ BACK argument3 from register 0x% != 0x%x",
>> +                               uic_cmd->argument3, reg3);
>> +       }
>> +
>>          ufshcd_add_uic_command_trace(hba, uic_cmd, UFS_CMD_SEND);
>>
>>          /* Write UIC Cmd */
>> =====================><=================================
>>
>> And this print never appears.
>>
>> But the log clearly shows the kernel tries to scale back from gear1 to gear4:
>>
>>  >> [   10.538102] ufshcd-qcom 1d84000.ufshc: ufshcd_scale_gear: failed err -110, old gear: (tx 1 rx 1), new gear: (tx 4 rx 4)
>>
>> did you validate the gear scaling on the sc7280 with the mainline UFS & PHY drivers ?
>>
>> Neil
>>
>>>
>>>
>>>
>>> Best Regards,
>>> Ziqi
>>>
>>> On 4/24/2025 11:35 PM, Neil Armstrong wrote:
>>>> Hi,
>>>>
>>>> On 13/02/2025 09:00, Ziqi Chen wrote:
>>>>> From: Can Guo <quic_cang@quicinc.com>
>>>>>
>>>>> With OPP V2 enabled, devfreq can scale clocks amongst multiple frequency
>>>>> plans. However, the gear speed is only toggled between min and max during
>>>>> clock scaling. Enable multi-level gear scaling by mapping clock frequencies
>>>>> to gear speeds, so that when devfreq scales clock frequencies we can put
>>>>> the UFS link at the appropriate gear speeds accordingly.
>>>>>
>>>>> Signed-off-by: Can Guo <quic_cang@quicinc.com>
>>>>> Co-developed-by: Ziqi Chen <quic_ziqichen@quicinc.com>
>>>>> Signed-off-by: Ziqi Chen <quic_ziqichen@quicinc.com>
>>>>> Reviewed-by: Bean Huo <beanhuo@micron.com>
>>>>> Reviewed-by: Bart Van Assche <bvanassche@acm.org>
>>>>> Tested-by: Neil Armstrong <neil.armstrong@linaro.org>
>>>>> ---
>>>>>
>>>>> v1 -> v2:
>>>>> Rename the lable "do_pmc" to "config_pwr_mode".
>>>>>
>>>>> v2 -> v3:
>>>>> Use assignment instead memcpy() in function ufshcd_scale_gear().
>>>>>
>>>>> v3 -> v4:
>>>>> Typo fixed for commit message.
>>>>>
>>>>> v4 -> v5:
>>>>> Change the data type of "new_gear" from 'int' to 'u32'.
>>>>> ---
>>>>>   drivers/ufs/core/ufshcd.c | 44 +++++++++++++++++++++++++++++ +---------
>>>>>   1 file changed, 34 insertions(+), 10 deletions(-)
>>>>>
>>>>> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
>>>>> index 8d295cc827cc..9908c0d6a1e1 100644
>>>>> --- a/drivers/ufs/core/ufshcd.c
>>>>> +++ b/drivers/ufs/core/ufshcd.c
>>>>> @@ -1308,16 +1308,26 @@ static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
>>>>>   /**
>>>>>    * ufshcd_scale_gear - scale up/down UFS gear
>>>>>    * @hba: per adapter instance
>>>>> + * @target_gear: target gear to scale to
>>>>>    * @scale_up: True for scaling up gear and false for scaling down
>>>>>    *
>>>>>    * Return: 0 for success; -EBUSY if scaling can't happen at this time;
>>>>>    * non-zero for any other errors.
>>>>>    */
>>>>> -static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up)
>>>>> +static int ufshcd_scale_gear(struct ufs_hba *hba, u32 target_gear, bool scale_up)
>>>>>   {
>>>>>       int ret = 0;
>>>>>       struct ufs_pa_layer_attr new_pwr_info;
>>>>> +    if (target_gear) {
>>>>> +        new_pwr_info = hba->pwr_info;
>>>>> +        new_pwr_info.gear_tx = target_gear;
>>>>> +        new_pwr_info.gear_rx = target_gear;
>>>>> +
>>>>> +        goto config_pwr_mode;
>>>>> +    }
>>>>> +
>>>>> +    /* Legacy gear scaling, in case vops_freq_to_gear_speed() is not implemented */
>>>>>       if (scale_up) {
>>>>>           memcpy(&new_pwr_info, &hba->clk_scaling.saved_pwr_info,
>>>>>                  sizeof(struct ufs_pa_layer_attr));
>>>>> @@ -1338,6 +1348,7 @@ static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up)
>>>>>           }
>>>>>       }
>>>>> +config_pwr_mode:
>>>>>       /* check if the power mode needs to be changed or not? */
>>>>>       ret = ufshcd_config_pwr_mode(hba, &new_pwr_info);
>>>>>       if (ret)
>>>>> @@ -1408,15 +1419,19 @@ static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err, bool sc
>>>>>   static int ufshcd_devfreq_scale(struct ufs_hba *hba, unsigned long freq,
>>>>>                   bool scale_up)
>>>>>   {
>>>>> +    u32 old_gear = hba->pwr_info.gear_rx;
>>>>> +    u32 new_gear = 0;
>>>>>       int ret = 0;
>>>>> +    new_gear = ufshcd_vops_freq_to_gear_speed(hba, freq);
>>>>> +
>>>>>       ret = ufshcd_clock_scaling_prepare(hba, 1 * USEC_PER_SEC);
>>>>>       if (ret)
>>>>>           return ret;
>>>>>       /* scale down the gear before scaling down clocks */
>>>>>       if (!scale_up) {
>>>>> -        ret = ufshcd_scale_gear(hba, false);
>>>>> +        ret = ufshcd_scale_gear(hba, new_gear, false);
>>>>>           if (ret)
>>>>>               goto out_unprepare;
>>>>>       }
>>>>> @@ -1424,13 +1439,13 @@ static int ufshcd_devfreq_scale(struct ufs_hba *hba, unsigned long freq,
>>>>>       ret = ufshcd_scale_clks(hba, freq, scale_up);
>>>>>       if (ret) {
>>>>>           if (!scale_up)
>>>>> -            ufshcd_scale_gear(hba, true);
>>>>> +            ufshcd_scale_gear(hba, old_gear, true);
>>>>>           goto out_unprepare;
>>>>>       }
>>>>>       /* scale up the gear after scaling up clocks */
>>>>>       if (scale_up) {
>>>>> -        ret = ufshcd_scale_gear(hba, true);
>>>>> +        ret = ufshcd_scale_gear(hba, new_gear, true);
>>>>>           if (ret) {
>>>>>               ufshcd_scale_clks(hba, hba->devfreq->previous_freq,
>>>>>                         false);
>>>>> @@ -1723,6 +1738,8 @@ static ssize_t ufshcd_clkscale_enable_store(struct device *dev,
>>>>>           struct device_attribute *attr, const char *buf, size_t count)
>>>>>   {
>>>>>       struct ufs_hba *hba = dev_get_drvdata(dev);
>>>>> +    struct ufs_clk_info *clki;
>>>>> +    unsigned long freq;
>>>>>       u32 value;
>>>>>       int err = 0;
>>>>> @@ -1746,14 +1763,21 @@ static ssize_t ufshcd_clkscale_enable_store(struct device *dev,
>>>>>       if (value) {
>>>>>           ufshcd_resume_clkscaling(hba);
>>>>> -    } else {
>>>>> -        ufshcd_suspend_clkscaling(hba);
>>>>> -        err = ufshcd_devfreq_scale(hba, ULONG_MAX, true);
>>>>> -        if (err)
>>>>> -            dev_err(hba->dev, "%s: failed to scale clocks up %d\n",
>>>>> -                    __func__, err);
>>>>> +        goto out_rel;
>>>>>       }
>>>>> +    clki = list_first_entry(&hba->clk_list_head, struct ufs_clk_info, list);
>>>>> +    freq = clki->max_freq;
>>>>> +
>>>>> +    ufshcd_suspend_clkscaling(hba);
>>>>> +    err = ufshcd_devfreq_scale(hba, freq, true);
>>>>> +    if (err)
>>>>> +        dev_err(hba->dev, "%s: failed to scale clocks up %d\n",
>>>>> +                __func__, err);
>>>>> +    else
>>>>> +        hba->clk_scaling.target_freq = freq;
>>>>> +
>>>>> +out_rel:
>>>>>       ufshcd_release(hba);
>>>>>       ufshcd_rpm_put_sync(hba);
>>>>>   out:
>>>>
>>>> This change causes UFS crash on the RB3gen2, after UFS successfully probe and scan:
>>>>
>>>> [    9.383728] ufshcd-qcom 1d84000.ufshc: pwr ctrl cmd 0x2 with mode 0x11 completion timeout
>>>> [    9.393272] ufshcd-qcom 1d84000.ufshc: UFS Host state=1
>>>> [    9.403126] msm_dpu ae01000.display-controller: [drm:adreno_request_fw [msm]] *ERROR* failed to load a660_sqe.fw
>>>> [    9.408484] ufshcd-qcom 1d84000.ufshc: 0 outstanding reqs, tasks=0x0
>>>> [    9.425577] ufshcd-qcom 1d84000.ufshc: saved_err=0x0, saved_uic_err=0x0
>>>> [    9.432433] ufshcd-qcom 1d84000.ufshc: Device power mode=1, UIC link state=1
>>>> [    9.439716] ufshcd-qcom 1d84000.ufshc: PM in progress=0, sys. suspended=0
>>>> [    9.446742] ufshcd-qcom 1d84000.ufshc: Auto BKOPS=0, Host self- block=0
>>>> [    9.453468] ufshcd-qcom 1d84000.ufshc: Clk gate=1
>>>> ...
>>>> [   10.529259] ufshcd-qcom 1d84000.ufshc: ufshcd_change_power_mode: power mode change failed -110
>>>> [   10.538102] ufshcd-qcom 1d84000.ufshc: ufshcd_scale_gear: failed err -110, old gear: (tx 1 rx 1), new gear: (tx 4 rx 4)
>>>> [   10.542841] WARNING: CPU: 0 PID: 274 at drivers/ufs/core/ ufshcd.c:5504 ufshcd_sl_intr+0x64c/0x6a4
>>>> ...
>>>>
>>>> CI Run sample: https://git.codelinaro.org/linaro/qcomlt/ci/staging/ cdba- tester/-/jobs/233352#L1479
>>>>
>>>> Bisect run log:
>>>> # bad: [0af2f6be1b4281385b618cb86ad946eded089ac8] Linux 6.15-rc1
>>>> # good: [38fec10eb60d687e30c8c6b5420d86e8149f7557] Linux 6.14
>>>> git bisect start 'v6.15-rc1' 'v6.14'
>>>> # bad: [fd71def6d9abc5ae362fb9995d46049b7b0ed391] Merge tag 'for-6.15- tag' of git://git.kernel.org/pub/scm/linux/kernel/git/ kdave/linux
>>>> git bisect bad fd71def6d9abc5ae362fb9995d46049b7b0ed391
>>>> # good: [fb1ceb29b27cda91af35851ebab01f298d82162e] Merge tag 'platform- drivers-x86-v6.15-1' of git://git.kernel.org/pub/scm/ linux/kernel/git/ pdx86/platform-drivers-x86
>>>> git bisect good fb1ceb29b27cda91af35851ebab01f298d82162e
>>>> # good: [1952e19c02ae8ea0c663d30b19be14344b543068] Merge tag 'wireless- next-2025-03-20' of https://git.kernel.org/pub/scm/linux/ kernel/git/ wireless/wireless-next
>>>> git bisect good 1952e19c02ae8ea0c663d30b19be14344b543068
>>>> # bad: [1a9239bb4253f9076b5b4b2a1a4e8d7defd77a95] Merge tag 'net- next-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/ net-next
>>>> git bisect bad 1a9239bb4253f9076b5b4b2a1a4e8d7defd77a95
>>>> # good: [22093997ac9220d3c606313efbf4ce564962d095] Merge tag 'ata-6.15- rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ libata/linux
>>>> git bisect good 22093997ac9220d3c606313efbf4ce564962d095
>>>> # bad: [336b4dae6dfecc9aa53a3a68c71b9c1c1d466388] Merge tag 'iommu- updates-v6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/ iommu/linux
>>>> git bisect bad 336b4dae6dfecc9aa53a3a68c71b9c1c1d466388
>>>> # bad: [0711f1966a523d77d4c5f00776a7bd073d56251a] scsi: mpt3sas: Fix buffer overflow in mpt3sas_send_mctp_passthru_req()
>>>> git bisect bad 0711f1966a523d77d4c5f00776a7bd073d56251a
>>>> # good: [369552fd03f296261023872b8fc983d1fc55c8e9] Merge patch series "mpt3sas driver udpates"
>>>> git bisect good 369552fd03f296261023872b8fc983d1fc55c8e9
>>>> # bad: [42273e893157501ae119ea5459f3a7d2420c56d6] Merge patch series "scsi: scsi_debug: Add more tape support"
>>>> git bisect bad 42273e893157501ae119ea5459f3a7d2420c56d6
>>>> # bad: [7e72900272b61c11f2fd4020d4f186124d0d171b] Merge patch series "Support Multi-frequency scale for UFS"
>>>> git bisect bad 7e72900272b61c11f2fd4020d4f186124d0d171b
>>>> # good: [c02fe9e222d16bed8c270608c42f77bc62562ac3] scsi: ufs: qcom: Implement the freq_to_gear_speed() vop
>>>> git bisect good c02fe9e222d16bed8c270608c42f77bc62562ac3
>>>> # bad: [eff26ad4c34fc78303c14be749e10ca61c4d211f] scsi: ufs: core: Check if scaling up is required when disable clkscale
>>>> git bisect bad eff26ad4c34fc78303c14be749e10ca61c4d211f
>>>> # bad: [129b44c27c8a51cb74b2f68fde57f2a2e7f5696b] scsi: ufs: core: Enable multi-level gear scaling
>>>> git bisect bad 129b44c27c8a51cb74b2f68fde57f2a2e7f5696b
>>>> # first bad commit: [129b44c27c8a51cb74b2f68fde57f2a2e7f5696b] scsi: ufs: core: Enable multi-level gear scaling
>>>>
>>>> #regzbot introduced 129b44c27c8a51cb74b2f68fde57f2a2e7f5696b
>>>>
>>>> Thanks,
>>>> Neil
>>>>
>>>
>>
diff mbox series

Patch

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 8d295cc827cc..9908c0d6a1e1 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -1308,16 +1308,26 @@  static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
 /**
  * ufshcd_scale_gear - scale up/down UFS gear
  * @hba: per adapter instance
+ * @target_gear: target gear to scale to
  * @scale_up: True for scaling up gear and false for scaling down
  *
  * Return: 0 for success; -EBUSY if scaling can't happen at this time;
  * non-zero for any other errors.
  */
-static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up)
+static int ufshcd_scale_gear(struct ufs_hba *hba, u32 target_gear, bool scale_up)
 {
 	int ret = 0;
 	struct ufs_pa_layer_attr new_pwr_info;
 
+	if (target_gear) {
+		new_pwr_info = hba->pwr_info;
+		new_pwr_info.gear_tx = target_gear;
+		new_pwr_info.gear_rx = target_gear;
+
+		goto config_pwr_mode;
+	}
+
+	/* Legacy gear scaling, in case vops_freq_to_gear_speed() is not implemented */
 	if (scale_up) {
 		memcpy(&new_pwr_info, &hba->clk_scaling.saved_pwr_info,
 		       sizeof(struct ufs_pa_layer_attr));
@@ -1338,6 +1348,7 @@  static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up)
 		}
 	}
 
+config_pwr_mode:
 	/* check if the power mode needs to be changed or not? */
 	ret = ufshcd_config_pwr_mode(hba, &new_pwr_info);
 	if (ret)
@@ -1408,15 +1419,19 @@  static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err, bool sc
 static int ufshcd_devfreq_scale(struct ufs_hba *hba, unsigned long freq,
 				bool scale_up)
 {
+	u32 old_gear = hba->pwr_info.gear_rx;
+	u32 new_gear = 0;
 	int ret = 0;
 
+	new_gear = ufshcd_vops_freq_to_gear_speed(hba, freq);
+
 	ret = ufshcd_clock_scaling_prepare(hba, 1 * USEC_PER_SEC);
 	if (ret)
 		return ret;
 
 	/* scale down the gear before scaling down clocks */
 	if (!scale_up) {
-		ret = ufshcd_scale_gear(hba, false);
+		ret = ufshcd_scale_gear(hba, new_gear, false);
 		if (ret)
 			goto out_unprepare;
 	}
@@ -1424,13 +1439,13 @@  static int ufshcd_devfreq_scale(struct ufs_hba *hba, unsigned long freq,
 	ret = ufshcd_scale_clks(hba, freq, scale_up);
 	if (ret) {
 		if (!scale_up)
-			ufshcd_scale_gear(hba, true);
+			ufshcd_scale_gear(hba, old_gear, true);
 		goto out_unprepare;
 	}
 
 	/* scale up the gear after scaling up clocks */
 	if (scale_up) {
-		ret = ufshcd_scale_gear(hba, true);
+		ret = ufshcd_scale_gear(hba, new_gear, true);
 		if (ret) {
 			ufshcd_scale_clks(hba, hba->devfreq->previous_freq,
 					  false);
@@ -1723,6 +1738,8 @@  static ssize_t ufshcd_clkscale_enable_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
 {
 	struct ufs_hba *hba = dev_get_drvdata(dev);
+	struct ufs_clk_info *clki;
+	unsigned long freq;
 	u32 value;
 	int err = 0;
 
@@ -1746,14 +1763,21 @@  static ssize_t ufshcd_clkscale_enable_store(struct device *dev,
 
 	if (value) {
 		ufshcd_resume_clkscaling(hba);
-	} else {
-		ufshcd_suspend_clkscaling(hba);
-		err = ufshcd_devfreq_scale(hba, ULONG_MAX, true);
-		if (err)
-			dev_err(hba->dev, "%s: failed to scale clocks up %d\n",
-					__func__, err);
+		goto out_rel;
 	}
 
+	clki = list_first_entry(&hba->clk_list_head, struct ufs_clk_info, list);
+	freq = clki->max_freq;
+
+	ufshcd_suspend_clkscaling(hba);
+	err = ufshcd_devfreq_scale(hba, freq, true);
+	if (err)
+		dev_err(hba->dev, "%s: failed to scale clocks up %d\n",
+				__func__, err);
+	else
+		hba->clk_scaling.target_freq = freq;
+
+out_rel:
 	ufshcd_release(hba);
 	ufshcd_rpm_put_sync(hba);
 out: