diff mbox series

[v4,16/17] scsi: ufs: Optimize the command queueing code

Message ID 20211203231950.193369-17-bvanassche@acm.org
State New
Headers show
Series UFS patches for kernel v5.17 | expand

Commit Message

Bart Van Assche Dec. 3, 2021, 11:19 p.m. UTC
Remove the clock scaling lock from ufshcd_queuecommand() since it is a
performance bottleneck. Instead check the SCSI device budget bitmaps in
the code that waits for ongoing ufshcd_queuecommand() calls. A bit is
set in sdev->budget_map just before scsi_queue_rq() is called and a bit
is cleared from that bitmap if scsi_queue_rq() does not submit the
request or after the request has finished. See also the
blk_mq_{get,put}_dispatch_budget() calls in the block layer.

There is no risk for a livelock since the block layer delays queue
reruns if queueing a request fails because the SCSI host has been
blocked.

Cc: Asutosh Das (asd) <asutoshd@codeaurora.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/ufs/ufshcd.c | 33 +++++++++++++++++++++++----------
 drivers/scsi/ufs/ufshcd.h |  1 +
 2 files changed, 24 insertions(+), 10 deletions(-)

Comments

Asutosh Das (asd) Dec. 6, 2021, 10:41 p.m. UTC | #1
On 12/3/2021 3:19 PM, Bart Van Assche wrote:
> Remove the clock scaling lock from ufshcd_queuecommand() since it is a
> performance bottleneck. Instead check the SCSI device budget bitmaps in
> the code that waits for ongoing ufshcd_queuecommand() calls. A bit is
> set in sdev->budget_map just before scsi_queue_rq() is called and a bit
> is cleared from that bitmap if scsi_queue_rq() does not submit the
> request or after the request has finished. See also the
> blk_mq_{get,put}_dispatch_budget() calls in the block layer.
> 
> There is no risk for a livelock since the block layer delays queue
> reruns if queueing a request fails because the SCSI host has been
> blocked.
> 
> Cc: Asutosh Das (asd) <asutoshd@codeaurora.org>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---

Reviewed-by: Asutosh Das <asutoshd@codeaurora.org>

>   drivers/scsi/ufs/ufshcd.c | 33 +++++++++++++++++++++++----------
>   drivers/scsi/ufs/ufshcd.h |  1 +
>   2 files changed, 24 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 9f0a1f637030..650dddf960c2 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -1070,13 +1070,31 @@ static bool ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba,
>   	return false;
>   }
>   
> +/*
> + * Determine the number of pending commands by counting the bits in the SCSI
> + * device budget maps. This approach has been selected because a bit is set in
> + * the budget map before scsi_host_queue_ready() checks the host_self_blocked
> + * flag. The host_self_blocked flag can be modified by calling
> + * scsi_block_requests() or scsi_unblock_requests().
> + */
> +static u32 ufshcd_pending_cmds(struct ufs_hba *hba)
> +{
> +	struct scsi_device *sdev;
> +	u32 pending = 0;
> +
> +	shost_for_each_device(sdev, hba->host)
> +		pending += sbitmap_weight(&sdev->budget_map);
> +
> +	return pending;
> +}
> +
>   static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
>   					u64 wait_timeout_us)
>   {
>   	unsigned long flags;
>   	int ret = 0;
>   	u32 tm_doorbell;
> -	u32 tr_doorbell;
> +	u32 tr_pending;
>   	bool timeout = false, do_last_check = false;
>   	ktime_t start;
>   
> @@ -1094,8 +1112,8 @@ static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
>   		}
>   
>   		tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
> -		tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
> -		if (!tm_doorbell && !tr_doorbell) {
> +		tr_pending = ufshcd_pending_cmds(hba);
> +		if (!tm_doorbell && !tr_pending) {
>   			timeout = false;
>   			break;
>   		} else if (do_last_check) {
> @@ -1115,12 +1133,12 @@ static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
>   			do_last_check = true;
>   		}
>   		spin_lock_irqsave(hba->host->host_lock, flags);
> -	} while (tm_doorbell || tr_doorbell);
> +	} while (tm_doorbell || tr_pending);
>   
>   	if (timeout) {
>   		dev_err(hba->dev,
>   			"%s: timedout waiting for doorbell to clear (tm=0x%x, tr=0x%x)\n",
> -			__func__, tm_doorbell, tr_doorbell);
> +			__func__, tm_doorbell, tr_pending);
>   		ret = -EBUSY;
>   	}
>   out:
> @@ -2681,9 +2699,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
>   
>   	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
>   
> -	if (!down_read_trylock(&hba->clk_scaling_lock))
> -		return SCSI_MLQUEUE_HOST_BUSY;
> -
>   	/*
>   	 * Allows the UFS error handler to wait for prior ufshcd_queuecommand()
>   	 * calls.
> @@ -2772,8 +2787,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
>   out:
>   	rcu_read_unlock();
>   
> -	up_read(&hba->clk_scaling_lock);
> -
>   	if (ufs_trigger_eh()) {
>   		unsigned long flags;
>   
> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
> index 8e942762e668..88c20f3608c2 100644
> --- a/drivers/scsi/ufs/ufshcd.h
> +++ b/drivers/scsi/ufs/ufshcd.h
> @@ -778,6 +778,7 @@ struct ufs_hba_monitor {
>    * @clk_list_head: UFS host controller clocks list node head
>    * @pwr_info: holds current power mode
>    * @max_pwr_info: keeps the device max valid pwm
> + * @clk_scaling_lock: used to serialize device commands and clock scaling
>    * @desc_size: descriptor sizes reported by device
>    * @urgent_bkops_lvl: keeps track of urgent bkops level for device
>    * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for
>
Bart Van Assche Dec. 8, 2021, 5:53 p.m. UTC | #2
On 12/8/21 9:28 AM, Asutosh Das (asd) wrote:
> On 12/6/2021 2:41 PM, Asutosh Das (asd) wrote:
>>> +static u32 ufshcd_pending_cmds(struct ufs_hba *hba)
>>> +{
>>> +    struct scsi_device *sdev;
>>> +    u32 pending = 0;
>>> +
>>> +    shost_for_each_device(sdev, hba->host)
>>> +        pending += sbitmap_weight(&sdev->budget_map);
>>> +
> I was porting this change to my downstream code and it occurred to me that in a high IO rate scenario it's possible that bits in the budget_map may be set even when that particular IO may not be issued to driver. So there would unnecessary waiting for that to be cleared.
> Do you think it's possible?
> I think we should wait only for requests which are already started.
> e.g. blk_mq_tagset_busy_iter() ?

Hi Asutosh,

Using blk_mq_tagset_busy_iter() would be racy since the "busy" state is set
after host_self_blocked has been checked.

Checking the budget_map should work fine since a bit in that bitmap is set
just before scsi_queue_rq() is called and since the corresponding bit is
cleared from that bitmap if scsi_queue_rq() fails or if a command is completed.

See also the output of the following command:

git grep -nHE 'blk_mq_(get|put)_dispatch_budget\('

See also the blk_mq_release_budgets() call in blk_mq_dispatch_rq_list().

Thanks,

Bart.
Bjorn Andersson Dec. 14, 2021, 4:04 a.m. UTC | #3
On Fri 03 Dec 15:19 PST 2021, Bart Van Assche wrote:

> Remove the clock scaling lock from ufshcd_queuecommand() since it is a
> performance bottleneck. Instead check the SCSI device budget bitmaps in
> the code that waits for ongoing ufshcd_queuecommand() calls. A bit is
> set in sdev->budget_map just before scsi_queue_rq() is called and a bit
> is cleared from that bitmap if scsi_queue_rq() does not submit the
> request or after the request has finished. See also the
> blk_mq_{get,put}_dispatch_budget() calls in the block layer.
> 
> There is no risk for a livelock since the block layer delays queue
> reruns if queueing a request fails because the SCSI host has been
> blocked.
> 

This patch landed between next-20211203 and today's (20211210)
linux-next/master and prevents all Qualcomm boards I've tested to boot.

Sometimes it locks up right around probe, sometimes I actually get some
partitions, but attempts to then access the storage media (e.g. fdisk
-l) results in one or more of my CPUs to be unresponsive.

> Cc: Asutosh Das (asd) <asutoshd@codeaurora.org>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  drivers/scsi/ufs/ufshcd.c | 33 +++++++++++++++++++++++----------
>  drivers/scsi/ufs/ufshcd.h |  1 +
>  2 files changed, 24 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 9f0a1f637030..650dddf960c2 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -1070,13 +1070,31 @@ static bool ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba,
>  	return false;
>  }
>  
> +/*
> + * Determine the number of pending commands by counting the bits in the SCSI
> + * device budget maps. This approach has been selected because a bit is set in
> + * the budget map before scsi_host_queue_ready() checks the host_self_blocked
> + * flag. The host_self_blocked flag can be modified by calling
> + * scsi_block_requests() or scsi_unblock_requests().
> + */
> +static u32 ufshcd_pending_cmds(struct ufs_hba *hba)
> +{
> +	struct scsi_device *sdev;
> +	u32 pending = 0;
> +
> +	shost_for_each_device(sdev, hba->host)

As far as I can tell, this will crab walk across hba->host->__devices,
while grabbing:

        spin_lock_irqsave(shost->host_lock, flags);

which afaict is:

        spin_lock_irqsave(hba->host->host_lock, flags);



> +		pending += sbitmap_weight(&sdev->budget_map);
> +
> +	return pending;
> +}
> +
>  static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
>  					u64 wait_timeout_us)
>  {
>  	unsigned long flags;
>  	int ret = 0;
>  	u32 tm_doorbell;
> -	u32 tr_doorbell;
> +	u32 tr_pending;
>  	bool timeout = false, do_last_check = false;
>  	ktime_t start;
>  
> @@ -1094,8 +1112,8 @@ static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,

But just before entering this loop you do:

        spin_lock_irqsave(hba->host->host_lock, flags);

In other words, you're taking the same spinlock twice, while being in
ufshcd_scsi_block_requests(). To me this seems that if we ever enter
this code path (i.e. try to do clock scaling) we will deadlock one CPU.

Can you please help me understand what I'm missing? Or how you tested
this?

Thanks,
Bjorn

>  		}
>  
>  		tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
> -		tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
> -		if (!tm_doorbell && !tr_doorbell) {
> +		tr_pending = ufshcd_pending_cmds(hba);
> +		if (!tm_doorbell && !tr_pending) {
>  			timeout = false;
>  			break;
>  		} else if (do_last_check) {
> @@ -1115,12 +1133,12 @@ static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
>  			do_last_check = true;
>  		}
>  		spin_lock_irqsave(hba->host->host_lock, flags);
> -	} while (tm_doorbell || tr_doorbell);
> +	} while (tm_doorbell || tr_pending);
>  
>  	if (timeout) {
>  		dev_err(hba->dev,
>  			"%s: timedout waiting for doorbell to clear (tm=0x%x, tr=0x%x)\n",
> -			__func__, tm_doorbell, tr_doorbell);
> +			__func__, tm_doorbell, tr_pending);
>  		ret = -EBUSY;
>  	}
>  out:
> @@ -2681,9 +2699,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
>  
>  	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
>  
> -	if (!down_read_trylock(&hba->clk_scaling_lock))
> -		return SCSI_MLQUEUE_HOST_BUSY;
> -
>  	/*
>  	 * Allows the UFS error handler to wait for prior ufshcd_queuecommand()
>  	 * calls.
> @@ -2772,8 +2787,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
>  out:
>  	rcu_read_unlock();
>  
> -	up_read(&hba->clk_scaling_lock);
> -
>  	if (ufs_trigger_eh()) {
>  		unsigned long flags;
>  
> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
> index 8e942762e668..88c20f3608c2 100644
> --- a/drivers/scsi/ufs/ufshcd.h
> +++ b/drivers/scsi/ufs/ufshcd.h
> @@ -778,6 +778,7 @@ struct ufs_hba_monitor {
>   * @clk_list_head: UFS host controller clocks list node head
>   * @pwr_info: holds current power mode
>   * @max_pwr_info: keeps the device max valid pwm
> + * @clk_scaling_lock: used to serialize device commands and clock scaling
>   * @desc_size: descriptor sizes reported by device
>   * @urgent_bkops_lvl: keeps track of urgent bkops level for device
>   * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for
>
Bart Van Assche Dec. 14, 2021, 4:57 a.m. UTC | #4
On 12/13/21 20:04, Bjorn Andersson wrote:
> Can you please help me understand what I'm missing? Or how you tested
> this?

Hi Bjorn,

Unfortunately I don't have access to a test setup with a Qualcomm 
chipset. Please help verifying whether this patch is sufficient as a fix 
(see also 
https://lore.kernel.org/linux-scsi/101fa5ba-6d74-6c51-aaa2-e6c6d98f6bc7@acm.org/):

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 6d692aae67ce..244eddf0caf8 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -1084,7 +1084,9 @@ static u32 ufshcd_pending_cmds(struct ufs_hba *hba)
   	struct scsi_device *sdev;
   	u32 pending = 0;

-	shost_for_each_device(sdev, hba->host)
+	lockdep_assert_held(hba->host->host_lock);
+
+	__shost_for_each_device(sdev, hba->host)
   		pending += sbitmap_weight(&sdev->budget_map);

   	return pending;

Thanks,

Bart.
Bjorn Andersson Dec. 15, 2021, 3:52 a.m. UTC | #5
On Mon 13 Dec 22:57 CST 2021, Bart Van Assche wrote:

> On 12/13/21 20:04, Bjorn Andersson wrote:
> > Can you please help me understand what I'm missing? Or how you tested
> > this?
> 
> Hi Bjorn,
> 
> Unfortunately I don't have access to a test setup with a Qualcomm chipset.
> Please help verifying whether this patch is sufficient as a fix (see also https://lore.kernel.org/linux-scsi/101fa5ba-6d74-6c51-aaa2-e6c6d98f6bc7@acm.org/):

Thanks for the proposed fix!

> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 6d692aae67ce..244eddf0caf8 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -1084,7 +1084,9 @@ static u32 ufshcd_pending_cmds(struct ufs_hba *hba)
>   	struct scsi_device *sdev;
>   	u32 pending = 0;
> 
> -	shost_for_each_device(sdev, hba->host)
> +	lockdep_assert_held(hba->host->host_lock);
> +
> +	__shost_for_each_device(sdev, hba->host)

I can confirm that this resolves the issue I saw and allow me to boot my
boards again. Can you please spin this in a patch?

Feel free to add my:

Tested-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>

Thanks,
Bjorn

>   		pending += sbitmap_weight(&sdev->budget_map);
> 
>   	return pending;
> 
> Thanks,
> 
> Bart.
Bart Van Assche Dec. 15, 2021, 6:44 p.m. UTC | #6
On 12/14/21 7:52 PM, Bjorn Andersson wrote:
> I can confirm that this resolves the issue I saw and allow me to boot my
> boards again. Can you please spin this in a patch?

Thanks for having tested this patch. Since Bean already posted this patch in the
proper form, please take a look at
https://lore.kernel.org/linux-scsi/20211214120537.531628-1-huobean@gmail.com/

Thanks,

Bart.
diff mbox series

Patch

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 9f0a1f637030..650dddf960c2 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -1070,13 +1070,31 @@  static bool ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba,
 	return false;
 }
 
+/*
+ * Determine the number of pending commands by counting the bits in the SCSI
+ * device budget maps. This approach has been selected because a bit is set in
+ * the budget map before scsi_host_queue_ready() checks the host_self_blocked
+ * flag. The host_self_blocked flag can be modified by calling
+ * scsi_block_requests() or scsi_unblock_requests().
+ */
+static u32 ufshcd_pending_cmds(struct ufs_hba *hba)
+{
+	struct scsi_device *sdev;
+	u32 pending = 0;
+
+	shost_for_each_device(sdev, hba->host)
+		pending += sbitmap_weight(&sdev->budget_map);
+
+	return pending;
+}
+
 static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
 					u64 wait_timeout_us)
 {
 	unsigned long flags;
 	int ret = 0;
 	u32 tm_doorbell;
-	u32 tr_doorbell;
+	u32 tr_pending;
 	bool timeout = false, do_last_check = false;
 	ktime_t start;
 
@@ -1094,8 +1112,8 @@  static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
 		}
 
 		tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
-		tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
-		if (!tm_doorbell && !tr_doorbell) {
+		tr_pending = ufshcd_pending_cmds(hba);
+		if (!tm_doorbell && !tr_pending) {
 			timeout = false;
 			break;
 		} else if (do_last_check) {
@@ -1115,12 +1133,12 @@  static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
 			do_last_check = true;
 		}
 		spin_lock_irqsave(hba->host->host_lock, flags);
-	} while (tm_doorbell || tr_doorbell);
+	} while (tm_doorbell || tr_pending);
 
 	if (timeout) {
 		dev_err(hba->dev,
 			"%s: timedout waiting for doorbell to clear (tm=0x%x, tr=0x%x)\n",
-			__func__, tm_doorbell, tr_doorbell);
+			__func__, tm_doorbell, tr_pending);
 		ret = -EBUSY;
 	}
 out:
@@ -2681,9 +2699,6 @@  static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 
 	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
 
-	if (!down_read_trylock(&hba->clk_scaling_lock))
-		return SCSI_MLQUEUE_HOST_BUSY;
-
 	/*
 	 * Allows the UFS error handler to wait for prior ufshcd_queuecommand()
 	 * calls.
@@ -2772,8 +2787,6 @@  static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 out:
 	rcu_read_unlock();
 
-	up_read(&hba->clk_scaling_lock);
-
 	if (ufs_trigger_eh()) {
 		unsigned long flags;
 
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 8e942762e668..88c20f3608c2 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -778,6 +778,7 @@  struct ufs_hba_monitor {
  * @clk_list_head: UFS host controller clocks list node head
  * @pwr_info: holds current power mode
  * @max_pwr_info: keeps the device max valid pwm
+ * @clk_scaling_lock: used to serialize device commands and clock scaling
  * @desc_size: descriptor sizes reported by device
  * @urgent_bkops_lvl: keeps track of urgent bkops level for device
  * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for