diff mbox series

[v2,2/2] ufs: core: requeue MCQ abort request

Message ID 20240902021805.1125-3-peter.wang@mediatek.com
State Superseded
Headers show
Series fix MCQ abort defect | expand

Commit Message

Peter Wang (王信友) Sept. 2, 2024, 2:18 a.m. UTC
From: Peter Wang <peter.wang@mediatek.com>

MCQ aborts a command using one of the two methods below:
1. Nullify the UTRD: the host controller skips this transfer request and
   posts a Completion Queue entry to the host SW with OCS=ABORTED.
2. SQ cleanup: the host controller posts to the Completion Queue
   to update the OCS field with ABORTED.

For these two cases, set a flag to notify SCSI to requeue the
command after receiving OCS_ABORTED.

Fixes: ab248643d3d6 ("scsi: ufs: core: Add error handling for MCQ mode")
Cc: stable@vger.kernel.org
Signed-off-by: Peter Wang <peter.wang@mediatek.com>
---
 drivers/ufs/core/ufs-mcq.c |  1 +
 drivers/ufs/core/ufshcd.c  | 21 ++++++++-------------
 include/ufs/ufshcd.h       |  2 ++
 3 files changed, 11 insertions(+), 13 deletions(-)

Comments

Bart Van Assche Sept. 5, 2024, 9:16 p.m. UTC | #1
On 9/1/24 7:18 PM, peter.wang@mediatek.com wrote:
> diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
> index afd9541f4bd8..abdc55a8b960 100644
> --- a/drivers/ufs/core/ufs-mcq.c
> +++ b/drivers/ufs/core/ufs-mcq.c
> @@ -642,6 +642,7 @@ static bool ufshcd_mcq_sqe_search(struct ufs_hba *hba,
>   		match = le64_to_cpu(utrd->command_desc_base_addr) & CQE_UCD_BA;
>   		if (addr == match) {
>   			ufshcd_mcq_nullify_sqe(utrd);
> +			lrbp->host_initiate_abort = true;
>   			ret = true;
>   			goto out;
>   		}

I think this is wrong. The above code is only executed if the SCSI core
decides to abort a SCSI command. It is up to the SCSI core to decide
whether or not to retry an aborted command.

> -	/* Release cmd in MCQ mode if abort succeeds */
> -	if (hba->mcq_enabled && (*ret == 0)) {
> -		hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd));
> -		if (!hwq)
> -			return 0;
> -		spin_lock_irqsave(&hwq->cq_lock, flags);
> -		if (ufshcd_cmd_inflight(lrbp->cmd))
> -			ufshcd_release_scsi_cmd(hba, lrbp);
> -		spin_unlock_irqrestore(&hwq->cq_lock, flags);
> -	}
> +	/* Host will post to CQ with OCS_ABORTED after SQ cleanup */
> +	if (hba->mcq_enabled && (*ret == 0))
> +		lrbp->host_initiate_abort = true;

I think this code is racy because the UFS host controller may have 
posted a completion before the "lrbp->host_initiate_abort = true"
assignment is executed.

> + * @host_initiate_abort: Abort flag initiated by host

What is "Abort flag"? Please consider renaming "host_initiate_abort"
into "abort_initiated_by_err_handler" since I think that aborted
commands should only be retried if these have been aborted by
ufshcd_err_handler().

Thanks,

Bart.
Peter Wang (王信友) Sept. 9, 2024, 3:40 a.m. UTC | #2
On Thu, 2024-09-05 at 14:16 -0700, Bart Van Assche wrote:
>  	 
> External email : Please do not click links or open attachments until
> you have verified the sender or the content.
>  On 9/1/24 7:18 PM, peter.wang@mediatek.com wrote:
> > diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-
> mcq.c
> > index afd9541f4bd8..abdc55a8b960 100644
> > --- a/drivers/ufs/core/ufs-mcq.c
> > +++ b/drivers/ufs/core/ufs-mcq.c
> > @@ -642,6 +642,7 @@ static bool ufshcd_mcq_sqe_search(struct
> ufs_hba *hba,
> >   match = le64_to_cpu(utrd->command_desc_base_addr) & CQE_UCD_BA;
> >   if (addr == match) {
> >   ufshcd_mcq_nullify_sqe(utrd);
> > +lrbp->host_initiate_abort = true;
> >   ret = true;
> >   goto out;
> >   }
> 
> I think this is wrong. The above code is only executed if the SCSI
> core
> decides to abort a SCSI command. It is up to the SCSI core to decide
> whether or not to retry an aborted command.
> 

Hi Bart,

This is the eh_abort_handler call flow of the SCSI error handler.
If the abort is triggered because of an error, shouldn't we retry?
Anyway, I think this case should not happen, because if a SCSI
timeout happens (30s), the host HW should not have kept the command
in the SQ for such a long time. But once it does happen,
ufshcd_mcq_sqe_search returns true and SCSI sees eh_abort_handler
fail. So I think in this case, notifying SCSI to retry this command
is appropriate.


> > -/* Release cmd in MCQ mode if abort succeeds */
> > -if (hba->mcq_enabled && (*ret == 0)) {
> > -hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd));
> > -if (!hwq)
> > -return 0;
> > -spin_lock_irqsave(&hwq->cq_lock, flags);
> > -if (ufshcd_cmd_inflight(lrbp->cmd))
> > -ufshcd_release_scsi_cmd(hba, lrbp);
> > -spin_unlock_irqrestore(&hwq->cq_lock, flags);
> > -}
> > +/* Host will post to CQ with OCS_ABORTED after SQ cleanup */
> > +if (hba->mcq_enabled && (*ret == 0))
> > +lrbp->host_initiate_abort = true;
> 
> I think this code is racy because the UFS host controller may have 
> posted a completion before the "lrbp->host_initiate_abort = true"
> assignment is executed.
> 

Yes, I should add this code before ufshcd_clear_cmd, thanks.

> > + * @host_initiate_abort: Abort flag initiated by host
> 
> What is "Abort flag"? Please consider renaming "host_initiate_abort"
> into "abort_initiated_by_err_handler" since I think that aborted
> commands should only be retried if these have been aborted by
> ufshcd_err_handler().
> 

Okay, but abort_initiated_by_err may be better, because an abort can
be initiated by either ufshcd_err_handler or the SCSI error handler.
What do you think?


> Thanks,
> 
> Bart.
diff mbox series

Patch

diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
index afd9541f4bd8..abdc55a8b960 100644
--- a/drivers/ufs/core/ufs-mcq.c
+++ b/drivers/ufs/core/ufs-mcq.c
@@ -642,6 +642,7 @@  static bool ufshcd_mcq_sqe_search(struct ufs_hba *hba,
 		match = le64_to_cpu(utrd->command_desc_base_addr) & CQE_UCD_BA;
 		if (addr == match) {
 			ufshcd_mcq_nullify_sqe(utrd);
+			lrbp->host_initiate_abort = true;
 			ret = true;
 			goto out;
 		}
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index a6f818cdef0e..fadea691d69d 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -3006,6 +3006,7 @@  static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 	ufshcd_prepare_lrbp_crypto(scsi_cmd_to_rq(cmd), lrbp);
 
 	lrbp->req_abort_skip = false;
+	lrbp->host_initiate_abort = false;
 
 	ufshcd_comp_scsi_upiu(hba, lrbp);
 
@@ -5404,7 +5405,10 @@  ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp,
 		}
 		break;
 	case OCS_ABORTED:
-		result |= DID_ABORT << 16;
+		if (lrbp->host_initiate_abort)
+			result |= DID_REQUEUE << 16;
+		else
+			result |= DID_ABORT << 16;
 		break;
 	case OCS_INVALID_COMMAND_STATUS:
 		result |= DID_REQUEUE << 16;
@@ -6472,24 +6476,15 @@  static bool ufshcd_abort_one(struct request *rq, void *priv)
 	struct Scsi_Host *shost = sdev->host;
 	struct ufs_hba *hba = shost_priv(shost);
 	struct ufshcd_lrb *lrbp = &hba->lrb[tag];
-	struct ufs_hw_queue *hwq;
-	unsigned long flags;
 
 	*ret = ufshcd_try_to_abort_task(hba, tag);
 	dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
 		hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
 		*ret ? "failed" : "succeeded");
 
-	/* Release cmd in MCQ mode if abort succeeds */
-	if (hba->mcq_enabled && (*ret == 0)) {
-		hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd));
-		if (!hwq)
-			return 0;
-		spin_lock_irqsave(&hwq->cq_lock, flags);
-		if (ufshcd_cmd_inflight(lrbp->cmd))
-			ufshcd_release_scsi_cmd(hba, lrbp);
-		spin_unlock_irqrestore(&hwq->cq_lock, flags);
-	}
+	/* Host will post to CQ with OCS_ABORTED after SQ cleanup */
+	if (hba->mcq_enabled && (*ret == 0))
+		lrbp->host_initiate_abort = true;
 
 	return *ret == 0;
 }
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 0fd2aebac728..49dd5ca8a4e7 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -173,6 +173,7 @@  struct ufs_pm_lvl_states {
  * @crypto_key_slot: the key slot to use for inline crypto (-1 if none)
  * @data_unit_num: the data unit number for the first block for inline crypto
  * @req_abort_skip: skip request abort task flag
+ * @host_initiate_abort: Abort flag initiated by host
  */
 struct ufshcd_lrb {
 	struct utp_transfer_req_desc *utr_descriptor_ptr;
@@ -202,6 +203,7 @@  struct ufshcd_lrb {
 #endif
 
 	bool req_abort_skip;
+	bool host_initiate_abort;
 };
 
 /**