diff mbox series

[13/16] fnic: allocate device reset command on the fly

Message ID 20231023091507.120828-14-hare@suse.de
State New
Headers show
Series scsi: EH rework prep patches, part 2 | expand

Commit Message

Hannes Reinecke Oct. 23, 2023, 9:15 a.m. UTC
Allocate a reset command on the fly instead of relying
on using the command which triggered the device failure.
This might fail if all available tags are busy, but in
that case it'll be safer to fall back to host reset anyway.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/scsi/fnic/fnic.h      |   1 -
 drivers/scsi/fnic/fnic_scsi.c | 113 ++++++++++++++++------------------
 drivers/scsi/snic/snic_scsi.c |   5 +-
 3 files changed, 55 insertions(+), 64 deletions(-)

Comments

Christoph Hellwig Oct. 24, 2023, 6:54 a.m. UTC | #1
Adding the fnic maintainers as they are probably most qualified to
review and test this.

On Mon, Oct 23, 2023 at 11:15:04AM +0200, Hannes Reinecke wrote:
> Allocate a reset command on the fly instead of relying
> on using the command which triggered the device failure.
> This might fail if all available tags are busy, but in
> that case it'll be safer to fall back to host reset anyway.
> 
> Signed-off-by: Hannes Reinecke <hare@suse.de>
> ---
>  drivers/scsi/fnic/fnic.h      |   1 -
>  drivers/scsi/fnic/fnic_scsi.c | 113 ++++++++++++++++------------------
>  drivers/scsi/snic/snic_scsi.c |   5 +-
>  3 files changed, 55 insertions(+), 64 deletions(-)
> 
> diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h
> index 93c68931a593..8ffcafb4687f 100644
> --- a/drivers/scsi/fnic/fnic.h
> +++ b/drivers/scsi/fnic/fnic.h
> @@ -236,7 +236,6 @@ struct fnic {
>  	unsigned int wq_count;
>  	unsigned int cq_count;
>  
> -	struct mutex sgreset_mutex;
>  	struct dentry *fnic_stats_debugfs_host;
>  	struct dentry *fnic_stats_debugfs_file;
>  	struct dentry *fnic_reset_debugfs_file;
> diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
> index 9761b2c9db48..7a8b6285a096 100644
> --- a/drivers/scsi/fnic/fnic_scsi.c
> +++ b/drivers/scsi/fnic/fnic_scsi.c
> @@ -1984,7 +1984,6 @@ static inline int fnic_queue_dr_io_req(struct fnic *fnic,
>  
>  struct fnic_pending_aborts_iter_data {
>  	struct fnic *fnic;
> -	struct scsi_cmnd *lr_sc;
>  	struct scsi_device *lun_dev;
>  	int ret;
>  };
> @@ -2002,7 +2001,7 @@ static bool fnic_pending_aborts_iter(struct scsi_cmnd *sc, void *data)
>  	DECLARE_COMPLETION_ONSTACK(tm_done);
>  	enum fnic_ioreq_state old_ioreq_state;
>  
> -	if (sc == iter_data->lr_sc || sc->device != lun_dev)
> +	if (sc->device != lun_dev)
>  		return true;
>  
>  	io_lock = fnic_io_lock_tag(fnic, abt_tag);
> @@ -2105,17 +2104,11 @@ static bool fnic_pending_aborts_iter(struct scsi_cmnd *sc, void *data)
>  		return false;
>  	}
>  	fnic_priv(sc)->state = FNIC_IOREQ_ABTS_COMPLETE;
> -
> -	/* original sc used for lr is handled by dev reset code */
> -	if (sc != iter_data->lr_sc)
> -		fnic_priv(sc)->io_req = NULL;
> +	fnic_priv(sc)->io_req = NULL;
>  	spin_unlock_irqrestore(io_lock, flags);
>  
> -	/* original sc used for lr is handled by dev reset code */
> -	if (sc != iter_data->lr_sc) {
> -		fnic_release_ioreq_buf(fnic, io_req, sc);
> -		mempool_free(io_req, fnic->io_req_pool);
> -	}
> +	fnic_release_ioreq_buf(fnic, io_req, sc);
> +	mempool_free(io_req, fnic->io_req_pool);
>  
>  	/*
>  	 * Any IO is returned during reset, it needs to call scsi_done
> @@ -2135,8 +2128,7 @@ static bool fnic_pending_aborts_iter(struct scsi_cmnd *sc, void *data)
>   * successfully aborted, 1 otherwise
>   */
>  static int fnic_clean_pending_aborts(struct fnic *fnic,
> -				     struct scsi_cmnd *lr_sc,
> -				     bool new_sc)
> +				     struct scsi_cmnd *lr_sc)
>  
>  {
>  	int ret = 0;
> @@ -2146,9 +2138,6 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
>  		.ret = SUCCESS,
>  	};
>  
> -	if (new_sc)
> -		iter_data.lr_sc = lr_sc;
> -
>  	scsi_host_busy_iter(fnic->lport->host,
>  			    fnic_pending_aborts_iter, &iter_data);
>  	if (iter_data.ret == FAILED) {
> @@ -2174,7 +2163,8 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
>   */
>  int fnic_device_reset(struct scsi_cmnd *sc)
>  {
> -	struct request *rq = scsi_cmd_to_rq(sc);
> +	struct scsi_device *sdev = sc->device;
> +	struct request *req;
>  	struct fc_lport *lp;
>  	struct fnic *fnic;
>  	struct fnic_io_req *io_req = NULL;
> @@ -2187,15 +2177,17 @@ int fnic_device_reset(struct scsi_cmnd *sc)
>  	struct scsi_lun fc_lun;
>  	struct fnic_stats *fnic_stats;
>  	struct reset_stats *reset_stats;
> -	int tag = rq->tag;
> +	int tag;
>  	DECLARE_COMPLETION_ONSTACK(tm_done);
> -	bool new_sc = 0;
>  
>  	/* Wait for rport to unblock */
> -	fc_block_scsi_eh(sc);
> +	rport = starget_to_rport(scsi_target(sdev));
> +	ret = fc_block_rport(rport);
> +	if (ret)
> +		return ret;
>  
>  	/* Get local-port, check ready and link up */
> -	lp = shost_priv(sc->device->host);
> +	lp = shost_priv(sdev->host);
>  
>  	fnic = lport_priv(lp);
>  	fnic_stats = &fnic->fnic_stats;
> @@ -2203,53 +2195,55 @@ int fnic_device_reset(struct scsi_cmnd *sc)
>  
>  	atomic64_inc(&reset_stats->device_resets);
>  
> -	rport = starget_to_rport(scsi_target(sc->device));
>  	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
> -		      "Device reset called FCID 0x%x, LUN 0x%llx sc 0x%p\n",
> -		      rport->port_id, sc->device->lun, sc);
> +		      "Device reset called FCID 0x%x, LUN 0x%llx\n",
> +		      rport->port_id, sdev->lun);
>  
>  	if (lp->state != LPORT_ST_READY || !(lp->link_up))
> -		goto fnic_device_reset_end;
> +		return ret;
>  
>  	/* Check if remote port up */
>  	if (fc_remote_port_chkready(rport)) {
>  		atomic64_inc(&fnic_stats->misc_stats.rport_not_ready);
> -		goto fnic_device_reset_end;
> +		return ret;
>  	}
>  
> -	fnic_priv(sc)->flags = FNIC_DEVICE_RESET;
> -
> -	if (unlikely(tag < 0)) {
> +	req = scsi_alloc_request(sdev->request_queue, REQ_OP_DRV_IN,
> +				 BLK_MQ_REQ_NOWAIT);
> +	if (!req) {
>  		/*
> -		 * For device reset issued through sg3utils, we let
> -		 * only one LUN_RESET to go through and use a special
> -		 * tag equal to max_tag_id so that we don't have to allocate
> -		 * or free it. It won't interact with tags
> -		 * allocated by mid layer.
> +		 * Request allocation might fail, indicating that
> +		 * all tags are busy.
> +		 * But device reset will be called only from within
> +		 * SCSI EH, at which time all I/O is stopped. So the
> +		 * only active tags would be for failed I/O, but
> +		 * when all I/O is failed it'll be better to escalate
> +		 * to host reset anyway.
>  		 */
> -		mutex_lock(&fnic->sgreset_mutex);
> -		tag = fnic->fnic_max_tag_id;
> -		new_sc = 1;
> +		FNIC_SCSI_DBG(KERN_ERR, fnic->lport->host,
> +			      "Device reset allocation failed, all tags busy\n");
> +		return ret;
>  	}
> +	sc = blk_mq_rq_to_pdu(req);
> +
> +	tag = req->tag;
>  	io_lock = fnic_io_lock_hash(fnic, sc);
>  	spin_lock_irqsave(io_lock, flags);
>  	io_req = fnic_priv(sc)->io_req;
> +	if (io_req)
> +		goto fnic_device_reset_end;
>  
> -	/*
> -	 * If there is a io_req attached to this command, then use it,
> -	 * else allocate a new one.
> -	 */
> +	io_req = mempool_alloc(fnic->io_req_pool, GFP_ATOMIC);
>  	if (!io_req) {
> -		io_req = mempool_alloc(fnic->io_req_pool, GFP_ATOMIC);
> -		if (!io_req) {
> -			spin_unlock_irqrestore(io_lock, flags);
> -			goto fnic_device_reset_end;
> -		}
> -		memset(io_req, 0, sizeof(*io_req));
> -		io_req->port_id = rport->port_id;
> -		fnic_priv(sc)->io_req = io_req;
> +		spin_unlock_irqrestore(io_lock, flags);
> +		goto fnic_device_reset_end;
>  	}
> +	memset(io_req, 0, sizeof(*io_req));
> +	io_req->port_id = rport->port_id;
> +	fnic_priv(sc)->io_req = io_req;
> +
>  	io_req->dr_done = &tm_done;
> +	fnic_priv(sc)->flags = FNIC_DEVICE_RESET;
>  	fnic_priv(sc)->state = FNIC_IOREQ_CMD_PENDING;
>  	fnic_priv(sc)->lr_status = FCPIO_INVALID_CODE;
>  	spin_unlock_irqrestore(io_lock, flags);
> @@ -2260,11 +2254,13 @@ int fnic_device_reset(struct scsi_cmnd *sc)
>  	 * issue the device reset, if enqueue failed, clean up the ioreq
>  	 * and break assoc with scsi cmd
>  	 */
> +	WRITE_ONCE(req->state, MQ_RQ_IN_FLIGHT);
>  	if (fnic_queue_dr_io_req(fnic, sc, io_req)) {
>  		spin_lock_irqsave(io_lock, flags);
>  		io_req = fnic_priv(sc)->io_req;
>  		if (io_req)
>  			io_req->dr_done = NULL;
> +		WRITE_ONCE(req->state, MQ_RQ_IDLE);
>  		goto fnic_device_reset_clean;
>  	}
>  	spin_lock_irqsave(io_lock, flags);
> @@ -2279,11 +2275,12 @@ int fnic_device_reset(struct scsi_cmnd *sc)
>  				    msecs_to_jiffies(FNIC_LUN_RESET_TIMEOUT));
>  
>  	spin_lock_irqsave(io_lock, flags);
> +	WRITE_ONCE(req->state, MQ_RQ_IDLE);
>  	io_req = fnic_priv(sc)->io_req;
>  	if (!io_req) {
>  		spin_unlock_irqrestore(io_lock, flags);
>  		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
> -				"io_req is null tag 0x%x sc 0x%p\n", tag, sc);
> +				"io_req is null tag 0x%x\n", tag);
>  		goto fnic_device_reset_end;
>  	}
>  	io_req->dr_done = NULL;
> @@ -2326,7 +2323,7 @@ int fnic_device_reset(struct scsi_cmnd *sc)
>  				spin_unlock_irqrestore(io_lock, flags);
>  				FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
>  				"Abort and terminate issued on Device reset "
> -				"tag 0x%x sc 0x%p\n", tag, sc);
> +				"tag 0x%x\n", tag);
>  				break;
>  			}
>  		}
> @@ -2364,7 +2361,7 @@ int fnic_device_reset(struct scsi_cmnd *sc)
>  	 * the lun reset cmd. If all cmds get cleaned, the lun reset
>  	 * succeeds
>  	 */
> -	if (fnic_clean_pending_aborts(fnic, sc, new_sc)) {
> +	if (fnic_clean_pending_aborts(fnic, sc)) {
>  		spin_lock_irqsave(io_lock, flags);
>  		io_req = fnic_priv(sc)->io_req;
>  		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
> @@ -2393,15 +2390,15 @@ int fnic_device_reset(struct scsi_cmnd *sc)
>  	}
>  
>  fnic_device_reset_end:
> -	FNIC_TRACE(fnic_device_reset, sc->device->host->host_no, rq->tag, sc,
> +	FNIC_TRACE(fnic_device_reset, sc->device->host->host_no,
> +		  scsi_cmd_to_rq(sc)->tag, sc,
>  		  jiffies_to_msecs(jiffies - start_time),
>  		  0, ((u64)sc->cmnd[0] << 32 |
>  		  (u64)sc->cmnd[2] << 24 | (u64)sc->cmnd[3] << 16 |
>  		  (u64)sc->cmnd[4] << 8 | sc->cmnd[5]),
>  		  fnic_flags_and_state(sc));
>  
> -	if (new_sc)
> -		mutex_unlock(&fnic->sgreset_mutex);
> +	blk_mq_free_request(req);
>  
>  	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
>  		      "Returning from device reset %s\n",
> @@ -2633,8 +2630,6 @@ static bool fnic_abts_pending_iter(struct scsi_cmnd *sc, void *data)
>  	 * ignore this lun reset cmd or cmds that do not belong to
>  	 * this lun
>  	 */
> -	if (iter_data->lr_sc && sc == iter_data->lr_sc)
> -		return true;
>  	if (iter_data->lun_dev && sc->device != iter_data->lun_dev)
>  		return true;
>  
> @@ -2677,10 +2672,8 @@ int fnic_is_abts_pending(struct fnic *fnic, struct scsi_cmnd *lr_sc)
>  		.ret = 0,
>  	};
>  
> -	if (lr_sc) {
> +	if (lr_sc)
>  		iter_data.lun_dev = lr_sc->device;
> -		iter_data.lr_sc = lr_sc;
> -	}
>  
>  	/* walk again to check, if IOs are still pending in fw */
>  	scsi_host_busy_iter(fnic->lport->host,
> diff --git a/drivers/scsi/snic/snic_scsi.c b/drivers/scsi/snic/snic_scsi.c
> index f123531e7dd1..c38f648da3d7 100644
> --- a/drivers/scsi/snic/snic_scsi.c
> +++ b/drivers/scsi/snic/snic_scsi.c
> @@ -2367,7 +2367,7 @@ snic_cmpl_pending_tmreq(struct snic *snic, struct scsi_cmnd *sc)
>  }
>  
>  static bool
> -snic_scsi_cleanup_iter(struct scsi_cmnd *sc, void *data, bool reserved)
> +snic_scsi_cleanup_iter(struct scsi_cmnd *sc, void *data)
>  {
>  	struct snic *snic = data;
>  	struct snic_req_info *rqi = NULL;
> @@ -2541,8 +2541,7 @@ struct snic_tgt_scsi_abort_io_data {
>  	int abt_cnt;
>  };
>  
> -static bool snic_tgt_scsi_abort_io_iter(struct scsi_cmnd *sc, void *data,
> -					bool reserved)
> +static bool snic_tgt_scsi_abort_io_iter(struct scsi_cmnd *sc, void *data)
>  {
>  	struct snic_tgt_scsi_abort_io_data *iter_data = data;
>  	struct snic *snic = iter_data->snic;
> -- 
> 2.35.3
---end quoted text---
Karan Tilak Kumar Nov. 6, 2023, 7:42 p.m. UTC | #2
On Monday, October 23, 2023 11:54 PM, Christoph Hellwig <hch@lst.de> wrote:
>
> Adding the fnic maintainers as they are probably most qualified to review and test this.
>
> On Mon, Oct 23, 2023 at 11:15:04AM +0200, Hannes Reinecke wrote:
> > Allocate a reset command on the fly instead of relying on using the
> > command which triggered the device failure.
> > This might fail if all available tags are busy, but in that case it'll
> > be safer to fall back to host reset anyway.
> >

Thanks for this fix, Hannes.
I'm working on integrating these changes and testing them. 
I'll get back to you about this.

Regards,
Karan
Karan Tilak Kumar Nov. 14, 2023, 2:29 a.m. UTC | #3
On Monday, November 6, 2023 11:42 AM, Karan Tilak Kumar (kartilak) wrote:
>
> On Monday, October 23, 2023 11:54 PM, Christoph Hellwig <hch@lst.de> wrote:
> >
> > Adding the fnic maintainers as they are probably most qualified to review and test this.
> >
> > On Mon, Oct 23, 2023 at 11:15:04AM +0200, Hannes Reinecke wrote:
> > > Allocate a reset command on the fly instead of relying on using the
> > > command which triggered the device failure.
> > > This might fail if all available tags are busy, but in that case
> > > it'll be safer to fall back to host reset anyway.
> > >
>
> Thanks for this fix, Hannes.
> I'm working on integrating these changes and testing them.
> I'll get back to you about this.
>

I integrated your patch set using "b4 shazam" and built all the changes to do some dev testing.
I instrumented the code to do the following:

- After one million IOs, drop one IO response.
- This will trigger an abort. Drop that abort response.
- This will trigger a device reset. I'm seeing that the tag here is 0xFFFFFFFF (SCSI_NO_TAG).

This tag fails the out-of-range tag check and escalates to host reset.
I've been able to repro this three times with the same result.

The expectation with this test case is that the device reset should go through successfully without escalating to host reset.

Regards,
Karan
Hannes Reinecke Nov. 14, 2023, 8:01 a.m. UTC | #4
On 11/14/23 03:29, Karan Tilak Kumar (kartilak) wrote:
> On Monday, November 6, 2023 11:42 AM, Karan Tilak Kumar (kartilak) wrote:
>>
>> On Monday, October 23, 2023 11:54 PM, Christoph Hellwig <hch@lst.de> wrote:
>>>
>>> Adding the fnic maintainers as they are probably most qualified to review and test this.
>>>
>>> On Mon, Oct 23, 2023 at 11:15:04AM +0200, Hannes Reinecke wrote:
>>>> Allocate a reset command on the fly instead of relying on using the
>>>> command which triggered the device failure.
>>>> This might fail if all available tags are busy, but in that case
>>>> it'll be safer to fall back to host reset anyway.
>>>>
>>
>> Thanks for this fix, Hannes.
>> I'm working on integrating these changes and testing them.
>> I'll get back to you about this.
>>
> 
> I integrated your patch set using "b4 shazam" and built all the changes to do some dev testing.
> I instrumented the code to do the following:
> 
> - After one million IOs, drop one IO response.
> - This will trigger an abort. Drop that abort response.
> - This will trigger a device reset. I'm seeing that the tag here is 0xFFFFFFFF (SCSI_NO_TAG).
> 
> This tag fails the out-of-range tag check and escalates to host reset.
> I've been able to repro this three times with the same result.
> 
> The expectation with this test case is that the device reset should go through successfully without escalating to host reset.
> 
Thanks for the report.
Certainly not what was intended with the patch. Can you try with this 
patch on top:

diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 8909cf09cf9e..50eea6d2344a 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -2210,7 +2210,7 @@ int fnic_device_reset(struct scsi_device *sdev)

         req = scsi_alloc_request(sdev->request_queue, REQ_OP_DRV_IN,
                                  BLK_MQ_REQ_NOWAIT);
-       if (!req) {
+       if (IS_ERR(req)) {
                 /*
                  * Request allocation might fail, indicating that
                  * all tags are busy.
@@ -2221,7 +2221,8 @@ int fnic_device_reset(struct scsi_device *sdev)
                  * to host reset anyway.
                  */
                 FNIC_SCSI_DBG(KERN_ERR, fnic->lport->host,
-                             "Device reset allocation failed, all tags 
busy\n");
+                             "Device reset allocation failed with %d\n",
+                             PTR_ERR(req));
                 return ret;
         }

(The first hunk is actually a bugfix, and will be included with
the next submission anyway.)
Please enable fnic debug during testing and send me the logs.
It _might_ be that this is actually by design, as we will be
failing if all tags are busy (ie if the error is EWOULDBLOCK).
But if we get EAGAIN it means that the queue is blocked and
we'll need to investigate.

Thanks for your help here.

Cheers,

Hannes
Karan Tilak Kumar Nov. 16, 2023, 12:22 a.m. UTC | #5
On Tuesday, November 14, 2023 12:02 AM, Hannes Reinecke <hare@suse.de> wrote:
> > I integrated your patch set using "b4 shazam" and built all the changes to do some dev testing.
> > I instrumented the code to do the following:
> >
> > - After one million IOs, drop one IO response.
> > - This will trigger an abort. Drop that abort response.
> > - This will trigger a device reset. I'm seeing that the tag here is 0xFFFFFFFF (SCSI_NO_TAG).
> >
> > This tag fails the out-of-range tag check and escalates to host reset.
> > I've been able to repro this three times with the same result.
> >
> > The expectation with this test case is that the device reset should go through successfully without escalating to host reset.
> >
> Thanks for the report.
> Certainly not what was intended with the patch. Can you try with this patch on top:

I applied this patch on top of the previous one and performed the unit tests that I had mentioned before.

>
> (The first hunk is actually a bugfix, and will be included with the next submission anyway.) Please enable fnic debug during testing and send me the logs.
> It _might_ be that this is actually by design, as we will be failing if all tags are busy (ie if the error is EWOULDBLOCK).
> But if we get EAGAIN it means that the queue is blocked and we'll need to investigate.

There were two attempts at reproduction. One of them resulted in a crash. I have enabled fnic debug and captured the log. 
I've pasted the excerpt here for reference.

Repro 1:

Nov 15 16:08:14 localhost kernel: fnic<7>: UT: fnic_fcpio_icmnd_cmpl_handler: 847: tag: 0x91 sc: 00000000bbe9fd78 CDB Opcode: 0x28 Dropping icmnd completion
Nov 15 16:08:35 localhost multipathd[1746]: burst continued 30025 ms, too long time, stopped
Nov 15 16:08:36 localhost kernel: sched: RT throttling activated
Nov 15 16:08:44 localhost kernel: scsi host7: Abort Cmd called FCID 0x5205fa, LUN 0x1 TAG 91 flags 3
Nov 15 16:08:44 localhost kernel: scsi host7: CBD Opcode: 28 Abort issued time: 30036 msec
Nov 15 16:08:44 localhost kernel: scsi host7: fnic<7>: UT: fnic_fcpio_itmf_cmpl_handler: 1113: tag: 0x91 sc: 00 status: FCPIO_IO_NOT_FOUND Dropping abort completion
Nov 15 16:09:05 localhost multipathd[1746]: burst continued 30041 ms, too long time, stopped
Nov 15 16:09:06 localhost kernel: scsi host7: Returning from abort cmd type 2 FAILED
Nov 15 16:09:06 localhost kernel: scsi host7: Device reset called FCID 0x5205fa, LUN 0x1 sc: 00000000bbe9fd78
Nov 15 16:09:06 localhost kernel: scsi host7: TAG ffffffff sc: 000000005ee1ff69
Nov 15 16:09:06 localhost kernel: scsi host7: Tag out of range tag ffffffff hdr status = FCPIO_SUCCESS
Nov 15 16:09:17 localhost kernel: scsi host7: Device reset timed out
Nov 15 16:09:17 localhost kernel: scsi host7: Abort and terminate issued on Device reset tag 0xffffffff
Nov 15 16:09:17 localhost kernel: scsi host7: Tag out of range tag ffffffff hdr status = FCPIO_IO_NOT_FOUND
Nov 15 16:09:27 localhost kernel: scsi host7: Device reset completed - failed
Nov 15 16:09:27 localhost kernel: scsi host7: Returning from device reset FAILED
Nov 15 16:09:27 localhost kernel: scsi host7: fnic_reset called
Nov 15 16:09:27 localhost kernel: scsi host7: fnic_rport_exch_reset called portid 0xfffffc
Nov 15 16:09:27 localhost kernel: scsi host7: fnic_rport_exch_reset called portid 0x52060c
Nov 15 16:09:27 localhost kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205fb
Nov 15 16:09:27 localhost kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205fa
Nov 15 16:09:27 localhost kernel: scsi host7: update_mac 00:25:b5:bb:bb:a0
Nov 15 16:09:27 localhost kernel: scsi host7: Issued fw reset
Nov 15 16:09:27 localhost kernel: scsi host7: set port_id 0 fp 0000000000000000
Nov 15 16:09:27 localhost kernel: scsi host7: Returning from fnic reset SUCCESS
Nov 15 16:09:27 localhost kernel: scsi host7: fnic_rport_exch_reset called portid 0xfffffc
Nov 15 16:09:27 localhost kernel: scsi host7: fnic_rport_exch_reset called portid 0x52060c
Nov 15 16:09:27 localhost kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205fb
Nov 15 16:09:27 localhost kernel: scsi host7: fnic_cleanup_io: tag:0x91 : sc:0x00000000bbe9fd78 duration = 72835 DID_TRANSPORT_DISRUPTED
Nov 15 16:09:27 localhost kernel: scsi host7: Calling done for IO not issued to fw: tag:0x91 sc:0x00000000bbe9fd78
Nov 15 16:09:27 localhost kernel: scsi host7: reset cmpl success
Nov 15 16:09:27 localhost kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205fa
Nov 15 16:09:29 localhost kernel: host7: Assigned Port ID 0d0840
Nov 15 16:09:29 localhost kernel: scsi host7: set port_id d0840 fp 0000000046a33a9d
Nov 15 16:09:29 localhost kernel: scsi host7: update_mac 0e:fc:00:0d:08:40
Nov 15 16:09:29 localhost kernel: scsi host7: FLOGI reg issued fcid d0840 map 0 dest 8c:60:4f:95:ea:a4
Nov 15 16:09:29 localhost kernel: scsi host7: flog reg succeeded
Nov 15 16:09:38 localhost multipathd[1746]: 3600a098038303873633f4d415156374d: sdb - tur checker timed out
Nov 15 16:09:38 localhost multipathd[1746]: checker failed path 8:16 in map 3600a098038303873633f4d415156374d
Nov 15 16:09:38 localhost multipathd[1746]: 3600a098038303873633f4d415156374d: remaining active paths: 1
Nov 15 16:09:38 localhost multipathd[1746]: 3600a098038303873633f4d415156374e: sde - tur checker timed out
Nov 15 16:09:38 localhost multipathd[1746]: checker failed path 8:64 in map 3600a098038303873633f4d415156374e
Nov 15 16:09:38 localhost multipathd[1746]: 3600a098038303873633f4d415156374e: remaining active paths: 1
Nov 15 16:09:38 localhost multipathd[1746]: sdb: mark as failed
Nov 15 16:09:38 localhost multipathd[1746]: sde: mark as failed
Nov 15 16:09:38 localhost kernel: device-mapper: multipath: 253:3: Failing path 8:16.
Nov 15 16:09:38 localhost kernel: device-mapper: multipath: 253:4: Failing path 8:64.
Nov 15 16:09:40 localhost kernel: sd 7:0:2:1: Power-on or device reset occurred
Nov 15 16:09:40 localhost kernel: sd 7:0:1:1: Power-on or device reset occurred
Nov 15 16:09:40 localhost kernel: sd 7:0:1:0: Power-on or device reset occurred
Nov 15 16:09:40 localhost kernel: sd 7:0:2:0: Power-on or device reset occurred
Nov 15 16:09:43 localhost multipathd[1746]: 3600a098038303873633f4d415156374d: sdb - tur checker reports path is up
Nov 15 16:09:43 localhost multipathd[1746]: 8:16: reinstated
Nov 15 16:09:43 localhost multipathd[1746]: 3600a098038303873633f4d415156374d: remaining active paths: 2
Nov 15 16:09:43 localhost kernel: device-mapper: multipath: 253:3: Reinstating path 8:16.
Nov 15 16:09:43 localhost kernel: device-mapper: multipath: 253:4: Reinstating path 8:64.
Nov 15 16:09:43 localhost multipathd[1746]: 3600a098038303873633f4d415156374e: sde - tur checker reports path is up
Nov 15 16:09:43 localhost multipathd[1746]: 8:64: reinstated
Nov 15 16:09:43 localhost multipathd[1746]: 3600a098038303873633f4d415156374e: remaining active paths: 2

=============
Repro 2:

[  184.517949] fnic<7>: UT: fnic_fcpio_icmnd_cmpl_handler: 847: tag: 0xeb sc: 000000000d852f23 CDB Opcode: 0x28 Dropping icmnd completion
[  214.523460] scsi host7: Abort Cmd called FCID 0x5205fb, LUN 0x1 TAG eb flags 3
[  214.523474] scsi host7: CBD Opcode: 28 Abort issued time: 30026 msec
[  214.523548] scsi host7: fnic<7>: UT: fnic_fcpio_itmf_cmpl_handler: 1113: tag: 0xeb sc: 00 status: FCPIO_IO_NOT_FOUND Dropping abort completion
[  236.835184] scsi host7: Returning from abort cmd type 2 FAILED
[  236.943669] scsi host7: Device reset called FCID 0x5205fb, LUN 0x1 sc: 000000000d852f23
[  236.943688] scsi host7: TAG ffffffff sc: 00000000ea33dc31
[  236.956270] scsi host7: Tag out of range tag ffffffff hdr status = FCPIO_SUCCESS
[  247.074054] scsi host7: Device reset timed out
[  247.074063] scsi host7: Abort and terminate issued on Device reset tag 0xffffffff
[  247.074070] scsi host7: Tag out of range tag ffffffff hdr status = FCPIO_IO_NOT_FOUND
[  257.312999] scsi host7: Device reset completed - failed
[  257.313008] BUG: kernel NULL pointer dereference, address: 000000000000001c
[  257.313016] #PF: supervisor read access in kernel mode
[  257.313021] #PF: error_code(0x0000) - not-present page
[  257.313028] PGD 80000010c746c067 P4D 80000010c746c067 PUD 10c746d067 PMD 0
[  257.313041] Oops: 0000 [#1] PREEMPT SMP PTI
[  257.313049] CPU: 0 PID: 1114 Comm: scsi_eh_7 Kdump: loaded Not tainted 6.4.7 #5
[  257.313058] Hardware name: Cisco Systems Inc UCSB-B200-M5/UCSB-B200-M5, BIOS B200M5.4.0.2a.0.1102181538 11/02/2018
[  257.313063] RIP: 0010:dma_direct_unmap_sg+0x58/0x1d0
[  257.313080] Code: 89 f5 89 d3 4d 89 c5 45 31 ff eb 1e 83 e0 fe 89 45 1c 48 89 ef 41 83 c7 01 e8 b4 b9 44 00 48 89 c5 44 39 fb 0f 84 39 01 00 00 <8b> 45 1c a8 01 75 db 4c 8b 05 f2 1b 68 01 49 8b bc 24 58 02 00 00
[  257.313087] RSP: 0018:ffff9cfa49157cc0 EFLAGS: 00010246
[  257.313094] RAX: 0000000000000000 RBX: 0000000000000003 RCX: 0000000000000002
[  257.313099] RDX: 0000000000000003 RSI: 0000000000000000 RDI: ffff8fab45f470d0
[  257.313103] RBP: 0000000000000000 R08: 0000000000000000 R09: 00000000ffff7fff
[  257.313107] R10: ffff9cfa49157b18 R11: ffffffff969e5d28 R12: ffff8fab45f470d0
[  257.313112] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[  257.313116] FS:  0000000000000000(0000) GS:ffff8faaff800000(0000) knlGS:0000000000000000
[  257.313122] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  257.313128] CR2: 000000000000001c CR3: 00000010c874c002 CR4: 00000000007706f0
[  257.313133] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  257.313137] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  257.313141] PKRU: 55555554
[  257.313145] Call Trace:
[  257.313150]  <TASK>
[  257.313158]  ? __die+0x24/0x70
[  257.313171]  ? page_fault_oops+0x82/0x150
[  257.313181]  ? irq_work_queue+0x32/0x60
[  257.313190]  ? vprintk_emit+0x100/0x230
[  257.313200]  ? exc_page_fault+0x69/0x150
[  257.313211]  ? asm_exc_page_fault+0x26/0x30
[  257.313228]  ? dma_direct_unmap_sg+0x58/0x1d0
[  257.313244]  fnic_release_ioreq_buf+0x23/0xc0 [fnic]
[  257.313274]  fnic_device_reset+0x57c/0x860 [fnic]
[  257.313297]  ? sched_clock_cpu+0xf/0x190
[  257.313308]  ? raw_spin_rq_lock_nested+0x1d/0x80
[  257.313318]  ? newidle_balance+0x26e/0x400
[  257.313329]  scsi_eh_bus_device_reset+0xee/0x2e0
[  257.313343]  scsi_eh_ready_devs+0x6e/0x2f0
[  257.313366]  ? rpm_resume+0x411/0x750
[  257.313372]  scsi_unjam_host+0x10a/0x200
[  257.313377]  scsi_error_handler+0x144/0x1d0
[  257.313380]  ? __pfx_scsi_error_handler+0x10/0x10
[  257.313383]  kthread+0xe3/0x120
[  257.313386]  ? __pfx_kthread+0x10/0x10
[  257.313388]  ret_from_fork+0x29/0x50
[  257.313394]  </TASK>
[  257.313395] Modules linked in: dm_round_robin snd_seq_dummy snd_hrtimer snd_seq snd_timer snd_seq_device snd soundcore nft_compat nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables rfkill nfnetlink qrtr dm_multipath intel_rapl_msr intel_rapl_common intel_uncore_frequency intel_uncore_frequency_common isst_if_common skx_edac nfit libnvdimm x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass rapl intel_cstate joydev mei_me intel_uncore ioatdma mei intel_pch_thermal ipmi_ssif pcspkr dca lpc_ich acpi_ipmi ipmi_si acpi_pad acpi_power_meter xfs libcrc32c mgag200 sd_mod t10_pi drm_kms_helper crc64_rocksoft_generic crc64_rocksoft crc64 sg fnic syscopyarea sysfillrect sysimgblt i2c_algo_bit libfcoe drm_shmem_helper crct10dif_pclmul ahci crc32_pclmul crc32c_intel libahci libfc ghash_clmulni_intel drm libata enic scsi_transport_fc sha512_ssse3 megaraid_sas wmi dm_mirror
[  257.313470]  dm_region_hash dm_log dm_mod ipmi_devintf ipmi_msghandler fuse
[  257.313478] CR2: 000000000000001c


Regards,
Karan
Karan Tilak Kumar Nov. 17, 2023, 11:31 p.m. UTC | #6
On Wednesday, November 15, 2023 4:22 PM, Karan Tilak Kumar (kartilak) wrote:
>
> On Tuesday, November 14, 2023 12:02 AM, Hannes Reinecke <hare@suse.de> wrote:
> > > I integrated your patch set using "b4 shazam" and built all the changes to do some dev testing.
> > > I instrumented the code to do the following:
> > >
> > > - After one million IOs, drop one IO response.
> > > - This will trigger an abort. Drop that abort response.
> > > - This will trigger a device reset. I'm seeing that the tag here is 0xFFFFFFFF (SCSI_NO_TAG).
> > >
> > > This tag fails the out-of-range tag check and escalates to host reset.
> > > I've been able to repro this three times with the same result.
> > >
> > > The expectation with this test case is that the device reset should go through successfully without escalating to host reset.
> > >
> > Thanks for the report.
> > Certainly not what was intended with the patch. Can you try with this patch on top:
>
> I applied this patch on top of the previous one and performed the unit tests that I had mentioned before.
>
> >
> > (The first hunk is actually a bugfix, and will be included with the next submission anyway.) Please enable fnic debug during testing and send me the logs.
> > It _might_ be that this is actually by design, as we will be failing if all tags are busy (ie if the error is EWOULDBLOCK).
> > But if we get EAGAIN it means that the queue is blocked and we'll need to investigate.
>
> There were two attempts at reproduction. One of them resulted in a crash. I have enabled fnic debug and captured the log.
> I've pasted the excerpt here for reference.
>
> Repro 1:
>
> Nov 15 16:08:14 localhost kernel: fnic<7>: UT: fnic_fcpio_icmnd_cmpl_handler: 847: tag: 0x91 sc: 00000000bbe9fd78 CDB Opcode: 0x28 Dropping icmnd completion
> Nov 15 16:08:35 localhost multipathd[1746]: burst continued 30025 ms, too long time, stopped
> Nov 15 16:08:36 localhost kernel: sched: RT throttling activated
> Nov 15 16:08:44 localhost kernel: scsi host7: Abort Cmd called FCID 0x5205fa, LUN 0x1 TAG 91 flags 3
> Nov 15 16:08:44 localhost kernel: scsi host7: CBD Opcode: 28 Abort issued time: 30036 msec
> Nov 15 16:08:44 localhost kernel: scsi host7: fnic<7>: UT: fnic_fcpio_itmf_cmpl_handler: 1113: tag: 0x91 sc: 00 status: FCPIO_IO_NOT_FOUND Dropping abort completion
> Nov 15 16:09:05 localhost multipathd[1746]: burst continued 30041 ms, too long time, stopped
> Nov 15 16:09:06 localhost kernel: scsi host7: Returning from abort cmd type 2 FAILED
> Nov 15 16:09:06 localhost kernel: scsi host7: Device reset called FCID 0x5205fa, LUN 0x1 sc: 00000000bbe9fd78
> Nov 15 16:09:06 localhost kernel: scsi host7: TAG ffffffff sc: 000000005ee1ff69
> Nov 15 16:09:06 localhost kernel: scsi host7: Tag out of range tag ffffffff hdr status = FCPIO_SUCCESS
> Nov 15 16:09:17 localhost kernel: scsi host7: Device reset timed out
> Nov 15 16:09:17 localhost kernel: scsi host7: Abort and terminate issued on Device reset tag 0xffffffff
> Nov 15 16:09:17 localhost kernel: scsi host7: Tag out of range tag ffffffff hdr status = FCPIO_IO_NOT_FOUND
> Nov 15 16:09:27 localhost kernel: scsi host7: Device reset completed - failed
> Nov 15 16:09:27 localhost kernel: scsi host7: Returning from device reset FAILED
> Nov 15 16:09:27 localhost kernel: scsi host7: fnic_reset called
> Nov 15 16:09:27 localhost kernel: scsi host7: fnic_rport_exch_reset called portid 0xfffffc
> Nov 15 16:09:27 localhost kernel: scsi host7: fnic_rport_exch_reset called portid 0x52060c
> Nov 15 16:09:27 localhost kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205fb
> Nov 15 16:09:27 localhost kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205fa
> Nov 15 16:09:27 localhost kernel: scsi host7: update_mac 00:25:b5:bb:bb:a0
> Nov 15 16:09:27 localhost kernel: scsi host7: Issued fw reset
> Nov 15 16:09:27 localhost kernel: scsi host7: set port_id 0 fp 0000000000000000
> Nov 15 16:09:27 localhost kernel: scsi host7: Returning from fnic reset SUCCESS
> Nov 15 16:09:27 localhost kernel: scsi host7: fnic_rport_exch_reset called portid 0xfffffc
> Nov 15 16:09:27 localhost kernel: scsi host7: fnic_rport_exch_reset called portid 0x52060c
> Nov 15 16:09:27 localhost kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205fb
> Nov 15 16:09:27 localhost kernel: scsi host7: fnic_cleanup_io: tag:0x91 : sc:0x00000000bbe9fd78 duration = 72835 DID_TRANSPORT_DISRUPTED
> Nov 15 16:09:27 localhost kernel: scsi host7: Calling done for IO not issued to fw: tag:0x91 sc:0x00000000bbe9fd78
> Nov 15 16:09:27 localhost kernel: scsi host7: reset cmpl success
> Nov 15 16:09:27 localhost kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205fa
> Nov 15 16:09:29 localhost kernel: host7: Assigned Port ID 0d0840
> Nov 15 16:09:29 localhost kernel: scsi host7: set port_id d0840 fp 0000000046a33a9d
> Nov 15 16:09:29 localhost kernel: scsi host7: update_mac 0e:fc:00:0d:08:40
> Nov 15 16:09:29 localhost kernel: scsi host7: FLOGI reg issued fcid d0840 map 0 dest 8c:60:4f:95:ea:a4
> Nov 15 16:09:29 localhost kernel: scsi host7: flog reg succeeded
> Nov 15 16:09:38 localhost multipathd[1746]: 3600a098038303873633f4d415156374d: sdb - tur checker timed out
> Nov 15 16:09:38 localhost multipathd[1746]: checker failed path 8:16 in map 3600a098038303873633f4d415156374d
> Nov 15 16:09:38 localhost multipathd[1746]: 3600a098038303873633f4d415156374d: remaining active paths: 1
> Nov 15 16:09:38 localhost multipathd[1746]: 3600a098038303873633f4d415156374e: sde - tur checker timed out
> Nov 15 16:09:38 localhost multipathd[1746]: checker failed path 8:64 in map 3600a098038303873633f4d415156374e
> Nov 15 16:09:38 localhost multipathd[1746]: 3600a098038303873633f4d415156374e: remaining active paths: 1
> Nov 15 16:09:38 localhost multipathd[1746]: sdb: mark as failed
> Nov 15 16:09:38 localhost multipathd[1746]: sde: mark as failed
> Nov 15 16:09:38 localhost kernel: device-mapper: multipath: 253:3: Failing path 8:16.
> Nov 15 16:09:38 localhost kernel: device-mapper: multipath: 253:4: Failing path 8:64.
> Nov 15 16:09:40 localhost kernel: sd 7:0:2:1: Power-on or device reset occurred
> Nov 15 16:09:40 localhost kernel: sd 7:0:1:1: Power-on or device reset occurred
> Nov 15 16:09:40 localhost kernel: sd 7:0:1:0: Power-on or device reset occurred
> Nov 15 16:09:40 localhost kernel: sd 7:0:2:0: Power-on or device reset occurred
> Nov 15 16:09:43 localhost multipathd[1746]: 3600a098038303873633f4d415156374d: sdb - tur checker reports path is up
> Nov 15 16:09:43 localhost multipathd[1746]: 8:16: reinstated
> Nov 15 16:09:43 localhost multipathd[1746]: 3600a098038303873633f4d415156374d: remaining active paths: 2
> Nov 15 16:09:43 localhost kernel: device-mapper: multipath: 253:3: Reinstating path 8:16.
> Nov 15 16:09:43 localhost kernel: device-mapper: multipath: 253:4: Reinstating path 8:64.
> Nov 15 16:09:43 localhost multipathd[1746]: 3600a098038303873633f4d415156374e: sde - tur checker reports path is up
> Nov 15 16:09:43 localhost multipathd[1746]: 8:64: reinstated
> Nov 15 16:09:43 localhost multipathd[1746]: 3600a098038303873633f4d415156374e: remaining active paths: 2
>
> =============
> Repro 2:
>
> [  184.517949] fnic<7>: UT: fnic_fcpio_icmnd_cmpl_handler: 847: tag: 0xeb sc: 000000000d852f23 CDB Opcode: 0x28 Dropping icmnd completion
> [  214.523460] scsi host7: Abort Cmd called FCID 0x5205fb, LUN 0x1 TAG eb flags 3
> [  214.523474] scsi host7: CBD Opcode: 28 Abort issued time: 30026 msec
> [  214.523548] scsi host7: fnic<7>: UT: fnic_fcpio_itmf_cmpl_handler: 1113: tag: 0xeb sc: 00 status: FCPIO_IO_NOT_FOUND Dropping abort completion
> [  236.835184] scsi host7: Returning from abort cmd type 2 FAILED
> [  236.943669] scsi host7: Device reset called FCID 0x5205fb, LUN 0x1 sc: 000000000d852f23
> [  236.943688] scsi host7: TAG ffffffff sc: 00000000ea33dc31
> [  236.956270] scsi host7: Tag out of range tag ffffffff hdr status = FCPIO_SUCCESS
> [  247.074054] scsi host7: Device reset timed out
> [  247.074063] scsi host7: Abort and terminate issued on Device reset tag 0xffffffff
> [  247.074070] scsi host7: Tag out of range tag ffffffff hdr status = FCPIO_IO_NOT_FOUND
> [  257.312999] scsi host7: Device reset completed - failed
> [  257.313008] BUG: kernel NULL pointer dereference, address: 000000000000001c
> [  257.313016] #PF: supervisor read access in kernel mode
> [  257.313021] #PF: error_code(0x0000) - not-present page
> [  257.313028] PGD 80000010c746c067 P4D 80000010c746c067 PUD 10c746d067 PMD 0
> [  257.313041] Oops: 0000 [#1] PREEMPT SMP PTI
> [  257.313049] CPU: 0 PID: 1114 Comm: scsi_eh_7 Kdump: loaded Not tainted 6.4.7 #5
> [  257.313058] Hardware name: Cisco Systems Inc UCSB-B200-M5/UCSB-B200-M5, BIOS B200M5.4.0.2a.0.1102181538 11/02/2018
> [  257.313063] RIP: 0010:dma_direct_unmap_sg+0x58/0x1d0
> [  257.313080] Code: 89 f5 89 d3 4d 89 c5 45 31 ff eb 1e 83 e0 fe 89 45 1c 48 89 ef 41 83 c7 01 e8 b4 b9 44 00 48 89 c5 44 39 fb 0f 84 39 01 00 00 <8b> 45 1c a8 01 75 db 4c 8b 05 f2 1b 68 01 49 8b bc 24 58 02 00 00
> [  257.313087] RSP: 0018:ffff9cfa49157cc0 EFLAGS: 00010246
> [  257.313094] RAX: 0000000000000000 RBX: 0000000000000003 RCX: 0000000000000002
> [  257.313099] RDX: 0000000000000003 RSI: 0000000000000000 RDI: ffff8fab45f470d0
> [  257.313103] RBP: 0000000000000000 R08: 0000000000000000 R09: 00000000ffff7fff
> [  257.313107] R10: ffff9cfa49157b18 R11: ffffffff969e5d28 R12: ffff8fab45f470d0
> [  257.313112] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
> [  257.313116] FS:  0000000000000000(0000) GS:ffff8faaff800000(0000) knlGS:0000000000000000
> [  257.313122] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [  257.313128] CR2: 000000000000001c CR3: 00000010c874c002 CR4: 00000000007706f0
> [  257.313133] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [  257.313137] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [  257.313141] PKRU: 55555554
> [  257.313145] Call Trace:
> [  257.313150]  <TASK>
> [  257.313158]  ? __die+0x24/0x70
> [  257.313171]  ? page_fault_oops+0x82/0x150
> [  257.313181]  ? irq_work_queue+0x32/0x60
> [  257.313190]  ? vprintk_emit+0x100/0x230
> [  257.313200]  ? exc_page_fault+0x69/0x150
> [  257.313211]  ? asm_exc_page_fault+0x26/0x30
> [  257.313228]  ? dma_direct_unmap_sg+0x58/0x1d0
> [  257.313244]  fnic_release_ioreq_buf+0x23/0xc0 [fnic]
> [  257.313274]  fnic_device_reset+0x57c/0x860 [fnic]
> [  257.313297]  ? sched_clock_cpu+0xf/0x190
> [  257.313308]  ? raw_spin_rq_lock_nested+0x1d/0x80
> [  257.313318]  ? newidle_balance+0x26e/0x400
> [  257.313329]  scsi_eh_bus_device_reset+0xee/0x2e0
> [  257.313343]  scsi_eh_ready_devs+0x6e/0x2f0
> [  257.313366]  ? rpm_resume+0x411/0x750
> [  257.313372]  scsi_unjam_host+0x10a/0x200
> [  257.313377]  scsi_error_handler+0x144/0x1d0
> [  257.313380]  ? __pfx_scsi_error_handler+0x10/0x10
> [  257.313383]  kthread+0xe3/0x120
> [  257.313386]  ? __pfx_kthread+0x10/0x10
> [  257.313388]  ret_from_fork+0x29/0x50
> [  257.313394]  </TASK>
> [  257.313395] Modules linked in: dm_round_robin snd_seq_dummy snd_hrtimer snd_seq snd_timer snd_seq_device snd soundcore nft_compat nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables rfkill nfnetlink qrtr dm_multipath intel_rapl_msr intel_rapl_common intel_uncore_frequency intel_uncore_frequency_common isst_if_common skx_edac nfit libnvdimm
> x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass rapl intel_cstate joydev mei_me intel_uncore ioatdma mei intel_pch_thermal ipmi_ssif pcspkr dca lpc_ich acpi_ipmi ipmi_si acpi_pad acpi_power_meter xfs libcrc32c mgag200 sd_mod t10_pi drm_kms_helper crc64_rocksoft_generic crc64_rocksoft crc64 sg fnic syscopyarea sysfillrect sysimgblt i2c_algo_bit libfcoe drm_shmem_helper crct10dif_pclmul ahci crc32_pclmul crc32c_intel libahci libfc ghash_clmulni_intel drm libata
> enic scsi_transport_fc sha512_ssse3 megaraid_sas wmi dm_mirror
> [  257.313470]  dm_region_hash dm_log dm_mod ipmi_devintf ipmi_msghandler fuse
> [  257.313478] CR2: 000000000000001c
>
>
> Regards,
> Karan
>

Repro 3: 

Nov 17 15:27:00 rhel-c4s5 kernel: fnic<7>: UT: fnic_fcpio_icmnd_cmpl_handler: 847: tag: 0x52 sc: 00000000fab117f4 CDB Opcode: 0x28 Dropping icmnd completion
Nov 17 15:27:30 rhel-c4s5 kernel: scsi host7: Abort Cmd called FCID 0x5205f2, LUN 0x2 TAG 52 flags 3
Nov 17 15:27:30 rhel-c4s5 kernel: scsi host7: CBD Opcode: 28 Abort issued time: 30067 msec
Nov 17 15:27:30 rhel-c4s5 kernel: scsi host7: fnic<7>: UT: fnic_fcpio_itmf_cmpl_handler: 1113: tag: 0x52 sc: 00 status: FCPIO_IO_NOT_FOUND Dropping abort completion
Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: Returning from abort cmd type 2 FAILED
Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: Device reset called FCID 0x5205f2, LUN 0x2 sc: 00000000fab117f4
Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: Device reset allocation failed with -11
Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_reset called
Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0xfffffc
Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x52061b
Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205f2
Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205cb
Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205ca
Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: update_mac 00:25:b5:cc:aa:00
Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: Issued fw reset
Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: set port_id 0 fp 0000000000000000
Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: Returning from fnic reset SUCCESS
Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0xfffffc
Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_cleanup_io: tag:0x52 : sc:0x00000000fab117f4 duration = 52450 DID_TRANSPORT_DISRUPTED
Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: Calling done for IO not issued to fw: tag:0x52 sc:0x00000000fab117f4
Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: reset cmpl success
Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x52061b
Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205f2
Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205cb
Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205ca
Nov 17 15:27:54 rhel-c4s5 kernel: host7: Assigned Port ID 0d0880
Nov 17 15:27:54 rhel-c4s5 kernel: scsi host7: set port_id d0880 fp 0000000067a66565
Nov 17 15:27:54 rhel-c4s5 kernel: scsi host7: update_mac 0e:fc:00:0d:08:80
Nov 17 15:27:54 rhel-c4s5 kernel: scsi host7: FLOGI reg issued fcid d0880 map 0 dest 8c:60:4f:95:ea:a4
Nov 17 15:27:54 rhel-c4s5 kernel: scsi host7: flog reg succeeded
Nov 17 15:28:06 rhel-c4s5 kernel: sd 7:0:3:2: Power-on or device reset occurred

Regards,
Karan
Hannes Reinecke Nov. 20, 2023, 7:50 a.m. UTC | #7
On 11/18/23 00:31, Karan Tilak Kumar (kartilak) wrote:
> On Wednesday, November 15, 2023 4:22 PM, Karan Tilak Kumar (kartilak) wrote:
>>
>> On Tuesday, November 14, 2023 12:02 AM, Hannes Reinecke <hare@suse.de> wrote:
>>>> I integrated your patch set using "b4 shazam" and built all the changes to do some dev testing.
>>>> I instrumented the code to do the following:
>>>>
>>>> - After one million IOs, drop one IO response.
>>>> - This will trigger an abort. Drop that abort response.
>>>> - This will trigger a device reset. I'm seeing that the tag here is 0xFFFFFFFF (SCSI_NO_TAG).
>>>>
>>>> This tag fails the out-of-range tag check and escalates to host reset.
>>>> I've been able to repro this three times with the same result.
>>>>
>>>> The expectation with this test case is that the device reset should go through successfully without escalating to host reset.
>>>>
>>> Thanks for the report.
>>> Certainly not what was intended with the patch. Can you try with this patch on top:
>>
>> I applied this patch on top of the previous one and performed the unit tests that I had mentioned before.
>>
>>>
>>> (The first hunk is actually a bugfix, and will be included with the next submission anyway.) Please enable fnic debug during testing and send me the logs.
>>> It _might_ be that this is actually by design, as we will be failing if all tags are busy (ie if the error is EWOULDBLOCK).
>>> But if we get EAGAIN it means that the queue is blocked and we'll need to investigate.
>>
>> There were two attempts at reproduction. One of them resulted in a crash. I have enabled fnic debug and captured the log.
>> I've pasted the excerpt here for reference.
>>
>> Repro 1:
>>
[ .. ]
>>
>> =============
>> Repro 2:
>>
[ .. ]
>>
>>
>> Regards,
>> Karan
>>
> 
> Repro 3:
> 
> Nov 17 15:27:00 rhel-c4s5 kernel: fnic<7>: UT: fnic_fcpio_icmnd_cmpl_handler: 847: tag: 0x52 sc: 00000000fab117f4 CDB Opcode: 0x28 Dropping icmnd completion
> Nov 17 15:27:30 rhel-c4s5 kernel: scsi host7: Abort Cmd called FCID 0x5205f2, LUN 0x2 TAG 52 flags 3
> Nov 17 15:27:30 rhel-c4s5 kernel: scsi host7: CBD Opcode: 28 Abort issued time: 30067 msec
> Nov 17 15:27:30 rhel-c4s5 kernel: scsi host7: fnic<7>: UT: fnic_fcpio_itmf_cmpl_handler: 1113: tag: 0x52 sc: 00 status: FCPIO_IO_NOT_FOUND Dropping abort completion
> Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: Returning from abort cmd type 2 FAILED
> Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: Device reset called FCID 0x5205f2, LUN 0x2 sc: 00000000fab117f4
> Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: Device reset allocation failed with -11
> Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_reset called
> Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0xfffffc
> Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x52061b
> Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205f2
> Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205cb
> Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205ca
> Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: update_mac 00:25:b5:cc:aa:00
> Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: Issued fw reset
> Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: set port_id 0 fp 0000000000000000
> Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: Returning from fnic reset SUCCESS
> Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0xfffffc
> Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_cleanup_io: tag:0x52 : sc:0x00000000fab117f4 duration = 52450 DID_TRANSPORT_DISRUPTED
> Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: Calling done for IO not issued to fw: tag:0x52 sc:0x00000000fab117f4
> Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: reset cmpl success
> Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x52061b
> Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205f2
> Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205cb
> Nov 17 15:27:52 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205ca
> Nov 17 15:27:54 rhel-c4s5 kernel: host7: Assigned Port ID 0d0880
> Nov 17 15:27:54 rhel-c4s5 kernel: scsi host7: set port_id d0880 fp 0000000067a66565
> Nov 17 15:27:54 rhel-c4s5 kernel: scsi host7: update_mac 0e:fc:00:0d:08:80
> Nov 17 15:27:54 rhel-c4s5 kernel: scsi host7: FLOGI reg issued fcid d0880 map 0 dest 8c:60:4f:95:ea:a4
> Nov 17 15:27:54 rhel-c4s5 kernel: scsi host7: flog reg succeeded
> Nov 17 15:28:06 rhel-c4s5 kernel: sd 7:0:3:2: Power-on or device reset occurred
> 
Seems that 'blk_queue_enter()' called from scsi_alloc_request() is 
failing, presumably as the queue is frozen/quiesced.
Can you try with the attached patch instead of the previous debug patch?

On, and incidentally: there's an unlock missing:

diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 0278c4a207f3..47bcc6bd7376 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -2233,8 +2233,10 @@ int fnic_device_reset(struct scsi_device *sdev)
         io_lock = fnic_io_lock_hash(fnic, sc);
         spin_lock_irqsave(io_lock, flags);
         io_req = fnic_priv(sc)->io_req;
-       if (io_req)
+       if (io_req) {
+               spin_unlock_irqrestore(io_lock, flags);
                 goto fnic_device_reset_end;
+       }

         io_req = mempool_alloc(fnic->io_req_pool, GFP_ATOMIC);
         if (!io_req) {

Maybe fold it in with your patchset (if it's not already merged).

Cheers,

Hannes
Karan Tilak Kumar Nov. 20, 2023, 10:27 p.m. UTC | #8
On Sunday, November 19, 2023 11:50 PM, Hannes Reinecke <hare@suse.de> wrote:
> Seems that 'blk_queue_enter()' called from scsi_alloc_request() is
> failing, presumably as the queue is frozen/quiesced.
> Can you try with the attached patch instead of the previous debug patch?
>
> On, and incidentally: there's an unlock missing:
>
> diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
> index 0278c4a207f3..47bcc6bd7376 100644
> --- a/drivers/scsi/fnic/fnic_scsi.c
> +++ b/drivers/scsi/fnic/fnic_scsi.c
> @@ -2233,8 +2233,10 @@ int fnic_device_reset(struct scsi_device *sdev)
> io_lock = fnic_io_lock_hash(fnic, sc);
> spin_lock_irqsave(io_lock, flags);
> io_req = fnic_priv(sc)->io_req;
> -       if (io_req)
> +       if (io_req) {
> +               spin_unlock_irqrestore(io_lock, flags);
> goto fnic_device_reset_end;
> +       }
>
> io_req = mempool_alloc(fnic->io_req_pool, GFP_ATOMIC);
> if (!io_req) {
>
> Maybe fold it in with your patchset (if it's not already merged).
>

Thanks Hannes. I've modified the code based on your patch. Here's the repro log:

Nov 20 13:59:01 rhel-c4s5 kernel: fnic<7>: UT: fnic_fcpio_icmnd_cmpl_handler: 847: tag: 0xf sc: 00000000d0bc6014 CDB Opcode: 0x28 Dropping icmnd completion
Nov 20 13:59:31 rhel-c4s5 kernel: scsi host7: Abort Cmd called FCID 0x52061b, LUN 0x2 TAG f flags 3
Nov 20 13:59:31 rhel-c4s5 kernel: scsi host7: CBD Opcode: 28 Abort issued time: 30029 msec
Nov 20 13:59:31 rhel-c4s5 kernel: scsi host7: fnic<7>: UT: fnic_fcpio_itmf_cmpl_handler: 1113: tag: 0xf sc: 00 status: FCPIO_IO_NOT_FOUND Dropping abort completion
Nov 20 13:59:53 rhel-c4s5 kernel: scsi host7: Returning from abort cmd type 2 FAILED
Nov 20 13:59:53 rhel-c4s5 kernel: scsi host7: Device reset called FCID 0x52061b, LUN 0x2 sc: 00000000d0bc6014
Nov 20 13:59:53 rhel-c4s5 kernel: scsi host7: Device reset allocation failed (error -11)
Nov 20 13:59:53 rhel-c4s5 kernel: scsi host7: fnic_reset called
Nov 20 13:59:53 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0xfffffc
Nov 20 13:59:53 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x52061b
Nov 20 13:59:53 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205f2
Nov 20 13:59:53 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205cb
Nov 20 13:59:53 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205ca
Nov 20 13:59:53 rhel-c4s5 kernel: scsi host7: update_mac 00:25:b5:cc:aa:00
Nov 20 13:59:53 rhel-c4s5 kernel: scsi host7: Issued fw reset
Nov 20 13:59:53 rhel-c4s5 kernel: scsi host7: set port_id 0 fp 0000000000000000
Nov 20 13:59:53 rhel-c4s5 kernel: scsi host7: Returning from fnic reset SUCCESS
Nov 20 13:59:53 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0xfffffc
Nov 20 13:59:53 rhel-c4s5 kernel: scsi host7: fnic_cleanup_io: tag:0xf : sc:0x00000000d0bc6014 duration = 52089 DID_TRANSPORT_DISRUPTED
Nov 20 13:59:53 rhel-c4s5 kernel: scsi host7: Calling done for IO not issued to fw: tag:0xf sc:0x00000000d0bc6014
Nov 20 13:59:53 rhel-c4s5 kernel: scsi host7: reset cmpl success
Nov 20 13:59:53 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x52061b
Nov 20 13:59:53 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205f2
Nov 20 13:59:53 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205cb
Nov 20 13:59:53 rhel-c4s5 kernel: scsi host7: fnic_rport_exch_reset called portid 0x5205ca
Nov 20 13:59:55 rhel-c4s5 kernel: host7: Assigned Port ID 0d0880
Nov 20 13:59:55 rhel-c4s5 kernel: scsi host7: set port_id d0880 fp 00000000a1c453b9
Nov 20 13:59:55 rhel-c4s5 kernel: scsi host7: update_mac 0e:fc:00:0d:08:80
Nov 20 13:59:55 rhel-c4s5 kernel: scsi host7: FLOGI reg issued fcid d0880 map 0 dest 8c:60:4f:95:ea:a4
Nov 20 13:59:55 rhel-c4s5 kernel: scsi host7: flog reg succeeded
Nov 20 14:00:06 rhel-c4s5 kernel: sd 7:0:3:2: Power-on or device reset occurred
Nov 20 14:00:06 rhel-c4s5 kernel: sd 7:0:3:2: alua: transition timeout set to 120 seconds
Nov 20 14:00:06 rhel-c4s5 kernel: sd 7:0:3:2: alua: port group 3e9 state A non-preferred supports TolUsNA

This is what the code looks like for the above test:

        req = scsi_alloc_request(sdev->request_queue, REQ_OP_DRV_IN,
                                 BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PM);
    if (IS_ERR(req)) {
                /*
                 * Request allocation might fail, indicating that
                 * all tags are busy.
                 * But device reset will be called only from within
                 * SCSI EH, at which time all I/O is stopped. So the
                 * only active tags would be for failed I/O, but
                 * when all I/O is failed it'll be better to escalate
                 * to host reset anyway.
                 */
                FNIC_SCSI_DBG(KERN_ERR, fnic->lport->host,
                              "Device reset allocation failed (error %ld%s%s)\n",
                              PTR_ERR(req),
                              sdev->request_queue->mq_freeze_depth ? ",frozen" : "",
                              sdev->quiesced_by ? ",quiesced" : "");
                return ret;
        }
        sc = blk_mq_rq_to_pdu(req);

        tag = req->tag;
        io_lock = fnic_io_lock_hash(fnic, sc);
        spin_lock_irqsave(io_lock, flags);
        io_req = fnic_priv(sc)->io_req;
        if (io_req) {
            spin_unlock_irqrestore(io_lock, flags);
                goto fnic_device_reset_end;
        }

Regards,
Karan
Karan Tilak Kumar Nov. 21, 2023, 2:24 a.m. UTC | #9
On Monday, November 20, 2023 2:27 PM, Karan Tilak Kumar (kartilak) wrote:
> > Maybe fold it in with your patchset (if it's not already merged).

Sure Hannes. The patchset is not merged yet. 
I'll make the required change and send out V4.

Regards,
Karan
Karan Tilak Kumar Nov. 21, 2023, 9:06 p.m. UTC | #10
On Monday, November 20, 2023 6:24 PM, Karan Tilak Kumar (kartilak) wrote:
>
> On Monday, November 20, 2023 2:27 PM, Karan Tilak Kumar (kartilak) wrote:
> > > Maybe fold it in with your patchset (if it's not already merged).
>
> Sure Hannes. The patchset is not merged yet.
> I'll make the required change and send out V4.
>
> Regards,
> Karan
>

Thanks for pointing this out, Hannes.
The MQ code has corrected some missing unlocks. Therefore, this problem does not appear to be in the V3 patch set.

Regards,
Karan
diff mbox series

Patch

diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h
index 93c68931a593..8ffcafb4687f 100644
--- a/drivers/scsi/fnic/fnic.h
+++ b/drivers/scsi/fnic/fnic.h
@@ -236,7 +236,6 @@  struct fnic {
 	unsigned int wq_count;
 	unsigned int cq_count;
 
-	struct mutex sgreset_mutex;
 	struct dentry *fnic_stats_debugfs_host;
 	struct dentry *fnic_stats_debugfs_file;
 	struct dentry *fnic_reset_debugfs_file;
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 9761b2c9db48..7a8b6285a096 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -1984,7 +1984,6 @@  static inline int fnic_queue_dr_io_req(struct fnic *fnic,
 
 struct fnic_pending_aborts_iter_data {
 	struct fnic *fnic;
-	struct scsi_cmnd *lr_sc;
 	struct scsi_device *lun_dev;
 	int ret;
 };
@@ -2002,7 +2001,7 @@  static bool fnic_pending_aborts_iter(struct scsi_cmnd *sc, void *data)
 	DECLARE_COMPLETION_ONSTACK(tm_done);
 	enum fnic_ioreq_state old_ioreq_state;
 
-	if (sc == iter_data->lr_sc || sc->device != lun_dev)
+	if (sc->device != lun_dev)
 		return true;
 
 	io_lock = fnic_io_lock_tag(fnic, abt_tag);
@@ -2105,17 +2104,11 @@  static bool fnic_pending_aborts_iter(struct scsi_cmnd *sc, void *data)
 		return false;
 	}
 	fnic_priv(sc)->state = FNIC_IOREQ_ABTS_COMPLETE;
-
-	/* original sc used for lr is handled by dev reset code */
-	if (sc != iter_data->lr_sc)
-		fnic_priv(sc)->io_req = NULL;
+	fnic_priv(sc)->io_req = NULL;
 	spin_unlock_irqrestore(io_lock, flags);
 
-	/* original sc used for lr is handled by dev reset code */
-	if (sc != iter_data->lr_sc) {
-		fnic_release_ioreq_buf(fnic, io_req, sc);
-		mempool_free(io_req, fnic->io_req_pool);
-	}
+	fnic_release_ioreq_buf(fnic, io_req, sc);
+	mempool_free(io_req, fnic->io_req_pool);
 
 	/*
 	 * Any IO is returned during reset, it needs to call scsi_done
@@ -2135,8 +2128,7 @@  static bool fnic_pending_aborts_iter(struct scsi_cmnd *sc, void *data)
  * successfully aborted, 1 otherwise
  */
 static int fnic_clean_pending_aborts(struct fnic *fnic,
-				     struct scsi_cmnd *lr_sc,
-				     bool new_sc)
+				     struct scsi_cmnd *lr_sc)
 
 {
 	int ret = 0;
@@ -2146,9 +2138,6 @@  static int fnic_clean_pending_aborts(struct fnic *fnic,
 		.ret = SUCCESS,
 	};
 
-	if (new_sc)
-		iter_data.lr_sc = lr_sc;
-
 	scsi_host_busy_iter(fnic->lport->host,
 			    fnic_pending_aborts_iter, &iter_data);
 	if (iter_data.ret == FAILED) {
@@ -2174,7 +2163,8 @@  static int fnic_clean_pending_aborts(struct fnic *fnic,
  */
 int fnic_device_reset(struct scsi_cmnd *sc)
 {
-	struct request *rq = scsi_cmd_to_rq(sc);
+	struct scsi_device *sdev = sc->device;
+	struct request *req;
 	struct fc_lport *lp;
 	struct fnic *fnic;
 	struct fnic_io_req *io_req = NULL;
@@ -2187,15 +2177,17 @@  int fnic_device_reset(struct scsi_cmnd *sc)
 	struct scsi_lun fc_lun;
 	struct fnic_stats *fnic_stats;
 	struct reset_stats *reset_stats;
-	int tag = rq->tag;
+	int tag;
 	DECLARE_COMPLETION_ONSTACK(tm_done);
-	bool new_sc = 0;
 
 	/* Wait for rport to unblock */
-	fc_block_scsi_eh(sc);
+	rport = starget_to_rport(scsi_target(sdev));
+	ret = fc_block_rport(rport);
+	if (ret)
+		return ret;
 
 	/* Get local-port, check ready and link up */
-	lp = shost_priv(sc->device->host);
+	lp = shost_priv(sdev->host);
 
 	fnic = lport_priv(lp);
 	fnic_stats = &fnic->fnic_stats;
@@ -2203,53 +2195,55 @@  int fnic_device_reset(struct scsi_cmnd *sc)
 
 	atomic64_inc(&reset_stats->device_resets);
 
-	rport = starget_to_rport(scsi_target(sc->device));
 	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
-		      "Device reset called FCID 0x%x, LUN 0x%llx sc 0x%p\n",
-		      rport->port_id, sc->device->lun, sc);
+		      "Device reset called FCID 0x%x, LUN 0x%llx\n",
+		      rport->port_id, sdev->lun);
 
 	if (lp->state != LPORT_ST_READY || !(lp->link_up))
-		goto fnic_device_reset_end;
+		return ret;
 
 	/* Check if remote port up */
 	if (fc_remote_port_chkready(rport)) {
 		atomic64_inc(&fnic_stats->misc_stats.rport_not_ready);
-		goto fnic_device_reset_end;
+		return ret;
 	}
 
-	fnic_priv(sc)->flags = FNIC_DEVICE_RESET;
-
-	if (unlikely(tag < 0)) {
+	req = scsi_alloc_request(sdev->request_queue, REQ_OP_DRV_IN,
+				 BLK_MQ_REQ_NOWAIT);
+	if (!req) {
 		/*
-		 * For device reset issued through sg3utils, we let
-		 * only one LUN_RESET to go through and use a special
-		 * tag equal to max_tag_id so that we don't have to allocate
-		 * or free it. It won't interact with tags
-		 * allocated by mid layer.
+		 * Request allocation might fail, indicating that
+		 * all tags are busy.
+		 * But device reset will be called only from within
+		 * SCSI EH, at which time all I/O is stopped. So the
+		 * only active tags would be for failed I/O, but
+		 * when all I/O is failed it'll be better to escalate
+		 * to host reset anyway.
 		 */
-		mutex_lock(&fnic->sgreset_mutex);
-		tag = fnic->fnic_max_tag_id;
-		new_sc = 1;
+		FNIC_SCSI_DBG(KERN_ERR, fnic->lport->host,
+			      "Device reset allocation failed, all tags busy\n");
+		return ret;
 	}
+	sc = blk_mq_rq_to_pdu(req);
+
+	tag = req->tag;
 	io_lock = fnic_io_lock_hash(fnic, sc);
 	spin_lock_irqsave(io_lock, flags);
 	io_req = fnic_priv(sc)->io_req;
+	if (io_req)
+		goto fnic_device_reset_end;
 
-	/*
-	 * If there is a io_req attached to this command, then use it,
-	 * else allocate a new one.
-	 */
+	io_req = mempool_alloc(fnic->io_req_pool, GFP_ATOMIC);
 	if (!io_req) {
-		io_req = mempool_alloc(fnic->io_req_pool, GFP_ATOMIC);
-		if (!io_req) {
-			spin_unlock_irqrestore(io_lock, flags);
-			goto fnic_device_reset_end;
-		}
-		memset(io_req, 0, sizeof(*io_req));
-		io_req->port_id = rport->port_id;
-		fnic_priv(sc)->io_req = io_req;
+		spin_unlock_irqrestore(io_lock, flags);
+		goto fnic_device_reset_end;
 	}
+	memset(io_req, 0, sizeof(*io_req));
+	io_req->port_id = rport->port_id;
+	fnic_priv(sc)->io_req = io_req;
+
 	io_req->dr_done = &tm_done;
+	fnic_priv(sc)->flags = FNIC_DEVICE_RESET;
 	fnic_priv(sc)->state = FNIC_IOREQ_CMD_PENDING;
 	fnic_priv(sc)->lr_status = FCPIO_INVALID_CODE;
 	spin_unlock_irqrestore(io_lock, flags);
@@ -2260,11 +2254,13 @@  int fnic_device_reset(struct scsi_cmnd *sc)
 	 * issue the device reset, if enqueue failed, clean up the ioreq
 	 * and break assoc with scsi cmd
 	 */
+	WRITE_ONCE(req->state, MQ_RQ_IN_FLIGHT);
 	if (fnic_queue_dr_io_req(fnic, sc, io_req)) {
 		spin_lock_irqsave(io_lock, flags);
 		io_req = fnic_priv(sc)->io_req;
 		if (io_req)
 			io_req->dr_done = NULL;
+		WRITE_ONCE(req->state, MQ_RQ_IDLE);
 		goto fnic_device_reset_clean;
 	}
 	spin_lock_irqsave(io_lock, flags);
@@ -2279,11 +2275,12 @@  int fnic_device_reset(struct scsi_cmnd *sc)
 				    msecs_to_jiffies(FNIC_LUN_RESET_TIMEOUT));
 
 	spin_lock_irqsave(io_lock, flags);
+	WRITE_ONCE(req->state, MQ_RQ_IDLE);
 	io_req = fnic_priv(sc)->io_req;
 	if (!io_req) {
 		spin_unlock_irqrestore(io_lock, flags);
 		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
-				"io_req is null tag 0x%x sc 0x%p\n", tag, sc);
+				"io_req is null tag 0x%x\n", tag);
 		goto fnic_device_reset_end;
 	}
 	io_req->dr_done = NULL;
@@ -2326,7 +2323,7 @@  int fnic_device_reset(struct scsi_cmnd *sc)
 				spin_unlock_irqrestore(io_lock, flags);
 				FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
 				"Abort and terminate issued on Device reset "
-				"tag 0x%x sc 0x%p\n", tag, sc);
+				"tag 0x%x\n", tag);
 				break;
 			}
 		}
@@ -2364,7 +2361,7 @@  int fnic_device_reset(struct scsi_cmnd *sc)
 	 * the lun reset cmd. If all cmds get cleaned, the lun reset
 	 * succeeds
 	 */
-	if (fnic_clean_pending_aborts(fnic, sc, new_sc)) {
+	if (fnic_clean_pending_aborts(fnic, sc)) {
 		spin_lock_irqsave(io_lock, flags);
 		io_req = fnic_priv(sc)->io_req;
 		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
@@ -2393,15 +2390,15 @@  int fnic_device_reset(struct scsi_cmnd *sc)
 	}
 
 fnic_device_reset_end:
-	FNIC_TRACE(fnic_device_reset, sc->device->host->host_no, rq->tag, sc,
+	FNIC_TRACE(fnic_device_reset, sc->device->host->host_no,
+		  scsi_cmd_to_rq(sc)->tag, sc,
 		  jiffies_to_msecs(jiffies - start_time),
 		  0, ((u64)sc->cmnd[0] << 32 |
 		  (u64)sc->cmnd[2] << 24 | (u64)sc->cmnd[3] << 16 |
 		  (u64)sc->cmnd[4] << 8 | sc->cmnd[5]),
 		  fnic_flags_and_state(sc));
 
-	if (new_sc)
-		mutex_unlock(&fnic->sgreset_mutex);
+	blk_mq_free_request(req);
 
 	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
 		      "Returning from device reset %s\n",
@@ -2633,8 +2630,6 @@  static bool fnic_abts_pending_iter(struct scsi_cmnd *sc, void *data)
 	 * ignore this lun reset cmd or cmds that do not belong to
 	 * this lun
 	 */
-	if (iter_data->lr_sc && sc == iter_data->lr_sc)
-		return true;
 	if (iter_data->lun_dev && sc->device != iter_data->lun_dev)
 		return true;
 
@@ -2677,10 +2672,8 @@  int fnic_is_abts_pending(struct fnic *fnic, struct scsi_cmnd *lr_sc)
 		.ret = 0,
 	};
 
-	if (lr_sc) {
+	if (lr_sc)
 		iter_data.lun_dev = lr_sc->device;
-		iter_data.lr_sc = lr_sc;
-	}
 
 	/* walk again to check, if IOs are still pending in fw */
 	scsi_host_busy_iter(fnic->lport->host,
diff --git a/drivers/scsi/snic/snic_scsi.c b/drivers/scsi/snic/snic_scsi.c
index f123531e7dd1..c38f648da3d7 100644
--- a/drivers/scsi/snic/snic_scsi.c
+++ b/drivers/scsi/snic/snic_scsi.c
@@ -2367,7 +2367,7 @@  snic_cmpl_pending_tmreq(struct snic *snic, struct scsi_cmnd *sc)
 }
 
 static bool
-snic_scsi_cleanup_iter(struct scsi_cmnd *sc, void *data, bool reserved)
+snic_scsi_cleanup_iter(struct scsi_cmnd *sc, void *data)
 {
 	struct snic *snic = data;
 	struct snic_req_info *rqi = NULL;
@@ -2541,8 +2541,7 @@  struct snic_tgt_scsi_abort_io_data {
 	int abt_cnt;
 };
 
-static bool snic_tgt_scsi_abort_io_iter(struct scsi_cmnd *sc, void *data,
-					bool reserved)
+static bool snic_tgt_scsi_abort_io_iter(struct scsi_cmnd *sc, void *data)
 {
 	struct snic_tgt_scsi_abort_io_data *iter_data = data;
 	struct snic *snic = iter_data->snic;