diff mbox series

[v4,02/17] scsi: iscsi: sync libiscsi and driver reset cleanup

Message ID 20210424220603.123703-3-michael.christie@oracle.com
State New
Headers show
Series libiscsi and qedi TMF fixes | expand

Commit Message

Mike Christie April 24, 2021, 10:05 p.m. UTC
If we are handling a sg io reset there is a small window where cmds can
sneak into iscsi_queuecommand even though libiscsi has sent a TMF to the
driver. This doesn't seem to be a problem for normal drivers because they
know exactly what was sent to the FW. For libiscsi this is a problem
because it knows what it has sent to the driver but not what the driver
sent to the FW. When we go to cleanup the running tasks, libiscsi might
cleanup the sneaky cmd but the driver/FW may not have seen the sneaky cmd
and it's left running in FW.

This has libiscsi just stop queueing cmds when it sends the TMF down to the
driver. Both then know they see the same set of cmds.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
 drivers/scsi/libiscsi.c | 106 ++++++++++++++++++++++++++++------------
 1 file changed, 74 insertions(+), 32 deletions(-)

Comments

Lee Duncan April 25, 2021, 11:48 p.m. UTC | #1
On 4/24/21 3:05 PM, Mike Christie wrote:
> If we are handling a sg io reset there is a small window where cmds can

> sneak into iscsi_queuecommand even though libiscsi has sent a TMF to the

> driver. This doesn't seem to be a problem for normal drivers because they

> know exactly what was sent to the FW. For libiscsi this is a problem

> because it knows what it has sent to the driver but not what the driver

> sent to the FW. When we go to cleanup the running tasks, libiscsi might

> cleanup the sneaky cmd but the driver/FW may not have seen the sneaky cmd

> and it's left running in FW.

> 

> This has libiscsi just stop queueing cmds when it sends the TMF down to the

> driver. Both then know they see the same set of cmds.

> 

> Signed-off-by: Mike Christie <michael.christie@oracle.com>

> ---

>  drivers/scsi/libiscsi.c | 106 ++++++++++++++++++++++++++++------------

>  1 file changed, 74 insertions(+), 32 deletions(-)

> 

> diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c

> index 4834219497ee..80305b70298d 100644

> --- a/drivers/scsi/libiscsi.c

> +++ b/drivers/scsi/libiscsi.c

> @@ -1669,29 +1669,11 @@ enum {

>  	FAILURE_SESSION_NOT_READY,

>  };

>  

> -int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc)

> +static bool iscsi_eh_running(struct iscsi_conn *conn, struct scsi_cmnd *sc,

> +			     int *reason)

>  {

> -	struct iscsi_cls_session *cls_session;

> -	struct iscsi_host *ihost;

> -	int reason = 0;

> -	struct iscsi_session *session;

> -	struct iscsi_conn *conn;

> -	struct iscsi_task *task = NULL;

> -

> -	sc->result = 0;

> -	sc->SCp.ptr = NULL;

> -

> -	ihost = shost_priv(host);

> -

> -	cls_session = starget_to_session(scsi_target(sc->device));

> -	session = cls_session->dd_data;

> -	spin_lock_bh(&session->frwd_lock);

> -

> -	reason = iscsi_session_chkready(cls_session);

> -	if (reason) {

> -		sc->result = reason;

> -		goto fault;

> -	}

> +	struct iscsi_session *session = conn->session;

> +	struct iscsi_tm *tmf;

>  

>  	if (session->state != ISCSI_STATE_LOGGED_IN) {

>  		/*

> @@ -1707,31 +1689,94 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc)

>  			 * state is bad, allowing completion to happen

>  			 */

>  			if (unlikely(system_state != SYSTEM_RUNNING)) {

> -				reason = FAILURE_SESSION_FAILED;

> +				*reason = FAILURE_SESSION_FAILED;

>  				sc->result = DID_NO_CONNECT << 16;

>  				break;

>  			}

>  			fallthrough;

>  		case ISCSI_STATE_IN_RECOVERY:

> -			reason = FAILURE_SESSION_IN_RECOVERY;

> +			*reason = FAILURE_SESSION_IN_RECOVERY;

>  			sc->result = DID_IMM_RETRY << 16;

>  			break;

>  		case ISCSI_STATE_LOGGING_OUT:

> -			reason = FAILURE_SESSION_LOGGING_OUT;

> +			*reason = FAILURE_SESSION_LOGGING_OUT;

>  			sc->result = DID_IMM_RETRY << 16;

>  			break;

>  		case ISCSI_STATE_RECOVERY_FAILED:

> -			reason = FAILURE_SESSION_RECOVERY_TIMEOUT;

> +			*reason = FAILURE_SESSION_RECOVERY_TIMEOUT;

>  			sc->result = DID_TRANSPORT_FAILFAST << 16;

>  			break;

>  		case ISCSI_STATE_TERMINATE:

> -			reason = FAILURE_SESSION_TERMINATE;

> +			*reason = FAILURE_SESSION_TERMINATE;

>  			sc->result = DID_NO_CONNECT << 16;

>  			break;

>  		default:

> -			reason = FAILURE_SESSION_FREED;

> +			*reason = FAILURE_SESSION_FREED;

>  			sc->result = DID_NO_CONNECT << 16;

>  		}

> +		goto eh_running;

> +	}

> +

> +	if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) {

> +		*reason = FAILURE_SESSION_IN_RECOVERY;

> +		sc->result = DID_REQUEUE << 16;

> +		goto eh_running;

> +	}

> +

> +	/*

> +	 * For sg resets make sure libiscsi, the drivers and their fw see the

> +	 * same cmds. Once we get a TMF that can affect multiple cmds stop

> +	 * queueing.

> +	 */

> +	if (conn->tmf_state != TMF_INITIAL) {

> +		tmf = &conn->tmhdr;

> +

> +		switch (ISCSI_TM_FUNC_VALUE(tmf)) {

> +		case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET:

> +			if (sc->device->lun != scsilun_to_int(&tmf->lun))

> +				break;

> +

> +			ISCSI_DBG_EH(conn->session,

> +				     "Requeue cmd sent during LU RESET processing.\n");

> +			sc->result = DID_REQUEUE << 16;

> +			goto eh_running;

> +		case ISCSI_TM_FUNC_TARGET_WARM_RESET:

> +			ISCSI_DBG_EH(conn->session,

> +				     "Requeue cmd sent during TARGET RESET processing.\n");

> +			sc->result = DID_REQUEUE << 16;

> +			goto eh_running;

> +		default:

> +			break;

> +		}

> +	}

> +

> +	return false;

> +

> +eh_running:

> +	return true;

> +}

> +

> +int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc)

> +{

> +	struct iscsi_cls_session *cls_session;

> +	struct iscsi_host *ihost;

> +	int reason = 0;

> +	struct iscsi_session *session;

> +	struct iscsi_conn *conn;

> +	struct iscsi_task *task = NULL;

> +

> +	sc->result = 0;

> +	sc->SCp.ptr = NULL;

> +

> +	ihost = shost_priv(host);

> +

> +	cls_session = starget_to_session(scsi_target(sc->device));

> +	session = cls_session->dd_data;

> +	spin_lock_bh(&session->frwd_lock);

> +

> +	reason = iscsi_session_chkready(cls_session);

> +	if (reason) {

> +		sc->result = reason;

>  		goto fault;

>  	}

>  

> @@ -1742,11 +1787,8 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc)

>  		goto fault;

>  	}

>  

> -	if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) {

> -		reason = FAILURE_SESSION_IN_RECOVERY;

> -		sc->result = DID_REQUEUE << 16;

> +	if (iscsi_eh_running(conn, sc, &reason))

>  		goto fault;

> -	}

>  

>  	if (iscsi_check_cmdsn_window_closed(conn)) {

>  		reason = FAILURE_WINDOW_CLOSED;

> 


Reviewed-by: Lee Duncan <lduncan@suse.com>
diff mbox series

Patch

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 4834219497ee..80305b70298d 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -1669,29 +1669,11 @@  enum {
 	FAILURE_SESSION_NOT_READY,
 };
 
-int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc)
+static bool iscsi_eh_running(struct iscsi_conn *conn, struct scsi_cmnd *sc,
+			     int *reason)
 {
-	struct iscsi_cls_session *cls_session;
-	struct iscsi_host *ihost;
-	int reason = 0;
-	struct iscsi_session *session;
-	struct iscsi_conn *conn;
-	struct iscsi_task *task = NULL;
-
-	sc->result = 0;
-	sc->SCp.ptr = NULL;
-
-	ihost = shost_priv(host);
-
-	cls_session = starget_to_session(scsi_target(sc->device));
-	session = cls_session->dd_data;
-	spin_lock_bh(&session->frwd_lock);
-
-	reason = iscsi_session_chkready(cls_session);
-	if (reason) {
-		sc->result = reason;
-		goto fault;
-	}
+	struct iscsi_session *session = conn->session;
+	struct iscsi_tm *tmf;
 
 	if (session->state != ISCSI_STATE_LOGGED_IN) {
 		/*
@@ -1707,31 +1689,94 @@  int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc)
 			 * state is bad, allowing completion to happen
 			 */
 			if (unlikely(system_state != SYSTEM_RUNNING)) {
-				reason = FAILURE_SESSION_FAILED;
+				*reason = FAILURE_SESSION_FAILED;
 				sc->result = DID_NO_CONNECT << 16;
 				break;
 			}
 			fallthrough;
 		case ISCSI_STATE_IN_RECOVERY:
-			reason = FAILURE_SESSION_IN_RECOVERY;
+			*reason = FAILURE_SESSION_IN_RECOVERY;
 			sc->result = DID_IMM_RETRY << 16;
 			break;
 		case ISCSI_STATE_LOGGING_OUT:
-			reason = FAILURE_SESSION_LOGGING_OUT;
+			*reason = FAILURE_SESSION_LOGGING_OUT;
 			sc->result = DID_IMM_RETRY << 16;
 			break;
 		case ISCSI_STATE_RECOVERY_FAILED:
-			reason = FAILURE_SESSION_RECOVERY_TIMEOUT;
+			*reason = FAILURE_SESSION_RECOVERY_TIMEOUT;
 			sc->result = DID_TRANSPORT_FAILFAST << 16;
 			break;
 		case ISCSI_STATE_TERMINATE:
-			reason = FAILURE_SESSION_TERMINATE;
+			*reason = FAILURE_SESSION_TERMINATE;
 			sc->result = DID_NO_CONNECT << 16;
 			break;
 		default:
-			reason = FAILURE_SESSION_FREED;
+			*reason = FAILURE_SESSION_FREED;
 			sc->result = DID_NO_CONNECT << 16;
 		}
+		goto eh_running;
+	}
+
+	if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) {
+		*reason = FAILURE_SESSION_IN_RECOVERY;
+		sc->result = DID_REQUEUE << 16;
+		goto eh_running;
+	}
+
+	/*
+	 * For sg resets make sure libiscsi, the drivers and their fw see the
+	 * same cmds. Once we get a TMF that can affect multiple cmds stop
+	 * queueing.
+	 */
+	if (conn->tmf_state != TMF_INITIAL) {
+		tmf = &conn->tmhdr;
+
+		switch (ISCSI_TM_FUNC_VALUE(tmf)) {
+		case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET:
+			if (sc->device->lun != scsilun_to_int(&tmf->lun))
+				break;
+
+			ISCSI_DBG_EH(conn->session,
+				     "Requeue cmd sent during LU RESET processing.\n");
+			sc->result = DID_REQUEUE << 16;
+			goto eh_running;
+		case ISCSI_TM_FUNC_TARGET_WARM_RESET:
+			ISCSI_DBG_EH(conn->session,
+				     "Requeue cmd sent during TARGET RESET processing.\n");
+			sc->result = DID_REQUEUE << 16;
+			goto eh_running;
+		default:
+			break;
+		}
+	}
+
+	return false;
+
+eh_running:
+	return true;
+}
+
+int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc)
+{
+	struct iscsi_cls_session *cls_session;
+	struct iscsi_host *ihost;
+	int reason = 0;
+	struct iscsi_session *session;
+	struct iscsi_conn *conn;
+	struct iscsi_task *task = NULL;
+
+	sc->result = 0;
+	sc->SCp.ptr = NULL;
+
+	ihost = shost_priv(host);
+
+	cls_session = starget_to_session(scsi_target(sc->device));
+	session = cls_session->dd_data;
+	spin_lock_bh(&session->frwd_lock);
+
+	reason = iscsi_session_chkready(cls_session);
+	if (reason) {
+		sc->result = reason;
 		goto fault;
 	}
 
@@ -1742,11 +1787,8 @@  int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc)
 		goto fault;
 	}
 
-	if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) {
-		reason = FAILURE_SESSION_IN_RECOVERY;
-		sc->result = DID_REQUEUE << 16;
+	if (iscsi_eh_running(conn, sc, &reason))
 		goto fault;
-	}
 
 	if (iscsi_check_cmdsn_window_closed(conn)) {
 		reason = FAILURE_WINDOW_CLOSED;