diff mbox series

[v2] SCSI: libiscsi: fix NOP race condition

Message ID 20201106193317.16993-1-leeman.duncan@gmail.com
State New
Headers show
Series [v2] SCSI: libiscsi: fix NOP race condition | expand

Commit Message

Lee Duncan Nov. 6, 2020, 7:33 p.m. UTC
From: Lee Duncan <lduncan@suse.com>

iSCSI NOPs are sometimes "lost": they are mistakenly sent to the
user-land iscsid daemon instead of being handled in the kernel,
as they should be, resulting in a message from the daemon like:

> iscsid: Got nop in, but kernel supports nop handling.

This can occur because of the new forward- and back-locks,
and the fact that an iSCSI NOP response can occur before
processing of the NOP send is complete. This can result
in "conn->ping_task" being NULL in iscsi_nop_out_rsp(),
when the pointer is actually in the process of being set.

To work around this, we add a new state to the "ping_task"
pointer. In addition to NULL (not assigned) and a pointer
(assigned), we add the state "being set", which is signaled
with an INVALID pointer (using "-1").

Changes since V1:
 - expanded the use of READ_ONCE()/WRITE_ONCE() to the whole path

Signed-off-by: Lee Duncan <lduncan@suse.com>
---
 drivers/scsi/libiscsi.c | 23 +++++++++++++++--------
 include/scsi/libiscsi.h |  3 +++
 2 files changed, 18 insertions(+), 8 deletions(-)

Comments

Mike Christie Nov. 11, 2020, 8:28 p.m. UTC | #1
On 11/6/20 1:33 PM, Lee Duncan wrote:
> From: Lee Duncan <lduncan@suse.com>

> 

> iSCSI NOPs are sometimes "lost", mistakenly sent to the

> user-land iscsid daemon instead of handled in the kernel,

> as they should be, resulting in a message from the daemon like:

> 

>> iscsid: Got nop in, but kernel supports nop handling.

> 

> This can occur because of the new forward- and back-locks,

> and the fact that an iSCSI NOP response can occur before

> processing of the NOP send is complete. This can result

> in "conn->ping_task" being NULL in iscsi_nop_out_rsp(),

> when the pointer is actually in the process of being set.

> 

> To work around this, we add a new state to the "ping_task"

> pointer. In addition to NULL (not assigned) and a pointer

> (assigned), we add the state "being set", which is signaled

> with an INVALID pointer (using "-1").

> 

> Changes since V1:

>   - expanded using READ_ONCE()/WRITE_ONCE() to the whole path

> 

> Signed-off-by: Lee Duncan <lduncan@suse.com>

> ---

>   drivers/scsi/libiscsi.c | 23 +++++++++++++++--------

>   include/scsi/libiscsi.h |  3 +++

>   2 files changed, 18 insertions(+), 8 deletions(-)

> 

> diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c

> index 1e9c3171fa9f..f9314f1393fb 100644

> --- a/drivers/scsi/libiscsi.c

> +++ b/drivers/scsi/libiscsi.c

> @@ -533,8 +533,8 @@ static void iscsi_complete_task(struct iscsi_task *task, int state)

>   	if (conn->task == task)

>   		conn->task = NULL;

>   

> -	if (conn->ping_task == task)

> -		conn->ping_task = NULL;

> +	if (READ_ONCE(conn->ping_task) == task)

> +		WRITE_ONCE(conn->ping_task, NULL);

>   

>   	/* release get from queueing */

>   	__iscsi_put_task(task);

> @@ -738,6 +738,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,

>   						   task->conn->session->age);

>   	}

>   

> +	if (unlikely(READ_ONCE(conn->ping_task) == INVALID_SCSI_TASK))

> +		WRITE_ONCE(conn->ping_task, task);

> +

>   	if (!ihost->workq) {

>   		if (iscsi_prep_mgmt_task(conn, task))

>   			goto free_task;

> @@ -941,8 +944,11 @@ static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr)

>           struct iscsi_nopout hdr;

>   	struct iscsi_task *task;

>   

> -	if (!rhdr && conn->ping_task)

> -		return -EINVAL;

> +	if (!rhdr) {

> +		if (READ_ONCE(conn->ping_task))

> +			return -EINVAL;

> +		WRITE_ONCE(conn->ping_task, INVALID_SCSI_TASK);

> +	}

>   

>   	memset(&hdr, 0, sizeof(struct iscsi_nopout));

>   	hdr.opcode = ISCSI_OP_NOOP_OUT | ISCSI_OP_IMMEDIATE;

> @@ -957,11 +963,12 @@ static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr)

>   

>   	task = __iscsi_conn_send_pdu(conn, (struct iscsi_hdr *)&hdr, NULL, 0);

>   	if (!task) {

> +		if (!rhdr)

> +			WRITE_ONCE(conn->ping_task, NULL);

>   		iscsi_conn_printk(KERN_ERR, conn, "Could not send nopout\n");

>   		return -EIO;

>   	} else if (!rhdr) {

>   		/* only track our nops */

> -		conn->ping_task = task;

>   		conn->last_ping = jiffies;

>   	}

>   

> @@ -984,7 +991,7 @@ static int iscsi_nop_out_rsp(struct iscsi_task *task,

>   	struct iscsi_conn *conn = task->conn;

>   	int rc = 0;

>   

> -	if (conn->ping_task != task) {

> +	if (READ_ONCE(conn->ping_task) != task) {

>   		/*

>   		 * If this is not in response to one of our

>   		 * nops then it must be from userspace.

> @@ -1923,7 +1930,7 @@ static void iscsi_start_tx(struct iscsi_conn *conn)

>    */

>   static int iscsi_has_ping_timed_out(struct iscsi_conn *conn)

>   {

> -	if (conn->ping_task &&

> +	if (READ_ONCE(conn->ping_task) &&

>   	    time_before_eq(conn->last_recv + (conn->recv_timeout * HZ) +

>   			   (conn->ping_timeout * HZ), jiffies))

>   		return 1;

> @@ -2058,7 +2065,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)

>   	 * Checking the transport already or nop from a cmd timeout still

>   	 * running

>   	 */

> -	if (conn->ping_task) {

> +	if (READ_ONCE(conn->ping_task)) {

>   		task->have_checked_conn = true;

>   		rc = BLK_EH_RESET_TIMER;

>   		goto done;

> diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h

> index c25fb86ffae9..b3bbd10eb3f0 100644

> --- a/include/scsi/libiscsi.h

> +++ b/include/scsi/libiscsi.h

> @@ -132,6 +132,9 @@ struct iscsi_task {

>   	void			*dd_data;	/* driver/transport data */

>   };

>   

> +/* invalid scsi_task pointer */

> +#define	INVALID_SCSI_TASK	(struct iscsi_task *)-1l

> +

>   static inline int iscsi_task_has_unsol_data(struct iscsi_task *task)

>   {

>   	return task->unsol_r2t.data_length > task->unsol_r2t.sent;

> 


Reviewed-by: Mike Christie <michael.christie@oracle.com>

Martin K. Petersen Nov. 17, 2020, 6:06 a.m. UTC | #2
On Fri, 6 Nov 2020 11:33:17 -0800, Lee Duncan wrote:

> iSCSI NOPs are sometimes "lost", mistakenly sent to the

> user-land iscsid daemon instead of handled in the kernel,

> as they should be, resulting in a message from the daemon like:

> 

> > iscsid: Got nop in, but kernel supports nop handling.

> 

> This can occur because of the new forward- and back-locks,

> and the fact that an iSCSI NOP response can occur before

> processing of the NOP send is complete. This can result

> in "conn->ping_task" being NULL in iscsi_nop_out_rsp(),

> when the pointer is actually in the process of being set.

> 

> [...]


Applied to 5.10/scsi-fixes, thanks!

[1/1] scsi: libiscsi: Fix NOP race condition
      https://git.kernel.org/mkp/scsi/c/fe0a8a95e713

-- 
Martin K. Petersen	Oracle Linux Engineering
diff mbox series

Patch

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 1e9c3171fa9f..f9314f1393fb 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -533,8 +533,8 @@  static void iscsi_complete_task(struct iscsi_task *task, int state)
 	if (conn->task == task)
 		conn->task = NULL;
 
-	if (conn->ping_task == task)
-		conn->ping_task = NULL;
+	if (READ_ONCE(conn->ping_task) == task)
+		WRITE_ONCE(conn->ping_task, NULL);
 
 	/* release get from queueing */
 	__iscsi_put_task(task);
@@ -738,6 +738,9 @@  __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 						   task->conn->session->age);
 	}
 
+	if (unlikely(READ_ONCE(conn->ping_task) == INVALID_SCSI_TASK))
+		WRITE_ONCE(conn->ping_task, task);
+
 	if (!ihost->workq) {
 		if (iscsi_prep_mgmt_task(conn, task))
 			goto free_task;
@@ -941,8 +944,11 @@  static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr)
         struct iscsi_nopout hdr;
 	struct iscsi_task *task;
 
-	if (!rhdr && conn->ping_task)
-		return -EINVAL;
+	if (!rhdr) {
+		if (READ_ONCE(conn->ping_task))
+			return -EINVAL;
+		WRITE_ONCE(conn->ping_task, INVALID_SCSI_TASK);
+	}
 
 	memset(&hdr, 0, sizeof(struct iscsi_nopout));
 	hdr.opcode = ISCSI_OP_NOOP_OUT | ISCSI_OP_IMMEDIATE;
@@ -957,11 +963,12 @@  static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr)
 
 	task = __iscsi_conn_send_pdu(conn, (struct iscsi_hdr *)&hdr, NULL, 0);
 	if (!task) {
+		if (!rhdr)
+			WRITE_ONCE(conn->ping_task, NULL);
 		iscsi_conn_printk(KERN_ERR, conn, "Could not send nopout\n");
 		return -EIO;
 	} else if (!rhdr) {
 		/* only track our nops */
-		conn->ping_task = task;
 		conn->last_ping = jiffies;
 	}
 
@@ -984,7 +991,7 @@  static int iscsi_nop_out_rsp(struct iscsi_task *task,
 	struct iscsi_conn *conn = task->conn;
 	int rc = 0;
 
-	if (conn->ping_task != task) {
+	if (READ_ONCE(conn->ping_task) != task) {
 		/*
 		 * If this is not in response to one of our
 		 * nops then it must be from userspace.
@@ -1923,7 +1930,7 @@  static void iscsi_start_tx(struct iscsi_conn *conn)
  */
 static int iscsi_has_ping_timed_out(struct iscsi_conn *conn)
 {
-	if (conn->ping_task &&
+	if (READ_ONCE(conn->ping_task) &&
 	    time_before_eq(conn->last_recv + (conn->recv_timeout * HZ) +
 			   (conn->ping_timeout * HZ), jiffies))
 		return 1;
@@ -2058,7 +2065,7 @@  enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
 	 * Checking the transport already or nop from a cmd timeout still
 	 * running
 	 */
-	if (conn->ping_task) {
+	if (READ_ONCE(conn->ping_task)) {
 		task->have_checked_conn = true;
 		rc = BLK_EH_RESET_TIMER;
 		goto done;
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index c25fb86ffae9..b3bbd10eb3f0 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -132,6 +132,9 @@  struct iscsi_task {
 	void			*dd_data;	/* driver/transport data */
 };
 
+/* invalid scsi_task pointer */
+#define	INVALID_SCSI_TASK	(struct iscsi_task *)-1l
+
 static inline int iscsi_task_has_unsol_data(struct iscsi_task *task)
 {
 	return task->unsol_r2t.data_length > task->unsol_r2t.sent;