diff mbox series

scsi: libiscsi: fix NOP race condition

Message ID 20200918210947.23800-1-leeman.duncan@gmail.com
State New
Headers show
Series scsi: libiscsi: fix NOP race condition | expand

Commit Message

Lee Duncan Sept. 18, 2020, 9:09 p.m. UTC
From: Lee Duncan <lduncan@suse.com>

iSCSI NOPs are sometimes "lost", mistakenly sent to the
user-land iscsid daemon instead of handled in the kernel,
as they should be, resulting in a message from the daemon like:

> iscsid: Got nop in, but kernel supports nop handling.

This can occur because of the new forward- and back-locks,
and the fact that an iSCSI NOP response can occur before
processing of the NOP send is complete. This can result
in "conn->ping_task" being NULL in iscsi_nop_out_rsp(),
when the pointer is actually in the process of being set.

To work around this, we add a new state to the "ping_task"
pointer. In addition to NULL (not assigned) and a pointer
(assigned), we add the state "being set", which is signaled
with an INVALID pointer (using "-1").

Signed-off-by: Lee Duncan <lduncan@suse.com>
---
 drivers/scsi/libiscsi.c | 11 ++++++++++-
 include/scsi/libiscsi.h |  3 +++
 2 files changed, 13 insertions(+), 1 deletion(-)

Comments

Mike Christie Nov. 4, 2020, 9:33 p.m. UTC | #1
On 9/18/20 4:09 PM, Lee Duncan wrote:
> From: Lee Duncan <lduncan@suse.com>
> 
> iSCSI NOPs are sometimes "lost", mistakenly sent to the
> user-land iscsid daemon instead of handled in the kernel,
> as they should be, resulting in a message from the daemon like:
> 
>> iscsid: Got nop in, but kernel supports nop handling.
> 
> This can occur because of the new forward- and back-locks,
> and the fact that an iSCSI NOP response can occur before
> processing of the NOP send is complete. This can result
> in "conn->ping_task" being NULL in iscsi_nop_out_rsp(),
> when the pointer is actually in the process of being set.
> 
> To work around this, we add a new state to the "ping_task"
> pointer. In addition to NULL (not assigned) and a pointer
> (assigned), we add the state "being set", which is signaled
> with an INVALID pointer (using "-1").
> 
> Signed-off-by: Lee Duncan <lduncan@suse.com>
> ---
>   drivers/scsi/libiscsi.c | 11 ++++++++++-
>   include/scsi/libiscsi.h |  3 +++
>   2 files changed, 13 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
> index 1e9c3171fa9f..5eb064787ee2 100644
> --- a/drivers/scsi/libiscsi.c
> +++ b/drivers/scsi/libiscsi.c
> @@ -738,6 +738,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
>   						   task->conn->session->age);
>   	}
>   
> +	if (unlikely(READ_ONCE(conn->ping_task) == INVALID_SCSI_TASK))
> +		WRITE_ONCE(conn->ping_task, task);
> +
>   	if (!ihost->workq) {
>   		if (iscsi_prep_mgmt_task(conn, task))
>   			goto free_task;
> @@ -941,6 +944,11 @@ static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr)
>           struct iscsi_nopout hdr;
>   	struct iscsi_task *task;
>   
> +	if (!rhdr) {
> +		if (READ_ONCE(conn->ping_task))
> +			return -EINVAL;
> +		WRITE_ONCE(conn->ping_task, INVALID_SCSI_TASK);
> +	}
>   	if (!rhdr && conn->ping_task)
>   		return -EINVAL;
>   
> @@ -957,11 +965,12 @@ static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr)
>   
>   	task = __iscsi_conn_send_pdu(conn, (struct iscsi_hdr *)&hdr, NULL, 0);
>   	if (!task) {
> +		if (!rhdr)
> +			WRITE_ONCE(conn->ping_task, NULL);

I don't think you need this. If __iscsi_conn_send_pdu returns NULL, it 
will have done __iscsi_put_task and done this already.

>   		iscsi_conn_printk(KERN_ERR, conn, "Could not send nopout\n");
>   		return -EIO;
>   	} else if (!rhdr) {
>   		/* only track our nops */
> -		conn->ping_task = task;
>   		conn->last_ping = jiffies;
>   	}

Why do we always use READ_ONCE/WRITE_ONCE in the send path, but not in 
the completion path, e.g. in iscsi_complete_task?
Mike Christie Nov. 4, 2020, 9:37 p.m. UTC | #2
On 11/4/20 3:33 PM, Mike Christie wrote:
> On 9/18/20 4:09 PM, Lee Duncan wrote:

>> From: Lee Duncan <lduncan@suse.com>

>>

>> iSCSI NOPs are sometimes "lost", mistakenly sent to the

>> user-land iscsid daemon instead of handled in the kernel,

>> as they should be, resulting in a message from the daemon like:

>>

>>> iscsid: Got nop in, but kernel supports nop handling.

>>

>> This can occur because of the new forward- and back-locks,

>> and the fact that an iSCSI NOP response can occur before

>> processing of the NOP send is complete. This can result

>> in "conn->ping_task" being NULL in iscsi_nop_out_rsp(),

>> when the pointer is actually in the process of being set.

>>

>> To work around this, we add a new state to the "ping_task"

>> pointer. In addition to NULL (not assigned) and a pointer

>> (assigned), we add the state "being set", which is signaled

>> with an INVALID pointer (using "-1").

>>

>> Signed-off-by: Lee Duncan <lduncan@suse.com>

>> ---

>>   drivers/scsi/libiscsi.c | 11 ++++++++++-

>>   include/scsi/libiscsi.h |  3 +++

>>   2 files changed, 13 insertions(+), 1 deletion(-)

>>

>> diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c

>> index 1e9c3171fa9f..5eb064787ee2 100644

>> --- a/drivers/scsi/libiscsi.c

>> +++ b/drivers/scsi/libiscsi.c

>> @@ -738,6 +738,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, 

>> struct iscsi_hdr *hdr,

>>                              task->conn->session->age);

>>       }

>> +    if (unlikely(READ_ONCE(conn->ping_task) == INVALID_SCSI_TASK))

>> +        WRITE_ONCE(conn->ping_task, task);

>> +

>>       if (!ihost->workq) {

>>           if (iscsi_prep_mgmt_task(conn, task))

>>               goto free_task;

>> @@ -941,6 +944,11 @@ static int iscsi_send_nopout(struct iscsi_conn 

>> *conn, struct iscsi_nopin *rhdr)

>>           struct iscsi_nopout hdr;

>>       struct iscsi_task *task;

>> +    if (!rhdr) {

>> +        if (READ_ONCE(conn->ping_task))

>> +            return -EINVAL;

>> +        WRITE_ONCE(conn->ping_task, INVALID_SCSI_TASK);

>> +    }

>>       if (!rhdr && conn->ping_task)

>>           return -EINVAL;

>> @@ -957,11 +965,12 @@ static int iscsi_send_nopout(struct iscsi_conn 

>> *conn, struct iscsi_nopin *rhdr)

>>       task = __iscsi_conn_send_pdu(conn, (struct iscsi_hdr *)&hdr, 

>> NULL, 0);

>>       if (!task) {

>> +        if (!rhdr)

>> +            WRITE_ONCE(conn->ping_task, NULL);

> 

> I don't think you need this. If __iscsi_conn_send_pdu returns NULL, it 

> will have done __iscsi_put_task and done this already.


Ignore that. It is iscsi_complete_task that would do it.

> 

>>           iscsi_conn_printk(KERN_ERR, conn, "Could not send nopout\n");

>>           return -EIO;

>>       } else if (!rhdr) {

>>           /* only track our nops */

>> -        conn->ping_task = task;

>>           conn->last_ping = jiffies;

>>       }

> 

> Why in the send path do we always use the READ_ONCE/WRITE_ONCE, but in 

> the completion path like in iscsi_complete_task we don't.
Lee Duncan Nov. 5, 2020, 6:30 p.m. UTC | #3
On 11/4/20 1:33 PM, Mike Christie wrote:
> On 9/18/20 4:09 PM, Lee Duncan wrote:
>> From: Lee Duncan <lduncan@suse.com>
>>
>> iSCSI NOPs are sometimes "lost", mistakenly sent to the
>> user-land iscsid daemon instead of handled in the kernel,
>> as they should be, resulting in a message from the daemon like:
>>
>>> iscsid: Got nop in, but kernel supports nop handling.
>>
>> This can occur because of the new forward- and back-locks,
>> and the fact that an iSCSI NOP response can occur before
>> processing of the NOP send is complete. This can result
>> in "conn->ping_task" being NULL in iscsi_nop_out_rsp(),
>> when the pointer is actually in the process of being set.
>>
>> To work around this, we add a new state to the "ping_task"
>> pointer. In addition to NULL (not assigned) and a pointer
>> (assigned), we add the state "being set", which is signaled
>> with an INVALID pointer (using "-1").
>>
>> Signed-off-by: Lee Duncan <lduncan@suse.com>
>> ---
>>   drivers/scsi/libiscsi.c | 11 ++++++++++-
>>   include/scsi/libiscsi.h |  3 +++
>>   2 files changed, 13 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
>> index 1e9c3171fa9f..5eb064787ee2 100644
>> --- a/drivers/scsi/libiscsi.c
>> +++ b/drivers/scsi/libiscsi.c
>> @@ -738,6 +738,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn,
>> struct iscsi_hdr *hdr,
>>                              task->conn->session->age);
>>       }
>>   +    if (unlikely(READ_ONCE(conn->ping_task) == INVALID_SCSI_TASK))
>> +        WRITE_ONCE(conn->ping_task, task);
>> +
>>       if (!ihost->workq) {
>>           if (iscsi_prep_mgmt_task(conn, task))
>>               goto free_task;
>> @@ -941,6 +944,11 @@ static int iscsi_send_nopout(struct iscsi_conn
>> *conn, struct iscsi_nopin *rhdr)
>>           struct iscsi_nopout hdr;
>>       struct iscsi_task *task;
>>   +    if (!rhdr) {
>> +        if (READ_ONCE(conn->ping_task))
>> +            return -EINVAL;
>> +        WRITE_ONCE(conn->ping_task, INVALID_SCSI_TASK);
>> +    }
>>       if (!rhdr && conn->ping_task)
>>           return -EINVAL;
>>   @@ -957,11 +965,12 @@ static int iscsi_send_nopout(struct iscsi_conn
>> *conn, struct iscsi_nopin *rhdr)
>>         task = __iscsi_conn_send_pdu(conn, (struct iscsi_hdr *)&hdr,
>> NULL, 0);
>>       if (!task) {
>> +        if (!rhdr)
>> +            WRITE_ONCE(conn->ping_task, NULL);
> 
> I don't think you need this. If __iscsi_conn_send_pdu returns NULL, it
> will have done __iscsi_put_task and done this already.

Not an issue, as you already replied.

> 
>>           iscsi_conn_printk(KERN_ERR, conn, "Could not send nopout\n");
>>           return -EIO;
>>       } else if (!rhdr) {
>>           /* only track our nops */
>> -        conn->ping_task = task;
>>           conn->last_ping = jiffies;
>>       }
> 
> Why in the send path do we always use the READ_ONCE/WRITE_ONCE, but in
> the completion path like in iscsi_complete_task we don't.
> 

The answer is that I was only modifying the code that needed changing
for this bug. My first pass did not use READ_ONCE() or WRITE_ONCE(), but
Hannes suggested the change.

Now that I think about it more, the memory barrier stuff would make
sense only if all accesses to that field are protected.

I will resubmit V2 of the patch.
diff mbox series

Patch

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 1e9c3171fa9f..5eb064787ee2 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -738,6 +738,9 @@  __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 						   task->conn->session->age);
 	}
 
+	if (unlikely(READ_ONCE(conn->ping_task) == INVALID_SCSI_TASK))
+		WRITE_ONCE(conn->ping_task, task);
+
 	if (!ihost->workq) {
 		if (iscsi_prep_mgmt_task(conn, task))
 			goto free_task;
@@ -941,6 +944,11 @@  static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr)
         struct iscsi_nopout hdr;
 	struct iscsi_task *task;
 
+	if (!rhdr) {
+		if (READ_ONCE(conn->ping_task))
+			return -EINVAL;
+		WRITE_ONCE(conn->ping_task, INVALID_SCSI_TASK);
+	}
 	if (!rhdr && conn->ping_task)
 		return -EINVAL;
 
@@ -957,11 +965,12 @@  static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr)
 
 	task = __iscsi_conn_send_pdu(conn, (struct iscsi_hdr *)&hdr, NULL, 0);
 	if (!task) {
+		if (!rhdr)
+			WRITE_ONCE(conn->ping_task, NULL);
 		iscsi_conn_printk(KERN_ERR, conn, "Could not send nopout\n");
 		return -EIO;
 	} else if (!rhdr) {
 		/* only track our nops */
-		conn->ping_task = task;
 		conn->last_ping = jiffies;
 	}
 
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index c25fb86ffae9..b3bbd10eb3f0 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -132,6 +132,9 @@  struct iscsi_task {
 	void			*dd_data;	/* driver/transport data */
 };
 
+/* invalid scsi_task pointer */
+#define	INVALID_SCSI_TASK	(struct iscsi_task *)-1l
+
 static inline int iscsi_task_has_unsol_data(struct iscsi_task *task)
 {
 	return task->unsol_r2t.data_length > task->unsol_r2t.sent;