diff mbox series

[v3] target: core: remove from tmr_list at lun unlink

Message ID 20210915141719.1484-1-d.bogdanov@yadro.com
State New
Headers show
Series [v3] target: core: remove from tmr_list at lun unlink | expand

Commit Message

Dmitry Bogdanov Sept. 15, 2021, 2:17 p.m. UTC
Currently TMF commands are removed from de_device.dev_tmf_list at
the very end of se_cmd lifecycle. But se_lun unlinks from se_cmd
up on a command status (response) is queued in transport layer.
It means that LUN and backend device can be deleted meantime and at
the moment of repsonse completion a panic is occured:

target_tmr_work()
	cmd->se_tfo->queue_tm_rsp(cmd); // send abort_rsp to a wire
	transport_lun_remove_cmd(cmd) // unlink se_cmd from se_lun
- // - // - // -
<<<--- lun remove
<<<--- core backend device remove
- // - // - // -
qlt_handle_abts_completion()
  tfo->free_mcmd()
    transport_generic_free_cmd()
      target_put_sess_cmd()
        core_tmr_release_req() {
          if (dev) { // backend device, can not be null
            spin_lock_irqsave(&dev->se_tmr_lock, flags); //<<<--- CRASH

Call Trace:
NIP [c000000000e1683c] _raw_spin_lock_irqsave+0x2c/0xc0
LR [c00800000e433338] core_tmr_release_req+0x40/0xa0 [target_core_mod]
Call Trace:
(unreliable)
0x0
target_put_sess_cmd+0x2a0/0x370 [target_core_mod]
transport_generic_free_cmd+0x6c/0x1b0 [target_core_mod]
tcm_qla2xxx_complete_mcmd+0x28/0x50 [tcm_qla2xxx]
process_one_work+0x2c4/0x5c0
worker_thread+0x88/0x690

For FC protocol it is a race condition, but for iSCSI protocol it is
easyly reproduced by manual sending iSCSI commands:
- Send some SCSI sommand
- Send Abort of that command over iSCSI
- Remove LUN on target
- Send next iSCSI command to acknowledge the Abort_Response
- target panics

There is no sense to keep the command in tmr_list until response
completion, so move the removal from tmr_list from the response
completion to the response queueing when lun is unlinked.
Move the removal from state list too as it is a subject to the same
race condition.

Fixes: c66ac9db8d4a ("[SCSI] target: Add LIO target core v4.0.0-rc6")
Reviewed-by: Roman Bolshakov <r.bolshakov@yadro.com>
Signed-off-by: Dmitry Bogdanov <d.bogdanov@yadro.com>

---
v3:
 remove iscsi fix as not related to the issue
 avoid double removal from tmr_list
v2:
 fix stuck in tmr list in error case

The issue exists from the very begining.
I uploaded a scapy script that helps to reproduce the issue at
https://gist.github.com/logost/cb93df41dd2432454324449b390403c4
---
 drivers/target/target_core_tmr.c       | 10 +--------
 drivers/target/target_core_transport.c | 30 ++++++++++++++++++++------
 2 files changed, 24 insertions(+), 16 deletions(-)

Comments

Mike Christie Sept. 17, 2021, 4:57 p.m. UTC | #1
On 9/15/21 9:17 AM, Dmitry Bogdanov wrote:
> Currently TMF commands are removed from de_device.dev_tmf_list at

> the very end of se_cmd lifecycle. But se_lun unlinks from se_cmd

> up on a command status (response) is queued in transport layer.

> It means that LUN and backend device can be deleted meantime and at

> the moment of repsonse completion a panic is occured:

> 

> target_tmr_work()

> 	cmd->se_tfo->queue_tm_rsp(cmd); // send abort_rsp to a wire

> 	transport_lun_remove_cmd(cmd) // unlink se_cmd from se_lun

> - // - // - // -

> <<<--- lun remove

> <<<--- core backend device remove

> - // - // - // -

> qlt_handle_abts_completion()

>   tfo->free_mcmd()

>     transport_generic_free_cmd()

>       target_put_sess_cmd()

>         core_tmr_release_req() {

>           if (dev) { // backend device, can not be null

>             spin_lock_irqsave(&dev->se_tmr_lock, flags); //<<<--- CRASH

> 

> Call Trace:

> NIP [c000000000e1683c] _raw_spin_lock_irqsave+0x2c/0xc0

> LR [c00800000e433338] core_tmr_release_req+0x40/0xa0 [target_core_mod]

> Call Trace:

> (unreliable)

> 0x0

> target_put_sess_cmd+0x2a0/0x370 [target_core_mod]

> transport_generic_free_cmd+0x6c/0x1b0 [target_core_mod]

> tcm_qla2xxx_complete_mcmd+0x28/0x50 [tcm_qla2xxx]

> process_one_work+0x2c4/0x5c0

> worker_thread+0x88/0x690

> 

> For FC protocol it is a race condition, but for iSCSI protocol it is

> easyly reproduced by manual sending iSCSI commands:

> - Send some SCSI sommand

> - Send Abort of that command over iSCSI

> - Remove LUN on target

> - Send next iSCSI command to acknowledge the Abort_Response

> - target panics

> 

> There is no sense to keep the command in tmr_list until response

> completion, so move the removal from tmr_list from the response

> completion to the response queueing when lun is unlinked.

> Move the removal from state list too as it is a subject to the same

> race condition.

> 

> Fixes: c66ac9db8d4a ("[SCSI] target: Add LIO target core v4.0.0-rc6")

> Reviewed-by: Roman Bolshakov <r.bolshakov@yadro.com>

> Signed-off-by: Dmitry Bogdanov <d.bogdanov@yadro.com>

> 

> ---

> v3:

>  remove iscsi fix as not related to the issue

>  avoid double removal from tmr_list

> v2:

>  fix stuck in tmr list in error case

> 

> The issue exists from the very begining.

> I uploaded a scapy script that helps to reproduce the issue at

> https://gist.github.com/logost/cb93df41dd2432454324449b390403c4

> ---

>  drivers/target/target_core_tmr.c       | 10 +--------

>  drivers/target/target_core_transport.c | 30 ++++++++++++++++++++------

>  2 files changed, 24 insertions(+), 16 deletions(-)

> 

> diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c

> index e7fcbc09f9db..84ae2fe456ec 100644

> --- a/drivers/target/target_core_tmr.c

> +++ b/drivers/target/target_core_tmr.c

> @@ -50,15 +50,6 @@ EXPORT_SYMBOL(core_tmr_alloc_req);

>  

>  void core_tmr_release_req(struct se_tmr_req *tmr)

>  {

> -	struct se_device *dev = tmr->tmr_dev;

> -	unsigned long flags;

> -

> -	if (dev) {

> -		spin_lock_irqsave(&dev->se_tmr_lock, flags);

> -		list_del_init(&tmr->tmr_list);

> -		spin_unlock_irqrestore(&dev->se_tmr_lock, flags);

> -	}

> -

>  	kfree(tmr);

>  }

>  

> @@ -234,6 +225,7 @@ static void core_tmr_drain_tmr_list(

>  		}

>  

>  		list_move_tail(&tmr_p->tmr_list, &drain_tmr_list);

> +		tmr_p->tmr_dev = NULL;


Is this patch now adding a way to hit:

if (!tmr->tmr_dev)
	WARN_ON_ONCE(transport_lookup_tmr_lun(tmr->task_cmd) < 0);                      

in core_tmr_abort_task?

You have the abort and lun reset works running on different CPUs.
The lun reset hits the above code first and clears tmr_dev.
The abort then hits the tmr->tmr_dev check and tries to do
transport_lookup_tmr_lun.

For the case where the lun is not removed, it looks like
transport_lookup_tmr_lun will add the tmr to the dev_tmr_list
but it would also be on the drain_tmr_list above so we would
hit list corruption.

For the case where the lun is getting removed, percpu_ref_tryget_live
would fail in transport_lookup_tmr_lun and we hit the WARN_ON_ONCE.
I think though with your patch, we would be ok and don't want
the WARN_ON_ONCE, right? The lun reset would just wait for the
abort. When it completes the abort and reset complete as expected.
Dmitry Bogdanov Sept. 20, 2021, 4:40 p.m. UTC | #2
Hi Mike,

> > @@ -234,6 +225,7 @@ static void core_tmr_drain_tmr_list(

> >  		}

> >  

> >  		list_move_tail(&tmr_p->tmr_list, &drain_tmr_list);

> > +		tmr_p->tmr_dev = NULL;

>

> Is this patch now adding a way to hit:

>

> if (!tmr->tmr_dev)

>	WARN_ON_ONCE(transport_lookup_tmr_lun(tmr->task_cmd) < 0);                      

>

> in core_tmr_abort_task?

>

> You have the abort and lun reset works running on different CPUs.

> The lun reset hits the above code first and clears tmr_dev.

> The abort then hits the tmr->tmr_dev check and tries to do

> transport_lookup_tmr_lun.

>

> For the case where the lun is not removed, it looks like

> transport_lookup_tmr_lun will add the tmr to the dev_tmr_list

> but it would also be on the drain_tmr_list above so we would

> hit list corruption.


Yes, there is a such race. I think,  I can solve it by changing the order of
draining the tmr_list and state_list at LUN Reset to make the raced lines 
be under the same lock.

Especially SAM-5 describes(but does not require) aborting commands
before tmfs:
| When responding to a logical unit reset condition, the logical unit shall:
|	a) abort all commands as described in 5.6;
|	b) abort all copy operations (see SPC-4);
|	c) terminate all task management functions;


> For the case where the lun is getting removed, percpu_ref_tryget_live

> would fail in transport_lookup_tmr_lun and we hit the WARN_ON_ONCE.

> I think though with your patch, we would be ok and don't want

> the WARN_ON_ONCE, right? The lun reset would just wait for the

> abort. When it completes the abort and reset complete as expected.


I don’t understand the meaning of that transport_lookup_tmr_lun there.
Every TMF Abort has already executed transport_lookup_tmr_lun at the very
beginning of its handling. 
Eliminating the race will eliminate the impact of my patch on this case too. 

BR,
 Dmitry
Mike Christie Sept. 22, 2021, 4 p.m. UTC | #3
On 9/20/21 11:40 AM, Dmitriy Bogdanov wrote:
> Hi Mike,

> 

>>> @@ -234,6 +225,7 @@ static void core_tmr_drain_tmr_list(

>>>  		}

>>>  

>>>  		list_move_tail(&tmr_p->tmr_list, &drain_tmr_list);

>>> +		tmr_p->tmr_dev = NULL;

>>

>> Is this patch now adding a way to hit:

>>

>> if (!tmr->tmr_dev)

>> 	WARN_ON_ONCE(transport_lookup_tmr_lun(tmr->task_cmd) < 0);                      

>>

>> in core_tmr_abort_task?

>>

>> You have the abort and lun reset works running on different CPUs.

>> The lun reset hits the above code first and clears tmr_dev.

>> The abort then hits the tmr->tmr_dev check and tries to do

>> transport_lookup_tmr_lun.

>>

>> For the case where the lun is not removed, it looks like

>> transport_lookup_tmr_lun will add the tmr to the dev_tmr_list

>> but it would also be on the drain_tmr_list above so we would

>> hit list corruption.

> 

> Yes, there is a such race. I think,  I can solve it by changing the order of

> draining the tmr_list and state_list at LUN Reset to make the raced lines 

> be under the same lock.

> 

> Especially SAM-5 describes(but does not require) aborting commands

> before tmfs:

> | When responding to a logical unit reset condition, the logical unit shall:

> |	a) abort all commands as described in 5.6;

> |	b) abort all copy operations (see SPC-4);

> |	c) terminate all task management functions;

> 

> 

>> For the case where the lun is getting removed, percpu_ref_tryget_live

>> would fail in transport_lookup_tmr_lun and we hit the WARN_ON_ONCE.

>> I think though with your patch, we would be ok and don't want

>> the WARN_ON_ONCE, right? The lun reset would just wait for the

>> abort. When it completes the abort and reset complete as expected.

> 

> I don’t understand the meaning of that transport_lookup_tmr_lun there.

> Every TMF Abort has already executed transport_lookup_tmr_lun at the very

> beginning of its handling. 


Yeah, I think it's not needed. It came in with:

commit 2c9fa49e100f962af988f1c0529231bf14905cda
Author: Bart Van Assche <bvanassche@acm.org>
Date:   Tue Nov 27 15:52:03 2018 -0800

    scsi: target/core: Make ABORT and LUN RESET handling synchronous
gree. It looks like it was added in:

and in that patch I can't see it ever happening. We have 2 ways to submit
an abort tmr:

1. target_submit_tmr - Calls transport_lookup_tmr_lun then
transport_generic_handle_tmr.

2. iscsit_handle_task_mgt_cmd - Will call transport_lookup_tmr_lun
for every tmr except the iscsi specific TASK REASSIGN. TASK REASSING
is not passed to transport_generic_handle_tmr.

I don't see any places where tmr_dev is NULL after transport_lookup_tmr_lun
has set it and added it to the list.

So I think you can just kill it.
Dmitry Bogdanov Sept. 22, 2021, 4:43 p.m. UTC | #4
Hi Mike,

> Yeah, I think it's not needed. It came in with:

>

> commit 2c9fa49e100f962af988f1c0529231bf14905cda

> Author: Bart Van Assche <bvanassche@acm.org>

> Date:   Tue Nov 27 15:52:03 2018 -0800

>

>     scsi: target/core: Make ABORT and LUN RESET handling synchronous

> gree. It looks like it was added in:

>

> and in that patch I can't see it ever happening. We have 2 ways to submit

> an abort tmr:

>

> 1. target_submit_tmr - Calls transport_lookup_tmr_lun then

> transport_generic_handle_tmr.

>

> 2. iscsit_handle_task_mgt_cmd - Will call transport_lookup_tmr_lun

> for every tmr except the iscsi specific TASK REASSIGN. TASK REASSING

> is not passed to transport_generic_handle_tmr.

>

> I don't see any places where tmr_dev is NULL after transport_lookup_tmr_lun

> has set it and added it to the list.

>

> So I think you can just kill it.


Ok, then in the next revision of the patch I will just remove that transport_lookup_tmr_lun with WARN_ON.

BR,
 Dmitry
diff mbox series

Patch

diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index e7fcbc09f9db..84ae2fe456ec 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -50,15 +50,6 @@  EXPORT_SYMBOL(core_tmr_alloc_req);
 
 void core_tmr_release_req(struct se_tmr_req *tmr)
 {
-	struct se_device *dev = tmr->tmr_dev;
-	unsigned long flags;
-
-	if (dev) {
-		spin_lock_irqsave(&dev->se_tmr_lock, flags);
-		list_del_init(&tmr->tmr_list);
-		spin_unlock_irqrestore(&dev->se_tmr_lock, flags);
-	}
-
 	kfree(tmr);
 }
 
@@ -234,6 +225,7 @@  static void core_tmr_drain_tmr_list(
 		}
 
 		list_move_tail(&tmr_p->tmr_list, &drain_tmr_list);
+		tmr_p->tmr_dev = NULL;
 	}
 	spin_unlock_irqrestore(&dev->se_tmr_lock, flags);
 
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 14c6f2bb1b01..e60abd230e90 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -676,6 +676,21 @@  static void target_remove_from_state_list(struct se_cmd *cmd)
 	spin_unlock_irqrestore(&dev->queues[cmd->cpuid].lock, flags);
 }
 
+static void target_remove_from_tmr_list(struct se_cmd *cmd)
+{
+	struct se_device *dev = NULL;
+	unsigned long flags;
+
+	if (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)
+		dev = cmd->se_tmr_req->tmr_dev;
+
+	if (dev) {
+		spin_lock_irqsave(&dev->se_tmr_lock, flags);
+		if (cmd->se_tmr_req->tmr_dev)
+			list_del_init(&cmd->se_tmr_req->tmr_list);
+		spin_unlock_irqrestore(&dev->se_tmr_lock, flags);
+	}
+}
 /*
  * This function is called by the target core after the target core has
  * finished processing a SCSI command or SCSI TMF. Both the regular command
@@ -687,13 +702,6 @@  static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd)
 {
 	unsigned long flags;
 
-	target_remove_from_state_list(cmd);
-
-	/*
-	 * Clear struct se_cmd->se_lun before the handoff to FE.
-	 */
-	cmd->se_lun = NULL;
-
 	spin_lock_irqsave(&cmd->t_state_lock, flags);
 	/*
 	 * Determine if frontend context caller is requesting the stopping of
@@ -728,8 +736,16 @@  static void transport_lun_remove_cmd(struct se_cmd *cmd)
 	if (!lun)
 		return;
 
+	target_remove_from_state_list(cmd);
+	target_remove_from_tmr_list(cmd);
+
 	if (cmpxchg(&cmd->lun_ref_active, true, false))
 		percpu_ref_put(&lun->lun_ref);
+
+	/*
+	 * Clear struct se_cmd->se_lun before the handoff to FE.
+	 */
+	cmd->se_lun = NULL;
 }
 
 static void target_complete_failure_work(struct work_struct *work)