diff mbox series

[06/16] qla2xxx: Fix scheduling while atomic

Message ID 20211224070712.17905-7-njavali@marvell.com
State Superseded
Headers show
Series qla2xxx misc bug fixes and features | expand

Commit Message

Nilesh Javali Dec. 24, 2021, 7:07 a.m. UTC
From: Quinn Tran <qutran@marvell.com>

QLA makes a call into the midlayer (fc_remote_port_delete) which
can put the thread to sleep. The thread that originates the call
is in interrupt context. The combination of the two triggers a
crash. This patch schedules the call in non-interrupt context,
where it is safe to sleep.

kernel: BUG: scheduling while atomic: swapper/7/0/0x00010000
kernel: Call Trace:
kernel:  <IRQ>
kernel:  dump_stack+0x66/0x81
kernel:  __schedule_bug.cold.90+0x5/0x1d
kernel:  __schedule+0x7af/0x960
kernel:  schedule+0x28/0x80
kernel:  schedule_timeout+0x26d/0x3b0
kernel:  wait_for_completion+0xb4/0x140
kernel:  ? wake_up_q+0x70/0x70
kernel:  __wait_rcu_gp+0x12c/0x160
kernel:  ? sdev_evt_alloc+0xc0/0x180 [scsi_mod]
kernel:  synchronize_sched+0x6c/0x80
kernel:  ? call_rcu_bh+0x20/0x20
kernel:  ? __bpf_trace_rcu_invoke_callback+0x10/0x10
kernel:  sdev_evt_alloc+0xfd/0x180 [scsi_mod]
kernel:  starget_for_each_device+0x85/0xb0 [scsi_mod]
kernel:  ? scsi_init_io+0x360/0x3d0 [scsi_mod]
kernel:  scsi_init_io+0x388/0x3d0 [scsi_mod]
kernel:  device_for_each_child+0x54/0x90
kernel:  fc_remote_port_delete+0x70/0xe0 [scsi_transport_fc]
kernel:  qla2x00_schedule_rport_del+0x62/0xf0 [qla2xxx]
kernel:  qla2x00_mark_device_lost+0x9c/0xd0 [qla2xxx]
kernel:  qla24xx_handle_plogi_done_event+0x55f/0x570 [qla2xxx]
kernel:  qla2x00_async_login_sp_done+0xd2/0x100 [qla2xxx]
kernel:  qla24xx_logio_entry+0x13a/0x3c0 [qla2xxx]
kernel:  qla24xx_process_response_queue+0x306/0x400 [qla2xxx]
kernel:  qla24xx_msix_rsp_q+0x3f/0xb0 [qla2xxx]
kernel:  __handle_irq_event_percpu+0x40/0x180
kernel:  handle_irq_event_percpu+0x30/0x80
kernel:  handle_irq_event+0x36/0x60

Cc: stable@vger.kernel.org
Signed-off-by: Quinn Tran <qutran@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
---
 drivers/scsi/qla2xxx/qla_init.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

Comments

Himanshu Madhani Jan. 3, 2022, 12:41 a.m. UTC | #1
> On Dec 23, 2021, at 11:07 PM, Nilesh Javali <njavali@marvell.com> wrote:
> 
> From: Quinn Tran <qutran@marvell.com>
> 
> QLA makes a call into midlayer (fc_remote_port_delete) which
> can put the thread to sleep. The thread that originate the call
> is in interrupt context. The combination of the 2 trigger a
> crash. This patch schedule the call in non-interrupt context
> where it is more safe.
> 
> kernel: BUG: scheduling while atomic: swapper/7/0/0x00010000
> kernel: Call Trace:
> kernel:  <IRQ>
> kernel:  dump_stack+0x66/0x81
> kernel:  __schedule_bug.cold.90+0x5/0x1d
> kernel:  __schedule+0x7af/0x960
> kernel:  schedule+0x28/0x80
> kernel:  schedule_timeout+0x26d/0x3b0
> kernel:  wait_for_completion+0xb4/0x140
> kernel:  ? wake_up_q+0x70/0x70
> kernel:  __wait_rcu_gp+0x12c/0x160
> kernel:  ? sdev_evt_alloc+0xc0/0x180 [scsi_mod]
> kernel:  synchronize_sched+0x6c/0x80
> kernel:  ? call_rcu_bh+0x20/0x20
> kernel:  ? __bpf_trace_rcu_invoke_callback+0x10/0x10
> kernel:  sdev_evt_alloc+0xfd/0x180 [scsi_mod]
> kernel:  starget_for_each_device+0x85/0xb0 [scsi_mod]
> kernel:  ? scsi_init_io+0x360/0x3d0 [scsi_mod]
> kernel:  scsi_init_io+0x388/0x3d0 [scsi_mod]
> kernel:  device_for_each_child+0x54/0x90
> kernel:  fc_remote_port_delete+0x70/0xe0 [scsi_transport_fc]
> kernel:  qla2x00_schedule_rport_del+0x62/0xf0 [qla2xxx]
> kernel:  qla2x00_mark_device_lost+0x9c/0xd0 [qla2xxx]
> kernel:  qla24xx_handle_plogi_done_event+0x55f/0x570 [qla2xxx]
> kernel:  qla2x00_async_login_sp_done+0xd2/0x100 [qla2xxx]
> kernel:  qla24xx_logio_entry+0x13a/0x3c0 [qla2xxx]
> kernel:  qla24xx_process_response_queue+0x306/0x400 [qla2xxx]
> kernel:  qla24xx_msix_rsp_q+0x3f/0xb0 [qla2xxx]
> kernel:  __handle_irq_event_percpu+0x40/0x180
> kernel:  handle_irq_event_percpu+0x30/0x80
> kernel:  handle_irq_event+0x36/0x60
> 
> Cc: stable@vger.kernel.org
> Signed-off-by: Quinn Tran <qutran@marvell.com>
> Signed-off-by: Nilesh Javali <njavali@marvell.com>
> ---
> drivers/scsi/qla2xxx/qla_init.c | 7 +------
> 1 file changed, 1 insertion(+), 6 deletions(-)
> 
> diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
> index e54c31296fab..ac25d2bfa90b 100644
> --- a/drivers/scsi/qla2xxx/qla_init.c
> +++ b/drivers/scsi/qla2xxx/qla_init.c
> @@ -2231,12 +2231,7 @@ qla24xx_handle_plogi_done_event(struct scsi_qla_host *vha, struct event_arg *ea)
> 		ql_dbg(ql_dbg_disc, vha, 0x20eb, "%s %d %8phC cmd error %x\n",
> 		    __func__, __LINE__, ea->fcport->port_name, ea->data[1]);
> 
> -		ea->fcport->flags &= ~FCF_ASYNC_SENT;
> -		qla2x00_set_fcport_disc_state(ea->fcport, DSC_LOGIN_FAILED);
> -		if (ea->data[1] & QLA_LOGIO_LOGIN_RETRIED)
> -			set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
> -		else
> -			qla2x00_mark_device_lost(vha, ea->fcport, 1);
> +		qlt_schedule_sess_for_deletion(ea->fcport);
> 		break;
> 	case MBS_LOOP_ID_USED:
> 		/* data[1] = IO PARAM 1 = nport ID  */
> -- 
> 2.23.1
> 

Looks Good.

Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>

--
Himanshu Madhani	 Oracle Linux Engineering
diff mbox series

Patch

diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index e54c31296fab..ac25d2bfa90b 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -2231,12 +2231,7 @@  qla24xx_handle_plogi_done_event(struct scsi_qla_host *vha, struct event_arg *ea)
 		ql_dbg(ql_dbg_disc, vha, 0x20eb, "%s %d %8phC cmd error %x\n",
 		    __func__, __LINE__, ea->fcport->port_name, ea->data[1]);
 
-		ea->fcport->flags &= ~FCF_ASYNC_SENT;
-		qla2x00_set_fcport_disc_state(ea->fcport, DSC_LOGIN_FAILED);
-		if (ea->data[1] & QLA_LOGIO_LOGIN_RETRIED)
-			set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
-		else
-			qla2x00_mark_device_lost(vha, ea->fcport, 1);
+		qlt_schedule_sess_for_deletion(ea->fcport);
 		break;
 	case MBS_LOOP_ID_USED:
 		/* data[1] = IO PARAM 1 = nport ID  */