@@ -1644,6 +1644,12 @@ lpfc_sli4_pdev_status_reg_wait(struct lpfc_hba *phba)
!bf_get(lpfc_sliport_status_err, &portstat_reg))
return -EPERM;
+ /* There is no point to wait if the port is in an unrecoverable
+ * state.
+ */
+ if (lpfc_sli4_unrecoverable_port(&portstat_reg))
+ return -EIO;
+
/* wait for the SLI port firmware ready after firmware reset */
for (i = 0; i < LPFC_FW_RESET_MAXIMUM_WAIT_10MS_CNT; i++) {
msleep(10);
@@ -2147,7 +2147,7 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba)
/* fall through for not able to recover */
lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
"3152 Unrecoverable error\n");
- phba->link_state = LPFC_HBA_ERROR;
+ lpfc_sli4_offline_eratt(phba);
break;
case LPFC_SLI_INTF_IF_TYPE_1:
default:
@@ -9566,8 +9566,7 @@ lpfc_sli4_post_status_check(struct lpfc_hba *phba)
/* Final checks. The port status should be clean. */
if (lpfc_readl(phba->sli4_hba.u.if_type2.STATUSregaddr,
&reg_data.word0) ||
- (bf_get(lpfc_sliport_status_err, &reg_data) &&
- !bf_get(lpfc_sliport_status_rn, &reg_data))) {
+ lpfc_sli4_unrecoverable_port(&reg_data)) {
phba->work_status[0] =
readl(phba->sli4_hba.u.if_type2.
ERR1regaddr);
@@ -2265,6 +2265,7 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport,
}
if (!vport->localport ||
test_bit(HBA_PCI_ERR, &vport->phba->bit_flags) ||
+ phba->link_state == LPFC_HBA_ERROR ||
vport->load_flag & FC_UNLOADING)
return;
@@ -2630,7 +2631,8 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
* return values is ignored. The upcall is a courtesy to the
* transport.
*/
- if (vport->load_flag & FC_UNLOADING)
+ if (vport->load_flag & FC_UNLOADING ||
+ unlikely(vport->phba->link_state == LPFC_HBA_ERROR))
(void)nvme_fc_set_remoteport_devloss(remoteport, 0);
ret = nvme_fc_unregister_remoteport(remoteport);
@@ -9895,7 +9895,8 @@ lpfc_sli4_async_mbox_unblock(struct lpfc_hba *phba)
* port for twice the regular mailbox command timeout value.
*
* 0 - no timeout on waiting for bootstrap mailbox register ready.
- * MBXERR_ERROR - wait for bootstrap mailbox register timed out.
+ * MBXERR_ERROR - wait for bootstrap mailbox register timed out or port
+ * is in an unrecoverable state.
**/
static int
lpfc_sli4_wait_bmbx_ready(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
@@ -9903,6 +9904,23 @@ lpfc_sli4_wait_bmbx_ready(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
uint32_t db_ready;
unsigned long timeout;
struct lpfc_register bmbx_reg;
+ struct lpfc_register portstat_reg = {-1};
+
+ /* Sanity check - there is no point to wait if the port is in an
+ * unrecoverable state.
+ */
+ if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) >=
+ LPFC_SLI_INTF_IF_TYPE_2) {
+ if (lpfc_readl(phba->sli4_hba.u.if_type2.STATUSregaddr,
+ &portstat_reg.word0) ||
+ lpfc_sli4_unrecoverable_port(&portstat_reg)) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "3858 Skipping bmbx ready because "
+ "Port Status x%x\n",
+ portstat_reg.word0);
+ return MBXERR_ERROR;
+ }
+ }
timeout = msecs_to_jiffies(lpfc_mbox_tmo_val(phba, mboxq)
* 1000) + jiffies;
@@ -1180,3 +1180,22 @@ static inline void *lpfc_sli4_qe(struct lpfc_queue *q, uint16_t idx)
return q->q_pgs[idx / q->entry_cnt_per_pg] +
(q->entry_size * (idx % q->entry_cnt_per_pg));
}
+
+/**
+ * lpfc_sli4_unrecoverable_port - Test a port status value for a fatal error
+ * @portstat_reg: pointer to a copy of the port status register contents
+ *
+ * Description:
+ * Intended only for SLI4 interface type-2 or later. The port is considered
+ * unrecoverable when the ERR bit is set while the RN bit is clear.
+ *
+ * Returns:
+ * true  - ERR is set and RN is clear
+ * false - any other combination of ERR and RN
+ */
+static inline bool
+lpfc_sli4_unrecoverable_port(struct lpfc_register *portstat_reg)
+{
+	/* No error reported: the port cannot be unrecoverable. */
+	if (!bf_get(lpfc_sliport_status_err, portstat_reg))
+		return false;
+
+	/* Error reported: unrecoverable only when RN is clear. */
+	return !bf_get(lpfc_sliport_status_rn, portstat_reg);
+}
During tolerance tests that force an HBA to become unresponsive, rmmod hangs, resulting in the inability to remove the driver.

The lpfc_pci_remove_one_s4 routine attempts to submit a cleanup mailbox command via the lpfc_sli4_post_sync_mbox routine, but ends up waiting forever for a mailbox register to set its ready bit. Because the HBA is in an unrecoverable and unresponsive state, the ready bit will never be set.

Create a new routine called lpfc_sli4_unrecoverable_port, which checks a port status register's error notification bits. Use the lpfc_sli4_unrecoverable_port routine in ready bit check routines to return an error early if the port is deemed unrecoverable.

Also, when the lpfc_handle_eratt_s4 handler detects an unrecoverable state, call the lpfc_sli4_offline_eratt routine to kick off flushing outstanding I/O.

Signed-off-by: Justin Tee <justin.tee@broadcom.com>
---
 drivers/scsi/lpfc/lpfc_attr.c |  6 ++++++
 drivers/scsi/lpfc/lpfc_init.c |  5 ++---
 drivers/scsi/lpfc/lpfc_nvme.c |  4 +++-
 drivers/scsi/lpfc/lpfc_sli.c  | 20 +++++++++++++++++++-
 drivers/scsi/lpfc/lpfc_sli4.h | 19 +++++++++++++++++++
 5 files changed, 49 insertions(+), 5 deletions(-)