@@ -1101,6 +1101,7 @@ struct scmd_priv {
* @ts_update_interval: Timestamp update interval
* @reset_in_progress: Reset in progress flag
* @unrecoverable: Controller unrecoverable flag
+ * @io_admin_reset_sync: Manage state of I/O ops during an admin reset process
* @prev_reset_result: Result of previous reset
* @reset_mutex: Controller reset mutex
* @reset_waitq: Controller reset wait queue
@@ -1293,6 +1294,7 @@ struct mpi3mr_ioc {
u16 ts_update_interval;
u8 reset_in_progress;
u8 unrecoverable;
+ u8 io_admin_reset_sync;
int prev_reset_result;
struct mutex reset_mutex;
wait_queue_head_t reset_waitq;
@@ -17,7 +17,7 @@ static void mpi3mr_process_factsdata(struct mpi3mr_ioc *mrioc,
struct mpi3_ioc_facts_data *facts_data);
static void mpi3mr_pel_wait_complete(struct mpi3mr_ioc *mrioc,
struct mpi3mr_drv_cmd *drv_cmd);
-
+static int mpi3mr_check_op_admin_proc(struct mpi3mr_ioc *mrioc);
static int poll_queues;
module_param(poll_queues, int, 0444);
MODULE_PARM_DESC(poll_queues, "Number of queues for io_uring poll mode. (Range 1 - 126)");
@@ -459,7 +459,7 @@ int mpi3mr_process_admin_reply_q(struct mpi3mr_ioc *mrioc)
}
do {
- if (mrioc->unrecoverable)
+ if (mrioc->unrecoverable || mrioc->io_admin_reset_sync)
break;
mrioc->admin_req_ci = le16_to_cpu(reply_desc->request_queue_ci);
@@ -554,7 +554,7 @@ int mpi3mr_process_op_reply_q(struct mpi3mr_ioc *mrioc,
}
do {
- if (mrioc->unrecoverable)
+ if (mrioc->unrecoverable || mrioc->io_admin_reset_sync)
break;
req_q_idx = le16_to_cpu(reply_desc->request_queue_id) - 1;
@@ -4411,6 +4411,7 @@ int mpi3mr_reinit_ioc(struct mpi3mr_ioc *mrioc, u8 is_resume)
goto out_failed_noretry;
}
+ mrioc->io_admin_reset_sync = 0;
if (is_resume || mrioc->block_on_pci_err) {
dprint_reset(mrioc, "setting up single ISR\n");
retval = mpi3mr_setup_isr(mrioc, 1);
@@ -5289,6 +5290,55 @@ void mpi3mr_pel_get_seqnum_complete(struct mpi3mr_ioc *mrioc,
drv_cmd->retry_count = 0;
}
+/**
+ * mpi3mr_check_op_admin_proc -
+ * @mrioc: Adapter instance reference
+ *
+ * It checks if any of the operation reply queues
+ * or the admin reply queue are currently in use.
+ * If any queue is in use, the function waits for
+ * a maximum of 10 seconds for them to become available.
+ *
+ * Return: 0 on success, non-zero on failure.
+ */
+static int mpi3mr_check_op_admin_proc(struct mpi3mr_ioc *mrioc)
+{
+
+ u16 timeout = 10 * 10;
+ u16 elapsed_time = 0;
+ bool op_admin_in_use = false;
+
+ do {
+ op_admin_in_use = false;
+
+ /* Check admin_reply queue first to exit early */
+ if (atomic_read(&mrioc->admin_reply_q_in_use) == 1)
+ op_admin_in_use = true;
+ else {
+ /* Check op_reply queues */
+ int i;
+
+ for (i = 0; i < mrioc->num_queues; i++) {
+ if (atomic_read(&mrioc->op_reply_qinfo[i].in_use) == 1) {
+ op_admin_in_use = true;
+ break;
+ }
+ }
+ }
+
+ if (!op_admin_in_use)
+ break;
+
+ msleep(100);
+
+ } while (++elapsed_time < timeout);
+
+ if (op_admin_in_use)
+ return 1;
+
+ return 0;
+}
+
/**
* mpi3mr_soft_reset_handler - Reset the controller
* @mrioc: Adapter instance reference
@@ -5369,6 +5419,7 @@ int mpi3mr_soft_reset_handler(struct mpi3mr_ioc *mrioc,
mpi3mr_wait_for_host_io(mrioc, MPI3MR_RESET_HOST_IOWAIT_TIMEOUT);
mpi3mr_ioc_disable_intr(mrioc);
+ mrioc->io_admin_reset_sync = 1;
if (snapdump) {
mpi3mr_set_diagsave(mrioc);
@@ -5396,6 +5447,16 @@ int mpi3mr_soft_reset_handler(struct mpi3mr_ioc *mrioc,
ioc_err(mrioc, "Failed to issue soft reset to the ioc\n");
goto out;
}
+
+ retval = mpi3mr_check_op_admin_proc(mrioc);
+ if (retval) {
+ ioc_err(mrioc, "soft reset is failed due to an Admin or I/O queue polling\n"
+ "thread was still processing replies even after 10 seconds\n"
+ "timeout, marking the controller as unrecoverable\n");
+
+ goto out;
+ }
+
if (mrioc->num_io_throttle_group !=
mrioc->facts.max_io_throttle_group) {
ioc_err(mrioc,