@@ -23,6 +23,7 @@
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/aer.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/slab.h>
@@ -129,6 +130,7 @@ extern atomic64_t event_counter;
#define MPI3MR_PREPARE_FOR_RESET_TIMEOUT 180
#define MPI3MR_RESET_ACK_TIMEOUT 30
#define MPI3MR_MUR_TIMEOUT 120
+#define MPI3MR_RESET_TIMEOUT 510
#define MPI3MR_WATCHDOG_INTERVAL 1000 /* in milli seconds */
@@ -1153,6 +1155,8 @@ struct scmd_priv {
* @trace_release_trigger_active: Trace trigger active flag
* @fw_release_trigger_active: Fw release trigger active flag
* @snapdump_trigger_active: Snapdump trigger active flag
+ * @pci_err_recovery: PCI error recovery in progress
+ * @block_on_pci_err: Block IO during PCI error recovery
*/
struct mpi3mr_ioc {
struct list_head list;
@@ -1353,6 +1357,8 @@ struct mpi3mr_ioc {
bool snapdump_trigger_active;
bool trace_release_trigger_active;
bool fw_release_trigger_active;
+ bool pci_err_recovery;
+ bool block_on_pci_err;
};
/**
@@ -5546,6 +5546,197 @@ mpi3mr_resume(struct device *dev)
return 0;
}
+/**
+ * mpi3mr_pcierr_error_detected - PCI error detected callback
+ * @pdev: PCI device instance
+ * @state: channel state
+ *
+ * This function is called by the PCI error recovery driver and
+ * based on the state passed the driver decides what actions to
+ * be recommended back to PCI driver.
+ *
+ * For all of the states if there is no valid mrioc or scsi host
+ * references in the PCI device then this function will return
+ * the result as disconnect.
+ *
+ * For normal state, this function will return the result as can
+ * recover.
+ *
+ * For frozen state, this function will block for any pending
+ * controller initialization or re-initialization to complete,
+ * stop any new interactions with the controller and return
+ * status as reset required.
+ *
+ * For permanent failure state, this function will mark the
+ * controller as unrecoverable and return status as disconnect.
+ *
+ * Returns: PCI_ERS_RESULT_NEED_RESET or CAN_RECOVER or
+ * DISCONNECT based on the controller state.
+ */
+static pci_ers_result_t
+mpi3mr_pcierr_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+ struct Scsi_Host *shost;
+ struct mpi3mr_ioc *mrioc;
+ unsigned int timeout = MPI3MR_RESET_TIMEOUT;
+
+ dev_info(&pdev->dev, "%s: callback invoked state(%d)\n", __func__,
+ state);
+
+ shost = pci_get_drvdata(pdev);
+ mrioc = shost_priv(shost);
+
+ switch (state) {
+ case pci_channel_io_normal:
+ return PCI_ERS_RESULT_CAN_RECOVER;
+ case pci_channel_io_frozen:
+ mrioc->pci_err_recovery = true;
+ mrioc->block_on_pci_err = true;
+ do {
+ if (mrioc->reset_in_progress || mrioc->is_driver_loading)
+ ssleep(1);
+ else
+ break;
+ } while (--timeout);
+
+ if (!timeout) {
+ mrioc->pci_err_recovery = true;
+ mrioc->block_on_pci_err = true;
+ mrioc->unrecoverable = 1;
+ mpi3mr_stop_watchdog(mrioc);
+ mpi3mr_flush_cmds_for_unrecovered_controller(mrioc);
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ scsi_block_requests(mrioc->shost);
+ mpi3mr_stop_watchdog(mrioc);
+ mpi3mr_cleanup_resources(mrioc);
+ return PCI_ERS_RESULT_NEED_RESET;
+ case pci_channel_io_perm_failure:
+ mrioc->pci_err_recovery = true;
+ mrioc->block_on_pci_err = true;
+ mrioc->unrecoverable = 1;
+ mpi3mr_stop_watchdog(mrioc);
+ mpi3mr_flush_cmds_for_unrecovered_controller(mrioc);
+ return PCI_ERS_RESULT_DISCONNECT;
+ default:
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+}
+
+/**
+ * mpi3mr_pcierr_slot_reset - Post slot reset callback
+ * @pdev: PCI device instance
+ *
+ * This function is called by the PCI error recovery driver
+ * after a slot or link reset issued by it for the recovery, the
+ * driver is expected to bring back the controller and
+ * initialize it.
+ *
+ * This function restores PCI state and reinitializes controller
+ * resources and the controller, this blocks for any pending
+ * reset to complete.
+ *
+ * Returns: PCI_ERS_RESULT_DISCONNECT on failure or
+ * PCI_ERS_RESULT_RECOVERED
+ */
+static pci_ers_result_t mpi3mr_pcierr_slot_reset(struct pci_dev *pdev)
+{
+ struct Scsi_Host *shost;
+ struct mpi3mr_ioc *mrioc;
+ unsigned int timeout = MPI3MR_RESET_TIMEOUT;
+
+ dev_info(&pdev->dev, "%s: callback invoked\n", __func__);
+
+ shost = pci_get_drvdata(pdev);
+ mrioc = shost_priv(shost);
+
+ do {
+ if (mrioc->reset_in_progress)
+ ssleep(1);
+ else
+ break;
+ } while (--timeout);
+
+ if (!timeout)
+ goto out_failed;
+
+ pci_restore_state(pdev);
+
+ if (mpi3mr_setup_resources(mrioc)) {
+ ioc_err(mrioc, "setup resources failed\n");
+ goto out_failed;
+ }
+ mrioc->unrecoverable = 0;
+ mrioc->pci_err_recovery = false;
+
+ if (mpi3mr_soft_reset_handler(mrioc, MPI3MR_RESET_FROM_FIRMWARE, 0))
+ goto out_failed;
+
+ return PCI_ERS_RESULT_RECOVERED;
+
+out_failed:
+ mrioc->unrecoverable = 1;
+ mrioc->block_on_pci_err = false;
+ scsi_unblock_requests(shost);
+ mpi3mr_start_watchdog(mrioc);
+ return PCI_ERS_RESULT_DISCONNECT;
+}
+
+/**
+ * mpi3mr_pcierr_resume - PCI error recovery resume
+ * callback
+ * @pdev: PCI device instance
+ *
+ * This function enables all I/O and IOCTLs post reset issued as
+ * part of the PCI error recovery
+ *
+ * Return: Nothing.
+ */
+static void mpi3mr_pcierr_resume(struct pci_dev *pdev)
+{
+ struct Scsi_Host *shost;
+ struct mpi3mr_ioc *mrioc;
+
+ dev_info(&pdev->dev, "%s: callback invoked\n", __func__);
+
+ shost = pci_get_drvdata(pdev);
+ mrioc = shost_priv(shost);
+
+ if (mrioc->block_on_pci_err) {
+ mrioc->block_on_pci_err = false;
+ scsi_unblock_requests(shost);
+ mpi3mr_start_watchdog(mrioc);
+ }
+}
+
+/**
+ * mpi3mr_pcierr_mmio_enabled - PCI error recovery callback
+ * @pdev: PCI device instance
+ *
+ * This is called only if mpi3mr_pcierr_error_detected returns
+ * PCI_ERS_RESULT_CAN_RECOVER.
+ *
+ * Return: PCI_ERS_RESULT_DISCONNECT when the controller is
+ * unrecoverable or when the shost/mrioc reference cannot be
+ * found, else return PCI_ERS_RESULT_RECOVERED
+ */
+static pci_ers_result_t mpi3mr_pcierr_mmio_enabled(struct pci_dev *pdev)
+{
+ struct Scsi_Host *shost;
+ struct mpi3mr_ioc *mrioc;
+
+ dev_info(&pdev->dev, "%s: callback invoked\n", __func__);
+
+ shost = pci_get_drvdata(pdev);
+ mrioc = shost_priv(shost);
+
+ if (mrioc->unrecoverable)
+ return PCI_ERS_RESULT_DISCONNECT;
+
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
static const struct pci_device_id mpi3mr_pci_id_table[] = {
{
PCI_DEVICE_SUB(MPI3_MFGPAGE_VENDORID_BROADCOM,
@@ -5563,6 +5754,13 @@ static const struct pci_device_id mpi3mr_pci_id_table[] = {
};
MODULE_DEVICE_TABLE(pci, mpi3mr_pci_id_table);
+static struct pci_error_handlers mpi3mr_err_handler = {
+ .error_detected = mpi3mr_pcierr_error_detected,
+ .mmio_enabled = mpi3mr_pcierr_mmio_enabled,
+ .slot_reset = mpi3mr_pcierr_slot_reset,
+ .resume = mpi3mr_pcierr_resume,
+};
+
static SIMPLE_DEV_PM_OPS(mpi3mr_pm_ops, mpi3mr_suspend, mpi3mr_resume);
static struct pci_driver mpi3mr_pci_driver = {
@@ -5571,6 +5769,7 @@ static struct pci_driver mpi3mr_pci_driver = {
.probe = mpi3mr_probe,
.remove = mpi3mr_remove,
.shutdown = mpi3mr_shutdown,
+ .err_handler = &mpi3mr_err_handler,
.driver.pm = &mpi3mr_pm_ops,
};