@@ -3601,16 +3601,14 @@ static void ena_fw_reset_device(struct work_struct *work)
{
struct ena_adapter *adapter =
container_of(work, struct ena_adapter, reset_task);
- struct pci_dev *pdev = adapter->pdev;
- if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
- dev_err(&pdev->dev,
- "device reset schedule while reset bit is off\n");
- return;
- }
rtnl_lock();
- ena_destroy_device(adapter, false);
- ena_restore_device(adapter);
+
+ if (likely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+ ena_destroy_device(adapter, false);
+ ena_restore_device(adapter);
+ }
+
rtnl_unlock();
}
@@ -4389,9 +4387,6 @@ static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
netdev->rx_cpu_rmap = NULL;
}
#endif /* CONFIG_RFS_ACCEL */
- del_timer_sync(&adapter->timer_service);
-
- cancel_work_sync(&adapter->reset_task);
rtnl_lock(); /* lock released inside the below if-else block */
adapter->reset_reason = ENA_REGS_RESET_SHUTDOWN;
The reset work is scheduled by the timer routine whenever it detects that a device reset is required (e.g. when a keep_alive signal is missing). When releasing device resources in ena_destroy() the driver cancels the scheduling of the timer routine without destroying the reset work explicitly. This creates the following bug: The driver is suspended and the ena_suspend() function is called -> This function calls ena_destroy() to free the net device resources -> The driver waits for the timer routine to finish its execution and then cancels it, thus preventing from it to be called again. If, in its final execution, the timer routine schedules a reset, the reset routine might be called after the device resources are freed, which might cause a kernel panic. By changing the reset routine so that it cannot run simultaneously with the destruction routine, we allow the reset routine read the device's state accurately. This is achieved by checking whether ENA_FLAG_TRIGGER_RESET flag is set before resetting the device and making both the destruction function and the flag check are under rtnl lock. The ENA_FLAG_TRIGGER_RESET is cleared at the end of the destruction routine. Also surround the flag check with 'likely' because we expect that the reset routine would be called only when ENA_FLAG_TRIGGER_RESET flag is set. This patch also removes the destruction of the timer and reset services from ena_remove() since the timer is destroyed by the destruction routine and the reset work is handled by this patch. Fixes: 8c5c7abdeb2d ("net: ena: add power management ops to the ENA driver") Signed-off-by: Shay Agroskin <shayagr@amazon.com> --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-)