@@ -1077,6 +1077,12 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
goto out_err;
}
+ rc = hdev->asic_funcs->nic_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to init NIC driver\n");
+ goto out_err;
+ }
+
hl_set_max_power(hdev);
} else {
rc = hdev->asic_funcs->soft_reset_late_init(hdev);
@@ -1312,6 +1318,13 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
goto out_disabled;
}
+ rc = hdev->asic_funcs->nic_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to init NIC driver\n");
+ rc = 0;
+ goto out_disabled;
+ }
+
/*
* Expose devices and sysfs nodes to user.
* From here there is no need to add char devices and create sysfs nodes
@@ -1463,6 +1476,11 @@ void hl_device_fini(struct hl_device *hdev)
hl_cb_pool_fini(hdev);
+ /* The NIC uses the kernel context for MMU mappings, therefore it must be
+ * cleaned up before the kernel context is released
+ */
+ hdev->asic_funcs->nic_fini(hdev);
+
/* Release kernel context */
if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
dev_err(hdev->dev, "kernel ctx is still alive\n");
@@ -680,6 +680,10 @@ struct hl_info_mac_addr;
* then the timeout is the default timeout for the specific
* ASIC
* @get_hw_state: retrieve the H/W state
+ * @nic_init: init the NIC H/W and I/F. This should be called in the final stage
+ * of the init flow, as we must not have anything that might fail
+ * during its initialization after the NIC init.
+ * @nic_fini: perform NIC cleanup.
* @nic_control: Perform NIC related operations.
* @nic_cq_mmap: map the NIC CQ buffer.
* @pci_bars_map: Map PCI BARs.
@@ -786,6 +790,8 @@ struct hl_asic_funcs {
int (*send_cpu_message)(struct hl_device *hdev, u32 *msg,
u16 len, u32 timeout, long *result);
enum hl_device_hw_state (*get_hw_state)(struct hl_device *hdev);
+ int (*nic_init)(struct hl_device *hdev);
+ void (*nic_fini)(struct hl_device *hdev);
int (*nic_control)(struct hl_device *hdev, u32 op, void *input,
void *output);
int (*nic_cq_mmap)(struct hl_device *hdev, struct vm_area_struct *vma);
@@ -247,6 +247,7 @@ static void set_driver_behavior_per_device(struct hl_device *hdev)
hdev->bmc_enable = 1;
hdev->hard_reset_on_fw_events = 1;
hdev->card_type = cpucp_card_type_pci;
+ hdev->nic_ports_mask = 0x3FF;
hdev->nic_ports_ext_mask = 0x3FF;
hdev->nic_auto_neg_mask = 0x3FF;
hdev->nic_load_fw = 0;
@@ -230,6 +230,7 @@ int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
lower_32_bits(pci_region->addr));
rc |= hl_pci_iatu_write(hdev, offset + 0x18,
upper_32_bits(pci_region->addr));
+ /* Set bar type as memory */
rc |= hl_pci_iatu_write(hdev, offset + 0x0, 0);
/* Enable + bar/address match + match enable + bar number */
@@ -882,6 +882,27 @@ static void gaudi_late_fini(struct hl_device *hdev)
hdev->hl_chip_info->info = NULL;
}
+static int gaudi_nic_init(struct hl_device *hdev)
+{
+ /*
+ * Bring up the NIC ports: in init flow they are initialized from scratch
+ * and that result is returned. If in_reset is set (hard reset flow) the
+ * ports were only halted, so they are just reopened and 0 is returned.
+ */
+ if (atomic_read(&hdev->in_reset)) {
+ gaudi_nic_ports_reopen(hdev);
+ return 0;
+ }
+
+ return gaudi_nic_ports_init(hdev);
+}
+
+static void gaudi_nic_fini(struct hl_device *hdev)
+{
+ /* tear down the NIC ports; must be called after MSI was disabled */
+ gaudi_nic_ports_fini(hdev);
+}
+
static void gaudi_nic_handle_rx(struct gaudi_nic_device *gaudi_nic)
{
/* at this point, interrupts were disabled by the H/W */
@@ -7482,6 +7503,8 @@ static const struct hl_asic_funcs gaudi_funcs = {
.get_eeprom_data = gaudi_get_eeprom_data,
.send_cpu_message = gaudi_send_cpu_message,
.get_hw_state = gaudi_get_hw_state,
+ .nic_init = gaudi_nic_init,
+ .nic_fini = gaudi_nic_fini,
.nic_control = gaudi_nic_control,
.nic_cq_mmap = gaudi_nic_cq_mmap,
.pci_bars_map = gaudi_pci_bars_map,
@@ -5265,6 +5265,16 @@ static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
return RREG32(mmHW_STATE);
}
+static int goya_nic_init(struct hl_device *hdev)
+{
+ return 0; /* stub: no NIC work is done here, always reports success */
+}
+
+static void goya_nic_fini(struct hl_device *hdev)
+{
+ /* empty stub: no NIC cleanup is performed here */
+}
+
static int goya_nic_control(struct hl_device *hdev, u32 op, void *input,
void *output)
{
@@ -5405,6 +5415,8 @@ static const struct hl_asic_funcs goya_funcs = {
.get_eeprom_data = goya_get_eeprom_data,
.send_cpu_message = goya_send_cpu_message,
.get_hw_state = goya_get_hw_state,
+ .nic_init = goya_nic_init,
+ .nic_fini = goya_nic_fini,
.nic_control = goya_nic_control,
.nic_cq_mmap = goya_nic_mmap,
.pci_bars_map = goya_pci_bars_map,