@@ -919,6 +919,8 @@ static int pci_pm_resume_noirq(struct device *dev)
struct pci_dev *pci_dev = to_pci_dev(dev);
struct device_driver *drv = dev->driver;
int error = 0;
+ pci_power_t prev_state = pci_dev->current_state;
+ bool skip_bus_pm = pci_dev->skip_bus_pm;
if (dev_pm_may_skip_resume(dev))
return 0;
@@ -937,12 +939,15 @@ static int pci_pm_resume_noirq(struct device *dev)
* configuration here and attempting to put them into D0 again is
* pointless, so avoid doing that.
*/
- if (!(pci_dev->skip_bus_pm && pm_suspend_no_platform()))
+ if (!(skip_bus_pm && pm_suspend_no_platform()))
pci_pm_default_resume_early(pci_dev);
pci_fixup_device(pci_fixup_resume_early, pci_dev);
pcie_pme_root_status_cleanup(pci_dev);
+ if (!skip_bus_pm && prev_state == PCI_D3cold)
+ pci_bridge_wait_for_secondary_bus(pci_dev);
+
if (pci_has_legacy_pm_support(pci_dev))
return pci_legacy_resume_early(dev);
@@ -1333,6 +1338,7 @@ static int pci_pm_runtime_resume(struct device *dev)
int rc = 0;
struct pci_dev *pci_dev = to_pci_dev(dev);
const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+ pci_power_t prev_state = pci_dev->current_state;
/*
* Restoring config space is necessary even if the device is not bound
@@ -1348,6 +1354,9 @@ static int pci_pm_runtime_resume(struct device *dev)
pci_enable_wake(pci_dev, PCI_D0, false);
pci_fixup_device(pci_fixup_resume, pci_dev);
+ if (prev_state == PCI_D3cold)
+ pci_bridge_wait_for_secondary_bus(pci_dev);
+
if (pm && pm->runtime_resume)
rc = pm->runtime_resume(dev);
@@ -1019,8 +1019,6 @@ static void __pci_start_power_transition(struct pci_dev *dev, pci_power_t state)
* because have already delayed for the bridge.
*/
if (dev->runtime_d3cold) {
- if (dev->d3cold_delay && !dev->imm_ready)
- msleep(dev->d3cold_delay);
/*
* When powering on a bridge from D3cold, the
* whole hierarchy may be powered on into
@@ -4671,6 +4669,125 @@ bool pcie_wait_for_link(struct pci_dev *pdev, bool active)
return pcie_wait_for_link_delay(pdev, active, 100);
}
+/*
+ * Find maximum D3cold delay required by all the devices on the bus. The
+ * spec says 100 ms, but firmware can lower it and we allow drivers to
+ * increase it as well.
+ *
+ * Called with @pci_bus_sem locked for reading.
+ */
+static int pci_bus_max_d3cold_delay(const struct pci_bus *bus)
+{
+ const struct pci_dev *pdev;
+ int min_delay = 100;
+ int max_delay = 0;
+
+ list_for_each_entry(pdev, &bus->devices, bus_list) {
+ if (pdev->d3cold_delay < min_delay)
+ min_delay = pdev->d3cold_delay;
+ if (pdev->d3cold_delay > max_delay)
+ max_delay = pdev->d3cold_delay;
+ }
+
+ return max(min_delay, max_delay);
+}
+
+/**
+ * pci_bridge_wait_for_secondary_bus - Wait for secondary bus to be accessible
+ * @dev: PCI bridge
+ *
+ * Handle necessary delays before access to the devices on the secondary
+ * side of the bridge are permitted after D3cold to D0 transition.
+ *
+ * For PCIe this means the delays in PCIe 5.0 section 6.6.1. For
+ * conventional PCI it means Tpvrh + Trhfa specified in PCI 3.0 section
+ * 4.3.2.
+ */
+void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev)
+{
+ struct pci_dev *child;
+ int delay;
+
+ if (pci_dev_is_disconnected(dev))
+ return;
+
+ if (!pci_is_bridge(dev) || !dev->bridge_d3)
+ return;
+
+ down_read(&pci_bus_sem);
+
+ /*
+ * We only deal with devices that are present currently on the bus.
+ * For any hot-added devices the access delay is handled in pciehp
+ * board_added(). In case of ACPI hotplug the firmware is expected
+ * to configure the devices before OS is notified.
+ */
+ if (!dev->subordinate || list_empty(&dev->subordinate->devices)) {
+ up_read(&pci_bus_sem);
+ return;
+ }
+
+ /* Take d3cold_delay requirements into account */
+ delay = pci_bus_max_d3cold_delay(dev->subordinate);
+ if (!delay) {
+ up_read(&pci_bus_sem);
+ return;
+ }
+
+ child = list_first_entry(&dev->subordinate->devices, struct pci_dev,
+ bus_list);
+ up_read(&pci_bus_sem);
+
+ /*
+ * Conventional PCI and PCI-X we need to wait Tpvrh + Trhfa before
+ * accessing the device after reset (that is 1000 ms + 100 ms). In
+ * practice this should not be needed because we don't do power
+ * management for them (see pci_bridge_d3_possible()).
+ */
+ if (!pci_is_pcie(dev)) {
+ pci_dbg(dev, "waiting %d ms for secondary bus\n", 1000 + delay);
+ msleep(1000 + delay);
+ return;
+ }
+
+ /*
+ * For PCIe downstream and root ports that do not support speeds
+ * greater than 5 GT/s need to wait minimum 100 ms. For higher
+ * speeds (gen3) we need to wait first for the data link layer to
+ * become active.
+ *
+ * However, 100 ms is the minimum and the PCIe spec says the
+ * software must allow at least 1s before it can determine that the
+ * device that did not respond is a broken device. There is
+ * evidence that 100 ms is not always enough, for example certain
+ * Titan Ridge xHCI controller does not always respond to
+ * configuration requests if we only wait for 100 ms (see
+ * https://bugzilla.kernel.org/show_bug.cgi?id=203885).
+ *
+ * Therefore we wait for 100 ms and check for the device presence.
+ * If it is still not present give it an additional 100 ms.
+ */
+ if (!pcie_downstream_port(dev))
+ return;
+
+ if (pcie_get_speed_cap(dev) <= PCIE_SPEED_5_0GT) {
+ pci_dbg(dev, "waiting %d ms for downstream link\n", delay);
+ msleep(delay);
+ } else {
+ pci_dbg(dev, "waiting %d ms for downstream link, after activation\n",
+ delay);
+ if (!pcie_wait_for_link_delay(dev, true, delay)) {
+ /* Did not train, no need to wait any further */
+ return;
+ }
+ }
+
+ if (!pci_device_is_present(child)) {
+ pci_dbg(child, "waiting additional %d ms to become accessible\n", delay);
+ msleep(delay);
+ }
+}
+
void pci_reset_secondary_bus(struct pci_dev *dev)
{
u16 ctrl;
@@ -107,6 +107,7 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev);
void pci_free_cap_save_buffers(struct pci_dev *dev);
bool pci_bridge_d3_possible(struct pci_dev *dev);
void pci_bridge_d3_update(struct pci_dev *dev);
+void pci_bridge_wait_for_secondary_bus(struct pci_dev *dev);
static inline void pci_wakeup_event(struct pci_dev *dev)
{