@@ -763,6 +763,12 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
free_cpumask_var(available_mask);
}
+#define UNLOAD_DELAY_UNIT_MS 10 /* 10 milliseconds */
+#define UNLOAD_WAIT_MS (100*1000) /* 100 seconds */
+#define UNLOAD_WAIT_LOOPS (UNLOAD_WAIT_MS/UNLOAD_DELAY_UNIT_MS)
+#define UNLOAD_MSG_MS (5*1000) /* Every 5 seconds */
+#define UNLOAD_MSG_LOOPS (UNLOAD_MSG_MS/UNLOAD_DELAY_UNIT_MS)
+
static void vmbus_wait_for_unload(void)
{
int cpu;
@@ -780,12 +786,17 @@ static void vmbus_wait_for_unload(void)
* vmbus_connection.unload_event. If not, the last thing we can do is
* read message pages for all CPUs directly.
*
- * Wait no more than 10 seconds so that the panic path can't get
- * hung forever in case the response message isn't seen.
+ * Wait up to 100 seconds since an Azure host must writeback any dirty
+ * data in its disk cache before the VMbus UNLOAD request will
+ * complete. This flushing has been empirically observed to take up
+ * to 50 seconds in cases with a lot of dirty data, so allow additional
+ * leeway and for inaccuracies in mdelay(). But eventually time out so
+ * that the panic path can't get hung forever in case the response
+ * message isn't seen.
*/
- for (i = 0; i < 1000; i++) {
+ for (i = 1; i <= UNLOAD_WAIT_LOOPS; i++) {
if (completion_done(&vmbus_connection.unload_event))
- break;
+ goto completed;
for_each_online_cpu(cpu) {
struct hv_per_cpu_context *hv_cpu
@@ -808,9 +819,18 @@ static void vmbus_wait_for_unload(void)
vmbus_signal_eom(msg, message_type);
}
- mdelay(10);
+ /*
+ * Give a notice periodically so someone watching the
+ * serial output won't think it is completely hung.
+ */
+ if (!(i % UNLOAD_MSG_LOOPS))
+ pr_notice("Waiting for VMBus UNLOAD to complete\n");
+
+ mdelay(UNLOAD_DELAY_UNIT_MS);
}
+ pr_err("Continuing even though VMBus UNLOAD did not complete\n");
+completed:
/*
* We're crashing and already got the UNLOAD_RESPONSE, cleanup all
* maybe-pending messages on all CPUs to be able to receive new