diff mbox series

[V1,21/32] exec, memory: exec(3) to restart

Message ID 1596122076-341293-22-git-send-email-steven.sistare@oracle.com
State New
Headers show
Series [V1,01/32] savevm: add vmstate handler iterators | expand

Commit Message

Steven Sistare July 30, 2020, 3:14 p.m. UTC
Use exec() to restart qemu to a potentially new version, while preserving
guest RAM.  The guest pauses briefly.

cprsave saves the address and length of RAM blocks to the environment via
setenv, tags the RAM with the new madvise(MADV_DOEXEC) option to preserve
it across exec, then exec()'s the (typically updated) qemu binary with the
original argv.

On qemu restart, ram_block_add() finds the env vars that describe preserved
RAM segments and does not reallocate them.

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
 exec.c                | 36 ++++++++++++++++++++++++++++++++++--
 include/exec/memory.h |  2 ++
 migration/savevm.c    | 16 ++++++++++++++++
 3 files changed, 52 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/exec.c b/exec.c
index 359e437..5473c09 100644
--- a/exec.c
+++ b/exec.c
@@ -2235,7 +2235,7 @@  static void ram_block_add(RAMBlock *new_block, Error **errp, bool shared)
     Error *err = NULL;
     const char *name;
     void *addr;
-    size_t maxlen;
+    size_t len, maxlen;
 
     old_ram_size = last_ram_page();
 
@@ -2253,7 +2253,12 @@  static void ram_block_add(RAMBlock *new_block, Error **errp, bool shared)
             }
         } else {
             name = memory_region_name(new_block->mr);
-            addr = phys_mem_alloc(maxlen, &new_block->mr->align, shared);
+            if (getenv_ram(name, &addr, &len)) {
+                assert(len == maxlen);
+            } else {
+                addr = phys_mem_alloc(maxlen, &new_block->mr->align, shared);
+                setenv_ram(name, addr, maxlen);
+            }
 
             if (!addr) {
                 error_setg_errno(errp, errno,
@@ -2499,6 +2504,8 @@  void qemu_ram_free(RAMBlock *block)
         return;
     }
 
+    unsetenv_ram(memory_region_name(block->mr));
+
     if (block->host) {
         ram_block_notify_remove(block->host, block->max_length);
     }
@@ -2763,6 +2770,53 @@  bool qemu_ram_volatile(Error **errp)
     return ret;
 }
 
+/*
+ * walkenv() callback: tag one RAM segment recorded in the environment with
+ * QEMU_MADV_DOEXEC so that it is preserved across exec().
+ *
+ * @name: name passed by walkenv(); used to look up the segment's address
+ *        and length via getenv_ram()
+ * @val: unused
+ * @handle: Error ** in which a failure is reported
+ *
+ * Returns 0 on success, 1 on failure with the error set.
+ */
+static int preserve_ram(const char *name, const char *val, void *handle)
+{
+    void *addr;
+    size_t len;
+    Error **errp = handle;
+
+    /* Do not hand uninitialized addr/len to madvise if the lookup fails. */
+    if (!getenv_ram(name, &addr, &len)) {
+        error_setg(errp, "no preserved RAM found for memory region %s", name);
+        return 1;
+    }
+    if (qemu_madvise(addr, len, QEMU_MADV_DOEXEC)) {
+        error_setg_errno(errp, errno,
+                         "MADV_DOEXEC failed on memory region %s", name);
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Tag every RAM segment recorded in the environment so that it is preserved
+ * across exec().  The ramlist mutex is held for the duration of the walk.
+ *
+ * Returns the result of walkenv(); presumably non-zero when preserve_ram()
+ * fails for some segment, in which case the error is reported through @errp.
+ */
+int qemu_preserve_ram(Error **errp)
+{
+    int ret;
+
+    qemu_mutex_lock_ramlist();
+    ret = walkenv(ADDR_PREFIX, preserve_ram, errp);
+    qemu_mutex_unlock_ramlist();
+    return ret;
+}
+
 /* Generate a debug exception if a watchpoint has been hit.  */
 void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
                           MemTxAttrs attrs, int flags, uintptr_t ra)
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 6aafbb0..e2d297d 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -2521,6 +2521,8 @@  bool ram_block_discard_is_required(void);
 
 bool qemu_ram_volatile(Error **errp);
 
+int qemu_preserve_ram(Error **errp);
+
 #endif
 
 #endif
diff --git a/migration/savevm.c b/migration/savevm.c
index 38cc63a..2902006 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2719,6 +2719,16 @@  void save_cpr_snapshot(const char *file, const char *mode, Error **errp)
         return;
     }
 
+    if (op == VMS_RESTART && QEMU_MADV_DOEXEC == QEMU_MADV_INVALID) {
+        error_setg(errp, "kernel does not support MADV_DOEXEC.");
+        return;
+    }
+
+    if (op == VMS_RESTART && xen_enabled()) {
+        error_setg(errp, "xen does not support cprsave restart");
+        return;
+    }
+
     f = qf_file_open(file, O_CREAT | O_WRONLY | O_TRUNC, 0600, errp);
     if (!f) {
         return;
@@ -2747,6 +2757,12 @@  void save_cpr_snapshot(const char *file, const char *mode, Error **errp)
     if (op == VMS_REBOOT) {
         no_shutdown = 0;
         qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+    } else if (op == VMS_RESTART) {
+        if (qemu_preserve_ram(errp)) {
+            return;
+        }
+        qemu_system_exec_request();
+        putenv((char *)"QEMU_START_FREEZE=");
     }
 }