Message ID | 1600817059-26721-8-git-send-email-kwankhede@nvidia.com |
---|---|
State | New |
Headers | show |
Series | Add migration support for VFIO devices | expand |
On 9/23/20 1:24 AM, Kirti Wankhede wrote: > Define flags to be used as delimeter in migration file stream. Typo "delimiter". > Added .save_setup and .save_cleanup functions. Mapped & unmapped migration > region from these functions at source during saving or pre-copy phase. > Set VFIO device state depending on VM's state. During live migration, VM is > running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO > device. During save-restore, VM is paused, _SAVING state is set for VFIO device. > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > Reviewed-by: Neo Jia <cjia@nvidia.com> > --- > hw/vfio/migration.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > hw/vfio/trace-events | 2 ++ > 2 files changed, 93 insertions(+) > > diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > index f650fe9fc3c8..8e8adaa25779 100644 > --- a/hw/vfio/migration.c > +++ b/hw/vfio/migration.c > @@ -8,12 +8,15 @@ > */ > > #include "qemu/osdep.h" > +#include "qemu/main-loop.h" > +#include "qemu/cutils.h" > #include <linux/vfio.h> > > #include "sysemu/runstate.h" > #include "hw/vfio/vfio-common.h" > #include "cpu.h" > #include "migration/migration.h" > +#include "migration/vmstate.h" > #include "migration/qemu-file.h" > #include "migration/register.h" > #include "migration/blocker.h" > @@ -25,6 +28,17 @@ > #include "trace.h" > #include "hw/hw.h" > > +/* > + * Flags used as delimiter: > + * 0xffffffff => MSB 32-bit all 1s > + * 0xef10 => emulated (virtual) function IO > + * 0x0000 => 16-bits reserved for flags > + */ > +#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) > +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) > +#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) > +#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) > + > static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, > off_t off, bool iswrite) > { > @@ -166,6 +180,65 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, > return 0; > } > > +/* ---------------------------------------------------------------------- */ > + > +static int vfio_save_setup(QEMUFile *f, void *opaque) > +{ > + VFIODevice *vbasedev = opaque; > + VFIOMigration *migration = vbasedev->migration; > + int ret; > + > + trace_vfio_save_setup(vbasedev->name); > + > + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); > + > + if (migration->region.mmaps) { > + qemu_mutex_lock_iothread(); > + ret = vfio_region_mmap(&migration->region); > + qemu_mutex_unlock_iothread(); > + if (ret) { > + error_report("%s: Failed to mmap VFIO migration region %d: %s", > + vbasedev->name, migration->region.nr, > + strerror(-ret)); > + error_report("%s: Falling back to slow path", vbasedev->name); > + } > + } > + > + ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK, > + VFIO_DEVICE_STATE_SAVING); > + if (ret) { > + error_report("%s: Failed to set state SAVING", vbasedev->name); > + return ret; > + } > + > + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); > + > + ret = qemu_file_get_error(f); > + if (ret) { > + return ret; > + } > + > + return 0; > +} > + > +static void vfio_save_cleanup(void *opaque) > +{ > + VFIODevice *vbasedev = opaque; > + VFIOMigration *migration = vbasedev->migration; > + > + if (migration->region.mmaps) { > + vfio_region_unmap(&migration->region); > + } > + trace_vfio_save_cleanup(vbasedev->name); > +} > + > +static SaveVMHandlers savevm_vfio_handlers = { > + .save_setup = vfio_save_setup, > + .save_cleanup = vfio_save_cleanup, > +}; > + > +/* ---------------------------------------------------------------------- */ > + > static void vfio_vmstate_change(void *opaque, int running, RunState state) > { > VFIODevice *vbasedev = opaque; > @@ -225,6 +298,8 @@ static int vfio_migration_init(VFIODevice *vbasedev, > struct vfio_region_info *info) > { > int ret = -EINVAL; > + char id[256] = ""; > + Object *obj; > > if (!vbasedev->ops->vfio_get_object) { > return ret; > @@ -241,6 +316,22 @@ static int vfio_migration_init(VFIODevice *vbasedev, > return ret; > } > > + obj = vbasedev->ops->vfio_get_object(vbasedev); > + > + if (obj) { > + DeviceState *dev = DEVICE(obj); > + char *oid = vmstate_if_get_id(VMSTATE_IF(dev)); > + > + if (oid) { > + pstrcpy(id, sizeof(id), oid); > + pstrcat(id, sizeof(id), "/"); > + g_free(oid); > + } > + } > + pstrcat(id, sizeof(id), "vfio"); Alternatively (easier to review, matter of taste): g_autofree char *path = NULL; if (oid) { path = g_strdup_printf("%s/vfio", vmstate_if_get_id(VMSTATE_IF(obj));); } else { path = g_strdup("vfio"); } strpadcpy(id, sizeof(id), path, '\0'); > + > + register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers, > + vbasedev); > vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, > vbasedev); > vbasedev->migration_state.notify = vfio_migration_state_notifier; > diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > index bcb3fa7314d7..982d8dccb219 100644 > --- a/hw/vfio/trace-events > +++ b/hw/vfio/trace-events > @@ -152,3 +152,5 @@ vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" > vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d" > vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" > vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" > +vfio_save_setup(const char *name) " (%s)" > +vfio_save_cleanup(const char *name) " (%s)" >
On Wed, 23 Sep 2020 04:54:09 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > Define flags to be used as delimeter in migration file stream. > Added .save_setup and .save_cleanup functions. Mapped & unmapped migration > region from these functions at source during saving or pre-copy phase. > Set VFIO device state depending on VM's state. During live migration, VM is > running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO > device. During save-restore, VM is paused, _SAVING state is set for VFIO device. > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > Reviewed-by: Neo Jia <cjia@nvidia.com> > --- > hw/vfio/migration.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > hw/vfio/trace-events | 2 ++ > 2 files changed, 93 insertions(+) > (...) > +/* > + * Flags used as delimiter: > + * 0xffffffff => MSB 32-bit all 1s > + * 0xef10 => emulated (virtual) function IO Where is this value coming from? > + * 0x0000 => 16-bits reserved for flags > + */ > +#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) > +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) > +#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) > +#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) I think we need some more documentation what these values mean and how they are used. From reading ahead a bit, it seems there is always supposed to be a pair of DEV_*_STATE and END_OF_STATE framing some kind of data? (...)
On Wed, 23 Sep 2020 04:54:09 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > Define flags to be used as delimeter in migration file stream. > Added .save_setup and .save_cleanup functions. Mapped & unmapped migration > region from these functions at source during saving or pre-copy phase. > Set VFIO device state depending on VM's state. During live migration, VM is > running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO > device. During save-restore, VM is paused, _SAVING state is set for VFIO device. > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > Reviewed-by: Neo Jia <cjia@nvidia.com> > --- > hw/vfio/migration.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > hw/vfio/trace-events | 2 ++ > 2 files changed, 93 insertions(+) > > diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > index f650fe9fc3c8..8e8adaa25779 100644 > --- a/hw/vfio/migration.c > +++ b/hw/vfio/migration.c > @@ -8,12 +8,15 @@ > */ > > #include "qemu/osdep.h" > +#include "qemu/main-loop.h" > +#include "qemu/cutils.h" > #include <linux/vfio.h> > > #include "sysemu/runstate.h" > #include "hw/vfio/vfio-common.h" > #include "cpu.h" > #include "migration/migration.h" > +#include "migration/vmstate.h" > #include "migration/qemu-file.h" > #include "migration/register.h" > #include "migration/blocker.h" > @@ -25,6 +28,17 @@ > #include "trace.h" > #include "hw/hw.h" > > +/* > + * Flags used as delimiter: > + * 0xffffffff => MSB 32-bit all 1s > + * 0xef10 => emulated (virtual) function IO > + * 0x0000 => 16-bits reserved for flags > + */ > +#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) > +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) > +#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) > +#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) > + > static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, > off_t off, bool iswrite) > { > @@ -166,6 +180,65 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, > return 0; > } > > +/* ---------------------------------------------------------------------- */ > + > +static int vfio_save_setup(QEMUFile *f, void *opaque) > +{ > + VFIODevice *vbasedev = opaque; > + VFIOMigration *migration = vbasedev->migration; > + int ret; > + > + trace_vfio_save_setup(vbasedev->name); > + > + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); > + > + if (migration->region.mmaps) { > + qemu_mutex_lock_iothread(); > + ret = vfio_region_mmap(&migration->region); > + qemu_mutex_unlock_iothread(); Please add a comment identifying why the iothread mutex lock is necessary here. > + if (ret) { > + error_report("%s: Failed to mmap VFIO migration region %d: %s", > + vbasedev->name, migration->region.nr, We don't support multiple migration regions, is it useful to include the region index here? > + strerror(-ret)); > + error_report("%s: Falling back to slow path", vbasedev->name); > + } > + } > + > + ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK, > + VFIO_DEVICE_STATE_SAVING); > + if (ret) { > + error_report("%s: Failed to set state SAVING", vbasedev->name); > + return ret; > + } Again, doesn't match the function semantics that success only means the device is in a non-error state, maybe the one that was asked for. > + > + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); What's the overall purpose of writing these markers into the migration stream? vfio_load_state() doesn't do anything with this other than validate that the end-of-state immediately follows. Is this a placeholder for something in the future? > + > + ret = qemu_file_get_error(f); > + if (ret) { > + return ret; > + } > + > + return 0; > +} > + > +static void vfio_save_cleanup(void *opaque) > +{ > + VFIODevice *vbasedev = opaque; > + VFIOMigration *migration = vbasedev->migration; > + > + if (migration->region.mmaps) { > + vfio_region_unmap(&migration->region); > + } > + trace_vfio_save_cleanup(vbasedev->name); > +} > + > +static SaveVMHandlers savevm_vfio_handlers = { > + .save_setup = vfio_save_setup, > + .save_cleanup = vfio_save_cleanup, > +}; > + > +/* ---------------------------------------------------------------------- */ > + > static void vfio_vmstate_change(void *opaque, int running, RunState state) > { > VFIODevice *vbasedev = opaque; > @@ -225,6 +298,8 @@ static int vfio_migration_init(VFIODevice *vbasedev, > struct vfio_region_info *info) > { > int ret = -EINVAL; > + char id[256] = ""; > + Object *obj; > > if (!vbasedev->ops->vfio_get_object) { > return ret; > @@ -241,6 +316,22 @@ static int vfio_migration_init(VFIODevice *vbasedev, > return ret; > } > > + obj = vbasedev->ops->vfio_get_object(vbasedev); > + > + if (obj) { > + DeviceState *dev = DEVICE(obj); > + char *oid = vmstate_if_get_id(VMSTATE_IF(dev)); > + > + if (oid) { > + pstrcpy(id, sizeof(id), oid); > + pstrcat(id, sizeof(id), "/"); > + g_free(oid); > + } > + } Here's where vfio_migration_init() starts using vfio_get_object() as I referenced back on patch 04. We might as well get the object before calling vfio_migration_region_init() and then pass the object. The conditional branch to handle obj is strange here too, it's fatal if vfio_migration_region_init() doesn't find an object, why do we handle it as optional here? Also, what is this doing? Comments would be nice... Thanks, Alex > + pstrcat(id, sizeof(id), "vfio"); > + > + register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers, > + vbasedev); > vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, > vbasedev); > vbasedev->migration_state.notify = vfio_migration_state_notifier; > diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > index bcb3fa7314d7..982d8dccb219 100644 > --- a/hw/vfio/trace-events > +++ b/hw/vfio/trace-events > @@ -152,3 +152,5 @@ vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" > vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d" > vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" > vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" > +vfio_save_setup(const char *name) " (%s)" > +vfio_save_cleanup(const char *name) " (%s)"
* Philippe Mathieu-Daudé (philmd@redhat.com) wrote: > On 9/23/20 1:24 AM, Kirti Wankhede wrote: > > Define flags to be used as delimeter in migration file stream. > > Typo "delimiter". > > > Added .save_setup and .save_cleanup functions. Mapped & unmapped migration > > region from these functions at source during saving or pre-copy phase. > > Set VFIO device state depending on VM's state. During live migration, VM is > > running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO > > device. During save-restore, VM is paused, _SAVING state is set for VFIO device. > > > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > > Reviewed-by: Neo Jia <cjia@nvidia.com> > > --- > > hw/vfio/migration.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > > hw/vfio/trace-events | 2 ++ > > 2 files changed, 93 insertions(+) > > > > diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > > index f650fe9fc3c8..8e8adaa25779 100644 > > --- a/hw/vfio/migration.c > > +++ b/hw/vfio/migration.c > > @@ -8,12 +8,15 @@ > > */ > > > > #include "qemu/osdep.h" > > +#include "qemu/main-loop.h" > > +#include "qemu/cutils.h" > > #include <linux/vfio.h> > > > > #include "sysemu/runstate.h" > > #include "hw/vfio/vfio-common.h" > > #include "cpu.h" > > #include "migration/migration.h" > > +#include "migration/vmstate.h" > > #include "migration/qemu-file.h" > > #include "migration/register.h" > > #include "migration/blocker.h" > > @@ -25,6 +28,17 @@ > > #include "trace.h" > > #include "hw/hw.h" > > > > +/* > > + * Flags used as delimiter: > > + * 0xffffffff => MSB 32-bit all 1s > > + * 0xef10 => emulated (virtual) function IO > > + * 0x0000 => 16-bits reserved for flags > > + */ > > +#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) > > +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) > > +#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) > > +#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) > > + > > static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, > > off_t off, bool iswrite) > > { > > @@ -166,6 +180,65 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, > > return 0; > > } > > > > +/* ---------------------------------------------------------------------- */ > > + > > +static int vfio_save_setup(QEMUFile *f, void *opaque) > > +{ > > + VFIODevice *vbasedev = opaque; > > + VFIOMigration *migration = vbasedev->migration; > > + int ret; > > + > > + trace_vfio_save_setup(vbasedev->name); > > + > > + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); > > + > > + if (migration->region.mmaps) { > > + qemu_mutex_lock_iothread(); > > + ret = vfio_region_mmap(&migration->region); > > + qemu_mutex_unlock_iothread(); > > + if (ret) { > > + error_report("%s: Failed to mmap VFIO migration region %d: %s", > > + vbasedev->name, migration->region.nr, > > + strerror(-ret)); > > + error_report("%s: Falling back to slow path", vbasedev->name); > > + } > > + } > > + > > + ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK, > > + VFIO_DEVICE_STATE_SAVING); > > + if (ret) { > > + error_report("%s: Failed to set state SAVING", vbasedev->name); > > + return ret; > > + } > > + > > + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); > > + > > + ret = qemu_file_get_error(f); > > + if (ret) { > > + return ret; > > + } > > + > > + return 0; > > +} > > + > > +static void vfio_save_cleanup(void *opaque) > > +{ > > + VFIODevice *vbasedev = opaque; > > + VFIOMigration *migration = vbasedev->migration; > > + > > + if (migration->region.mmaps) { > > + vfio_region_unmap(&migration->region); > > + } > > + trace_vfio_save_cleanup(vbasedev->name); > > +} > > + > > +static SaveVMHandlers savevm_vfio_handlers = { > > + .save_setup = vfio_save_setup, > > + .save_cleanup = vfio_save_cleanup, > > +}; > > + > > +/* ---------------------------------------------------------------------- */ > > + > > static void vfio_vmstate_change(void *opaque, int running, RunState state) > > { > > VFIODevice *vbasedev = opaque; > > @@ -225,6 +298,8 @@ static int vfio_migration_init(VFIODevice *vbasedev, > > struct vfio_region_info *info) > > { > > int ret = -EINVAL; > > + char id[256] = ""; > > + Object *obj; > > > > if (!vbasedev->ops->vfio_get_object) { > > return ret; > > @@ -241,6 +316,22 @@ static int vfio_migration_init(VFIODevice *vbasedev, > > return ret; > > } > > > > + obj = vbasedev->ops->vfio_get_object(vbasedev); > > + > > + if (obj) { > > + DeviceState *dev = DEVICE(obj); > > + char *oid = vmstate_if_get_id(VMSTATE_IF(dev)); > > + > > + if (oid) { > > + pstrcpy(id, sizeof(id), oid); > > + pstrcat(id, sizeof(id), "/"); > > + g_free(oid); > > + } > > + } > > + pstrcat(id, sizeof(id), "vfio"); > > Alternatively (easier to review, matter of taste): > > g_autofree char *path = NULL; > > if (oid) { > path = g_strdup_printf("%s/vfio", > vmstate_if_get_id(VMSTATE_IF(obj));); > } else { > path = g_strdup("vfio"); > } > strpadcpy(id, sizeof(id), path, '\0'); Maybe, although it's a straight copy of the magic in unregister_savevm. Dave > > + > > + register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers, > > + vbasedev); > > vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, > > vbasedev); > > vbasedev->migration_state.notify = vfio_migration_state_notifier; > > diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > > index bcb3fa7314d7..982d8dccb219 100644 > > --- a/hw/vfio/trace-events > > +++ b/hw/vfio/trace-events > > @@ -152,3 +152,5 @@ vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" > > vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d" > > vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" > > vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" > > +vfio_save_setup(const char *name) " (%s)" > > +vfio_save_cleanup(const char *name) " (%s)" > > > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On 9/25/2020 5:23 PM, Cornelia Huck wrote: > On Wed, 23 Sep 2020 04:54:09 +0530 > Kirti Wankhede <kwankhede@nvidia.com> wrote: > >> Define flags to be used as delimeter in migration file stream. >> Added .save_setup and .save_cleanup functions. Mapped & unmapped migration >> region from these functions at source during saving or pre-copy phase. >> Set VFIO device state depending on VM's state. During live migration, VM is >> running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO >> device. During save-restore, VM is paused, _SAVING state is set for VFIO device. >> >> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> >> Reviewed-by: Neo Jia <cjia@nvidia.com> >> --- >> hw/vfio/migration.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++ >> hw/vfio/trace-events | 2 ++ >> 2 files changed, 93 insertions(+) >> > > (...) > >> +/* >> + * Flags used as delimiter: >> + * 0xffffffff => MSB 32-bit all 1s >> + * 0xef10 => emulated (virtual) function IO > > Where is this value coming from? > Delimiter flags should be unique and this is a magic number that represents (e)mulated (f)unction (10) representing IO. >> + * 0x0000 => 16-bits reserved for flags >> + */ >> +#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) >> +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) >> +#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) >> +#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) > > I think we need some more documentation what these values mean and how > they are used. From reading ahead a bit, it seems there is always > supposed to be a pair of DEV_*_STATE and END_OF_STATE framing some kind > of data? > Adding comment as below, hope it helps. /* * Flags used as delimiter for VFIO devices should be unique in migration stream * These flags are composed as: * 0xffffffff => MSB 32-bit all 1s * 0xef10 => Magic ID, represents emulated (virtual) function IO * 0x0000 => 16-bits reserved for flags * * Flags _DEV_CONFIG_STATE, _DEV_SETUP_STATE and _DEV_DATA_STATE marks start of * respective states in migration stream. * FLAG _END_OF_STATE indicates end of current state, state could be any * of above states. */ Thanks, Kirti
On Mon, 19 Oct 2020 02:25:28 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > On 9/25/2020 5:23 PM, Cornelia Huck wrote: > > On Wed, 23 Sep 2020 04:54:09 +0530 > > Kirti Wankhede <kwankhede@nvidia.com> wrote: > > > >> Define flags to be used as delimeter in migration file stream. > >> Added .save_setup and .save_cleanup functions. Mapped & unmapped migration > >> region from these functions at source during saving or pre-copy phase. > >> Set VFIO device state depending on VM's state. During live migration, VM is > >> running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO > >> device. During save-restore, VM is paused, _SAVING state is set for VFIO device. > >> > >> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > >> Reviewed-by: Neo Jia <cjia@nvidia.com> > >> --- > >> hw/vfio/migration.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > >> hw/vfio/trace-events | 2 ++ > >> 2 files changed, 93 insertions(+) > >> > > > > (...) > > > >> +/* > >> + * Flags used as delimiter: > >> + * 0xffffffff => MSB 32-bit all 1s > >> + * 0xef10 => emulated (virtual) function IO > > > > Where is this value coming from? > > > > Delimiter flags should be unique and this is a magic number that > represents (e)mulated (f)unction (10) representing IO. > > >> + * 0x0000 => 16-bits reserved for flags > >> + */ > >> +#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) > >> +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) > >> +#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) > >> +#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) > > > > I think we need some more documentation what these values mean and how > > they are used. From reading ahead a bit, it seems there is always > > supposed to be a pair of DEV_*_STATE and END_OF_STATE framing some kind > > of data? > > > > Adding comment as below, hope it helps. > > /* > * Flags used as delimiter for VFIO devices should be unique in > migration stream Maybe "Flags to be used as unique delimiters for VFIO devices in the migration stream" ? > * These flags are composed as: > * 0xffffffff => MSB 32-bit all 1s > * 0xef10 => Magic ID, represents emulated (virtual) function IO > * 0x0000 => 16-bits reserved for flags > * > * Flags _DEV_CONFIG_STATE, _DEV_SETUP_STATE and _DEV_DATA_STATE marks > start of > * respective states in migration stream. > * FLAG _END_OF_STATE indicates end of current state, state could be any > * of above states. > */ "The beginning of state information is marked by _DEV_CONFIG_STATE, _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a certain state information is marked by _END_OF_STATE." ? > > Thanks, > Kirti >
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index f650fe9fc3c8..8e8adaa25779 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -8,12 +8,15 @@ */ #include "qemu/osdep.h" +#include "qemu/main-loop.h" +#include "qemu/cutils.h" #include <linux/vfio.h> #include "sysemu/runstate.h" #include "hw/vfio/vfio-common.h" #include "cpu.h" #include "migration/migration.h" +#include "migration/vmstate.h" #include "migration/qemu-file.h" #include "migration/register.h" #include "migration/blocker.h" @@ -25,6 +28,17 @@ #include "trace.h" #include "hw/hw.h" +/* + * Flags used as delimiter: + * 0xffffffff => MSB 32-bit all 1s + * 0xef10 => emulated (virtual) function IO + * 0x0000 => 16-bits reserved for flags + */ +#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) +#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) +#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) + static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, off_t off, bool iswrite) { @@ -166,6 +180,65 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, return 0; } +/* ---------------------------------------------------------------------- */ + +static int vfio_save_setup(QEMUFile *f, void *opaque) +{ + VFIODevice *vbasedev = opaque; + VFIOMigration *migration = vbasedev->migration; + int ret; + + trace_vfio_save_setup(vbasedev->name); + + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); + + if (migration->region.mmaps) { + qemu_mutex_lock_iothread(); + ret = vfio_region_mmap(&migration->region); + qemu_mutex_unlock_iothread(); + if (ret) { + error_report("%s: Failed to mmap VFIO migration region %d: %s", + vbasedev->name, migration->region.nr, + strerror(-ret)); + error_report("%s: Falling back to slow path", vbasedev->name); + } + } + + ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK, + VFIO_DEVICE_STATE_SAVING); + if (ret) { + error_report("%s: Failed to set state SAVING", vbasedev->name); + return ret; + } + + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); + + ret = qemu_file_get_error(f); + if (ret) { + return ret; + } + + return 0; +} + +static void vfio_save_cleanup(void *opaque) +{ + VFIODevice *vbasedev = opaque; + VFIOMigration *migration = vbasedev->migration; + + if (migration->region.mmaps) { + vfio_region_unmap(&migration->region); + } + trace_vfio_save_cleanup(vbasedev->name); +} + +static SaveVMHandlers savevm_vfio_handlers = { + .save_setup = vfio_save_setup, + .save_cleanup = vfio_save_cleanup, +}; + +/* ---------------------------------------------------------------------- */ + static void vfio_vmstate_change(void *opaque, int running, RunState state) { VFIODevice *vbasedev = opaque; @@ -225,6 +298,8 @@ static int vfio_migration_init(VFIODevice *vbasedev, struct vfio_region_info *info) { int ret = -EINVAL; + char id[256] = ""; + Object *obj; if (!vbasedev->ops->vfio_get_object) { return ret; @@ -241,6 +316,22 @@ static int vfio_migration_init(VFIODevice *vbasedev, return ret; } + obj = vbasedev->ops->vfio_get_object(vbasedev); + + if (obj) { + DeviceState *dev = DEVICE(obj); + char *oid = vmstate_if_get_id(VMSTATE_IF(dev)); + + if (oid) { + pstrcpy(id, sizeof(id), oid); + pstrcat(id, sizeof(id), "/"); + g_free(oid); + } + } + pstrcat(id, sizeof(id), "vfio"); + + register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers, + vbasedev); vbasedev->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, vbasedev); vbasedev->migration_state.notify = vfio_migration_state_notifier; diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index bcb3fa7314d7..982d8dccb219 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -152,3 +152,5 @@ vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d" vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" +vfio_save_setup(const char *name) " (%s)" +vfio_save_cleanup(const char *name) " (%s)"