@@ -337,11 +337,6 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VHostUserBlk *s = VHOST_USER_BLK(vdev);
- if (!s->connected) {
- return;
- }
- s->connected = false;
-
if (s->dev.started) {
vhost_user_blk_stop(vdev);
}
@@ -349,6 +344,19 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
vhost_dev_cleanup(&s->dev);
}
+static void vhost_user_blk_event(void *opaque, QEMUChrEvent event);
+
+static void vhost_user_blk_chr_closed_bh(void *opaque)
+{
+ DeviceState *dev = opaque;
+ VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+ VHostUserBlk *s = VHOST_USER_BLK(vdev);
+
+ vhost_user_blk_disconnect(dev);
+ qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
+ NULL, opaque, NULL, true);
+}
+
static void vhost_user_blk_event(void *opaque, QEMUChrEvent event)
{
DeviceState *dev = opaque;
@@ -363,7 +371,28 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event)
}
break;
case CHR_EVENT_CLOSED:
- vhost_user_blk_disconnect(dev);
+ /*
+ * A close event may happen during a read/write, but vhost
+ * code assumes the vhost_dev remains setup, so delay the
+ * stop & clear. There are two possible paths to hit this
+ * disconnect event:
+ * 1. When VM is in the RUN_STATE_PRELAUNCH state. The
+ * vhost_user_blk_device_realize() is a caller.
+ * 2. In tha main loop phase after VM start.
+ *
+ * For p2 the disconnect event will be delayed. We can't
+ * do the same for p1, because we are not running the loop
+ * at this moment. So just skip this step and perform
+ * disconnect in the caller function.
+ */
+ if (s->connected && runstate_is_running()) {
+ AioContext *ctx = qemu_get_current_aio_context();
+
+ qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL, NULL,
+ NULL, NULL, false);
+ aio_bh_schedule_oneshot(ctx, vhost_user_blk_chr_closed_bh, opaque);
+ }
+ s->connected = false;
break;
case CHR_EVENT_BREAK:
case CHR_EVENT_MUX_IN:
@@ -428,6 +457,14 @@ reconnect:
ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
sizeof(struct virtio_blk_config));
+ if (!s->connected) {
+ /*
+ * Perform disconnect before making reconnect. More detailed
+ * comment why it was delayed is in the vhost_user_blk_event()
+ * routine.
+ */
+ vhost_user_blk_disconnect(dev);
+ }
if (ret < 0) {
error_report("vhost-user-blk: get block config failed");
goto reconnect;
A socket write during vhost-user communication may trigger a disconnect event, calling vhost_user_blk_disconnect() and clearing all the vhost_dev structures holding data that vhost-user functions expect to remain valid to roll back initialization correctly. Delay the cleanup to keep vhost_dev structure valid. There are two possible states to handle: 1. RUN_STATE_PRELAUNCH: skip bh oneshot call and perform disconnect in the caller routine. 2. RUN_STATE_RUNNING: delay by using bh BH changes are based on the similar changes for the vhost-user-net device: commit e7c83a885f865128ae3cf1946f8cb538b63cbfba "vhost-user: delay vhost_user_stop" Signed-off-by: Dima Stepanov <dimastep@yandex-team.ru> --- hw/block/vhost-user-blk.c | 49 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 6 deletions(-)