@@ -48,6 +48,8 @@
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/sched/mm.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
#include <xen/xen.h>
#include <xen/xenbus.h>
@@ -80,6 +82,8 @@ enum blkif_state {
BLKIF_STATE_DISCONNECTED,
BLKIF_STATE_CONNECTED,
BLKIF_STATE_SUSPENDED,
+ BLKIF_STATE_FREEZING,
+ BLKIF_STATE_FROZEN
};
struct grant {
@@ -219,6 +223,7 @@ struct blkfront_info
struct list_head requests;
struct bio_list bio_list;
struct list_head info_list;
+ struct completion wait_backend_disconnected;
};
static unsigned int nr_minors;
@@ -1005,6 +1010,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
info->sector_size = sector_size;
info->physical_sector_size = physical_sector_size;
blkif_set_queue_limits(info);
+ init_completion(&info->wait_backend_disconnected);
return 0;
}
@@ -1057,7 +1063,7 @@ static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
case XEN_SCSI_DISK5_MAJOR:
case XEN_SCSI_DISK6_MAJOR:
case XEN_SCSI_DISK7_MAJOR:
- *offset = (*minor / PARTS_PER_DISK) +
+ *offset = (*minor / PARTS_PER_DISK) +
((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) +
EMULATED_SD_DISK_NAME_OFFSET;
*minor = *minor +
@@ -1072,7 +1078,7 @@ static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
case XEN_SCSI_DISK13_MAJOR:
case XEN_SCSI_DISK14_MAJOR:
case XEN_SCSI_DISK15_MAJOR:
- *offset = (*minor / PARTS_PER_DISK) +
+ *offset = (*minor / PARTS_PER_DISK) +
((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) +
EMULATED_SD_DISK_NAME_OFFSET;
*minor = *minor +
@@ -1353,6 +1359,8 @@ static void blkif_free(struct blkfront_info *info, int suspend)
unsigned int i;
struct blkfront_ring_info *rinfo;
+ if (info->connected == BLKIF_STATE_FREEZING)
+ goto free_rings;
/* Prevent new requests being issued until we fix things up. */
info->connected = suspend ?
BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
@@ -1360,6 +1368,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
if (info->rq)
blk_mq_stop_hw_queues(info->rq);
+free_rings:
for_each_rinfo(info, rinfo, i)
blkif_free_ring(rinfo);
@@ -1563,8 +1572,10 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
struct blkfront_info *info = rinfo->dev_info;
- if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
- return IRQ_HANDLED;
+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED
+ && info->connected != BLKIF_STATE_FREEZING)){
+ return IRQ_HANDLED;
+ }
spin_lock_irqsave(&rinfo->ring_lock, flags);
again:
@@ -2027,6 +2038,7 @@ static int blkif_recover(struct blkfront_info *info)
unsigned int segs;
struct blkfront_ring_info *rinfo;
+ bool frozen = info->connected == BLKIF_STATE_FROZEN;
blkfront_gather_backend_features(info);
/* Reset limits changed by blk_mq_update_nr_hw_queues(). */
blkif_set_queue_limits(info);
@@ -2048,6 +2060,9 @@ static int blkif_recover(struct blkfront_info *info)
kick_pending_request_queues(rinfo);
}
+ if (frozen)
+ return 0;
+
list_for_each_entry_safe(req, n, &info->requests, queuelist) {
/* Requeue pending requests (flush or discard) */
list_del_init(&req->queuelist);
@@ -2364,6 +2379,7 @@ static void blkfront_connect(struct blkfront_info *info)
return;
case BLKIF_STATE_SUSPENDED:
+ case BLKIF_STATE_FROZEN:
/*
* If we are recovering from suspension, we need to wait
* for the backend to announce it's features before
@@ -2481,12 +2497,36 @@ static void blkback_changed(struct xenbus_device *dev,
break;
case XenbusStateClosed:
- if (dev->state == XenbusStateClosed)
+ if (dev->state == XenbusStateClosed) {
+ if (info->connected == BLKIF_STATE_FREEZING) {
+ blkif_free(info, 0);
+ info->connected = BLKIF_STATE_FROZEN;
+ complete(&info->wait_backend_disconnected);
+ break;
+ }
+
break;
+ }
+
+ /*
+ * We may somehow receive backend's Closed again while thawing
+ * or restoring and it causes thawing or restoring to fail.
+ * Ignore such unexpected state regardless of the backend state.
+ */
+ if (info->connected == BLKIF_STATE_FROZEN) {
+ dev_dbg(&dev->dev,
+ "ignore the backend's Closed state: %s",
+ dev->nodename);
+ break;
+ }
/* fall through */
case XenbusStateClosing:
- if (info)
- blkfront_closing(info);
+ if (info) {
+ if (info->connected == BLKIF_STATE_FREEZING)
+ xenbus_frontend_closed(dev);
+ else
+ blkfront_closing(info);
+ }
break;
}
}
@@ -2630,6 +2670,71 @@ static void blkif_release(struct gendisk *disk, fmode_t mode)
mutex_unlock(&blkfront_mutex);
}
+static int blkfront_freeze(struct xenbus_device *dev)
+{
+ unsigned int i;
+ struct blkfront_info *info = dev_get_drvdata(&dev->dev);
+ struct blkfront_ring_info *rinfo;
+ /* This would be reasonable timeout as used in xenbus_dev_shutdown() */
+ unsigned int timeout = 5 * HZ;
+ unsigned long flags;
+ int err = 0;
+
+ info->connected = BLKIF_STATE_FREEZING;
+
+ blk_mq_freeze_queue(info->rq);
+ blk_mq_quiesce_queue(info->rq);
+
+ for_each_rinfo(info, rinfo, i) {
+ /* No more gnttab callback work. */
+ gnttab_cancel_free_callback(&rinfo->callback);
+ /* Flush gnttab callback work. Must be done with no locks held. */
+ flush_work(&rinfo->work);
+ }
+
+ for_each_rinfo(info, rinfo, i) {
+ spin_lock_irqsave(&rinfo->ring_lock, flags);
+ if (RING_FULL(&rinfo->ring)
+ || RING_HAS_UNCONSUMED_RESPONSES(&rinfo->ring)) {
+ xenbus_dev_error(dev, err, "Hibernation Failed.
+ The ring is still busy");
+ info->connected = BLKIF_STATE_CONNECTED;
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+ return -EBUSY;
+ }
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+ }
+ /* Kick the backend to disconnect */
+ xenbus_switch_state(dev, XenbusStateClosing);
+
+ /*
+ * We don't want to move forward before the frontend is diconnected
+ * from the backend cleanly.
+ */
+ timeout = wait_for_completion_timeout(&info->wait_backend_disconnected,
+ timeout);
+ if (!timeout) {
+ err = -EBUSY;
+ xenbus_dev_error(dev, err, "Freezing timed out;"
+ "the device may become inconsistent state");
+ }
+
+ return err;
+}
+
+static int blkfront_restore(struct xenbus_device *dev)
+{
+ struct blkfront_info *info = dev_get_drvdata(&dev->dev);
+ int err = 0;
+
+ err = talk_to_blkback(dev, info);
+ blk_mq_unquiesce_queue(info->rq);
+ blk_mq_unfreeze_queue(info->rq);
+ if (!err)
+ blk_mq_update_nr_hw_queues(&info->tag_set, info->nr_rings);
+ return err;
+}
+
static const struct block_device_operations xlvbd_block_fops =
{
.owner = THIS_MODULE,
@@ -2653,6 +2758,9 @@ static struct xenbus_driver blkfront_driver = {
.resume = blkfront_resume,
.otherend_changed = blkback_changed,
.is_ready = blkfront_is_ready,
+ .freeze = blkfront_freeze,
+ .thaw = blkfront_restore,
+ .restore = blkfront_restore
};
static void purge_persistent_grants(struct blkfront_info *info)