diff mbox series

[16/16] hw/block/nvme: support reset/finish recommended limits

Message ID 20200924204516.1881843-17-its@irrelevant.dk
State New
Headers show
Series hw/block/nvme: zoned namespace command set | expand

Commit Message

Klaus Jensen Sept. 24, 2020, 8:45 p.m. UTC
From: Klaus Jensen <k.jensen@samsung.com>

Add the rrl, rrld, frl and frld device parameters. The frld and rrld
parameters specify the delay in seconds until the device sets the Finish
and Reset Zone Recommended attributes on zones when they are
transitioned to an Opened state or to Full, respectively. The rrl and frl
parameters specify the number of seconds before the device may perform
an internal operation to "clear" the attributes again.

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 docs/specs/nvme.txt   |  9 +++++
 hw/block/nvme-ns.h    | 18 ++++++++--
 hw/block/nvme.h       |  2 ++
 include/block/nvme.h  |  2 ++
 hw/block/nvme-ns.c    | 79 +++++++++++++++++++++++++++++++++++++++++++
 hw/block/nvme.c       | 56 ++++++++++++++++++++++++++++--
 hw/block/trace-events |  3 ++
 7 files changed, 165 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/docs/specs/nvme.txt b/docs/specs/nvme.txt
index a286d9c07025..c2f30cfaaea6 100644
--- a/docs/specs/nvme.txt
+++ b/docs/specs/nvme.txt
@@ -41,6 +41,15 @@  nvme-ns Options
      will release active resources when needed by transitioning zones to the
      Full state.
 
+  `zns.{rrld,frld}`; Specifies the Reset and Finish Recommended Limit Delays,
+     that is, the number of seconds until the controller sets the Reset and
+     Finish Zone Recommended attributes on finished and opened zones
+     respectively.
+
+  `zns.{rrl,frl}`; Specifies the Reset and Finish Recommended Limits, that is,
+     the number of seconds until the controller clears the Reset and Finish
+     Zone Recommended attributes on finished and opened zones respectively.
+
 
 Reference Specifications
 ------------------------
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index d9093fdad984..8daf3a90d7f0 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -31,6 +31,10 @@  typedef struct NvmeNamespaceParams {
         uint32_t mar;
         uint32_t mor;
         uint16_t zoc;
+        uint32_t rrl;
+        uint32_t frl;
+        uint32_t rrld;
+        uint32_t frld;
     } zns;
 } NvmeNamespaceParams;
 
@@ -40,9 +44,15 @@  typedef struct NvmeZone {
 
     uint64_t wp_staging;
 
+    struct {
+        int64_t timestamp;
+    } stats;
+
     QTAILQ_ENTRY(NvmeZone) lru_entry;
 } NvmeZone;
 
+typedef QTAILQ_HEAD(, NvmeZone) NvmeZoneList;
+
 typedef struct NvmeNamespace {
     DeviceState  parent_obj;
     BlockConf    blkconf;
@@ -81,11 +91,15 @@  typedef struct NvmeNamespace {
             uint32_t open;
             uint32_t active;
 
-            QTAILQ_HEAD(, NvmeZone) lru_open;
-            QTAILQ_HEAD(, NvmeZone) lru_active;
+            NvmeZoneList lru_open;
+            NvmeZoneList lru_active;
         } resources;
 
         NvmeChangedZoneList changed_list;
+
+        NvmeZoneList lru_finished;
+        QEMUTimer *timer;
+        int64_t rrl_ns, rrld_ns, frl_ns, frld_ns;
     } zns;
 } NvmeNamespace;
 
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 8cd2d936548e..d840ff2cb97c 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -203,5 +203,7 @@  static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
 }
 
 int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp);
+void nvme_zone_changed(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone,
+                       NvmeRequest *req);
 
 #endif /* HW_NVME_H */
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 0948a262b2df..a66da8c46f9e 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -894,6 +894,8 @@  typedef struct QEMU_PACKED NvmeChangedZoneList {
 } NvmeChangedZoneList;
 
 #define NVME_ZA_ZFC  (1 << 0)
+#define NVME_ZA_FZR  (1 << 1)
+#define NVME_ZA_RZR  (1 << 2)
 #define NVME_ZA_ZDEV (1 << 7)
 
 #define NVME_ZA_SET(za, attrs)   ((za) |= (attrs))
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index 9a9f728d791f..6243458a436c 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -50,6 +50,64 @@  const char *nvme_zs_to_str(NvmeZoneState zs)
     return NULL;
 }
 
+static void nvme_ns_set_zone_attrs(NvmeCtrl *n, NvmeNamespace *ns,
+                                   NvmeZoneList *zone_list, int64_t delay,
+                                   int64_t limit, int64_t *next_timer,
+                                   uint8_t attr)
+{
+    NvmeZone *zone, *next;
+    int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    int64_t timestamp;
+    /* apply attr timing to each zone; *next_timer tracks the next deadline */
+    QTAILQ_FOREACH_SAFE(zone, zone_list, lru_entry, next) {
+        timestamp = zone->stats.timestamp;
+        if (now - timestamp < delay) {
+            *next_timer = MIN(*next_timer, timestamp + delay);
+            return; /* NOTE(review): assumes later entries are newer */
+        }
+        /* delay elapsed: set attr for limit ns, then clear and start over */
+        if (now - timestamp < delay + limit) {
+            trace_pci_nvme_ns_set_zone_attr(nvme_zslba(zone), attr);
+            zone->zd->za |= attr;
+            *next_timer = MIN(*next_timer, timestamp + delay + limit);
+        } else {
+            trace_pci_nvme_ns_clear_zone_attr(nvme_zslba(zone), attr);
+            zone->zd->za &= ~attr;
+            QTAILQ_REMOVE(zone_list, zone, lru_entry);
+            QTAILQ_INSERT_TAIL(zone_list, zone, lru_entry);
+            /* re-queued at the tail above; restart its clock from now */
+            zone->stats.timestamp = now;
+            *next_timer = MIN(*next_timer, now + delay);
+        }
+        /* NOTE(review): also runs when attr was already set on this zone */
+        nvme_zone_changed(n, ns, zone, NULL);
+    }
+}
+
+/* Timer callback: refresh FZR/RZR on the zone lists, then re-arm if needed. */
+static void nvme_ns_process_timer(void *opaque)
+{
+    NvmeNamespace *ns = opaque;
+    NvmeCtrl *n = NVME(qdev_get_parent_bus(&ns->parent_obj)->parent);
+    int64_t deadline = INT64_MAX;
+    NvmeZoneList *fzr_lists[] = {
+        &ns->zns.resources.lru_open,
+        &ns->zns.resources.lru_active,
+    };
+
+    trace_pci_nvme_ns_process_timer(ns->params.nsid);
+
+    for (unsigned i = 0; i < sizeof(fzr_lists) / sizeof(fzr_lists[0]); i++) {
+        nvme_ns_set_zone_attrs(n, ns, fzr_lists[i], ns->zns.frld_ns,
+                               ns->zns.frl_ns, &deadline, NVME_ZA_FZR);
+    }
+    nvme_ns_set_zone_attrs(n, ns, &ns->zns.lru_finished, ns->zns.rrld_ns,
+                           ns->zns.rrl_ns, &deadline, NVME_ZA_RZR);
+    if (deadline != INT64_MAX) {
+        timer_mod(ns->zns.timer, deadline);
+    }
+}
+
 static int nvme_blk_truncate(BlockBackend *blk, size_t len, Error **errp)
 {
     int ret;
@@ -121,6 +179,21 @@  static void nvme_ns_init_zoned(NvmeNamespace *ns)
 
     id_ns->ncap = ns->zns.num_zones * ns->params.zns.zcap;
 
+    /* store in little-endian like the mar/mor identify fields below */
+    id_ns_zns->rrl = cpu_to_le32(ns->params.zns.rrl);
+    id_ns_zns->frl = cpu_to_le32(ns->params.zns.frl);
+    if (ns->params.zns.rrl || ns->params.zns.frl) {
+        ns->zns.rrl_ns = ns->params.zns.rrl * NANOSECONDS_PER_SECOND;
+        ns->zns.rrld_ns = ns->params.zns.rrld * NANOSECONDS_PER_SECOND;
+        ns->zns.frl_ns = ns->params.zns.frl * NANOSECONDS_PER_SECOND;
+        ns->zns.frld_ns = ns->params.zns.frld * NANOSECONDS_PER_SECOND;
+
+        ns->zns.timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                     nvme_ns_process_timer, ns);
+
+        QTAILQ_INIT(&ns->zns.lru_finished);
+    }
+
     id_ns_zns->mar = cpu_to_le32(ns->params.zns.mar);
     id_ns_zns->mor = cpu_to_le32(ns->params.zns.mor);
 
@@ -266,6 +339,8 @@  static int nvme_ns_setup_blk_pstate(NvmeNamespace *ns, Error **errp)
                         ns->zns.resources.active--;
                         QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_active, zone,
                                            lru_entry);
+                        zone->stats.timestamp =
+                            qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
                         continue;
                     }
 
@@ -445,6 +520,10 @@  static Property nvme_ns_props[] = {
     DEFINE_PROP_UINT32("zns.mar", NvmeNamespace, params.zns.mar, 0xffffffff),
     DEFINE_PROP_UINT32("zns.mor", NvmeNamespace, params.zns.mor, 0xffffffff),
     DEFINE_PROP_UINT16("zns.zoc", NvmeNamespace, params.zns.zoc, 0),
+    DEFINE_PROP_UINT32("zns.rrl", NvmeNamespace, params.zns.rrl, 0),
+    DEFINE_PROP_UINT32("zns.frl", NvmeNamespace, params.zns.frl, 0),
+    DEFINE_PROP_UINT32("zns.rrld", NvmeNamespace, params.zns.rrld, 0),
+    DEFINE_PROP_UINT32("zns.frld", NvmeNamespace, params.zns.frld, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index ecc88e858eee..d472ed240059 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1200,8 +1200,8 @@  static inline void nvme_zone_reset_wp(NvmeZone *zone)
     zone->wp_staging = nvme_zslba(zone);
 }
 
-static void nvme_zone_changed(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone,
-                              NvmeRequest *req)
+void nvme_zone_changed(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone,
+                       NvmeRequest *req)
 {
     uint16_t num_ids = le16_to_cpu(ns->zns.changed_list.num_ids);
 
@@ -1244,6 +1244,7 @@  static void nvme_zone_active_excursion(NvmeCtrl *n, NvmeNamespace *ns,
 
     nvme_zrm_transition(n, ns, zone, NVME_ZS_ZSF, req);
     NVME_ZA_SET(zone->zd->za, NVME_ZA_ZFC);
+    NVME_ZA_CLEAR(zone->zd->za, NVME_ZA_FZR);
 
     nvme_zone_changed(n, ns, zone, req);
 }
@@ -1340,6 +1341,16 @@  out:
     return NVME_SUCCESS;
 }
 
+/* Stamp the zone and, when the timer exists and is idle, arm it for frld. */
+static void nvme_zone_activate(NvmeNamespace *ns, NvmeZone *zone)
+{
+    zone->stats.timestamp = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    /* the timer is only created when zns.rrl or zns.frl is non-zero */
+    if (ns->zns.timer && ns->params.zns.frld && !timer_pending(ns->zns.timer)) {
+        timer_mod(ns->zns.timer, zone->stats.timestamp + ns->zns.frld_ns);
+    }
+}
+
 static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
                                     NvmeZone *zone, NvmeZoneState to,
                                     NvmeRequest *req)
@@ -1373,6 +1384,8 @@  static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
             ns->zns.resources.active--;
             QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_active, zone, lru_entry);
 
+            nvme_zone_activate(ns, zone);
+
             break;
 
         case NVME_ZS_ZSIO:
@@ -1395,6 +1408,7 @@  static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
             ns->zns.resources.open--;
             QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_open, zone, lru_entry);
 
+            nvme_zone_activate(ns, zone);
             break;
 
         default:
@@ -1521,6 +1535,43 @@  static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
     }
 
     nvme_zs_set(zone, to);
+
+    /* lru_finished and the timer are only set up if zns.rrl or zns.frl is */
+    if (ns->zns.timer && ns->params.zns.rrld) {
+        switch (to) {
+        case NVME_ZS_ZSRO:
+            /* clock is already ticking if the zone was already full */
+            if (from == NVME_ZS_ZSF) {
+                break;
+            }
+
+            /* fallthrough */
+
+        case NVME_ZS_ZSF:
+            QTAILQ_INSERT_TAIL(&ns->zns.lru_finished, zone, lru_entry);
+
+            zone->stats.timestamp = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+
+            if (!timer_pending(ns->zns.timer)) {
+                int64_t next_timer = zone->stats.timestamp + ns->zns.rrld_ns;
+                timer_mod(ns->zns.timer, next_timer);
+            }
+            break;
+
+        case NVME_ZS_ZSE:
+        case NVME_ZS_ZSO:
+            if (from == NVME_ZS_ZSF) {
+                QTAILQ_REMOVE(&ns->zns.lru_finished, zone, lru_entry);
+                zone->stats.timestamp = 0;
+            }
+
+            break;
+
+        default:
+            break;
+        }
+    }
+
     return NVME_SUCCESS;
 }
 
@@ -1727,6 +1778,7 @@  static uint16_t nvme_zone_mgmt_send_finish(NvmeCtrl *n, NvmeRequest *req,
         return status;
     }
 
+    NVME_ZA_CLEAR(zone->zd->za, NVME_ZA_FZR);
     if (nvme_zns_commit_zone(ns, zone) < 0) {
         return NVME_INTERNAL_DEV_ERROR;
     }
diff --git a/hw/block/trace-events b/hw/block/trace-events
index ebd786b77868..917520b6f4d4 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -94,6 +94,9 @@  pci_nvme_zone_zrm_release_open(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid
 pci_nvme_zone_zrm_release_active(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
 pci_nvme_zone_changed(uint16_t cid, uint32_t nsid, uint64_t zslba) "cid %"PRIu16" nsid %"PRIu32" zslba 0x%"PRIx64""
 pci_nvme_zone_active_excursion(uint32_t nsid, uint64_t zslba, const char *zc) "nsid %"PRIu32" zslba 0x%"PRIx64" zc \"%s\""
+pci_nvme_ns_process_timer(uint32_t nsid) "nsid %"PRIu32""
+pci_nvme_ns_set_zone_attr(uint64_t zslba, uint8_t attr) "zslba 0x%"PRIx64" attr 0x%"PRIx8""
+pci_nvme_ns_clear_zone_attr(uint64_t zslba, uint8_t attr) "zslba 0x%"PRIx64" attr 0x%"PRIx8""
 pci_nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
 pci_nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
 pci_nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64""