@@ -41,6 +41,15 @@ nvme-ns Options
will release active resources when needed by transitioning zones to the
Full state.
+ `zns.{rrld,frld}`; Specifies the Reset and Finish Recommended Limit Delays,
+ that is, the number of seconds until the controller sets the Reset and
+ Finish Zone Recommended attributes on finished and opened zones
+ respectively.
+
+ `zns.{rrl,frl}`; Specifies the Reset and Finish Recommended Limits, that is,
+ the number of seconds, once set, until the controller clears the Reset and
+ Finish Zone Recommended attributes on finished and opened zones respectively.
+
Reference Specifications
------------------------
@@ -31,6 +31,10 @@ typedef struct NvmeNamespaceParams {
uint32_t mar;
uint32_t mor;
uint16_t zoc;
+ uint32_t rrl;
+ uint32_t frl;
+ uint32_t rrld;
+ uint32_t frld;
} zns;
} NvmeNamespaceParams;
@@ -40,9 +44,15 @@ typedef struct NvmeZone {
uint64_t wp_staging;
+ struct {
+ int64_t timestamp;
+ } stats;
+
QTAILQ_ENTRY(NvmeZone) lru_entry;
} NvmeZone;
+typedef QTAILQ_HEAD(, NvmeZone) NvmeZoneList;
+
typedef struct NvmeNamespace {
DeviceState parent_obj;
BlockConf blkconf;
@@ -81,11 +91,15 @@ typedef struct NvmeNamespace {
uint32_t open;
uint32_t active;
- QTAILQ_HEAD(, NvmeZone) lru_open;
- QTAILQ_HEAD(, NvmeZone) lru_active;
+ NvmeZoneList lru_open;
+ NvmeZoneList lru_active;
} resources;
NvmeChangedZoneList changed_list;
+
+ NvmeZoneList lru_finished;
+ QEMUTimer *timer;
+ int64_t rrl_ns, rrld_ns, frl_ns, frld_ns;
} zns;
} NvmeNamespace;
@@ -203,5 +203,7 @@ static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
}
int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp);
+void nvme_zone_changed(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone,
+ NvmeRequest *req);
#endif /* HW_NVME_H */
@@ -894,6 +894,8 @@ typedef struct QEMU_PACKED NvmeChangedZoneList {
} NvmeChangedZoneList;
#define NVME_ZA_ZFC (1 << 0)
+#define NVME_ZA_FZR (1 << 1)
+#define NVME_ZA_RZR (1 << 2)
#define NVME_ZA_ZDEV (1 << 7)
#define NVME_ZA_SET(za, attrs) ((za) |= (attrs))
@@ -50,6 +50,64 @@ const char *nvme_zs_to_str(NvmeZoneState zs)
return NULL;
}
+/*
+ * Walk @zone_list and update @attr on each zone according to the time that
+ * has passed since the zone was stamped (zone->stats.timestamp):
+ *
+ *   age < delay                    nothing to do yet; stop walking
+ *   delay <= age < delay + limit   @attr is set
+ *   age >= delay + limit           @attr is cleared, the zone is re-stamped
+ *                                  and moved to the tail of the list
+ *
+ * @delay and @limit are in nanoseconds. *@next_timer is lowered to the
+ * earliest deadline found during the walk. nvme_zone_changed() is only
+ * called for a zone whose @attr bits actually changed value, so a timer run
+ * triggered by some other zone does not report unchanged zones again.
+ */
+static void nvme_ns_set_zone_attrs(NvmeCtrl *n, NvmeNamespace *ns,
+                                   NvmeZoneList *zone_list, int64_t delay,
+                                   int64_t limit, int64_t *next_timer,
+                                   uint8_t attr)
+{
+    NvmeZone *zone, *next;
+    int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+
+    QTAILQ_FOREACH_SAFE(zone, zone_list, lru_entry, next) {
+        int64_t timestamp = zone->stats.timestamp;
+        uint8_t before = zone->zd->za & attr;
+
+        if (now - timestamp < delay) {
+            /*
+             * NOTE(review): stopping here assumes the list is ordered by
+             * timestamp, oldest first - confirm against the list users.
+             */
+            *next_timer = MIN(*next_timer, timestamp + delay);
+            return;
+        }
+
+        if (now - timestamp < delay + limit) {
+            trace_pci_nvme_ns_set_zone_attr(nvme_zslba(zone), attr);
+            zone->zd->za |= attr;
+            *next_timer = MIN(*next_timer, timestamp + delay + limit);
+        } else {
+            trace_pci_nvme_ns_clear_zone_attr(nvme_zslba(zone), attr);
+            zone->zd->za &= ~attr;
+
+            /* restart the cycle: youngest entry again, delay counts from now */
+            QTAILQ_REMOVE(zone_list, zone, lru_entry);
+            QTAILQ_INSERT_TAIL(zone_list, zone, lru_entry);
+
+            zone->stats.timestamp = now;
+            *next_timer = MIN(*next_timer, now + delay);
+        }
+
+        if ((zone->zd->za & attr) != before) {
+            nvme_zone_changed(n, ns, zone, NULL);
+        }
+    }
+}
+
+/*
+ * Timer callback for a zoned namespace; @opaque is the NvmeNamespace.
+ *
+ * Runs the attribute pass over the open and active lists for the Finish Zone
+ * Recommended attribute and over the finished list for the Reset Zone
+ * Recommended attribute, then re-arms the timer for the earliest deadline
+ * the passes reported, if any.
+ */
+static void nvme_ns_process_timer(void *opaque)
+{
+    NvmeNamespace *ns = opaque;
+    NvmeCtrl *n = NVME(qdev_get_parent_bus(&ns->parent_obj)->parent);
+    const int64_t finish_delay = ns->zns.frld_ns;
+    const int64_t finish_limit = ns->zns.frl_ns;
+    int64_t deadline = INT64_MAX;
+
+    trace_pci_nvme_ns_process_timer(ns->params.nsid);
+
+    /* open and active zones share the Finish parameters */
+    nvme_ns_set_zone_attrs(n, ns, &ns->zns.resources.lru_open, finish_delay,
+                           finish_limit, &deadline, NVME_ZA_FZR);
+    nvme_ns_set_zone_attrs(n, ns, &ns->zns.resources.lru_active, finish_delay,
+                           finish_limit, &deadline, NVME_ZA_FZR);
+    nvme_ns_set_zone_attrs(n, ns, &ns->zns.lru_finished, ns->zns.rrld_ns,
+                           ns->zns.rrl_ns, &deadline, NVME_ZA_RZR);
+
+    /* INT64_MAX means no pass reported a deadline; leave the timer idle */
+    if (deadline == INT64_MAX) {
+        return;
+    }
+
+    timer_mod(ns->zns.timer, deadline);
+}
+
static int nvme_blk_truncate(BlockBackend *blk, size_t len, Error **errp)
{
int ret;
@@ -121,6 +179,21 @@ static void nvme_ns_init_zoned(NvmeNamespace *ns)
id_ns->ncap = ns->zns.num_zones * ns->params.zns.zcap;
+    /* convert to little endian, as is done for mar/mor below */
+    id_ns_zns->rrl = cpu_to_le32(ns->params.zns.rrl);
+    id_ns_zns->frl = cpu_to_le32(ns->params.zns.frl);
+
+    /*
+     * The zone transition paths look only at the delay parameters before
+     * they touch the timer and the finished list, so set both up whenever
+     * any of the four parameters is configured, not only when a limit is.
+     */
+    if (ns->params.zns.rrl || ns->params.zns.frl ||
+        ns->params.zns.rrld || ns->params.zns.frld) {
+        /* parameters are given in seconds; deadlines are kept in ns */
+        ns->zns.rrl_ns = ns->params.zns.rrl * NANOSECONDS_PER_SECOND;
+        ns->zns.rrld_ns = ns->params.zns.rrld * NANOSECONDS_PER_SECOND;
+        ns->zns.frl_ns = ns->params.zns.frl * NANOSECONDS_PER_SECOND;
+        ns->zns.frld_ns = ns->params.zns.frld * NANOSECONDS_PER_SECOND;
+
+        QTAILQ_INIT(&ns->zns.lru_finished);
+
+        ns->zns.timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                     nvme_ns_process_timer, ns);
+    }
+
id_ns_zns->mar = cpu_to_le32(ns->params.zns.mar);
id_ns_zns->mor = cpu_to_le32(ns->params.zns.mor);
@@ -266,6 +339,8 @@ static int nvme_ns_setup_blk_pstate(NvmeNamespace *ns, Error **errp)
ns->zns.resources.active--;
QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_active, zone,
lru_entry);
+ zone->stats.timestamp =
+ qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
continue;
}
@@ -445,6 +520,10 @@ static Property nvme_ns_props[] = {
DEFINE_PROP_UINT32("zns.mar", NvmeNamespace, params.zns.mar, 0xffffffff),
DEFINE_PROP_UINT32("zns.mor", NvmeNamespace, params.zns.mor, 0xffffffff),
DEFINE_PROP_UINT16("zns.zoc", NvmeNamespace, params.zns.zoc, 0),
+ DEFINE_PROP_UINT32("zns.rrl", NvmeNamespace, params.zns.rrl, 0),
+ DEFINE_PROP_UINT32("zns.frl", NvmeNamespace, params.zns.frl, 0),
+ DEFINE_PROP_UINT32("zns.rrld", NvmeNamespace, params.zns.rrld, 0),
+ DEFINE_PROP_UINT32("zns.frld", NvmeNamespace, params.zns.frld, 0),
DEFINE_PROP_END_OF_LIST(),
};
@@ -1200,8 +1200,8 @@ static inline void nvme_zone_reset_wp(NvmeZone *zone)
zone->wp_staging = nvme_zslba(zone);
}
-static void nvme_zone_changed(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone,
- NvmeRequest *req)
+void nvme_zone_changed(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone,
+ NvmeRequest *req)
{
uint16_t num_ids = le16_to_cpu(ns->zns.changed_list.num_ids);
@@ -1244,6 +1244,7 @@ static void nvme_zone_active_excursion(NvmeCtrl *n, NvmeNamespace *ns,
nvme_zrm_transition(n, ns, zone, NVME_ZS_ZSF, req);
NVME_ZA_SET(zone->zd->za, NVME_ZA_ZFC);
+ NVME_ZA_CLEAR(zone->zd->za, NVME_ZA_FZR);
nvme_zone_changed(n, ns, zone, req);
}
@@ -1340,6 +1341,16 @@ out:
return NVME_SUCCESS;
}
+/*
+ * Stamp @zone with the current virtual clock time and, when a Finish
+ * Recommended Limit Delay is configured, make sure the namespace timer fires
+ * no later than this zone's deadline (timestamp + frld).
+ */
+static void nvme_zone_activate(NvmeNamespace *ns, NvmeZone *zone)
+{
+    zone->stats.timestamp = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+
+    /* the timer is created conditionally at namespace init; it may be NULL */
+    if (!ns->params.zns.frld || !ns->zns.timer) {
+        return;
+    }
+
+    /*
+     * Arms an idle timer and pulls a pending one forward if it would fire
+     * later than this deadline; an earlier pending deadline is left alone.
+     */
+    timer_mod_anticipate_ns(ns->zns.timer,
+                            zone->stats.timestamp + ns->zns.frld_ns);
+}
+
static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
NvmeZone *zone, NvmeZoneState to,
NvmeRequest *req)
@@ -1373,6 +1384,8 @@ static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
ns->zns.resources.active--;
QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_active, zone, lru_entry);
+ nvme_zone_activate(ns, zone);
+
break;
case NVME_ZS_ZSIO:
@@ -1395,6 +1408,7 @@ static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
ns->zns.resources.open--;
QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_open, zone, lru_entry);
+ nvme_zone_activate(ns, zone);
break;
default:
@@ -1521,6 +1535,43 @@ static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns,
}
nvme_zs_set(zone, to);
+
+ if (ns->params.zns.rrld) {
+ switch (to) {
+ case NVME_ZS_ZSRO:
+ /* clock is already ticking if the zone was already full */
+ if (from == NVME_ZS_ZSF) {
+ break;
+ }
+
+ /* fallthrough */
+
+ case NVME_ZS_ZSF:
+ QTAILQ_INSERT_TAIL(&ns->zns.lru_finished, zone, lru_entry);
+
+ zone->stats.timestamp = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+
+ if (!timer_pending(ns->zns.timer)) {
+ int64_t next_timer = zone->stats.timestamp + ns->zns.rrld_ns;
+ timer_mod(ns->zns.timer, next_timer);
+ }
+
+ break;
+
+ case NVME_ZS_ZSE:
+ case NVME_ZS_ZSO:
+ if (from == NVME_ZS_ZSF) {
+ QTAILQ_REMOVE(&ns->zns.lru_finished, zone, lru_entry);
+ zone->stats.timestamp = 0;
+ }
+
+ break;
+
+ default:
+ break;
+ }
+ }
+
return NVME_SUCCESS;
}
@@ -1727,6 +1778,7 @@ static uint16_t nvme_zone_mgmt_send_finish(NvmeCtrl *n, NvmeRequest *req,
return status;
}
+ NVME_ZA_CLEAR(zone->zd->za, NVME_ZA_FZR);
if (nvme_zns_commit_zone(ns, zone) < 0) {
return NVME_INTERNAL_DEV_ERROR;
}
@@ -94,6 +94,9 @@ pci_nvme_zone_zrm_release_open(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid
pci_nvme_zone_zrm_release_active(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
pci_nvme_zone_changed(uint16_t cid, uint32_t nsid, uint64_t zslba) "cid %"PRIu16" nsid %"PRIu32" zslba 0x%"PRIx64""
pci_nvme_zone_active_excursion(uint32_t nsid, uint64_t zslba, const char *zc) "nsid %"PRIu32" zslba 0x%"PRIx64" zc \"%s\""
+pci_nvme_ns_process_timer(uint32_t nsid) "nsid %"PRIu32""
+pci_nvme_ns_set_zone_attr(uint64_t zslba, uint8_t attr) "zslba 0x%"PRIx64" attr 0x%"PRIx8""
+pci_nvme_ns_clear_zone_attr(uint64_t zslba, uint8_t attr) "zslba 0x%"PRIx64" attr 0x%"PRIx8""
pci_nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
pci_nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
pci_nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64""