@@ -38,6 +38,8 @@ typedef struct NvmeZone {
uint8_t *zde;
uint64_t wp_staging;
+
+ QTAILQ_ENTRY(NvmeZone) lru_entry;
} NvmeZone;
typedef struct NvmeNamespace {
@@ -64,6 +66,9 @@ typedef struct NvmeNamespace {
struct {
uint32_t open;
uint32_t active;
+
+ QTAILQ_HEAD(, NvmeZone) lru_open;
+ QTAILQ_HEAD(, NvmeZone) lru_active;
} resources;
} zns;
} NvmeNamespace;
@@ -135,6 +135,9 @@ void nvme_ns_zns_init_zone_state(NvmeNamespace *ns)
ns->zns.resources.open = ns->params.zns.mor != 0xffffffff ?
ns->params.zns.mor + 1 : ns->zns.num_zones;
+ QTAILQ_INIT(&ns->zns.resources.lru_open);
+ QTAILQ_INIT(&ns->zns.resources.lru_active);
+
for (int i = 0; i < ns->zns.num_zones; i++) {
NvmeZone *zone = &ns->zns.zones[i];
zone->zd = &ns->zns.zd[i];
@@ -158,6 +161,8 @@ void nvme_ns_zns_init_zone_state(NvmeNamespace *ns)
if (ns->zns.resources.active) {
ns->zns.resources.active--;
+ QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_active, zone,
+ lru_entry);
break;
}
@@ -1105,11 +1105,47 @@ static inline void nvme_zone_reset_wp(NvmeZone *zone)
zone->wp_staging = nvme_zslba(zone);
}
+static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone,
+ NvmeZoneState to);
+
+static uint16_t nvme_zrm_release_open(NvmeNamespace *ns)
+{
+ NvmeZone *candidate;
+ NvmeZoneState zs;
+ uint16_t status;
+
+ trace_pci_nvme_zrm_release_open(ns->params.nsid);
+
+ QTAILQ_FOREACH(candidate, &ns->zns.resources.lru_open, lru_entry) {
+ zs = nvme_zs(candidate);
+
+ /* skip explicitly opened zones */
+ if (zs == NVME_ZS_ZSEO) {
+ continue;
+ }
+
+ /* skip zones that have in-flight writes */
+ if (candidate->wp_staging != nvme_wp(candidate)) {
+ continue;
+ }
+
+ status = nvme_zrm_transition(ns, candidate, NVME_ZS_ZSC);
+ if (status) {
+ return status;
+ }
+
+ return NVME_SUCCESS;
+ }
+
+ return NVME_TOO_MANY_OPEN_ZONES;
+}
+
static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone,
NvmeZoneState to)
{
NvmeZoneState from = nvme_zs(zone);
NvmeZoneDescriptor *zd = zone->zd;
+ uint16_t status;
trace_pci_nvme_zrm_transition(ns->params.nsid, nvme_zslba(zone), from, to);
@@ -1131,6 +1167,7 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone,
}
ns->zns.resources.active--;
+ QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_active, zone, lru_entry);
break;
@@ -1141,11 +1178,15 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone,
}
if (!ns->zns.resources.open) {
- return NVME_TOO_MANY_OPEN_ZONES;
+ status = nvme_zrm_release_open(ns);
+ if (status) {
+ return status;
+ }
}
ns->zns.resources.active--;
ns->zns.resources.open--;
+ QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_open, zone, lru_entry);
break;
@@ -1172,11 +1213,15 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone,
case NVME_ZS_ZSF:
case NVME_ZS_ZSRO:
ns->zns.resources.active++;
+ ns->zns.resources.open++;
+ QTAILQ_REMOVE(&ns->zns.resources.lru_open, zone, lru_entry);
- /* fallthrough */
+ break;
case NVME_ZS_ZSC:
ns->zns.resources.open++;
+ QTAILQ_REMOVE(&ns->zns.resources.lru_open, zone, lru_entry);
+ QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_active, zone, lru_entry);
break;
@@ -1201,16 +1246,22 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone,
case NVME_ZS_ZSF:
case NVME_ZS_ZSRO:
ns->zns.resources.active++;
+ QTAILQ_REMOVE(&ns->zns.resources.lru_active, zone, lru_entry);
break;
case NVME_ZS_ZSIO:
case NVME_ZS_ZSEO:
if (!ns->zns.resources.open) {
- return NVME_TOO_MANY_OPEN_ZONES;
+ status = nvme_zrm_release_open(ns);
+ if (status) {
+ return status;
+ }
}
ns->zns.resources.open--;
+ QTAILQ_REMOVE(&ns->zns.resources.lru_active, zone, lru_entry);
+ QTAILQ_INSERT_TAIL(&ns->zns.resources.lru_open, zone, lru_entry);
break;
@@ -83,6 +83,7 @@ pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8""
pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs"
pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" status 0x%"PRIx16""
pci_nvme_zrm_transition(uint32_t nsid, uint64_t zslba, uint8_t from, uint8_t to) "nsid %"PRIu32" zslba 0x%"PRIx64" from 0x%"PRIx8" to 0x%"PRIx8""
+pci_nvme_zrm_release_open(uint32_t nsid) "nsid %"PRIu32""
pci_nvme_mmio_read(uint64_t addr) "addr 0x%"PRIx64""
pci_nvme_mmio_write(uint64_t addr, uint64_t data) "addr 0x%"PRIx64" data 0x%"PRIx64""
pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" new_head %"PRIu16""