@@ -27,11 +27,13 @@ typedef struct NvmeNamespaceParams {
struct {
uint64_t zcap;
uint64_t zsze;
+ uint8_t zdes;
} zns;
} NvmeNamespaceParams;
typedef struct NvmeZone {
NvmeZoneDescriptor *zd;
+ uint8_t *zde;
uint64_t wp_staging;
} NvmeZone;
@@ -55,6 +57,7 @@ typedef struct NvmeNamespace {
NvmeZone *zones;
NvmeZoneDescriptor *zd;
+ uint8_t *zde;
} zns;
} NvmeNamespace;
@@ -105,6 +108,11 @@ static inline uint64_t nvme_ns_zsze(NvmeNamespace *ns)
return nvme_ns_lbafe(ns)->zsze;
}
+static inline size_t nvme_ns_zdes_bytes(NvmeNamespace *ns)
+{
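+ /* the Zone Descriptor Extension Size (ZDES) is specified in units of 64 bytes */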
+ return ns->params.zns.zdes << 6;
+}
+
/* calculate the number of LBAs that the namespace can accommodate */
static inline uint64_t nvme_ns_nlbas(NvmeNamespace *ns)
{
@@ -71,6 +71,7 @@ static inline const char *nvme_io_opc_str(uint8_t opc)
case NVME_CMD_WRITE: return "NVME_NVM_CMD_WRITE";
case NVME_CMD_READ: return "NVME_NVM_CMD_READ";
case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES";
+ case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_ZONE_MGMT_RECV";
default: return "NVME_NVM_CMD_UNKNOWN";
}
}
@@ -480,6 +480,7 @@ enum NvmeIoCommands {
NVME_CMD_COMPARE = 0x05,
NVME_CMD_WRITE_ZEROES = 0x08,
NVME_CMD_DSM = 0x09,
+ NVME_CMD_ZONE_MGMT_RECV = 0x7a,
};
typedef struct QEMU_PACKED NvmeDeleteQ {
@@ -592,6 +593,44 @@ enum {
NVME_RW_PRINFO_PRCHK_REF = 1 << 10,
};
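+/*
+ * Zone Management Receive command: 'zra' holds the Zone Receive Action,
+ * 'zrasp' the action specific field (the zone state to report on for Report
+ * Zones) and 'zrasf' carries action specific flags such as Partial Report.
+ */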
+typedef struct QEMU_PACKED NvmeZoneManagementRecvCmd {
+ uint8_t opcode;
+ uint8_t flags;
+ uint16_t cid;
+ uint32_t nsid;
+ uint8_t rsvd8[16];
+ NvmeCmdDptr dptr;
+ uint64_t slba;
+ uint32_t numdw;
+ uint8_t zra;
+ uint8_t zrasp;
+ uint8_t zrasf;
+ uint8_t rsvd55[9];
+} NvmeZoneManagementRecvCmd;
+
+typedef enum NvmeZoneManagementRecvAction {
+ NVME_CMD_ZONE_MGMT_RECV_REPORT_ZONES = 0x0,
+ NVME_CMD_ZONE_MGMT_RECV_EXTENDED_REPORT_ZONES = 0x1,
+} NvmeZoneManagementRecvAction;
+
+typedef enum NvmeZoneManagementRecvActionSpecificField {
+ NVME_CMD_ZONE_MGMT_RECV_LIST_ALL = 0x0,
+ NVME_CMD_ZONE_MGMT_RECV_LIST_ZSE = 0x1,
+ NVME_CMD_ZONE_MGMT_RECV_LIST_ZSIO = 0x2,
+ NVME_CMD_ZONE_MGMT_RECV_LIST_ZSEO = 0x3,
+ NVME_CMD_ZONE_MGMT_RECV_LIST_ZSC = 0x4,
+ NVME_CMD_ZONE_MGMT_RECV_LIST_ZSF = 0x5,
+ NVME_CMD_ZONE_MGMT_RECV_LIST_ZSRO = 0x6,
+ NVME_CMD_ZONE_MGMT_RECV_LIST_ZSO = 0x7,
+} NvmeZoneManagementRecvActionSpecificField;
+
+#define NVME_CMD_ZONE_MGMT_RECEIVE_PARTIAL 0x1
+
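+/*
+ * A zone report consists of this header followed by a number of zone
+ * descriptors; for the Extended Report Zones action each descriptor is
+ * followed by the zone descriptor extension.
+ */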
+typedef struct QEMU_PACKED NvmeZoneReportHeader {
+ uint64_t num_zones;
+ uint8_t rsvd[56];
+} NvmeZoneReportHeader;
+
typedef struct QEMU_PACKED NvmeDsmCmd {
uint8_t opcode;
uint8_t flags;
@@ -810,6 +849,12 @@ typedef struct QEMU_PACKED NvmeZoneDescriptor {
uint8_t rsvd32[32];
} NvmeZoneDescriptor;
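+/* Zone Attributes; ZDEV is the Zone Descriptor Extension Valid attribute */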
+#define NVME_ZA_ZDEV (1 << 7)
+
+#define NVME_ZA_SET(za, attrs) ((za) |= (attrs))
+#define NVME_ZA_CLEAR(za, attrs) ((za) &= ~(attrs))
+#define NVME_ZA_CLEAR_ALL(za) ((za) = 0x0)
+
enum NvmeSmartWarn {
NVME_SMART_SPARE = 1 << 0,
NVME_SMART_TEMPERATURE = 1 << 1,
@@ -1161,6 +1206,7 @@ static inline void _nvme_check_size(void)
QEMU_BUILD_BUG_ON(sizeof(NvmeIdentify) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeRwCmd) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeDsmCmd) != 64);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeZoneManagementRecvCmd) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeRangeType) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeErrorLog) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeFwSlotInfoLog) != 512);
@@ -59,6 +59,9 @@ static void nvme_ns_zns_init_zones(NvmeNamespace *ns)
zone = &ns->zns.zones[i];
zone->zd = &ns->zns.zd[i];
+ if (ns->params.zns.zdes) {
+ zone->zde = &ns->zns.zde[i * nvme_ns_zdes_bytes(ns)];
+ }
zone->wp_staging = zslba;
zd = zone->zd;
@@ -79,11 +82,15 @@ static void nvme_ns_init_zoned(NvmeNamespace *ns)
id_ns_zns->lbafe[i].zsze = ns->params.zns.zsze ?
cpu_to_le64(ns->params.zns.zsze) :
cpu_to_le64(pow2ceil(ns->params.zns.zcap));
+ id_ns_zns->lbafe[i].zdes = ns->params.zns.zdes;
}
ns->zns.num_zones = nvme_ns_nlbas(ns) / nvme_ns_zsze(ns);
ns->zns.zones = g_malloc0_n(ns->zns.num_zones, sizeof(NvmeZone));
ns->zns.zd = g_malloc0_n(ns->zns.num_zones, sizeof(NvmeZoneDescriptor));
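+ /* if configured, allocate one descriptor extension of ZDES bytes per zone */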
+ if (ns->params.zns.zdes) {
+ ns->zns.zde = g_malloc0_n(ns->zns.num_zones, nvme_ns_zdes_bytes(ns));
+ }
id_ns_zns->mar = 0xffffffff;
id_ns_zns->mor = 0xffffffff;
@@ -126,6 +133,7 @@ void nvme_ns_zns_init_zone_state(NvmeNamespace *ns)
for (int i = 0; i < ns->zns.num_zones; i++) {
NvmeZone *zone = &ns->zns.zones[i];
zone->zd = &ns->zns.zd[i];
+ zone->zde = &ns->zns.zde[i * nvme_ns_zdes_bytes(ns)];
zone->wp_staging = nvme_wp(zone);
@@ -137,7 +145,8 @@ void nvme_ns_zns_init_zone_state(NvmeNamespace *ns)
break;
case NVME_ZS_ZSC:
- if (nvme_wp(zone) == nvme_zslba(zone)) {
+ if (nvme_wp(zone) == nvme_zslba(zone) &&
+ !(zone->zd->za & NVME_ZA_ZDEV)) {
nvme_zs_set(zone, NVME_ZS_ZSE);
}
@@ -262,6 +271,7 @@ static Property nvme_ns_props[] = {
DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid),
DEFINE_PROP_UINT64("zns.zcap", NvmeNamespace, params.zns.zcap, 0),
DEFINE_PROP_UINT64("zns.zsze", NvmeNamespace, params.zns.zsze, 0),
+ DEFINE_PROP_UINT8("zns.zdes", NvmeNamespace, params.zns.zdes, 0),
DEFINE_PROP_END_OF_LIST(),
};
@@ -162,6 +162,7 @@ static const NvmeEffectsLog nvme_effects[NVME_IOCS_MAX] = {
.iocs = {
NVME_EFFECTS_NVM_INITIALIZER,
+ [NVME_CMD_ZONE_MGMT_RECV] = NVME_EFFECTS_CSUPP,
},
},
};
@@ -1083,6 +1084,7 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone,
NvmeZoneState to)
{
NvmeZoneState from = nvme_zs(zone);
+ NvmeZoneDescriptor *zd = zone->zd;
trace_pci_nvme_zrm_transition(ns->params.nsid, nvme_zslba(zone), from, to);
@@ -1099,6 +1101,10 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone,
switch (to) {
case NVME_ZS_ZSE:
case NVME_ZS_ZSO:
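+ /* zones transitioning to Empty or Offline have their attributes cleared */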
+ NVME_ZA_CLEAR_ALL(zd->za);
+
+ /* fallthrough */
+
case NVME_ZS_ZSEO:
case NVME_ZS_ZSF:
case NVME_ZS_ZSRO:
@@ -1115,6 +1121,10 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone,
switch (to) {
case NVME_ZS_ZSE:
case NVME_ZS_ZSO:
+ NVME_ZA_CLEAR_ALL(zd->za);
+
+ /* fallthrough */
+
case NVME_ZS_ZSF:
case NVME_ZS_ZSRO:
case NVME_ZS_ZSIO:
@@ -1130,6 +1140,7 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone,
case NVME_ZS_ZSRO:
switch (to) {
case NVME_ZS_ZSO:
+ NVME_ZA_CLEAR_ALL(zd->za);
break;
default:
@@ -1142,6 +1153,10 @@ static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone,
switch (to) {
case NVME_ZS_ZSE:
case NVME_ZS_ZSO:
+ NVME_ZA_CLEAR_ALL(zd->za);
+
+ /* fallthrough */
+
case NVME_ZS_ZSRO:
break;
@@ -1239,6 +1254,135 @@ static void nvme_rw_cb(void *opaque, int ret)
nvme_enqueue_req_completion(nvme_cq(req), req);
}
+static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeZoneManagementRecvCmd *recv;
+ NvmeZoneManagementRecvAction zra;
+ NvmeZoneManagementRecvActionSpecificField zrasp;
+ NvmeNamespace *ns = req->ns;
+ NvmeZone *zone;
+
+ uint8_t *buf, *bufp, zs_list;
+ uint64_t slba;
+ int num_zones = 0, zidx = 0, zidx_begin;
+ uint16_t zes, status;
+ size_t len;
+
+ recv = (NvmeZoneManagementRecvCmd *) &req->cmd;
+
+ zra = recv->zra;
+ zrasp = recv->zrasp;
+ slba = le64_to_cpu(recv->slba);
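+ /* numdw is a 0's based count of dwords to transfer */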
+ len = (le32_to_cpu(recv->numdw) + 1) << 2;
+
+ if (!nvme_ns_zoned(ns)) {
+ return NVME_INVALID_OPCODE | NVME_DNR;
+ }
+
+ trace_pci_nvme_zone_mgmt_recv(nvme_cid(req), nvme_nsid(ns), slba, len,
+ zra, zrasp, recv->zrasf);
+
+ if (!len) {
+ return NVME_SUCCESS;
+ }
+
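+ /* map the reporting filter to a zone state; zero means report all zones */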
+ switch (zrasp) {
+ case NVME_CMD_ZONE_MGMT_RECV_LIST_ALL:
+ zs_list = 0;
+ break;
+
+ case NVME_CMD_ZONE_MGMT_RECV_LIST_ZSE:
+ zs_list = NVME_ZS_ZSE;
+ break;
+
+ case NVME_CMD_ZONE_MGMT_RECV_LIST_ZSIO:
+ zs_list = NVME_ZS_ZSIO;
+ break;
+
+ case NVME_CMD_ZONE_MGMT_RECV_LIST_ZSEO:
+ zs_list = NVME_ZS_ZSEO;
+ break;
+
+ case NVME_CMD_ZONE_MGMT_RECV_LIST_ZSC:
+ zs_list = NVME_ZS_ZSC;
+ break;
+
+ case NVME_CMD_ZONE_MGMT_RECV_LIST_ZSF:
+ zs_list = NVME_ZS_ZSF;
+ break;
+
+ case NVME_CMD_ZONE_MGMT_RECV_LIST_ZSRO:
+ zs_list = NVME_ZS_ZSRO;
+ break;
+
+ case NVME_CMD_ZONE_MGMT_RECV_LIST_ZSO:
+ zs_list = NVME_ZS_ZSO;
+ break;
+ default:
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ status = nvme_check_mdts(n, len);
+ if (status) {
+ return status;
+ }
+
+ if (!nvme_ns_zone(ns, slba)) {
+ trace_pci_nvme_err_invalid_zone(nvme_cid(req), slba);
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ zidx_begin = zidx = slba / nvme_ns_zsze(ns);
+ zes = sizeof(NvmeZoneDescriptor);
+ if (zra == NVME_CMD_ZONE_MGMT_RECV_EXTENDED_REPORT_ZONES) {
+ zes += nvme_ns_zdes_bytes(ns);
+ }
+
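+ /* reserve room for the 64 byte report header; the zone count is filled in last */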
+ buf = bufp = g_malloc0(len);
+ bufp += sizeof(NvmeZoneReportHeader);
+
+ while ((bufp + zes) - buf <= len && zidx < ns->zns.num_zones) {
+ zone = &ns->zns.zones[zidx++];
+
+ if (zs_list && zs_list != nvme_zs(zone)) {
+ continue;
+ }
+
+ num_zones++;
+
+ memcpy(bufp, zone->zd, sizeof(NvmeZoneDescriptor));
+
+ if (zra == NVME_CMD_ZONE_MGMT_RECV_EXTENDED_REPORT_ZONES) {
+ memcpy(bufp + sizeof(NvmeZoneDescriptor), zone->zde,
+ nvme_ns_zdes_bytes(ns));
+ }
+
+ bufp += zes;
+ }
+
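+ /*
+ * If the Partial Report bit is not set, the number of zones reported is
+ * the number of zones from the starting zone that match the filter, not
+ * just the zones that fit in the provided buffer.
+ */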
+ if (!(recv->zrasf & NVME_CMD_ZONE_MGMT_RECEIVE_PARTIAL)) {
+ if (!zs_list) {
+ num_zones = ns->zns.num_zones - zidx_begin;
+ } else {
+ num_zones = 0;
+ for (int i = zidx_begin; i < ns->zns.num_zones; i++) {
+ zone = &ns->zns.zones[i];
+
+ if (zs_list == nvme_zs(zone)) {
+ num_zones++;
+ }
+ }
+ }
+ }
+
+ stq_le_p(buf, (uint64_t)num_zones);
+
+ status = nvme_dma(n, buf, len, DMA_DIRECTION_FROM_DEVICE, req);
+ g_free(buf);
+
+ return status;
+}
+
static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
{
block_acct_start(blk_get_stats(req->ns->blkconf.blk), &req->acct, 0,
@@ -1427,6 +1571,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
case NVME_CMD_WRITE:
case NVME_CMD_READ:
return nvme_rw(n, req);
+ case NVME_CMD_ZONE_MGMT_RECV:
+ return nvme_zone_mgmt_recv(n, req);
default:
trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
return NVME_INVALID_OPCODE | NVME_DNR;
@@ -43,6 +43,7 @@ pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opna
pci_nvme_rw(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
pci_nvme_write_zeroes(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32""
+pci_nvme_zone_mgmt_recv(uint16_t cid, uint32_t nsid, uint64_t slba, uint64_t len, uint8_t zra, uint8_t zrasp, uint8_t zrasf) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" len %"PRIu64" zra 0x%"PRIx8" zrasp 0x%"PRIx8" zrasf 0x%"PRIx8""
pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""