@@ -37,9 +37,10 @@ typedef struct NvmePstateHeader {
struct {
uint64_t zcap;
uint64_t zsze;
+ uint8_t zdes;
} QEMU_PACKED zns;
- uint8_t rsvd3088[1008];
+ uint8_t rsvd3089[1007];
} QEMU_PACKED NvmePstateHeader;
typedef struct NvmeNamespaceParams {
@@ -50,11 +51,13 @@ typedef struct NvmeNamespaceParams {
struct {
uint64_t zcap;
uint64_t zsze;
+ uint8_t zdes;
} zns;
} NvmeNamespaceParams;
typedef struct NvmeZone {
NvmeZoneDescriptor *zd;
+ uint8_t *zde;
uint64_t wp_staging;
} NvmeZone;
@@ -91,6 +94,7 @@ typedef struct NvmeNamespace {
NvmeZone *zones;
NvmeZoneDescriptor *zd;
+ uint8_t *zde;
} zns;
} NvmeNamespace;
@@ -183,6 +187,11 @@ static inline void nvme_zs_set(NvmeZone *zone, NvmeZoneState zs)
zone->zd->zs = zs << 4;
}
+/*
+ * Size in bytes of a single zone descriptor extension for @ns.
+ *
+ * The zns.zdes namespace parameter counts 64-byte units, so the byte size
+ * is that count multiplied by 64.
+ */
+static inline size_t nvme_ns_zdes_bytes(NvmeNamespace *ns)
+{
+    const size_t units = ns->params.zns.zdes;
+
+    return units * 64;
+}
+
static inline bool nvme_ns_zone_wp_valid(NvmeZone *zone)
{
switch (nvme_zs(zone)) {
@@ -71,6 +71,7 @@ static inline const char *nvme_io_opc_str(uint8_t opc)
case NVME_CMD_WRITE: return "NVME_NVM_CMD_WRITE";
case NVME_CMD_READ: return "NVME_NVM_CMD_READ";
case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES";
+ case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_ZONE_MGMT_RECV";
default: return "NVME_NVM_CMD_UNKNOWN";
}
}
@@ -481,6 +481,7 @@ enum NvmeIoCommands {
NVME_CMD_COMPARE = 0x05,
NVME_CMD_WRITE_ZEROES = 0x08,
NVME_CMD_DSM = 0x09,
+ NVME_CMD_ZONE_MGMT_RECV = 0x7a,
};
typedef struct QEMU_PACKED NvmeDeleteQ {
@@ -593,6 +594,44 @@ enum {
NVME_RW_PRINFO_PRCHK_REF = 1 << 10,
};
+/*
+ * Layout of the Zone Management Receive command (NVME_CMD_ZONE_MGMT_RECV).
+ * nvme_zone_mgmt_recv() overlays this on req->cmd; _nvme_check_size()
+ * enforces that it is exactly 64 bytes.
+ */
+typedef struct QEMU_PACKED NvmeZoneManagementRecvCmd {
+ uint8_t opcode;
+ uint8_t flags;
+ uint16_t cid;
+ uint32_t nsid;
+ uint8_t rsvd8[16];
+ NvmeCmdDptr dptr;
+ uint64_t slba; /* little-endian; selects the first zone to report */
+ uint32_t numdw; /* little-endian; buffer length is (numdw + 1) dwords */
+ uint8_t zra; /* an NvmeZoneManagementRecvAction value */
+ uint8_t zrasp; /* an NvmeZoneManagementRecvActionSpecificField value */
+ uint8_t zrasf; /* bit 0 is NVME_CMD_ZONE_MGMT_RECEIVE_PARTIAL */
+ uint8_t rsvd55[9];
+} NvmeZoneManagementRecvCmd;
+
+/* Values of the zra field of NvmeZoneManagementRecvCmd. */
+typedef enum NvmeZoneManagementRecvAction {
+ /* each report entry is a bare NvmeZoneDescriptor */
+ NVME_CMD_ZONE_MGMT_RECV_REPORT_ZONES = 0x0,
+ /* each entry is followed by nvme_ns_zdes_bytes() of extension data */
+ NVME_CMD_ZONE_MGMT_RECV_EXTENDED_REPORT_ZONES = 0x1,
+} NvmeZoneManagementRecvAction;
+
+/*
+ * Values of the zrasp field of NvmeZoneManagementRecvCmd.
+ *
+ * nvme_zone_mgmt_recv() maps each LIST_ZSx value to the matching NVME_ZS_ZSx
+ * zone state and reports only zones currently in that state; LIST_ALL
+ * disables the filter. Any other value is rejected with NVME_INVALID_FIELD.
+ */
+typedef enum NvmeZoneManagementRecvActionSpecificField {
+ NVME_CMD_ZONE_MGMT_RECV_LIST_ALL = 0x0,
+ NVME_CMD_ZONE_MGMT_RECV_LIST_ZSE = 0x1,
+ NVME_CMD_ZONE_MGMT_RECV_LIST_ZSIO = 0x2,
+ NVME_CMD_ZONE_MGMT_RECV_LIST_ZSEO = 0x3,
+ NVME_CMD_ZONE_MGMT_RECV_LIST_ZSC = 0x4,
+ NVME_CMD_ZONE_MGMT_RECV_LIST_ZSF = 0x5,
+ NVME_CMD_ZONE_MGMT_RECV_LIST_ZSRO = 0x6,
+ NVME_CMD_ZONE_MGMT_RECV_LIST_ZSO = 0x7,
+} NvmeZoneManagementRecvActionSpecificField;
+
+/*
+ * Bit 0 of the zrasf field of NvmeZoneManagementRecvCmd. When set, the zone
+ * count in the report header is the number of entries actually placed in the
+ * buffer; when clear, it is the number of matching zones from the starting
+ * zone to the end of the namespace.
+ */
+#define NVME_CMD_ZONE_MGMT_RECEIVE_PARTIAL 0x1
+
+/*
+ * Header at the start of a zone report buffer. nvme_zone_mgmt_recv() stores
+ * the zone count here as a little-endian 64-bit value and places the first
+ * report entry sizeof(NvmeZoneReportHeader) (64) bytes into the buffer.
+ */
+typedef struct QEMU_PACKED NvmeZoneReportHeader {
+ uint64_t num_zones;
+ uint8_t rsvd[56];
+} NvmeZoneReportHeader;
+
typedef struct QEMU_PACKED NvmeDsmCmd {
uint8_t opcode;
uint8_t flags;
@@ -812,6 +851,12 @@ typedef struct QEMU_PACKED NvmeZoneDescriptor {
uint8_t rsvd32[32];
} NvmeZoneDescriptor;
+/*
+ * Flag in the za byte of NvmeZoneDescriptor. While it is set,
+ * nvme_ns_zns_init_zone_state() keeps a closed zone closed instead of moving
+ * it back to empty when its write pointer equals the zone start LBA.
+ * NOTE(review): presumably "zone descriptor extension valid" - confirm
+ * against the ZNS specification.
+ */
+#define NVME_ZA_ZDEV (1 << 7)
+
+/* In-place helpers for a za byte; @za must be a modifiable lvalue. */
+#define NVME_ZA_SET(za, attrs) ((za) |= (attrs))
+#define NVME_ZA_CLEAR(za, attrs) ((za) &= ~(attrs))
+#define NVME_ZA_CLEAR_ALL(za) ((za) = 0x0)
+
enum NvmeSmartWarn {
NVME_SMART_SPARE = 1 << 0,
NVME_SMART_TEMPERATURE = 1 << 1,
@@ -1162,6 +1207,7 @@ static inline void _nvme_check_size(void)
QEMU_BUILD_BUG_ON(sizeof(NvmeIdentify) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeRwCmd) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeDsmCmd) != 64);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeZoneManagementRecvCmd) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeRangeType) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeErrorLog) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeFwSlotInfoLog) != 512);
@@ -86,6 +86,9 @@ static void nvme_ns_zns_init_zones(NvmeNamespace *ns)
zone = &ns->zns.zones[i];
zone->zd = &ns->zns.zd[i];
+ if (ns->params.zns.zdes) {
+ zone->zde = &ns->zns.zde[i];
+ }
zone->wp_staging = zslba;
zd = zone->zd;
@@ -106,11 +109,15 @@ static void nvme_ns_init_zoned(NvmeNamespace *ns)
id_ns_zns->lbafe[i].zsze = ns->params.zns.zsze ?
cpu_to_le64(ns->params.zns.zsze) :
cpu_to_le64(pow2ceil(ns->params.zns.zcap));
+ id_ns_zns->lbafe[i].zdes = ns->params.zns.zdes;
}
ns->zns.num_zones = nvme_ns_nlbas(ns) / nvme_ns_zsze(ns);
ns->zns.zones = g_malloc0_n(ns->zns.num_zones, sizeof(NvmeZone));
ns->zns.zd = g_malloc0_n(ns->zns.num_zones, sizeof(NvmeZoneDescriptor));
+ if (ns->params.zns.zdes) {
+ ns->zns.zde = g_malloc0_n(ns->zns.num_zones, nvme_ns_zdes_bytes(ns));
+ }
id_ns_zns->mar = 0xffffffff;
id_ns_zns->mor = 0xffffffff;
@@ -148,7 +155,7 @@ static int nvme_ns_pstate_init(NvmeNamespace *ns, Error **errp)
BlockBackend *blk = ns->pstate.blk;
NvmePstateHeader header;
uint64_t nlbas = nvme_ns_nlbas(ns);
- size_t bitmap_len, pstate_len, zd_len = 0;
+ size_t bitmap_len, pstate_len, zd_len = 0, zde_len = 0;
int ret;
ret = nvme_blk_truncate(blk, sizeof(NvmePstateHeader), errp);
@@ -170,6 +177,7 @@ static int nvme_ns_pstate_init(NvmeNamespace *ns, Error **errp)
header.zns.zsze = ns->params.zns.zsze ?
cpu_to_le64(ns->params.zns.zsze) :
cpu_to_le64(pow2ceil(ns->params.zns.zcap));
+ header.zns.zdes = ns->params.zns.zdes;
}
ret = blk_pwrite(blk, 0, &header, sizeof(header), 0);
@@ -181,9 +189,11 @@ static int nvme_ns_pstate_init(NvmeNamespace *ns, Error **errp)
bitmap_len = DIV_ROUND_UP(nlbas, sizeof(unsigned long));
if (nvme_ns_zoned(ns)) {
zd_len = ns->zns.num_zones * sizeof(NvmeZoneDescriptor);
+ zde_len = nvme_ns_zoned(ns) ?
+ ns->zns.num_zones * nvme_ns_zdes_bytes(ns) : 0;
}
- pstate_len = ROUND_UP(sizeof(NvmePstateHeader) + bitmap_len + zd_len,
- BDRV_SECTOR_SIZE);
+ pstate_len = ROUND_UP(sizeof(NvmePstateHeader) + bitmap_len + zd_len +
+ zde_len, BDRV_SECTOR_SIZE);
ret = nvme_blk_truncate(blk, pstate_len, errp);
if (ret < 0) {
@@ -213,6 +223,7 @@ void nvme_ns_zns_init_zone_state(NvmeNamespace *ns)
for (int i = 0; i < ns->zns.num_zones; i++) {
NvmeZone *zone = &ns->zns.zones[i];
zone->zd = &ns->zns.zd[i];
+ zone->zde = &ns->zns.zde[i];
zone->wp_staging = nvme_wp(zone);
@@ -224,7 +235,8 @@ void nvme_ns_zns_init_zone_state(NvmeNamespace *ns)
continue;
case NVME_ZS_ZSC:
- if (nvme_wp(zone) == nvme_zslba(zone)) {
+ if (nvme_wp(zone) == nvme_zslba(zone) &&
+ !(zone->zd->za & NVME_ZA_ZDEV)) {
nvme_zs_set(zone, NVME_ZS_ZSE);
}
@@ -243,7 +255,7 @@ static int nvme_ns_pstate_load(NvmeNamespace *ns, size_t len, Error **errp)
BlockBackend *blk = ns->pstate.blk;
NvmePstateHeader header;
uint64_t nlbas = nvme_ns_nlbas(ns);
- size_t bitmap_len, pstate_len, zd_len = 0;
+ size_t bitmap_len, pstate_len, zd_len = 0, zde_len = 0;
unsigned long *map;
int ret;
@@ -294,12 +306,21 @@ static int nvme_ns_pstate_load(NvmeNamespace *ns, size_t len, Error **errp)
return -1;
}
+ if (header.zns.zdes != ns->params.zns.zdes) {
+ error_setg(errp, "zns.zdes parameter inconsistent with pstate "
+ "(pstate %u; parameter %u)",
+ header.zns.zdes, ns->params.zns.zdes);
+ return -1;
+ }
+
bitmap_len = DIV_ROUND_UP(nlbas, sizeof(unsigned long));
if (nvme_ns_zoned(ns)) {
zd_len = ns->zns.num_zones * sizeof(NvmeZoneDescriptor);
+ zde_len = nvme_ns_zoned(ns) ?
+ ns->zns.num_zones * nvme_ns_zdes_bytes(ns) : 0;
}
- pstate_len = ROUND_UP(sizeof(NvmePstateHeader) + bitmap_len + zd_len,
- BDRV_SECTOR_SIZE);
+ pstate_len = ROUND_UP(sizeof(NvmePstateHeader) + bitmap_len + zd_len +
+ zde_len, BDRV_SECTOR_SIZE);
if (len != pstate_len) {
error_setg(errp, "pstate size mismatch "
@@ -335,10 +356,19 @@ static int nvme_ns_pstate_load(NvmeNamespace *ns, size_t len, Error **errp)
return ret;
}
+ if (zde_len) {
+ ret = blk_pread(blk, ns->pstate.zns.offset + zd_len, ns->zns.zde,
+ zde_len);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "could not read zone descriptor "
+ "extensions from pstate");
+ return ret;
+ }
+ }
+
nvme_ns_zns_init_zone_state(ns);
- ret = blk_pwrite(blk, ns->pstate.utilization.offset + bitmap_len,
- ns->zns.zd, zd_len, 0);
+ ret = blk_pwrite(blk, ns->pstate.zns.offset, ns->zns.zd, zd_len, 0);
if (ret < 0) {
error_setg_errno(errp, -ret,
"could not write zone descriptors to pstate");
@@ -516,6 +546,7 @@ static Property nvme_ns_props[] = {
DEFINE_PROP_UINT8("iocs", NvmeNamespace, params.iocs, NVME_IOCS_NVM),
DEFINE_PROP_UINT64("zns.zcap", NvmeNamespace, params.zns.zcap, 0),
DEFINE_PROP_UINT64("zns.zsze", NvmeNamespace, params.zns.zsze, 0),
+ DEFINE_PROP_UINT8("zns.zdes", NvmeNamespace, params.zns.zdes, 0),
DEFINE_PROP_END_OF_LIST(),
};
@@ -163,6 +163,7 @@ static const NvmeEffectsLog nvme_effects[NVME_IOCS_MAX] = {
.iocs = {
NVME_EFFECTS_NVM_INITIALIZER,
+ [NVME_CMD_ZONE_MGMT_RECV] = NVME_EFFECTS_CSUPP,
},
},
};
@@ -1218,6 +1219,9 @@ static void nvme_rw_cb(void *opaque, int ret)
NVME_ZS_ZSRO : NVME_ZS_ZSO;
nvme_zs_set(zone, zs);
+ if (zs == NVME_ZS_ZSO) {
+ NVME_ZA_CLEAR_ALL(zone->zd->za);
+ }
if (nvme_zns_commit_zone(ns, zone) < 0) {
req->status = NVME_INTERNAL_DEV_ERROR;
@@ -1286,6 +1290,135 @@ static uint16_t nvme_do_aio(BlockBackend *blk, int64_t offset, size_t len,
return NVME_NO_COMPLETE;
}
+/*
+ * Zone Management Receive: build a zone report and transfer it to the host.
+ *
+ * The report is a NvmeZoneReportHeader followed by one entry per reported
+ * zone, starting at the zone index nvme_ns_zone_idx() yields for the
+ * command's SLBA. An entry is a NvmeZoneDescriptor, followed by
+ * nvme_ns_zdes_bytes() bytes of zone descriptor extension when the Extended
+ * Report Zones action is requested. ZRASP selects a single zone state to
+ * list, or all zones. The host buffer is (NUMDW + 1) dwords long; an entry
+ * that does not fit completely is not reported.
+ *
+ * The zone count in the header is the number of entries placed in the
+ * buffer if the Partial Report bit of ZRASF is set; otherwise it is the
+ * number of matching zones from the starting zone to the end of the
+ * namespace.
+ *
+ * Returns the status of nvme_check_mdts() or nvme_dma(), NVME_INVALID_OPCODE
+ * for a non-zoned namespace, or NVME_INVALID_FIELD for an unknown ZRA or
+ * ZRASP value, an SLBA for which nvme_ns_get_zone() finds no zone, an
+ * extended report on a namespace configured with zns.zdes == 0, or a
+ * transfer length that does not fit in size_t.
+ */
+static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req)
+{
+    NvmeZoneManagementRecvCmd *recv;
+    NvmeZoneManagementRecvAction zra;
+    NvmeZoneManagementRecvActionSpecificField zrasp;
+    NvmeNamespace *ns = req->ns;
+    NvmeZone *zone;
+
+    uint8_t *buf, zs_list;
+    uint64_t slba, nbytes;
+    int num_zones = 0, zidx = 0, zidx_begin;
+    uint16_t status;
+    size_t len, buf_len, off, zes;
+
+    recv = (NvmeZoneManagementRecvCmd *) &req->cmd;
+
+    zra = recv->zra;
+    zrasp = recv->zrasp;
+    slba = le64_to_cpu(recv->slba);
+
+    /*
+     * NUMDW is a zero-based dword count. Widen before the arithmetic so a
+     * large value cannot wrap around in 32 bits (0xffffffff used to yield a
+     * length of zero).
+     */
+    nbytes = ((uint64_t)le32_to_cpu(recv->numdw) + 1) << 2;
+
+    if (!nvme_ns_zoned(ns)) {
+        return NVME_INVALID_OPCODE | NVME_DNR;
+    }
+
+    trace_pci_nvme_zone_mgmt_recv(nvme_cid(req), nvme_nsid(ns), slba, nbytes,
+                                  zra, zrasp, recv->zrasf);
+
+    /* Reject lengths that cannot be represented in size_t on this host. */
+    len = nbytes;
+    if (len != nbytes) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    zes = sizeof(NvmeZoneDescriptor);
+
+    switch (zra) {
+    case NVME_CMD_ZONE_MGMT_RECV_REPORT_ZONES:
+        break;
+
+    case NVME_CMD_ZONE_MGMT_RECV_EXTENDED_REPORT_ZONES:
+        /*
+         * Without zone descriptor extensions there is nothing to report
+         * and zone->zde is not set up by nvme_ns_zns_init_zones().
+         */
+        if (!ns->params.zns.zdes) {
+            return NVME_INVALID_FIELD | NVME_DNR;
+        }
+
+        zes += nvme_ns_zdes_bytes(ns);
+        break;
+
+    default:
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    switch (zrasp) {
+    case NVME_CMD_ZONE_MGMT_RECV_LIST_ALL:
+        zs_list = 0;
+        break;
+
+    case NVME_CMD_ZONE_MGMT_RECV_LIST_ZSE:
+        zs_list = NVME_ZS_ZSE;
+        break;
+
+    case NVME_CMD_ZONE_MGMT_RECV_LIST_ZSIO:
+        zs_list = NVME_ZS_ZSIO;
+        break;
+
+    case NVME_CMD_ZONE_MGMT_RECV_LIST_ZSEO:
+        zs_list = NVME_ZS_ZSEO;
+        break;
+
+    case NVME_CMD_ZONE_MGMT_RECV_LIST_ZSC:
+        zs_list = NVME_ZS_ZSC;
+        break;
+
+    case NVME_CMD_ZONE_MGMT_RECV_LIST_ZSF:
+        zs_list = NVME_ZS_ZSF;
+        break;
+
+    case NVME_CMD_ZONE_MGMT_RECV_LIST_ZSRO:
+        zs_list = NVME_ZS_ZSRO;
+        break;
+
+    case NVME_CMD_ZONE_MGMT_RECV_LIST_ZSO:
+        zs_list = NVME_ZS_ZSO;
+        break;
+
+    default:
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    status = nvme_check_mdts(n, len);
+    if (status) {
+        return status;
+    }
+
+    if (!nvme_ns_get_zone(ns, slba)) {
+        trace_pci_nvme_err_invalid_zone(nvme_cid(req), slba);
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    zidx_begin = zidx = nvme_ns_zone_idx(ns, slba);
+
+    /*
+     * The host may ask for fewer bytes than the report header occupies.
+     * Always allocate room for the whole header so that storing the zone
+     * count below stays inside the allocation; only len bytes are
+     * transferred to the host.
+     */
+    buf_len = len < sizeof(NvmeZoneReportHeader) ?
+        sizeof(NvmeZoneReportHeader) : len;
+    buf = g_malloc0(buf_len);
+
+    /* Entries start right after the header; track the position as an offset. */
+    off = sizeof(NvmeZoneReportHeader);
+
+    while (off + zes <= len && zidx < ns->zns.num_zones) {
+        zone = &ns->zns.zones[zidx++];
+
+        if (zs_list && zs_list != nvme_zs(zone)) {
+            continue;
+        }
+
+        num_zones++;
+
+        memcpy(buf + off, zone->zd, sizeof(NvmeZoneDescriptor));
+
+        if (zra == NVME_CMD_ZONE_MGMT_RECV_EXTENDED_REPORT_ZONES) {
+            /*
+             * NOTE(review): this relies on zone->zde addressing a slot of
+             * nvme_ns_zdes_bytes() bytes owned by this zone - verify the
+             * stride used where zone->zde is initialized.
+             */
+            memcpy(buf + off + sizeof(NvmeZoneDescriptor), zone->zde,
+                   nvme_ns_zdes_bytes(ns));
+        }
+
+        off += zes;
+    }
+
+    /* A full (non-partial) report counts every match, not just those sent. */
+    if (!(recv->zrasf & NVME_CMD_ZONE_MGMT_RECEIVE_PARTIAL)) {
+        if (!zs_list) {
+            num_zones = ns->zns.num_zones - zidx_begin;
+        } else {
+            num_zones = 0;
+            for (int i = zidx_begin; i < ns->zns.num_zones; i++) {
+                zone = &ns->zns.zones[i];
+
+                if (zs_list == nvme_zs(zone)) {
+                    num_zones++;
+                }
+            }
+        }
+    }
+
+    /* The zone count is the first (little-endian) field of the header. */
+    stq_le_p(buf, (uint64_t)num_zones);
+
+    status = nvme_dma(n, buf, len, DMA_DIRECTION_FROM_DEVICE, req);
+    g_free(buf);
+
+    return status;
+}
+
static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
{
NvmeNamespace *ns = req->ns;
@@ -1425,6 +1558,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
case NVME_CMD_WRITE:
case NVME_CMD_READ:
return nvme_rwz(n, req);
+ case NVME_CMD_ZONE_MGMT_RECV:
+ return nvme_zone_mgmt_recv(n, req);
default:
trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
return NVME_INVALID_OPCODE | NVME_DNR;
@@ -42,6 +42,7 @@ pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, cons
pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
pci_nvme_rwz(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t len, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" len %"PRIu64" lba 0x%"PRIx64""
pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
+pci_nvme_zone_mgmt_recv(uint16_t cid, uint32_t nsid, uint64_t slba, uint64_t len, uint8_t zra, uint8_t zrasp, uint8_t zrasf) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" len %"PRIu64" zra 0x%"PRIx8" zrasp 0x%"PRIx8" zrasf 0x%"PRIx8""
pci_nvme_allocate(uint32_t ns, uint64_t slba, uint32_t nlb) "nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32""
pci_nvme_do_aio(uint16_t cid, uint8_t opc, const char *opname, const char *blkname, int64_t offset, size_t len) "cid %"PRIu16" opc 0x%"PRIx8" opname '%s' blk '%s' offset %"PRId64" len %zu"
pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""