@@ -53,6 +53,7 @@
#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qemu/error-report.h"
+#include "crypto/random.h"
#include "hw/block/block.h"
#include "hw/pci/msix.h"
#include "hw/pci/pci.h"
@@ -125,6 +126,98 @@ static uint16_t nvme_sqid(NvmeRequest *req)
return le16_to_cpu(req->sq->sqid);
}
+/*
+ * Add a zone to the tail of a zone list.
+ */
+static void nvme_add_zone_tail(NvmeCtrl *n, NvmeNamespace *ns, NvmeZoneList *zl,
+ NvmeZone *zone)
+{
+ uint32_t idx = (uint32_t)(zone - ns->zone_array);
+
+ assert(nvme_zone_not_in_list(zone));
+
+ if (!zl->size) {
+ zl->head = zl->tail = idx;
+ zone->next = zone->prev = NVME_ZONE_LIST_NIL;
+ } else {
+ ns->zone_array[zl->tail].next = idx;
+ zone->prev = zl->tail;
+ zone->next = NVME_ZONE_LIST_NIL;
+ zl->tail = idx;
+ }
+ zl->size++;
+}
+
+/*
+ * Remove a zone from a zone list. The zone must be linked in the list.
+ */
+static void nvme_remove_zone(NvmeCtrl *n, NvmeNamespace *ns, NvmeZoneList *zl,
+ NvmeZone *zone)
+{
+ uint32_t idx = (uint32_t)(zone - ns->zone_array);
+
+ assert(!nvme_zone_not_in_list(zone));
+
+ --zl->size;
+ if (zl->size == 0) {
+ zl->head = NVME_ZONE_LIST_NIL;
+ zl->tail = NVME_ZONE_LIST_NIL;
+ } else if (idx == zl->head) {
+ zl->head = zone->next;
+ ns->zone_array[zl->head].prev = NVME_ZONE_LIST_NIL;
+ } else if (idx == zl->tail) {
+ zl->tail = zone->prev;
+ ns->zone_array[zl->tail].next = NVME_ZONE_LIST_NIL;
+ } else {
+ ns->zone_array[zone->next].prev = zone->prev;
+ ns->zone_array[zone->prev].next = zone->next;
+ }
+
+ zone->prev = zone->next = 0;
+}
+
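+/*
+ * Transition a zone to the given state, maintaining the per-state zone
+ * lists: the zone is unlinked from its current list (if any) and, for the
+ * Open, Closed and Full states, appended to the matching list.
+ */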
+static void nvme_assign_zone_state(NvmeCtrl *n, NvmeNamespace *ns,
+ NvmeZone *zone, uint8_t state)
+{
+ if (!nvme_zone_not_in_list(zone)) {
+ switch (nvme_get_zone_state(zone)) {
+ case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+ nvme_remove_zone(n, ns, ns->exp_open_zones, zone);
+ break;
+ case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ nvme_remove_zone(n, ns, ns->imp_open_zones, zone);
+ break;
+ case NVME_ZONE_STATE_CLOSED:
+ nvme_remove_zone(n, ns, ns->closed_zones, zone);
+ break;
+ case NVME_ZONE_STATE_FULL:
+ nvme_remove_zone(n, ns, ns->full_zones, zone);
+ }
+ }
+
+ nvme_set_zone_state(zone, state);
+
+ switch (state) {
+ case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+ nvme_add_zone_tail(n, ns, ns->exp_open_zones, zone);
+ break;
+ case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ nvme_add_zone_tail(n, ns, ns->imp_open_zones, zone);
+ break;
+ case NVME_ZONE_STATE_CLOSED:
+ nvme_add_zone_tail(n, ns, ns->closed_zones, zone);
+ break;
+ case NVME_ZONE_STATE_FULL:
+ nvme_add_zone_tail(n, ns, ns->full_zones, zone);
+ break;
+ default:
+ zone->d.za = 0;
+ /* fall through */
+ case NVME_ZONE_STATE_READ_ONLY:
+ zone->tstamp = 0;
+ }
+}
+
static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
{
hwaddr low = n->ctrl_mem.addr;
@@ -483,6 +576,33 @@ static void nvme_post_cqes(void *opaque)
}
}
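+
+/*
+ * Fill the request data buffer (scatter-gather list or IOV) with the
+ * configured fill pattern, starting at the given byte offset.
+ */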
+static void nvme_fill_data(QEMUSGList *qsg, QEMUIOVector *iov,
+ uint64_t offset, uint8_t pattern)
+{
+ ScatterGatherEntry *entry;
+ uint32_t len, ent_len;
+
+ if (qsg->nsg > 0) {
+ entry = qsg->sg;
+ for (len = qsg->size; len > 0; len -= ent_len) {
+ ent_len = MIN(len, entry->len);
+ if (offset > ent_len) {
+ offset -= ent_len;
+ } else if (offset != 0) {
+ dma_memory_set(qsg->as, entry->base + offset,
+ pattern, ent_len - offset);
+ offset = 0;
+ } else {
+ dma_memory_set(qsg->as, entry->base, pattern, ent_len);
+ }
+ entry++;
+ }
+ } else if (iov->iov) {
+ qemu_iovec_memset(iov, offset, pattern,
+ iov_size(iov->iov, iov->niov) - offset);
+ }
+}
+
static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
{
assert(cq->cqid == req->sq->cqid);
@@ -595,6 +715,138 @@ static inline uint16_t nvme_check_bounds(NvmeCtrl *n, NvmeNamespace *ns,
return NVME_SUCCESS;
}
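+
+/*
+ * Check that a write does not cross the zone's writable boundary and that
+ * the zone is in a state that allows writing.
+ */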
+static uint16_t nvme_check_zone_write(NvmeZone *zone, uint64_t slba,
+ uint32_t nlb)
+{
+ uint16_t status;
+
+ if (unlikely((slba + nlb) > nvme_zone_wr_boundary(zone))) {
+ return NVME_ZONE_BOUNDARY_ERROR;
+ }
+
+ switch (nvme_get_zone_state(zone)) {
+ case NVME_ZONE_STATE_EMPTY:
+ case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+ case NVME_ZONE_STATE_CLOSED:
+ status = NVME_SUCCESS;
+ break;
+ case NVME_ZONE_STATE_FULL:
+ status = NVME_ZONE_FULL;
+ break;
+ case NVME_ZONE_STATE_OFFLINE:
+ status = NVME_ZONE_OFFLINE;
+ break;
+ case NVME_ZONE_STATE_READ_ONLY:
+ status = NVME_ZONE_READ_ONLY;
+ break;
+ default:
+ assert(false);
+ }
+ return status;
+}
+
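+/*
+ * Check a read against the zone(s) it touches. Reads that cross a zone
+ * boundary are rejected unless cross-zone reads are enabled, and any zone
+ * covered by the read must not be Offline.
+ */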
+static uint16_t nvme_check_zone_read(NvmeCtrl *n, NvmeZone *zone, uint64_t slba,
+ uint32_t nlb, bool zone_x_ok)
+{
+ uint64_t lba = slba, count;
+ uint16_t status;
+ uint8_t zs;
+
+ do {
+ if (!zone_x_ok && (lba + nlb > nvme_zone_rd_boundary(n, zone))) {
+ return NVME_ZONE_BOUNDARY_ERROR | NVME_DNR;
+ }
+
+ zs = nvme_get_zone_state(zone);
+ switch (zs) {
+ case NVME_ZONE_STATE_EMPTY:
+ case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+ case NVME_ZONE_STATE_FULL:
+ case NVME_ZONE_STATE_CLOSED:
+ case NVME_ZONE_STATE_READ_ONLY:
+ status = NVME_SUCCESS;
+ break;
+ case NVME_ZONE_STATE_OFFLINE:
+ status = NVME_ZONE_OFFLINE | NVME_DNR;
+ break;
+ default:
+ assert(false);
+ }
+ if (status != NVME_SUCCESS) {
+ break;
+ }
+
+ if (lba + nlb > nvme_zone_rd_boundary(n, zone)) {
+ count = nvme_zone_rd_boundary(n, zone) - lba;
+ } else {
+ count = nlb;
+ }
+
+ lba += count;
+ nlb -= count;
+ zone++;
+ } while (nlb);
+
+ return status;
+}
+
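+/* Return the index of the zone that contains the given LBA. */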
+static inline uint32_t nvme_zone_idx(NvmeCtrl *n, uint64_t slba)
+{
+ return n->zone_size_log2 > 0 ? slba >> n->zone_size_log2 :
+ slba / n->zone_size;
+}
+
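+/*
+ * Advance the zone write pointer after a successful write, write zeroes or
+ * zone append, transitioning the zone to Full or Implicitly Opened as
+ * appropriate, and report the resulting write pointer in the CQE.
+ */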
+static void nvme_finalize_zone_write(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
+ NvmeNamespace *ns;
+ NvmeZone *zone;
+ uint64_t slba;
+ uint32_t nlb, zone_idx;
+ uint8_t zs;
+
+ if (rw->opcode != NVME_CMD_WRITE &&
+ rw->opcode != NVME_CMD_ZONE_APND &&
+ rw->opcode != NVME_CMD_WRITE_ZEROES) {
+ return;
+ }
+
+ slba = le64_to_cpu(rw->slba);
+ nlb = le16_to_cpu(rw->nlb) + 1;
+ zone_idx = nvme_zone_idx(n, slba);
+ assert(zone_idx < n->num_zones);
+ ns = req->ns;
+ zone = &ns->zone_array[zone_idx];
+
+ zone->d.wp += nlb;
+
+ zs = nvme_get_zone_state(zone);
+ if (zone->d.wp == nvme_zone_wr_boundary(zone)) {
+ switch (zs) {
+ case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+ case NVME_ZONE_STATE_CLOSED:
+ case NVME_ZONE_STATE_EMPTY:
+ break;
+ default:
+ assert(false);
+ }
+ nvme_assign_zone_state(n, ns, zone, NVME_ZONE_STATE_FULL);
+ } else {
+ switch (zs) {
+ case NVME_ZONE_STATE_EMPTY:
+ case NVME_ZONE_STATE_CLOSED:
+ nvme_assign_zone_state(n, ns, zone,
+ NVME_ZONE_STATE_IMPLICITLY_OPEN);
+ }
+ }
+
+    req->cqe.result64 = cpu_to_le64(zone->d.wp);
+}
+
static void nvme_rw_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
@@ -605,6 +857,13 @@ static void nvme_rw_cb(void *opaque, int ret)
trace_pci_nvme_rw_cb(nvme_cid(req));
if (!ret) {
+ if (n->params.zoned) {
+ if (req->fill_ofs >= 0) {
+ nvme_fill_data(&req->qsg, &req->iov, req->fill_ofs,
+ n->params.fill_pattern);
+ }
+ nvme_finalize_zone_write(n, req);
+ }
block_acct_done(blk_get_stats(n->conf.blk), &req->acct);
req->status = NVME_SUCCESS;
} else {
@@ -628,12 +887,14 @@ static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
{
NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
NvmeNamespace *ns = req->ns;
+ NvmeZone *zone = NULL;
const uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
const uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
uint64_t slba = le64_to_cpu(rw->slba);
uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
uint64_t offset = slba << data_shift;
uint32_t count = nlb << data_shift;
+ uint32_t zone_idx;
uint16_t status;
trace_pci_nvme_write_zeroes(nvme_cid(req), slba, nlb);
@@ -644,25 +905,47 @@ static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
return status;
}
+ if (n->params.zoned) {
+ zone_idx = nvme_zone_idx(n, slba);
+ assert(zone_idx < n->num_zones);
+ zone = &ns->zone_array[zone_idx];
+
+ status = nvme_check_zone_write(zone, slba, nlb);
+ if (status != NVME_SUCCESS) {
+ trace_pci_nvme_err_zone_write_not_ok(slba, nlb, status);
+ return status | NVME_DNR;
+ }
+
+ assert(nvme_wp_is_valid(zone));
+ if (unlikely(slba != zone->d.wp)) {
+ trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba,
+ zone->d.wp);
+ return NVME_ZONE_INVALID_WRITE | NVME_DNR;
+ }
+ }
+
block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
BLOCK_ACCT_WRITE);
req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, offset, count,
BDRV_REQ_MAY_UNMAP, nvme_rw_cb, req);
+
return NVME_NO_COMPLETE;
}
-static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
+static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req, bool append)
{
NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
NvmeNamespace *ns = req->ns;
+ NvmeZone *zone = NULL;
     uint32_t nlb  = le16_to_cpu(rw->nlb) + 1;
uint64_t slba = le64_to_cpu(rw->slba);
uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
uint64_t data_size = (uint64_t)nlb << data_shift;
- uint64_t data_offset = slba << data_shift;
- int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;
+ uint64_t data_offset;
+ uint32_t zone_idx = 0;
+ bool is_write = rw->opcode == NVME_CMD_WRITE || append;
enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
uint16_t status;
@@ -682,11 +965,77 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
return status;
}
+ if (n->params.zoned) {
+ zone_idx = nvme_zone_idx(n, slba);
+ assert(zone_idx < n->num_zones);
+ zone = &ns->zone_array[zone_idx];
+
+ if (is_write) {
+ status = nvme_check_zone_write(zone, slba, nlb);
+ if (status != NVME_SUCCESS) {
+ trace_pci_nvme_err_zone_write_not_ok(slba, nlb, status);
+ return status | NVME_DNR;
+ }
+
+ assert(nvme_wp_is_valid(zone));
+ if (append) {
+ if (unlikely(slba != zone->d.zslba)) {
+ trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba);
+ return NVME_ZONE_INVALID_WRITE | NVME_DNR;
+ }
+ if (data_size > (n->page_size << n->zasl)) {
+ trace_pci_nvme_err_append_too_large(slba, nlb, n->zasl);
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+ slba = zone->d.wp;
+ } else if (unlikely(slba != zone->d.wp)) {
+ trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba,
+ zone->d.wp);
+ return NVME_ZONE_INVALID_WRITE | NVME_DNR;
+ }
+ req->fill_ofs = -1LL;
+ } else {
+ status = nvme_check_zone_read(n, zone, slba, nlb,
+ n->params.cross_zone_read);
+ if (status != NVME_SUCCESS) {
+ trace_pci_nvme_err_zone_read_not_ok(slba, nlb, status);
+ return status | NVME_DNR;
+ }
+
+ if (slba + nlb > zone->d.wp) {
+ /*
+ * All or some data is read above the WP. Need to
+ * fill out the buffer area that has no backing data
+ * with a predefined data pattern (zeros by default)
+ */
+ if (slba >= zone->d.wp) {
+ req->fill_ofs = 0;
+ } else {
+ req->fill_ofs = ((zone->d.wp - slba) << data_shift);
+ }
+ } else {
+ req->fill_ofs = -1LL;
+ }
+ }
+ } else if (append) {
+ trace_pci_nvme_err_invalid_opc(rw->opcode);
+ return NVME_INVALID_OPCODE | NVME_DNR;
+ }
+
if (nvme_map_dptr(n, data_size, req)) {
block_acct_invalid(blk_get_stats(n->conf.blk), acct);
return NVME_INVALID_FIELD | NVME_DNR;
}
+ if (unlikely(n->params.zoned && req->fill_ofs == 0)) {
+ /* No backend I/O necessary, only need to fill the buffer */
+ nvme_fill_data(&req->qsg, &req->iov, 0, n->params.fill_pattern);
+ req->status = NVME_SUCCESS;
+ return NVME_SUCCESS;
+ }
+
+ data_offset = slba << data_shift;
+
if (req->qsg.nsg > 0) {
block_acct_start(blk_get_stats(n->conf.blk), &req->acct, req->qsg.size,
acct);
@@ -708,6 +1057,380 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
return NVME_NO_COMPLETE;
}
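+
+/*
+ * Decode the starting LBA from CDW10/CDW11 of a zone management command
+ * and translate it into a zone index, validating it against the namespace
+ * size.
+ */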
+static uint16_t nvme_get_mgmt_zone_slba_idx(NvmeCtrl *n, NvmeNamespace *ns,
+ NvmeCmd *c, uint64_t *slba,
+ uint32_t *zone_idx)
+{
+ uint32_t dw10 = le32_to_cpu(c->cdw10);
+ uint32_t dw11 = le32_to_cpu(c->cdw11);
+
+ if (!n->params.zoned) {
+ trace_pci_nvme_err_invalid_opc(c->opcode);
+ return NVME_INVALID_OPCODE | NVME_DNR;
+ }
+
+ *slba = ((uint64_t)dw11) << 32 | dw10;
+    if (unlikely(*slba >= le64_to_cpu(ns->id_ns.nsze))) {
+        trace_pci_nvme_err_invalid_lba_range(*slba, 0,
+                                             le64_to_cpu(ns->id_ns.nsze));
+ *slba = 0;
+ return NVME_LBA_RANGE | NVME_DNR;
+ }
+
+ *zone_idx = nvme_zone_idx(n, *slba);
+ assert(*zone_idx < n->num_zones);
+
+ return NVME_SUCCESS;
+}
+
+static uint16_t nvme_open_zone(NvmeCtrl *n, NvmeNamespace *ns,
+ NvmeZone *zone, uint8_t state)
+{
+ switch (state) {
+ case NVME_ZONE_STATE_EMPTY:
+ case NVME_ZONE_STATE_CLOSED:
+ case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ nvme_assign_zone_state(n, ns, zone, NVME_ZONE_STATE_EXPLICITLY_OPEN);
+ /* fall through */
+ case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+ return NVME_SUCCESS;
+ }
+
+ return NVME_ZONE_INVAL_TRANSITION;
+}
+
+static bool nvme_cond_open_all(uint8_t state)
+{
+ return state == NVME_ZONE_STATE_CLOSED;
+}
+
+static uint16_t nvme_close_zone(NvmeCtrl *n, NvmeNamespace *ns,
+ NvmeZone *zone, uint8_t state)
+{
+ switch (state) {
+ case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+ case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ nvme_assign_zone_state(n, ns, zone, NVME_ZONE_STATE_CLOSED);
+ /* fall through */
+ case NVME_ZONE_STATE_CLOSED:
+ return NVME_SUCCESS;
+ }
+
+ return NVME_ZONE_INVAL_TRANSITION;
+}
+
+static bool nvme_cond_close_all(uint8_t state)
+{
+ return state == NVME_ZONE_STATE_IMPLICITLY_OPEN ||
+ state == NVME_ZONE_STATE_EXPLICITLY_OPEN;
+}
+
+static uint16_t nvme_finish_zone(NvmeCtrl *n, NvmeNamespace *ns,
+ NvmeZone *zone, uint8_t state)
+{
+ switch (state) {
+ case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+ case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ case NVME_ZONE_STATE_CLOSED:
+ case NVME_ZONE_STATE_EMPTY:
+ zone->d.wp = nvme_zone_wr_boundary(zone);
+ nvme_assign_zone_state(n, ns, zone, NVME_ZONE_STATE_FULL);
+ /* fall through */
+ case NVME_ZONE_STATE_FULL:
+ return NVME_SUCCESS;
+ }
+
+ return NVME_ZONE_INVAL_TRANSITION;
+}
+
+static bool nvme_cond_finish_all(uint8_t state)
+{
+ return state == NVME_ZONE_STATE_IMPLICITLY_OPEN ||
+ state == NVME_ZONE_STATE_EXPLICITLY_OPEN ||
+ state == NVME_ZONE_STATE_CLOSED;
+}
+
+static uint16_t nvme_reset_zone(NvmeCtrl *n, NvmeNamespace *ns,
+ NvmeZone *zone, uint8_t state)
+{
+ switch (state) {
+ case NVME_ZONE_STATE_EXPLICITLY_OPEN:
+ case NVME_ZONE_STATE_IMPLICITLY_OPEN:
+ case NVME_ZONE_STATE_CLOSED:
+ case NVME_ZONE_STATE_FULL:
+ zone->d.wp = zone->d.zslba;
+ nvme_assign_zone_state(n, ns, zone, NVME_ZONE_STATE_EMPTY);
+ /* fall through */
+ case NVME_ZONE_STATE_EMPTY:
+ return NVME_SUCCESS;
+ }
+
+ return NVME_ZONE_INVAL_TRANSITION;
+}
+
+static bool nvme_cond_reset_all(uint8_t state)
+{
+ return state == NVME_ZONE_STATE_IMPLICITLY_OPEN ||
+ state == NVME_ZONE_STATE_EXPLICITLY_OPEN ||
+ state == NVME_ZONE_STATE_CLOSED ||
+ state == NVME_ZONE_STATE_FULL;
+}
+
+static uint16_t nvme_offline_zone(NvmeCtrl *n, NvmeNamespace *ns,
+ NvmeZone *zone, uint8_t state)
+{
+ switch (state) {
+ case NVME_ZONE_STATE_READ_ONLY:
+ nvme_assign_zone_state(n, ns, zone, NVME_ZONE_STATE_OFFLINE);
+ /* fall through */
+ case NVME_ZONE_STATE_OFFLINE:
+ return NVME_SUCCESS;
+ }
+
+ return NVME_ZONE_INVAL_TRANSITION;
+}
+
+static bool nvme_cond_offline_all(uint8_t state)
+{
+ return state == NVME_ZONE_STATE_READ_ONLY;
+}
+
+typedef uint16_t (*op_handler_t)(NvmeCtrl *, NvmeNamespace *, NvmeZone *,
+ uint8_t);
+typedef bool (*need_to_proc_zone_t)(uint8_t);
+
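+/*
+ * Perform a zone state transition on a single zone or, if "all" is set,
+ * on every zone in the namespace that the given predicate selects.
+ */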
+static uint16_t nvme_do_zone_op(NvmeCtrl *n, NvmeNamespace *ns,
+ NvmeZone *zone, uint8_t state, bool all,
+ op_handler_t op_hndlr,
+ need_to_proc_zone_t proc_zone)
+{
+ int i;
+ uint16_t status = 0;
+
+ if (!all) {
+ status = op_hndlr(n, ns, zone, state);
+ } else {
+ for (i = 0; i < n->num_zones; i++, zone++) {
+ state = nvme_get_zone_state(zone);
+ if (proc_zone(state)) {
+ status = op_hndlr(n, ns, zone, state);
+ if (status != NVME_SUCCESS) {
+ break;
+ }
+ }
+ }
+ }
+
+ return status;
+}
+
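+/*
+ * Zone Management Send: apply the requested zone action (open, close,
+ * finish, reset or offline) to the addressed zone or to all eligible
+ * zones.
+ */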
+static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeCmd *cmd = (NvmeCmd *)&req->cmd;
+ NvmeNamespace *ns = req->ns;
+ uint32_t dw13 = le32_to_cpu(cmd->cdw13);
+ uint64_t slba = 0;
+ uint32_t zone_idx = 0;
+ uint16_t status;
+ uint8_t action, state;
+ bool all;
+ NvmeZone *zone;
+
+ action = dw13 & 0xff;
+ all = dw13 & 0x100;
+
+ req->status = NVME_SUCCESS;
+
+ if (!all) {
+ status = nvme_get_mgmt_zone_slba_idx(n, ns, cmd, &slba, &zone_idx);
+ if (status) {
+ return status;
+ }
+ }
+
+ zone = &ns->zone_array[zone_idx];
+ if (slba != zone->d.zslba) {
+ trace_pci_nvme_err_unaligned_zone_cmd(action, slba, zone->d.zslba);
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+ state = nvme_get_zone_state(zone);
+
+ switch (action) {
+
+ case NVME_ZONE_ACTION_OPEN:
+ trace_pci_nvme_open_zone(slba, zone_idx, all);
+        status = nvme_do_zone_op(n, ns, zone, state, all,
+ nvme_open_zone, nvme_cond_open_all);
+ break;
+
+ case NVME_ZONE_ACTION_CLOSE:
+ trace_pci_nvme_close_zone(slba, zone_idx, all);
+        status = nvme_do_zone_op(n, ns, zone, state, all,
+ nvme_close_zone, nvme_cond_close_all);
+ break;
+
+ case NVME_ZONE_ACTION_FINISH:
+ trace_pci_nvme_finish_zone(slba, zone_idx, all);
+        status = nvme_do_zone_op(n, ns, zone, state, all,
+ nvme_finish_zone, nvme_cond_finish_all);
+ break;
+
+ case NVME_ZONE_ACTION_RESET:
+ trace_pci_nvme_reset_zone(slba, zone_idx, all);
+        status = nvme_do_zone_op(n, ns, zone, state, all,
+ nvme_reset_zone, nvme_cond_reset_all);
+ break;
+
+ case NVME_ZONE_ACTION_OFFLINE:
+ trace_pci_nvme_offline_zone(slba, zone_idx, all);
+        status = nvme_do_zone_op(n, ns, zone, state, all,
+ nvme_offline_zone, nvme_cond_offline_all);
+ break;
+
+ case NVME_ZONE_ACTION_SET_ZD_EXT:
+ trace_pci_nvme_set_descriptor_extension(slba, zone_idx);
+ return NVME_INVALID_FIELD | NVME_DNR;
+
+ default:
+ trace_pci_nvme_err_invalid_mgmt_action(action);
+ status = NVME_INVALID_FIELD;
+ }
+
+ if (status == NVME_ZONE_INVAL_TRANSITION) {
+ trace_pci_nvme_err_invalid_zone_state_transition(state, action, slba,
+ zone->d.za);
+ }
+ if (status) {
+ status |= NVME_DNR;
+ }
+
+ return status;
+}
+
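+/* Return true if a zone's state matches the report filter (ZRASF). */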
+static bool nvme_zone_matches_filter(uint32_t zafs, NvmeZone *zl)
+{
+ int zs = nvme_get_zone_state(zl);
+
+ switch (zafs) {
+ case NVME_ZONE_REPORT_ALL:
+ return true;
+ case NVME_ZONE_REPORT_EMPTY:
+ return (zs == NVME_ZONE_STATE_EMPTY);
+ case NVME_ZONE_REPORT_IMPLICITLY_OPEN:
+ return (zs == NVME_ZONE_STATE_IMPLICITLY_OPEN);
+ case NVME_ZONE_REPORT_EXPLICITLY_OPEN:
+ return (zs == NVME_ZONE_STATE_EXPLICITLY_OPEN);
+ case NVME_ZONE_REPORT_CLOSED:
+ return (zs == NVME_ZONE_STATE_CLOSED);
+ case NVME_ZONE_REPORT_FULL:
+ return (zs == NVME_ZONE_STATE_FULL);
+ case NVME_ZONE_REPORT_READ_ONLY:
+ return (zs == NVME_ZONE_STATE_READ_ONLY);
+ case NVME_ZONE_REPORT_OFFLINE:
+ return (zs == NVME_ZONE_STATE_OFFLINE);
+ default:
+ return false;
+ }
+}
+
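+/*
+ * Zone Management Receive: build a Report Zones data buffer for the zones
+ * that match the requested filter and transfer it to the host.
+ */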
+static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeCmd *cmd = (NvmeCmd *)&req->cmd;
+ NvmeNamespace *ns = req->ns;
+ uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
+ uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
+    /* cdw12 is the zero-based number of dwords to return; convert to bytes */
+ uint32_t len = (le32_to_cpu(cmd->cdw12) + 1) << 2;
+ uint32_t dw13 = le32_to_cpu(cmd->cdw13);
+ uint32_t zone_idx, zra, zrasf, partial;
+ uint64_t max_zones, nr_zones = 0;
+ uint16_t ret;
+ uint64_t slba;
+ NvmeZoneDescr *z;
+ NvmeZone *zs;
+ NvmeZoneReportHeader *header;
+ void *buf, *buf_p;
+ size_t zone_entry_sz;
+
+ req->status = NVME_SUCCESS;
+
+ ret = nvme_get_mgmt_zone_slba_idx(n, ns, cmd, &slba, &zone_idx);
+ if (ret) {
+ return ret;
+ }
+
+ if (len < sizeof(NvmeZoneReportHeader)) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ zra = dw13 & 0xff;
+ if (!(zra == NVME_ZONE_REPORT || zra == NVME_ZONE_REPORT_EXTENDED)) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ if (zra == NVME_ZONE_REPORT_EXTENDED) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ zrasf = (dw13 >> 8) & 0xff;
+ if (zrasf > NVME_ZONE_REPORT_OFFLINE) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ partial = (dw13 >> 16) & 0x01;
+
+ zone_entry_sz = sizeof(NvmeZoneDescr);
+
+ max_zones = (len - sizeof(NvmeZoneReportHeader)) / zone_entry_sz;
+ buf = g_malloc0(len);
+
+ header = (NvmeZoneReportHeader *)buf;
+ buf_p = buf + sizeof(NvmeZoneReportHeader);
+
+ while (zone_idx < n->num_zones && nr_zones < max_zones) {
+ zs = &ns->zone_array[zone_idx];
+
+ if (!nvme_zone_matches_filter(zrasf, zs)) {
+ zone_idx++;
+ continue;
+ }
+
+ z = (NvmeZoneDescr *)buf_p;
+ buf_p += sizeof(NvmeZoneDescr);
+ nr_zones++;
+
+ z->zt = zs->d.zt;
+ z->zs = zs->d.zs;
+ z->zcap = cpu_to_le64(zs->d.zcap);
+ z->zslba = cpu_to_le64(zs->d.zslba);
+ z->za = zs->d.za;
+
+ if (nvme_wp_is_valid(zs)) {
+ z->wp = cpu_to_le64(zs->d.wp);
+ } else {
+ z->wp = cpu_to_le64(~0ULL);
+ }
+
+ zone_idx++;
+ }
+
+ if (!partial) {
+ for (; zone_idx < n->num_zones; zone_idx++) {
+ zs = &ns->zone_array[zone_idx];
+ if (nvme_zone_matches_filter(zrasf, zs)) {
+ nr_zones++;
+ }
+ }
+ }
+ header->nr_zones = cpu_to_le64(nr_zones);
+
+ ret = nvme_dma_prp(n, (uint8_t *)buf, len, prp1, prp2,
+ DMA_DIRECTION_FROM_DEVICE, req);
+ g_free(buf);
+
+ return ret;
+}
+
static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
{
uint32_t nsid = le32_to_cpu(req->cmd.nsid);
@@ -726,9 +1449,15 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
return nvme_flush(n, req);
case NVME_CMD_WRITE_ZEROES:
return nvme_write_zeroes(n, req);
+ case NVME_CMD_ZONE_APND:
+ return nvme_rw(n, req, true);
case NVME_CMD_WRITE:
case NVME_CMD_READ:
- return nvme_rw(n, req);
+ return nvme_rw(n, req, false);
+ case NVME_CMD_ZONE_MGMT_SEND:
+ return nvme_zone_mgmt_send(n, req);
+ case NVME_CMD_ZONE_MGMT_RECV:
+ return nvme_zone_mgmt_recv(n, req);
default:
trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
return NVME_INVALID_OPCODE | NVME_DNR;
@@ -957,7 +1686,7 @@ static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
DMA_DIRECTION_FROM_DEVICE, req);
}
-static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint32_t buf_len,
+static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len,
uint64_t off, NvmeRequest *req)
{
NvmeCmd *cmd = &req->cmd;
@@ -985,11 +1714,19 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint32_t buf_len,
acs[NVME_ADM_CMD_GET_LOG_PAGE] = NVME_CMD_EFFECTS_CSUPP;
acs[NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFFECTS_CSUPP;
- iocs[NVME_CMD_FLUSH] = NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC;
- iocs[NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFFECTS_CSUPP |
- NVME_CMD_EFFECTS_LBCC;
- iocs[NVME_CMD_WRITE] = NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC;
- iocs[NVME_CMD_READ] = NVME_CMD_EFFECTS_CSUPP;
+ if (NVME_CC_CSS(n->bar.cc) != CSS_ADMIN_ONLY) {
+ iocs[NVME_CMD_FLUSH] = NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC;
+ iocs[NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFFECTS_CSUPP |
+ NVME_CMD_EFFECTS_LBCC;
+ iocs[NVME_CMD_WRITE] = NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC;
+ iocs[NVME_CMD_READ] = NVME_CMD_EFFECTS_CSUPP;
+ }
+ if (csi == NVME_CSI_ZONED && NVME_CC_CSS(n->bar.cc) == CSS_CSI) {
+ iocs[NVME_CMD_ZONE_APND] = NVME_CMD_EFFECTS_CSUPP |
+ NVME_CMD_EFFECTS_LBCC;
+ iocs[NVME_CMD_ZONE_MGMT_SEND] = NVME_CMD_EFFECTS_CSUPP;
+ iocs[NVME_CMD_ZONE_MGMT_RECV] = NVME_CMD_EFFECTS_CSUPP;
+ }
trans_len = MIN(sizeof(cmd_eff_log) - off, buf_len);
@@ -1008,6 +1745,7 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
uint8_t lid = dw10 & 0xff;
uint8_t lsp = (dw10 >> 8) & 0xf;
uint8_t rae = (dw10 >> 15) & 0x1;
+ uint8_t csi = le32_to_cpu(cmd->cdw14) >> 24;
uint32_t numdl, numdu;
uint64_t off, lpol, lpou;
size_t len;
@@ -1041,7 +1779,7 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
case NVME_LOG_FW_SLOT_INFO:
return nvme_fw_log_info(n, len, off, req);
case NVME_LOG_CMD_EFFECTS:
- return nvme_cmd_effects(n, len, off, req);
+ return nvme_cmd_effects(n, csi, len, off, req);
default:
trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid);
return NVME_INVALID_FIELD | NVME_DNR;
@@ -1166,6 +1904,16 @@ static uint16_t nvme_rpt_empty_id_struct(NvmeCtrl *n, uint64_t prp1,
return NVME_SUCCESS;
}
+static inline bool nvme_csi_has_nvm_support(NvmeNamespace *ns)
+{
+ switch (ns->csi) {
+ case NVME_CSI_NVM:
+ case NVME_CSI_ZONED:
+ return true;
+ }
+ return false;
+}
+
static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeRequest *req)
{
NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
@@ -1181,13 +1929,22 @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeRequest *req)
static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, NvmeRequest *req)
{
NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
+ NvmeIdCtrlZoned *id;
uint64_t prp1 = le64_to_cpu(c->prp1);
uint64_t prp2 = le64_to_cpu(c->prp2);
+ uint16_t ret;
trace_pci_nvme_identify_ctrl_csi(c->csi);
if (c->csi == NVME_CSI_NVM) {
return nvme_rpt_empty_id_struct(n, prp1, prp2, req);
+ } else if (c->csi == NVME_CSI_ZONED && n->params.zoned) {
+ id = g_malloc0(sizeof(*id));
+ id->zasl = n->zasl;
+ ret = nvme_dma_prp(n, (uint8_t *)id, sizeof(*id), prp1, prp2,
+ DMA_DIRECTION_FROM_DEVICE, req);
+ g_free(id);
+ return ret;
}
return NVME_INVALID_FIELD | NVME_DNR;
@@ -1216,8 +1973,12 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req,
return nvme_rpt_empty_id_struct(n, prp1, prp2, req);
}
- return nvme_dma_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns), prp1,
- prp2, DMA_DIRECTION_FROM_DEVICE, req);
+ if (c->csi == NVME_CSI_NVM && nvme_csi_has_nvm_support(ns)) {
+ return nvme_dma_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns), prp1,
+ prp2, DMA_DIRECTION_FROM_DEVICE, req);
+ }
+
+ return NVME_INVALID_CMD_SET | NVME_DNR;
}
static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req,
@@ -1243,8 +2004,12 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req,
return nvme_rpt_empty_id_struct(n, prp1, prp2, req);
}
- if (c->csi == NVME_CSI_NVM) {
+ if (c->csi == NVME_CSI_NVM && nvme_csi_has_nvm_support(ns)) {
return nvme_rpt_empty_id_struct(n, prp1, prp2, req);
+ } else if (c->csi == NVME_CSI_ZONED && ns->csi == NVME_CSI_ZONED) {
+ return nvme_dma_prp(n, (uint8_t *)ns->id_ns_zoned,
+ sizeof(*ns->id_ns_zoned), prp1, prp2,
+ DMA_DIRECTION_FROM_DEVICE, req);
}
return NVME_INVALID_FIELD | NVME_DNR;
@@ -1304,7 +2069,7 @@ static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req,
trace_pci_nvme_identify_nslist_csi(min_nsid, c->csi);
- if (c->csi != NVME_CSI_NVM) {
+ if (c->csi != NVME_CSI_NVM && c->csi != NVME_CSI_ZONED) {
return NVME_INVALID_FIELD | NVME_DNR;
}
@@ -1368,7 +2133,7 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
desc->nidt = NVME_NIDT_CSI;
desc->nidl = NVME_NIDL_CSI;
buf_ptr += sizeof(*desc);
- *(uint8_t *)buf_ptr = NVME_CSI_NVM;
+ *(uint8_t *)buf_ptr = ns->csi;
status = nvme_dma_prp(n, buf, data_len, prp1, prp2,
DMA_DIRECTION_FROM_DEVICE, req);
@@ -1391,6 +2156,9 @@ static uint16_t nvme_identify_cmd_set(NvmeCtrl *n, NvmeRequest *req)
list = g_malloc0(data_len);
ptr = (uint8_t *)list;
NVME_SET_CSI(*ptr, NVME_CSI_NVM);
+ if (n->params.zoned) {
+ NVME_SET_CSI(*ptr, NVME_CSI_ZONED);
+ }
status = nvme_dma_prp(n, (uint8_t *)list, data_len, prp1, prp2,
DMA_DIRECTION_FROM_DEVICE, req);
g_free(list);
@@ -1959,6 +2727,20 @@ static int nvme_start_ctrl(NvmeCtrl *n)
n->namespaces[i].attached = true;
}
break;
+ case NVME_CSI_ZONED:
+ if (NVME_CC_CSS(n->bar.cc) == CSS_CSI) {
+ n->namespaces[i].attached = true;
+ }
+ break;
+ }
+ }
+
+ if (n->params.zoned) {
+ if (!n->zasl_bs) {
+ assert(n->params.mdts);
+ n->zasl = n->params.mdts;
+ } else {
+ n->zasl = nvme_ilog2(n->zasl_bs / n->page_size);
}
}
@@ -2022,12 +2804,18 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
} else {
switch (NVME_CC_CSS(data)) {
case CSS_NVM_ONLY:
- trace_pci_nvme_css_nvm_cset_selected_by_host(data &
- 0xffffffff);
+ if (n->params.zoned) {
+ NVME_GUEST_ERR(pci_nvme_err_only_zoned_cmd_set_avail,
+ "only NVM+ZONED command set can be selected");
break;
+ }
+ trace_pci_nvme_css_nvm_cset_selected_by_host(data &
+ 0xffffffff);
+ break;
case CSS_CSI:
NVME_SET_CC_CSS(n->bar.cc, CSS_CSI);
- trace_pci_nvme_css_all_csets_sel_by_host(data & 0xffffffff);
+ trace_pci_nvme_css_all_csets_sel_by_host(data &
+ 0xffffffff);
break;
case CSS_ADMIN_ONLY:
break;
@@ -2359,6 +3147,126 @@ static const MemoryRegionOps nvme_cmb_ops = {
},
};
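+
+/*
+ * Allocate and initialize the per-namespace zone descriptor array and the
+ * zone state lists, marking every zone Empty with its write pointer at the
+ * start of the zone.
+ */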
+static int nvme_init_zone_meta(NvmeCtrl *n, NvmeNamespace *ns,
+ uint64_t capacity)
+{
+ NvmeZone *zone;
+ uint64_t start = 0, zone_size = n->zone_size;
+ int i;
+
+ ns->zone_array = g_malloc0(n->zone_array_size);
+ ns->exp_open_zones = g_malloc0(sizeof(NvmeZoneList));
+ ns->imp_open_zones = g_malloc0(sizeof(NvmeZoneList));
+ ns->closed_zones = g_malloc0(sizeof(NvmeZoneList));
+ ns->full_zones = g_malloc0(sizeof(NvmeZoneList));
+ zone = ns->zone_array;
+
+ nvme_init_zone_list(ns->exp_open_zones);
+ nvme_init_zone_list(ns->imp_open_zones);
+ nvme_init_zone_list(ns->closed_zones);
+ nvme_init_zone_list(ns->full_zones);
+
+ for (i = 0; i < n->num_zones; i++, zone++) {
+ if (start + zone_size > capacity) {
+ zone_size = capacity - start;
+ }
+ zone->d.zt = NVME_ZONE_TYPE_SEQ_WRITE;
+ nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY);
+ zone->d.za = 0;
+ zone->d.zcap = n->zone_capacity;
+ zone->d.zslba = start;
+ zone->d.wp = start;
+ zone->prev = 0;
+ zone->next = 0;
+ start += zone_size;
+ }
+
+ return 0;
+}
+
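+/*
+ * Derive the controller-wide zoned parameters (zone size and capacity in
+ * logical blocks, number of zones, zone append size limit) from the device
+ * properties.
+ */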
+static void nvme_zoned_init_ctrl(NvmeCtrl *n, Error **errp)
+{
+ uint64_t zone_size, zone_cap;
+ uint32_t nz;
+
+ if (n->params.zone_size_mb) {
+ zone_size = n->params.zone_size_mb;
+ } else {
+ zone_size = NVME_DEFAULT_ZONE_SIZE;
+ }
+ if (n->params.zone_capacity_mb) {
+ zone_cap = n->params.zone_capacity_mb;
+ } else {
+ zone_cap = zone_size;
+ }
+ n->zone_size = zone_size * MiB / n->conf.logical_block_size;
+ n->zone_capacity = zone_cap * MiB / n->conf.logical_block_size;
+ if (n->zone_capacity > n->zone_size) {
+ error_setg(errp, "zone capacity exceeds zone size");
+ return;
+ }
+
+ nz = DIV_ROUND_UP(n->ns_size / n->conf.logical_block_size, n->zone_size);
+ n->num_zones = nz;
+ n->zone_array_size = sizeof(NvmeZone) * nz;
+ n->zone_size_log2 = is_power_of_2(n->zone_size) ? nvme_ilog2(n->zone_size) :
+ 0;
+
+ if (!n->params.zasl_kb) {
+ n->zasl_bs = n->params.mdts ? 0 : NVME_DEFAULT_MAX_ZA_SIZE * KiB;
+ } else {
+ n->zasl_bs = n->params.zasl_kb * KiB;
+ }
+
+ return;
+}
+
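+/*
+ * Initialize the zoned metadata and the Zoned Namespace Command Set
+ * Identify Namespace data structure for one namespace.
+ */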
+static int nvme_zoned_init_ns(NvmeCtrl *n, NvmeNamespace *ns, int lba_index,
+ Error **errp)
+{
+ int ret;
+
+ ret = nvme_init_zone_meta(n, ns, n->num_zones * n->zone_size);
+ if (ret) {
+ error_setg(errp, "could not init zone metadata");
+ return -1;
+ }
+
+ ns->id_ns_zoned = g_malloc0(sizeof(*ns->id_ns_zoned));
+
+    /* MAR/MOR are zero-based values, 0xffffffff means no limit */
+ ns->id_ns_zoned->mar = 0xffffffff;
+ ns->id_ns_zoned->mor = 0xffffffff;
+ ns->id_ns_zoned->zoc = 0;
+ ns->id_ns_zoned->ozcs = n->params.cross_zone_read ? 0x01 : 0x00;
+
+ ns->id_ns_zoned->lbafe[lba_index].zsze = cpu_to_le64(n->zone_size);
+ ns->id_ns_zoned->lbafe[lba_index].zdes = 0;
+
+ if (n->params.fill_pattern == 0) {
+ ns->id_ns.dlfeat = 0x01;
+ } else if (n->params.fill_pattern == 0xff) {
+ ns->id_ns.dlfeat = 0x02;
+ }
+
+ return 0;
+}
+
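+/* Free the zoned metadata allocated for each namespace. */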
+static void nvme_zoned_clear(NvmeCtrl *n)
+{
+ int i;
+
+ for (i = 0; i < n->num_namespaces; i++) {
+ NvmeNamespace *ns = &n->namespaces[i];
+ g_free(ns->id_ns_zoned);
+ g_free(ns->zone_array);
+ g_free(ns->exp_open_zones);
+ g_free(ns->imp_open_zones);
+ g_free(ns->closed_zones);
+ g_free(ns->full_zones);
+ }
+}
+
static void nvme_check_constraints(NvmeCtrl *n, Error **errp)
{
NvmeParams *params = &n->params;
@@ -2427,18 +3335,13 @@ static void nvme_init_state(NvmeCtrl *n)
static void nvme_init_blk(NvmeCtrl *n, Error **errp)
{
+ int64_t bs_size;
+
if (!blkconf_blocksizes(&n->conf, errp)) {
return;
}
blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
false, errp);
-}
-
-static void nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
-{
- int64_t bs_size;
- NvmeIdNs *id_ns = &ns->id_ns;
- int lba_index;
bs_size = blk_getlength(n->conf.blk);
if (bs_size < 0) {
@@ -2447,6 +3350,12 @@ static void nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
}
n->ns_size = bs_size;
+}
+
+static void nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
+{
+ NvmeIdNs *id_ns = &ns->id_ns;
+ int lba_index;
ns->csi = NVME_CSI_NVM;
qemu_uuid_generate(&ns->uuid); /* TODO make UUIDs persistent */
@@ -2454,8 +3363,18 @@ static void nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
id_ns->lbaf[lba_index].ds = nvme_ilog2(n->conf.logical_block_size);
id_ns->nsze = cpu_to_le64(nvme_ns_nlbas(n, ns));
+ if (n->params.zoned) {
+ ns->csi = NVME_CSI_ZONED;
+ id_ns->ncap = cpu_to_le64(n->zone_capacity * n->num_zones);
+ if (nvme_zoned_init_ns(n, ns, lba_index, errp) != 0) {
+ return;
+ }
+ } else {
+ ns->csi = NVME_CSI_NVM;
+ id_ns->ncap = id_ns->nsze;
+ }
+
/* no thin provisioning */
- id_ns->ncap = id_ns->nsze;
id_ns->nuse = id_ns->ncap;
}
@@ -2615,8 +3534,9 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
NVME_CAP_SET_CQR(n->bar.cap, 1);
NVME_CAP_SET_TO(n->bar.cap, 0xf);
/*
- * The device now always supports NS Types, but all commands
- * that support CSI field will only handle NVM Command Set.
+     * The device now always supports NS Types, even when the "zoned"
+     * property is set to false. In that case, all commands that support
+     * the CSI field only handle the NVM Command Set.
*/
NVME_CAP_SET_CSS(n->bar.cap, (CAP_CSS_NVM | CAP_CSS_CSI_SUPP));
NVME_CAP_SET_MPSMAX(n->bar.cap, 4);
@@ -2652,6 +3572,13 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
return;
}
+ if (n->params.zoned) {
+ nvme_zoned_init_ctrl(n, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ }
nvme_init_ctrl(n, pci_dev);
ns = n->namespaces;
@@ -2670,6 +3597,9 @@ static void nvme_exit(PCIDevice *pci_dev)
NvmeCtrl *n = NVME(pci_dev);
nvme_clear_ctrl(n);
+ if (n->params.zoned) {
+ nvme_zoned_clear(n);
+ }
g_free(n->namespaces);
g_free(n->cq);
g_free(n->sq);
@@ -2697,6 +3627,13 @@ static Property nvme_props[] = {
DEFINE_PROP_UINT8("aerl", NvmeCtrl, params.aerl, 3),
DEFINE_PROP_UINT32("aer_max_queued", NvmeCtrl, params.aer_max_queued, 64),
DEFINE_PROP_UINT8("mdts", NvmeCtrl, params.mdts, 7),
+ DEFINE_PROP_BOOL("zoned", NvmeCtrl, params.zoned, false),
+ DEFINE_PROP_UINT64("zone_size", NvmeCtrl, params.zone_size_mb,
+ NVME_DEFAULT_ZONE_SIZE),
+ DEFINE_PROP_UINT64("zone_capacity", NvmeCtrl, params.zone_capacity_mb, 0),
+ DEFINE_PROP_UINT32("zone_append_size_limit", NvmeCtrl, params.zasl_kb, 0),
+ DEFINE_PROP_BOOL("cross_zone_read", NvmeCtrl, params.cross_zone_read, true),
+ DEFINE_PROP_UINT8("fill_pattern", NvmeCtrl, params.fill_pattern, 0),
DEFINE_PROP_END_OF_LIST(),
};