@@ -30,7 +30,17 @@ typedef struct NvmeNamespace {
int64_t size;
NvmeIdNs id_ns;
+ struct {
+ struct {
+ unsigned long *map;
+ } utilization;
+ } state;
+
NvmeNamespaceParams params;
+
+ struct {
+ uint32_t err_rec;
+ } features;
} NvmeNamespace;
static inline uint32_t nvme_nsid(NvmeNamespace *ns)
@@ -686,6 +686,7 @@ enum NvmeStatusCodes {
NVME_E2E_REF_ERROR = 0x0284,
NVME_CMP_FAILURE = 0x0285,
NVME_ACCESS_DENIED = 0x0286,
+ NVME_DULB = 0x0287,
NVME_MORE = 0x2000,
NVME_DNR = 0x4000,
NVME_NO_COMPLETE = 0xffff,
@@ -902,6 +903,9 @@ enum NvmeIdCtrlLpa {
#define NVME_AEC_NS_ATTR(aec) ((aec >> 8) & 0x1)
#define NVME_AEC_FW_ACTIVATION(aec) ((aec >> 9) & 0x1)
+#define NVME_ERR_REC_TLER(err_rec) (err_rec & 0xffff)
+#define NVME_ERR_REC_DULBE(err_rec) (err_rec & 0x10000)
+
enum NvmeFeatureIds {
NVME_ARBITRATION = 0x1,
NVME_POWER_MANAGEMENT = 0x2,
@@ -1022,6 +1026,7 @@ enum NvmeNsIdentifierType {
#define NVME_ID_NS_NSFEAT_THIN(nsfeat) ((nsfeat & 0x1))
+#define NVME_ID_NS_NSFEAT_DULBE(nsfeat) ((nsfeat >> 2) & 0x1)
#define NVME_ID_NS_FLBAS_EXTENDED(flbas) ((flbas >> 4) & 0x1)
#define NVME_ID_NS_FLBAS_INDEX(flbas) ((flbas & 0xf))
#define NVME_ID_NS_MC_SEPARATE(mc) ((mc >> 1) & 0x1)
@@ -47,6 +47,9 @@ static void nvme_ns_init(NvmeNamespace *ns)
lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
ns->id_ns.lbaf[lba_index].ds = 31 - clz32(ns->blkconf.logical_block_size);
+
+ /* support DULBE */
+ id_ns->nsfeat |= 0x4;
}
static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
@@ -86,6 +89,8 @@ static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp)
int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
{
+ uint64_t nlbas;
+
if (nvme_ns_check_constraints(ns, errp)) {
return -1;
}
@@ -95,6 +100,11 @@ int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
}
nvme_ns_init(ns);
+
+ nlbas = nvme_ns_nlbas(ns);
+ ns->state.utilization.map = bitmap_new(nlbas);
+ bitmap_fill(ns->state.utilization.map, nlbas);
+
if (nvme_register_namespace(n, ns, errp)) {
return -1;
}
@@ -105,6 +105,7 @@ static const bool nvme_feature_support[NVME_FID_MAX] = {
static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
[NVME_TEMPERATURE_THRESHOLD] = NVME_FEAT_CAP_CHANGE,
+ [NVME_ERROR_RECOVERY] = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS,
[NVME_VOLATILE_WRITE_CACHE] = NVME_FEAT_CAP_CHANGE,
[NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE,
[NVME_ASYNCHRONOUS_EVENT_CONF] = NVME_FEAT_CAP_CHANGE,
@@ -888,6 +889,38 @@ static inline uint16_t nvme_check_bounds(NvmeCtrl *n, NvmeNamespace *ns,
return NVME_SUCCESS;
}
+static inline uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba,
+ uint32_t nlb)
+{
+ uint64_t elba = slba + nlb;
+
+ if (find_next_zero_bit(ns->state.utilization.map, elba, slba) < elba) {
+ return NVME_DULB;
+ }
+
+ return NVME_SUCCESS;
+}
+
+static inline void nvme_allocate(NvmeNamespace *ns, uint64_t slba,
+ uint32_t nlb)
+{
+ if (!nvme_check_dulbe(ns, slba, nlb)) {
+ return;
+ }
+
+ trace_pci_nvme_allocate(nvme_nsid(ns), slba, nlb);
+
+ bitmap_set(ns->state.utilization.map, slba, nlb);
+}
+
+static inline void nvme_deallocate(NvmeNamespace *ns, uint64_t slba,
+ uint32_t nlb)
+{
+ trace_pci_nvme_deallocate(nvme_nsid(ns), slba, nlb);
+
+ bitmap_clear(ns->state.utilization.map, slba, nlb);
+}
+
static void nvme_rw_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
@@ -959,6 +992,8 @@ static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
return status;
}
+ nvme_deallocate(ns, slba, nlb);
+
block_acct_start(blk_get_stats(req->ns->blkconf.blk), &req->acct, 0,
BLOCK_ACCT_WRITE);
req->aiocb = blk_aio_pwrite_zeroes(req->ns->blkconf.blk, offset, count,
@@ -995,11 +1030,24 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
goto invalid;
}
+ if (acct == BLOCK_ACCT_READ) {
+ if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
+ status = nvme_check_dulbe(ns, slba, nlb);
+ if (status) {
+ goto invalid;
+ }
+ }
+ }
+
status = nvme_map_dptr(n, data_size, req);
if (status) {
goto invalid;
}
+ if (acct == BLOCK_ACCT_WRITE) {
+ nvme_allocate(ns, slba, nlb);
+ }
+
block_acct_start(blk_get_stats(blk), &req->acct, data_size, acct);
if (req->qsg.sg) {
if (acct == BLOCK_ACCT_WRITE) {
@@ -1645,6 +1693,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req)
uint8_t fid = NVME_GETSETFEAT_FID(dw10);
NvmeGetFeatureSelect sel = NVME_GETFEAT_SELECT(dw10);
uint16_t iv;
+ NvmeNamespace *ns;
static const uint32_t nvme_feature_default[NVME_FID_MAX] = {
[NVME_ARBITRATION] = NVME_ARB_AB_NOLIMIT,
@@ -1707,6 +1756,18 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req)
}
return NVME_INVALID_FIELD | NVME_DNR;
+ case NVME_ERROR_RECOVERY:
+ if (!nvme_nsid_valid(n, nsid)) {
+ return NVME_INVALID_NSID | NVME_DNR;
+ }
+
+ ns = nvme_ns(n, nsid);
+ if (unlikely(!ns)) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ result = ns->features.err_rec;
+ goto out;
case NVME_VOLATILE_WRITE_CACHE:
result = n->features.vwc;
trace_pci_nvme_getfeat_vwcache(result ? "enabled" : "disabled");
@@ -1846,6 +1907,26 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
NVME_LOG_SMART_INFO);
}
+ break;
+ case NVME_ERROR_RECOVERY:
+ if (nsid == NVME_NSID_BROADCAST) {
+ for (int i = 1; i <= n->num_namespaces; i++) {
+ ns = nvme_ns(n, i);
+
+ if (!ns) {
+ continue;
+ }
+
+ if (NVME_ID_NS_NSFEAT_DULBE(ns->id_ns.nsfeat)) {
+ ns->features.err_rec = dw11;
+ }
+ }
+
+ break;
+ }
+
+ assert(ns);
+ ns->features.err_rec = dw11;
break;
case NVME_VOLATILE_WRITE_CACHE:
n->features.vwc = dw11 & 0x1;
@@ -43,6 +43,8 @@ pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opna
pci_nvme_rw(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
pci_nvme_write_zeroes(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32""
+pci_nvme_allocate(uint32_t ns, uint64_t slba, uint32_t nlb) "nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32""
+pci_nvme_deallocate(uint32_t ns, uint64_t slba, uint32_t nlb) "nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32""
pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""