@@ -32,6 +32,17 @@ typedef struct NvmeRequest {
QTAILQ_ENTRY(NvmeRequest)entry;
} NvmeRequest;
+static inline bool nvme_req_is_write(NvmeRequest *req)
+{
+ switch (req->cmd.opcode) {
+ case NVME_CMD_WRITE:
+ case NVME_CMD_WRITE_ZEROES:
+ return true;
+ default:
+ return false;
+ }
+}
+
static inline const char *nvme_adm_opc_str(uint8_t opc)
{
switch (opc) {
@@ -171,4 +182,18 @@ static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, NvmeNamespace *ns)
return n->ns_size >> nvme_ns_lbads(ns);
}
+static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
+{
+ NvmeSQueue *sq = req->sq;
+ NvmeCtrl *n = sq->ctrl;
+
+ return n->cq[sq->cqid];
+}
+
+static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
+{
+ NvmeSQueue *sq = req->sq;
+ return sq->ctrl;
+}
+
#endif /* HW_NVME_H */
@@ -614,30 +614,108 @@ static inline uint16_t nvme_check_bounds(NvmeCtrl *n, NvmeNamespace *ns,
static void nvme_rw_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
- NvmeSQueue *sq = req->sq;
- NvmeCtrl *n = sq->ctrl;
- NvmeCQueue *cq = n->cq[sq->cqid];
+ NvmeCtrl *n = nvme_ctrl(req);
- trace_pci_nvme_rw_cb(nvme_cid(req));
+ BlockBackend *blk = n->conf.blk;
+ BlockAcctCookie *acct = &req->acct;
+ BlockAcctStats *stats = blk_get_stats(blk);
+
+ Error *local_err = NULL;
+
+ trace_pci_nvme_rw_cb(nvme_cid(req), blk_name(blk));
if (!ret) {
- block_acct_done(blk_get_stats(n->conf.blk), &req->acct);
+ block_acct_done(stats, acct);
req->status = NVME_SUCCESS;
} else {
- block_acct_failed(blk_get_stats(n->conf.blk), &req->acct);
- req->status = NVME_INTERNAL_DEV_ERROR;
+ uint16_t status;
+
+ block_acct_failed(stats, acct);
+
+ switch (req->cmd.opcode) {
+ case NVME_CMD_READ:
+ status = NVME_UNRECOVERED_READ;
+ break;
+ case NVME_CMD_FLUSH:
+ case NVME_CMD_WRITE:
+ case NVME_CMD_WRITE_ZEROES:
+ status = NVME_WRITE_FAULT;
+ break;
+ default:
+ status = NVME_INTERNAL_DEV_ERROR;
+ break;
+ }
+
+ trace_pci_nvme_err_aio(nvme_cid(req), strerror(ret), status);
+
+ error_setg_errno(&local_err, -ret, "aio failed");
+ error_report_err(local_err);
+
+ req->status = status;
}
- nvme_enqueue_req_completion(cq, req);
+ nvme_enqueue_req_completion(nvme_cq(req), req);
+}
+
+static uint16_t nvme_do_aio(BlockBackend *blk, int64_t offset, size_t len,
+ NvmeRequest *req)
+{
+ BlockAcctCookie *acct = &req->acct;
+ BlockAcctStats *stats = blk_get_stats(blk);
+
+ bool is_write;
+
+ trace_pci_nvme_do_aio(nvme_cid(req), req->cmd.opcode,
+ nvme_io_opc_str(req->cmd.opcode), blk_name(blk),
+ offset, len);
+
+ switch (req->cmd.opcode) {
+ case NVME_CMD_FLUSH:
+ block_acct_start(stats, acct, 0, BLOCK_ACCT_FLUSH);
+ req->aiocb = blk_aio_flush(blk, nvme_rw_cb, req);
+ break;
+
+ case NVME_CMD_WRITE_ZEROES:
+ block_acct_start(stats, acct, len, BLOCK_ACCT_WRITE);
+ req->aiocb = blk_aio_pwrite_zeroes(blk, offset, len,
+ BDRV_REQ_MAY_UNMAP, nvme_rw_cb,
+ req);
+ break;
+
+ case NVME_CMD_READ:
+ case NVME_CMD_WRITE:
+ is_write = nvme_req_is_write(req);
+
+ block_acct_start(stats, acct, len,
+ is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
+
+ if (req->qsg.sg) {
+ if (is_write) {
+ req->aiocb = dma_blk_write(blk, &req->qsg, offset,
+ BDRV_SECTOR_SIZE, nvme_rw_cb, req);
+ } else {
+ req->aiocb = dma_blk_read(blk, &req->qsg, offset,
+ BDRV_SECTOR_SIZE, nvme_rw_cb, req);
+ }
+ } else {
+ if (is_write) {
+ req->aiocb = blk_aio_pwritev(blk, offset, &req->iov, 0,
+ nvme_rw_cb, req);
+ } else {
+ req->aiocb = blk_aio_preadv(blk, offset, &req->iov, 0,
+ nvme_rw_cb, req);
+ }
+ }
+
+ break;
+ }
+
+ return NVME_NO_COMPLETE;
}
static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
{
- block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
- BLOCK_ACCT_FLUSH);
- req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, req);
-
- return NVME_NO_COMPLETE;
+ return nvme_do_aio(n->conf.blk, 0, 0, req);
}
static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
@@ -658,11 +736,7 @@ static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
return status;
}
- block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
- BLOCK_ACCT_WRITE);
- req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, offset, count,
- BDRV_REQ_MAY_UNMAP, nvme_rw_cb, req);
- return NVME_NO_COMPLETE;
+ return nvme_do_aio(n->conf.blk, offset, count, req);
}
static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
@@ -674,8 +748,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
uint64_t data_size = nvme_l2b(ns, nlb);
uint64_t data_offset = nvme_l2b(ns, slba);
- int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;
- enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
+ bool is_write = nvme_req_is_write(req);
uint16_t status;
trace_pci_nvme_rw(nvme_cid(req), nvme_io_opc_str(rw->opcode), nlb,
@@ -698,28 +771,11 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
goto invalid;
}
- if (req->qsg.nsg > 0) {
- block_acct_start(blk_get_stats(n->conf.blk), &req->acct, req->qsg.size,
- acct);
- req->aiocb = is_write ?
- dma_blk_write(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE,
- nvme_rw_cb, req) :
- dma_blk_read(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE,
- nvme_rw_cb, req);
- } else {
- block_acct_start(blk_get_stats(n->conf.blk), &req->acct, req->iov.size,
- acct);
- req->aiocb = is_write ?
- blk_aio_pwritev(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb,
- req) :
- blk_aio_preadv(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb,
- req);
- }
-
- return NVME_NO_COMPLETE;
+ return nvme_do_aio(n->conf.blk, data_offset, data_size, req);
invalid:
- block_acct_invalid(blk_get_stats(n->conf.blk), acct);
+ block_acct_invalid(blk_get_stats(n->conf.blk),
+ is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
return status;
}
@@ -39,8 +39,9 @@ pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2,
pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
pci_nvme_rw(uint16_t cid, const char *verb, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" '%s' nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
-pci_nvme_rw_cb(uint16_t cid) "cid %"PRIu16""
+pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
pci_nvme_write_zeroes(uint16_t cid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" slba %"PRIu64" nlb %"PRIu32""
+pci_nvme_do_aio(uint16_t cid, uint8_t opc, const char *opname, const char *blkname, int64_t offset, size_t len) "cid %"PRIu16" opc 0x%"PRIx8" opname '%s' blk '%s' offset %"PRId64" len %zu"
pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""
@@ -88,6 +89,7 @@ pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
pci_nvme_err_mdts(uint16_t cid, size_t len) "cid %"PRIu16" len %zu"
pci_nvme_err_addr_read(uint64_t addr) "addr 0x%"PRIx64""
pci_nvme_err_addr_write(uint64_t addr) "addr 0x%"PRIx64""
+pci_nvme_err_aio(uint16_t cid, const char *errname, uint16_t status) "cid %"PRIu16" err '%s' status 0x%"PRIx16""
pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
pci_nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64""
pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64""