[QEMU-NVME][CMB] Added CMBLOC and CMBSZ params to NVMe devices
Keith Busch
keith.busch at intel.com
Tue Aug 25 16:32:59 PDT 2015
On Tue, 25 Aug 2015, Stephen Bates wrote:
> In order to allow more rigorous testing of the CMB feature introduced in
> NVMe 1.2, this patch allows more flexible setting of the CMBLOC and
> CMBSZ registers on NVMe devices. Refer to version 1.2 of the NVM
> Express standard for more information on these registers.
>
> Setting CMBSZ to a non-zero value (the default is 0) implements a CMB
> configured per the other settings in those registers. Note that certain
> invalid combinations exist, and it is currently a case of buyer beware
> when you choose to implement a CMB.
If we're going to allow an arbitrary CMBSZ, then RDS/WDS can be set, and
the IO path needs to accept data buffers placed in the CMB. AFAIK, the
DMA helpers we're using right now won't work with CMB-based addresses.
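For context, this is my reading of how NVMe 1.2 lays out CMBSZ; a
minimal sketch, and the helper names are mine, not anything from the
patch below:

#include <stdint.h>
#include <stdbool.h>

/* CMB size in bytes: CMBSZ.SZ units of (4 KiB << (4 * CMBSZ.SZU)),
 * i.e. SZU 0 is 4 KiB granularity, 1 is 64 KiB, and so on. SZU
 * values above 6 are reserved in NVMe 1.2. */
static inline uint64_t cmbsz_to_bytes(uint32_t cmbsz)
{
    uint32_t szu = (cmbsz >> 8) & 0xf;  /* SZU, bits 11:8 */
    uint32_t sz  = cmbsz >> 12;         /* SZ, bits 31:12 */
    return (uint64_t)sz * (4096ULL << (4 * szu));
}

/* RDS (bit 3) and WDS (bit 4) advertise read/write data support,
 * which is what the IO path has to honor. */
static inline bool cmbsz_supports_data(uint32_t cmbsz)
{
    return (cmbsz & (1 << 3)) && (cmbsz & (1 << 4));
}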
I took a quick stab at a patch to bring read/write data CMBs into the
fold. This is compile-tested only right now.
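The crux of the patch below: a PRP entry is CMB-resident when it falls
inside the controller's own memory window, and such an address
translates to a direct pointer into the host-side buffer backing the
CMB, so no DMA through the guest address space is needed. The patch
open-codes both checks; written as standalone helpers (hypothetical
names, using the same fields the patch does), the idea is:

static inline bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
{
    return n->cmb && addr >= n->ctrl_mem.addr &&
        addr < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size);
}

static inline void *nvme_cmb_ptr(NvmeCtrl *n, hwaddr addr)
{
    /* Only valid once nvme_addr_is_cmb(n, addr) has been checked. */
    return &n->cmbuf[addr - n->ctrl_mem.addr];
}

CMB PRPs get collected into a QEMUIOVector over cmbuf and serviced with
blk_aio_readv/blk_aio_writev; host-memory PRPs keep the existing
QEMUSGList/DMA path, with qsg.nsg == 0 signaling the iovec case
downstream.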
---
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 2e4ac19..d55c25c 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -254,19 +254,26 @@ static hwaddr nvme_discontig(uint64_t *dma_addr, uint16_t page_size,
     return dma_addr[prp_index] + index_in_prp * entry_size;
 }
 
-static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2,
-    uint32_t len, NvmeCtrl *n)
+static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov,
+    uint64_t prp1, uint64_t prp2, uint32_t len, NvmeCtrl *n)
 {
     hwaddr trans_len = n->page_size - (prp1 % n->page_size);
     trans_len = MIN(len, trans_len);
     int num_prps = (len >> n->page_bits) + 1;
+    bool cmb = false;
 
     if (!prp1) {
         return NVME_INVALID_FIELD | NVME_DNR;
+    } else if (n->cmb && prp1 >= n->ctrl_mem.addr &&
+               prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
+        cmb = true;
+        qsg->nsg = 0;
+        qemu_iovec_init(iov, num_prps);
+        qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], trans_len);
+    } else {
+        pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
+        qemu_sglist_add(qsg, prp1, trans_len);
     }
-
-    pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
-    qemu_sglist_add(qsg, prp1, trans_len);
     len -= trans_len;
     if (len) {
         if (!prp2) {
@@ -301,7 +308,11 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2,
                 }
 
                 trans_len = MIN(len, n->page_size);
-                qemu_sglist_add(qsg, prp_ent, trans_len);
+                if (!cmb) {
+                    qemu_sglist_add(qsg, prp_ent, trans_len);
+                } else {
+                    qemu_iovec_add(iov, (void *)&n->cmbuf[prp_ent - n->ctrl_mem.addr], trans_len);
+                }
                 len -= trans_len;
                 i++;
             }
@@ -309,13 +320,21 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2,
             if (prp2 & (n->page_size - 1)) {
                 goto unmap;
             }
-            qemu_sglist_add(qsg, prp2, len);
+            if (!cmb) {
+                qemu_sglist_add(qsg, prp2, len);
+            } else {
+                qemu_iovec_add(iov, (void *)&n->cmbuf[prp2 - n->ctrl_mem.addr], len);
+            }
         }
     }
 
     return NVME_SUCCESS;
 
 unmap:
-    qemu_sglist_destroy(qsg);
+    if (!cmb) {
+        qemu_sglist_destroy(qsg);
+    } else {
+        qemu_iovec_destroy(iov);
+    }
     return NVME_INVALID_FIELD | NVME_DNR;
 }
@@ -323,31 +342,48 @@ static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
     uint64_t prp1, uint64_t prp2)
 {
     QEMUSGList qsg;
+    QEMUIOVector iov;
+    uint16_t status = NVME_SUCCESS;
 
-    if (nvme_map_prp(&qsg, prp1, prp2, len, n)) {
+    if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
         return NVME_INVALID_FIELD | NVME_DNR;
     }
-    if (dma_buf_write(ptr, len, &qsg)) {
+    if (qsg.nsg > 0) {
+        if (dma_buf_write(ptr, len, &qsg)) {
+            status = NVME_INVALID_FIELD | NVME_DNR;
+        }
         qemu_sglist_destroy(&qsg);
-        return NVME_INVALID_FIELD | NVME_DNR;
+    } else {
+        if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) {
+            status = NVME_INVALID_FIELD | NVME_DNR;
+        }
+        qemu_iovec_destroy(&iov);
     }
-    qemu_sglist_destroy(&qsg);
-    return NVME_SUCCESS;
+    return status;
 }
 
 static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
     uint64_t prp1, uint64_t prp2)
 {
     QEMUSGList qsg;
+    QEMUIOVector iov;
+    uint16_t status = NVME_SUCCESS;
 
-    if (nvme_map_prp(&qsg, prp1, prp2, len, n)) {
+    if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
         return NVME_INVALID_FIELD | NVME_DNR;
     }
-    if (dma_buf_read(ptr, len, &qsg)) {
+    if (qsg.nsg > 0) {
+        if (dma_buf_read(ptr, len, &qsg)) {
+            status = NVME_INVALID_FIELD | NVME_DNR;
+        }
         qemu_sglist_destroy(&qsg);
-        return NVME_INVALID_FIELD | NVME_DNR;
+    } else {
+        if (qemu_iovec_from_buf(&iov, 0, ptr, len) != len) {
+            status = NVME_INVALID_FIELD | NVME_DNR;
+        }
+        qemu_iovec_destroy(&iov);
     }
-    return NVME_SUCCESS;
+    return status;
 }
 
 static void nvme_post_cqe(NvmeCQueue *cq, NvmeRequest *req)
@@ -508,7 +544,11 @@ static void nvme_rw_cb(void *opaque, int ret)
         }
     }
 
-    qemu_sglist_destroy(&req->qsg);
+    if (req->qsg.nsg) {
+        qemu_sglist_destroy(&req->qsg);
+    } else {
+        qemu_iovec_destroy(&req->iov);
+    }
 
     nvme_enqueue_req_completion(cq, req);
 }
@@ -558,7 +598,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
         return NVME_UNRECOVERED_READ;
     }
 
-    if (nvme_map_prp(&req->qsg, prp1, prp2, data_size, n)) {
+    if (nvme_map_prp(&req->qsg, &req->iov, prp1, prp2, data_size, n)) {
         nvme_set_error_page(n, req->sq->sqid, cmd->cid, NVME_INVALID_FIELD,
             offsetof(NvmeRwCmd, prp1), 0, ns->id);
         return NVME_INVALID_FIELD | NVME_DNR;
@@ -570,10 +610,16 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     req->ns = ns;
 
     dma_acct_start(n->conf.blk, &req->acct, &req->qsg, req->is_write ?
-            BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
-    req->aiocb = req->is_write ?
-        dma_blk_write(n->conf.blk, &req->qsg, aio_slba, nvme_rw_cb, req) :
-        dma_blk_read(n->conf.blk, &req->qsg, aio_slba, nvme_rw_cb, req);
+        BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
+    if (req->qsg.nsg > 0) {
+        req->aiocb = req->is_write ?
+            dma_blk_write(n->conf.blk, &req->qsg, aio_slba, nvme_rw_cb, req) :
+            dma_blk_read(n->conf.blk, &req->qsg, aio_slba, nvme_rw_cb, req);
+    } else {
+        req->aiocb = req->is_write ?
+            blk_aio_writev(n->conf.blk, aio_slba, &req->iov, data_size >> 9, nvme_rw_cb, req) :
+            blk_aio_readv(n->conf.blk, aio_slba, &req->iov, data_size >> 9, nvme_rw_cb, req);
+    }
 
     return NVME_NO_COMPLETE;
 }
@@ -662,7 +708,7 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
             offsetof(NvmeRwCmd, nlb), nlb, ns->id);
         return NVME_INVALID_FIELD | NVME_DNR;
     }
-    if (nvme_map_prp(&req->qsg, prp1, prp2, data_size, n)) {
+    if (nvme_map_prp(&req->qsg, &req->iov, prp1, prp2, data_size, n)) {
         nvme_set_error_page(n, req->sq->sqid, cmd->cid, NVME_INVALID_FIELD,
             offsetof(NvmeRwCmd, prp1), 0, ns->id);
         return NVME_INVALID_FIELD | NVME_DNR;
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 3c0fda9..9d3070d 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -716,6 +716,7 @@ typedef struct NvmeRequest {
     NvmeCqe                 cqe;
     BlockAcctCookie         acct;
     QEMUSGList              qsg;
+    QEMUIOVector            iov;
     QTAILQ_ENTRY(NvmeRequest)entry;
 } NvmeRequest;
--