[PATCH] NVMe: Meta data handling through submit io ioctl
Keith Busch
keith.busch at intel.com
Tue Apr 7 15:57:19 PDT 2015
This adds support for the extended metadata formats through the submit
IO ioctl, and simplifies the rest when using a separate metadata format.
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
drivers/block/nvme-core.c | 123 ++++++++++++++++++---------------------------
include/linux/nvme.h | 5 +-
2 files changed, 52 insertions(+), 76 deletions(-)
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 8729266..879b030 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1745,25 +1745,31 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
struct nvme_dev *dev = ns->dev;
struct nvme_user_io io;
struct nvme_command c;
- unsigned length, meta_len;
- int status, i;
- struct nvme_iod *iod, *meta_iod = NULL;
- dma_addr_t meta_dma_addr;
- void *meta, *uninitialized_var(meta_mem);
+ unsigned length, meta_len, prp_len;
+ int status, write;
+ struct nvme_iod *iod;
+ dma_addr_t meta_dma = 0;
+ void *meta = NULL;
if (copy_from_user(&io, uio, sizeof(io)))
return -EFAULT;
length = (io.nblocks + 1) << ns->lba_shift;
meta_len = (io.nblocks + 1) * ns->ms;
- if (meta_len && ((io.metadata & 3) || !io.metadata))
+ if (meta_len && ((io.metadata & 3) || !io.metadata) && !ns->ext)
return -EINVAL;
+ else if (meta_len && ns->ext) {
+ length += meta_len;
+ meta_len = 0;
+ }
+
+ write = io.opcode & 1;
switch (io.opcode) {
case nvme_cmd_write:
case nvme_cmd_read:
case nvme_cmd_compare:
- iod = nvme_map_user_pages(dev, io.opcode & 1, io.addr, length);
+ iod = nvme_map_user_pages(dev, write, io.addr, length);
break;
default:
return -EINVAL;
@@ -1772,6 +1778,27 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
if (IS_ERR(iod))
return PTR_ERR(iod);
+ prp_len = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
+ if (length != prp_len) {
+ status = -ENOMEM;
+ goto unmap;
+ }
+ if (meta_len) {
+ meta = dma_alloc_coherent(&dev->pci_dev->dev, meta_len,
+ &meta_dma, GFP_KERNEL);
+ if (!meta) {
+ status = -ENOMEM;
+ goto unmap;
+ }
+ if (write) {
+ if (copy_from_user(meta, (void __user *)io.metadata,
+ meta_len)) {
+ status = -EFAULT;
+ goto unmap;
+ }
+ }
+ }
+
memset(&c, 0, sizeof(c));
c.rw.opcode = io.opcode;
c.rw.flags = io.flags;
@@ -1783,75 +1810,21 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
c.rw.reftag = cpu_to_le32(io.reftag);
c.rw.apptag = cpu_to_le16(io.apptag);
c.rw.appmask = cpu_to_le16(io.appmask);
-
- if (meta_len) {
- meta_iod = nvme_map_user_pages(dev, io.opcode & 1, io.metadata,
- meta_len);
- if (IS_ERR(meta_iod)) {
- status = PTR_ERR(meta_iod);
- meta_iod = NULL;
- goto unmap;
- }
-
- meta_mem = dma_alloc_coherent(&dev->pci_dev->dev, meta_len,
- &meta_dma_addr, GFP_KERNEL);
- if (!meta_mem) {
- status = -ENOMEM;
- goto unmap;
- }
-
- if (io.opcode & 1) {
- int meta_offset = 0;
-
- for (i = 0; i < meta_iod->nents; i++) {
- meta = kmap_atomic(sg_page(&meta_iod->sg[i])) +
- meta_iod->sg[i].offset;
- memcpy(meta_mem + meta_offset, meta,
- meta_iod->sg[i].length);
- kunmap_atomic(meta);
- meta_offset += meta_iod->sg[i].length;
- }
- }
-
- c.rw.metadata = cpu_to_le64(meta_dma_addr);
- }
-
- length = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
c.rw.prp2 = cpu_to_le64(iod->first_dma);
-
- if (length != (io.nblocks + 1) << ns->lba_shift)
- status = -ENOMEM;
- else
- status = nvme_submit_io_cmd(dev, ns, &c, NULL);
-
- if (meta_len) {
- if (status == NVME_SC_SUCCESS && !(io.opcode & 1)) {
- int meta_offset = 0;
-
- for (i = 0; i < meta_iod->nents; i++) {
- meta = kmap_atomic(sg_page(&meta_iod->sg[i])) +
- meta_iod->sg[i].offset;
- memcpy(meta, meta_mem + meta_offset,
- meta_iod->sg[i].length);
- kunmap_atomic(meta);
- meta_offset += meta_iod->sg[i].length;
- }
- }
-
- dma_free_coherent(&dev->pci_dev->dev, meta_len, meta_mem,
- meta_dma_addr);
- }
-
+ c.rw.metadata = cpu_to_le64(meta_dma);
+ status = nvme_submit_io_cmd(dev, ns, &c, NULL);
unmap:
- nvme_unmap_user_pages(dev, io.opcode & 1, iod);
+ nvme_unmap_user_pages(dev, write, iod);
nvme_free_iod(dev, iod);
-
- if (meta_iod) {
- nvme_unmap_user_pages(dev, io.opcode & 1, meta_iod);
- nvme_free_iod(dev, meta_iod);
+ if (meta) {
+ if (status == NVME_SC_SUCCESS && !write) {
+ if (copy_to_user((void __user *)io.metadata, meta,
+ meta_len))
+ status = -EFAULT;
+ }
+ dma_free_coherent(&dev->pci_dev->dev, meta_len, meta, meta_dma);
}
-
return status;
}
@@ -2014,7 +1987,8 @@ static int nvme_revalidate_disk(struct gendisk *disk)
struct nvme_dev *dev = ns->dev;
struct nvme_id_ns *id;
dma_addr_t dma_addr;
- int lbaf, pi_type, old_ms;
+ u8 lbaf, pi_type;
+ u16 old_ms;
unsigned short bs;
id = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr,
@@ -2035,6 +2009,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
ns->lba_shift = id->lbaf[lbaf].ds;
ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
+ ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
/*
* If identify namespace failed, use default 512 byte block size so
@@ -2051,14 +2026,14 @@ static int nvme_revalidate_disk(struct gendisk *disk)
if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
ns->ms != old_ms ||
bs != queue_logical_block_size(disk->queue) ||
- (ns->ms && id->flbas & NVME_NS_FLBAS_META_EXT)))
+ (ns->ms && ns->ext)))
blk_integrity_unregister(disk);
ns->pi_type = pi_type;
blk_queue_logical_block_size(ns->queue, bs);
if (ns->ms && !blk_get_integrity(disk) && (disk->flags & GENHD_FL_UP) &&
- !(id->flbas & NVME_NS_FLBAS_META_EXT))
+ !ns->ext)
nvme_init_integrity(ns);
if (id->ncap == 0 || (ns->ms && !blk_get_integrity(disk)))
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 0adad4a..8dbd05e 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -117,8 +117,9 @@ struct nvme_ns {
unsigned ns_id;
int lba_shift;
- int ms;
- int pi_type;
+ u16 ms;
+ bool ext;
+ u8 pi_type;
u64 mode_select_num_blocks;
u32 mode_select_block_len;
};
--
1.7.10.4
More information about the Linux-nvme
mailing list