[PATCH 7/8] nvmet: add a generic NVMe target

Christoph Hellwig hch at lst.de
Sat Nov 7 09:00:42 PST 2015


Signed-off-by: Christoph Hellwig <hch at lst.de>
---
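Note on driving the configfs interface added below: the module registers a "nvmet" configfs
subsystem whose "subsystems" group holds one directory per NVMe subsystem, each with
"namespaces" and "controllers" child groups; writing a block device path to a namespace's
device_path attribute ends up in nvmet_ns_enable().  A rough userspace sketch, not part of
the patch (the configfs mount point is the usual one, and the "testsubsys" name and
/dev/ram0 backing device are only examples):

/* example only: create one subsystem with one namespace backed by /dev/ram0 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
	const char *ns_dir =
		"/sys/kernel/config/nvmet/subsystems/testsubsys/namespaces/1";
	char attr[256];
	int fd;

	/* mkdir allocates the subsystem and the namespace (directory name == NSID) */
	if (mkdir("/sys/kernel/config/nvmet/subsystems/testsubsys", 0755) ||
	    mkdir(ns_dir, 0755))
		return 1;

	/* writing device_path calls nvmet_ns_enable() in the target */
	snprintf(attr, sizeof(attr), "%s/device_path", ns_dir);
	fd = open(attr, O_WRONLY);
	if (fd < 0)
		return 1;
	if (write(fd, "/dev/ram0", strlen("/dev/ram0")) < 0)
		return 1;
	close(fd);
	return 0;
}
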
 drivers/nvme/Kconfig            |   1 +
 drivers/nvme/Makefile           |   1 +
 drivers/nvme/target/Kconfig     |   4 +
 drivers/nvme/target/Makefile    |   4 +
 drivers/nvme/target/admin-cmd.c | 353 +++++++++++++++++++++++++++++++
 drivers/nvme/target/configfs.c  | 205 ++++++++++++++++++
 drivers/nvme/target/core.c      | 454 ++++++++++++++++++++++++++++++++++++++++
 drivers/nvme/target/io-cmd.c    | 114 ++++++++++
 drivers/nvme/target/nvmet.h     | 172 +++++++++++++++
 9 files changed, 1308 insertions(+)
 create mode 100644 drivers/nvme/target/Kconfig
 create mode 100644 drivers/nvme/target/Makefile
 create mode 100644 drivers/nvme/target/admin-cmd.c
 create mode 100644 drivers/nvme/target/configfs.c
 create mode 100644 drivers/nvme/target/core.c
 create mode 100644 drivers/nvme/target/io-cmd.c
 create mode 100644 drivers/nvme/target/nvmet.h
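
For context on the core API in nvmet.h: a transport driver is expected to embed a
struct nvmet_req in its per-command context, initialize it with nvmet_req_init(), map
the host data into req->sg, and then call req->execute(); the core answers through the
queue_response callback it was given, via nvmet_req_complete().  A minimal sketch of
that flow, not part of the patch, with all nvmet_my_* names made up for illustration:

#include <linux/kernel.h>
#include "nvmet.h"

/* hypothetical per-command context kept by a transport driver */
struct nvmet_my_cmd {
	struct nvme_command	cmd;	/* command as received from the host */
	struct nvme_completion	rsp;	/* completion returned to the host */
	struct nvmet_req	req;
};

static void nvmet_my_queue_response(struct nvmet_req *req)
{
	struct nvmet_my_cmd *c = container_of(req, struct nvmet_my_cmd, req);

	/* nvmet_req_complete() filled in c->rsp; return it to the host here */
	pr_debug("completing command 0x%x\n", c->rsp.command_id);
}

static void nvmet_my_handle_cmd(struct nvmet_my_cmd *c,
		struct nvmet_cq *cq, struct nvmet_sq *sq)
{
	u16 status;

	c->req.cmd = &c->cmd;
	c->req.rsp = &c->rsp;

	status = nvmet_req_init(&c->req, cq, sq, nvmet_my_queue_response);
	if (status) {
		nvmet_req_complete(&c->req, status);
		return;
	}

	/*
	 * For commands that carry data the transport maps req->data_len
	 * bytes of host data into c->req.sg / c->req.sg_cnt here, then
	 * lets the core run the command.
	 */
	c->req.execute(&c->req);
}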

diff --git a/drivers/nvme/Kconfig b/drivers/nvme/Kconfig
index a39d943..b7c78a5 100644
--- a/drivers/nvme/Kconfig
+++ b/drivers/nvme/Kconfig
@@ -1 +1,2 @@
 source "drivers/nvme/host/Kconfig"
+source "drivers/nvme/target/Kconfig"
diff --git a/drivers/nvme/Makefile b/drivers/nvme/Makefile
index 9421e82..0096a7f 100644
--- a/drivers/nvme/Makefile
+++ b/drivers/nvme/Makefile
@@ -1,2 +1,3 @@
 
 obj-y		+= host/
+obj-y		+= target/
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
new file mode 100644
index 0000000..9a3d742
--- /dev/null
+++ b/drivers/nvme/target/Kconfig
@@ -0,0 +1,4 @@
+
+config NVME_TARGET
+	depends on BLOCK
+	tristate
diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile
new file mode 100644
index 0000000..9c236e4
--- /dev/null
+++ b/drivers/nvme/target/Makefile
@@ -0,0 +1,4 @@
+
+obj-$(CONFIG_NVME_TARGET)		+= nvmet.o
+
+nvmet-y		+= core.o configfs.o admin-cmd.o io-cmd.o
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
new file mode 100644
index 0000000..d9db0d4
--- /dev/null
+++ b/drivers/nvme/target/admin-cmd.c
@@ -0,0 +1,353 @@
+/*
+ * Copyright (c) 2015 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/blkdev.h>
+#include <linux/module.h>
+#include "nvmet.h"
+
+static void nvmet_execute_get_error_log(struct nvmet_req *req)
+{
+	void *buf;
+
+	/*
+	 * We currently never set the More bit in the status field,
+	 * so all error log entries are invalid and can be zeroed out.
+	 * This is called a minimum viable implementation (TM) of this
+	 * mandatory log page.
+	 */
+	buf = kmap_atomic(sg_page(req->sg)) + req->sg->offset;
+	memset(buf, 0, req->data_len);
+	kunmap_atomic(buf);
+
+	nvmet_req_complete(req, 0);
+}
+
+static void nvmet_execute_get_smart_log(struct nvmet_req *req)
+{
+	struct nvme_smart_log *log;
+
+	/*
+	 * XXX: fill out actual smart log
+	 *
+	 * We might have a hard time coming up with useful values for many
+	 * of the fields, and even when we have useful data available
+	 * (e.g. units or commands read/written) those aren't persistent
+	 * over power loss.
+	 */
+	log = kmap_atomic(sg_page(req->sg)) + req->sg->offset;
+	memset(log, 0, req->data_len);
+	kunmap_atomic(log);
+
+	nvmet_req_complete(req, 0);
+}
+
+static void nvmet_execute_get_fwslot_log(struct nvmet_req *req)
+{
+	void *buf;
+
+	/*
+	 * We only support a single firmware slot, which is always active,
+	 * so we can zero out the whole firmware slot log and still claim
+	 * to fully implement this mandatory log page.
+	 */
+	buf = kmap_atomic(sg_page(req->sg)) + req->sg->offset;
+	memset(buf, 0, req->data_len);
+	kunmap_atomic(buf);
+
+	nvmet_req_complete(req, 0);
+}
+
+static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
+{
+	struct nvmet_ctrl *ctrl = req->sq->ctrl;
+	struct nvme_id_ctrl *id;
+
+	id = kmap_atomic(sg_page(req->sg)) + req->sg->offset;
+	memset(id, 0, sizeof(*id));
+
+	/* XXX: figure out how to assign real vendor IDs. */
+	id->vid = 0;
+	id->ssvid = 0;
+
+	/* XXX: figure out real serial / model / revision values */
+	memset(id->sn, ' ', sizeof(id->sn));
+	memset(id->mn, ' ', sizeof(id->mn));
+	memset(id->fr, ' ', sizeof(id->fr));
+	memcpy(id->mn, "Fake NVMe", sizeof("Fake NVMe") - 1);
+
+	id->rab = 6;
+
+	/* XXX: figure out a real IEEE OUI */
+	id->ieee[0] = 0x00;
+	id->ieee[1] = 0x02;
+	id->ieee[2] = 0xb3;
+
+	/* we may have multiple controllers attached to the subsystem */
+	id->mic = (1 << 1);
+
+	/* no limit on data transfer sizes for now */
+	id->mdts = 0;
+	id->cntlid = cpu_to_le16(ctrl->cntlid);
+	id->ver = cpu_to_le32(ctrl->subsys->ver);
+
+	/* XXX: figure out what to do about RTD3R/RTD3 */
+
+	id->oacs = 0;
+	id->acl = 3;
+	id->aerl = 3;
+
+	/* first slot is read-only, only one slot supported */
+	id->frmw = (1 << 0) | (1 << 1);
+	id->lpa = 1 << 0;
+#define NVMET_ERROR_LOG_SLOTS	128
+	id->elpe = NVMET_ERROR_LOG_SLOTS - 1;
+	id->npss = 0;
+
+	id->sqes = (0x6 << 4) | 0x6;
+	id->cqes = (0x4 << 4) | 0x4;
+	id->nn = cpu_to_le32(ctrl->subsys->max_nsid);
+
+	/* XXX: don't report vwc if the underlying device is write through */
+	id->vwc = NVME_CTRL_VWC_PRESENT;
+
+	/*
+	 * We can't support atomic writes bigger than an LBA without support
+	 * from the backend device.
+	 */
+	id->awun = 0;
+	id->awupf = 0;
+
+	/*
+	 * We support SGLs, but nothing fancy.
+	 */
+	id->sgls = (1 << 0);
+
+	/*
+	 * Meh, we don't really support any power state.  Fake up the same
+	 * values that qemu does.
+	 */
+	id->psd[0].max_power = cpu_to_le16(0x9c4);
+	id->psd[0].entry_lat = cpu_to_le32(0x10);
+	id->psd[0].exit_lat = cpu_to_le32(0x4);
+
+	kunmap_atomic(id);
+
+	nvmet_req_complete(req, 0);
+}
+
+static void nvmet_execute_identify_ns(struct nvmet_req *req)
+{
+	struct nvmet_ns *ns;
+	struct nvme_id_ns *id;
+	u16 status = 0;
+
+	ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid);
+	if (!ns) {
+		status = NVME_SC_INVALID_NS | NVME_SC_DNR;
+		goto out;
+	}
+
+	id = kmap_atomic(sg_page(req->sg)) + req->sg->offset;
+	memset(id, 0, sizeof(*id));
+
+	/*
+	 * nuse = ncap = nsze isn't always true, but we have no way to find
+	 * that out from the underlying device.
+	 */
+	id->ncap = id->nuse = id->nsze =
+		cpu_to_le64(ns->size >> ns->blksize_shift);
+
+	/*
+	 * We just provide a single LBA format that matches what the
+	 * underlying device reports.
+	 */
+	id->nlbaf = 0;
+	id->flbas = 0;
+
+	/*
+	 * Our namespace might always be shared.  Not just with other
+	 * controllers, but also with any other user of the block device.
+	 */
+	id->nmic = (1 << 0);
+
+	/* XXX: provide a nguid value! */
+
+	id->lbaf[0].ds = ns->blksize_shift;
+
+	kunmap_atomic(id);
+
+	nvmet_put_namespace(ns);
+out:
+	nvmet_req_complete(req, status);
+}
+
+static void nvmet_execute_identify_nslist(struct nvmet_req *req)
+{
+	struct nvmet_ctrl *ctrl = req->sq->ctrl;
+	struct nvmet_ns *ns;
+	u32 min_nsid = le32_to_cpu(req->cmd->identify.nsid);
+	__le32 *list;
+	int i = 0;
+
+	list = kmap_atomic(sg_page(req->sg)) + req->sg->offset;
+	rcu_read_lock();
+	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
+		if (ns->nsid <= min_nsid)
+			continue;
+		list[i++] = cpu_to_le32(ns->nsid);
+		if (i == req->data_len / sizeof(__le32))
+			goto out;
+	}
+
+	list[i] = 0;
+out:
+	rcu_read_unlock();
+	kunmap_atomic(list);
+
+	nvmet_req_complete(req, 0);
+}
+
+static void nvmet_execute_set_features(struct nvmet_req *req)
+{
+	struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
+	u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]);
+	u16 status = 0;
+
+	switch (cdw10 & 0xf) {
+	case NVME_FEAT_NUM_QUEUES:
+		nvmet_set_result(req,
+			subsys->max_qid | (subsys->max_qid << 16));
+		break;
+	default:
+		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+		break;
+	}
+
+	nvmet_req_complete(req, status);
+}
+
+static void nvmet_execute_get_features(struct nvmet_req *req)
+{
+	struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
+	u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]);
+	u16 status = 0;
+
+	switch (cdw10 & 0xf) {
+	/*
+	 * These features are mandatory in the spec, but we don't
+	 * have a useful way to implement them.  We'll eventually
+	 * need to come up with some fake values for these.
+	 */
+#if 0
+	case NVME_FEAT_ARBITRATION:
+		break;
+	case NVME_FEAT_POWER_MGMT:
+		break;
+	case NVME_FEAT_TEMP_THRESH:
+		break;
+	case NVME_FEAT_ERR_RECOVERY:
+		break;
+	case NVME_FEAT_IRQ_COALESCE:
+		break;
+	case NVME_FEAT_IRQ_CONFIG:
+		break;
+	case NVME_FEAT_WRITE_ATOMIC:
+		break;
+	case NVME_FEAT_ASYNC_EVENT:
+		break;
+#endif
+	case NVME_FEAT_VOLATILE_WC:
+		nvmet_set_result(req, 1);
+		break;
+	case NVME_FEAT_NUM_QUEUES:
+		nvmet_set_result(req,
+			subsys->max_qid | (subsys->max_qid << 16));
+		break;
+	default:
+		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+		break;
+	}
+
+	nvmet_req_complete(req, status);
+}
+
+static inline u32 nvmet_get_log_page_len(struct nvme_command *cmd)
+{
+	u32 cdw10 = le32_to_cpu(cmd->common.cdw10[0]);
+
+	return ((cdw10 >> 16) & 0xff) * sizeof(u32);
+}
+
+int nvmet_parse_admin_cmd(struct nvmet_req *req)
+{
+	struct nvme_command *cmd = req->cmd;
+
+	req->ns = NULL;
+
+	switch (cmd->common.opcode) {
+	case nvme_admin_get_log_page:
+		req->data_len = nvmet_get_log_page_len(cmd);
+
+		switch (le32_to_cpu(cmd->common.cdw10[0]) & 0xf) {
+		case 0x01:
+			req->execute = nvmet_execute_get_error_log;
+			return 0;
+		case 0x02:
+			req->execute = nvmet_execute_get_smart_log;
+			return 0;
+		case 0x03:
+			req->execute = nvmet_execute_get_fwslot_log;
+			return 0;
+		}
+		break;
+	case nvme_admin_identify:
+		switch (cmd->identify.cns) {
+		case 0x00:
+			req->execute = nvmet_execute_identify_ns;
+			req->data_len = sizeof(struct nvme_id_ns);
+			return 0;
+		case 0x01:
+			req->execute = nvmet_execute_identify_ctrl;
+			req->data_len = sizeof(struct nvme_id_ctrl);
+			return 0;
+		case 0x02:
+			req->execute = nvmet_execute_identify_nslist;
+			req->data_len = 4096;
+			return 0;
+		}
+		break;
+#if 0
+	case nvme_admin_abort_cmd:
+		req->execute = nvmet_execute_abort;
+		req->data_len = 0;
+		return 0;
+#endif
+	case nvme_admin_set_features:
+		req->execute = nvmet_execute_set_features;
+		req->data_len = 0;
+		return 0;
+	case nvme_admin_get_features:
+		req->execute = nvmet_execute_get_features;
+		req->data_len = 0;
+		return 0;
+#if 0
+	case nvme_admin_async_event:
+		req->execute = nvmet_execute_aen;
+		req->data_len = 0;
+		return 0;
+#endif
+	}
+
+	pr_err("unhandled cmd %d\n", cmd->common.opcode);
+	return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+}
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
new file mode 100644
index 0000000..7dcdc58e
--- /dev/null
+++ b/drivers/nvme/target/configfs.c
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2015 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+
+#include "nvmet.h"
+
+
+CONFIGFS_ATTR_STRUCT(nvmet_ns);
+CONFIGFS_ATTR_OPS(nvmet_ns);
+
+static ssize_t nvmet_ns_device_path_show(struct nvmet_ns *ns, char *page)
+{
+	return sprintf(page, "%s\n", ns->device_path);
+}
+
+static ssize_t nvmet_ns_device_path_store(struct nvmet_ns *ns, const char *page,
+		size_t count)
+{
+	int ret = nvmet_ns_enable(ns, page);
+
+	return ret ? ret : count;
+}
+
+static struct nvmet_ns_attribute nvmet_ns_attr_device_path = {
+	.attr = {
+		.ca_name	= "device_path",
+		.ca_mode	= S_IRUSR | S_IWUSR,
+		.ca_owner	= THIS_MODULE,
+	},
+	.show			= nvmet_ns_device_path_show,
+	.store			= nvmet_ns_device_path_store,
+};
+
+static struct configfs_attribute *nvmet_ns_attrs[] = {
+	&nvmet_ns_attr_device_path.attr,
+	NULL,
+};
+
+static void nvmet_ns_release(struct config_item *item)
+{
+	struct nvmet_ns *ns = to_nvmet_ns(item);
+
+	nvmet_ns_free(ns);
+}
+
+static struct configfs_item_operations nvmet_ns_item_ops = {
+	.release		= nvmet_ns_release,
+	.show_attribute		= nvmet_ns_attr_show,
+	.store_attribute        = nvmet_ns_attr_store,
+};
+
+static struct config_item_type nvmet_ns_type = {
+	.ct_item_ops		= &nvmet_ns_item_ops,
+	.ct_attrs		= nvmet_ns_attrs,
+	.ct_owner		= THIS_MODULE,
+};
+
+static struct config_group *nvmet_ns_make(struct config_group *group,
+		const char *name)
+{
+	struct nvmet_subsys *subsys = namespaces_to_subsys(&group->cg_item);
+	struct nvmet_ns *ns;
+	int ret;
+	u32 nsid;
+
+	ret = kstrtou32(name, 0, &nsid);
+	if (ret)
+		goto out;
+
+	ret = -EINVAL;
+	if (nsid == 0 || nsid == 0xffffffff)
+		goto out;
+
+	ret = -ENOMEM;
+	ns = nvmet_ns_alloc(subsys, nsid);
+	if (!ns)
+		goto out;
+	config_group_init_type_name(&ns->group, name, &nvmet_ns_type);
+
+	pr_info("adding nsid %d to subsystem %s\n", nsid, subsys->subsys_name);
+
+	return &ns->group;
+out:
+	return ERR_PTR(ret);
+}
+
+static struct configfs_group_operations nvmet_namespaces_group_ops = {
+	.make_group		= nvmet_ns_make,
+};
+
+static struct config_item_type nvmet_namespaces_type = {
+	.ct_group_ops		= &nvmet_namespaces_group_ops,
+	.ct_owner		= THIS_MODULE,
+};
+
+static struct config_item_type nvmet_controllers_type = {
+	.ct_owner		= THIS_MODULE,
+};
+
+static void nvmet_subsys_release(struct config_item *item)
+{
+	struct nvmet_subsys *subsys = to_subsys(item);
+
+	nvmet_subsys_free(subsys);
+}
+
+static struct configfs_item_operations nvmet_subsys_item_ops = {
+	.release		= nvmet_subsys_release,
+};
+
+static struct config_item_type nvmet_subsys_type = {
+	.ct_item_ops		= &nvmet_subsys_item_ops,
+	.ct_owner		= THIS_MODULE,
+};
+
+static struct config_group *nvmet_subsys_make(struct config_group *group,
+		const char *name)
+{
+	struct nvmet_subsys *subsys;
+
+	subsys = nvmet_subsys_alloc(name);
+	if (!subsys)
+		return ERR_PTR(-ENOMEM);
+
+	config_group_init_type_name(&subsys->group, name, &nvmet_subsys_type);
+
+	config_group_init_type_name(&subsys->namespaces_group,
+			"namespaces", &nvmet_namespaces_type);
+	config_group_init_type_name(&subsys->controllers_group,
+			"controllers", &nvmet_controllers_type);
+
+	subsys->default_groups[0] = &subsys->namespaces_group;
+	subsys->default_groups[1] = &subsys->controllers_group;
+	subsys->default_groups[2] = NULL;
+
+	subsys->group.default_groups = subsys->default_groups;
+	return &subsys->group;
+}
+
+static struct configfs_group_operations nvmet_subsystems_group_ops = {
+	.make_group		= nvmet_subsys_make,
+};
+
+static struct config_item_type nvmet_subsystems_type = {
+	.ct_group_ops		= &nvmet_subsystems_group_ops,
+	.ct_owner		= THIS_MODULE,
+};
+
+static struct config_group nvmet_subsystems_group;
+
+static struct config_group *nvmet_root_default_groups[] = {
+	&nvmet_subsystems_group,
+	NULL,
+};
+
+static struct config_item_type nvmet_root_type = {
+	.ct_owner		= THIS_MODULE,
+};
+
+static struct configfs_subsystem nvmet_configfs_subsystem = {
+	.su_group = {
+		.cg_item = {
+			.ci_namebuf	= "nvmet",
+			.ci_type	= &nvmet_root_type,
+		},
+		.default_groups = nvmet_root_default_groups,
+	},
+};
+
+int __init nvmet_init_configfs(void)
+{
+	int ret;
+
+	config_group_init(&nvmet_configfs_subsystem.su_group);
+	mutex_init(&nvmet_configfs_subsystem.su_mutex);
+
+	config_group_init_type_name(&nvmet_subsystems_group,
+			"subsystems", &nvmet_subsystems_type);
+
+	ret = configfs_register_subsystem(&nvmet_configfs_subsystem);
+	if (ret) {
+		pr_err("configfs_register_subsystem: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+void __exit nvmet_exit_configfs(void)
+{
+	configfs_unregister_subsystem(&nvmet_configfs_subsystem);
+}
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
new file mode 100644
index 0000000..5c770bf
--- /dev/null
+++ b/drivers/nvme/target/core.c
@@ -0,0 +1,454 @@
+/*
+ * Copyright (c) 2015 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/blkdev.h>
+#include <linux/module.h>
+#include "nvmet.h"
+
+static DEFINE_MUTEX(nvmet_subsystem_mutex);
+static LIST_HEAD(nvmet_subsystems);
+
+static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
+		__le32 nsid)
+{
+	struct nvmet_ns *ns;
+
+	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
+		if (ns->nsid == le32_to_cpu(nsid))
+			return ns;
+	}
+
+	return NULL;
+}
+
+struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
+{
+	struct nvmet_ns *ns;
+
+	rcu_read_lock();
+	ns = __nvmet_find_namespace(ctrl, nsid);
+	if (ns && !kref_get_unless_zero(&ns->ref))
+		ns = NULL;
+	rcu_read_unlock();
+
+	return ns;
+}
+
+static void nvmet_destroy_namespace(struct kref *ref)
+{
+	struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);
+
+	if (ns->bdev)
+		blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
+	kfree(ns->device_path);
+	kfree(ns);
+}
+
+void nvmet_put_namespace(struct nvmet_ns *ns)
+{
+	kref_put(&ns->ref, nvmet_destroy_namespace);
+}
+
+int nvmet_ns_enable(struct nvmet_ns *ns, const char *path)
+{
+	int ret;
+
+	mutex_lock(&ns->subsys->lock);
+	ret = -EBUSY;
+	if (ns->device_path)
+		goto out_unlock;
+
+	ret = -ENOMEM;
+	ns->device_path = kstrdup(path, GFP_KERNEL);
+	if (!ns->device_path)
+		goto out_unlock;
+
+	ns->bdev = blkdev_get_by_path(path, FMODE_READ|FMODE_WRITE, NULL);
+	if (IS_ERR(ns->bdev)) {
+		pr_err("failed to open block device %s: (%ld)\n",
+			path, PTR_ERR(ns->bdev));
+		ret = PTR_ERR(ns->bdev);
+		goto out_free_device_path;
+	}
+
+	ns->size = i_size_read(ns->bdev->bd_inode);
+	ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
+
+	if (ns->nsid > ns->subsys->max_nsid)
+		ns->subsys->max_nsid = ns->nsid;
+
+	list_add_rcu(&ns->dev_link, &ns->subsys->namespaces);
+	mutex_unlock(&ns->subsys->lock);
+
+	return 0;
+
+out_free_device_path:
+	kfree(ns->device_path);
+	ns->device_path = NULL;
+out_unlock:
+	mutex_unlock(&ns->subsys->lock);
+	return ret;
+}
+
+void nvmet_ns_free(struct nvmet_ns *ns)
+{
+	struct nvmet_subsys *subsys = ns->subsys;
+
+	mutex_lock(&subsys->lock);
+	if (!list_empty(&ns->dev_link))
+		list_del_init(&ns->dev_link);
+	mutex_unlock(&subsys->lock);
+
+	nvmet_put_namespace(ns);
+}
+
+struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
+{
+	struct nvmet_ns *ns;
+
+	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
+	if (!ns)
+		return NULL;
+
+	INIT_LIST_HEAD(&ns->dev_link);
+	kref_init(&ns->ref);
+	ns->nsid = nsid;
+	ns->subsys = subsys;
+	return ns;
+}
+
+void nvmet_req_complete(struct nvmet_req *req, u16 status)
+{
+	if (status)
+		nvmet_set_status(req, status);
+
+	/* XXX: need to fill in something useful for sq_head */
+	req->rsp->sq_head = 0;
+	req->rsp->sq_id = cpu_to_le16(req->sq->qid);
+	req->rsp->command_id = req->cmd->common.command_id;
+
+	if (req->ns)
+		nvmet_put_namespace(req->ns);
+	req->queue_response(req);
+}
+EXPORT_SYMBOL_GPL(nvmet_req_complete);
+
+void nvmet_cq_init(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
+		u16 qid, u16 size)
+{
+	cq->qid = qid;
+	cq->size = size;
+
+	ctrl->cqs[qid] = cq;
+}
+EXPORT_SYMBOL_GPL(nvmet_cq_init);
+
+void nvmet_sq_init(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
+		u16 qid, u16 size)
+{
+	sq->ctrl = ctrl;
+	sq->qid = qid;
+	sq->size = size;
+
+	ctrl->sqs[qid] = sq;
+}
+EXPORT_SYMBOL_GPL(nvmet_sq_init);
+
+void nvmet_sq_destroy(struct nvmet_sq *sq)
+{
+	if (sq->ctrl)
+		nvmet_ctrl_put(sq->ctrl);
+}
+EXPORT_SYMBOL_GPL(nvmet_sq_destroy);
+
+u16 nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
+		struct nvmet_sq *sq,
+		void (*queue_response)(struct nvmet_req *req))
+{
+	u16 status;
+
+	req->cq = cq;
+	req->sq = sq;
+	req->queue_response = queue_response;
+	req->sg = NULL;
+	req->sg_cnt = 0;
+	req->rsp->status = 0;
+
+	if (unlikely(req->sq->qid == 0))
+		status = nvmet_parse_admin_cmd(req);
+	else
+		status = nvmet_parse_io_cmd(req);
+
+	if (status)
+		return status;
+
+	if (unlikely(!req->sq->ctrl)) {
+		pr_err("queue not connected!\n");
+		return NVME_SC_QID_INVALID | NVME_SC_DNR;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nvmet_req_init);
+
+static inline bool nvmet_cc_en(u32 cc)
+{
+	return cc & 0x1;
+}
+
+static inline u8 nvmet_cc_css(u32 cc)
+{
+	return (cc >> 4) & 0x7;
+}
+
+static inline u8 nvmet_cc_mps(u32 cc)
+{
+	return (cc >> 7) & 0xf;
+}
+
+static inline u8 nvmet_cc_ams(u32 cc)
+{
+	return (cc >> 11) & 0x7;
+}
+
+static inline u8 nvmet_cc_shn(u32 cc)
+{
+	return (cc >> 14) & 0x3;
+}
+
+static inline u8 nvmet_cc_iosqes(u32 cc)
+{
+	return (cc >> 16) & 0xf;
+}
+
+static inline u8 nvmet_cc_iocqes(u32 cc)
+{
+	return (cc >> 20) & 0xf;
+}
+
+static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
+{
+#if 0
+	    nvmet_cc_iosqes(n->bar.cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.sqes) ||
+	    nvmet_cc_iosqes(n->bar.cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.sqes) ||
+	    nvmet_cc_iocqes(n->bar.cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes) ||
+	    nvmet_cc_iocqes(n->bar.cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes) ||
+#endif
+	if (nvmet_cc_mps(ctrl->cc) != 0 ||
+	    nvmet_cc_ams(ctrl->cc) != 0 ||
+	    nvmet_cc_css(ctrl->cc) != 0) {
+		ctrl->csts = NVME_CSTS_CFS;
+		return;
+	}
+
+	ctrl->csts = NVME_CSTS_RDY;
+}
+
+static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
+{
+	/* XXX: tear down queues? */
+	ctrl->csts &= ~NVME_CSTS_RDY;
+	ctrl->cc = 0;
+}
+
+void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
+{
+	u32 old;
+
+	/* XXX: locking? */
+	old = ctrl->cc;
+	ctrl->cc = new;
+
+	if (nvmet_cc_en(new) && !nvmet_cc_en(old))
+		nvmet_start_ctrl(ctrl);
+	if (!nvmet_cc_en(new) && nvmet_cc_en(old))
+		nvmet_clear_ctrl(ctrl);
+	if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
+		nvmet_clear_ctrl(ctrl);
+		ctrl->csts |= NVME_CSTS_SHST_CMPLT;
+	}
+	if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
+		ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
+}
+EXPORT_SYMBOL_GPL(nvmet_update_cc);
+
+struct nvmet_ctrl *nvmet_ctrl_find_get(struct nvmet_subsys *subsys, u16 cntlid)
+{
+	struct nvmet_ctrl *ctrl;
+
+	lockdep_assert_held(&subsys->lock);
+
+	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+		if (ctrl->cntlid == cntlid) {
+			if (kref_get_unless_zero(&ctrl->ref))
+				return ctrl;
+			return NULL;
+		}
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(nvmet_ctrl_find_get);
+
+struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_subsys *subsys,
+		const char *subsys_name)
+{
+	struct nvmet_ctrl *ctrl;
+	int ret = -ENOMEM;
+
+	lockdep_assert_held(&subsys->lock);
+
+	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
+	if (!ctrl)
+		goto out;
+
+	/* command sets supported: NVMe command set: */
+	ctrl->cap |= (1ULL << 37);
+	/* CC.EN timeout in 500msec units: */
+	ctrl->cap |= (15ULL << 24);
+	/* maximum queue entries supported: */
+	ctrl->cap |= NVMET_QUEUE_SIZE - 1;
+
+	strlcpy(ctrl->subsys_name, subsys_name, NVMET_SUBSYS_NAME_LEN);
+
+	kref_init(&ctrl->ref);
+	ctrl->subsys = subsys;
+
+	ctrl->cqs = kcalloc(subsys->max_qid + 1,
+			sizeof(struct nvmet_cq *),
+			GFP_KERNEL);
+	if (!ctrl->cqs)
+		goto out_free_ctrl;
+
+	ctrl->sqs = kcalloc(subsys->max_qid + 1,
+			sizeof(struct nvmet_sq *),
+			GFP_KERNEL);
+	if (!ctrl->sqs)
+		goto out_free_cqs;
+
+	ret = ida_simple_get(&subsys->cntlid_ida, 0, USHRT_MAX - 1,
+			GFP_KERNEL);
+	if (ret < 0)
+		goto out_free_sqs;
+	ctrl->cntlid = ret;
+
+	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
+	return ctrl;
+
+out_free_sqs:
+	kfree(ctrl->sqs);
+out_free_cqs:
+	kfree(ctrl->cqs);
+out_free_ctrl:
+	kfree(ctrl);
+out:
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(nvmet_alloc_ctrl);
+
+static void nvmet_ctrl_free(struct kref *ref)
+{
+	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
+	struct nvmet_subsys *subsys = ctrl->subsys;
+
+	mutex_lock(&ctrl->subsys->lock);
+	list_del(&ctrl->subsys_entry);
+	mutex_unlock(&ctrl->subsys->lock);
+
+	mutex_lock(&subsys->lock);
+	ida_simple_remove(&subsys->cntlid_ida, ctrl->cntlid);
+	mutex_unlock(&subsys->lock);
+
+	kfree(ctrl->sqs);
+	kfree(ctrl->cqs);
+	kfree(ctrl);
+}
+
+void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
+{
+	kref_put(&ctrl->ref, nvmet_ctrl_free);
+}
+
+struct nvmet_subsys *nvmet_find_subsys(char *subsys_name)
+{
+	struct nvmet_subsys *subsys;
+
+	mutex_lock(&nvmet_subsystem_mutex);
+	list_for_each_entry(subsys, &nvmet_subsystems, entry) {
+		if (!strncmp(subsys->subsys_name, subsys_name,
+				NVMET_SUBSYS_NAME_LEN)) {
+			/* XXX: need to start refcounting subsystems.. */
+			mutex_unlock(&nvmet_subsystem_mutex);
+			return subsys;
+		}
+	}
+	mutex_unlock(&nvmet_subsystem_mutex);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(nvmet_find_subsys);
+
+struct nvmet_subsys *nvmet_subsys_alloc(const char *subsys_name)
+{
+	struct nvmet_subsys *subsys;
+
+	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
+	if (!subsys)
+		return NULL;
+
+	subsys->subsys_name = kstrndup(subsys_name, NVMET_SUBSYS_NAME_LEN,
+			GFP_KERNEL);
+	if (!subsys->subsys_name) {
+		kfree(subsys);
+		return NULL;
+	}
+
+	mutex_init(&subsys->lock);
+	INIT_LIST_HEAD(&subsys->namespaces);
+	INIT_LIST_HEAD(&subsys->ctrls);
+
+	ida_init(&subsys->cntlid_ida);
+	subsys->max_qid = NVMET_NR_QUEUES;
+
+	mutex_lock(&nvmet_subsystem_mutex);
+	list_add_tail(&subsys->entry, &nvmet_subsystems);
+	mutex_unlock(&nvmet_subsystem_mutex);
+
+	return subsys;
+}
+
+void nvmet_subsys_free(struct nvmet_subsys *subsys)
+{
+	WARN_ON_ONCE(!list_empty(&subsys->namespaces));
+
+	mutex_lock(&nvmet_subsystem_mutex);
+	list_del(&subsys->entry);
+	mutex_unlock(&nvmet_subsystem_mutex);
+
+	kfree(subsys->subsys_name);
+	kfree(subsys);
+}
+
+static int __init nvmet_init(void)
+{
+	return nvmet_init_configfs();
+}
+
+static void __exit nvmet_exit(void)
+{
+	nvmet_exit_configfs();
+}
+
+module_init(nvmet_init);
+module_exit(nvmet_exit);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
new file mode 100644
index 0000000..2cf1811
--- /dev/null
+++ b/drivers/nvme/target/io-cmd.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2015 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/blkdev.h>
+#include <linux/module.h>
+#include "nvmet.h"
+
+static void nvmet_bio_done(struct bio *bio)
+{
+	nvmet_req_complete(bio->bi_private,
+		bio->bi_error ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+	bio_put(bio);
+}
+
+static void nvmet_execute_rw(struct nvmet_req *req)
+{
+	int sg_cnt = req->sg_cnt;
+	struct scatterlist *sg;
+	struct bio *bio;
+	sector_t sector;
+	int rw, i;
+
+	if (!req->sg_cnt) {
+		nvmet_req_complete(req, 0);
+		return;
+	}
+
+	if (req->cmd->rw.opcode == nvme_cmd_write) {
+		if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
+			rw = WRITE_FUA;
+		else
+			rw = WRITE;
+	} else {
+		rw = READ;
+	}
+
+	sector = le64_to_cpu(req->cmd->rw.slba);
+	sector <<= (req->ns->blksize_shift - 9);
+
+	bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
+	bio->bi_bdev = req->ns->bdev;
+	bio->bi_iter.bi_sector = sector;
+	bio->bi_private = req;
+	bio->bi_end_io = nvmet_bio_done;
+
+	for_each_sg(req->sg, sg, req->sg_cnt, i) {
+		if (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
+				!= sg->length) {
+			struct bio *prev = bio;
+
+			bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
+			bio->bi_bdev = req->ns->bdev;
+			bio->bi_iter.bi_sector = sector;
+
+			bio_chain(bio, prev);
+			submit_bio(rw, prev);
+		}
+
+		sector += sg->length >> 9;
+		sg_cnt--;
+	}
+
+	submit_bio(rw, bio);
+}
+
+static void nvmet_execute_flush(struct nvmet_req *req)
+{
+	struct bio *bio = bio_alloc(GFP_KERNEL, 0);
+
+	bio->bi_bdev = req->ns->bdev;
+	bio->bi_private = req;
+	bio->bi_end_io = nvmet_bio_done;
+
+	submit_bio(WRITE_FLUSH, bio);
+}
+
+int nvmet_parse_io_cmd(struct nvmet_req *req)
+{
+	struct nvme_command *cmd = req->cmd;
+
+	req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
+	if (!req->ns)
+		return NVME_SC_INVALID_NS | NVME_SC_DNR;
+
+	switch (cmd->common.opcode) {
+	case nvme_cmd_read:
+		req->execute = nvmet_execute_rw;
+		req->data_len = ((u32)le16_to_cpu(cmd->rw.length) + 1) <<
+				req->ns->blksize_shift;
+		return 0;
+	case nvme_cmd_write:
+		req->execute = nvmet_execute_rw;
+		req->data_len = ((u32)le16_to_cpu(cmd->rw.length) + 1) <<
+				req->ns->blksize_shift;
+		return 0;
+	case nvme_cmd_flush:
+		req->execute = nvmet_execute_flush;
+		req->data_len = 0;
+		return 0;
+	default:
+		pr_err("unhandled cmd %d\n", cmd->common.opcode);
+		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+	}
+}
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
new file mode 100644
index 0000000..9335584
--- /dev/null
+++ b/drivers/nvme/target/nvmet.h
@@ -0,0 +1,172 @@
+#ifndef _NVMET_H
+#define _NVMET_H
+
+#include <linux/dma-mapping.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/nvme.h>
+#include <linux/configfs.h>
+#include <linux/rcupdate.h>
+
+struct nvmet_ns {
+	struct list_head	dev_link;
+	struct kref		ref;
+	struct block_device	*bdev;
+	u32			nsid;
+	u32			blksize_shift;
+	loff_t			size;
+
+	struct nvmet_subsys	*subsys;
+	const char		*device_path;
+
+	struct config_group	device_group;
+	struct config_group	default_groups[2];
+	struct config_group	group;
+	struct rcu_head		rcu;
+};
+
+static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
+{
+	return container_of(to_config_group(item), struct nvmet_ns, group);
+}
+
+struct nvmet_cq {
+	u16			qid;
+	u16			size;
+};
+
+struct nvmet_sq {
+	struct nvmet_ctrl	*ctrl;
+	u16			qid;
+	u16			size;
+};
+
+struct nvmet_ctrl {
+	struct nvmet_subsys	*subsys;
+	struct nvmet_cq		**cqs;
+	struct nvmet_sq		**sqs;
+
+	u64			cap;
+	u32			cc;
+	u32			csts;
+
+	u16			cntlid;
+
+	struct list_head	subsys_entry;
+	struct kref		ref;
+#define NVMET_SUBSYS_NAME_LEN		256
+	char			subsys_name[NVMET_SUBSYS_NAME_LEN];
+};
+
+struct nvmet_subsys {
+	struct mutex		lock;
+
+	struct list_head	namespaces;
+	unsigned int		max_nsid;
+
+	struct list_head	ctrls;
+	struct ida		cntlid_ida;
+
+	u16			max_qid;
+
+	u64			ver;
+	char			*subsys_name;
+
+	struct list_head	entry;
+	struct config_group	group;
+
+	struct config_group	namespaces_group;
+	struct config_group	controllers_group;
+	struct config_group	*default_groups[3];
+};
+
+static inline struct nvmet_subsys *to_subsys(struct config_item *item)
+{
+	return container_of(to_config_group(item), struct nvmet_subsys, group);
+}
+
+static inline struct nvmet_subsys *namespaces_to_subsys(
+		struct config_item *item)
+{
+	return container_of(to_config_group(item), struct nvmet_subsys,
+			namespaces_group);
+}
+
+struct nvmet_req {
+	struct nvme_command	*cmd;
+	struct nvme_completion	*rsp;
+	struct nvmet_sq		*sq;
+	struct nvmet_cq		*cq;
+	struct nvmet_ns		*ns;
+	struct scatterlist	*sg;
+	int			sg_cnt;
+	size_t			data_len;
+
+	void (*execute)(struct nvmet_req *req);
+	void (*queue_response)(struct nvmet_req *req);
+};
+
+static inline void nvmet_set_status(struct nvmet_req *req, u16 status)
+{
+	req->rsp->status = cpu_to_le16(status << 1);
+}
+
+static inline void nvmet_set_result(struct nvmet_req *req, u32 result)
+{
+	req->rsp->result = cpu_to_le32(result);
+}
+
+static inline bool nvmet_is_write(struct nvmet_req *req)
+{
+	return req->cmd->common.opcode & 1;
+}
+
+/*
+ * NVMe command writes are actually DMA reads for us on the target side.
+ */
+static inline enum dma_data_direction
+nvmet_data_dir(struct nvmet_req *cmd)
+{
+	return nvmet_is_write(cmd) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+}
+
+int nvmet_parse_io_cmd(struct nvmet_req *req);
+int nvmet_parse_admin_cmd(struct nvmet_req *req);
+
+u16 nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
+		struct nvmet_sq *sq,
+		void (*queue_response)(struct nvmet_req *req));
+void nvmet_req_complete(struct nvmet_req *req, u16 status);
+
+void nvmet_cq_init(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid,
+		u16 size);
+void nvmet_sq_init(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, u16 qid,
+		u16 size);
+void nvmet_sq_destroy(struct nvmet_sq *sq);
+
+void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new);
+struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_subsys *subsys,
+		const char *subsys_name);
+struct nvmet_ctrl *nvmet_ctrl_find_get(struct nvmet_subsys *subsys, u16 cntlid);
+void nvmet_ctrl_put(struct nvmet_ctrl *ctrl);
+
+struct nvmet_subsys *nvmet_find_subsys(char *subsys_name);
+struct nvmet_subsys *nvmet_subsys_alloc(const char *subsys_name);
+void nvmet_subsys_free(struct nvmet_subsys *subsys);
+
+struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid);
+void nvmet_put_namespace(struct nvmet_ns *ns);
+int nvmet_ns_enable(struct nvmet_ns *ns, const char *path);
+struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid);
+void nvmet_ns_free(struct nvmet_ns *ns);
+
+#define NVMET_QUEUE_SIZE	1024
+#define NVMET_NR_QUEUES		64
+
+int __init nvmet_init_configfs(void);
+void __exit nvmet_exit_configfs(void);
+
+#endif /* _NVMET_H */
-- 
1.9.1