[PATCH V2 1/1] nvme: introduce generic per-namespace chardev

Minwoo Im minwoo.im.dev at gmail.com
Tue Apr 6 07:48:41 BST 2021


Userspace has not been allowed to I/O to device that's failed to
be initialized.  This patch introduces generic per-namespace character
device to allow userspace to I/O regardless the block device is there or
not.

The chardev naming convention will exactly be the same with the existing
blkdev's one.  nvme_set_generic_ns_name() introduced in this patch may
look like a duplication of nvme_set_disk_name(), but it's not allowing
controller path specific.  It just for per-namespace path specific.  So,
the naming will be:

	- /dev/nvme-generic-XnY

It also supports multipath which means it will not expose chardev for
the hidden namespace blkdevs (e.g., nvmeXcYnZ).  If
/dev/nvme-generic-XnY is created for the ns_head, then I/O request will
be routed to the controller-specified path by the iopolicy in the
subsystem.

This can be controlled by the module parameter `generic_ns` which is
turned on by default.

Signed-off-by: Minwoo Im <minwoo.im.dev at gmail.com>
Signed-off-by: Javier González <javier.gonz at samsung.com>
---
 drivers/nvme/host/core.c      | 172 +++++++++++++++++++++++++++++++++-
 drivers/nvme/host/multipath.c |  17 ++++
 drivers/nvme/host/nvme.h      |  25 +++++
 3 files changed, 212 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 83dc9403fbf4..a06634e7bad1 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -61,6 +61,10 @@ static bool streams;
 module_param(streams, bool, 0644);
 MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
 
+static bool generic_ns = true;
+module_param(generic_ns, bool, 0644);
+MODULE_PARM_DESC(generic_ns, "support generic namespace character device");
+
 /*
  * nvme_wq - hosts nvme related works that are not reset or delete
  * nvme_reset_wq - hosts nvme reset works
@@ -85,8 +89,11 @@ static LIST_HEAD(nvme_subsystems);
 static DEFINE_MUTEX(nvme_subsystems_lock);
 
 static DEFINE_IDA(nvme_instance_ida);
+static DEFINE_IDA(nvme_generic_ns_minor_ida);
 static dev_t nvme_ctrl_base_chr_devt;
+static dev_t nvme_generic_ns_devt;
 static struct class *nvme_class;
+static struct class *nvme_generic_ns_class;
 static struct class *nvme_subsys_class;
 
 static void nvme_put_subsystem(struct nvme_subsystem *subsys);
@@ -542,6 +549,7 @@ static void nvme_free_ns_head(struct kref *ref)
 	struct nvme_ns_head *head =
 		container_of(ref, struct nvme_ns_head, ref);
 
+	cdev_device_del(&head->generic_ns->cdev, &head->generic_ns->device);
 	nvme_mpath_remove_disk(head);
 	ida_simple_remove(&head->subsys->ns_ida, head->instance);
 	cleanup_srcu_struct(&head->srcu);
@@ -3859,6 +3867,126 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys,
 	return 0;
 }
 
+#ifdef CONFIG_NVME_MULTIPATH
+static int nvme_generic_ns_open(struct inode *inode, struct file *file)
+{
+	struct nvme_generic_ns *generic_ns = container_of(inode->i_cdev,
+			struct nvme_generic_ns, cdev);
+
+	if (!generic_ns->head->disk)
+		return -ENODEV;
+
+	file->private_data = generic_ns;
+	return nvme_ns_head_open(generic_ns->head->disk->part0, file->f_mode);
+}
+
+static int nvme_generic_ns_release(struct inode *inode, struct file *file)
+{
+	struct nvme_generic_ns *generic_ns = container_of(inode->i_cdev,
+			struct nvme_generic_ns, cdev);
+
+	nvme_ns_head_release(generic_ns->head->disk, file->f_mode);
+
+	return 0;
+}
+
+static long nvme_generic_ns_ioctl(struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	struct nvme_generic_ns *generic_ns = file->private_data;
+
+	return nvme_ioctl(generic_ns->head->disk->part0, file->f_mode, cmd, arg);
+}
+#else
+static int nvme_generic_ns_open(struct inode *inode, struct file *file)
+{
+	struct nvme_generic_ns *generic_ns = container_of(inode->i_cdev,
+			struct nvme_generic_ns, cdev);
+
+	if (!generic_ns->ns)
+		return -ENODEV;
+
+	file->private_data = generic_ns;
+	return nvme_open(generic_ns->ns->disk->part0, file->f_mode);
+}
+
+static int nvme_generic_ns_release(struct inode *inode, struct file *file)
+{
+	struct nvme_generic_ns *generic_ns = container_of(inode->i_cdev,
+			struct nvme_generic_ns, cdev);
+
+	nvme_release(generic_ns->ns->disk, file->f_mode);
+
+	return 0;
+}
+
+static long nvme_generic_ns_ioctl(struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	struct nvme_generic_ns *generic_ns = file->private_data;
+
+	return nvme_ioctl(generic_ns->ns->disk->part0, file->f_mode, cmd, arg);
+}
+#endif
+
+static const struct file_operations nvme_generic_ns_fops = {
+	.owner		= THIS_MODULE,
+	.open		= nvme_generic_ns_open,
+	.release	= nvme_generic_ns_release,
+	.unlocked_ioctl	= nvme_generic_ns_ioctl,
+	.compat_ioctl	= compat_ptr_ioctl,
+};
+
+static int nvme_alloc_generic_ns(struct nvme_ctrl *ctrl,
+		struct nvme_ns_head *head, struct nvme_ns *ns)
+{
+	struct nvme_generic_ns *generic_ns;
+	char name[DISK_NAME_LEN];
+	int minor;
+	int ret;
+
+	generic_ns = kzalloc_node(sizeof(*generic_ns), GFP_KERNEL,
+			ctrl->numa_node);
+	if (!generic_ns)
+		return -ENOMEM;
+
+	ret = ida_simple_get(&nvme_generic_ns_minor_ida, 0, 0, GFP_KERNEL);
+	if (ret < 0)
+		goto free_ns;
+	minor = ret;
+
+	device_initialize(&generic_ns->device);
+	generic_ns->device.devt = MKDEV(MAJOR(nvme_generic_ns_devt), minor);
+	generic_ns->device.class = nvme_generic_ns_class;
+	generic_ns->device.parent = ctrl->device;
+	dev_set_drvdata(&generic_ns->device, generic_ns);
+
+	nvme_set_generic_ns_name(name, head, ctrl);
+	ret = dev_set_name(&generic_ns->device, "%s", name);
+	if (ret)
+		goto put_ida;
+
+	cdev_init(&generic_ns->cdev, &nvme_generic_ns_fops);
+	generic_ns->cdev.owner = ctrl->ops->module;
+
+	ret = cdev_device_add(&generic_ns->cdev, &generic_ns->device);
+	if (ret)
+		goto free_kobj;
+
+	head->generic_ns = generic_ns;
+	generic_ns->head = head;
+	generic_ns->ns = ns;
+	return 0;
+
+free_kobj:
+	kfree_const(generic_ns->device.kobj.name);
+put_ida:
+	ida_simple_remove(&nvme_generic_ns_minor_ida, minor);
+free_ns:
+	kfree(generic_ns);
+	return ret;
+}
+
 static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
 		unsigned nsid, struct nvme_ns_ids *ids)
 {
@@ -3952,6 +4080,13 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
 		}
 	}
 
+	if (generic_ns && nvme_alloc_generic_ns(ctrl, head, ns)) {
+		dev_err(ctrl->device,
+			"Failed to initialize generic namespace %d\n",
+			nsid);
+		goto out_put_ns_head;
+	}
+
 	list_add_tail_rcu(&ns->siblings, &head->list);
 	ns->head = head;
 	mutex_unlock(&ctrl->subsys->lock);
@@ -4038,8 +4173,17 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 	memcpy(disk->disk_name, disk_name, DISK_NAME_LEN);
 	ns->disk = disk;
 
-	if (nvme_update_ns_info(ns, id))
-		goto out_put_disk;
+	/*
+	 * If the namespace update fails in a graceful manner, hide the block
+	 * device, but still allow for the generic namespae device to be
+	 * craeted.
+	 */
+	if (nvme_update_ns_info(ns, id)) {
+		if (generic_ns)
+			ns->disk->flags |= GENHD_FL_HIDDEN;
+		else
+			goto out_put_disk;
+	}
 
 	if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
 		if (nvme_nvm_register(ns, disk_name, node)) {
@@ -4058,6 +4202,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 
 	nvme_mpath_add_disk(ns, id);
 	nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name);
+
 	kfree(id);
 
 	return;
@@ -4889,8 +5034,27 @@ static int __init nvme_core_init(void)
 		result = PTR_ERR(nvme_subsys_class);
 		goto destroy_class;
 	}
+
+	if (!generic_ns)
+		return 0;
+
+	result = alloc_chrdev_region(&nvme_generic_ns_devt, 0,
+			NVME_MINORS, "nvme-generic-ns");
+	if (result < 0)
+		goto destroy_subsys_class;
+
+	nvme_generic_ns_class = class_create(THIS_MODULE, "nvme-generic-ns");
+	if (IS_ERR(nvme_generic_ns_class)) {
+		result = PTR_ERR(nvme_subsys_class);
+		goto unregister_generic_ns;
+	}
+
 	return 0;
 
+unregister_generic_ns:
+	unregister_chrdev_region(nvme_generic_ns_devt, NVME_MINORS);
+destroy_subsys_class:
+	class_destroy(nvme_subsys_class);
 destroy_class:
 	class_destroy(nvme_class);
 unregister_chrdev:
@@ -4907,12 +5071,16 @@ static int __init nvme_core_init(void)
 
 static void __exit nvme_core_exit(void)
 {
+	if (generic_ns)
+		class_destroy(nvme_generic_ns_class);
 	class_destroy(nvme_subsys_class);
 	class_destroy(nvme_class);
+	unregister_chrdev_region(nvme_generic_ns_devt, NVME_MINORS);
 	unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
 	destroy_workqueue(nvme_delete_wq);
 	destroy_workqueue(nvme_reset_wq);
 	destroy_workqueue(nvme_wq);
+	ida_destroy(&nvme_generic_ns_minor_ida);
 	ida_destroy(&nvme_instance_ida);
 }
 
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index f2d0ce0f4d38..6dad52871e93 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -65,6 +65,23 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
 	}
 }
 
+/*
+ * It sets generic namespace chardev name based on the given namespace head.
+ * The naming convention follows the existing namespace disk name from
+ * nvme_set_disk_name(), but not allowing controller path specific.
+ */
+void nvme_set_generic_ns_name(char *dev_name, struct nvme_ns_head *head,
+			      struct nvme_ctrl *ctrl)
+{
+	if (!multipath) {
+		sprintf(dev_name, "nvme-generic-%dn%d",
+				ctrl->instance, head->instance);
+	} else {
+		sprintf(dev_name, "nvme-generic-%dn%d",
+				ctrl->subsys->instance, head->instance);
+	}
+}
+
 void nvme_failover_req(struct request *req)
 {
 	struct nvme_ns *ns = req->q->queuedata;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 815c032a190e..f6cc47316d0f 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -413,6 +413,8 @@ struct nvme_ns_head {
 	bool			shared;
 	int			instance;
 	struct nvme_effects_log *effects;
+	struct nvme_generic_ns	*generic_ns;
+
 #ifdef CONFIG_NVME_MULTIPATH
 	struct gendisk		*disk;
 	struct bio_list		requeue_list;
@@ -464,6 +466,20 @@ struct nvme_ns {
 
 };
 
+/*
+ * Generic character device for a namespace.  This device is to support I/O to
+ * namespace even it's failed to initialize due to some reasons (e.g.,
+ * unsupported protection information, etc).
+ */
+struct nvme_generic_ns {
+	struct device		device;
+	struct cdev		cdev;
+	struct nvme_ns_head	*head;
+
+	/* targted namespace instance.  only valid in non-multipath */
+	struct nvme_ns		*ns;
+};
+
 /* NVMe ns supports metadata actions by the controller (generate/strip) */
 static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
 {
@@ -665,6 +681,8 @@ void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys);
 void nvme_mpath_start_freeze(struct nvme_subsystem *subsys);
 void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
 			struct nvme_ctrl *ctrl, int *flags);
+void nvme_set_generic_ns_name(char *dev_name, struct nvme_ns_head *head,
+			      struct nvme_ctrl *ctrl);
 void nvme_failover_req(struct request *req);
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
 int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
@@ -713,6 +731,13 @@ static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
 	sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
 }
 
+static inline void nvme_set_generic_ns_name(char *dev_name,
+		struct nvme_ns_head *head, struct nvme_ctrl *ctrl)
+{
+	sprintf(dev_name, "nvme-generic-%dn%d", ctrl->instance,
+			head->instance);
+}
+
 static inline void nvme_failover_req(struct request *req)
 {
 }
-- 
2.27.0




More information about the Linux-nvme mailing list