[PATCH V3 1/1] nvme: introduce generic per-namespace chardev

Keith Busch kbusch at kernel.org
Wed Apr 21 18:02:29 BST 2021


On Wed, Apr 21, 2021 at 04:45:04PM +0900, Minwoo Im wrote:
> Userspace has not been allowed to issue I/O to a device that has failed
> to be initialized.  This patch introduces a generic per-namespace
> character device to allow userspace to issue I/O regardless of whether
> the block device is there or not.
> 
> The chardev naming convention will be similar to the existing blkdev
> naming, using an ng prefix instead of nvme, i.e.
> 
> 	- /dev/ngXnY
> 
> It also supports multipath, which means it will not expose a chardev for
> the hidden namespace blkdevs (e.g., nvmeXcYnZ).  If /dev/ngXnY is created
> for a ns_head, then I/O requests will be routed to a specific controller
> selected by the iopolicy of the subsystem.

Looks good.

Reviewed-by: Keith Busch <kbusch at kernel.org>

> Signed-off-by: Minwoo Im <minwoo.im.dev at gmail.com>
> Signed-off-by: Javier González <javier.gonz at samsung.com>
> [hch: rebased on top of the ioctl cleanups, split the ns_head and ns
>  implementations]
> Signed-off-by: Christoph Hellwig <hch at lst.de>
> ---
>  drivers/nvme/host/core.c      | 87 +++++++++++++++++++++++++++++++++++
>  drivers/nvme/host/ioctl.c     | 41 ++++++++++++++---
>  drivers/nvme/host/multipath.c | 51 ++++++++++++++++++--
>  drivers/nvme/host/nvme.h      | 13 ++++++
>  4 files changed, 182 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index b905f91f14eb..2f45e8fcdd7c 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -89,6 +89,10 @@ static dev_t nvme_ctrl_base_chr_devt;
>  static struct class *nvme_class;
>  static struct class *nvme_subsys_class;
>  
> +static DEFINE_IDA(nvme_ns_chr_minor_ida);
> +static dev_t nvme_ns_chr_devt;
> +static struct class *nvme_ns_chr_class;
> +
>  static void nvme_put_subsystem(struct nvme_subsystem *subsys);
>  static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
>  					   unsigned nsid);
> @@ -3429,6 +3433,66 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys,
>  	return 0;
>  }
>  
> +void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device)
> +{
> +	cdev_device_del(cdev, cdev_device);
> +	ida_simple_remove(&nvme_ns_chr_minor_ida, MINOR(cdev_device->devt));
> +}
> +
> +int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
> +		const struct file_operations *fops, struct module *owner)
> +{
> +	int minor, ret;
> +
> +	minor = ida_simple_get(&nvme_ns_chr_minor_ida, 0, 0, GFP_KERNEL);
> +	if (minor < 0)
> +		return minor;
> +	cdev_device->devt = MKDEV(MAJOR(nvme_ns_chr_devt), minor);
> +	cdev_device->class = nvme_ns_chr_class;
> +	device_initialize(cdev_device);
> +	cdev_init(cdev, fops);
> +	cdev->owner = owner;
> +	ret = cdev_device_add(cdev, cdev_device);
> +	if (ret)
> +		ida_simple_remove(&nvme_ns_chr_minor_ida, minor);
> +	return ret;
> +}
> +
> +static int nvme_ns_chr_open(struct inode *inode, struct file *file)
> +{
> +	return nvme_ns_open(container_of(inode->i_cdev, struct nvme_ns, cdev));
> +}
> +
> +static int nvme_ns_chr_release(struct inode *inode, struct file *file)
> +{
> +	nvme_ns_release(container_of(inode->i_cdev, struct nvme_ns, cdev));
> +	return 0;
> +}
> +
> +static const struct file_operations nvme_ns_chr_fops = {
> +	.owner		= THIS_MODULE,
> +	.open		= nvme_ns_chr_open,
> +	.release	= nvme_ns_chr_release,
> +	.unlocked_ioctl	= nvme_ns_chr_ioctl,
> +	.compat_ioctl	= compat_ptr_ioctl,
> +};
> +
> +static int nvme_add_ns_cdev(struct nvme_ns *ns)
> +{
> +	int ret;
> +
> +	ns->cdev_device.parent = ns->ctrl->device;
> +	ret = dev_set_name(&ns->cdev_device, "ng%dn%d",
> +			   ns->ctrl->instance, ns->head->instance);
> +	if (ret)
> +		return ret;
> +	ret = nvme_cdev_add(&ns->cdev, &ns->cdev_device, &nvme_ns_chr_fops,
> +			    ns->ctrl->ops->module);
> +	if (ret)
> +		kfree_const(ns->cdev_device.kobj.name);
> +	return ret;
> +}
> +
>  static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
>  		unsigned nsid, struct nvme_ns_ids *ids)
>  {
> @@ -3630,6 +3694,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
>  	nvme_get_ctrl(ctrl);
>  
>  	device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups);
> +	if (!nvme_ns_head_multipath(ns->head))
> +		nvme_add_ns_cdev(ns);
>  
>  	nvme_mpath_add_disk(ns, id);
>  	nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name);
> @@ -3674,6 +3740,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
>  	synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */
>  
>  	if (ns->disk->flags & GENHD_FL_UP) {
> +		if (!nvme_ns_head_multipath(ns->head))
> +			nvme_cdev_del(&ns->cdev, &ns->cdev_device);
>  		del_gendisk(ns->disk);
>  		blk_cleanup_queue(ns->queue);
>  		if (blk_get_integrity(ns->disk))
> @@ -4464,8 +4532,24 @@ static int __init nvme_core_init(void)
>  		result = PTR_ERR(nvme_subsys_class);
>  		goto destroy_class;
>  	}
> +
> +	result = alloc_chrdev_region(&nvme_ns_chr_devt, 0, NVME_MINORS,
> +				     "nvme-generic");
> +	if (result < 0)
> +		goto destroy_subsys_class;
> +
> +	nvme_ns_chr_class = class_create(THIS_MODULE, "nvme-generic");
> +	if (IS_ERR(nvme_ns_chr_class)) {
> +		result = PTR_ERR(nvme_ns_chr_class);
> +		goto unregister_generic_ns;
> +	}
> +
>  	return 0;
>  
> +unregister_generic_ns:
> +	unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS);
> +destroy_subsys_class:
> +	class_destroy(nvme_subsys_class);
>  destroy_class:
>  	class_destroy(nvme_class);
>  unregister_chrdev:
> @@ -4482,12 +4566,15 @@ static int __init nvme_core_init(void)
>  
>  static void __exit nvme_core_exit(void)
>  {
> +	class_destroy(nvme_ns_chr_class);
>  	class_destroy(nvme_subsys_class);
>  	class_destroy(nvme_class);
> +	unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS);
>  	unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
>  	destroy_workqueue(nvme_delete_wq);
>  	destroy_workqueue(nvme_reset_wq);
>  	destroy_workqueue(nvme_wq);
> +	ida_destroy(&nvme_ns_chr_minor_ida);
>  	ida_destroy(&nvme_instance_ida);
>  }
>  
> diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
> index 8e05d65c9e93..8f27c2f16f24 100644
> --- a/drivers/nvme/host/ioctl.c
> +++ b/drivers/nvme/host/ioctl.c
> @@ -346,15 +346,27 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
>  	}
>  }
>  
> +static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg)
> +{
> +       if (is_ctrl_ioctl(cmd))
> +               return nvme_ctrl_ioctl(ns->ctrl, cmd, arg);
> +       return nvme_ns_ioctl(ns, cmd, arg);
> +}
> +
>  int nvme_ioctl(struct block_device *bdev, fmode_t mode,
>  		unsigned int cmd, unsigned long arg)
>  {
>  	struct nvme_ns *ns = bdev->bd_disk->private_data;
> -	void __user *argp = (void __user *)arg;
>  
> -	if (is_ctrl_ioctl(cmd))
> -		return nvme_ctrl_ioctl(ns->ctrl, cmd, argp);
> -	return nvme_ns_ioctl(ns, cmd, argp);
> +	return __nvme_ioctl(ns, cmd, (void __user *)arg);
> +}
> +
> +long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
> +{
> +	struct nvme_ns *ns =
> +		container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev);
> +
> +	return __nvme_ioctl(ns, cmd, (void __user *)arg);
>  }
>  
>  #ifdef CONFIG_NVME_MULTIPATH
> @@ -384,14 +396,29 @@ static int nvme_ns_head_ns_ioctl(struct nvme_ns_head *head,
>  	return ret;
>  }
>  
> +static int __nvme_ns_head_ioctl(struct nvme_ns_head *head, unsigned int cmd,
> +               void __user *arg)
> +{
> +       if (is_ctrl_ioctl(cmd))
> +               return nvme_ns_head_ctrl_ioctl(head, cmd, arg);
> +       return nvme_ns_head_ns_ioctl(head, cmd, arg);
> +}
> +
>  int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
>  		unsigned int cmd, unsigned long arg)
>  {
>  	struct nvme_ns_head *head = bdev->bd_disk->private_data;
>  
> -	if (is_ctrl_ioctl(cmd))
> -		return nvme_ns_head_ctrl_ioctl(head, cmd, (void __user *)arg);
> -	return nvme_ns_head_ns_ioctl(head, cmd, (void __user *)arg);
> +	return __nvme_ns_head_ioctl(head, cmd, (void __user *)arg);
> +}
> +
> +long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
> +		unsigned long arg)
> +{
> +	struct cdev *cdev = file_inode(file)->i_cdev;
> +	struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev);
> +
> +	return __nvme_ns_head_ioctl(head, cmd, (void __user *)arg);
>  }
>  #endif /* CONFIG_NVME_MULTIPATH */
>  
> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> index 68918ea1d3d0..0d0de3433f37 100644
> --- a/drivers/nvme/host/multipath.c
> +++ b/drivers/nvme/host/multipath.c
> @@ -357,6 +357,48 @@ const struct block_device_operations nvme_ns_head_ops = {
>  	.pr_ops		= &nvme_pr_ops,
>  };
>  
> +static inline struct nvme_ns_head *cdev_to_ns_head(struct cdev *cdev)
> +{
> +	return container_of(cdev, struct nvme_ns_head, cdev);
> +}
> +
> +static int nvme_ns_head_chr_open(struct inode *inode, struct file *file)
> +{
> +	if (!nvme_tryget_ns_head(cdev_to_ns_head(inode->i_cdev)))
> +		return -ENXIO;
> +	return 0;
> +}
> +
> +static int nvme_ns_head_chr_release(struct inode *inode, struct file *file)
> +{
> +	nvme_put_ns_head(cdev_to_ns_head(inode->i_cdev));
> +	return 0;
> +}
> +
> +static const struct file_operations nvme_ns_head_chr_fops = {
> +	.owner		= THIS_MODULE,
> +	.open		= nvme_ns_head_chr_open,
> +	.release	= nvme_ns_head_chr_release,
> +	.unlocked_ioctl	= nvme_ns_head_chr_ioctl,
> +	.compat_ioctl	= compat_ptr_ioctl,
> +};
> +
> +static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
> +{
> +	int ret;
> +
> +	head->cdev_device.parent = &head->subsys->dev;
> +	ret = dev_set_name(&head->cdev_device, "ng%dn%d",
> +			   head->subsys->instance, head->instance);
> +	if (ret)
> +		return ret;
> +	ret = nvme_cdev_add(&head->cdev, &head->cdev_device,
> +			    &nvme_ns_head_chr_fops, THIS_MODULE);
> +	if (ret)
> +		kfree_const(head->cdev_device.kobj.name);
> +	return ret;
> +}
> +
>  static void nvme_requeue_work(struct work_struct *work)
>  {
>  	struct nvme_ns_head *head =
> @@ -435,9 +477,11 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
>  	if (!head->disk)
>  		return;
>  
> -	if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
> +	if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
>  		device_add_disk(&head->subsys->dev, head->disk,
>  				nvme_ns_id_attr_groups);
> +		nvme_add_ns_head_cdev(head);
> +	}
>  
>  	mutex_lock(&head->lock);
>  	if (nvme_path_is_optimized(ns)) {
> @@ -714,8 +758,10 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
>  {
>  	if (!head->disk)
>  		return;
> -	if (head->disk->flags & GENHD_FL_UP)
> +	if (head->disk->flags & GENHD_FL_UP) {
> +		nvme_cdev_del(&head->cdev, &head->cdev_device);
>  		del_gendisk(head->disk);
> +	}
>  	blk_set_queue_dying(head->disk->queue);
>  	/* make sure all pending bios are cleaned up */
>  	kblockd_schedule_work(&head->requeue_work);
> @@ -785,4 +831,3 @@ void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
>  	kfree(ctrl->ana_log_buf);
>  	ctrl->ana_log_buf = NULL;
>  }
> -
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 49276186d5bd..773dde5b231d 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -412,6 +412,10 @@ struct nvme_ns_head {
>  	bool			shared;
>  	int			instance;
>  	struct nvme_effects_log *effects;
> +
> +	struct cdev		cdev;
> +	struct device		cdev_device;
> +
>  	struct gendisk		*disk;
>  #ifdef CONFIG_NVME_MULTIPATH
>  	struct bio_list		requeue_list;
> @@ -464,6 +468,9 @@ struct nvme_ns {
>  #define NVME_NS_ANA_PENDING	2
>  #define NVME_NS_FORCE_RO	3
>  
> +	struct cdev		cdev;
> +	struct device		cdev_device;
> +
>  	struct nvme_fault_inject fault_inject;
>  
>  };
> @@ -658,10 +665,16 @@ void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx);
>  bool nvme_tryget_ns_head(struct nvme_ns_head *head);
>  void nvme_put_ns_head(struct nvme_ns_head *head);
>  struct nvme_ctrl *nvme_find_get_live_ctrl(struct nvme_subsystem *subsys);
> +int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
> +		const struct file_operations *fops, struct module *owner);
> +void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device);
>  int nvme_ioctl(struct block_device *bdev, fmode_t mode,
>  		unsigned int cmd, unsigned long arg);
> +long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
>  int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
>  		unsigned int cmd, unsigned long arg);
> +long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
> +		unsigned long arg);
>  long nvme_dev_ioctl(struct file *file, unsigned int cmd,
>  		unsigned long arg);
>  int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo);
> -- 
> 2.27.0
> 



More information about the Linux-nvme mailing list