[PATCH v2 06/13] libmultipath: Add cdev support
John Garry
john.g.garry at oracle.com
Tue Apr 28 04:10:58 PDT 2026
Add support to create a cdev multipath device. The functionality is much
the same as NVMe, where the cdev is created when a mpath device is set
live.
The driver must provide a mpath_head_template.cdev_ioctl callback to
actually handle the ioctl.
Structure mpath_chr_fops would be used for setting the cdev fops in the
mpath_head_template.add_cdev callback.
The NVMe cdev ioctl handler has special handling for NVMe controller commands.
In this case, the SRCU read lock is dropped before executing the ioctl.
For reference, see nvme_ns_head_ctrl_ioctl(). This means that it is not
always possible to hold the SRCU read lock while calling the ioctl
callback. To handle this scenario, add template callbacks .ioctl_begin and
.ioctl_finish, to be called before and after the ioctl callback
respectively - if .ioctl_begin returns data then we know to drop the SRCU
lock before calling the ioctl callback, and then later call the
.ioctl_finish callback with that same data. For NVMe using
libmultipath, we would take a reference to the controller structure and
pass a pointer to the controller structure back in .ioctl_begin callback
and use that same data in the .ioctl_finish callback to put the reference
to the controller.
Signed-off-by: John Garry <john.g.garry at oracle.com>
---
include/linux/multipath.h | 18 ++++++
lib/multipath.c | 129 ++++++++++++++++++++++++++++++++++++++
2 files changed, 147 insertions(+)
diff --git a/include/linux/multipath.h b/include/linux/multipath.h
index 72186ab220083..3ac77c089a58c 100644
--- a/include/linux/multipath.h
+++ b/include/linux/multipath.h
@@ -4,8 +4,11 @@
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
+#include <linux/cdev.h>
#include <linux/srcu.h>
+#include <linux/io_uring/cmd.h>
+extern const struct file_operations mpath_chr_fops;
extern const struct block_device_operations mpath_ops;
enum mpath_iopolicy_e {
@@ -37,12 +40,24 @@ struct mpath_device {
struct mpath_head_template {
bool (*available_path)(struct mpath_device *);
+ int (*add_cdev)(struct mpath_head *);
+ void (*del_cdev)(struct mpath_head *);
bool (*is_disabled)(struct mpath_device *);
bool (*is_optimized)(struct mpath_device *);
int (*get_nr_active)(struct mpath_device *);
+ long (*cdev_ioctl)(struct mpath_device *, unsigned int cmd,
+ unsigned long arg, bool open_for_write);
+ int (*chr_uring_cmd)(struct mpath_device *,
+ struct io_uring_cmd *ioucmd,
+ unsigned int issue_flags);
+ int (*chr_uring_cmd_iopoll)(struct io_uring_cmd *ioucmd,
+ struct io_comp_batch *iob,
+ unsigned int poll_flags);
enum mpath_iopolicy_e (*get_iopolicy)(struct mpath_head *);
struct bio *(*clone_bio)(struct bio *);
const struct attribute_group **device_groups;
+ void (*ioctl_begin)(struct mpath_device *, unsigned int cmd, void **);
+ void (*ioctl_finish)(void *opaque);
};
#define MPATH_HEAD_DISK_LIVE 0
@@ -58,6 +73,9 @@ struct mpath_head {
spinlock_t requeue_lock;
struct work_struct requeue_work; /* work struct for requeue */
+ struct cdev cdev;
+ struct device cdev_device;
+
void *drvdata;
unsigned long flags;
struct gendisk *disk;
diff --git a/lib/multipath.c b/lib/multipath.c
index 1232e057199ae..69e48ca3169c2 100644
--- a/lib/multipath.c
+++ b/lib/multipath.c
@@ -462,6 +462,122 @@ const struct block_device_operations mpath_ops = {
};
EXPORT_SYMBOL_GPL(mpath_ops);
+static int mpath_chr_open(struct inode *inode, struct file *file)
+{
+ struct cdev *cdev = file_inode(file)->i_cdev;
+ struct mpath_head *mpath_head =
+ container_of(cdev, struct mpath_head, cdev);
+
+ return mpath_get_head(mpath_head);
+}
+
+static int mpath_chr_release(struct inode *inode, struct file *file)
+{
+ struct cdev *cdev = file_inode(file)->i_cdev;
+ struct mpath_head *mpath_head =
+ container_of(cdev, struct mpath_head, cdev);
+
+ mpath_put_head(mpath_head);
+ return 0;
+}
+
+static long mpath_chr_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct cdev *cdev = file_inode(file)->i_cdev;
+ struct mpath_head *mpath_head =
+ container_of(cdev, struct mpath_head, cdev);
+ struct mpath_device *mpath_device;
+ int srcu_idx, err = -EWOULDBLOCK;
+ void *unlocked_ioctl_data = NULL;
+
+ srcu_idx = srcu_read_lock(&mpath_head->srcu);
+ mpath_device = mpath_find_path(mpath_head);
+ if (!mpath_device)
+ goto out_unlock;
+ if (mpath_head->mpdt->ioctl_begin)
+ mpath_head->mpdt->ioctl_begin(mpath_device, cmd,
+ &unlocked_ioctl_data);
+ if (unlocked_ioctl_data)
+ srcu_read_unlock(&mpath_head->srcu, srcu_idx);
+ err = mpath_head->mpdt->cdev_ioctl(mpath_device, cmd, arg,
+ file->f_mode & FMODE_WRITE);
+ if (unlocked_ioctl_data) {
+ mpath_head->mpdt->ioctl_finish(unlocked_ioctl_data);
+ return err;
+ }
+
+out_unlock:
+ srcu_read_unlock(&mpath_head->srcu, srcu_idx);
+ return err;
+}
+
+static int mpath_chr_uring_cmd(struct io_uring_cmd *ioucmd,
+ unsigned int issue_flags)
+{
+ struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
+ struct mpath_head *mpath_head =
+ container_of(cdev, struct mpath_head, cdev);
+ struct mpath_device *mpath_device;
+ /* error code copied from nvme_ns_head_chr_uring_cmd */
+ int srcu_idx, ret = -EINVAL;
+
+ srcu_idx = srcu_read_lock(&mpath_head->srcu);
+ mpath_device = mpath_find_path(mpath_head);
+
+ if (!mpath_device)
+ goto out_unlock;
+
+ if (!mpath_head->mpdt->chr_uring_cmd) {
+ ret = -EOPNOTSUPP;
+ goto out_unlock;
+ }
+
+ ret = mpath_head->mpdt->chr_uring_cmd(mpath_device, ioucmd,
+ issue_flags);
+out_unlock:
+ srcu_read_unlock(&mpath_head->srcu, srcu_idx);
+ return ret;
+}
+
+static int mpath_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
+ struct io_comp_batch *iob,
+ unsigned int poll_flags)
+{
+ struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
+ struct mpath_head *mpath_head =
+ container_of(cdev, struct mpath_head, cdev);
+
+ if (!mpath_head->mpdt->chr_uring_cmd_iopoll)
+ return -EOPNOTSUPP;
+
+ return mpath_head->mpdt->chr_uring_cmd_iopoll(ioucmd, iob, poll_flags);
+}
+
+const struct file_operations mpath_chr_fops = {
+ .owner = THIS_MODULE,
+ .open = mpath_chr_open,
+ .release = mpath_chr_release,
+ .unlocked_ioctl = mpath_chr_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
+ .uring_cmd = mpath_chr_uring_cmd,
+ .uring_cmd_iopoll = mpath_chr_uring_cmd_iopoll,
+};
+EXPORT_SYMBOL_GPL(mpath_chr_fops);
+
+static int mpath_head_add_cdev(struct mpath_head *mpath_head)
+{
+ if (mpath_head->mpdt->add_cdev)
+ return mpath_head->mpdt->add_cdev(mpath_head);
+ return 0;
+}
+
+static void mpath_head_del_cdev(struct mpath_head *mpath_head)
+{
+ if (mpath_head->mpdt->del_cdev)
+ mpath_head->mpdt->del_cdev(mpath_head);
+}
+
static void multipath_partition_scan_work(struct work_struct *work)
{
struct mpath_head *mpath_head =
@@ -504,6 +620,7 @@ void mpath_remove_disk(struct mpath_head *mpath_head)
*/
mpath_schedule_requeue_work(mpath_head);
+ mpath_head_del_cdev(mpath_head);
mpath_synchronize(mpath_head);
del_gendisk(disk);
}
@@ -526,6 +643,16 @@ EXPORT_SYMBOL_GPL(mpath_put_disk);
int mpath_alloc_head_disk(struct mpath_head *mpath_head,
struct queue_limits *lim, int numa_node)
{
+ /* Do limited sanity checks on template */
+ if (!mpath_head->mpdt->ioctl_begin ^ !mpath_head->mpdt->ioctl_finish)
+ return -EINVAL;
+
+ if (!mpath_head->mpdt->add_cdev ^ !mpath_head->mpdt->del_cdev)
+ return -EINVAL;
+
+ if (!mpath_head->mpdt->add_cdev ^ !mpath_head->mpdt->cdev_ioctl)
+ return -EINVAL;
+
mpath_head->disk = blk_alloc_disk(lim, numa_node);
if (IS_ERR(mpath_head->disk))
return PTR_ERR(mpath_head->disk);
@@ -555,6 +682,8 @@ void mpath_device_set_live(struct mpath_device *mpath_device)
clear_bit(MPATH_HEAD_DISK_LIVE, &mpath_head->flags);
return;
}
+
+ mpath_head_add_cdev(mpath_head);
queue_work(mpath_wq, &mpath_head->partition_scan_work);
}
--
2.43.5
More information about the Linux-nvme
mailing list