[PATCH 07/13] libmultipath: Add delayed removal support
Nilay Shroff
nilay at linux.ibm.com
Mon Mar 2 04:41:45 PST 2026
On 2/25/26 9:02 PM, John Garry wrote:
> Add support for delayed removal, same as exists for NVMe.
>
> The purpose of this feature is to keep the multipath disk and cdev present
> for intermittent periods of no available path.
>
> Helpers mpath_delayed_removal_secs_show() and
> mpath_delayed_removal_secs_store() may be used in the driver sysfs code.
>
> The driver is responsible for supplying the removal work callback for
> the delayed work.
>
> Signed-off-by: John Garry <john.g.garry at oracle.com>
> ---
> include/linux/multipath.h | 17 +++++++++
> lib/multipath.c | 79 ++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 95 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/multipath.h b/include/linux/multipath.h
> index 0dcfdd205237c..f7998de261899 100644
> --- a/include/linux/multipath.h
> +++ b/include/linux/multipath.h
> @@ -66,6 +66,7 @@ struct mpath_head_template {
> };
>
> #define MPATH_HEAD_DISK_LIVE 0
> +#define MPATH_HEAD_QUEUE_IF_NO_PATH 1
>
> struct mpath_head {
> struct srcu_struct srcu;
> @@ -81,6 +82,10 @@ struct mpath_head {
> struct cdev cdev;
> struct device cdev_device;
>
> + struct delayed_work remove_work;
> + unsigned int delayed_removal_secs;
> + struct module *drv_module;
> +
> unsigned long flags;
> struct mpath_device __rcu *current_path[MAX_NUMNODES];
> const struct mpath_head_template *mpdt;
> @@ -132,6 +137,7 @@ void mpath_put_head(struct mpath_head *mpath_head);
> void mpath_requeue_work(struct work_struct *work);
> struct mpath_head *mpath_alloc_head(void);
> void mpath_put_disk(struct mpath_disk *mpath_disk);
> +bool mpath_can_remove_head(struct mpath_head *mpath_head);
> void mpath_remove_disk(struct mpath_disk *mpath_disk);
> void mpath_unregister_disk(struct mpath_disk *mpath_disk);
> struct mpath_disk *mpath_alloc_head_disk(struct queue_limits *lim,
> @@ -139,6 +145,10 @@ struct mpath_disk *mpath_alloc_head_disk(struct queue_limits *lim,
> void mpath_device_set_live(struct mpath_disk *mpath_disk,
> struct mpath_device *mpath_device);
> void mpath_unregister_disk(struct mpath_disk *mpath_disk);
> +ssize_t mpath_delayed_removal_secs_show(struct mpath_head *mpath_head,
> + char *buf);
> +ssize_t mpath_delayed_removal_secs_store(struct mpath_head *mpath_head,
> + const char *buf, size_t count);
>
> static inline bool is_mpath_head(struct gendisk *disk)
> {
> @@ -150,4 +160,11 @@ static inline bool mpath_qd_iopolicy(struct mpath_iopolicy *mpath_iopolicy)
> return mpath_read_iopolicy(mpath_iopolicy) == MPATH_IOPOLICY_QD;
> }
>
> +static inline bool mpath_head_queue_if_no_path(struct mpath_head *mpath_head)
> +{
> + if (test_bit(MPATH_HEAD_QUEUE_IF_NO_PATH, &mpath_head->flags))
> + return true;
> + return false;
> +}
> +
> #endif // _LIBMULTIPATH_H
> diff --git a/lib/multipath.c b/lib/multipath.c
> index ce12d42918fdd..1ce57b9b14d2e 100644
> --- a/lib/multipath.c
> +++ b/lib/multipath.c
> @@ -52,6 +52,7 @@ void mpath_add_device(struct mpath_head *mpath_head,
> mutex_lock(&mpath_head->lock);
> list_add_tail_rcu(&mpath_device->siblings, &mpath_head->dev_list);
> mutex_unlock(&mpath_head->lock);
> + cancel_delayed_work(&mpath_head->remove_work);
> }
> EXPORT_SYMBOL_GPL(mpath_add_device);
>
> @@ -356,7 +357,17 @@ static bool mpath_available_path(struct mpath_head *mpath_head)
> return true;
> }
>
> - return false;
> + /*
> + * If "mpath_head->delayed_removal_secs" is configured (i.e., non-zero), do
> + * not immediately fail I/O. Instead, requeue the I/O for the configured
> + * duration, anticipating that if there's a transient link failure then
> + * it may recover within this time window. This parameter is exported to
> + * userspace via sysfs, and its default value is zero. It is internally
> + * mapped to MPATH_HEAD_QUEUE_IF_NO_PATH. When delayed_removal_secs is
> + * non-zero, this flag is set to true. When zero, the flag is cleared.
> + */
> + return mpath_head_queue_if_no_path(mpath_head);
> +
> }
>
> static void mpath_bdev_submit_bio(struct bio *bio)
> @@ -614,6 +625,29 @@ static void mpath_head_del_cdev(struct mpath_head *mpath_head)
> mpath_head->mpdt->del_cdev(mpath_head);
> }
>
> +bool mpath_can_remove_head(struct mpath_head *mpath_head)
> +{
> + bool remove = false;
> +
> + mutex_lock(&mpath_head->lock);
> + /*
> + * Ensure that no one could remove this module while the head
> + * remove work is pending.
> + */
> + if (mpath_head_queue_if_no_path(mpath_head) &&
> + try_module_get(mpath_head->drv_module)) {
> +
> + mod_delayed_work(mpath_wq, &mpath_head->remove_work,
> + mpath_head->delayed_removal_secs * HZ);
> + } else {
> + remove = true;
> + }
> +
> + mutex_unlock(&mpath_head->lock);
> + return remove;
> +}
> +EXPORT_SYMBOL_GPL(mpath_can_remove_head);
> +
> void mpath_remove_disk(struct mpath_disk *mpath_disk)
> {
> struct mpath_head *mpath_head = mpath_disk->mpath_head;
> @@ -711,6 +745,47 @@ void mpath_device_set_live(struct mpath_disk *mpath_disk,
> }
> EXPORT_SYMBOL_GPL(mpath_device_set_live);
>
> +ssize_t mpath_delayed_removal_secs_show(struct mpath_head *mpath_head,
> + char *buf)
> +{
> + int ret;
> +
> + mutex_lock(&mpath_head->lock);
> + ret = sysfs_emit(buf, "%u\n", mpath_head->delayed_removal_secs);
> + mutex_unlock(&mpath_head->lock);
> +
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(mpath_delayed_removal_secs_show);
> +
> +ssize_t mpath_delayed_removal_secs_store(struct mpath_head *mpath_head,
> + const char *buf, size_t count)
> +{
> + ssize_t ret;
> + int sec;
> +
> + ret = kstrtouint(buf, 0, &sec);
> + if (ret < 0)
> + return ret;
> +
> + mutex_lock(&mpath_head->lock);
> + mpath_head->delayed_removal_secs = sec;
> + if (sec)
> + set_bit(MPATH_HEAD_QUEUE_IF_NO_PATH, &mpath_head->flags);
> + else
> + clear_bit(MPATH_HEAD_QUEUE_IF_NO_PATH, &mpath_head->flags);
> + mutex_unlock(&mpath_head->lock);
> +
> + /*
> + * Ensure that update to MPATH_HEAD_QUEUE_IF_NO_PATH is seen
> + * by its reader.
> + */
> + mpath_synchronize(mpath_head);
> +
> + return count;
> +}
> +EXPORT_SYMBOL_GPL(mpath_delayed_removal_secs_store);
> +
> void mpath_add_sysfs_link(struct mpath_disk *mpath_disk)
> {
> struct mpath_head *mpath_head = mpath_disk->mpath_head;
> @@ -793,6 +868,8 @@ struct mpath_head *mpath_alloc_head(void)
> mutex_init(&mpath_head->lock);
> kref_init(&mpath_head->ref);
>
> + mpath_head->delayed_removal_secs = 0;
> +
> INIT_WORK(&mpath_head->requeue_work, mpath_requeue_work);
> spin_lock_init(&mpath_head->requeue_lock);
> bio_list_init(&mpath_head->requeue_list);
I think we also need to initialize ->drv_module here: mpath_can_remove_head() passes it to try_module_get(), and nothing in this patch assigns it.
Thanks,
--Nilay
More information about the Linux-nvme
mailing list