[PATCH 07/13] libmultipath: Add delayed removal support

Nilay Shroff nilay at linux.ibm.com
Mon Mar 2 04:41:45 PST 2026


On 2/25/26 9:02 PM, John Garry wrote:
> Add support for delayed removal, same as exists for NVMe.
> 
> The purpose of this feature is to keep the multipath disk and cdev present
> for intermittent periods of no available path.
> 
> Helpers mpath_delayed_removal_secs_show() and
> mpath_delayed_removal_secs_store() may be used in the driver sysfs code.
> 
> The driver is responsible for supplying the removal work callback for
> the delayed work.
> 
> Signed-off-by: John Garry <john.g.garry at oracle.com>
> ---
>   include/linux/multipath.h | 17 +++++++++
>   lib/multipath.c           | 79 ++++++++++++++++++++++++++++++++++++++-
>   2 files changed, 95 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/multipath.h b/include/linux/multipath.h
> index 0dcfdd205237c..f7998de261899 100644
> --- a/include/linux/multipath.h
> +++ b/include/linux/multipath.h
> @@ -66,6 +66,7 @@ struct mpath_head_template {
>   };
>   
>   #define MPATH_HEAD_DISK_LIVE 			0
> +#define MPATH_HEAD_QUEUE_IF_NO_PATH		1
>   
>   struct mpath_head {
>   	struct srcu_struct	srcu;
> @@ -81,6 +82,10 @@ struct mpath_head {
>   	struct cdev		cdev;
>   	struct device		cdev_device;
>   
> +	struct delayed_work	remove_work;
> +	unsigned int		delayed_removal_secs;
> +	struct module		*drv_module;
> +
>   	unsigned long		flags;
>   	struct mpath_device __rcu 		*current_path[MAX_NUMNODES];
>   	const struct mpath_head_template	*mpdt;
> @@ -132,6 +137,7 @@ void mpath_put_head(struct mpath_head *mpath_head);
>   void mpath_requeue_work(struct work_struct *work);
>   struct mpath_head *mpath_alloc_head(void);
>   void mpath_put_disk(struct mpath_disk *mpath_disk);
> +bool mpath_can_remove_head(struct mpath_head *mpath_head);
>   void mpath_remove_disk(struct mpath_disk *mpath_disk);
>   void mpath_unregister_disk(struct mpath_disk *mpath_disk);
>   struct mpath_disk *mpath_alloc_head_disk(struct queue_limits *lim,
> @@ -139,6 +145,10 @@ struct mpath_disk *mpath_alloc_head_disk(struct queue_limits *lim,
>   void mpath_device_set_live(struct mpath_disk *mpath_disk,
>   			struct mpath_device *mpath_device);
>   void mpath_unregister_disk(struct mpath_disk *mpath_disk);
> +ssize_t mpath_delayed_removal_secs_show(struct mpath_head *mpath_head,
> +			char *buf);
> +ssize_t mpath_delayed_removal_secs_store(struct mpath_head *mpath_head,
> +			const char *buf, size_t count);
>   
>   static inline bool is_mpath_head(struct gendisk *disk)
>   {
> @@ -150,4 +160,11 @@ static inline bool mpath_qd_iopolicy(struct mpath_iopolicy *mpath_iopolicy)
>   	return mpath_read_iopolicy(mpath_iopolicy) == MPATH_IOPOLICY_QD;
>   }
>   
> +static inline bool mpath_head_queue_if_no_path(struct mpath_head *mpath_head)
> +{
> +	if (test_bit(MPATH_HEAD_QUEUE_IF_NO_PATH, &mpath_head->flags))
> +		return true;
> +	return false;
> +}
> +
>   #endif // _LIBMULTIPATH_H
> diff --git a/lib/multipath.c b/lib/multipath.c
> index ce12d42918fdd..1ce57b9b14d2e 100644
> --- a/lib/multipath.c
> +++ b/lib/multipath.c
> @@ -52,6 +52,7 @@ void mpath_add_device(struct mpath_head *mpath_head,
>   	mutex_lock(&mpath_head->lock);
>   	list_add_tail_rcu(&mpath_device->siblings, &mpath_head->dev_list);
>   	mutex_unlock(&mpath_head->lock);
> +	cancel_delayed_work(&mpath_head->remove_work);
>   }
>   EXPORT_SYMBOL_GPL(mpath_add_device);
>   
> @@ -356,7 +357,17 @@ static bool mpath_available_path(struct mpath_head *mpath_head)
>   			return true;
>   	}
>   
> -	return false;
> +	/*
> +	 * If "mpath_head->delayed_removal_secs" is configured (i.e., non-zero), do
> +	 * not immediately fail I/O. Instead, requeue the I/O for the configured
> +	 * duration, anticipating that if there's a transient link failure then
> +	 * it may recover within this time window. This parameter is exported to
> +	 * userspace via sysfs, and its default value is zero. It is internally
> +	 * mapped to MPATH_HEAD_QUEUE_IF_NO_PATH. When delayed_removal_secs is
> +	 * non-zero, this flag is set to true. When zero, the flag is cleared.
> +	 */
> +	return mpath_head_queue_if_no_path(mpath_head);
> +
>   }
>   
>   static void mpath_bdev_submit_bio(struct bio *bio)
> @@ -614,6 +625,29 @@ static void mpath_head_del_cdev(struct mpath_head *mpath_head)
>   		mpath_head->mpdt->del_cdev(mpath_head);
>   }
>   
> +bool mpath_can_remove_head(struct mpath_head *mpath_head)
> +{
> +	bool remove = false;
> +
> +	mutex_lock(&mpath_head->lock);
> +	/*
> +	 * Ensure that no one could remove this module while the head
> +	 * remove work is pending.
> +	 */
> +	if (mpath_head_queue_if_no_path(mpath_head) &&
> +		try_module_get(mpath_head->drv_module)) {
> +
> +		mod_delayed_work(mpath_wq, &mpath_head->remove_work,
> +				mpath_head->delayed_removal_secs * HZ);
> +	} else {
> +		remove = true;
> +	}
> +
> +	mutex_unlock(&mpath_head->lock);
> +	return remove;
> +}
> +EXPORT_SYMBOL_GPL(mpath_can_remove_head);
> +
>   void mpath_remove_disk(struct mpath_disk *mpath_disk)
>   {
>   	struct mpath_head *mpath_head = mpath_disk->mpath_head;
> @@ -711,6 +745,47 @@ void mpath_device_set_live(struct mpath_disk *mpath_disk,
>   }
>   EXPORT_SYMBOL_GPL(mpath_device_set_live);
>   
> +ssize_t mpath_delayed_removal_secs_show(struct mpath_head *mpath_head,
> +					char *buf)
> +{
> +	int ret;
> +
> +	mutex_lock(&mpath_head->lock);
> +	ret = sysfs_emit(buf, "%u\n", mpath_head->delayed_removal_secs);
> +	mutex_unlock(&mpath_head->lock);
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(mpath_delayed_removal_secs_show);
> +
> +ssize_t mpath_delayed_removal_secs_store(struct mpath_head *mpath_head,
> +			const char *buf, size_t count)
> +{
> +	ssize_t ret;
> +	int sec;
> +
> +	ret = kstrtouint(buf, 0, &sec);
> +	if (ret < 0)
> +		return ret;
> +
> +	mutex_lock(&mpath_head->lock);
> +	mpath_head->delayed_removal_secs = sec;
> +	if (sec)
> +		set_bit(MPATH_HEAD_QUEUE_IF_NO_PATH, &mpath_head->flags);
> +	else
> +		clear_bit(MPATH_HEAD_QUEUE_IF_NO_PATH, &mpath_head->flags);
> +	mutex_unlock(&mpath_head->lock);
> +
> +	/*
> +	 * Ensure that update to MPATH_HEAD_QUEUE_IF_NO_PATH is seen
> +	 * by its reader.
> +	 */
> +	mpath_synchronize(mpath_head);
> +
> +	return count;
> +}
> +EXPORT_SYMBOL_GPL(mpath_delayed_removal_secs_store);
> +
>   void mpath_add_sysfs_link(struct mpath_disk *mpath_disk)
>   {
>   	struct mpath_head *mpath_head = mpath_disk->mpath_head;
> @@ -793,6 +868,8 @@ struct mpath_head *mpath_alloc_head(void)
>   	mutex_init(&mpath_head->lock);
>   	kref_init(&mpath_head->ref);
>   
> +	mpath_head->delayed_removal_secs = 0;
> +
>   	INIT_WORK(&mpath_head->requeue_work, mpath_requeue_work);
>   	spin_lock_init(&mpath_head->requeue_lock);
>   	bio_list_init(&mpath_head->requeue_list);

I think we also need to initialize ->drv_module here, since
mpath_can_remove_head() passes it to try_module_get().

Thanks,
--Nilay




More information about the Linux-nvme mailing list