[PATCH 3/3] nvme: freeze both the ns and multipath queues when updating queue limits
Hannes Reinecke
hare at suse.de
Thu Aug 29 07:34:39 PDT 2024
On 8/29/24 08:31, Christoph Hellwig wrote:
> We really should not have any I/O outstanding while updating the queue
> limits, as that could introduce inconsistencies between the multipath
> and underlying nodes. So always freeze the multipath node first and then
> the underlying one, matching the order in nvme_passthru_start.
>
> Signed-off-by: Christoph Hellwig <hch at lst.de>
> ---
> drivers/nvme/host/core.c | 41 ++++++++++++++++++++++++++++------------
> 1 file changed, 29 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 81d2a09dd00f30..0f01a0bf976fd3 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2095,6 +2095,24 @@ static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id,
> lim->chunk_sectors = iob;
> }
>
> +/*
> + * Freeze all I/O for a namespace, including clearing out anything on the
> + * multipath node.
> + */
> +static void nvme_ns_freeze(struct nvme_ns *ns)
> +{
> + if (nvme_ns_head_multipath(ns->head))
> + blk_mq_freeze_queue(ns->head->disk->queue);
> + blk_mq_freeze_queue(ns->disk->queue);
> +}
> +
> +static void nvme_ns_unfreeze(struct nvme_ns *ns)
> +{
> + blk_mq_unfreeze_queue(ns->disk->queue);
> + if (nvme_ns_head_multipath(ns->head))
> + blk_mq_unfreeze_queue(ns->head->disk->queue);
> +}
> +
> /*
> * The queue_limits structure mixes values that are the hardware limitations for
> * bio splitting with what is the device configuration.
> @@ -2116,7 +2134,8 @@ static int nvme_update_ns_info_mpath(struct nvme_ns *ns,
> struct queue_limits lim;
> int ret;
>
> - blk_mq_freeze_queue(disk->queue);
> + WARN_ON_ONCE(disk->queue->mq_freeze_depth < 1);
> +
> lim = queue_limits_start_update(disk->queue);
> lim.logical_block_size = ns_lim->logical_block_size;
> lim.physical_block_size = ns_lim->physical_block_size;
> @@ -2132,7 +2151,6 @@ static int nvme_update_ns_info_mpath(struct nvme_ns *ns,
> set_capacity_and_notify(disk, get_capacity(ns->disk));
> set_disk_ro(disk, nvme_ns_is_readonly(ns, info));
> nvme_mpath_revalidate_paths(ns);
> - blk_mq_unfreeze_queue(disk->queue);
>
> return ret;
> }
> @@ -2143,7 +2161,7 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns,
> struct queue_limits lim;
> int ret;
>
> - blk_mq_freeze_queue(ns->disk->queue);
> + nvme_ns_freeze(ns);
> lim = queue_limits_start_update(ns->disk->queue);
> nvme_set_ctrl_limits(ns->ctrl, &lim);
> ret = queue_limits_commit_update(ns->disk->queue, &lim);
> @@ -2154,11 +2172,11 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns,
> ns->disk->flags |= GENHD_FL_HIDDEN;
> set_bit(NVME_NS_READY, &ns->flags);
> set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
> + if (nvme_ns_head_multipath(ns->head))
> + ret = nvme_update_ns_info_mpath(ns, info, true);
>
> out_unfreeze:
> - blk_mq_unfreeze_queue(ns->disk->queue);
> - if (nvme_ns_head_multipath(ns->head) && !ret)
> - ret = nvme_update_ns_info_mpath(ns, info, true);
> + nvme_ns_unfreeze(ns);
> return ret;
> }
>
> @@ -2204,7 +2222,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
> goto out;
> }
>
> - blk_mq_freeze_queue(ns->disk->queue);
> + nvme_ns_freeze(ns);
> ns->head->lba_shift = id->lbaf[lbaf].ds;
> ns->head->nuse = le64_to_cpu(id->nuse);
> capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze));
> @@ -2236,7 +2254,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
>
> ret = queue_limits_commit_update(ns->disk->queue, &lim);
> if (ret) {
> - blk_mq_unfreeze_queue(ns->disk->queue);
> + nvme_ns_unfreeze(ns);
> goto out;
> }
>
> @@ -2254,7 +2272,9 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
> done:
> set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
> set_bit(NVME_NS_READY, &ns->flags);
> - blk_mq_unfreeze_queue(ns->disk->queue);
> + if (nvme_ns_head_multipath(ns->head))
> + ret = nvme_update_ns_info_mpath(ns, info, unsupported);
> + nvme_ns_unfreeze(ns);
>
> if (blk_queue_is_zoned(ns->queue)) {
> ret = blk_revalidate_disk_zones(ns->disk);
While we're shuffling things around: doesn't that mean that we unfreeze
the queue (and allow I/O) before we've validated all zones?
Wouldn't it be better if we validate zones before unfreezing the queues?
Otherwise it looks good; I'll give it a spin on my map/unmap testcases.
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare at suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), GF: Ivo Totev, Andrew McDonald,
Werner Knoblich
More information about the Linux-nvme
mailing list