[PATCH 3/3] nvme: freeze both the ns and multipath queues when updating queue limits
Hannes Reinecke
hare at suse.de
Thu Aug 29 07:34:39 PDT 2024
On 8/29/24 08:31, Christoph Hellwig wrote:
> We really should not have any I/O outstanding while updating the queue
> limits, as that could introduce inconsistencies between the multipath
> and underlying nodes. So always freeze the multipath node first and then
> the underlying one, matching the order in nvme_passthru_start.
>
> Signed-off-by: Christoph Hellwig <hch at lst.de>
> ---
> drivers/nvme/host/core.c | 41 ++++++++++++++++++++++++++++------------
> 1 file changed, 29 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 81d2a09dd00f30..0f01a0bf976fd3 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2095,6 +2095,24 @@ static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id,
> lim->chunk_sectors = iob;
> }
>
> +/*
> + * Freeze all I/O for a namespace, including clearing out anything on the
> + * multipath node.
> + */
> +static void nvme_ns_freeze(struct nvme_ns *ns)
> +{
> + if (nvme_ns_head_multipath(ns->head))
> + blk_mq_freeze_queue(ns->head->disk->queue);
> + blk_mq_freeze_queue(ns->disk->queue);
> +}
> +
> +static void nvme_ns_unfreeze(struct nvme_ns *ns)
> +{
> + blk_mq_unfreeze_queue(ns->disk->queue);
> + if (nvme_ns_head_multipath(ns->head))
> + blk_mq_unfreeze_queue(ns->head->disk->queue);
> +}
> +
> /*
> * The queue_limits structure mixes values that are the hardware limitations for
> * bio splitting with what is the device configuration.
> @@ -2116,7 +2134,8 @@ static int nvme_update_ns_info_mpath(struct nvme_ns *ns,
> struct queue_limits lim;
> int ret;
>
> - blk_mq_freeze_queue(disk->queue);
> + WARN_ON_ONCE(disk->queue->mq_freeze_depth < 1);
> +
> lim = queue_limits_start_update(disk->queue);
> lim.logical_block_size = ns_lim->logical_block_size;
> lim.physical_block_size = ns_lim->physical_block_size;
> @@ -2132,7 +2151,6 @@ static int nvme_update_ns_info_mpath(struct nvme_ns *ns,
> set_capacity_and_notify(disk, get_capacity(ns->disk));
> set_disk_ro(disk, nvme_ns_is_readonly(ns, info));
> nvme_mpath_revalidate_paths(ns);
> - blk_mq_unfreeze_queue(disk->queue);
>
> return ret;
> }
> @@ -2143,7 +2161,7 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns,
> struct queue_limits lim;
> int ret;
>
> - blk_mq_freeze_queue(ns->disk->queue);
> + nvme_ns_freeze(ns);
> lim = queue_limits_start_update(ns->disk->queue);
> nvme_set_ctrl_limits(ns->ctrl, &lim);
> ret = queue_limits_commit_update(ns->disk->queue, &lim);
> @@ -2154,11 +2172,11 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns,
> ns->disk->flags |= GENHD_FL_HIDDEN;
> set_bit(NVME_NS_READY, &ns->flags);
> set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
> + if (nvme_ns_head_multipath(ns->head))
> + ret = nvme_update_ns_info_mpath(ns, info, true);
>
> out_unfreeze:
> - blk_mq_unfreeze_queue(ns->disk->queue);
> - if (nvme_ns_head_multipath(ns->head) && !ret)
> - ret = nvme_update_ns_info_mpath(ns, info, true);
> + nvme_ns_unfreeze(ns);
> return ret;
> }
>
> @@ -2204,7 +2222,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
> goto out;
> }
>
> - blk_mq_freeze_queue(ns->disk->queue);
> + nvme_ns_freeze(ns);
> ns->head->lba_shift = id->lbaf[lbaf].ds;
> ns->head->nuse = le64_to_cpu(id->nuse);
> capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze));
> @@ -2236,7 +2254,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
>
> ret = queue_limits_commit_update(ns->disk->queue, &lim);
> if (ret) {
> - blk_mq_unfreeze_queue(ns->disk->queue);
> + nvme_ns_unfreeze(ns);
> goto out;
> }
>
> @@ -2254,7 +2272,9 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
> done:
> set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
> set_bit(NVME_NS_READY, &ns->flags);
> - blk_mq_unfreeze_queue(ns->disk->queue);
> + if (nvme_ns_head_multipath(ns->head))
> + ret = nvme_update_ns_info_mpath(ns, info, unsupported);
> + nvme_ns_unfreeze(ns);
>
> if (blk_queue_is_zoned(ns->queue)) {
> ret = blk_revalidate_disk_zones(ns->disk);
While we're shuffling things around: doesn't that mean that we unfreeze
the queue (and allow I/O) before we've validated all zones?
Wouldn't it be better if we validate zones before unfreezing the queues?
Otherwise it looks good; I'll give it a spin on my map/unmap testcases.
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare at suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), GF: Ivo Totev, Andrew McDonald,
Werner Knoblich
More information about the Linux-nvme
mailing list