[PATCHv2] nvme-fabrics: add queue setup helpers

Max Gurtovoy mgurtovoy at nvidia.com
Wed Apr 26 16:05:29 PDT 2023



On 26/04/2023 18:04, Keith Busch wrote:
> From: Keith Busch <kbusch at kernel.org>
> 
> tcp and rdma transports have lots of duplicate code setting up the
> different queue mappings. Add common helpers.
> 
> Cc: Chaitanya Kulkarni <kch at nvidia.com>
> Signed-off-by: Keith Busch <kbusch at kernel.org>
> ---
> v1->v2:
> 
>    Merged up to latest that doesn't have the RDMA specifics
> 
>    Simplified io queue count function (Christoph)
> 
>    Use 'nvmf_' prefix for function names.
> 
>   drivers/nvme/host/fabrics.c | 76 ++++++++++++++++++++++++++++++
>   drivers/nvme/host/fabrics.h | 11 +++++
>   drivers/nvme/host/rdma.c    | 79 ++-----------------------------
>   drivers/nvme/host/tcp.c     | 92 ++-----------------------------------
>   4 files changed, 96 insertions(+), 162 deletions(-)
> 
> diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
> index bbaa04a0c502b..3ff5030562088 100644
> --- a/drivers/nvme/host/fabrics.c
> +++ b/drivers/nvme/host/fabrics.c
> @@ -957,6 +957,82 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
>   	return ret;
>   }
>   
> +void nvmf_set_io_queues(struct nvmf_ctrl_options *opts, u32 nr_io_queues,
> +			u32 io_queues[HCTX_MAX_TYPES])
> +{
> +	if (opts->nr_write_queues && opts->nr_io_queues < nr_io_queues) {
> +		/*
> +		 * separate read/write queues
> +		 * hand out dedicated default queues only after we have
> +		 * sufficient read queues.
> +		 */
> +		io_queues[HCTX_TYPE_READ] = opts->nr_io_queues;
> +		nr_io_queues -= io_queues[HCTX_TYPE_READ];
> +		io_queues[HCTX_TYPE_DEFAULT] =
> +			min(opts->nr_write_queues, nr_io_queues);
> +		nr_io_queues -= io_queues[HCTX_TYPE_DEFAULT];
> +	} else {
> +		/*
> +		 * shared read/write queues
> +		 * either no write queues were requested, or we don't have
> +		 * sufficient queue count to have dedicated default queues.
> +		 */
> +		io_queues[HCTX_TYPE_DEFAULT] =
> +			min(opts->nr_io_queues, nr_io_queues);
> +		nr_io_queues -= io_queues[HCTX_TYPE_DEFAULT];
> +	}
> +
> +	if (opts->nr_poll_queues && nr_io_queues) {
> +		/* map dedicated poll queues only if we have queues left */
> +		io_queues[HCTX_TYPE_POLL] =
> +			min(opts->nr_poll_queues, nr_io_queues);
> +	}
> +}
> +EXPORT_SYMBOL_GPL(nvmf_set_io_queues);
> +
> +void nvmf_map_queues(struct blk_mq_tag_set *set, struct nvme_ctrl *ctrl,
> +		     u32 io_queues[HCTX_MAX_TYPES])
> +{
> +	struct nvmf_ctrl_options *opts = ctrl->opts;
> +
> +	if (opts->nr_write_queues && io_queues[HCTX_TYPE_READ]) {
> +		/* separate read/write queues */
> +		set->map[HCTX_TYPE_DEFAULT].nr_queues =
> +			io_queues[HCTX_TYPE_DEFAULT];
> +		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
> +		set->map[HCTX_TYPE_READ].nr_queues =
> +			io_queues[HCTX_TYPE_READ];
> +		set->map[HCTX_TYPE_READ].queue_offset =
> +			io_queues[HCTX_TYPE_DEFAULT];
> +	} else {
> +		/* shared read/write queues */
> +		set->map[HCTX_TYPE_DEFAULT].nr_queues =
> +			io_queues[HCTX_TYPE_DEFAULT];
> +		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
> +		set->map[HCTX_TYPE_READ].nr_queues =
> +			io_queues[HCTX_TYPE_DEFAULT];
> +		set->map[HCTX_TYPE_READ].queue_offset = 0;
> +	}
> +
> +	blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
> +	blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
> +	if (opts->nr_poll_queues && io_queues[HCTX_TYPE_POLL]) {
> +		/* map dedicated poll queues only if we have queues left */
> +		set->map[HCTX_TYPE_POLL].nr_queues = io_queues[HCTX_TYPE_POLL];
> +		set->map[HCTX_TYPE_POLL].queue_offset =
> +			io_queues[HCTX_TYPE_DEFAULT] +
> +			io_queues[HCTX_TYPE_READ];
> +		blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
> +	}
> +
> +	dev_info(ctrl->device,
> +		"mapped %d/%d/%d default/read/poll queues.\n",
> +		io_queues[HCTX_TYPE_DEFAULT],
> +		io_queues[HCTX_TYPE_READ],
> +		io_queues[HCTX_TYPE_POLL]);
> +}
> +EXPORT_SYMBOL_GPL(nvmf_map_queues);
> +
>   static int nvmf_check_required_opts(struct nvmf_ctrl_options *opts,
>   		unsigned int required_opts)
>   {
> diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
> index dcac3df8a5f76..e438d67a319b5 100644
> --- a/drivers/nvme/host/fabrics.h
> +++ b/drivers/nvme/host/fabrics.h
> @@ -203,6 +203,13 @@ static inline void nvmf_complete_timed_out_request(struct request *rq)
>   	}
>   }
>   
> +static inline unsigned int nvmf_nr_io_queues(struct nvmf_ctrl_options *opts)
> +{
> +	return min(opts->nr_io_queues, num_online_cpus()) +
> +		min(opts->nr_write_queues, num_online_cpus()) +
> +		min(opts->nr_poll_queues, num_online_cpus());
> +}
> +
>   int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val);
>   int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
>   int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
> @@ -215,5 +222,9 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
>   bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
>   bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
>   		struct nvmf_ctrl_options *opts);
> +void nvmf_set_io_queues(struct nvmf_ctrl_options *opts, u32 nr_io_queues,
> +			u32 io_queues[HCTX_MAX_TYPES]);
> +void nvmf_map_queues(struct blk_mq_tag_set *set, struct nvme_ctrl *ctrl,
> +		     u32 io_queues[HCTX_MAX_TYPES]);
>   
>   #endif /* _NVME_FABRICS_H */
> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
> index 0eb79696fb736..168fdf5e11113 100644
> --- a/drivers/nvme/host/rdma.c
> +++ b/drivers/nvme/host/rdma.c
> @@ -713,18 +713,10 @@ static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl,
>   static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
>   {
>   	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
> -	struct ib_device *ibdev = ctrl->device->dev;
> -	unsigned int nr_io_queues, nr_default_queues;
> -	unsigned int nr_read_queues, nr_poll_queues;
> +	unsigned int nr_io_queues;
>   	int i, ret;
>   
> -	nr_read_queues = min_t(unsigned int, ibdev->num_comp_vectors,
> -				min(opts->nr_io_queues, num_online_cpus()));

What about the ibdev->num_comp_vectors logic? It is missing from the common code.

> -	nr_default_queues =  min_t(unsigned int, ibdev->num_comp_vectors,
> -				min(opts->nr_write_queues, num_online_cpus()));

Same comment applies here.

> -	nr_poll_queues = min(opts->nr_poll_queues, num_online_cpus());
> -	nr_io_queues = nr_read_queues + nr_default_queues + nr_poll_queues;
> -
> +	nr_io_queues = nvmf_nr_io_queues(opts);
>   	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
>   	if (ret)
>   		return ret;
> @@ -739,34 +731,7 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
>   	dev_info(ctrl->ctrl.device,
>   		"creating %d I/O queues.\n", nr_io_queues);
>   
> -	if (opts->nr_write_queues && nr_read_queues < nr_io_queues) {
> -		/*
> -		 * separate read/write queues
> -		 * hand out dedicated default queues only after we have
> -		 * sufficient read queues.
> -		 */
> -		ctrl->io_queues[HCTX_TYPE_READ] = nr_read_queues;
> -		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_READ];
> -		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
> -			min(nr_default_queues, nr_io_queues);
> -		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
> -	} else {
> -		/*
> -		 * shared read/write queues
> -		 * either no write queues were requested, or we don't have
> -		 * sufficient queue count to have dedicated default queues.
> -		 */
> -		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
> -			min(nr_read_queues, nr_io_queues);
> -		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];

I wonder if this is the right time to move io_queues[HCTX_MAX_TYPES]
from the rdma/pci/tcp controllers into the common nvme_ctrl structure and
save more code.
I know that the fc and loop controllers don't use it, so we can allocate it
dynamically.
WDYT?

> -	}
> -
> -	if (opts->nr_poll_queues && nr_io_queues) {
> -		/* map dedicated poll queues only if we have queues left */
> -		ctrl->io_queues[HCTX_TYPE_POLL] =
> -			min(nr_poll_queues, nr_io_queues);
> -	}
> -
> +	nvmf_set_io_queues(opts, nr_io_queues, ctrl->io_queues);
>   	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
>   		ret = nvme_rdma_alloc_queue(ctrl, i,
>   				ctrl->ctrl.sqsize + 1);
> @@ -2138,44 +2103,8 @@ static void nvme_rdma_complete_rq(struct request *rq)
>   static void nvme_rdma_map_queues(struct blk_mq_tag_set *set)
>   {
>   	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(set->driver_data);
> -	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
>   
> -	if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
> -		/* separate read/write queues */
> -		set->map[HCTX_TYPE_DEFAULT].nr_queues =
> -			ctrl->io_queues[HCTX_TYPE_DEFAULT];
> -		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
> -		set->map[HCTX_TYPE_READ].nr_queues =
> -			ctrl->io_queues[HCTX_TYPE_READ];
> -		set->map[HCTX_TYPE_READ].queue_offset =
> -			ctrl->io_queues[HCTX_TYPE_DEFAULT];
> -	} else {
> -		/* shared read/write queues */
> -		set->map[HCTX_TYPE_DEFAULT].nr_queues =
> -			ctrl->io_queues[HCTX_TYPE_DEFAULT];
> -		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
> -		set->map[HCTX_TYPE_READ].nr_queues =
> -			ctrl->io_queues[HCTX_TYPE_DEFAULT];
> -		set->map[HCTX_TYPE_READ].queue_offset = 0;
> -	}
> -	blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
> -	blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
> -
> -	if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
> -		/* map dedicated poll queues only if we have queues left */
> -		set->map[HCTX_TYPE_POLL].nr_queues =
> -				ctrl->io_queues[HCTX_TYPE_POLL];
> -		set->map[HCTX_TYPE_POLL].queue_offset =
> -			ctrl->io_queues[HCTX_TYPE_DEFAULT] +
> -			ctrl->io_queues[HCTX_TYPE_READ];
> -		blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
> -	}
> -
> -	dev_info(ctrl->ctrl.device,
> -		"mapped %d/%d/%d default/read/poll queues.\n",
> -		ctrl->io_queues[HCTX_TYPE_DEFAULT],
> -		ctrl->io_queues[HCTX_TYPE_READ],
> -		ctrl->io_queues[HCTX_TYPE_POLL]);
> +	nvmf_map_queues(set, &ctrl->ctrl, ctrl->io_queues);
>   }
>   
>   static const struct blk_mq_ops nvme_rdma_mq_ops = {
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index bf0230442d570..260b3554d821d 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -1802,58 +1802,12 @@ static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
>   	return ret;
>   }
>   
> -static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl)
> -{
> -	unsigned int nr_io_queues;
> -
> -	nr_io_queues = min(ctrl->opts->nr_io_queues, num_online_cpus());
> -	nr_io_queues += min(ctrl->opts->nr_write_queues, num_online_cpus());
> -	nr_io_queues += min(ctrl->opts->nr_poll_queues, num_online_cpus());
> -
> -	return nr_io_queues;
> -}
> -
> -static void nvme_tcp_set_io_queues(struct nvme_ctrl *nctrl,
> -		unsigned int nr_io_queues)
> -{
> -	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
> -	struct nvmf_ctrl_options *opts = nctrl->opts;
> -
> -	if (opts->nr_write_queues && opts->nr_io_queues < nr_io_queues) {
> -		/*
> -		 * separate read/write queues
> -		 * hand out dedicated default queues only after we have
> -		 * sufficient read queues.
> -		 */
> -		ctrl->io_queues[HCTX_TYPE_READ] = opts->nr_io_queues;
> -		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_READ];
> -		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
> -			min(opts->nr_write_queues, nr_io_queues);
> -		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
> -	} else {
> -		/*
> -		 * shared read/write queues
> -		 * either no write queues were requested, or we don't have
> -		 * sufficient queue count to have dedicated default queues.
> -		 */
> -		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
> -			min(opts->nr_io_queues, nr_io_queues);
> -		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
> -	}
> -
> -	if (opts->nr_poll_queues && nr_io_queues) {
> -		/* map dedicated poll queues only if we have queues left */
> -		ctrl->io_queues[HCTX_TYPE_POLL] =
> -			min(opts->nr_poll_queues, nr_io_queues);
> -	}
> -}
> -
>   static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
>   {
>   	unsigned int nr_io_queues;
>   	int ret;
>   
> -	nr_io_queues = nvme_tcp_nr_io_queues(ctrl);
> +	nr_io_queues = nvmf_nr_io_queues(ctrl->opts);
>   	ret = nvme_set_queue_count(ctrl, &nr_io_queues);
>   	if (ret)
>   		return ret;
> @@ -1868,8 +1822,8 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
>   	dev_info(ctrl->device,
>   		"creating %d I/O queues.\n", nr_io_queues);
>   
> -	nvme_tcp_set_io_queues(ctrl, nr_io_queues);
> -
> +	nvmf_set_io_queues(ctrl->opts, nr_io_queues,
> +			   to_tcp_ctrl(ctrl)->io_queues);
>   	return __nvme_tcp_alloc_io_queues(ctrl);
>   }
>   
> @@ -2449,44 +2403,8 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
>   static void nvme_tcp_map_queues(struct blk_mq_tag_set *set)
>   {
>   	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data);
> -	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
> -
> -	if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
> -		/* separate read/write queues */
> -		set->map[HCTX_TYPE_DEFAULT].nr_queues =
> -			ctrl->io_queues[HCTX_TYPE_DEFAULT];
> -		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
> -		set->map[HCTX_TYPE_READ].nr_queues =
> -			ctrl->io_queues[HCTX_TYPE_READ];
> -		set->map[HCTX_TYPE_READ].queue_offset =
> -			ctrl->io_queues[HCTX_TYPE_DEFAULT];
> -	} else {
> -		/* shared read/write queues */
> -		set->map[HCTX_TYPE_DEFAULT].nr_queues =
> -			ctrl->io_queues[HCTX_TYPE_DEFAULT];
> -		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
> -		set->map[HCTX_TYPE_READ].nr_queues =
> -			ctrl->io_queues[HCTX_TYPE_DEFAULT];
> -		set->map[HCTX_TYPE_READ].queue_offset = 0;
> -	}
> -	blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
> -	blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
> -
> -	if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
> -		/* map dedicated poll queues only if we have queues left */
> -		set->map[HCTX_TYPE_POLL].nr_queues =
> -				ctrl->io_queues[HCTX_TYPE_POLL];
> -		set->map[HCTX_TYPE_POLL].queue_offset =
> -			ctrl->io_queues[HCTX_TYPE_DEFAULT] +
> -			ctrl->io_queues[HCTX_TYPE_READ];
> -		blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
> -	}
> -
> -	dev_info(ctrl->ctrl.device,
> -		"mapped %d/%d/%d default/read/poll queues.\n",
> -		ctrl->io_queues[HCTX_TYPE_DEFAULT],
> -		ctrl->io_queues[HCTX_TYPE_READ],
> -		ctrl->io_queues[HCTX_TYPE_POLL]);
> +
> +	nvmf_map_queues(set, &ctrl->ctrl, ctrl->io_queues);
>   }
>   
>   static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)



More information about the Linux-nvme mailing list