[PATCHv2] nvme-fabrics: add queue setup helpers

Thu Apr 27 14:28:03 PDT 2023

On Thu, Apr 27, 2023 at 02:05:29AM +0300, Max Gurtovoy wrote:
> 
> 
> On 26/04/2023 18:04, Keith Busch wrote:
> > From: Keith Busch <kbusch at kernel.org>
> > 
> > tcp and rdma transports have lots of duplicate code setting up the
> > different queue mappings. Add common helpers.
> > 
> > Cc: Chaitanya Kulkarni <kch at nvidia.com>
> > Signed-off-by: Keith Busch <kbusch at kernel.org>
> > ---
> > v1->v2:
> > 
> >    Merged up to latest that doesn't have the RDMA specifics
> > 
> >    Simplified io queue count function (Christoph)
> > 
> >    Use 'nvmf_' prefix for function names.
> > 
> >   drivers/nvme/host/fabrics.c | 76 ++++++++++++++++++++++++++++++
> >   drivers/nvme/host/fabrics.h | 11 +++++
> >   drivers/nvme/host/rdma.c    | 79 ++-----------------------------
> >   drivers/nvme/host/tcp.c     | 92 ++-----------------------------------
> >   4 files changed, 96 insertions(+), 162 deletions(-)
> > 
> > diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
> > index bbaa04a0c502b..3ff5030562088 100644
> > --- a/drivers/nvme/host/fabrics.c
> > +++ b/drivers/nvme/host/fabrics.c
> > @@ -957,6 +957,82 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
> >   	return ret;
> >   }
> > +void nvmf_set_io_queues(struct nvmf_ctrl_options *opts, u32 nr_io_queues,
> > +			u32 io_queues[HCTX_MAX_TYPES])
> > +{
> > +	if (opts->nr_write_queues && opts->nr_io_queues < nr_io_queues) {
> > +		/*
> > +		 * separate read/write queues
> > +		 * hand out dedicated default queues only after we have
> > +		 * sufficient read queues.
> > +		 */
> > +		io_queues[HCTX_TYPE_READ] = opts->nr_io_queues;
> > +		nr_io_queues -= io_queues[HCTX_TYPE_READ];
> > +		io_queues[HCTX_TYPE_DEFAULT] =
> > +			min(opts->nr_write_queues, nr_io_queues);
> > +		nr_io_queues -= io_queues[HCTX_TYPE_DEFAULT];
> > +	} else {
> > +		/*
> > +		 * shared read/write queues
> > +		 * either no write queues were requested, or we don't have
> > +		 * sufficient queue count to have dedicated default queues.
> > +		 */
> > +		io_queues[HCTX_TYPE_DEFAULT] =
> > +			min(opts->nr_io_queues, nr_io_queues);
> > +		nr_io_queues -= io_queues[HCTX_TYPE_DEFAULT];
> > +	}
> > +
> > +	if (opts->nr_poll_queues && nr_io_queues) {
> > +		/* map dedicated poll queues only if we have queues left */
> > +		io_queues[HCTX_TYPE_POLL] =
> > +			min(opts->nr_poll_queues, nr_io_queues);
> > +	}
> > +}
> > +EXPORT_SYMBOL_GPL(nvmf_set_io_queues);
> > +
> > +void nvmf_map_queues(struct blk_mq_tag_set *set, struct nvme_ctrl *ctrl,
> > +		     u32 io_queues[HCTX_MAX_TYPES])
> > +{
> > +	struct nvmf_ctrl_options *opts = ctrl->opts;
> > +
> > +	if (opts->nr_write_queues && io_queues[HCTX_TYPE_READ]) {
> > +		/* separate read/write queues */
> > +		set->map[HCTX_TYPE_DEFAULT].nr_queues =
> > +			io_queues[HCTX_TYPE_DEFAULT];
> > +		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
> > +		set->map[HCTX_TYPE_READ].nr_queues =
> > +			io_queues[HCTX_TYPE_READ];
> > +		set->map[HCTX_TYPE_READ].queue_offset =
> > +			io_queues[HCTX_TYPE_DEFAULT];
> > +	} else {
> > +		/* shared read/write queues */
> > +		set->map[HCTX_TYPE_DEFAULT].nr_queues =
> > +			io_queues[HCTX_TYPE_DEFAULT];
> > +		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
> > +		set->map[HCTX_TYPE_READ].nr_queues =
> > +			io_queues[HCTX_TYPE_DEFAULT];
> > +		set->map[HCTX_TYPE_READ].queue_offset = 0;
> > +	}
> > +
> > +	blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
> > +	blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
> > +	if (opts->nr_poll_queues && io_queues[HCTX_TYPE_POLL]) {
> > +		/* map dedicated poll queues only if we have queues left */
> > +		set->map[HCTX_TYPE_POLL].nr_queues = io_queues[HCTX_TYPE_POLL];
> > +		set->map[HCTX_TYPE_POLL].queue_offset =
> > +			io_queues[HCTX_TYPE_DEFAULT] +
> > +			io_queues[HCTX_TYPE_READ];
> > +		blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
> > +	}
> > +
> > +	dev_info(ctrl->device,
> > +		"mapped %d/%d/%d default/read/poll queues.\n",
> > +		io_queues[HCTX_TYPE_DEFAULT],
> > +		io_queues[HCTX_TYPE_READ],
> > +		io_queues[HCTX_TYPE_POLL]);
> > +}
> > +EXPORT_SYMBOL_GPL(nvmf_map_queues);
> > +
> >   static int nvmf_check_required_opts(struct nvmf_ctrl_options *opts,
> >   		unsigned int required_opts)
> >   {
> > diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
> > index dcac3df8a5f76..e438d67a319b5 100644
> > --- a/drivers/nvme/host/fabrics.h
> > +++ b/drivers/nvme/host/fabrics.h
> > @@ -203,6 +203,13 @@ static inline void nvmf_complete_timed_out_request(struct request *rq)
> >   	}
> >   }
> > +static inline unsigned int nvmf_nr_io_queues(struct nvmf_ctrl_options *opts)
> > +{
> > +	return min(opts->nr_io_queues, num_online_cpus()) +
> > +		min(opts->nr_write_queues, num_online_cpus()) +
> > +		min(opts->nr_poll_queues, num_online_cpus());
> > +}
> > +
> >   int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val);
> >   int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
> >   int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
> > @@ -215,5 +222,9 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
> >   bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
> >   bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
> >   		struct nvmf_ctrl_options *opts);
> > +void nvmf_set_io_queues(struct nvmf_ctrl_options *opts, u32 nr_io_queues,
> > +			u32 io_queues[HCTX_MAX_TYPES]);
> > +void nvmf_map_queues(struct blk_mq_tag_set *set, struct nvme_ctrl *ctrl,
> > +		     u32 io_queues[HCTX_MAX_TYPES]);
> >   #endif /* _NVME_FABRICS_H */
> > diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
> > index 0eb79696fb736..168fdf5e11113 100644
> > --- a/drivers/nvme/host/rdma.c
> > +++ b/drivers/nvme/host/rdma.c
> > @@ -713,18 +713,10 @@ static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl,
> >   static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
> >   {
> >   	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
> > -	struct ib_device *ibdev = ctrl->device->dev;
> > -	unsigned int nr_io_queues, nr_default_queues;
> > -	unsigned int nr_read_queues, nr_poll_queues;
> > +	unsigned int nr_io_queues;
> >   	int i, ret;
> > -	nr_read_queues = min_t(unsigned int, ibdev->num_comp_vectors,
> > -				min(opts->nr_io_queues, num_online_cpus()));
> 
> what about the logic of ibdev->num_comp_vectors in the common code ?

Good catch. Without the num_comp_vectors limit, more submission
contexts may end up sharing a completion vector, and that usually
works out better. The existing code already handles that sharing, but
I shouldn't have mixed that functional change into this patch.

> > -		ctrl->io_queues[HCTX_TYPE_READ] = nr_read_queues;
> > -		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_READ];
> > -		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
> > -			min(nr_default_queues, nr_io_queues);
> > -		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
> > -	} else {
> > -		/*
> > -		 * shared read/write queues
> > -		 * either no write queues were requested, or we don't have
> > -		 * sufficient queue count to have dedicated default queues.
> > -		 */
> > -		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
> > -			min(nr_read_queues, nr_io_queues);
> > -		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
> 
> I wonder if this is the right time to move io_queues[HCTX_MAX_TYPES] from
> the rdma/pci/tcp controllers to common nvme_ctrl structure and save more
> code.
> I know that fc and loop ctrl don't use it so we can allocate it dynamically.
> WDYT ?

Less duplication is preferred! The pci transport was just harder
to unify with the other two, but moving io_queues into the common
nvme_ctrl structure is a good idea.