[RFC PATCH v4 10/27] nvme-tcp-offload: Add device scan implementation

Shai Malin malin1024 at gmail.com
Wed May 5 18:52:16 BST 2021


On 5/1/21 3:25 PM, Hannes Reinecke wrote:
> On 4/29/21 9:09 PM, Shai Malin wrote:
> > From: Dean Balandin <dbalandin at marvell.com>
> >
> > As part of create_ctrl(), it scans the registered devices and calls
> > the claim_dev op on each of them, to find the first device that matches
> > the connection params. Once the correct device is found (claim_dev
> > returns true), we raise the refcnt of that device and return it as the
> > device to be used for the ctrl currently being created.
> >
> > Acked-by: Igor Russkikh <irusskikh at marvell.com>
> > Signed-off-by: Dean Balandin <dbalandin at marvell.com>
> > Signed-off-by: Prabhakar Kushwaha <pkushwaha at marvell.com>
> > Signed-off-by: Omkar Kulkarni <okulkarni at marvell.com>
> > Signed-off-by: Michal Kalderon <mkalderon at marvell.com>
> > Signed-off-by: Ariel Elior <aelior at marvell.com>
> > Signed-off-by: Shai Malin <smalin at marvell.com>
> > ---
> >   drivers/nvme/host/tcp-offload.c | 94 +++++++++++++++++++++++++++++++++
> >   1 file changed, 94 insertions(+)
> >
> > diff --git a/drivers/nvme/host/tcp-offload.c b/drivers/nvme/host/tcp-offload.c
> > index 711232eba339..aa7cc239abf2 100644
> > --- a/drivers/nvme/host/tcp-offload.c
> > +++ b/drivers/nvme/host/tcp-offload.c
> > @@ -13,6 +13,11 @@
> >   static LIST_HEAD(nvme_tcp_ofld_devices);
> >   static DECLARE_RWSEM(nvme_tcp_ofld_devices_rwsem);
> >
> > +static inline struct nvme_tcp_ofld_ctrl *to_tcp_ofld_ctrl(struct nvme_ctrl *nctrl)
> > +{
> > +     return container_of(nctrl, struct nvme_tcp_ofld_ctrl, nctrl);
> > +}
> > +
> >   /**
> >    * nvme_tcp_ofld_register_dev() - NVMeTCP Offload Library registration
> >    * function.
> > @@ -98,6 +103,94 @@ void nvme_tcp_ofld_req_done(struct nvme_tcp_ofld_req *req,
> >       /* Placeholder - complete request with/without error */
> >   }
> >
> > +struct nvme_tcp_ofld_dev *
> > +nvme_tcp_ofld_lookup_dev(struct nvme_tcp_ofld_ctrl *ctrl)
> > +{
> > +     struct nvme_tcp_ofld_dev *dev;
> > +
> > +     down_read(&nvme_tcp_ofld_devices_rwsem);
> > +     list_for_each_entry(dev, &nvme_tcp_ofld_devices, entry) {
> > +             if (dev->ops->claim_dev(dev, &ctrl->conn_params)) {
> > +                     /* Increase driver refcnt */
> > +                     if (!try_module_get(dev->ops->module)) {
> > +                             pr_err("try_module_get failed\n");
> > +                             dev = NULL;
> > +                     }
> > +
> > +                     goto out;
> > +             }
> > +     }
> > +
> > +     dev = NULL;
> > +out:
> > +     up_read(&nvme_tcp_ofld_devices_rwsem);
> > +
> > +     return dev;
> > +}
> > +
> > +static int nvme_tcp_ofld_setup_ctrl(struct nvme_ctrl *nctrl, bool new)
> > +{
> > +     /* Placeholder - validates inputs and creates admin and IO queues */
> > +
> > +     return 0;
> > +}
> > +
> > +static struct nvme_ctrl *
> > +nvme_tcp_ofld_create_ctrl(struct device *ndev, struct nvmf_ctrl_options *opts)
> > +{
> > +     struct nvme_tcp_ofld_ctrl *ctrl;
> > +     struct nvme_tcp_ofld_dev *dev;
> > +     struct nvme_ctrl *nctrl;
> > +     int rc = 0;
> > +
> > +     ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
> > +     if (!ctrl)
> > +             return ERR_PTR(-ENOMEM);
> > +
> > +     nctrl = &ctrl->nctrl;
> > +
> > +     /* Init nvme_tcp_ofld_ctrl and nvme_ctrl params based on received opts */
> > +
> > +     /* Find device that can reach the dest addr */
> > +     dev = nvme_tcp_ofld_lookup_dev(ctrl);
> > +     if (!dev) {
> > +             pr_info("no device found for addr %s:%s.\n",
> > +                     opts->traddr, opts->trsvcid);
> > +             rc = -EINVAL;
> > +             goto out_free_ctrl;
> > +     }
> > +
> > +     ctrl->dev = dev;
> > +
> > +     if (ctrl->dev->ops->max_hw_sectors)
> > +             nctrl->max_hw_sectors = ctrl->dev->ops->max_hw_sectors;
> > +     if (ctrl->dev->ops->max_segments)
> > +             nctrl->max_segments = ctrl->dev->ops->max_segments;
> > +
> > +     /* Init queues */
> > +
> > +     /* Call nvme_init_ctrl */
> > +
> > +     rc = ctrl->dev->ops->setup_ctrl(ctrl, true);
> > +     if (rc)
> > +             goto out_module_put;
> > +
> > +     rc = nvme_tcp_ofld_setup_ctrl(nctrl, true);
> > +     if (rc)
> > +             goto out_uninit_ctrl;
> > +
> > +     return nctrl;
> > +
> > +out_uninit_ctrl:
> > +     ctrl->dev->ops->release_ctrl(ctrl);
> > +out_module_put:
> > +     module_put(dev->ops->module);
> > +out_free_ctrl:
> > +     kfree(ctrl);
> > +
> > +     return ERR_PTR(rc);
> > +}
> > +
> >   static struct nvmf_transport_ops nvme_tcp_ofld_transport = {
> >       .name           = "tcp_offload",
> >       .module         = THIS_MODULE,
> > @@ -107,6 +200,7 @@ static struct nvmf_transport_ops nvme_tcp_ofld_transport = {
> >                         NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_HDR_DIGEST |
> >                         NVMF_OPT_DATA_DIGEST | NVMF_OPT_NR_POLL_QUEUES |
> >                         NVMF_OPT_TOS,
> > +     .create_ctrl    = nvme_tcp_ofld_create_ctrl,
> >   };
> >
> >   static int __init nvme_tcp_ofld_init_module(void)
> >
> I wonder if we shouldn't take the approach from Martin Belanger, and
> introduce a new option 'host_iface' to select the interface to use.
> That is, _if_ the nvme-tcp offload driver would present itself as a
> network interface; one might argue that it would put too much
> restriction on the implementations.

We should add the new 'host_iface' option to nvme-tcp-offload as well, and
each vendor-specific driver shall register its support for it through
nvme_tcp_ofld_ops.
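Roughly, the flow could look like this (just a sketch; NVMF_OPT_HOST_IFACE and
opts->host_iface follow Martin's proposal, and the qedn field names below are
made up for illustration):

    /* nvme-tcp-offload would accept the option and carry the interface
     * name in the connection params (not part of this series yet):
     */
    if (opts->mask & NVMF_OPT_HOST_IFACE)
            strscpy(ctrl->conn_params.host_iface, opts->host_iface,
                    sizeof(ctrl->conn_params.host_iface));

    /* ... and each vendor claim_dev() op would match it against the
     * net-device it is paired with:
     */
    if (conn_params->host_iface[0] &&
        strcmp(conn_params->host_iface, qedn->paired_qede_ndev->name))
            return 0;       /* not our interface, don't claim this ctrl */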

> But if it does not present itself as a network interface, how do we
> address it? And if it does, wouldn't we be better off to specify the
> interface directly, and not try to imply the interface from the IP address?

Specifically for the Marvell qedn driver, we pair the net-device (qede) with
the offload-device (qedn) on each port.

The usage for the user is as follows.
Assign an IP to the net-device (with any existing Linux tool):

    ip addr add 100.100.0.101/24 dev p1p1

This IP will be used by both the net-device (qede) and the offload-device (qedn).
In order to connect from "sw" nvme-tcp through the net-device (qede):

    nvme connect -t tcp -s 4420 -a 100.100.0.100 -n testnqn

In order to connect from "offload" nvme-tcp through the offload-device (qedn):

    nvme connect -t tcp_offload -s 4420 -a 100.100.0.100 -n testnqn
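Without an explicit 'host_iface', the pairing is implied from the IP address:
claim_dev() can resolve the route to the target and claim the controller only
if the egress net-device is the qede interface paired with that qedn device.
A minimal sketch, assuming IPv4 and illustrative structure/field names where
they are not visible in this patch (the real qedn implementation comes later
in the series):

    static int qedn_claim_dev(struct nvme_tcp_ofld_dev *dev,
                              struct nvme_tcp_ofld_ctrl_con_params *conn_params)
    {
            struct qedn_ctx *qedn = container_of(dev, struct qedn_ctx, ofld_dev);
            struct sockaddr_in *raddr =
                    (struct sockaddr_in *)&conn_params->remote_ip_addr;
            struct flowi4 fl4 = { .daddr = raddr->sin_addr.s_addr };
            struct rtable *rt;
            bool match;

            rt = ip_route_output_key(&init_net, &fl4);
            if (IS_ERR(rt))
                    return 0;

            /* Claim only if the route egresses through our paired qede netdev */
            match = (rt->dst.dev == qedn->paired_qede_ndev);
            ip_rt_put(rt);

            return match;
    }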

As a future enhancement, and in order to simplify the usage (a suggestion by
Hannes Reinecke), nvme-cli could be extended with a new flag that determines
whether "-t tcp" should use the regular nvme-tcp (which will remain the
default) or nvme-tcp-offload.
For example:

    nvme connect -t tcp -s 4420 -a 100.100.0.100 -n testnqn -[new flag]

>
> Cheers,
>
> Hannes
> --
> Dr. Hannes Reinecke                Kernel Storage Architect
> hare at suse.de                              +49 911 74053 688
> SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
> HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer


