[PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
Hannes Reinecke
hare at suse.de
Sat May 1 12:34:25 BST 2021
On 4/15/21 9:28 PM, Martin Belanger wrote:
> From: Martin Belanger <martin.belanger at dell.com>
>
Please fix up the subject and description.
> ---
> drivers/nvme/host/core.c | 5 +++++
> drivers/nvme/host/fabrics.c | 14 +++++++++++++
> drivers/nvme/host/fabrics.h | 6 +++++-
> drivers/nvme/host/tcp.c | 41 ++++++++++++++++++++++++++++++++++---
> 4 files changed, 62 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 288ac47ff5b4..91ae11a1ae26 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -3961,6 +3961,11 @@ static int nvme_class_uevent(struct device *dev, struct kobj_uevent_env *env)
>
> ret = add_uevent_var(env, "NVME_HOST_TRADDR=%s",
> opts->host_traddr ?: "none");
> + if (ret)
> + return ret;
> +
> + ret = add_uevent_var(env, "NVME_HOST_TRIFACE=%s",
> + opts->host_triface ?: "none");
> }
> return ret;
> }
Why not simply 'host_iface' ? 'triface' is a bit awkward.
> diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
> index 604ab0e5a2ad..f5d0d760b53b 100644
> --- a/drivers/nvme/host/fabrics.c
> +++ b/drivers/nvme/host/fabrics.c
> @@ -112,6 +112,9 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
> if (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)
> len += scnprintf(buf + len, size - len, "%shost_traddr=%s",
> (len) ? "," : "", ctrl->opts->host_traddr);
> + if (ctrl->opts->mask & NVMF_OPT_HOST_TRIFACE)
> + len += scnprintf(buf + len, size - len, "%shost_triface=%s",
> + (len) ? "," : "", ctrl->opts->host_triface);
> len += scnprintf(buf + len, size - len, "\n");
>
> return len;
> @@ -604,6 +607,7 @@ static const match_table_t opt_tokens = {
> { NVMF_OPT_KATO, "keep_alive_tmo=%d" },
> { NVMF_OPT_HOSTNQN, "hostnqn=%s" },
> { NVMF_OPT_HOST_TRADDR, "host_traddr=%s" },
> + { NVMF_OPT_HOST_TRIFACE, "host_triface=%s" },
> { NVMF_OPT_HOST_ID, "hostid=%s" },
> { NVMF_OPT_DUP_CONNECT, "duplicate_connect" },
> { NVMF_OPT_DISABLE_SQFLOW, "disable_sqflow" },
> @@ -813,6 +817,15 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
> kfree(opts->host_traddr);
> opts->host_traddr = p;
> break;
> + case NVMF_OPT_HOST_TRIFACE:
> + p = match_strdup(args);
> + if (!p) {
> + ret = -ENOMEM;
> + goto out;
> + }
> + kfree(opts->host_triface);
> + opts->host_triface = p;
> + break;
> case NVMF_OPT_HOST_ID:
> p = match_strdup(args);
> if (!p) {
> @@ -997,6 +1010,7 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts)
> kfree(opts->trsvcid);
> kfree(opts->subsysnqn);
> kfree(opts->host_traddr);
> + kfree(opts->host_triface);
> kfree(opts);
> }
> EXPORT_SYMBOL_GPL(nvmf_free_options);
> diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
> index 733010d2eafd..17c64ff4db8c 100644
> --- a/drivers/nvme/host/fabrics.h
> +++ b/drivers/nvme/host/fabrics.h
> @@ -59,6 +59,7 @@ enum {
> NVMF_OPT_NR_POLL_QUEUES = 1 << 18,
> NVMF_OPT_TOS = 1 << 19,
> NVMF_OPT_FAIL_FAST_TMO = 1 << 20,
> + NVMF_OPT_HOST_TRIFACE = 1 << 21,
> };
>
> /**
> @@ -76,7 +77,9 @@ enum {
> * @trsvcid: The transport-specific TRSVCID field for a port on the
> * subsystem which is adding a controller.
> * @host_traddr: A transport-specific field identifying the NVME host port
> - * to use for the connection to the controller.
> + * to use for the connection to the controller.
> + * @host_triface: A transport-specific field identifying the NVME host
> + * interface to use for the connection to the controller.
> * @queue_size: Number of IO queue elements.
> * @nr_io_queues: Number of controller IO queues that will be established.
> * @reconnect_delay: Time between two consecutive reconnect attempts.
> @@ -101,6 +104,7 @@ struct nvmf_ctrl_options {
> char *traddr;
> char *trsvcid;
> char *host_traddr;
> + char *host_triface;
> size_t queue_size;
> unsigned int nr_io_queues;
> unsigned int reconnect_delay;
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 8e55d8bc0c50..28eb7f88b487 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -1447,6 +1447,20 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
> }
> }
>
> + if (nctrl->opts->mask & NVMF_OPT_HOST_TRIFACE) {
> + char *iface = nctrl->opts->host_triface;
> + sockptr_t optval = KERNEL_SOCKPTR(iface);
> +
> + ret = sock_setsockopt(queue->sock, SOL_SOCKET, SO_BINDTODEVICE,
> + optval, strlen(iface));
> + if (ret) {
> + dev_err(nctrl->device,
> + "failed to bind to interface %s queue %d err %d\n",
> + iface, qid, ret);
> + goto err_sock;
> + }
> + }
> +
> queue->hdr_digest = nctrl->opts->hdr_digest;
> queue->data_digest = nctrl->opts->data_digest;
> if (queue->hdr_digest || queue->data_digest) {
Is this valid for all transports? I guess it would only work for 'tcp',
and maybe 'rdma' if one would be running ROCE.
Shouldn't we error out on other transports like 'fc' or 'loop'?
> @@ -2457,6 +2471,10 @@ nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts)
> static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
> struct nvmf_ctrl_options *opts)
> {
> + const char *iface_key = "";
> + const char *iface_val = "";
> + const char *srce_key = "";
> + const char *srce_val = "";
> struct nvme_tcp_ctrl *ctrl;
> int ret;
>
> @@ -2502,6 +2520,22 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
> opts->host_traddr);
> goto out_free_ctrl;
> }
> + srce_key = ", src-addr ";
> + srce_val = opts->host_traddr;
> + }
> +
> + if (opts->mask & NVMF_OPT_HOST_TRIFACE) {
> + struct net_device *ndev;
> +
> + ndev = dev_get_by_name(&init_net, opts->host_triface);
> + if (!ndev) {
> + pr_err("invalid interface passed: %s\n",
> + opts->host_triface);
> + ret = -ENODEV;
> + goto out_free_ctrl;
> + }
> + iface_key = ", iface ";
> + iface_val = opts->host_triface;
> }
>
> if (!opts->duplicate_connect && nvme_tcp_existing_controller(opts)) {
Normally the options are just parts of the 'address' string; why didn't
you use that approach here?
> @@ -2530,8 +2564,9 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
> if (ret)
> goto out_uninit_ctrl;
>
> - dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n",
> - ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
> + dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp%s%s%s%s\n",
> + ctrl->ctrl.opts->subsysnqn, &ctrl->addr,
> + srce_key, srce_val, iface_key, iface_val);
>
> mutex_lock(&nvme_tcp_ctrl_mutex);
> list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list);
> @@ -2560,7 +2595,7 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
> NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
> NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
> NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
> - NVMF_OPT_TOS,
> + NVMF_OPT_TOS | NVMF_OPT_HOST_TRIFACE,
> .create_ctrl = nvme_tcp_create_ctrl,
> };
>
>
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare at suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer
More information about the Linux-nvme
mailing list