[PATCH 09/11] nvme-tcp: implement host mptcp proto
Geliang Tang
geliang at kernel.org
Wed May 27 20:10:43 PDT 2026
From: Geliang Tang <tanggeliang at kylinos.cn>
Implement an MPTCP-specific instance of struct nvme_tcp_proto and assign
it to ctrl->proto when the transport string is "mptcp".
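The socket option setting logic is similar to the target side, except that
mptcp_sock_set_syncnt() is newly defined for the host side, and
__mptcp_sock_set_tos() is made non-static and exported so that it can be
used as the .set_tos callback.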
These helpers set the values on all existing subflows of an MPTCP
connection, except for set_reuseaddr, which only applies to the first
subflow. sync_socket_options() then propagates the values to subflows
that are created afterwards.
A separate nvme_mptcp_ctrl_ops structure with .name = "mptcp" is defined
and used for MPTCP controllers.
"mptcp" is planned to be added to the NVMe Base Specification as a new
NVMe transport type.
Currently, the Discovery Log does not recognize trtype=4 (MPTCP) and
shows "trtype: unrecognized" for such entries:
=====Discovery Log Entry 0======
trtype: unrecognized
adrfam: ipv4
subtype: current discovery subsystem
treq: not specified, sq flow control disable supported
portid: 23106
trsvcid: 23601
subnqn: nqn.2014-08.org.nvmexpress.discovery
traddr: 10.1.1.1
eflags: none
Cc: Hannes Reinecke <hare at suse.de>
Cc: John Meneghini <jmeneghi at redhat.com>
Cc: Randy Jennings <randyj at purestorage.com>
Cc: Nilay Shroff <nilay at linux.ibm.com>
Co-developed-by: zhenwei pi <zhenwei.pi at linux.dev>
Signed-off-by: zhenwei pi <zhenwei.pi at linux.dev>
Co-developed-by: Hui Zhu <zhuhui at kylinos.cn>
Signed-off-by: Hui Zhu <zhuhui at kylinos.cn>
Co-developed-by: Gang Yan <yangang at kylinos.cn>
Signed-off-by: Gang Yan <yangang at kylinos.cn>
Signed-off-by: Geliang Tang <tanggeliang at kylinos.cn>
---
drivers/nvme/host/tcp.c | 34 ++++++++++++++++++++++++++++++++++
include/net/mptcp.h | 11 +++++++++++
net/mptcp/sockopt.c | 30 +++++++++++++++++++++++++++++-
3 files changed, 74 insertions(+), 1 deletion(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 305624d59c50..2388a8c443cc 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2895,6 +2895,24 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
.get_virt_boundary = nvmf_get_virt_boundary,
};
+#ifdef CONFIG_MPTCP
+static const struct nvme_ctrl_ops nvme_mptcp_ctrl_ops = {
+ .name = "mptcp",
+ .module = THIS_MODULE,
+ .flags = NVME_F_FABRICS | NVME_F_BLOCKING,
+ .reg_read32 = nvmf_reg_read32,
+ .reg_read64 = nvmf_reg_read64,
+ .reg_write32 = nvmf_reg_write32,
+ .subsystem_reset = nvmf_subsystem_reset,
+ .free_ctrl = nvme_tcp_free_ctrl,
+ .submit_async_event = nvme_tcp_submit_async_event,
+ .delete_ctrl = nvme_tcp_delete_ctrl,
+ .get_address = nvme_tcp_get_address,
+ .stop_ctrl = nvme_tcp_stop_ctrl,
+ .get_virt_boundary = nvmf_get_virt_boundary,
+};
+#endif
+
static bool
nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts)
{
@@ -2923,6 +2941,18 @@ static const struct nvme_tcp_proto nvme_tcp_proto = {
};
+#ifdef CONFIG_MPTCP
+static const struct nvme_tcp_proto nvme_mptcp_proto = {
+ .protocol = IPPROTO_MPTCP,
+ .set_syncnt = mptcp_sock_set_syncnt,
+ .set_nodelay = mptcp_sock_set_nodelay,
+ .no_linger = mptcp_sock_no_linger,
+ .set_priority = mptcp_sock_set_priority,
+ .set_tos = __mptcp_sock_set_tos,
+ .ops = &nvme_mptcp_ctrl_ops,
+};
+#endif
+
static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev,
struct nvmf_ctrl_options *opts)
{
@@ -2989,6 +3019,10 @@ static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev,
if (!strcmp(ctrl->ctrl.opts->transport, "tcp")) {
ctrl->proto = &nvme_tcp_proto;
+#ifdef CONFIG_MPTCP
+ } else if (!strcmp(ctrl->ctrl.opts->transport, "mptcp")) {
+ ctrl->proto = &nvme_mptcp_proto;
+#endif
} else {
ret = -EINVAL;
goto out_free_ctrl;
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index b8ab214a7890..160267e35b13 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -238,11 +238,15 @@ void mptcp_sock_no_linger(struct sock *sk);
void mptcp_sock_set_priority(struct sock *sk, u32 priority);
+void __mptcp_sock_set_tos(struct sock *sk, int val);
+
void mptcp_sock_set_tos(struct sock *sk);
void mptcp_sock_set_reuseaddr(struct sock *sk);
void mptcp_sock_set_nodelay(struct sock *sk);
+
+int mptcp_sock_set_syncnt(struct sock *sk, int val);
#else
static inline void mptcp_init(void)
@@ -334,11 +338,18 @@ static inline void mptcp_sock_no_linger(struct sock *sk) { }
static inline void mptcp_sock_set_priority(struct sock *sk, u32 priority) { }
+static inline void __mptcp_sock_set_tos(struct sock *sk, int val) { }
+
static inline void mptcp_sock_set_tos(struct sock *sk) { }
static inline void mptcp_sock_set_reuseaddr(struct sock *sk) { }
static inline void mptcp_sock_set_nodelay(struct sock *sk) { }
+
+static inline int mptcp_sock_set_syncnt(struct sock *sk, int val)
+{
+ return 0;
+}
#endif /* CONFIG_MPTCP */
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 0adbbe568f6e..7857dac62afc 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -1598,6 +1598,8 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range));
ssk->sk_reuse = sk->sk_reuse;
+ if (inet_csk(sk)->icsk_syn_retries > 0)
+ tcp_sock_set_syncnt(ssk, inet_csk(sk)->icsk_syn_retries);
}
void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk)
@@ -1709,7 +1711,7 @@ void mptcp_sock_set_priority(struct sock *sk, u32 priority)
}
EXPORT_SYMBOL(mptcp_sock_set_priority);
-static void __mptcp_sock_set_tos(struct sock *sk, int val)
+void __mptcp_sock_set_tos(struct sock *sk, int val)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_subflow_context *subflow;
@@ -1728,6 +1730,7 @@ static void __mptcp_sock_set_tos(struct sock *sk, int val)
}
release_sock(sk);
}
+EXPORT_SYMBOL(__mptcp_sock_set_tos);
void mptcp_sock_set_tos(struct sock *sk)
{
@@ -1783,3 +1786,28 @@ void mptcp_sock_set_nodelay(struct sock *sk)
release_sock(sk);
}
EXPORT_SYMBOL(mptcp_sock_set_nodelay);
+
+int mptcp_sock_set_syncnt(struct sock *sk, int val)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ struct mptcp_subflow_context *subflow;
+ struct sock *ssk;
+
+ if (val < 1 || val > MAX_TCP_SYNCNT)
+ return -EINVAL;
+
+ lock_sock(sk);
+ sockopt_seq_inc(msk);
+ inet_csk(sk)->icsk_syn_retries = val;
+ mptcp_for_each_subflow(msk, subflow) {
+ ssk = mptcp_subflow_tcp_sock(subflow);
+ if (ssk) {
+ lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
+ tcp_sock_set_syncnt(ssk, val);
+ release_sock(ssk);
+ }
+ }
+ release_sock(sk);
+ return 0;
+}
+EXPORT_SYMBOL(mptcp_sock_set_syncnt);
--
2.53.0
More information about the Linux-nvme
mailing list