[PATCH 1/2] io_uring: one capable() call per io_uring instance

Keith Busch kbusch at meta.com
Mon Dec 4 09:53:41 PST 2023


From: Keith Busch <kbusch at kernel.org>

The uring_cmd operation is often used for privileged actions, so drivers
subscribing to this interface check capable() for each command. The
capable() function is not fast-path friendly in many kernel configurations,
and calling it for every command can significantly harm performance.
Instead, check CAP_SYS_ADMIN once when the ring is created, stash the
result in the io_uring context, and report it to the uring_cmd interface
through a new issue_flag, IO_URING_F_SYS_ADMIN.

Signed-off-by: Keith Busch <kbusch at kernel.org>
---
 include/linux/io_uring_types.h | 4 ++++
 io_uring/io_uring.c            | 1 +
 io_uring/uring_cmd.c           | 2 ++
 3 files changed, 7 insertions(+)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index bebab36abce89..d64d6916753f0 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -36,6 +36,9 @@ enum io_uring_cmd_flags {
 	/* set when uring wants to cancel a previously issued command */
 	IO_URING_F_CANCEL		= (1 << 11),
 	IO_URING_F_COMPAT		= (1 << 12),
+
+	/* ring validated as CAP_SYS_ADMIN capable */
+	IO_URING_F_SYS_ADMIN		= (1 << 13),
 };
 
 struct io_wq_work_node {
@@ -240,6 +243,7 @@ struct io_ring_ctx {
 		unsigned int		poll_activated: 1;
 		unsigned int		drain_disabled: 1;
 		unsigned int		compat: 1;
+		unsigned int		sys_admin: 1;
 
 		struct task_struct	*submitter_task;
 		struct io_rings		*rings;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 1d254f2c997de..4aa10b64f539e 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3980,6 +3980,7 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
 		ctx->syscall_iopoll = 1;
 
 	ctx->compat = in_compat_syscall();
+	ctx->sys_admin = capable(CAP_SYS_ADMIN);
 	if (!ns_capable_noaudit(&init_user_ns, CAP_IPC_LOCK))
 		ctx->user = get_uid(current_user());
 
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 8a38b9f75d841..764f0e004aa00 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -164,6 +164,8 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
 		issue_flags |= IO_URING_F_CQE32;
 	if (ctx->compat)
 		issue_flags |= IO_URING_F_COMPAT;
+	if (ctx->sys_admin)
+		issue_flags |= IO_URING_F_SYS_ADMIN;
 	if (ctx->flags & IORING_SETUP_IOPOLL) {
 		if (!file->f_op->uring_cmd_iopoll)
 			return -EOPNOTSUPP;
-- 
2.34.1




More information about the Linux-nvme mailing list