[PATCH v2] nvme: fine-granular CAP_SYS_ADMIN for nvme io commands
Kanchan Joshi
joshi.k at samsung.com
Tue Sep 27 11:36:20 PDT 2022
Currently, both io and admin commands are gated by a coarse-granular
CAP_SYS_ADMIN check, with no regard to the file open mode.
$ ls -l /dev/ng*
crw-rw-rw- 1 root root 242, 0 Sep 9 19:20 /dev/ng0n1
crw------- 1 root root 242, 1 Sep 9 19:20 /dev/ng0n2
In the example above, ng0n1 looks as if it allows unprivileged
read/write access, but it does not; it behaves the same as ng0n2.
This patch implements a shift from CAP_SYS_ADMIN to more fine-granular
control for io commands.
If CAP_SYS_ADMIN is present, nothing else is checked, as before.
Otherwise, the following rules are enforced:
- admin commands are not allowed
- vendor-specific and fabrics commands are not allowed
- io commands that can write are allowed only if FMODE_WRITE
permission is present
- io commands that read are allowed
Add a helper nvme_cmd_allowed that implements the above policy.
Change all callers of CAP_SYS_ADMIN to go through nvme_cmd_allowed
for the decision.
Since the file open mode is taken into account for approval/denial,
pass file-mode information down to the places that need it.
Signed-off-by: Kanchan Joshi <joshi.k at samsung.com>
---
Changes since v1:
- Move nvme_cmd_allowed check at a place that allows using nvme_is_write
helper (hch)
- Keep everything in a single patch (chaitanya, hch)
- Comments cleanup (hch, chaitanya)
- Part of cover-letter moved to commit-description
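For reference, a minimal, hypothetical user-space sketch (not part of
this patch) of what the new policy enables: with only read permission
on the char device, an unprivileged process can issue a passthrough
Read, while write opcodes still need FMODE_WRITE and admin or
vendor-specific opcodes still need CAP_SYS_ADMIN. Before this patch,
the same ioctl fails with EACCES for any caller without CAP_SYS_ADMIN.
The device path, nsid and the 4096-byte logical block size below are
assumptions.

#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/nvme_ioctl.h>

int main(void)
{
	struct nvme_passthru_cmd64 cmd;
	void *buf;
	int fd, ret;

	/* open read-only: the fd has FMODE_READ but not FMODE_WRITE */
	fd = open("/dev/ng0n1", O_RDONLY);
	if (fd < 0 || posix_memalign(&buf, 4096, 4096))
		return 1;

	memset(&cmd, 0, sizeof(cmd));
	cmd.opcode = 0x02;	/* nvme_cmd_read: not a write command */
	cmd.nsid = 1;
	cmd.addr = (__u64)(uintptr_t)buf;
	cmd.data_len = 4096;	/* assumes a 4096-byte logical block */
	/* cdw10/cdw11 = SLBA 0, cdw12 = 0 -> NLB 0, i.e. one block */

	ret = ioctl(fd, NVME_IOCTL_IO64_CMD, &cmd);
	printf("read passthru: %d (%s)\n", ret,
	       ret < 0 ? strerror(errno) : "ok");

	/* a write opcode (0x01) on this read-only fd would now be
	 * rejected with EACCES by nvme_cmd_allowed() */
	close(fd);
	free(buf);
	return ret < 0;
}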
drivers/nvme/host/ioctl.c | 97 ++++++++++++++++++++++++++-------------
include/linux/nvme.h | 1 +
2 files changed, 66 insertions(+), 32 deletions(-)
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 548aca8b5b9f..ebe04e977baa 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -20,6 +20,29 @@ static void __user *nvme_to_user_ptr(uintptr_t ptrval)
return (void __user *)ptrval;
}
+bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, fmode_t mode)
+{
+ u8 opcode = c->common.opcode;
+
+ if (capable(CAP_SYS_ADMIN))
+ return true;
+
+ /* admin commands are not allowed */
+ if (ns == NULL)
+ return false;
+
+ /* exclude vendor-specific io and fabrics commands */
+ if (opcode >= nvme_cmd_vendor_start || opcode == nvme_fabrics_command)
+ return false;
+
+ /* allow write cmds only if matching FMODE is present */
+ if (nvme_is_write(c))
+ return mode & FMODE_WRITE;
+
+ /* allow read cmds as the device permission allows access */
+ return true;
+}
+
static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf,
unsigned len, u32 seed, bool write)
{
@@ -240,7 +263,7 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
}
static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
- struct nvme_passthru_cmd __user *ucmd)
+ struct nvme_passthru_cmd __user *ucmd, fmode_t mode)
{
struct nvme_passthru_cmd cmd;
struct nvme_command c;
@@ -248,8 +271,6 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
u64 result;
int status;
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
return -EFAULT;
if (cmd.flags)
@@ -259,6 +280,10 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
memset(&c, 0, sizeof(c));
c.common.opcode = cmd.opcode;
+
+ if (!nvme_cmd_allowed(ns, &c, mode))
+ return -EACCES;
+
c.common.flags = cmd.flags;
c.common.nsid = cpu_to_le32(cmd.nsid);
c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
@@ -287,15 +312,14 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
}
static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
- struct nvme_passthru_cmd64 __user *ucmd, bool vec)
+ struct nvme_passthru_cmd64 __user *ucmd, bool vec,
+ fmode_t mode)
{
struct nvme_passthru_cmd64 cmd;
struct nvme_command c;
unsigned timeout = 0;
int status;
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
return -EFAULT;
if (cmd.flags)
@@ -305,6 +329,10 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
memset(&c, 0, sizeof(c));
c.common.opcode = cmd.opcode;
+
+ if (!nvme_cmd_allowed(ns, &c, mode))
+ return -EACCES;
+
c.common.flags = cmd.flags;
c.common.nsid = cpu_to_le32(cmd.nsid);
c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
@@ -419,14 +447,14 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
blk_mq_req_flags_t blk_flags = 0;
void *meta = NULL;
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
-
c.common.opcode = READ_ONCE(cmd->opcode);
c.common.flags = READ_ONCE(cmd->flags);
if (c.common.flags)
return -EINVAL;
+ if (!nvme_cmd_allowed(ns, &c, ioucmd->file->f_mode))
+ return -EACCES;
+
c.common.command_id = 0;
c.common.nsid = cpu_to_le32(cmd->nsid);
if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid)))
@@ -498,13 +526,13 @@ static bool is_ctrl_ioctl(unsigned int cmd)
}
static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
- void __user *argp)
+ void __user *argp, fmode_t mode)
{
switch (cmd) {
case NVME_IOCTL_ADMIN_CMD:
- return nvme_user_cmd(ctrl, NULL, argp);
+ return nvme_user_cmd(ctrl, NULL, argp, mode);
case NVME_IOCTL_ADMIN64_CMD:
- return nvme_user_cmd64(ctrl, NULL, argp, false);
+ return nvme_user_cmd64(ctrl, NULL, argp, false, mode);
default:
return sed_ioctl(ctrl->opal_dev, cmd, argp);
}
@@ -529,14 +557,14 @@ struct nvme_user_io32 {
#endif /* COMPAT_FOR_U64_ALIGNMENT */
static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
- void __user *argp)
+ void __user *argp, fmode_t mode)
{
switch (cmd) {
case NVME_IOCTL_ID:
force_successful_syscall_return();
return ns->head->ns_id;
case NVME_IOCTL_IO_CMD:
- return nvme_user_cmd(ns->ctrl, ns, argp);
+ return nvme_user_cmd(ns->ctrl, ns, argp, mode);
/*
* struct nvme_user_io can have different padding on some 32-bit ABIs.
* Just accept the compat version as all fields that are used are the
@@ -548,19 +576,20 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
case NVME_IOCTL_SUBMIT_IO:
return nvme_submit_io(ns, argp);
case NVME_IOCTL_IO64_CMD:
- return nvme_user_cmd64(ns->ctrl, ns, argp, false);
+ return nvme_user_cmd64(ns->ctrl, ns, argp, false, mode);
case NVME_IOCTL_IO64_CMD_VEC:
- return nvme_user_cmd64(ns->ctrl, ns, argp, true);
+ return nvme_user_cmd64(ns->ctrl, ns, argp, true, mode);
default:
return -ENOTTY;
}
}
-static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg)
+static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg,
+ fmode_t mode)
{
if (is_ctrl_ioctl(cmd))
- return nvme_ctrl_ioctl(ns->ctrl, cmd, arg);
- return nvme_ns_ioctl(ns, cmd, arg);
+ return nvme_ctrl_ioctl(ns->ctrl, cmd, arg, mode);
+ return nvme_ns_ioctl(ns, cmd, arg, mode);
}
int nvme_ioctl(struct block_device *bdev, fmode_t mode,
@@ -568,7 +597,7 @@ int nvme_ioctl(struct block_device *bdev, fmode_t mode,
{
struct nvme_ns *ns = bdev->bd_disk->private_data;
- return __nvme_ioctl(ns, cmd, (void __user *)arg);
+ return __nvme_ioctl(ns, cmd, (void __user *)arg, mode);
}
long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -576,7 +605,7 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct nvme_ns *ns =
container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev);
- return __nvme_ioctl(ns, cmd, (void __user *)arg);
+ return __nvme_ioctl(ns, cmd, (void __user *)arg, file->f_mode);
}
static int nvme_uring_cmd_checks(unsigned int issue_flags)
@@ -644,7 +673,8 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
}
#ifdef CONFIG_NVME_MULTIPATH
static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
- void __user *argp, struct nvme_ns_head *head, int srcu_idx)
+ void __user *argp, struct nvme_ns_head *head, int srcu_idx,
+ fmode_t mode)
__releases(&head->srcu)
{
struct nvme_ctrl *ctrl = ns->ctrl;
@@ -652,7 +682,7 @@ static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
nvme_get_ctrl(ns->ctrl);
srcu_read_unlock(&head->srcu, srcu_idx);
- ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp);
+ ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, mode);
nvme_put_ctrl(ctrl);
return ret;
@@ -677,9 +707,10 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
* deadlock when deleting namespaces using the passthrough interface.
*/
if (is_ctrl_ioctl(cmd))
- return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
+ return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
+ mode);
- ret = nvme_ns_ioctl(ns, cmd, argp);
+ ret = nvme_ns_ioctl(ns, cmd, argp, mode);
out_unlock:
srcu_read_unlock(&head->srcu, srcu_idx);
return ret;
@@ -701,9 +732,10 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
goto out_unlock;
if (is_ctrl_ioctl(cmd))
- return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
+ return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
+ file->f_mode);
- ret = nvme_ns_ioctl(ns, cmd, argp);
+ ret = nvme_ns_ioctl(ns, cmd, argp, file->f_mode);
out_unlock:
srcu_read_unlock(&head->srcu, srcu_idx);
return ret;
@@ -777,7 +809,8 @@ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
return ret;
}
-static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
+static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
+ fmode_t mode)
{
struct nvme_ns *ns;
int ret;
@@ -801,7 +834,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
kref_get(&ns->kref);
up_read(&ctrl->namespaces_rwsem);
- ret = nvme_user_cmd(ctrl, ns, argp);
+ ret = nvme_user_cmd(ctrl, ns, argp, mode);
nvme_put_ns(ns);
return ret;
@@ -818,11 +851,11 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd,
switch (cmd) {
case NVME_IOCTL_ADMIN_CMD:
- return nvme_user_cmd(ctrl, NULL, argp);
+ return nvme_user_cmd(ctrl, NULL, argp, file->f_mode);
case NVME_IOCTL_ADMIN64_CMD:
- return nvme_user_cmd64(ctrl, NULL, argp, false);
+ return nvme_user_cmd64(ctrl, NULL, argp, false, file->f_mode);
case NVME_IOCTL_IO_CMD:
- return nvme_dev_user_cmd(ctrl, argp);
+ return nvme_dev_user_cmd(ctrl, argp, file->f_mode);
case NVME_IOCTL_RESET:
dev_warn(ctrl->device, "resetting controller\n");
return nvme_reset_ctrl_sync(ctrl);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index ae53d74f3696..8396eb7ecb68 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -797,6 +797,7 @@ enum nvme_opcode {
nvme_cmd_zone_mgmt_send = 0x79,
nvme_cmd_zone_mgmt_recv = 0x7a,
nvme_cmd_zone_append = 0x7d,
+ nvme_cmd_vendor_start = 0x80,
};
#define nvme_opcode_name(opcode) { opcode, #opcode }
--
2.25.1