[PATCH 2/2] nvme: fine-granular CAP_SYS_ADMIN for nvme io/admin commands
Kanchan Joshi
joshi.k at samsung.com
Wed Oct 18 11:30:03 PDT 2023
Passthrough commands are guarded by heavy-handed CAP_SYS_ADMIN checks
that neglects the file-mode completely.
Add a fine-granular policy that considers file-mode and various other
parameters for any approval/denial. Also enable few admin commands that
are necssary to get the information required to form the i/o commands.
The policy is implemented in a new helper 'nvme_cmd_allowed'.
I/O commands needing data-transfers are sent only via SGL.
Signed-off-by: Kanchan Joshi <joshi.k at samsung.com>
---
drivers/nvme/host/ioctl.c | 162 ++++++++++++++++++++++++++++++--------
include/linux/nvme.h | 2 +
2 files changed, 130 insertions(+), 34 deletions(-)
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index eb2ef3e14961..3d0d91633946 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -10,8 +10,80 @@
enum {
NVME_IOCTL_VEC = (1 << 0),
+ NVME_IOCTL_PARTITION = (1 << 1),
};
+static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
+ unsigned int flags, bool open_for_write)
+{
+ u32 effects;
+
+ if (capable(CAP_SYS_ADMIN))
+ return true;
+
+ /*
+ * Do not allow unprivileged passthrough on partitions, as that allows an
+ * escape from the containment of the partition.
+ */
+ if (flags & NVME_IOCTL_PARTITION)
+ return false;
+
+ /*
+ * Do not allow unprivileged processes to send vendor specific or fabrics
+ * commands as we can't be sure about their effects.
+ */
+ if (c->common.opcode >= nvme_cmd_vendor_start ||
+ c->common.opcode == nvme_fabrics_command)
+ return false;
+
+ /*
+ * Do not allow unprivileged passthrough of admin commands except
+ * for a subset of identify commands that contain information required
+ * to form proper I/O commands in userspace and do not expose any
+ * potentially sensitive information.
+ */
+ if (!ns) {
+ if (c->common.opcode == nvme_admin_identify) {
+ switch (c->identify.cns) {
+ case NVME_ID_CNS_NS:
+ case NVME_ID_CNS_CS_NS:
+ case NVME_ID_CNS_NS_CS_INDEP:
+ case NVME_ID_CNS_CS_CTRL:
+ case NVME_ID_CNS_CTRL:
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /*
+ * Check if the controller provides a Commands Supported and Effects log
+ * and marks this command as supported. If not reject unprivileged
+ * passthrough.
+ */
+ effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode);
+ if (!(effects & NVME_CMD_EFFECTS_CSUPP))
+ return false;
+
+ /*
+ * Don't allow passthrough for command that have intrusive (or unknown)
+ * effects.
+ */
+ if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
+ NVME_CMD_EFFECTS_UUID_SEL |
+ NVME_CMD_EFFECTS_SCOPE_MASK))
+ return false;
+
+ /*
+ * Only allow I/O commands that transfer data to the controller or that
+ * change the logical block contents if the file descriptor is open for
+ * writing.
+ */
+ if (nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC))
+ return open_for_write;
+ return true;
+}
+
/*
* Convert integer values from ioctl structures to user pointers, silently
* ignoring the upper bits in the compat case to match behaviour of 32-bit
@@ -134,6 +206,9 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
}
*metap = meta;
}
+ /* Data transfer for unprivileged passthrough is only via SGL */
+ if (bdev && !capable(CAP_SYS_ADMIN))
+ nvme_req(req)->flags |= NVME_REQ_FORCE_SGL;
return ret;
@@ -267,7 +342,8 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
}
static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
- struct nvme_passthru_cmd __user *ucmd)
+ struct nvme_passthru_cmd __user *ucmd, unsigned int flags,
+ bool open_for_write)
{
struct nvme_passthru_cmd cmd;
struct nvme_command c;
@@ -275,8 +351,6 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
u64 result;
int status;
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
return -EFAULT;
if (cmd.flags)
@@ -297,6 +371,9 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
c.common.cdw14 = cpu_to_le32(cmd.cdw14);
c.common.cdw15 = cpu_to_le32(cmd.cdw15);
+ if (!nvme_cmd_allowed(ns, &c, 0, open_for_write))
+ return -EACCES;
+
if (cmd.timeout_ms)
timeout = msecs_to_jiffies(cmd.timeout_ms);
@@ -313,16 +390,14 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
}
static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
- struct nvme_passthru_cmd64 __user *ucmd,
- unsigned int flags)
+ struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags,
+ bool open_for_write)
{
struct nvme_passthru_cmd64 cmd;
struct nvme_command c;
unsigned timeout = 0;
int status;
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
return -EFAULT;
if (cmd.flags)
@@ -343,6 +418,9 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
c.common.cdw14 = cpu_to_le32(cmd.cdw14);
c.common.cdw15 = cpu_to_le32(cmd.cdw15);
+ if (!nvme_cmd_allowed(ns, &c, flags, open_for_write))
+ return -EACCES;
+
if (cmd.timeout_ms)
timeout = msecs_to_jiffies(cmd.timeout_ms);
@@ -492,9 +570,6 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
void *meta = NULL;
int ret;
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
-
c.common.opcode = READ_ONCE(cmd->opcode);
c.common.flags = READ_ONCE(cmd->flags);
if (c.common.flags)
@@ -516,6 +591,9 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14));
c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15));
+ if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode & FMODE_WRITE))
+ return -EACCES;
+
d.metadata = READ_ONCE(cmd->metadata);
d.addr = READ_ONCE(cmd->addr);
d.data_len = READ_ONCE(cmd->data_len);
@@ -572,13 +650,13 @@ static bool is_ctrl_ioctl(unsigned int cmd)
}
static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
- void __user *argp)
+ void __user *argp, bool open_for_write)
{
switch (cmd) {
case NVME_IOCTL_ADMIN_CMD:
- return nvme_user_cmd(ctrl, NULL, argp);
+ return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write);
case NVME_IOCTL_ADMIN64_CMD:
- return nvme_user_cmd64(ctrl, NULL, argp, 0);
+ return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write);
default:
return sed_ioctl(ctrl->opal_dev, cmd, argp);
}
@@ -603,16 +681,14 @@ struct nvme_user_io32 {
#endif /* COMPAT_FOR_U64_ALIGNMENT */
static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
- void __user *argp)
+ void __user *argp, unsigned int flags, bool open_for_write)
{
- unsigned int flags = 0;
-
switch (cmd) {
case NVME_IOCTL_ID:
force_successful_syscall_return();
return ns->head->ns_id;
case NVME_IOCTL_IO_CMD:
- return nvme_user_cmd(ns->ctrl, ns, argp);
+ return nvme_user_cmd(ns->ctrl, ns, argp, flags, open_for_write);
/*
* struct nvme_user_io can have different padding on some 32-bit ABIs.
* Just accept the compat version as all fields that are used are the
@@ -627,32 +703,39 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
flags |= NVME_IOCTL_VEC;
fallthrough;
case NVME_IOCTL_IO64_CMD:
- return nvme_user_cmd64(ns->ctrl, ns, argp, flags);
+ return nvme_user_cmd64(ns->ctrl, ns, argp, flags,
+ open_for_write);
default:
return -ENOTTY;
}
}
-int nvme_ioctl(struct block_device *bdev, fmode_t mode,
+int nvme_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long arg)
{
struct nvme_ns *ns = bdev->bd_disk->private_data;
+ bool open_for_write = mode & BLK_OPEN_WRITE;
void __user *argp = (void __user *)arg;
+ unsigned int flags = 0;
+
+ if (bdev_is_partition(bdev))
+ flags |= NVME_IOCTL_PARTITION;
if (is_ctrl_ioctl(cmd))
- return nvme_ctrl_ioctl(ns->ctrl, cmd, argp);
- return nvme_ns_ioctl(ns, cmd, argp);
+ return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);
+ return nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write);
}
long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct nvme_ns *ns =
container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev);
+ bool open_for_write = file->f_mode & FMODE_WRITE;
void __user *argp = (void __user *)arg;
if (is_ctrl_ioctl(cmd))
- return nvme_ctrl_ioctl(ns->ctrl, cmd, argp);
- return nvme_ns_ioctl(ns, cmd, argp);
+ return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);
+ return nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write);
}
static int nvme_uring_cmd_checks(unsigned int issue_flags)
@@ -716,7 +799,8 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
}
#ifdef CONFIG_NVME_MULTIPATH
static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
- void __user *argp, struct nvme_ns_head *head, int srcu_idx)
+ void __user *argp, struct nvme_ns_head *head, int srcu_idx,
+ bool open_for_write)
__releases(&head->srcu)
{
struct nvme_ctrl *ctrl = ns->ctrl;
@@ -724,7 +808,7 @@ static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
nvme_get_ctrl(ns->ctrl);
srcu_read_unlock(&head->srcu, srcu_idx);
- ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp);
+ ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);
nvme_put_ctrl(ctrl);
return ret;
@@ -734,9 +818,14 @@ int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long arg)
{
struct nvme_ns_head *head = bdev->bd_disk->private_data;
+ bool open_for_write = mode & BLK_OPEN_WRITE;
void __user *argp = (void __user *)arg;
struct nvme_ns *ns;
int srcu_idx, ret = -EWOULDBLOCK;
+ unsigned int flags = 0;
+
+ if (bdev_is_partition(bdev))
+ flags |= NVME_IOCTL_PARTITION;
srcu_idx = srcu_read_lock(&head->srcu);
ns = nvme_find_path(head);
@@ -749,9 +838,10 @@ int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode,
* deadlock when deleting namespaces using the passthrough interface.
*/
if (is_ctrl_ioctl(cmd))
- return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
+ return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
+ open_for_write);
- ret = nvme_ns_ioctl(ns, cmd, argp);
+ ret = nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write);
out_unlock:
srcu_read_unlock(&head->srcu, srcu_idx);
return ret;
@@ -760,6 +850,7 @@ int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode,
long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
+ bool open_for_write = file->f_mode & FMODE_WRITE;
struct cdev *cdev = file_inode(file)->i_cdev;
struct nvme_ns_head *head =
container_of(cdev, struct nvme_ns_head, cdev);
@@ -773,9 +864,10 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
goto out_unlock;
if (is_ctrl_ioctl(cmd))
- return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
+ return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
+ open_for_write);
- ret = nvme_ns_ioctl(ns, cmd, argp);
+ ret = nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write);
out_unlock:
srcu_read_unlock(&head->srcu, srcu_idx);
return ret;
@@ -824,7 +916,8 @@ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
return ret;
}
-static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
+static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
+ bool open_for_write)
{
struct nvme_ns *ns;
int ret;
@@ -848,7 +941,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
kref_get(&ns->kref);
up_read(&ctrl->namespaces_rwsem);
- ret = nvme_user_cmd(ctrl, ns, argp);
+ ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write);
nvme_put_ns(ns);
return ret;
@@ -860,16 +953,17 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
long nvme_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
+ bool open_for_write = file->f_mode & FMODE_WRITE;
struct nvme_ctrl *ctrl = file->private_data;
void __user *argp = (void __user *)arg;
switch (cmd) {
case NVME_IOCTL_ADMIN_CMD:
- return nvme_user_cmd(ctrl, NULL, argp);
+ return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write);
case NVME_IOCTL_ADMIN64_CMD:
- return nvme_user_cmd64(ctrl, NULL, argp, 0);
+ return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write);
case NVME_IOCTL_IO_CMD:
- return nvme_dev_user_cmd(ctrl, argp);
+ return nvme_dev_user_cmd(ctrl, argp, open_for_write);
case NVME_IOCTL_RESET:
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index df3e2c619448..26dd3f859d9d 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -642,6 +642,7 @@ enum {
NVME_CMD_EFFECTS_CCC = 1 << 4,
NVME_CMD_EFFECTS_CSE_MASK = GENMASK(18, 16),
NVME_CMD_EFFECTS_UUID_SEL = 1 << 19,
+ NVME_CMD_EFFECTS_SCOPE_MASK = GENMASK(31, 20),
};
struct nvme_effects_log {
@@ -833,6 +834,7 @@ enum nvme_opcode {
nvme_cmd_zone_mgmt_send = 0x79,
nvme_cmd_zone_mgmt_recv = 0x7a,
nvme_cmd_zone_append = 0x7d,
+ nvme_cmd_vendor_start = 0x80,
};
#define nvme_opcode_name(opcode) { opcode, #opcode }
--
2.25.1
More information about the Linux-nvme
mailing list