[PATCH] nvmet: allow associating port to a cgroup via configfs
Ofir Gal
ofir.gal at volumez.com
Tue Jun 27 03:02:15 PDT 2023
Currently there is no way to throttle nvme targets with cgroup v2.
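The IOs that the nvme target submits are not associated with any cgroup,
so they are accounted to the root cgroup. The root cgroup can't be throttled
with the cgroup v2 mechanism.
Add a configfs port attribute, param_associated_cgroup, that associates the
IOs submitted on behalf of a port with a given cgroup.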
Signed-off-by: Ofir Gal <ofir.gal at volumez.com>
---
drivers/nvme/target/configfs.c | 77 +++++++++++++++++++++++++++++++
drivers/nvme/target/core.c | 3 ++
drivers/nvme/target/io-cmd-bdev.c | 13 ++++++
drivers/nvme/target/nvmet.h | 3 ++
include/linux/cgroup.h | 5 ++
kernel/cgroup/cgroup-internal.h | 5 --
6 files changed, 101 insertions(+), 5 deletions(-)
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 907143870da5..2e8f93a07498 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -12,6 +12,7 @@
#include <linux/ctype.h>
#include <linux/pci.h>
#include <linux/pci-p2pdma.h>
+#include <linux/cgroup.h>
#ifdef CONFIG_NVME_TARGET_AUTH
#include <linux/nvme-auth.h>
#endif
@@ -281,6 +282,81 @@ static ssize_t nvmet_param_pi_enable_store(struct config_item *item,
CONFIGFS_ATTR(nvmet_, param_pi_enable);
#endif
+/*
+ * Show the cgroup that the port's IOs are associated with.
+ *
+ * Writes the cgroup path, resolved against the reader's cgroup namespace,
+ * into @page, followed by " (deleted)" if the cgroup is dead. "/" is
+ * reported when no cgroup has been set or its path cannot be resolved.
+ *
+ * Returns the number of bytes written to @page, or -ENAMETOOLONG if the
+ * path does not fit.
+ */
+static ssize_t nvmet_param_associated_cgroup_show(struct config_item *item,
+		char *page)
+{
+	struct nvmet_port *port = to_nvmet_port(item);
+	const char *suffix;
+	ssize_t len = 0;
+	ssize_t retval;
+
+	/* No cgroup has been set means the IOs are associated to the root cgroup */
+	if (!port->cgrp)
+		goto root_cgroup;
+
+	retval = cgroup_path_ns(port->cgrp, page, PAGE_SIZE,
+				current->nsproxy->cgroup_ns);
+
+	/*
+	 * No cgroup found means the IOs are associated to the root cgroup.
+	 * This must be tested before the length checks: PAGE_SIZE is
+	 * unsigned, so a negative retval compared against it is converted to
+	 * a huge value and would be misreported as -ENAMETOOLONG.
+	 */
+	if (retval < 0)
+		goto root_cgroup;
+
+	if (retval >= PATH_MAX || retval >= PAGE_SIZE)
+		return -ENAMETOOLONG;
+
+	len += retval;
+
+	suffix = cgroup_is_dead(port->cgrp) ? " (deleted)\n" : "\n";
+	/*
+	 * Use an explicit "%s" rather than passing a variable as the format,
+	 * and scnprintf() so that len counts only bytes actually written.
+	 */
+	len += scnprintf(page + len, PAGE_SIZE - len, "%s", suffix);
+
+	return len;
+
+root_cgroup:
+	return snprintf(page, PAGE_SIZE, "/\n");
+}
+
+/*
+ * Associate the port's IOs with the cgroup named by @page.
+ *
+ * @page holds a cgroup path; anything from the first newline on is ignored.
+ * On success the port keeps a reference on the cgroup and on its effective
+ * io css, the references held for a previously configured cgroup are
+ * dropped, and @count is returned.
+ *
+ * Returns -EINVAL for an empty path or when no css is returned, -ENOMEM if
+ * the path copy cannot be allocated, and -ENOENT if the cgroup lookup fails.
+ */
+static ssize_t nvmet_param_associated_cgroup_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nvmet_port *port = to_nvmet_port(item);
+	struct cgroup_subsys_state *blkcg, *old_blkcg;
+	struct cgroup *cgrp, *old_cgrp;
+	size_t len;
+	char *path;
+
+	len = strcspn(page, "\n");
+	if (!len)
+		return -EINVAL;
+
+	path = kmemdup_nul(page, len, GFP_KERNEL);
+	if (!path)
+		return -ENOMEM;
+
+	cgrp = cgroup_get_from_path(path);
+	kfree(path);
+	if (IS_ERR(cgrp))
+		return -ENOENT;
+
+	blkcg = cgroup_get_e_css(cgrp, &io_cgrp_subsys);
+	if (!blkcg) {
+		cgroup_put(cgrp);
+		return -EINVAL;
+	}
+
+	/* Publish the new association before dropping the old references */
+	old_cgrp = port->cgrp;
+	old_blkcg = port->blkcg;
+	port->cgrp = cgrp;
+	port->blkcg = blkcg;
+
+	/*
+	 * cgroup_get_e_css() returns a css with a reference held, so the css
+	 * of the previous cgroup must be put as well as the cgroup itself;
+	 * otherwise every re-store leaks a css reference.
+	 */
+	if (old_blkcg)
+		css_put(old_blkcg);
+	if (old_cgrp)
+		cgroup_put(old_cgrp);
+
+	/*
+	 * NOTE(review): the IO submission path reads port->blkcg without any
+	 * synchronization against this store - confirm whether in-flight
+	 * submissions can still observe the old css after it has been put.
+	 */
+	return count;
+}
+
+CONFIGFS_ATTR(nvmet_, param_associated_cgroup);
+
static ssize_t nvmet_addr_trtype_show(struct config_item *item,
char *page)
{
@@ -1742,6 +1818,7 @@ static struct configfs_attribute *nvmet_port_attrs[] = {
&nvmet_attr_addr_trsvcid,
&nvmet_attr_addr_trtype,
&nvmet_attr_param_inline_data_size,
+ &nvmet_attr_param_associated_cgroup,
#ifdef CONFIG_BLK_DEV_INTEGRITY
&nvmet_attr_param_pi_enable,
#endif
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 3935165048e7..b63996b61c6d 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -376,6 +376,9 @@ void nvmet_disable_port(struct nvmet_port *port)
port->enabled = false;
port->tr_ops = NULL;
+ if (port->cgrp)
+ cgroup_put(port->cgrp);
+
ops = nvmet_transports[port->disc_addr.trtype];
ops->remove_port(port);
module_put(ops->owner);
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index c2d6cea0236b..eb63a071131d 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -8,6 +8,8 @@
#include <linux/blk-integrity.h>
#include <linux/memremap.h>
#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/blk-cgroup.h>
#include "nvmet.h"
void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
@@ -285,6 +287,8 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
bio->bi_iter.bi_sector = sector;
bio->bi_private = req;
bio->bi_end_io = nvmet_bio_done;
+ if (req->port->blkcg)
+ bio_associate_blkg_from_css(bio, req->port->blkcg);
blk_start_plug(&plug);
if (req->metadata_len)
@@ -308,6 +312,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
opf, GFP_KERNEL);
bio->bi_iter.bi_sector = sector;
+ bio_clone_blkg_association(bio, prev);
bio_chain(bio, prev);
submit_bio(prev);
@@ -345,6 +350,8 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req)
ARRAY_SIZE(req->inline_bvec), REQ_OP_WRITE | REQ_PREFLUSH);
bio->bi_private = req;
bio->bi_end_io = nvmet_bio_done;
+ if (req->port->blkcg)
+ bio_associate_blkg_from_css(bio, req->port->blkcg);
submit_bio(bio);
}
@@ -397,6 +404,9 @@ static void nvmet_bdev_execute_discard(struct nvmet_req *req)
if (bio) {
bio->bi_private = req;
bio->bi_end_io = nvmet_bio_done;
+ if (req->port->blkcg)
+ bio_associate_blkg_from_css(bio, req->port->blkcg);
+
if (status)
bio_io_error(bio);
else
@@ -444,6 +454,9 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
if (bio) {
bio->bi_private = req;
bio->bi_end_io = nvmet_bio_done;
+ if (req->port->blkcg)
+ bio_associate_blkg_from_css(bio, req->port->blkcg);
+
submit_bio(bio);
} else {
nvmet_req_complete(req, errno_to_nvme_status(req, ret));
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index dc60a22646f7..3e5c9737d07e 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -20,6 +20,7 @@
#include <linux/blkdev.h>
#include <linux/radix-tree.h>
#include <linux/t10-pi.h>
+#include <linux/cgroup.h>
#define NVMET_DEFAULT_VS NVME_VS(1, 3, 0)
@@ -163,6 +164,8 @@ struct nvmet_port {
int inline_data_size;
const struct nvmet_fabrics_ops *tr_ops;
bool pi_enable;
+ struct cgroup *cgrp;
+ struct cgroup_subsys_state *blkcg;
};
static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 885f5395fcd0..47e2a7cdc31e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -562,6 +562,11 @@ static inline bool cgroup_is_populated(struct cgroup *cgrp)
cgrp->nr_populated_threaded_children;
}
+/* Returns true when the CSS_ONLINE flag is clear in @cgrp->self.flags. */
+static inline bool cgroup_is_dead(const struct cgroup *cgrp)
+{
+	return (cgrp->self.flags & CSS_ONLINE) == 0;
+}
+
/* returns ino associated with a cgroup */
static inline ino_t cgroup_ino(struct cgroup *cgrp)
{
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 367b0a42ada9..8c5c83e9edd7 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -181,11 +181,6 @@ extern struct list_head cgroup_roots;
for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT && \
(((ss) = cgroup_subsys[ssid]) || true); (ssid)++)
-static inline bool cgroup_is_dead(const struct cgroup *cgrp)
-{
- return !(cgrp->self.flags & CSS_ONLINE);
-}
-
static inline bool notify_on_release(const struct cgroup *cgrp)
{
return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
--
2.39.1
More information about the Linux-nvme
mailing list