[PATCH] nvmet: allow associating port to a cgroup via configfs

Ofir Gal ofir.gal at volumez.com
Tue Jun 27 03:02:15 PDT 2023


Currently there is no way to throttle nvme targets with cgroup v2.

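The IOs that the nvme target submits are not associated with any cgroup,
so they are accounted to the root cgroup. The root cgroup can't be
throttled with the cgroup v2 mechanism.

Add a per-port configfs attribute, param_associated_cgroup, that takes a
cgroup path; bios submitted by the bdev backend for that port are then
associated with that cgroup's blkcg.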

Signed-off-by: Ofir Gal <ofir.gal at volumez.com>
---
 drivers/nvme/target/configfs.c    | 77 +++++++++++++++++++++++++++++++
 drivers/nvme/target/core.c        |  3 ++
 drivers/nvme/target/io-cmd-bdev.c | 13 ++++++
 drivers/nvme/target/nvmet.h       |  3 ++
 include/linux/cgroup.h            |  5 ++
 kernel/cgroup/cgroup-internal.h   |  5 --
 6 files changed, 101 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 907143870da5..2e8f93a07498 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -12,6 +12,7 @@
 #include <linux/ctype.h>
 #include <linux/pci.h>
 #include <linux/pci-p2pdma.h>
+#include <linux/cgroup.h>
 #ifdef CONFIG_NVME_TARGET_AUTH
 #include <linux/nvme-auth.h>
 #endif
@@ -281,6 +282,81 @@ static ssize_t nvmet_param_pi_enable_store(struct config_item *item,
 CONFIGFS_ATTR(nvmet_, param_pi_enable);
 #endif
 
+/* Emit the port's cgroup path as seen from the reader's cgroup namespace. */
+static ssize_t nvmet_param_associated_cgroup_show(struct config_item *item,
+		char *page)
+{
+	struct nvmet_port *port = to_nvmet_port(item);
+	const char *suffix;
+	ssize_t len;
+
+	/* No cgroup has been set means the IOs are associated to the root cgroup */
+	if (!port->cgrp)
+		goto root_cgroup;
+
+	len = cgroup_path_ns(port->cgrp, page, PAGE_SIZE,
+			     current->nsproxy->cgroup_ns);
+
+	/*
+	 * Test for failure first: compared against the unsigned PAGE_SIZE a
+	 * negative len wraps and would be reported as -ENAMETOOLONG instead.
+	 */
+	if (len < 0)
+		goto root_cgroup;	/* no cgroup found: treat as root */
+	if (len >= PATH_MAX || len >= PAGE_SIZE)
+		return -ENAMETOOLONG;
+
+	/* scnprintf() returns bytes written, so len cannot pass PAGE_SIZE */
+	suffix = cgroup_is_dead(port->cgrp) ? " (deleted)\n" : "\n";
+	return len + scnprintf(page + len, PAGE_SIZE - len, "%s", suffix);
+
+root_cgroup:
+	return snprintf(page, PAGE_SIZE, "/\n");
+}
+
+/* Bind the port to the cgroup whose path is in @page (newline ignored). */
+static ssize_t nvmet_param_associated_cgroup_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nvmet_port *port = to_nvmet_port(item);
+	size_t len = strcspn(page, "\n");
+	struct cgroup_subsys_state *blkcg;
+	struct cgroup *cgrp;
+	char *path;
+
+	if (!len)
+		return -EINVAL;
+
+	path = kmemdup_nul(page, len, GFP_KERNEL);
+	if (!path)
+		return -ENOMEM;
+
+	cgrp = cgroup_get_from_path(path);
+	kfree(path);
+	if (IS_ERR(cgrp))
+		return PTR_ERR(cgrp);
+
+	/* Holds a reference on the effective io css until it is replaced */
+	blkcg = cgroup_get_e_css(cgrp, &io_cgrp_subsys);
+	if (!blkcg) {
+		cgroup_put(cgrp);
+		return -EINVAL;
+	}
+
+	/* Drop the old references; NOTE(review): not serialized against IO */
+	if (port->blkcg)
+		css_put(port->blkcg);
+	if (port->cgrp)
+		cgroup_put(port->cgrp);
+
+	port->cgrp = cgrp;
+	port->blkcg = blkcg;
+
+	return count;
+}
+
+CONFIGFS_ATTR(nvmet_, param_associated_cgroup);
+
 static ssize_t nvmet_addr_trtype_show(struct config_item *item,
 		char *page)
 {
@@ -1742,6 +1818,7 @@ static struct configfs_attribute *nvmet_port_attrs[] = {
 	&nvmet_attr_addr_trsvcid,
 	&nvmet_attr_addr_trtype,
 	&nvmet_attr_param_inline_data_size,
+	&nvmet_attr_param_associated_cgroup,
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 	&nvmet_attr_param_pi_enable,
 #endif
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 3935165048e7..b63996b61c6d 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -376,6 +376,9 @@ void nvmet_disable_port(struct nvmet_port *port)
 	port->enabled = false;
 	port->tr_ops = NULL;
 
+	if (port->cgrp)
+		cgroup_put(port->cgrp);
+
 	ops = nvmet_transports[port->disc_addr.trtype];
 	ops->remove_port(port);
 	module_put(ops->owner);
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index c2d6cea0236b..eb63a071131d 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -8,6 +8,8 @@
 #include <linux/blk-integrity.h>
 #include <linux/memremap.h>
 #include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/blk-cgroup.h>
 #include "nvmet.h"
 
 void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
@@ -285,6 +287,8 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 	bio->bi_iter.bi_sector = sector;
 	bio->bi_private = req;
 	bio->bi_end_io = nvmet_bio_done;
+	if (req->port->blkcg)
+		bio_associate_blkg_from_css(bio, req->port->blkcg);
 
 	blk_start_plug(&plug);
 	if (req->metadata_len)
@@ -308,6 +312,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 			bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
 					opf, GFP_KERNEL);
 			bio->bi_iter.bi_sector = sector;
+			bio_clone_blkg_association(bio, prev);
 
 			bio_chain(bio, prev);
 			submit_bio(prev);
@@ -345,6 +350,8 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req)
 		 ARRAY_SIZE(req->inline_bvec), REQ_OP_WRITE | REQ_PREFLUSH);
 	bio->bi_private = req;
 	bio->bi_end_io = nvmet_bio_done;
+	if (req->port->blkcg)
+		bio_associate_blkg_from_css(bio, req->port->blkcg);
 
 	submit_bio(bio);
 }
@@ -397,6 +404,9 @@ static void nvmet_bdev_execute_discard(struct nvmet_req *req)
 	if (bio) {
 		bio->bi_private = req;
 		bio->bi_end_io = nvmet_bio_done;
+		if (req->port->blkcg)
+			bio_associate_blkg_from_css(bio, req->port->blkcg);
+
 		if (status)
 			bio_io_error(bio);
 		else
@@ -444,6 +454,9 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
 	if (bio) {
 		bio->bi_private = req;
 		bio->bi_end_io = nvmet_bio_done;
+		if (req->port->blkcg)
+			bio_associate_blkg_from_css(bio, req->port->blkcg);
+
 		submit_bio(bio);
 	} else {
 		nvmet_req_complete(req, errno_to_nvme_status(req, ret));
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index dc60a22646f7..3e5c9737d07e 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -20,6 +20,7 @@
 #include <linux/blkdev.h>
 #include <linux/radix-tree.h>
 #include <linux/t10-pi.h>
+#include <linux/cgroup.h>
 
 #define NVMET_DEFAULT_VS		NVME_VS(1, 3, 0)
 
@@ -163,6 +164,8 @@ struct nvmet_port {
 	int				inline_data_size;
 	const struct nvmet_fabrics_ops	*tr_ops;
 	bool				pi_enable;
+	struct cgroup				*cgrp;
+	struct cgroup_subsys_state	*blkcg;
 };
 
 static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 885f5395fcd0..47e2a7cdc31e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -562,6 +562,11 @@ static inline bool cgroup_is_populated(struct cgroup *cgrp)
 		cgrp->nr_populated_threaded_children;
 }
 
+static inline bool cgroup_is_dead(const struct cgroup *cgrp)
+{
+	return (cgrp->self.flags & CSS_ONLINE) == 0; /* dead: not online */
+}
+
 /* returns ino associated with a cgroup */
 static inline ino_t cgroup_ino(struct cgroup *cgrp)
 {
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 367b0a42ada9..8c5c83e9edd7 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -181,11 +181,6 @@ extern struct list_head cgroup_roots;
 	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT &&		\
 	     (((ss) = cgroup_subsys[ssid]) || true); (ssid)++)
 
-static inline bool cgroup_is_dead(const struct cgroup *cgrp)
-{
-	return !(cgrp->self.flags & CSS_ONLINE);
-}
-
 static inline bool notify_on_release(const struct cgroup *cgrp)
 {
 	return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
-- 
2.39.1




More information about the Linux-nvme mailing list