[PATCH RFC v4 13/18] riscv_cbqri: resctrl: Add MB_MIN bandwidth allocation via Rbwb

Drew Fustini fustini at kernel.org
Sun May 10 22:11:09 PDT 2026


Add bandwidth allocation through Rbwb (reserved bandwidth blocks)
exposed as the MB_MIN resource. Rbwb's sum constraint does not fit MBA's
percentage cap, so MB_MIN lands as a new RDT_RESOURCE_* rather than
masquerading as MBA.

The sum(Rbwb) <= MRBWB (max resv bw blocks) invariant from the CBQRI
spec is enforced at schemata-write time using a per-RCID software cache
under ctrl->lock. -EINVAL on overflow, matching the existing
schemata-write rejection convention.

Reset gives RCID 0 the remaining MRBWB budget after reserving 1 block
per other RCID. default_to_min=true on MB_MIN so mkdir cannot overflow
the sum constraint.

Assisted-by: Claude:claude-opus-4-7
Co-developed-by: Adrien Ricciardi <aricciardi at baylibre.com>
Signed-off-by: Adrien Ricciardi <aricciardi at baylibre.com>
Signed-off-by: Drew Fustini <fustini at kernel.org>
---
 drivers/resctrl/cbqri_resctrl.c | 209 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 189 insertions(+), 20 deletions(-)

diff --git a/drivers/resctrl/cbqri_resctrl.c b/drivers/resctrl/cbqri_resctrl.c
index d8fd9b06703f..bcd9367e3555 100644
--- a/drivers/resctrl/cbqri_resctrl.c
+++ b/drivers/resctrl/cbqri_resctrl.c
@@ -415,6 +415,9 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d,
 	case RDT_RESOURCE_L2:
 	case RDT_RESOURCE_L3:
 		return cbqri_apply_cache_config_dom(dom, r, closid, t, cfg_val);
+	case RDT_RESOURCE_MB_MIN:
+		/* sum(Rbwb) <= MRBWB validation runs inside cbqri_apply_rbwb(). */
+		return cbqri_apply_rbwb(dom->hw_ctrl, closid, cfg_val, true);
 	default:
 		return -EINVAL;
 	}
@@ -467,6 +470,14 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
 		if (err < 0)
 			val = resctrl_get_default_ctrl(r);
 		break;
+	case RDT_RESOURCE_MB_MIN: {
+		u64 rbwb;
+
+		err = cbqri_read_rbwb(ctrl, closid, &rbwb);
+		if (err == 0)
+			val = (u32)rbwb;
+		break;
+	}
 	default:
 		break;
 	}
@@ -477,6 +488,7 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
 void resctrl_arch_reset_all_ctrls(struct rdt_resource *r)
 {
 	struct cbqri_resctrl_res *hw_res;
+	struct cbqri_resctrl_dom *dom;
 	struct rdt_ctrl_domain *d;
 	enum resctrl_conf_type t;
 	u32 default_ctrl;
@@ -491,15 +503,42 @@ void resctrl_arch_reset_all_ctrls(struct rdt_resource *r)
 		return;
 
 	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
-		for (i = 0; i < hw_res->ctrl->rcid_count; i++) {
-			for (t = 0; t < CDP_NUM_TYPES; t++) {
+		dom = container_of(d, struct cbqri_resctrl_dom,
+				   resctrl_ctrl_dom);
+
+		switch (r->rid) {
+		case RDT_RESOURCE_MB_MIN:
+			/*
+			 * CBQRI section 4.5: Rbwb >= 1, sum(Rbwb) <= MRBWB.
+			 * Walk RCIDs 1..N-1 first, then RCID 0, so the
+			 * sum lands at MRBWB. Use the unchecked helper
+			 * since the intermediate sum may exceed MRBWB.
+			 */
+			for (i = 0; i < hw_res->ctrl->rcid_count; i++) {
+				u32 rcid = (i + 1) % hw_res->ctrl->rcid_count;
+				u64 rbwb = (rcid == 0) ?
+					dom->hw_ctrl->bc.mrbwb - (hw_res->ctrl->rcid_count - 1) : 1;
 				int rerr;
 
-				rerr = resctrl_arch_update_one(r, d, i, t, default_ctrl);
+				rerr = cbqri_apply_rbwb(dom->hw_ctrl, rcid, rbwb, false);
 				if (rerr)
-					pr_err_ratelimited("rid=%d reset RCID %u type %u failed (%d)\n",
-							   r->rid, i, t, rerr);
+					pr_err_ratelimited("RBWB reset RCID %u failed (%d)\n",
+							   rcid, rerr);
+			}
+			break;
+		default:
+			for (i = 0; i < hw_res->ctrl->rcid_count; i++) {
+				for (t = 0; t < CDP_NUM_TYPES; t++) {
+					int rerr;
+
+					rerr = resctrl_arch_update_one(r, d, i, t,
+								       default_ctrl);
+					if (rerr)
+						pr_err_ratelimited("rid=%d reset RCID %u type %u failed (%d)\n",
+								   r->rid, i, t, rerr);
+				}
 			}
+			break;
 		}
 	}
 }
@@ -524,24 +563,53 @@ static struct rdt_ctrl_domain *cbqri_new_domain(struct cbqri_controller *ctrl)
 static int cbqri_init_domain_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain *d)
 {
 	struct cbqri_resctrl_res *hw_res;
+	struct cbqri_resctrl_dom *dom;
 	enum resctrl_conf_type t;
 	int err = 0;
 	int i;
 
 	hw_res = container_of(r, struct cbqri_resctrl_res, resctrl_res);
+	dom = container_of(d, struct cbqri_resctrl_dom, resctrl_ctrl_dom);
 
 	for (i = 0; i < hw_res->ctrl->rcid_count; i++) {
 		/*
-		 * Seed both DATA and CODE staged slots so a later mount
-		 * with -o cdp does not see stale CODE values.
-		 * CDP_NUM_TYPES is 1 on non-CDP controllers.
+		 * For MB_MIN walk RCIDs 1..N-1 then RCID 0 last so the sum
+		 * trends toward MRBWB during the walk. Other rids iterate
+		 * in natural order.
 		 */
-		for (t = 0; t < CDP_NUM_TYPES; t++) {
-			err = resctrl_arch_update_one(r, d, i, t,
-						      resctrl_get_default_ctrl(r));
-			if (err)
-				return err;
+		u32 rcid = (r->rid == RDT_RESOURCE_MB_MIN) ?
+				((i + 1) % hw_res->ctrl->rcid_count) : i;
+
+		switch (r->rid) {
+		case RDT_RESOURCE_MB_MIN: {
+			/*
+			 * CBQRI section 4.5: Rbwb >= 1, sum(Rbwb) <= MRBWB.
+			 * RCID 0 gets the remaining budget. Use the
+			 * unchecked helper since intermediate states
+			 * transiently exceed MRBWB.
+			 */
+			u64 rbwb = (rcid == 0) ?
+				dom->hw_ctrl->bc.mrbwb - (hw_res->ctrl->rcid_count - 1) : 1;
+
+			err = cbqri_apply_rbwb(dom->hw_ctrl, rcid, rbwb, false);
+			break;
 		}
+		default:
+			/*
+			 * Seed both DATA and CODE staged slots so a later
+			 * mount with -o cdp does not see stale CODE values.
+			 * CDP_NUM_TYPES is 1 on non-CDP controllers.
+			 */
+			for (t = 0; t < CDP_NUM_TYPES; t++) {
+				err = resctrl_arch_update_one(r, d, i, t,
+							      resctrl_get_default_ctrl(r));
+				if (err)
+					break;
+			}
+			break;
+		}
+		if (err)
+			return err;
 	}
 	return 0;
 }
@@ -638,6 +706,31 @@ static int cbqri_resctrl_control_init(struct cbqri_resctrl_res *cbqri_res)
 			res->mon_capable = true;
 		}
 		break;
+
+	case RDT_RESOURCE_MB_MIN:
+		res->name = "MB_MIN";
+		res->schema_fmt = RESCTRL_SCHEMA_RANGE;
+		/*
+		 * resctrl requires a cache scope for MBA-style domains.
+		 * Use L3 as a proxy until the framework supports non-cache
+		 * scopes for bandwidth resources.
+		 */
+		res->ctrl_scope = RESCTRL_L3_CACHE;
+		/* Rbwb is an integer block count, not a percentage. No MBA delay_linear. */
+		res->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
+		res->membw.min_bw = 1;
+		res->membw.max_bw = ctrl->bc.mrbwb;
+		res->membw.bw_gran = 1;
+		/*
+		 * CBQRI section 4.5 caps sum(Rbwb) <= MRBWB. Default new
+		 * groups to min_bw so mkdir cannot overflow that sum.
+		 */
+		res->membw.default_to_min = true;
+		res->alloc_capable = ctrl->alloc_capable;
+		INIT_LIST_HEAD(&res->ctrl_domains);
+		INIT_LIST_HEAD(&res->mon_domains);
+		break;
+
 	default:
 		break;
 	}
@@ -645,6 +738,37 @@ static int cbqri_resctrl_control_init(struct cbqri_resctrl_res *cbqri_res)
 	return 0;
 }
 
+/*
+ * Pick one BC to back MB_MIN.  Multiple BCs must agree on rcid_count
+ * and mrbwb.  Mismatch is fatal because resctrl exposes a single set
+ * of caps per rid.
+ */
+static int cbqri_resctrl_pick_bw_alloc(void)
+{
+	struct cbqri_resctrl_res *mb_min = &cbqri_resctrl_resources[RDT_RESOURCE_MB_MIN];
+	struct cbqri_controller *ctrl;
+
+	list_for_each_entry(ctrl, &cbqri_controllers, list) {
+		if (ctrl->type != CBQRI_CONTROLLER_TYPE_BANDWIDTH)
+			continue;
+		if (!ctrl->alloc_capable)
+			continue;
+
+		if (mb_min->ctrl) {
+			if (mb_min->ctrl->rcid_count != ctrl->rcid_count ||
+			    mb_min->ctrl->bc.mrbwb != ctrl->bc.mrbwb) {
+				pr_err("BW controllers have mismatched capabilities\n");
+				return -EINVAL;
+			}
+			continue;
+		}
+
+		mb_min->ctrl = ctrl;
+	}
+
+	return 0;
+}
+
 /*
  * Pick one controller per monitoring event.  L3 OCCUP comes from the
  * picked L3 CC (if mon_capable).
@@ -830,6 +954,37 @@ static int cbqri_attach_cpu_to_cap_ctrl(struct cbqri_controller *ctrl,
 	return 0;
 }
 
+static int cbqri_attach_cpu_to_one_bw_res(struct cbqri_controller *ctrl,
+					  enum resctrl_res_level rid,
+					  unsigned int cpu)
+{
+	struct cbqri_resctrl_res *hw_res = &cbqri_resctrl_resources[rid];
+	struct rdt_resource *res = &hw_res->resctrl_res;
+	struct rdt_ctrl_domain *domain;
+	int dom_id = ctrl->mem.prox_dom;
+
+	if (!hw_res->ctrl)
+		return 0;
+
+	domain = cbqri_find_ctrl_domain(&res->ctrl_domains, dom_id);
+	if (domain) {
+		cpumask_set_cpu(cpu, &domain->hdr.cpu_mask);
+		return 0;
+	}
+
+	domain = cbqri_create_ctrl_domain(ctrl, res, cpu, dom_id);
+	if (IS_ERR(domain))
+		return PTR_ERR(domain);
+
+	return 0;
+}
+
+static int cbqri_attach_cpu_to_bw_ctrl(struct cbqri_controller *ctrl,
+				       unsigned int cpu)
+{
+	return cbqri_attach_cpu_to_one_bw_res(ctrl, RDT_RESOURCE_MB_MIN, cpu);
+}
+
 static void cbqri_detach_cpu_from_l3_mon(struct rdt_resource *res,
 					 unsigned int cpu)
 {
@@ -910,6 +1065,10 @@ static int cbqri_resctrl_setup(void)
 	if (err)
 		return err;
 
+	err = cbqri_resctrl_pick_bw_alloc();
+	if (err)
+		return err;
+
 	cbqri_resctrl_pick_counters();
 
 	for (rid = 0; rid < RDT_NUM_RESOURCES; rid++) {
@@ -941,14 +1100,24 @@ static int cbqri_resctrl_online_cpu(unsigned int cpu)
 	mutex_lock(&cbqri_domain_list_lock);
 
 	list_for_each_entry(ctrl, &cbqri_controllers, list) {
-		if (ctrl->type != CBQRI_CONTROLLER_TYPE_CAPACITY)
-			continue;
-		if (!cpumask_test_cpu(cpu, &ctrl->cache.cpu_mask))
-			continue;
-		if (!ctrl->alloc_capable)
+		switch (ctrl->type) {
+		case CBQRI_CONTROLLER_TYPE_CAPACITY:
+			if (!cpumask_test_cpu(cpu, &ctrl->cache.cpu_mask))
+				continue;
+			if (!ctrl->alloc_capable)
+				continue;
+			err = cbqri_attach_cpu_to_cap_ctrl(ctrl, cpu);
+			break;
+		case CBQRI_CONTROLLER_TYPE_BANDWIDTH:
+			if (!cpumask_test_cpu(cpu, &ctrl->mem.cpu_mask))
+				continue;
+			if (!ctrl->alloc_capable)
+				continue;
+			err = cbqri_attach_cpu_to_bw_ctrl(ctrl, cpu);
+			break;
+		default:
 			continue;
-
-		err = cbqri_attach_cpu_to_cap_ctrl(ctrl, cpu);
+		}
 		if (err)
 			break;
 	}

-- 
2.43.0




More information about the linux-riscv mailing list