[PATCH RFC v4 13/18] riscv_cbqri: resctrl: Add MB_MIN bandwidth allocation via Rbwb
Drew Fustini
fustini at kernel.org
Sun May 10 22:11:09 PDT 2026
Add bandwidth allocation through Rbwb (reserved bandwidth blocks)
exposed as the MB_MIN resource. Rbwb's sum constraint does not fit MBA's
percentage cap, so MB_MIN lands as a new RDT_RESOURCE_* rather than
masquerading as MBA.
The sum(Rbwb) <= MRBWB (max resv bw blocks) invariant from the CBQRI
spec is enforced at schemata-write time using a per-RCID software cache
under ctrl->lock. -EINVAL on overflow, matching the existing
schemata-write rejection convention.
Reset gives RCID 0 the remaining MRBWB budget after reserving 1 block
per other RCID. default_to_min=true on MB_MIN so mkdir cannot overflow
the sum constraint.
Assisted-by: Claude:claude-opus-4-7
Co-developed-by: Adrien Ricciardi <aricciardi at baylibre.com>
Signed-off-by: Adrien Ricciardi <aricciardi at baylibre.com>
Signed-off-by: Drew Fustini <fustini at kernel.org>
---
drivers/resctrl/cbqri_resctrl.c | 209 ++++++++++++++++++++++++++++++++++++----
1 file changed, 189 insertions(+), 20 deletions(-)
diff --git a/drivers/resctrl/cbqri_resctrl.c b/drivers/resctrl/cbqri_resctrl.c
index d8fd9b06703f..bcd9367e3555 100644
--- a/drivers/resctrl/cbqri_resctrl.c
+++ b/drivers/resctrl/cbqri_resctrl.c
@@ -415,6 +415,9 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d,
case RDT_RESOURCE_L2:
case RDT_RESOURCE_L3:
return cbqri_apply_cache_config_dom(dom, r, closid, t, cfg_val);
+ case RDT_RESOURCE_MB_MIN:
+ /* sum(Rbwb) <= MRBWB validation runs inside cbqri_apply_rbwb(). */
+ return cbqri_apply_rbwb(dom->hw_ctrl, closid, cfg_val, true);
default:
return -EINVAL;
}
@@ -467,6 +470,14 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
if (err < 0)
val = resctrl_get_default_ctrl(r);
break;
+ case RDT_RESOURCE_MB_MIN: {
+ u64 rbwb;
+
+ err = cbqri_read_rbwb(ctrl, closid, &rbwb);
+ if (err == 0)
+ val = (u32)rbwb;
+ break;
+ }
default:
break;
}
@@ -477,6 +488,7 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
void resctrl_arch_reset_all_ctrls(struct rdt_resource *r)
{
struct cbqri_resctrl_res *hw_res;
+ struct cbqri_resctrl_dom *dom;
struct rdt_ctrl_domain *d;
enum resctrl_conf_type t;
u32 default_ctrl;
@@ -491,15 +503,42 @@ void resctrl_arch_reset_all_ctrls(struct rdt_resource *r)
return;
list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
- for (i = 0; i < hw_res->ctrl->rcid_count; i++) {
- for (t = 0; t < CDP_NUM_TYPES; t++) {
+ dom = container_of(d, struct cbqri_resctrl_dom,
+ resctrl_ctrl_dom);
+
+ switch (r->rid) {
+ case RDT_RESOURCE_MB_MIN:
+ /*
+ * CBQRI section 4.5: Rbwb >= 1, sum(Rbwb) <= MRBWB.
+ * Walk RCIDs 1..N-1 first, then RCID 0 last so the
+ * final sum lands at MRBWB. Use the unchecked helper
+ * since the intermediate sum may exceed MRBWB.
+ */
+ for (i = 0; i < hw_res->ctrl->rcid_count; i++) {
+ u32 rcid = (i + 1) % hw_res->ctrl->rcid_count;
+ u64 rbwb = (rcid == 0) ?
+ dom->hw_ctrl->bc.mrbwb - (hw_res->ctrl->rcid_count - 1) : 1;
int rerr;
- rerr = resctrl_arch_update_one(r, d, i, t, default_ctrl);
+ rerr = cbqri_apply_rbwb(dom->hw_ctrl, rcid, rbwb, false);
if (rerr)
- pr_err_ratelimited("rid=%d reset RCID %u type %u failed (%d)\n",
- r->rid, i, t, rerr);
+ pr_err_ratelimited("RBWB reset RCID %u failed (%d)\n",
+ rcid, rerr);
+ }
+ break;
+ default:
+ for (i = 0; i < hw_res->ctrl->rcid_count; i++) {
+ for (t = 0; t < CDP_NUM_TYPES; t++) {
+ int rerr;
+
+ rerr = resctrl_arch_update_one(r, d, i, t,
+ default_ctrl);
+ if (rerr)
+ pr_err_ratelimited("rid=%d reset RCID %u type %u failed (%d)\n",
+ r->rid, i, t, rerr);
+ }
}
+ break;
}
}
}
@@ -524,24 +563,53 @@ static struct rdt_ctrl_domain *cbqri_new_domain(struct cbqri_controller *ctrl)
static int cbqri_init_domain_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
struct cbqri_resctrl_res *hw_res;
+ struct cbqri_resctrl_dom *dom;
enum resctrl_conf_type t;
int err = 0;
int i;
hw_res = container_of(r, struct cbqri_resctrl_res, resctrl_res);
+ dom = container_of(d, struct cbqri_resctrl_dom, resctrl_ctrl_dom);
for (i = 0; i < hw_res->ctrl->rcid_count; i++) {
/*
- * Seed both DATA and CODE staged slots so a later mount
- * with -o cdp does not see stale CODE values.
- * CDP_NUM_TYPES is 1 on non-CDP controllers.
+ * For MB_MIN walk RCIDs 1..N-1 then RCID 0 last so the sum
+ * trends toward MRBWB during the walk. Other rids iterate
+ * in natural order.
*/
- for (t = 0; t < CDP_NUM_TYPES; t++) {
- err = resctrl_arch_update_one(r, d, i, t,
- resctrl_get_default_ctrl(r));
- if (err)
- return err;
+ u32 rcid = (r->rid == RDT_RESOURCE_MB_MIN) ?
+ ((i + 1) % hw_res->ctrl->rcid_count) : i;
+
+ switch (r->rid) {
+ case RDT_RESOURCE_MB_MIN: {
+ /*
+ * CBQRI section 4.5: Rbwb >= 1, sum(Rbwb) <= MRBWB.
+ * RCID 0 gets the remaining budget. Use the
+ * unchecked helper since intermediate states
+ * transiently exceed MRBWB.
+ */
+ u64 rbwb = (rcid == 0) ?
+ dom->hw_ctrl->bc.mrbwb - (hw_res->ctrl->rcid_count - 1) : 1;
+
+ err = cbqri_apply_rbwb(dom->hw_ctrl, rcid, rbwb, false);
+ break;
}
+ default:
+ /*
+ * Seed both DATA and CODE staged slots so a later
+ * mount with -o cdp does not see stale CODE values.
+ * CDP_NUM_TYPES is 1 on non-CDP controllers.
+ */
+ for (t = 0; t < CDP_NUM_TYPES; t++) {
+ err = resctrl_arch_update_one(r, d, i, t,
+ resctrl_get_default_ctrl(r));
+ if (err)
+ break;
+ }
+ break;
+ }
+ if (err)
+ return err;
}
return 0;
}
@@ -638,6 +706,31 @@ static int cbqri_resctrl_control_init(struct cbqri_resctrl_res *cbqri_res)
res->mon_capable = true;
}
break;
+
+ case RDT_RESOURCE_MB_MIN:
+ res->name = "MB_MIN";
+ res->schema_fmt = RESCTRL_SCHEMA_RANGE;
+ /*
+ * resctrl requires a cache scope for MBA-style domains.
+ * Use L3 as a proxy until the framework supports non-cache
+ * scopes for bandwidth resources.
+ */
+ res->ctrl_scope = RESCTRL_L3_CACHE;
+ /* Rbwb is an integer block count, not a percentage. No MBA delay_linear. */
+ res->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
+ res->membw.min_bw = 1;
+ res->membw.max_bw = ctrl->bc.mrbwb;
+ res->membw.bw_gran = 1;
+ /*
+ * CBQRI section 4.5 caps sum(Rbwb) <= MRBWB. Default new
+ * groups to min_bw so mkdir cannot overflow that sum.
+ */
+ res->membw.default_to_min = true;
+ res->alloc_capable = ctrl->alloc_capable;
+ INIT_LIST_HEAD(&res->ctrl_domains);
+ INIT_LIST_HEAD(&res->mon_domains);
+ break;
+
default:
break;
}
@@ -645,6 +738,37 @@ static int cbqri_resctrl_control_init(struct cbqri_resctrl_res *cbqri_res)
return 0;
}
+/*
+ * Pick one BC to back MB_MIN. Multiple BCs must agree on rcid_count
+ * and mrbwb. Mismatch is fatal because resctrl exposes a single set
+ * of caps per rid.
+ */
+static int cbqri_resctrl_pick_bw_alloc(void)
+{
+ struct cbqri_resctrl_res *mb_min = &cbqri_resctrl_resources[RDT_RESOURCE_MB_MIN];
+ struct cbqri_controller *ctrl;
+
+ list_for_each_entry(ctrl, &cbqri_controllers, list) {
+ if (ctrl->type != CBQRI_CONTROLLER_TYPE_BANDWIDTH)
+ continue;
+ if (!ctrl->alloc_capable)
+ continue;
+
+ if (mb_min->ctrl) {
+ if (mb_min->ctrl->rcid_count != ctrl->rcid_count ||
+ mb_min->ctrl->bc.mrbwb != ctrl->bc.mrbwb) {
+ pr_err("BW controllers have mismatched capabilities\n");
+ return -EINVAL;
+ }
+ continue;
+ }
+
+ mb_min->ctrl = ctrl;
+ }
+
+ return 0;
+}
+
/*
* Pick one controller per monitoring event. L3 OCCUP comes from the
* picked L3 CC (if mon_capable).
@@ -830,6 +954,37 @@ static int cbqri_attach_cpu_to_cap_ctrl(struct cbqri_controller *ctrl,
return 0;
}
+static int cbqri_attach_cpu_to_one_bw_res(struct cbqri_controller *ctrl,
+ enum resctrl_res_level rid,
+ unsigned int cpu)
+{
+ struct cbqri_resctrl_res *hw_res = &cbqri_resctrl_resources[rid];
+ struct rdt_resource *res = &hw_res->resctrl_res;
+ struct rdt_ctrl_domain *domain;
+ int dom_id = ctrl->mem.prox_dom;
+
+ if (!hw_res->ctrl)
+ return 0;
+
+ domain = cbqri_find_ctrl_domain(&res->ctrl_domains, dom_id);
+ if (domain) {
+ cpumask_set_cpu(cpu, &domain->hdr.cpu_mask);
+ return 0;
+ }
+
+ domain = cbqri_create_ctrl_domain(ctrl, res, cpu, dom_id);
+ if (IS_ERR(domain))
+ return PTR_ERR(domain);
+
+ return 0;
+}
+
+static int cbqri_attach_cpu_to_bw_ctrl(struct cbqri_controller *ctrl,
+ unsigned int cpu)
+{
+ return cbqri_attach_cpu_to_one_bw_res(ctrl, RDT_RESOURCE_MB_MIN, cpu);
+}
+
static void cbqri_detach_cpu_from_l3_mon(struct rdt_resource *res,
unsigned int cpu)
{
@@ -910,6 +1065,10 @@ static int cbqri_resctrl_setup(void)
if (err)
return err;
+ err = cbqri_resctrl_pick_bw_alloc();
+ if (err)
+ return err;
+
cbqri_resctrl_pick_counters();
for (rid = 0; rid < RDT_NUM_RESOURCES; rid++) {
@@ -941,14 +1100,24 @@ static int cbqri_resctrl_online_cpu(unsigned int cpu)
mutex_lock(&cbqri_domain_list_lock);
list_for_each_entry(ctrl, &cbqri_controllers, list) {
- if (ctrl->type != CBQRI_CONTROLLER_TYPE_CAPACITY)
- continue;
- if (!cpumask_test_cpu(cpu, &ctrl->cache.cpu_mask))
- continue;
- if (!ctrl->alloc_capable)
+ switch (ctrl->type) {
+ case CBQRI_CONTROLLER_TYPE_CAPACITY:
+ if (!cpumask_test_cpu(cpu, &ctrl->cache.cpu_mask))
+ continue;
+ if (!ctrl->alloc_capable)
+ continue;
+ err = cbqri_attach_cpu_to_cap_ctrl(ctrl, cpu);
+ break;
+ case CBQRI_CONTROLLER_TYPE_BANDWIDTH:
+ if (!cpumask_test_cpu(cpu, &ctrl->mem.cpu_mask))
+ continue;
+ if (!ctrl->alloc_capable)
+ continue;
+ err = cbqri_attach_cpu_to_bw_ctrl(ctrl, cpu);
+ break;
+ default:
continue;
-
- err = cbqri_attach_cpu_to_cap_ctrl(ctrl, cpu);
+ }
if (err)
break;
}
--
2.43.0
More information about the linux-riscv
mailing list