[PATCH RFC v4 12/18] riscv_cbqri: resctrl: Add L3 cache occupancy monitoring
Drew Fustini
fustini at kernel.org
Sun May 10 22:11:08 PDT 2026
Expose QOS_L3_OCCUP_EVENT_ID so userspace can read per-MCID
llc_occupancy. The result is converted from capacity blocks to bytes
using cache_size and ncblks.
resctrl_arch_reset_rmid() re-arms CONFIG_EVENT with EVT_ID=Occupancy.
CONFIG_EVENT both resets the counter to 0 and selects the event, so
re-arming with the same event keeps the MCID counting after reset rather
than relying on sticky-last-event semantics that the CBQRI register
definition does not guarantee.
The L3 mon_domain is created lazily on the first CPU of a cache_id and
linked to the paired ctrl_domain.
Assisted-by: Claude:claude-opus-4-7
Co-developed-by: Adrien Ricciardi <aricciardi at baylibre.com>
Signed-off-by: Adrien Ricciardi <aricciardi at baylibre.com>
Signed-off-by: Drew Fustini <fustini at kernel.org>
---
drivers/resctrl/cbqri_resctrl.c | 272 ++++++++++++++++++++++++++++++++++++++--
1 file changed, 260 insertions(+), 12 deletions(-)
diff --git a/drivers/resctrl/cbqri_resctrl.c b/drivers/resctrl/cbqri_resctrl.c
index 82b157d35576..d8fd9b06703f 100644
--- a/drivers/resctrl/cbqri_resctrl.c
+++ b/drivers/resctrl/cbqri_resctrl.c
@@ -10,6 +10,7 @@
#include <linux/cpuhotplug.h>
#include <linux/err.h>
#include <linux/init.h>
+#include <linux/io.h>
#include <linux/resctrl.h>
#include <linux/slab.h>
#include <linux/types.h>
@@ -33,6 +34,13 @@ struct cbqri_resctrl_dom {
static struct cbqri_resctrl_res cbqri_resctrl_resources[RDT_NUM_RESOURCES];
+/*
+ * Per-event controller table. Only events that CBQRI hardware can
+ * back occupy a slot, so other events do not bloat the array.
+ */
+#define CBQRI_MAX_EVENT QOS_L3_OCCUP_EVENT_ID
+static struct cbqri_controller *cbqri_resctrl_counters[CBQRI_MAX_EVENT + 1];
+
/*
* cacheinfo populates the cache id <-> cpumask mapping from a
* device_initcall(). cbqri_resctrl_setup() runs at late_initcall, which
@@ -44,6 +52,10 @@ static bool cacheinfo_ready;
static DECLARE_WAIT_QUEUE_HEAD(wait_cacheinfo_ready);
static bool exposed_alloc_capable;
+static bool exposed_mon_capable;
+
+/* Used by resctrl_arch_system_num_rmid_idx(). Narrowed by accumulate_caps. */
+static u32 max_rmid = U32_MAX;
/* Protects ctrl_domain list mutations across CPU hotplug. */
static DEFINE_MUTEX(cbqri_domain_list_lock);
@@ -56,6 +68,14 @@ cbqri_find_ctrl_domain(struct list_head *h, int id)
return hdr ? container_of(hdr, struct rdt_ctrl_domain, hdr) : NULL;
}
+static struct rdt_l3_mon_domain *
+cbqri_find_l3_mon_domain(struct list_head *h, int id)
+{
+ struct rdt_domain_hdr *hdr = resctrl_find_domain(h, id, NULL);
+
+ return hdr ? container_of(hdr, struct rdt_l3_mon_domain, hdr) : NULL;
+}
+
/*
* Resctrl-side wrapper around the device-side cbqri_apply_cache_config().
* Builds the hardware config struct from resctrl-side state (cdp flag, AT
@@ -84,7 +104,7 @@ bool resctrl_arch_alloc_capable(void)
bool resctrl_arch_mon_capable(void)
{
- return false;
+ return exposed_mon_capable;
}
bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level rid)
@@ -185,20 +205,112 @@ void resctrl_arch_mon_event_config_write(void *info)
{
}
-void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
+void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
+ u32 unused, u32 rmid, enum resctrl_event_id eventid)
{
+ struct cbqri_resctrl_dom *hw_dom;
+ struct cbqri_controller *ctrl;
+ struct rdt_ctrl_domain *cd;
+
+ /* Don't sleep with IRQs disabled. */
+ if (irqs_disabled())
+ return;
+
+ switch (eventid) {
+ case QOS_L3_OCCUP_EVENT_ID:
+ cd = cbqri_find_ctrl_domain(&r->ctrl_domains, d->hdr.id);
+ if (!cd)
+ return;
+
+ hw_dom = container_of(cd, struct cbqri_resctrl_dom, resctrl_ctrl_dom);
+ ctrl = hw_dom->hw_ctrl;
+
+ mutex_lock(&ctrl->lock);
+ /*
+ * Re-arm with EVT_ID=OCCUPANCY (not None) on RMID recycle:
+ * this both zeros the counter and keeps the MCID counting,
+ * since cbqri_init_mon_counters() only runs once.
+ */
+ if (cbqri_mon_op(ctrl, CBQRI_CC_MON_CTL_OFF,
+ CBQRI_CC_MON_CTL_OP_CONFIG_EVENT,
+ rmid, CBQRI_CC_EVT_ID_OCCUPANCY, NULL))
+ pr_warn_ratelimited("CC@%pa MCID %u: occupancy reset failed\n",
+ &ctrl->addr, rmid);
+ mutex_unlock(&ctrl->lock);
+ return;
+
+ default:
+ return;
+ }
}
-void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
- u32 unused, u32 rmid, enum resctrl_event_id eventid)
+void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
{
+ int i;
+
+ /* Bound by max_rmid (system-wide minimum mcid_count). */
+ for (i = 0; i < max_rmid; i++)
+ resctrl_arch_reset_rmid(r, d, 0, i, QOS_L3_OCCUP_EVENT_ID);
}
int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr,
u32 closid, u32 rmid, enum resctrl_event_id eventid,
void *arch_priv, u64 *val, void *arch_mon_ctx)
{
- return -ENODATA;
+ struct cbqri_resctrl_dom *hw_dom;
+ struct cbqri_controller *ctrl;
+ struct rdt_ctrl_domain *d;
+ u64 ctr_val;
+ int err;
+
+ resctrl_arch_rmid_read_context_check();
+
+ /*
+ * Each branch takes a sleeping mutex. Bail if called with IRQs
+ * disabled (e.g. smp_call_function_any() from nohz_full CPUs).
+ */
+ if (irqs_disabled())
+ return -EIO;
+
+ switch (eventid) {
+ case QOS_L3_OCCUP_EVENT_ID:
+ /* Mon domain id matches the ctrl_domain id. Look up to get hw_ctrl. */
+ d = cbqri_find_ctrl_domain(&r->ctrl_domains, hdr->id);
+ if (!d)
+ return -ENOENT;
+
+ hw_dom = container_of(d, struct cbqri_resctrl_dom, resctrl_ctrl_dom);
+ ctrl = hw_dom->hw_ctrl;
+
+ mutex_lock(&ctrl->lock);
+
+ /*
+ * MCIDs are armed with Occupancy at init and re-armed on
+ * RMID recycle. Pass EVT_ID explicitly: the CBQRI spec
+ * does not guarantee sticky-last-configured-event for
+ * READ_COUNTER.
+ */
+ err = cbqri_mon_op(ctrl, CBQRI_CC_MON_CTL_OFF,
+ CBQRI_CC_MON_CTL_OP_READ_COUNTER,
+ rmid, CBQRI_CC_EVT_ID_OCCUPANCY, NULL);
+ if (err)
+ goto out_cc;
+
+ ctr_val = ioread64(ctrl->base + CBQRI_CC_MON_CTL_VAL_OFF);
+
+ /*
+ * Capacity blocks to bytes. Multiply before divide so a
+ * non-power-of-2 ncblks doesn't truncate. Both terms fit
+ * in u64 with room to spare.
+ */
+ *val = (u64)ctrl->cache.cache_size * ctr_val / ctrl->cc.ncblks;
+out_cc:
+ mutex_unlock(&ctrl->lock);
+ return err;
+
+ default:
+ return -EINVAL;
+ }
}
/*
@@ -225,7 +337,7 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *res)
u32 resctrl_arch_system_num_rmid_idx(void)
{
- return 1;
+ return max_rmid;
}
u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid)
@@ -517,6 +629,14 @@ static int cbqri_resctrl_control_init(struct cbqri_resctrl_res *cbqri_res)
res->alloc_capable = ctrl->alloc_capable;
INIT_LIST_HEAD(&res->ctrl_domains);
INIT_LIST_HEAD(&res->mon_domains);
+
+ if (ctrl->mon_capable && res->rid == RDT_RESOURCE_L3) {
+ res->mon_scope = RESCTRL_L3_CACHE;
+ res->mon.num_rmid = ctrl->mcid_count;
+ resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID,
+ false, 0, NULL);
+ res->mon_capable = true;
+ }
break;
default:
break;
@@ -525,8 +645,21 @@ static int cbqri_resctrl_control_init(struct cbqri_resctrl_res *cbqri_res)
return 0;
}
+/*
+ * Pick one controller per monitoring event. L3 OCCUP comes from the
+ * picked L3 CC (if mon_capable).
+ */
+static void cbqri_resctrl_pick_counters(void)
+{
+ struct cbqri_resctrl_res *l3 = &cbqri_resctrl_resources[RDT_RESOURCE_L3];
+
+ if (l3->ctrl && l3->ctrl->mon_capable)
+ cbqri_resctrl_counters[QOS_L3_OCCUP_EVENT_ID] = l3->ctrl;
+}
+
static void cbqri_resctrl_accumulate_caps(void)
{
+ struct cbqri_controller *ctrl;
int rid;
for (rid = 0; rid < RDT_NUM_RESOURCES; rid++) {
@@ -536,7 +669,22 @@ static void cbqri_resctrl_accumulate_caps(void)
continue;
if (hw_res->ctrl->alloc_capable)
exposed_alloc_capable = true;
+ if (hw_res->ctrl->mon_capable)
+ exposed_mon_capable = true;
}
+
+ /*
+ * Narrow max_rmid against mon-capable controllers only. RQSC may
+ * report mcid_count for non-mon-capable ones. Clamping the global
+ * minimum against those would shrink the rmid space unnecessarily.
+ */
+ list_for_each_entry(ctrl, &cbqri_controllers, list)
+ if (ctrl->mon_capable)
+ max_rmid = min(max_rmid, ctrl->mcid_count);
+
+ /* No mon-capable controller picked: fall back to a single RMID. */
+ if (!exposed_mon_capable)
+ max_rmid = 1;
}
/*
@@ -577,6 +725,71 @@ static struct rdt_ctrl_domain *cbqri_create_ctrl_domain(struct cbqri_controller
return domain;
}
+static int cbqri_attach_cpu_to_l3_mon(struct cbqri_controller *ctrl,
+ struct rdt_resource *res, unsigned int cpu)
+{
+ struct rdt_l3_mon_domain *mon_dom;
+ struct rdt_ctrl_domain *ctrl_dom;
+ struct list_head *mon_pos = NULL;
+ int dom_id = ctrl->cache.cache_id;
+ int err;
+
+ lockdep_assert_held(&cbqri_domain_list_lock);
+
+ mon_dom = cbqri_find_l3_mon_domain(&res->mon_domains, dom_id);
+ if (mon_dom) {
+ cpumask_set_cpu(cpu, &mon_dom->hdr.cpu_mask);
+ return 0;
+ }
+
+ ctrl_dom = cbqri_find_ctrl_domain(&res->ctrl_domains, dom_id);
+ if (!ctrl_dom) {
+ pr_err("L3 mon attach for cpu %u: no ctrl_domain id %d\n",
+ cpu, dom_id);
+ return -EINVAL;
+ }
+
+ mon_dom = kzalloc_obj(*mon_dom, GFP_KERNEL);
+ if (!mon_dom)
+ return -ENOMEM;
+
+ mon_dom->hdr.id = dom_id;
+ mon_dom->hdr.type = RESCTRL_MON_DOMAIN;
+ mon_dom->hdr.rid = RDT_RESOURCE_L3;
+ cpumask_set_cpu(cpu, &mon_dom->hdr.cpu_mask);
+ INIT_LIST_HEAD(&mon_dom->hdr.list);
+
+ if (resctrl_find_domain(&res->mon_domains, dom_id, &mon_pos)) {
+ pr_err("duplicate L3 mon_domain id %d\n", dom_id);
+ err = -EEXIST;
+ goto err_free;
+ }
+ if (mon_pos)
+ list_add_tail(&mon_dom->hdr.list, mon_pos);
+ else
+ list_add_tail(&mon_dom->hdr.list, &res->mon_domains);
+
+ err = resctrl_online_mon_domain(res, &mon_dom->hdr);
+ if (err)
+ goto err_listdel;
+
+ err = cbqri_init_mon_counters(ctrl);
+ if (err)
+ goto err_offline;
+
+ return 0;
+
+err_offline:
+ cancel_delayed_work_sync(&mon_dom->cqm_limbo);
+ cancel_delayed_work_sync(&mon_dom->mbm_over);
+ resctrl_offline_mon_domain(res, &mon_dom->hdr);
+err_listdel:
+ list_del(&mon_dom->hdr.list);
+err_free:
+ kfree(mon_dom);
+ return err;
+}
+
static int cbqri_attach_cpu_to_cap_ctrl(struct cbqri_controller *ctrl,
unsigned int cpu)
{
@@ -584,6 +797,7 @@ static int cbqri_attach_cpu_to_cap_ctrl(struct cbqri_controller *ctrl,
struct rdt_ctrl_domain *domain;
struct rdt_resource *res;
int dom_id;
+ int err;
if (ctrl->cache.cache_level == 2)
hw_res = &cbqri_resctrl_resources[RDT_RESOURCE_L2];
@@ -601,16 +815,42 @@ static int cbqri_attach_cpu_to_cap_ctrl(struct cbqri_controller *ctrl,
domain = cbqri_find_ctrl_domain(&res->ctrl_domains, dom_id);
if (domain) {
cpumask_set_cpu(cpu, &domain->hdr.cpu_mask);
- return 0;
+ } else {
+ domain = cbqri_create_ctrl_domain(ctrl, res, cpu, dom_id);
+ if (IS_ERR(domain))
+ return PTR_ERR(domain);
}
- domain = cbqri_create_ctrl_domain(ctrl, res, cpu, dom_id);
- if (IS_ERR(domain))
- return PTR_ERR(domain);
+ if (ctrl->mon_capable && ctrl->cache.cache_level == 3) {
+ err = cbqri_attach_cpu_to_l3_mon(ctrl, res, cpu);
+ if (err)
+ return err;
+ }
return 0;
}
+static void cbqri_detach_cpu_from_l3_mon(struct rdt_resource *res,
+ unsigned int cpu)
+{
+ struct rdt_l3_mon_domain *mon_dom, *tmp;
+
+ lockdep_assert_held(&cbqri_domain_list_lock);
+
+ list_for_each_entry_safe(mon_dom, tmp, &res->mon_domains, hdr.list) {
+ if (!cpumask_test_cpu(cpu, &mon_dom->hdr.cpu_mask))
+ continue;
+ cpumask_clear_cpu(cpu, &mon_dom->hdr.cpu_mask);
+ if (cpumask_empty(&mon_dom->hdr.cpu_mask)) {
+ cancel_delayed_work_sync(&mon_dom->cqm_limbo);
+ cancel_delayed_work_sync(&mon_dom->mbm_over);
+ resctrl_offline_mon_domain(res, &mon_dom->hdr);
+ list_del(&mon_dom->hdr.list);
+ kfree(mon_dom);
+ }
+ }
+}
+
static void cbqri_detach_cpu_from_ctrl_domains(struct rdt_resource *res,
unsigned int cpu)
{
@@ -634,7 +874,7 @@ static bool cbqri_resctrl_inited;
static void cbqri_resctrl_teardown(void)
{
- int rid;
+ int rid, evt;
if (!cbqri_resctrl_inited)
return;
@@ -647,7 +887,11 @@ static void cbqri_resctrl_teardown(void)
hw_res->ctrl = NULL;
hw_res->cdp_enabled = false;
}
+ for (evt = 0; evt <= CBQRI_MAX_EVENT; evt++)
+ cbqri_resctrl_counters[evt] = NULL;
exposed_alloc_capable = false;
+ exposed_mon_capable = false;
+ max_rmid = U32_MAX;
cbqri_resctrl_inited = false;
}
@@ -666,6 +910,8 @@ static int cbqri_resctrl_setup(void)
if (err)
return err;
+ cbqri_resctrl_pick_counters();
+
for (rid = 0; rid < RDT_NUM_RESOURCES; rid++) {
err = cbqri_resctrl_control_init(&cbqri_resctrl_resources[rid]);
if (err)
@@ -674,7 +920,7 @@ static int cbqri_resctrl_setup(void)
cbqri_resctrl_accumulate_caps();
- if (!exposed_alloc_capable) {
+ if (!exposed_alloc_capable && !exposed_mon_capable) {
pr_debug("no resctrl-capable CBQRI controllers found\n");
return -ENODEV;
}
@@ -723,6 +969,8 @@ static int cbqri_resctrl_offline_cpu(unsigned int cpu)
if (!hw_res->ctrl)
continue;
cbqri_detach_cpu_from_ctrl_domains(&hw_res->resctrl_res, cpu);
+ if (rid == RDT_RESOURCE_L3 && hw_res->ctrl->mon_capable)
+ cbqri_detach_cpu_from_l3_mon(&hw_res->resctrl_res, cpu);
}
mutex_unlock(&cbqri_domain_list_lock);
--
2.43.0
More information about the linux-riscv
mailing list