[PATCH RFC v3 06/11] RISC-V: QoS: add resctrl setup and domain management
Reinette Chatre
reinette.chatre at intel.com
Thu Apr 30 16:20:09 PDT 2026
Hi Drew,
On 4/14/26 6:54 PM, Drew Fustini wrote:
> +
> +static int qos_resctrl_add_controller_domain(struct cbqri_controller *ctrl)
> +{
> + struct rdt_ctrl_domain *domain;
> + struct cbqri_resctrl_res *cbqri_res = NULL;
> + struct rdt_resource *res = NULL;
> + struct list_head *pos = NULL;
> + int err;
> +
> + domain = qos_new_domain(ctrl);
> + if (!domain)
> + return -ENOSPC;
> +
> + switch (ctrl->type) {
> + case CBQRI_CONTROLLER_TYPE_CAPACITY:
> + cpumask_copy(&domain->hdr.cpu_mask, &ctrl->cache.cpu_mask);
Looking at patch #10, ctrl->cache.cpu_mask contains all CPUs associated with the cache,
even if they are offline. This is not what resctrl expects. Instead, the expectation is
that a domain exists and is online (hence "resctrl_online_ctrl_domain()") if at least one CPU
belonging to that domain is online and domain->hdr.cpu_mask lists all the *online* CPUs
associated with that domain.
This is why resctrl always takes the CPU hotplug lock when traversing the domain
lists.
I thus expected this initialization to be split between an early initialization of
resource capabilities and then domain initialization as part of the CPU online/offline
handlers.
> + domain->hdr.id = ctrl->cache.cache_id;
> +
> + if (ctrl->cache.cache_level == 2) {
> + cbqri_res = &cbqri_resctrl_resources[RDT_RESOURCE_L2];
> + err = qos_init_cache_resource(ctrl, cbqri_res,
> + RDT_RESOURCE_L2, "L2",
> + RESCTRL_L2_CACHE);
> + } else if (ctrl->cache.cache_level == 3) {
> + cbqri_res = &cbqri_resctrl_resources[RDT_RESOURCE_L3];
> + err = qos_init_cache_resource(ctrl, cbqri_res,
> + RDT_RESOURCE_L3, "L3",
> + RESCTRL_L3_CACHE);
> + } else {
> + pr_err("unknown cache level %d\n", ctrl->cache.cache_level);
> + err = -ENODEV;
> + }
> + if (err)
> + goto err_free_domain;
> + res = &cbqri_res->resctrl_res;
> + break;
> +
> + case CBQRI_CONTROLLER_TYPE_BANDWIDTH:
> + cpumask_copy(&domain->hdr.cpu_mask, &ctrl->mem.cpu_mask);
> + domain->hdr.id = ctrl->mem.prox_dom;
> + if (ctrl->alloc_capable) {
> + cbqri_res = &cbqri_resctrl_resources[RDT_RESOURCE_MBA];
> + err = qos_init_membw_resource(ctrl, cbqri_res);
> + if (err)
> + goto err_free_domain;
> + res = &cbqri_res->resctrl_res;
> + }
> + break;
> +
> + default:
> + pr_err("unknown controller type %d\n", ctrl->type);
> + err = -ENODEV;
> + goto err_free_domain;
> + }
> +
> + if (!res)
> + goto out;
> +
> + err = qos_init_domain_ctrlval(res, domain);
> + if (err)
> + goto err_free_domain;
> +
> + if (resctrl_find_domain(&res->ctrl_domains, domain->hdr.id, &pos)) {
> + pr_err("duplicate domain id %d for resource %s\n",
> + domain->hdr.id, res->name);
> + err = -EEXIST;
> + goto err_free_domain;
> + }
> + if (pos)
> + list_add_tail(&domain->hdr.list, pos);
> + else
> + list_add_tail(&domain->hdr.list, &res->ctrl_domains);
resctrl_find_domain() returns NULL if it cannot find an existing domain; in that
case it initializes "pos" to support adding a new domain in a sorted list.
The expectation is that domains are managed as part of the CPU hotplug handlers. When
a CPU comes online, the handler can check if the domain it belongs to already exists.
If it does, then the CPU can just be added to that domain's cpu_mask; if it does
not, then a new domain is created and added in the appropriate spot in the
sorted list (based on domain ID) of domains.
> +
> + err = resctrl_online_ctrl_domain(res, domain);
> + if (err) {
> + pr_err("failed to online domain %d\n", domain->hdr.id);
> + list_del(&domain->hdr.list);
> + goto err_free_domain;
> + }
> +
> +out:
> + return 0;
> +
> +err_free_domain:
> + kfree(container_of(domain, struct cbqri_resctrl_dom, resctrl_ctrl_dom));
> + return err;
> +}
> +
> +int qos_resctrl_setup(void)
> +{
> + struct rdt_ctrl_domain *domain, *domain_temp;
> + struct cbqri_controller *ctrl;
> + struct cbqri_resctrl_res *res;
> + int err = 0;
> + int i = 0;
> +
> + max_rmid = U32_MAX;
> +
> + for (i = 0; i < RDT_NUM_RESOURCES; i++) {
> + res = &cbqri_resctrl_resources[i];
> + INIT_LIST_HEAD(&res->resctrl_res.ctrl_domains);
> + INIT_LIST_HEAD(&res->resctrl_res.mon_domains);
> + res->resctrl_res.rid = i;
> + }
> +
> + list_for_each_entry(ctrl, &cbqri_controllers, list) {
> + err = cbqri_probe_controller(ctrl);
> + if (err) {
> + pr_err("%s(): failed (%d)\n", __func__, err);
> + goto err_free_controllers_list;
> + }
> +
> + err = qos_resctrl_add_controller_domain(ctrl);
> + if (err) {
> + pr_err("%s(): failed to add controller domain (%d)\n", __func__, err);
> + goto err_free_controllers_list;
> + }
> +
> + /*
> + * CDP (code data prioritization) on x86 is similar to
> + * the AT (access type) field in CBQRI. CDP only supports
> + * caches so this must be a CBQRI capacity controller.
> + */
> + if (ctrl->type == CBQRI_CONTROLLER_TYPE_CAPACITY &&
> + ctrl->cc.supports_alloc_at_code) {
> + if (ctrl->cache.cache_level == 2)
> + exposed_cdp_l2_capable = true;
> + else
> + exposed_cdp_l3_capable = true;
> + }
> + }
> + pr_debug("alloc=%d cdp_l2=%d cdp_l3=%d\n",
> + exposed_alloc_capable,
> + exposed_cdp_l2_capable, exposed_cdp_l3_capable);
> +
> + err = resctrl_init();
> + if (err)
> + goto err_free_controllers_list;
> +
> + return 0;
> +
> +err_free_controllers_list:
> + for (i = 0; i < RDT_NUM_RESOURCES; i++) {
> + res = &cbqri_resctrl_resources[i];
> + list_for_each_entry_safe(domain, domain_temp, &res->resctrl_res.ctrl_domains,
> + hdr.list) {
> + resctrl_offline_ctrl_domain(&res->resctrl_res, domain);
> + list_del(&domain->hdr.list);
> + kfree(container_of(domain, struct cbqri_resctrl_dom, resctrl_ctrl_dom));
> + }
> + }
> +
> + list_for_each_entry(ctrl, &cbqri_controllers, list) {
> + if (!ctrl->base)
> + break;
> + iounmap(ctrl->base);
> + ctrl->base = NULL;
> + release_mem_region(ctrl->addr, ctrl->size);
> + }
> +
> + return err;
> +}
> +
> +int qos_resctrl_online_cpu(unsigned int cpu)
> +{
> + resctrl_online_cpu(cpu);
This is where a domain is expected to be added when its first CPU comes online.
> + return 0;
> +}
> +
> +int qos_resctrl_offline_cpu(unsigned int cpu)
> +{
> + resctrl_offline_cpu(cpu);
This is where a domain is expected to be removed when its last CPU goes offline.
> + return 0;
> +}
>
Reinette
More information about the linux-riscv
mailing list