[PATCH RFC v3 06/11] RISC-V: QoS: add resctrl setup and domain management

Reinette Chatre reinette.chatre at intel.com
Thu Apr 30 16:20:09 PDT 2026


Hi Drew,

On 4/14/26 6:54 PM, Drew Fustini wrote:
> +
> +static int qos_resctrl_add_controller_domain(struct cbqri_controller *ctrl)
> +{
> +	struct rdt_ctrl_domain *domain;
> +	struct cbqri_resctrl_res *cbqri_res = NULL;
> +	struct rdt_resource *res = NULL;
> +	struct list_head *pos = NULL;
> +	int err;
> +
> +	domain = qos_new_domain(ctrl);
> +	if (!domain)
> +		return -ENOSPC;
> +
> +	switch (ctrl->type) {
> +	case CBQRI_CONTROLLER_TYPE_CAPACITY:
> +		cpumask_copy(&domain->hdr.cpu_mask, &ctrl->cache.cpu_mask);

Looking at patch #10, ctrl->cache.cpu_mask contains all CPUs associated with the cache,
even if they are offline. This is not what resctrl expects. Instead, the expectation is
that a domain exists and is online (hence "resctrl_online_ctrl_domain()") if at least one CPU
belonging to that domain is online, and that domain->hdr.cpu_mask lists all the *online* CPUs
associated with that domain.
This is why resctrl always takes the CPU hotplug lock when traversing the domain
lists.

I thus expected this initialization to be split between an early initialization of
resource capabilities and then domain initialization as part of the CPU online/offline
handlers.

> +		domain->hdr.id = ctrl->cache.cache_id;
> +
> +		if (ctrl->cache.cache_level == 2) {
> +			cbqri_res = &cbqri_resctrl_resources[RDT_RESOURCE_L2];
> +			err = qos_init_cache_resource(ctrl, cbqri_res,
> +						      RDT_RESOURCE_L2, "L2",
> +						      RESCTRL_L2_CACHE);
> +		} else if (ctrl->cache.cache_level == 3) {
> +			cbqri_res = &cbqri_resctrl_resources[RDT_RESOURCE_L3];
> +			err = qos_init_cache_resource(ctrl, cbqri_res,
> +						      RDT_RESOURCE_L3, "L3",
> +						      RESCTRL_L3_CACHE);
> +		} else {
> +			pr_err("unknown cache level %d\n", ctrl->cache.cache_level);
> +			err = -ENODEV;
> +		}
> +		if (err)
> +			goto err_free_domain;
> +		res = &cbqri_res->resctrl_res;
> +		break;
> +
> +	case CBQRI_CONTROLLER_TYPE_BANDWIDTH:
> +		cpumask_copy(&domain->hdr.cpu_mask, &ctrl->mem.cpu_mask);
> +		domain->hdr.id = ctrl->mem.prox_dom;
> +		if (ctrl->alloc_capable) {
> +			cbqri_res = &cbqri_resctrl_resources[RDT_RESOURCE_MBA];
> +			err = qos_init_membw_resource(ctrl, cbqri_res);
> +			if (err)
> +				goto err_free_domain;
> +			res = &cbqri_res->resctrl_res;
> +		}
> +		break;
> +
> +	default:
> +		pr_err("unknown controller type %d\n", ctrl->type);
> +		err = -ENODEV;
> +		goto err_free_domain;
> +	}
> +
> +	if (!res)
> +		goto out;
> +
> +	err = qos_init_domain_ctrlval(res, domain);
> +	if (err)
> +		goto err_free_domain;
> +
> +	if (resctrl_find_domain(&res->ctrl_domains, domain->hdr.id, &pos)) {
> +		pr_err("duplicate domain id %d for resource %s\n",
> +		       domain->hdr.id, res->name);
> +		err = -EEXIST;
> +		goto err_free_domain;
> +	}
> +	if (pos)
> +		list_add_tail(&domain->hdr.list, pos);
> +	else
> +		list_add_tail(&domain->hdr.list, &res->ctrl_domains);

resctrl_find_domain() returns NULL if it cannot find an existing domain, in that
case it initializes "pos" to support adding a new domain in a sorted list.
The expectation is that domains are managed as part of the CPU hotplug handlers. When
a CPU comes online, the handler can check if the domain it belongs to already exists:
if it does, the CPU can just be added to that domain's cpu_mask; if it does
not, a new domain is created and added in the appropriate spot in the
sorted list (based on domain ID) of domains.


> +
> +	err = resctrl_online_ctrl_domain(res, domain);
> +	if (err) {
> +		pr_err("failed to online domain %d\n", domain->hdr.id);
> +		list_del(&domain->hdr.list);
> +		goto err_free_domain;
> +	}
> +
> +out:
> +	return 0;
> +
> +err_free_domain:
> +	kfree(container_of(domain, struct cbqri_resctrl_dom, resctrl_ctrl_dom));
> +	return err;
> +}
> +
> +int qos_resctrl_setup(void)
> +{
> +	struct rdt_ctrl_domain *domain, *domain_temp;
> +	struct cbqri_controller *ctrl;
> +	struct cbqri_resctrl_res *res;
> +	int err = 0;
> +	int i = 0;
> +
> +	max_rmid = U32_MAX;
> +
> +	for (i = 0; i < RDT_NUM_RESOURCES; i++) {
> +		res = &cbqri_resctrl_resources[i];
> +		INIT_LIST_HEAD(&res->resctrl_res.ctrl_domains);
> +		INIT_LIST_HEAD(&res->resctrl_res.mon_domains);
> +		res->resctrl_res.rid = i;
> +	}
> +
> +	list_for_each_entry(ctrl, &cbqri_controllers, list) {
> +		err = cbqri_probe_controller(ctrl);
> +		if (err) {
> +			pr_err("%s(): failed (%d)\n", __func__, err);
> +			goto err_free_controllers_list;
> +		}
> +
> +		err = qos_resctrl_add_controller_domain(ctrl);
> +		if (err) {
> +			pr_err("%s(): failed to add controller domain (%d)\n", __func__, err);
> +			goto err_free_controllers_list;
> +		}
> +
> +		/*
> +		 * CDP (code data prioritization) on x86 is similar to
> +		 * the AT (access type) field in CBQRI. CDP only supports
> +		 * caches so this must be a CBQRI capacity controller.
> +		 */
> +		if (ctrl->type == CBQRI_CONTROLLER_TYPE_CAPACITY &&
> +		    ctrl->cc.supports_alloc_at_code) {
> +			if (ctrl->cache.cache_level == 2)
> +				exposed_cdp_l2_capable = true;
> +			else
> +				exposed_cdp_l3_capable = true;
> +		}
> +	}
> +	pr_debug("alloc=%d cdp_l2=%d cdp_l3=%d\n",
> +		 exposed_alloc_capable,
> +		 exposed_cdp_l2_capable, exposed_cdp_l3_capable);
> +
> +	err = resctrl_init();
> +	if (err)
> +		goto err_free_controllers_list;
> +
> +	return 0;
> +
> +err_free_controllers_list:
> +	for (i = 0; i < RDT_NUM_RESOURCES; i++) {
> +		res = &cbqri_resctrl_resources[i];
> +		list_for_each_entry_safe(domain, domain_temp, &res->resctrl_res.ctrl_domains,
> +					 hdr.list) {
> +			resctrl_offline_ctrl_domain(&res->resctrl_res, domain);
> +			list_del(&domain->hdr.list);
> +			kfree(container_of(domain, struct cbqri_resctrl_dom, resctrl_ctrl_dom));
> +		}
> +	}
> +
> +	list_for_each_entry(ctrl, &cbqri_controllers, list) {
> +		if (!ctrl->base)
> +			break;
> +		iounmap(ctrl->base);
> +		ctrl->base = NULL;
> +		release_mem_region(ctrl->addr, ctrl->size);
> +	}
> +
> +	return err;
> +}
> +
> +int qos_resctrl_online_cpu(unsigned int cpu)
> +{
> +	resctrl_online_cpu(cpu);

This is where a domain is expected to be added when its first CPU comes online.

> +	return 0;
> +}
> +
> +int qos_resctrl_offline_cpu(unsigned int cpu)
> +{
> +	resctrl_offline_cpu(cpu);

This is where a domain is expected to be removed when its last CPU goes offline.

> +	return 0;
> +}
> 

Reinette



More information about the linux-riscv mailing list