[RFC PATCH 3/5] RISC-V: hwprobe: Introduce which-cpus flag

Palmer Dabbelt palmer at dabbelt.com
Mon Sep 25 04:23:29 PDT 2023


On Thu, 21 Sep 2023 05:55:22 PDT (-0700), ajones at ventanamicro.com wrote:
> Introduce the first flag for the hwprobe syscall. The flag basically
> reverses its behavior, i.e. instead of populating the values of keys
> for a given set of cpus, the set of cpus after the call is the result
> of finding a set which supports the values of the keys. In order to
> do this, we implement pair merge and pair compare functions which
> take the type of value (a single value vs. a bitmap of booleans) into
> consideration. The flow for the which-cpus syscall variant is as
> follows:
>
>   1. Merge pairs into a set of pairs with unique keys
>   2. If any unknown keys are seen, return an empty set of cpus
>   3. If the platform is homogeneous, then check all the pairs
>      against the "all cpu" values and return early
>   4. Otherwise, check all the pairs against each cpu individually
>
> Signed-off-by: Andrew Jones <ajones at ventanamicro.com>
> ---
>  Documentation/riscv/hwprobe.rst       |  16 ++-
>  arch/riscv/include/uapi/asm/hwprobe.h |   3 +
>  arch/riscv/kernel/sys_riscv.c         | 148 +++++++++++++++++++++++++-
>  3 files changed, 163 insertions(+), 4 deletions(-)
>
> diff --git a/Documentation/riscv/hwprobe.rst b/Documentation/riscv/hwprobe.rst
> index 132e9acaa8f4..97b1e97e7dd2 100644
> --- a/Documentation/riscv/hwprobe.rst
> +++ b/Documentation/riscv/hwprobe.rst
> @@ -25,8 +25,20 @@ arch, impl), the returned value will only be valid if all CPUs in the given set
>  have the same value. Otherwise -1 will be returned. For boolean-like keys, the
>  value returned will be a logical AND of the values for the specified CPUs.
>  Usermode can supply NULL for ``cpus`` and 0 for ``cpusetsize`` as a shortcut for
> -all online CPUs. There are currently no flags, this value must be zero for
> -future compatibility.
> +all online CPUs. The currently supported flags are:
> +
> +* :c:macro:`RISCV_HWPROBE_WHICH_CPUS`: This flag basically reverses the behavior
> +  of sys_riscv_hwprobe().  Instead of populating the values of keys for a given
> +  set of CPUs, the set of CPUs is initially all unset and the values of each key
> +  are given.  Upon return, the CPUs which all match each of the given key-value
> +  pairs are set in ``cpus``.  How matching is done depends on the key type.  For
> +  value-like keys, matching means to be the exact same as the value.  For
> +  boolean-like keys, matching means the result of a logical AND of the pair's
> +  value with the CPU's value is exactly the same as the pair's value.  ``cpus``
> +  may also initially have set bits, in which case the bits of any CPUs which do
> +  not match the pairs will be cleared, but no other bits will be set.
> +
> +All other flags are reserved for future compatibility and must be zero.
>
>  On success 0 is returned, on failure a negative error code is returned.
>
> diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
> index 86d08a0e617b..36683307c3e4 100644
> --- a/arch/riscv/include/uapi/asm/hwprobe.h
> +++ b/arch/riscv/include/uapi/asm/hwprobe.h
> @@ -40,4 +40,7 @@ struct riscv_hwprobe {
>  #define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE	6
>  /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
>
> +/* Flags */
> +#define RISCV_HWPROBE_WHICH_CPUS	(1 << 0)
> +
>  #endif
> diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
> index 14b6dfaa5d9f..c70a72fe6aee 100644
> --- a/arch/riscv/kernel/sys_riscv.c
> +++ b/arch/riscv/kernel/sys_riscv.c
> @@ -245,14 +245,145 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
>  	}
>  }
>
> +static bool hwprobe_key_is_map(__s64 key)
> +{
> +	switch (key) {
> +	case RISCV_HWPROBE_KEY_BASE_BEHAVIOR:
> +	case RISCV_HWPROBE_KEY_IMA_EXT_0:
> +	case RISCV_HWPROBE_KEY_CPUPERF_0:
> +		return true;
> +	}
> +
> +	return false;
> +}
> +
> +static int hwprobe_pair_merge(struct riscv_hwprobe *to,
> +			      struct riscv_hwprobe *from)
> +{
> +	if (to->key != from->key)
> +		return -EINVAL;
> +
> +	if (hwprobe_key_is_map(to->key)) {
> +		to->value |= from->value;
> +		return 0;
> +	}
> +
> +	return to->value == from->value ? 0 : -EINVAL;
> +}
> +
> +static bool hwprobe_pair_cmp(struct riscv_hwprobe *pair,
> +			     struct riscv_hwprobe *other_pair)
> +{
> +	if (pair->key != other_pair->key)
> +		return false;
> +
> +	if (hwprobe_key_is_map(pair->key))
> +		return (pair->value & other_pair->value) == other_pair->value;
> +
> +	return pair->value == other_pair->value;
> +}
> +
> +static int hwprobe_which_cpus(struct riscv_hwprobe __user *pairs_user,
> +			      size_t pair_count, size_t cpusetsize,
> +			      cpumask_t *cpus)
> +{
> +	struct riscv_hwprobe pairs[RISCV_HWPROBE_MAX_KEY + 1] = {
> +		[0 ... RISCV_HWPROBE_MAX_KEY] = (struct riscv_hwprobe){ .key = -1 }
> +	};
> +	struct riscv_hwprobe pair;
> +	struct vdso_data *vd = __arch_get_k_vdso_data();
> +	struct arch_vdso_data *avd = &vd->arch_data;
> +	bool clear_all = false;
> +	cpumask_t one_cpu;
> +	int cpu, ret;
> +	size_t i;
> +
> +	for (i = 0; i < pair_count; i++) {
> +		ret = copy_from_user(&pair, &pairs_user[i], sizeof(pair));
> +		if (ret)
> +			return -EFAULT;
> +
> +		if (pair.key >= 0 && pair.key <= RISCV_HWPROBE_MAX_KEY) {
> +			if (pairs[pair.key].key == -1) {
> +				pairs[pair.key] = pair;
> +			} else {
> +				ret = hwprobe_pair_merge(&pairs[pair.key], &pair);
> +				if (ret)
> +					return ret;
> +			}
> +		} else {
> +			pair.key = -1;
> +			pair.value = 0;
> +			ret = copy_to_user(&pairs_user[i], &pair, sizeof(pair));
> +			if (ret)
> +				return -EFAULT;
> +			clear_all = true;
> +		}
> +	}
> +
> +	if (clear_all) {
> +		cpumask_clear(cpus);
> +		return 0;
> +	}
> +
> +	if (avd->homogeneous_cpus) {
> +		for (i = 0; i <= RISCV_HWPROBE_MAX_KEY; i++) {
> +			if (pairs[i].key == -1)
> +				continue;
> +
> +			pair.key = pairs[i].key;
> +			pair.value = avd->all_cpu_hwprobe_values[pairs[i].key];
> +
> +			if (!hwprobe_pair_cmp(&pair, &pairs[i])) {
> +				cpumask_clear(cpus);
> +				return 0;
> +			}
> +		}
> +
> +		return 0;
> +	}
> +
> +	cpumask_clear(&one_cpu);
> +
> +	for_each_cpu(cpu, cpus) {
> +		cpumask_set_cpu(cpu, &one_cpu);
> +
> +		for (i = 0; i <= RISCV_HWPROBE_MAX_KEY; i++) {
> +			if (pairs[i].key == -1)
> +				continue;
> +
> +			pair.key = pairs[i].key;
> +			pair.value = 0;
> +			hwprobe_one_pair(&pair, &one_cpu);
> +
> +			if (!hwprobe_pair_cmp(&pair, &pairs[i])) {
> +				cpumask_clear_cpu(cpu, cpus);
> +				break;
> +			}
> +		}
> +
> +		cpumask_clear_cpu(cpu, &one_cpu);
> +	}
> +
> +	return 0;
> +}
> +
>  static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
>  			    size_t pair_count, size_t cpusetsize,
>  			    unsigned long __user *cpus_user,
>  			    unsigned int flags)
>  {
> +	bool which_cpus = false;
> +	cpumask_t cpus;
>  	size_t out;
>  	int ret;
> -	cpumask_t cpus;
> +
> +	if (flags & RISCV_HWPROBE_WHICH_CPUS) {
> +		if (!cpusetsize || !cpus_user)
> +			return -EINVAL;
> +		flags &= ~RISCV_HWPROBE_WHICH_CPUS;
> +		which_cpus = true;
> +	}
>
>  	/* Check the reserved flags. */
>  	if (flags != 0)
> @@ -274,11 +405,24 @@ static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
>  		if (ret)
>  			return -EFAULT;
>
> +		cpumask_and(&cpus, &cpus, cpu_online_mask);
> +
> +		if (which_cpus) {
> +			if (cpumask_empty(&cpus))
> +				cpumask_copy(&cpus, cpu_online_mask);
> +			ret = hwprobe_which_cpus(pairs, pair_count, cpusetsize, &cpus);
> +			if (ret)
> +				return ret;
> +			ret = copy_to_user(cpus_user, &cpus, cpusetsize);
> +			if (ret)
> +				return -EFAULT;
> +			return 0;

So this is now essentially two syscalls.  IMO it'd be cleaner to split
out the implementations into two functions (i.e.,
hwprobe_{which_cpus,which_features}() or whatever) rather than have an
early out and the rest inline.

Also: maybe we want a whole hwprobe file?  It's sort of its own thing 
now, and it's only going to get bigger...

> +		}
> +
>  		/*
>  		 * Userspace must provide at least one online CPU, without that
>  		 * there's no way to define what is supported.
>  		 */
> -		cpumask_and(&cpus, &cpus, cpu_online_mask);
>  		if (cpumask_empty(&cpus))
>  			return -EINVAL;
>  	}



More information about the linux-riscv mailing list