[PATCH v7] riscv: hwprobe: Fix stale vDSO data for late-initialized keys at boot

Alexandre Ghiti alex at ghiti.fr
Tue Aug 5 01:18:10 PDT 2025


Hi Jingwei,

On 7/28/25 02:47, Jingwei Wang wrote:
> The hwprobe vDSO data for some keys, like MISALIGNED_VECTOR_PERF,
> is determined by an asynchronous kthread. This can create a race
> condition where the kthread finishes after the vDSO data has
> already been populated, causing userspace to read stale values.
>
> To fix this race, a new 'ready' flag is added to the vDSO data,
> initialized to 'false' during late_initcall. This flag is checked
> by both the vDSO's user-space code and the riscv_hwprobe syscall.
> The syscall serves as a one-time gate, using a completion to wait
> for any pending probes before populating the data and setting the
> flag to 'true', thus ensuring userspace reads fresh values on its
> first request.
>
> Reported-by: Tsukasa OI <research_trasio at irq.a4lg.com>
> Closes: https://lore.kernel.org/linux-riscv/760d637b-b13b-4518-b6bf-883d55d44e7f@irq.a4lg.com/
> Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe")
> Cc: Palmer Dabbelt <palmer at dabbelt.com>
> Cc: Alexandre Ghiti <alexghiti at rivosinc.com>
> Cc: Olof Johansson <olof at lixom.net>
> Cc: stable at vger.kernel.org
>

^ Can you remove this empty line?


> Co-developed-by: Palmer Dabbelt <palmer at dabbelt.com>


This needs to be followed by a "Signed-off-by: Palmer Dabbelt 
<palmer at dabbelt.com>"


> Signed-off-by: Jingwei Wang <wangjingwei at iscas.ac.cn>
> ---
> Changes in v7:
> 	- Refined the on-demand synchronization by using the DO_ONCE_SLEEPABLE
> 	  macro.
> 	- Fixed a build error for nommu configs and addressed several coding
> 	  style issues reported by the CI.
>
> Changes in v6:
> 	- Based on Palmer's feedback, reworked the synchronization to be on-demand,
> 	  deferring the wait until the first hwprobe syscall via a 'ready' flag.
> 	  This avoids the boot-time regression from v5's approach.
>
> Changes in v5:
> 	- Reworked the synchronization logic to a robust "sentinel-count"
> 	  pattern based on feedback from Alexandre.
> 	- Fixed a "multiple definition" linker error for nommu builds by changing
> 	  the header-file stub functions to `static inline`, as pointed out by Olof.
> 	- Updated the commit message to better explain the rationale for moving
> 	  the vDSO initialization to `late_initcall`.
>
> Changes in v4:
> 	- Reworked the synchronization mechanism based on feedback from Palmer
> 	  and Alexandre.
> 	- Instead of a post-hoc refresh, this version introduces a robust
> 	  completion-based framework using an atomic counter to ensure async
> 	  probes are finished before populating the vDSO.
> 	- Moved the vdso data initialization to a late_initcall to avoid
> 	  impacting boot time.
>
> Changes in v3:
> 	- Retained existing blank line.
>
> Changes in v2:
> 	- Addressed feedback from Yixun regarding #ifdef CONFIG_MMU usage.
> 	- Updated commit message to provide a high-level summary.
> 	- Added Fixes tag for commit e7c9d66e313b.
>
> v1: https://lore.kernel.org/linux-riscv/20250521052754.185231-1-wangjingwei@iscas.ac.cn/T/#u
>
>   arch/riscv/include/asm/hwprobe.h           |  7 +++
>   arch/riscv/include/asm/vdso/arch_data.h    |  6 ++
>   arch/riscv/kernel/sys_hwprobe.c            | 72 ++++++++++++++++++----
>   arch/riscv/kernel/unaligned_access_speed.c |  9 ++-
>   arch/riscv/kernel/vdso/hwprobe.c           |  2 +-
>   5 files changed, 80 insertions(+), 16 deletions(-)
>
> diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
> index 7fe0a379474ae2c6..5fe10724d307dc99 100644
> --- a/arch/riscv/include/asm/hwprobe.h
> +++ b/arch/riscv/include/asm/hwprobe.h
> @@ -41,4 +41,11 @@ static inline bool riscv_hwprobe_pair_cmp(struct riscv_hwprobe *pair,
>   	return pair->value == other_pair->value;
>   }
>
> +#ifdef CONFIG_MMU
> +void riscv_hwprobe_register_async_probe(void);
> +void riscv_hwprobe_complete_async_probe(void);
> +#else
> +static inline void riscv_hwprobe_register_async_probe(void) {}
> +static inline void riscv_hwprobe_complete_async_probe(void) {}
> +#endif
>   #endif
> diff --git a/arch/riscv/include/asm/vdso/arch_data.h b/arch/riscv/include/asm/vdso/arch_data.h
> index da57a3786f7a53c8..88b37af55175129b 100644
> --- a/arch/riscv/include/asm/vdso/arch_data.h
> +++ b/arch/riscv/include/asm/vdso/arch_data.h
> @@ -12,6 +12,12 @@ struct vdso_arch_data {
>
>   	/* Boolean indicating all CPUs have the same static hwprobe values. */
>   	__u8 homogeneous_cpus;
> +
> +	/*
> +	 * A gate to check and see if the hwprobe data is actually ready, as
> +	 * probing is deferred to avoid boot slowdowns.
> +	 */
> +	__u8 ready;
>   };
>
>   #endif /* __RISCV_ASM_VDSO_ARCH_DATA_H */
> diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
> index 0b170e18a2beba57..95146bb10e796765 100644
> --- a/arch/riscv/kernel/sys_hwprobe.c
> +++ b/arch/riscv/kernel/sys_hwprobe.c
> @@ -5,6 +5,9 @@
>    * more details.
>    */
>   #include <linux/syscalls.h>
> +#include <linux/completion.h>
> +#include <linux/atomic.h>
> +#include <linux/once.h>
>   #include <asm/cacheflush.h>
>   #include <asm/cpufeature.h>
>   #include <asm/hwprobe.h>
> @@ -452,28 +455,32 @@ static int hwprobe_get_cpus(struct riscv_hwprobe __user *pairs,
>   	return 0;
>   }
>
> -static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
> -			    size_t pair_count, size_t cpusetsize,
> -			    unsigned long __user *cpus_user,
> -			    unsigned int flags)
> -{
> -	if (flags & RISCV_HWPROBE_WHICH_CPUS)
> -		return hwprobe_get_cpus(pairs, pair_count, cpusetsize,
> -					cpus_user, flags);
> +#ifdef CONFIG_MMU
>
> -	return hwprobe_get_values(pairs, pair_count, cpusetsize,
> -				  cpus_user, flags);
> +static DECLARE_COMPLETION(boot_probes_done);
> +static atomic_t pending_boot_probes = ATOMIC_INIT(1);
> +
> +void riscv_hwprobe_register_async_probe(void)
> +{
> +	atomic_inc(&pending_boot_probes);
>   }
>
> -#ifdef CONFIG_MMU
> +void riscv_hwprobe_complete_async_probe(void)
> +{
> +	if (atomic_dec_and_test(&pending_boot_probes))
> +		complete(&boot_probes_done);
> +}
>
> -static int __init init_hwprobe_vdso_data(void)
> +static int complete_hwprobe_vdso_data(void)
>   {
>   	struct vdso_arch_data *avd = vdso_k_arch_data;
>   	u64 id_bitsmash = 0;
>   	struct riscv_hwprobe pair;
>   	int key;
>
> +	if (unlikely(!atomic_dec_and_test(&pending_boot_probes)))
> +		wait_for_completion(&boot_probes_done);
> +
>   	/*
>   	 * Initialize vDSO data with the answers for the "all CPUs" case, to
>   	 * save a syscall in the common case.
> @@ -501,13 +508,52 @@ static int __init init_hwprobe_vdso_data(void)
>   	 * vDSO should defer to the kernel for exotic cpu masks.
>   	 */
>   	avd->homogeneous_cpus = id_bitsmash != 0 && id_bitsmash != -1;
> +
> +	/*
> +	 * Make sure all the VDSO values are visible before we look at them.
> +	 * This pairs with the implicit "no speculatively visible accesses"
> +	 * barrier in the VDSO hwprobe code.
> +	 */
> +	smp_wmb();
> +	avd->ready = true;
>   	return 0;
>   }
>
> -arch_initcall_sync(init_hwprobe_vdso_data);
> +static int __init init_hwprobe_vdso_data(void)
> +{
> +	struct vdso_arch_data *avd = vdso_k_arch_data;
> +
> +	/*
> +	 * Prevent the vDSO cached values from being used, as they're not ready
> +	 * yet.
> +	 */
> +	avd->ready = false;
> +	return 0;
> +}
> +
> +late_initcall(init_hwprobe_vdso_data);


I don't think we need to move the initcall anymore, right?


> +
> +#else
> +
> +static int complete_hwprobe_vdso_data(void) { return 0; }
>
>   #endif /* CONFIG_MMU */
>
> +static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
> +			     size_t pair_count, size_t cpusetsize,
> +			     unsigned long __user *cpus_user,
> +			     unsigned int flags)
> +{
> +	DO_ONCE_SLEEPABLE(complete_hwprobe_vdso_data);
> +
> +	if (flags & RISCV_HWPROBE_WHICH_CPUS)
> +		return hwprobe_get_cpus(pairs, pair_count, cpusetsize,
> +					cpus_user, flags);
> +
> +	return hwprobe_get_values(pairs, pair_count, cpusetsize,
> +				cpus_user, flags);
> +}
> +
>   SYSCALL_DEFINE5(riscv_hwprobe, struct riscv_hwprobe __user *, pairs,
>   		size_t, pair_count, size_t, cpusetsize, unsigned long __user *,
>   		cpus, unsigned int, flags)
> diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
> index ae2068425fbcd207..aa912c62fb70ba0e 100644
> --- a/arch/riscv/kernel/unaligned_access_speed.c
> +++ b/arch/riscv/kernel/unaligned_access_speed.c
> @@ -379,6 +379,7 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus
>   static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
>   {
>   	schedule_on_each_cpu(check_vector_unaligned_access);
> +	riscv_hwprobe_complete_async_probe();
>
>   	return 0;
>   }
> @@ -473,8 +474,12 @@ static int __init check_unaligned_access_all_cpus(void)
>   			per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param;
>   	} else if (!check_vector_unaligned_access_emulated_all_cpus() &&
>   		   IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
> -		kthread_run(vec_check_unaligned_access_speed_all_cpus,
> -			    NULL, "vec_check_unaligned_access_speed_all_cpus");
> +		riscv_hwprobe_register_async_probe();
> +		if (IS_ERR(kthread_run(vec_check_unaligned_access_speed_all_cpus,
> +			   NULL, "vec_check_unaligned_access_speed_all_cpus"))) {
> +			pr_warn("Failed to create vec_unalign_check kthread\n");
> +			riscv_hwprobe_complete_async_probe();
> +		}
>   	}
>
>   	/*
> diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c
> index 2ddeba6c68dda09b..bf77b4c1d2d8e803 100644
> --- a/arch/riscv/kernel/vdso/hwprobe.c
> +++ b/arch/riscv/kernel/vdso/hwprobe.c
> @@ -27,7 +27,7 @@ static int riscv_vdso_get_values(struct riscv_hwprobe *pairs, size_t pair_count,
>   	 * homogeneous, then this function can handle requests for arbitrary
>   	 * masks.
>   	 */
> -	if ((flags != 0) || (!all_cpus && !avd->homogeneous_cpus))
> +	if ((flags != 0) || (!all_cpus && !avd->homogeneous_cpus) || unlikely(!avd->ready))
>   		return riscv_hwprobe(pairs, pair_count, cpusetsize, cpus, flags);
>
>   	/* This is something we can handle, fill out the pairs. */
> --
> 2.50.1
>

With the nits above fixed, you can add:

Reviewed-by: Alexandre Ghiti <alexghiti at rivosinc.com>

Thanks,

Alex




More information about the linux-riscv mailing list