[PATCH v2 2/4] arm64/fpsimd: Discover maximum vector length implemented by any CPU

Fuad Tabba tabba at google.com
Fri Jun 7 00:33:27 PDT 2024


Hi Mark,

On Thu, Jun 6, 2024 at 4:32 PM Mark Brown <broonie at kernel.org> wrote:
>
> When discovering the vector lengths for SVE and SME we do not currently
> record the maximum VL supported on any individual CPU.  This is expected
> to be the same for all CPUs but the architecture allows asymmetry, if we
> do encounter an asymmetric system then some CPUs may support VLs higher
> than the maximum Linux will use.  Since the pKVM hypervisor needs to
> support saving and restoring anything the host can physically set it
> needs to know the maximum value any CPU could have, add support for
> enumerating it and validation for late CPUs.
>
> Signed-off-by: Mark Brown <broonie at kernel.org>
> ---
>  arch/arm64/include/asm/fpsimd.h | 13 +++++++++++++
>  arch/arm64/kernel/fpsimd.c      | 26 +++++++++++++++++++++++++-
>  2 files changed, 38 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index 51c21265b4fa..cd19713c9deb 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -188,6 +188,9 @@ struct vl_info {
>         int max_vl;
>         int max_virtualisable_vl;
>
> +       /* Maximum vector length observed on any CPU */
> +       int max_cpu_vl;
> +
>         /*
>          * Set of available vector lengths,
>          * where length vq encoded as bit __vq_to_bit(vq):
> @@ -278,6 +281,11 @@ static inline int vec_max_virtualisable_vl(enum vec_type type)
>         return vl_info[type].max_virtualisable_vl;
>  }
>
> +static inline int vec_max_cpu_vl(enum vec_type type)
> +{
> +       return vl_info[type].max_cpu_vl;
> +}
> +
>  static inline int sve_max_vl(void)
>  {
>         return vec_max_vl(ARM64_VEC_SVE);
> @@ -288,6 +296,11 @@ static inline int sve_max_virtualisable_vl(void)
>         return vec_max_virtualisable_vl(ARM64_VEC_SVE);
>  }
>
> +static inline int sve_max_cpu_vl(void)
> +{
> +       return vec_max_cpu_vl(ARM64_VEC_SVE);
> +}
> +
>  /* Ensure vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX before calling this function */
>  static inline bool vq_available(enum vec_type type, unsigned int vq)
>  {
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index 22542fb81812..ee6fb8c4b16d 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -129,6 +129,7 @@ __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
>                 .min_vl                 = SVE_VL_MIN,
>                 .max_vl                 = SVE_VL_MIN,
>                 .max_virtualisable_vl   = SVE_VL_MIN,
> +               .max_cpu_vl             = SVE_VL_MIN,
>         },
>  #endif
>  #ifdef CONFIG_ARM64_SME
> @@ -1041,8 +1042,13 @@ static void vec_probe_vqs(struct vl_info *info,
>  void __init vec_init_vq_map(enum vec_type type)
>  {
>         struct vl_info *info = &vl_info[type];
> +       unsigned long b;
> +
>         vec_probe_vqs(info, info->vq_map);
>         bitmap_copy(info->vq_partial_map, info->vq_map, SVE_VQ_MAX);
> +
> +       b = find_first_bit(info->vq_map, SVE_VQ_MAX);
> +       info->max_cpu_vl = __bit_to_vl(b);
>  }
>
>  /*
> @@ -1054,11 +1060,16 @@ void vec_update_vq_map(enum vec_type type)
>  {
>         struct vl_info *info = &vl_info[type];
>         DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
> +       unsigned long b;
>
>         vec_probe_vqs(info, tmp_map);
>         bitmap_and(info->vq_map, info->vq_map, tmp_map, SVE_VQ_MAX);
>         bitmap_or(info->vq_partial_map, info->vq_partial_map, tmp_map,
>                   SVE_VQ_MAX);
> +
> +       b = find_first_bit(tmp_map, SVE_VQ_MAX);
> +       if (__bit_to_vl(b) > info->max_cpu_vl)
> +               info->max_cpu_vl = __bit_to_vl(b);
>  }
>
>  /*
> @@ -1069,10 +1080,23 @@ int vec_verify_vq_map(enum vec_type type)
>  {
>         struct vl_info *info = &vl_info[type];
>         DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
> -       unsigned long b;
> +       unsigned long b, max_vl;
>
>         vec_probe_vqs(info, tmp_map);
>
> +       /*
> +        * Currently the maximum VL is only used for pKVM which
> +        * doesn't allow late CPUs but we don't expect asymmetry and
> +        * if we encounter any then future users will need handling so
> +        * warn if we see anything.
> +        */
> +       max_vl = __bit_to_vl(find_first_bit(tmp_map, SVE_VQ_MAX));
> +       if (max_vl > info->max_cpu_vl) {
> +               pr_warn("%s: cpu%d: increases maximum VL to %u\n",

This should be %lu since it's an unsigned long. Otherwise it doesn't
build (clang).

Cheers,
/fuad

> +                       info->name, smp_processor_id(), max_vl);
> +               info->max_cpu_vl = max_vl;
> +       }
> +
>         bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
>         if (bitmap_intersects(tmp_map, info->vq_map, SVE_VQ_MAX)) {
>                 pr_warn("%s: cpu%d: Required vector length(s) missing\n",
>
> --
> 2.39.2
>



More information about the linux-arm-kernel mailing list