[PATCH v3 3/5] arm64: Add support for FEAT_HAFT
Catalin Marinas
catalin.marinas at arm.com
Tue Oct 22 11:30:44 PDT 2024
On Tue, Oct 22, 2024 at 05:27:32PM +0800, Yicong Yang wrote:
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 3e29b44d2d7b..029d7ad89de5 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -2176,6 +2176,21 @@ config ARCH_PKEY_BITS
> int
> default 3
>
> +config ARM64_HAFT
> + bool "Support for Hardware managed Access Flag for Table Descriptor"
Super nit: s/Descriptor/Descriptors/
> + depends on ARM64_HW_AFDBM
> + default y
> + help
> + ARMv8.9/ARMv9.5 introduces the Hardware managed Access Flag for
> + Table descriptors feature. When enabled, an architecturally executed
> + memory access will update the Access Flag in each Table descriptor
> + which is accessed during the translation table walk and for which
> + the Access Flag is 0. The Access Flag of the Table descriptor uses
> + the same bit as PTE_AF.
> +
> + The feature will only be enabled if all the CPUs in the system
> + support this feature. If unsure, say Y.
> +
> endmenu # "ARMv8.9 architectural features"
>
> config ARM64_SVE
> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
> index 3d261cc123c1..fba2347c0aa6 100644
> --- a/arch/arm64/include/asm/cpufeature.h
> +++ b/arch/arm64/include/asm/cpufeature.h
> @@ -879,6 +879,30 @@ static inline bool cpu_has_hw_af(void)
> ID_AA64MMFR1_EL1_HAFDBS_SHIFT);
> }
>
> +/*
> + * Contrary to the page/block access flag, the table access flag
> + * cannot be emulated in software (no access fault will occur).
> + * So users should use this feature only if it's supported system
> + * wide.
> + */
> +static inline bool system_support_haft(void)
> +{
> + unsigned int hafdbs;
> + u64 mmfr1;
> +
> + if (!IS_ENABLED(CONFIG_ARM64_HAFT))
> + return false;
> +
> + /*
> + * Check the sanitised registers to see whether this feature is
> + * supported on all the CPUs.
> + */
> + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
> + hafdbs = cpuid_feature_extract_unsigned_field(mmfr1,
> + ID_AA64MMFR1_EL1_HAFDBS_SHIFT);
> + return hafdbs >= ID_AA64MMFR1_EL1_HAFDBS_HAFT;
> +}
Can we not have just an entry in the arm64_features array with the type
ARM64_CPUCAP_SYSTEM_FEATURE and avoid the explicit checks here?
> diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
> index 8ff5f2a2579e..bc1051d65125 100644
> --- a/arch/arm64/include/asm/pgalloc.h
> +++ b/arch/arm64/include/asm/pgalloc.h
> @@ -30,7 +30,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
> {
> pudval_t pudval = PUD_TYPE_TABLE;
>
> - pudval |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN;
> + pudval |= (mm == &init_mm) ? PUD_TABLE_AF | PUD_TABLE_UXN : PUD_TABLE_PXN;
> __pud_populate(pudp, __pa(pmdp), pudval);
> }
Why not set the table AF for the task entries? I haven't checked the
core code but normally when we map a pte it's mapped as young. While for
table AF we wouldn't get a fault, I would have thought the core code
follows the same logic.
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index 718728a85430..6eeaaa80f6fe 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -2046,6 +2046,18 @@ static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap,
>
> #endif
>
> +#if CONFIG_ARM64_HAFT
> +
> +static struct cpumask haft_cpus;
> +
> +static void cpu_enable_haft(struct arm64_cpu_capabilities const *cap)
> +{
> + if (has_cpuid_feature(cap, SCOPE_LOCAL_CPU))
> + cpumask_set_cpu(smp_processor_id(), &haft_cpus);
> +}
> +
> +#endif /* CONFIG_ARM64_HAFT */
> +
> #ifdef CONFIG_ARM64_AMU_EXTN
>
> /*
> @@ -2590,6 +2602,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
> .cpus = &dbm_cpus,
> ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, DBM)
> },
> +#endif
> +#ifdef CONFIG_ARM64_HAFT
> + {
> + .desc = "Hardware managed Access Flag for Table Descriptor",
> + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
I'd actually use ARM64_CPUCAP_SYSTEM_FEATURE here. We use something
similar for HW DBM but there we get a fault and set the pte dirty. You
combined it with a system_support_haft() that checks the sanitised regs
but I'd rather have a static branch check via cpus_have_cap(). Even with
your approach we can have a race where a late hot-plugged CPU that
doesn't have the feature comes online in the middle of some core code
walking the page tables.
With a system feature type, late CPUs not having the feature won't be
brought online (if feature enabled) but in general I don't have much
sympathy for SoC vendors combining CPUs with incompatible features ;).
> + .capability = ARM64_HAFT,
> + .matches = has_cpuid_feature,
> + .cpu_enable = cpu_enable_haft,
> + .cpus = &haft_cpus,
> + ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, HAFT)
> + },
[...]
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index ccbae4525891..4a58b9b36eb6 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -495,9 +495,15 @@ alternative_else_nop_endif
> * via capabilities.
> */
> mrs x9, ID_AA64MMFR1_EL1
> - and x9, x9, ID_AA64MMFR1_EL1_HAFDBS_MASK
> + ubfx x9, x9, ID_AA64MMFR1_EL1_HAFDBS_SHIFT, #4
> cbz x9, 1f
> orr tcr, tcr, #TCR_HA // hardware Access flag update
> +
> +#ifdef CONFIG_ARM64_HAFT
> + cmp x9, ID_AA64MMFR1_EL1_HAFDBS_HAFT
> + b.lt 1f
> + orr tcr2, tcr2, TCR2_EL1x_HAFT
> +#endif /* CONFIG_ARM64_HAFT */
I think we can skip the ID check here and always set the HAFT bit. We do
something similar with MTE (not for TCR_HA though, don't remember why).
--
Catalin
More information about the linux-arm-kernel
mailing list