[PATCH v3 3/5] arm64: Add support for FEAT_HAFT

Catalin Marinas catalin.marinas at arm.com
Tue Oct 22 11:30:44 PDT 2024


On Tue, Oct 22, 2024 at 05:27:32PM +0800, Yicong Yang wrote:
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 3e29b44d2d7b..029d7ad89de5 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -2176,6 +2176,21 @@ config ARCH_PKEY_BITS
>  	int
>  	default 3
>  
> +config ARM64_HAFT
> +	bool "Support for Hardware managed Access Flag for Table Descriptor"

Super nit: s/Descriptor/Descriptors/

> +	depends on ARM64_HW_AFDBM
> +	default y
> +	help
> +	  ARMv8.9/ARMv9.5 introduces the Hardware managed Access Flag for
> +	  Table descriptors feature. When enabled, an architecturally
> +	  executed memory access will update the Access Flag in each Table
> +	  descriptor which is accessed during the translation table walk and
> +	  for which the Access Flag is 0. The Access Flag of the Table
> +	  descriptor uses the same bit as PTE_AF.
> +
> +	  The feature will only be enabled if all the CPUs in the system
> +	  support this feature. If unsure, say Y.
> +
>  endmenu # "ARMv8.9 architectural features"
>  
>  config ARM64_SVE
> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
> index 3d261cc123c1..fba2347c0aa6 100644
> --- a/arch/arm64/include/asm/cpufeature.h
> +++ b/arch/arm64/include/asm/cpufeature.h
> @@ -879,6 +879,30 @@ static inline bool cpu_has_hw_af(void)
>  						ID_AA64MMFR1_EL1_HAFDBS_SHIFT);
>  }
>  
> +/*
> + * Contrary to the page/block access flag, the table access flag
> + * cannot be emulated in software (no access fault will occur).
> + * So users should use this feature only if it's supported system
> + * wide.
> + */
> +static inline bool system_support_haft(void)
> +{
> +	unsigned int hafdbs;
> +	u64 mmfr1;
> +
> +	if (!IS_ENABLED(CONFIG_ARM64_HAFT))
> +		return false;
> +
> +	/*
> +	 * Check the sanitised registers to see whether this feature is
> +	 * supported on all the CPUs.
> +	 */
> +	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
> +	hafdbs = cpuid_feature_extract_unsigned_field(mmfr1,
> +						ID_AA64MMFR1_EL1_HAFDBS_SHIFT);
> +	return hafdbs >= ID_AA64MMFR1_EL1_HAFDBS_HAFT;
> +}

Can we not have just an entry in the arm64_features array with the type
ARM64_CPUCAP_SYSTEM_FEATURE and avoid the explicit checks here?

> diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
> index 8ff5f2a2579e..bc1051d65125 100644
> --- a/arch/arm64/include/asm/pgalloc.h
> +++ b/arch/arm64/include/asm/pgalloc.h
> @@ -30,7 +30,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
>  {
>  	pudval_t pudval = PUD_TYPE_TABLE;
>  
> -	pudval |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN;
> +	pudval |= (mm == &init_mm) ? PUD_TABLE_AF | PUD_TABLE_UXN : PUD_TABLE_PXN;
>  	__pud_populate(pudp, __pa(pmdp), pudval);
>  }

Why not set the table AF for the task entries? I haven't checked the
core code but normally when we map a pte it's mapped as young. While for
table AF we wouldn't get a fault, I would have thought the core code
follows the same logic.

> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index 718728a85430..6eeaaa80f6fe 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -2046,6 +2046,18 @@ static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap,
>  
>  #endif
>  
> +#if CONFIG_ARM64_HAFT
> +
> +static struct cpumask haft_cpus;
> +
> +static void cpu_enable_haft(struct arm64_cpu_capabilities const *cap)
> +{
> +	if (has_cpuid_feature(cap, SCOPE_LOCAL_CPU))
> +		cpumask_set_cpu(smp_processor_id(), &haft_cpus);
> +}
> +
> +#endif /* CONFIG_ARM64_HAFT */
> +
>  #ifdef CONFIG_ARM64_AMU_EXTN
>  
>  /*
> @@ -2590,6 +2602,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
>  		.cpus = &dbm_cpus,
>  		ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, DBM)
>  	},
> +#endif
> +#ifdef CONFIG_ARM64_HAFT
> +	{
> +		.desc = "Hardware managed Access Flag for Table Descriptor",
> +		.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,

I'd actually use ARM64_CPUCAP_SYSTEM_FEATURE here. We use something
similar for HW DBM but there we get a fault and set the pte dirty. You
combined it with a system_support_haft() that checks the sanitised regs
but I'd rather have a static branch check via cpus_have_cap(). Even with
your approach we can have a race with a late CPU hot-plugged that
doesn't have the feature in the middle of some core code walking the
page tables.

With a system feature type, late CPUs not having the feature won't be
brought online (if feature enabled) but in general I don't have much
sympathy for SoC vendors combining CPUs with incompatible features ;).

> +		.capability = ARM64_HAFT,
> +		.matches = has_cpuid_feature,
> +		.cpu_enable = cpu_enable_haft,
> +		.cpus = &haft_cpus,
> +		ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, HAFT)
> +	},
[...]
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index ccbae4525891..4a58b9b36eb6 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -495,9 +495,15 @@ alternative_else_nop_endif
>  	 * via capabilities.
>  	 */
>  	mrs	x9, ID_AA64MMFR1_EL1
> -	and	x9, x9, ID_AA64MMFR1_EL1_HAFDBS_MASK
> +	ubfx	x9, x9, ID_AA64MMFR1_EL1_HAFDBS_SHIFT, #4
>  	cbz	x9, 1f
>  	orr	tcr, tcr, #TCR_HA		// hardware Access flag update
> +
> +#ifdef CONFIG_ARM64_HAFT
> +	cmp	x9, ID_AA64MMFR1_EL1_HAFDBS_HAFT
> +	b.lt	1f
> +	orr	tcr2, tcr2, TCR2_EL1x_HAFT
> +#endif /* CONFIG_ARM64_HAFT */

I think we can skip the ID check here and always set the HAFT bit. We do
something similar with MTE (not for TCR_HA though, don't remember why).

-- 
Catalin



More information about the linux-arm-kernel mailing list