[RFC PATCH] arm64: Cache HW update of Access Flag support

Will Deacon will at kernel.org
Tue Jan 10 08:16:51 PST 2023


On Mon, Jan 09, 2023 at 12:16:13PM -0300, Gabriel Krisman Bertazi wrote:
> Will Deacon <will at kernel.org> writes:
> 
> > On Fri, Jan 06, 2023 at 01:38:25PM -0300, Gabriel Krisman Bertazi wrote:
> >> +		has_af = cpuid_feature_extract_unsigned_field(
> >> +						read_cpuid(ID_AA64MMFR1_EL1),
> >>  						ID_AA64MMFR1_EL1_HAFDBS_SHIFT);
> >> +	return has_af;
> >
> > The intention here was to read the value for the _current_ CPU, since it
> > might not be the same across the system when big.MISTAKE gets involved.
> >
> > However, since this is really just a performance optimisation and I
> > think that the access flag tends to be uniformly supported in practice
> > anyway, the best bet is probably just to read the sanitised version of
> > the field using read_sanitised_ftr_reg().
> >
> > Can you give that a shot, please?
> 
> Hey Will,
> 
> Thanks for the review.
> 
> I reran the benchmark over the weekend and the impact of the bsearch
> lookup seems to be <1% worse than my original patch, which is
> negligible, IMO.  I will shortly follow up with a v2 applying your
> suggestion.

FWIW, I had a hack kicking around to avoid the bsearch. It wasn't too
fiddly to rebase, so I've included it below if you want to take it for a
spin. The grotty part is having to maintain the extra enum, which is why
I didn't merge it in the past.

Will

--->8

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 03d1c9d7af82..6cf30b61d3d5 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -636,7 +636,53 @@ static inline bool id_aa64pfr1_mte(u64 pfr1)
 void __init setup_cpu_features(void);
 void check_local_cpu_capabilities(void);
 
+#define ARM64_FTR_REG2IDX(id)	id ## _IDX
+enum arm64_ftr_reg_idx {
+	ARM64_FTR_REG2IDX(SYS_ID_PFR0_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_PFR1_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_DFR0_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_MMFR0_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_MMFR1_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_MMFR2_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_MMFR3_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_ISAR0_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_ISAR1_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_ISAR2_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_ISAR3_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_ISAR4_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_ISAR5_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_MMFR4_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_ISAR6_EL1),
+	ARM64_FTR_REG2IDX(SYS_MVFR0_EL1),
+	ARM64_FTR_REG2IDX(SYS_MVFR1_EL1),
+	ARM64_FTR_REG2IDX(SYS_MVFR2_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_PFR2_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_DFR1_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_MMFR5_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_AA64PFR0_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_AA64PFR1_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_AA64ZFR0_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_AA64SMFR0_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_AA64DFR0_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_AA64DFR1_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_AA64ISAR0_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_AA64ISAR1_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_AA64ISAR2_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_AA64MMFR0_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_AA64MMFR1_EL1),
+	ARM64_FTR_REG2IDX(SYS_ID_AA64MMFR2_EL1),
+	ARM64_FTR_REG2IDX(SYS_ZCR_EL1),
+	ARM64_FTR_REG2IDX(SYS_SMCR_EL1),
+	ARM64_FTR_REG2IDX(SYS_GMID_EL1),
+	ARM64_FTR_REG2IDX(SYS_CTR_EL0),
+	ARM64_FTR_REG2IDX(SYS_DCZID_EL0),
+	ARM64_FTR_REG2IDX(SYS_CNTFRQ_EL0),
+
+	ARM64_FTR_REG_IDX_MAX,
+};
+
 u64 read_sanitised_ftr_reg(u32 id);
+u64 read_sanitised_ftr_reg_by_idx(enum arm64_ftr_reg_idx idx);
 u64 __read_sysreg_by_encoding(u32 sys_id);
 
 static inline bool cpu_supports_mixed_endian_el0(void)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index a77315b338e6..c71472decd51 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -624,19 +624,21 @@ static const struct arm64_ftr_bits ftr_raz[] = {
 	ARM64_FTR_END,
 };
 
-#define __ARM64_FTR_REG_OVERRIDE(id_str, id, table, ovr) {	\
-		.sys_id = id,					\
-		.reg = 	&(struct arm64_ftr_reg){		\
-			.name = id_str,				\
-			.override = (ovr),			\
-			.ftr_bits = &((table)[0]),		\
-	}}
+#define __ARM64_FTR_REG_OVERRIDE(id_str, id_idx, id, table, ovr)	\
+	[id_idx] = {							\
+		.sys_id = id,						\
+		.reg = 	&(struct arm64_ftr_reg) {			\
+			.name = id_str,					\
+			.override = (ovr),				\
+			.ftr_bits = &((table)[0]),			\
+		}							\
+	}								\
 
 #define ARM64_FTR_REG_OVERRIDE(id, table, ovr)	\
-	__ARM64_FTR_REG_OVERRIDE(#id, id, table, ovr)
+	__ARM64_FTR_REG_OVERRIDE(#id, id ## _IDX, id, table, ovr)
 
 #define ARM64_FTR_REG(id, table)		\
-	__ARM64_FTR_REG_OVERRIDE(#id, id, table, &no_override)
+	__ARM64_FTR_REG_OVERRIDE(#id, id ## _IDX, id, table, &no_override)
 
 struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override;
 struct arm64_ftr_override __ro_after_init id_aa64pfr0_override;
@@ -648,7 +650,7 @@ struct arm64_ftr_override __ro_after_init id_aa64isar2_override;
 
 static const struct __ftr_reg_entry {
 	u32			sys_id;
-	struct arm64_ftr_reg 	*reg;
+	struct arm64_ftr_reg	*reg;
 } arm64_ftr_regs[] = {
 
 	/* Op1 = 0, CRn = 0, CRm = 1 */
@@ -713,7 +715,7 @@ static const struct __ftr_reg_entry {
 	ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid),
 
 	/* Op1 = 3, CRn = 0, CRm = 0 */
-	{ SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 },
+	[ARM64_FTR_REG2IDX(SYS_CTR_EL0)] = { SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 },
 	ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid),
 
 	/* Op1 = 3, CRn = 14, CRm = 0 */
@@ -1329,6 +1331,18 @@ u64 read_sanitised_ftr_reg(u32 id)
 }
 EXPORT_SYMBOL_GPL(read_sanitised_ftr_reg);
 
+u64 read_sanitised_ftr_reg_by_idx(enum arm64_ftr_reg_idx idx)
+{
+	struct arm64_ftr_reg *regp;
+
+	if (WARN_ON((unsigned)idx >= ARM64_FTR_REG_IDX_MAX))
+		return 0;
+
+	regp = arm64_ftr_regs[idx].reg;
+	return regp->sys_val;
+}
+EXPORT_SYMBOL_GPL(read_sanitised_ftr_reg_by_idx);
+
 #define read_sysreg_case(r)	\
 	case r:		val = read_sysreg_s(r); break;
 
-- 
2.39.0.314.g84b9a713c41-goog




More information about the linux-arm-kernel mailing list