[PATCH] arm64: mte: allow async MTE to be upgraded to sync on a per-CPU basis

Peter Collingbourne pcc at google.com
Wed Jun 2 16:24:45 PDT 2021


On some CPUs the performance of MTE in synchronous mode is the same
as that of asynchronous mode. This makes it worthwhile to enable
synchronous mode on those CPUs when asynchronous mode is requested,
in order to gain the error detection benefits of synchronous mode
without the performance downsides. Therefore, make it possible for CPUs
to opt into upgrading to synchronous mode via a new mte-prefer-sync
device tree property.

Signed-off-by: Peter Collingbourne <pcc at google.com>
Link: https://linux-review.googlesource.com/id/Id6f95b71fde6e701dd30b5e108126af7286147e8
---
 arch/arm64/kernel/process.c |  8 ++++++++
 arch/arm64/kernel/smp.c     | 22 ++++++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index b4bb67f17a2c..ba6ed0c1390c 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -527,8 +527,16 @@ static void erratum_1418040_thread_switch(struct task_struct *prev,
 	write_sysreg(val, cntkctl_el1);
 }
 
+DECLARE_PER_CPU_READ_MOSTLY(bool, mte_prefer_sync);
+
 static void update_sctlr_el1(u64 sctlr)
 {
+	if ((sctlr & SCTLR_EL1_TCF0_MASK) == SCTLR_EL1_TCF0_ASYNC &&
+	    __this_cpu_read(mte_prefer_sync)) {
+		sctlr &= ~SCTLR_EL1_TCF0_MASK;
+		sctlr |= SCTLR_EL1_TCF0_SYNC;
+	}
+
 	/*
 	 * EnIA must not be cleared while in the kernel as this is necessary for
 	 * in-kernel PAC. It will be cleared on kernel exit if needed.
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dcd7041b2b07..3a475722f768 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -56,6 +56,8 @@
 
 DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
 EXPORT_PER_CPU_SYMBOL(cpu_number);
+DEFINE_PER_CPU_READ_MOSTLY(bool, mte_prefer_sync);
+EXPORT_PER_CPU_SYMBOL(mte_prefer_sync);
 
 /*
  * as from 2.5, kernels no longer have an init_tasks structure
@@ -649,6 +651,16 @@ static void __init acpi_parse_and_init_cpus(void)
 #define acpi_parse_and_init_cpus(...)	do { } while (0)
 #endif
 
+/*
+ * Read per-CPU properties from the device tree and store them in per-CPU
+ * variables for efficient access later.
+ */
+static void __init of_read_cpu_properties(int cpu, struct device_node *dn)
+{
+	per_cpu(mte_prefer_sync, cpu) =
+		of_property_read_bool(dn, "mte-prefer-sync");
+}
+
 /*
  * Enumerate the possible CPU set from the device tree and build the
  * cpu logical map array containing MPIDR values related to logical
@@ -789,6 +801,16 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 		set_cpu_present(cpu, true);
 		numa_store_cpu_info(cpu);
 	}
+
+	if (acpi_disabled) {
+		struct device_node *dn;
+		int cpu = 0;
+
+		for_each_of_cpu_node(dn) {
+			of_read_cpu_properties(cpu, dn);
+			cpu++;
+		}
+	}
 }
 
 static const char *ipi_types[NR_IPI] __tracepoint_string = {
-- 
2.32.0.rc1.229.g3e70b5a671-goog




More information about the linux-arm-kernel mailing list