[PATCH 3/4] arm64: patching: unify stop_machine() patch synchronization
Mark Rutland
mark.rutland at arm.com
Fri Dec 3 02:47:22 PST 2021
Some instruction sequences cannot be safely modified while they may be
concurrently executed, and so it's necessary to temporarily stop all
CPUs while performing the modification. We have separate implementations
of this for alternatives and kprobes.
This patch unifies these with a common patch_machine() helper function
which handles the necessary synchronization to ensure that CPUs are
stopped during patching. This separates the patching logic from the
synchronization logic, making both easier to understand, and means that
we only have to maintain one synchronization algorithm.
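As a rough sketch of the intended usage (the frob_insns() callback and
its struct my_patch payload are invented for this example; only
patch_machine(), patch_machine_cpuslocked() and
aarch64_insn_patch_text_nosync() come from the patch below):

    static int frob_insns(void *arg)
    {
            struct my_patch *p = arg;       /* hypothetical payload */
            int i, ret = 0;

            /* Runs on one CPU; all other online CPUs are quiescent. */
            for (i = 0; ret == 0 && i < p->cnt; i++)
                    ret = aarch64_insn_patch_text_nosync(p->addrs[i],
                                                         p->insns[i]);
            return ret;
    }

    /* Without the CPU hotplug lock held: */
    ret = patch_machine(frob_insns, &p);

    /* ... or, if the caller already holds cpus_read_lock(): */
    ret = patch_machine_cpuslocked(frob_insns, &p);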
The synchronization logic in do_patch_machine() only uses unpatchable
functions, and the function itself is marked `noinstr` to prevent
instrumentation. The patch_machine() helper is left instrumentable, as
stop_machine() itself is instrumentable and so there is no benefit to
forbidding instrumentation there.
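Condensed, the protocol in do_patch_machine() (see the diff below, with
barriers and return-value plumbing elided) is:

    if (smp_processor_id() == pmi->cpu) {
            /* Chosen CPU: wait for all others to park, then patch. */
            while (arch_atomic_read(&pmi->active))
                    cpu_relax();
            ret = pmi->func(pmi->arg);
            pmi->done = 1;
    } else {
            /* Other CPUs: report quiescence, spin until patching is done. */
            arch_atomic_dec(&pmi->active);
            while (!pmi->done)
                    cpu_relax();
            isb();
    }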
As with the prior alternative patching sequence, the CPU that applies
the patch is chosen early so that the choice is deterministic.
Since __apply_alternatives_stopped() is only ever called once under
apply_alternatives_all(), the `all_alternatives_applied` variable and
warning are redundant and therefore removed.
Signed-off-by: Mark Rutland <mark.rutland at arm.com>
Cc: Andre Przywara <andre.przywara at arm.com>
Cc: Ard Biesheuvel <ardb at kernel.org>
Cc: Catalin Marinas <catalin.marinas at arm.com>
Cc: James Morse <james.morse at arm.com>
Cc: Joey Gouly <joey.gouly at arm.com>
Cc: Suzuki K Poulose <suzuki.poulose at arm.com>
Cc: Will Deacon <will at kernel.org>
---
arch/arm64/include/asm/patching.h | 4 ++
arch/arm64/kernel/alternative.c | 40 +++-----------
arch/arm64/kernel/patching.c | 91 +++++++++++++++++++++++++------
3 files changed, 84 insertions(+), 51 deletions(-)
diff --git a/arch/arm64/include/asm/patching.h b/arch/arm64/include/asm/patching.h
index 6bf5adc56295..25c199bc55d2 100644
--- a/arch/arm64/include/asm/patching.h
+++ b/arch/arm64/include/asm/patching.h
@@ -10,4 +10,8 @@ int aarch64_insn_write(void *addr, u32 insn);
int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
+typedef int (*patch_machine_func_t)(void *);
+int patch_machine_cpuslocked(patch_machine_func_t func, void *arg);
+int patch_machine(patch_machine_func_t func, void *arg);
+
#endif /* __ASM_PATCHING_H */
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 4f32d4425aac..d2b4b9e6a0e4 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -14,8 +14,8 @@
#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/insn.h>
+#include <asm/patching.h>
#include <asm/sections.h>
-#include <linux/stop_machine.h>
#define __ALT_PTR(a, f) ((void *)&(a)->f + (a)->f)
#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset)
@@ -189,43 +189,17 @@ static void __nocfi __apply_alternatives(struct alt_region *region, bool is_modu
}
}
-/*
- * Apply alternatives, ensuring that no CPUs are concurrently executing code
- * being patched.
- *
- * We might be patching the stop_machine state machine or READ_ONCE(), so
- * we implement a simple polling protocol.
- */
-static int __apply_alternatives_multi_stop(void *unused)
+static int __apply_alternatives_stopped(void *unused)
{
- /* Volatile, as we may be patching the guts of READ_ONCE() */
- static volatile int all_alternatives_applied;
- static atomic_t stopped_cpus = ATOMIC_INIT(0);
struct alt_region region = {
.begin = (struct alt_instr *)__alt_instructions,
.end = (struct alt_instr *)__alt_instructions_end,
};
+ DECLARE_BITMAP(remaining_capabilities, ARM64_NPATCHABLE);
- /* We always have a CPU 0 at this point (__init) */
- if (smp_processor_id()) {
- arch_atomic_inc(&stopped_cpus);
- while (!all_alternatives_applied)
- cpu_relax();
- isb();
- } else {
- DECLARE_BITMAP(remaining_capabilities, ARM64_NPATCHABLE);
-
- while (arch_atomic_read(&stopped_cpus) != num_online_cpus() - 1)
- cpu_relax();
-
- bitmap_complement(remaining_capabilities, boot_capabilities,
- ARM64_NPATCHABLE);
-
- BUG_ON(all_alternatives_applied);
- __apply_alternatives(&region, false, remaining_capabilities);
- /* Barriers provided by the cache flushing */
- all_alternatives_applied = 1;
- }
+ bitmap_complement(remaining_capabilities, boot_capabilities,
+ ARM64_NPATCHABLE);
+ __apply_alternatives(&region, false, remaining_capabilities);
return 0;
}
@@ -233,7 +207,7 @@ static int __apply_alternatives_multi_stop(void *unused)
void __init apply_alternatives_all(void)
{
/* better not try code patching on a live SMP system */
- stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask);
+ patch_machine(__apply_alternatives_stopped, NULL);
}
/*
diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c
index c0d51340c913..04497dbf14e2 100644
--- a/arch/arm64/kernel/patching.c
+++ b/arch/arm64/kernel/patching.c
@@ -105,31 +105,88 @@ int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
return ret;
}
+struct patch_machine_info {
+ patch_machine_func_t func;
+ void *arg;
+ int cpu;
+ atomic_t active;
+ volatile int done;
+};
+
+/*
+ * Run a code patching function on a single CPU, ensuring that no CPUs are
+ * concurrently executing code being patched.
+ *
+ * We wait for other CPUs to become quiescent before starting patching, and
+ * wait until patching is completed before other CPUs are woken.
+ *
+ * The patching function is responsible for any barriers necessary to make new
+ * instructions visible to other CPUs. The other CPUs will issue an ISB upon
+ * being woken to ensure they use the new instructions.
+ */
+static int noinstr do_patch_machine(void *arg)
+{
+ struct patch_machine_info *pmi = arg;
+ int cpu = smp_processor_id();
+ int ret = 0;
+
+ if (pmi->cpu == cpu) {
+ while (arch_atomic_read(&pmi->active))
+ cpu_relax();
+ ret = pmi->func(pmi->arg);
+ pmi->done = 1;
+ } else {
+ arch_atomic_dec(&pmi->active);
+ while (!pmi->done)
+ cpu_relax();
+ isb();
+ }
+
+ return ret;
+}
+
+/*
+ * Run a code patching function on a single CPU, ensuring that no CPUs are
+ * concurrently executing code being patched.
+ */
+int patch_machine_cpuslocked(patch_machine_func_t func, void *arg)
+{
+ struct patch_machine_info pmi = {
+ .func = func,
+ .arg = arg,
+ .cpu = raw_smp_processor_id(),
+ .active = ATOMIC_INIT(num_online_cpus() - 1),
+ .done = 0,
+ };
+
+ return stop_machine_cpuslocked(do_patch_machine, &pmi, cpu_online_mask);
+}
+
+int patch_machine(patch_machine_func_t func, void *arg)
+{
+ int ret;
+
+ cpus_read_lock();
+ ret = patch_machine_cpuslocked(func, arg);
+ cpus_read_unlock();
+
+ return ret;
+}
+
struct aarch64_insn_patch {
void **text_addrs;
u32 *new_insns;
int insn_cnt;
- atomic_t cpu_count;
};
static int __kprobes aarch64_insn_patch_text_cb(void *arg)
{
int i, ret = 0;
struct aarch64_insn_patch *pp = arg;
- int num_cpus = num_online_cpus();
-
- /* The last CPU becomes master */
- if (arch_atomic_inc_return(&pp->cpu_count) == num_cpus) {
- for (i = 0; ret == 0 && i < pp->insn_cnt; i++)
- ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i],
- pp->new_insns[i]);
- /* Notify other processors with an additional increment. */
- atomic_inc(&pp->cpu_count);
- } else {
- while (arch_atomic_read(&pp->cpu_count) <= num_cpus)
- cpu_relax();
- isb();
- }
+
+ for (i = 0; ret == 0 && i < pp->insn_cnt; i++)
+ ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i],
+ pp->new_insns[i]);
return ret;
}
@@ -140,12 +197,10 @@ int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
.text_addrs = addrs,
.new_insns = insns,
.insn_cnt = cnt,
- .cpu_count = ATOMIC_INIT(0),
};
if (cnt <= 0)
return -EINVAL;
- return stop_machine_cpuslocked(aarch64_insn_patch_text_cb, &patch,
- cpu_online_mask);
+ return patch_machine_cpuslocked(aarch64_insn_patch_text_cb, &patch);
}
--
2.30.2