[PATCH v4 15/13] firmware: arm_sdei: be more robust against cpu-hotplug

James Morse james.morse at arm.com
Wed Nov 8 08:06:24 PST 2017


dpm_suspend() calls the freeze/thaw callbacks for hibernate before
disable_nonboot_cpus() takes down the secondary CPUs.

This leads to a fun race where the freeze/thaw callbacks reset the
SDEI interface (as we may be restoring a kernel with a different
layout due to KASLR), then the cpu-hotplug callbacks come in to
save the current state, which has already been reset.

I tried to solve this with a 'frozen' flag that stops the hotplug
callback from overwriting the saved values, but this just moves the
race around and makes it even harder to think about.

Instead, make it look like the secondaries have gone offline.
Call cpuhp_remove_state() in the freeze callback. This calls the
teardown hook on all online CPUs and then removes the state, which
saves all private events and makes future CPU up/down events invisible.

Change sdei_event_unregister_all()/sdei_reregister_events() to
only save/restore shared events, which are all that is left, and
rename them to sdei_unregister_shared()/sdei_reregister_shared() to
match. With this we can remove the frozen flag. We can remove the
device suspend/resume calls too, as cpuhotplug's teardown call has
already masked the CPUs.

All that is left is the reboot notifier (which was abusing the
frozen flag). Call cpuhp_remove_state() there too, to make it look
like the secondary CPUs have gone offline.

Suggested-by: Will Deacon <will.deacon at arm.com>
Signed-off-by: James Morse <james.morse at arm.com>
---
 drivers/firmware/arm_sdei.c | 60 +++++++++++++++++++++++----------------------
 1 file changed, 31 insertions(+), 29 deletions(-)

diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index 65a8f122f545..d50634a25954 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -86,9 +86,6 @@ struct sdei_event {
 static LIST_HEAD(sdei_events);
 static DEFINE_SPINLOCK(sdei_events_lock);
 
-/* When frozen, cpu-hotplug notifiers shouldn't unregister/re-register events */
-static bool frozen;
-
 static DEFINE_PER_CPU(u64, sdei_running_event) = -1;
 
 /* Private events are registered/enabled via IPI passing one of these */
@@ -625,15 +622,18 @@ EXPORT_SYMBOL(sdei_event_unregister);
 
 /*
  * unregister events, but don't destroy them as they are re-registered by
- * sdei_reregister_events().
+ * sdei_reregister_shared().
  */
-static int sdei_event_unregister_all(void)
+static int sdei_unregister_shared(void)
 {
 	int err = 0;
 	struct sdei_event *event;
 
 	spin_lock(&sdei_events_lock);
 	list_for_each_entry(event, &sdei_events, list) {
+		if (event->type != SDEI_EVENT_TYPE_SHARED)
+			continue;
+
 		err = _sdei_event_unregister(event);
 		if (err)
 			break;
@@ -841,13 +841,16 @@ static int sdei_reregister_event(struct sdei_event *event)
 	return err;
 }
 
-static int sdei_reregister_events(void)
+static int sdei_reregister_shared(void)
 {
 	int err = 0;
 	struct sdei_event *event;
 
 	spin_lock(&sdei_events_lock);
 	list_for_each_entry(event, &sdei_events, list) {
+		if (event->type != SDEI_EVENT_TYPE_SHARED)
+			continue;
+
 		err = sdei_reregister_event(event);
 		if (err)
 			break;
@@ -862,11 +865,6 @@ static int sdei_cpuhp_down(unsigned int cpu)
 	struct sdei_event *event;
 	struct sdei_crosscall_args arg;
 
-	if (frozen) {
-		/* All events unregistered  */
-		return sdei_mask_local_cpu();
-	}
-
 	/* un-register private events */
 	spin_lock(&sdei_events_lock);
 	list_for_each_entry(event, &sdei_events, list) {
@@ -890,11 +888,6 @@ static int sdei_cpuhp_up(unsigned int cpu)
 	struct sdei_event *event;
 	struct sdei_crosscall_args arg;
 
-	if (frozen) {
-		/* Events will be re-registered when we thaw. */
-		return sdei_unmask_local_cpu();
-	}
-
 	/* re-register/enable private events */
 	spin_lock(&sdei_events_lock);
 	list_for_each_entry(event, &sdei_events, list) {
@@ -1004,22 +997,33 @@ static int sdei_device_freeze(struct device *dev)
 {
 	int err;
 
-	frozen = true;
-	err = sdei_event_unregister_all();
+	/* save and unregister private events */
+	cpuhp_remove_state(CPUHP_AP_ARM_SDEI_STARTING);
+
+	err = sdei_unregister_shared();
 	if (err)
 		return err;
 
-	return sdei_device_suspend(dev);
+	return 0;
 }
 
 static int sdei_device_thaw(struct device *dev)
 {
 	int err;
 
-	sdei_device_resume(dev);
+	/* re-register shared events */
+	err = sdei_reregister_shared();
+	if (err) {
+		pr_warn("Failed to re-register shared events...\n");
+		sdei_mark_interface_broken();
+		return err;
+	}
+
+	err = cpuhp_setup_state(CPUHP_AP_ARM_SDEI_STARTING, "SDEI",
+				&sdei_cpuhp_up, &sdei_cpuhp_down);
+	if (err)
+		pr_warn("Failed to re-register CPU hotplug notifier...\n");
 
-	err = sdei_reregister_events();
-	frozen = false;
 	return err;
 }
 
@@ -1048,15 +1052,13 @@ static const struct dev_pm_ops sdei_pm_ops = {
 static int sdei_reboot_notifier(struct notifier_block *nb, unsigned long action,
 				void *data)
 {
-	on_each_cpu(&_ipi_mask_cpu, NULL, true);
-
-	sdei_platform_reset();
-
 	/*
-	 * There is now no point trying to unregister private events if we go on
-	 * to take CPUs offline.
+	 * We are going to reset the interface, after this there is no point
+	 * doing work when we take CPUs offline.
 	 */
-	frozen = true;
+	cpuhp_remove_state(CPUHP_AP_ARM_SDEI_STARTING);
+
+	sdei_platform_reset();
 
 	return NOTIFY_OK;
 }
-- 
2.15.0




More information about the linux-arm-kernel mailing list