ARM64 board Hikey960 boot failure due to f2545b2d4ce1 (jump_label: Reorder hotplug lock and jump_label_lock)

Marc Zyngier marc.zyngier at arm.com
Wed Jul 26 08:13:49 PDT 2017


[+Mark]

Hi Leo,

On 24/07/17 15:34, Leo Yan wrote:
> Hi all,
> 
> We found the mainline arm64 kernel boot failure on Hikey960 board,
> this is caused by patch f2545b2d4ce1 (jump_label: Reorder hotplug lock
> and jump_label_lock), this patch adds locking cpus_read_lock() in
> function static_key_slow_inc() and introduce the dead lock issue by
> acquiring lock twice. Below are detailed flow:
> 
> arch_timer_register()
>  `> cpuhp_setup_state()
>      `> __cpuhp_setup_state()
>         cpus_read_lock()
>          `> __cpuhp_setup_state_cpuslocked()
>              `> cpuhp_issue_call()
>                  `> arch_timer_starting_cpu()
>                      `> __arch_timer_setup()
>                          `> arch_timer_check_ool_workaround()
>                              `> arch_timer_enable_workaround()
>                                  `> static_branch_enable()
>                                      `> static_key_enable()
>                                          `> static_key_slow_inc()
>                                              `> cpus_read_lock()
> 
> So finally there have called cpus_read_lock() twice, and kernel report
> log as below. So I am not sure what's the best way to fix this issue,
> could you give some suggestion for this? Thanks.

[...]

Thanks for this. Unfortunately, there is no easy fix for this.
Can you give the patch below a go and let us know if that solves
the issue you observed? I only tested in on a model...

Should this be considered an acceptable solution, I'll split that
into individual patches and repost it as a proper series.

Thanks,

	M.

diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index aae87c4c546e..44232f378543 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -455,7 +455,11 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa
 			per_cpu(timer_unstable_counter_workaround, i) = wa;
 	}
 
-	static_branch_enable(&arch_timer_read_ool_enabled);
+	/*
+	 * Use the _locked version, as we're called from the CPU
+	 * hotplug framework. Otherwise, we end-up in deadlock-land.
+	 */
+	static_branch_enable_locked(&arch_timer_read_ool_enabled);
 
 	/*
 	 * Don't use the vdso fastpath if errata require using the
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 2afd74b9d844..5cfbf9ff3fe8 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -159,10 +159,14 @@ extern void arch_jump_label_transform_static(struct jump_entry *entry,
 extern int jump_label_text_reserved(void *start, void *end);
 extern void static_key_slow_inc(struct static_key *key);
 extern void static_key_slow_dec(struct static_key *key);
+extern void static_key_slow_inc_locked(struct static_key *key);
+extern void static_key_slow_dec_locked(struct static_key *key);
 extern void jump_label_apply_nops(struct module *mod);
 extern int static_key_count(struct static_key *key);
 extern void static_key_enable(struct static_key *key);
 extern void static_key_disable(struct static_key *key);
+extern void static_key_enable_locked(struct static_key *key);
+extern void static_key_disable_locked(struct static_key *key);
 
 /*
  * We should be using ATOMIC_INIT() for initializing .enabled, but
@@ -213,12 +217,16 @@ static inline void static_key_slow_inc(struct static_key *key)
 	atomic_inc(&key->enabled);
 }
 
+#define static_key_slow_inc_locked(k)	static_key_slow_inc((k))
+
 static inline void static_key_slow_dec(struct static_key *key)
 {
 	STATIC_KEY_CHECK_USE();
 	atomic_dec(&key->enabled);
 }
 
+#define static_key_slow_dec_locked(k)	static_key_slow_inc((k))
+
 static inline int jump_label_text_reserved(void *start, void *end)
 {
 	return 0;
@@ -415,6 +423,8 @@ extern bool ____wrong_branch_error(void);
 
 #define static_branch_enable(x)		static_key_enable(&(x)->key)
 #define static_branch_disable(x)	static_key_disable(&(x)->key)
+#define static_branch_enable_locked(x)	static_key_enable_locked(&(x)->key)
+#define static_branch_disable_locked(x)	static_key_disable_locked(&(x)->key)
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index d11c506a6ac3..f543f765a738 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -57,6 +57,11 @@ jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop)
 }
 
 static void jump_label_update(struct static_key *key);
+static void static_key_slow_inc_with_lock(struct static_key *key, bool lock);
+static void __static_key_slow_dec_with_lock(struct static_key *key,
+					    bool lock,
+					    unsigned long rate_limit,
+					    struct delayed_work *work);
 
 /*
  * There are similar definitions for the !HAVE_JUMP_LABEL case in jump_label.h.
@@ -79,29 +84,53 @@ int static_key_count(struct static_key *key)
 }
 EXPORT_SYMBOL_GPL(static_key_count);
 
-void static_key_enable(struct static_key *key)
+static void static_key_enable_with_lock(struct static_key *key, bool lock)
 {
 	int count = static_key_count(key);
 
 	WARN_ON_ONCE(count < 0 || count > 1);
 
 	if (!count)
-		static_key_slow_inc(key);
+		static_key_slow_inc_with_lock(key, lock);
+}
+
+void static_key_enable(struct static_key *key)
+{
+	static_key_enable_with_lock(key, true);
 }
 EXPORT_SYMBOL_GPL(static_key_enable);
 
-void static_key_disable(struct static_key *key)
+void static_key_enable_locked(struct static_key *key)
+{
+	lockdep_assert_cpus_held();
+	static_key_enable_with_lock(key, false);
+}
+EXPORT_SYMBOL_GPL(static_key_enable_locked);
+
+static void static_key_disable_with_lock(struct static_key *key, bool lock)
 {
 	int count = static_key_count(key);
 
 	WARN_ON_ONCE(count < 0 || count > 1);
 
 	if (count)
-		static_key_slow_dec(key);
+		__static_key_slow_dec_with_lock(key, lock, 0, NULL);
+}
+
+void static_key_disable(struct static_key *key)
+{
+	static_key_disable_with_lock(key, true);
 }
 EXPORT_SYMBOL_GPL(static_key_disable);
 
-void static_key_slow_inc(struct static_key *key)
+void static_key_disable_locked(struct static_key *key)
+{
+	lockdep_assert_cpus_held();
+	static_key_disable_with_lock(key, false);
+}
+EXPORT_SYMBOL_GPL(static_key_disable_locked);
+
+static void static_key_slow_inc_with_lock(struct static_key *key, bool lock)
 {
 	int v, v1;
 
@@ -125,7 +154,8 @@ void static_key_slow_inc(struct static_key *key)
 			return;
 	}
 
-	cpus_read_lock();
+	if (lock)
+		cpus_read_lock();
 	jump_label_lock();
 	if (atomic_read(&key->enabled) == 0) {
 		atomic_set(&key->enabled, -1);
@@ -135,14 +165,30 @@ void static_key_slow_inc(struct static_key *key)
 		atomic_inc(&key->enabled);
 	}
 	jump_label_unlock();
-	cpus_read_unlock();
+	if (lock)
+		cpus_read_unlock();
+}
+
+void static_key_slow_inc(struct static_key *key)
+{
+	static_key_slow_inc_with_lock(key, true);
 }
 EXPORT_SYMBOL_GPL(static_key_slow_inc);
 
-static void __static_key_slow_dec(struct static_key *key,
-		unsigned long rate_limit, struct delayed_work *work)
+void static_key_slow_inc_locked(struct static_key *key)
 {
-	cpus_read_lock();
+	lockdep_assert_cpus_held();
+	static_key_slow_inc_with_lock(key, false);
+}
+EXPORT_SYMBOL_GPL(static_key_slow_inc_locked);
+
+static void __static_key_slow_dec_with_lock(struct static_key *key,
+					    bool lock,
+					    unsigned long rate_limit,
+					    struct delayed_work *work)
+{
+	if (lock)
+		cpus_read_lock();
 	/*
 	 * The negative count check is valid even when a negative
 	 * key->enabled is in use by static_key_slow_inc(); a
@@ -153,7 +199,8 @@ static void __static_key_slow_dec(struct static_key *key,
 	if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) {
 		WARN(atomic_read(&key->enabled) < 0,
 		     "jump label: negative count!\n");
-		cpus_read_unlock();
+		if (lock)
+			cpus_read_unlock();
 		return;
 	}
 
@@ -164,27 +211,36 @@ static void __static_key_slow_dec(struct static_key *key,
 		jump_label_update(key);
 	}
 	jump_label_unlock();
-	cpus_read_unlock();
+	if (lock)
+		cpus_read_unlock();
 }
 
 static void jump_label_update_timeout(struct work_struct *work)
 {
 	struct static_key_deferred *key =
 		container_of(work, struct static_key_deferred, work.work);
-	__static_key_slow_dec(&key->key, 0, NULL);
+	__static_key_slow_dec_with_lock(&key->key, true, 0, NULL);
 }
 
 void static_key_slow_dec(struct static_key *key)
 {
 	STATIC_KEY_CHECK_USE();
-	__static_key_slow_dec(key, 0, NULL);
+	__static_key_slow_dec_with_lock(key, true, 0, NULL);
 }
 EXPORT_SYMBOL_GPL(static_key_slow_dec);
 
+void static_key_slow_dec_locked(struct static_key *key)
+{
+	lockdep_assert_cpus_held();
+	STATIC_KEY_CHECK_USE();
+	__static_key_slow_dec_with_lock(key, false, 0, NULL);
+}
+EXPORT_SYMBOL_GPL(static_key_slow_dec_locked);
+
 void static_key_slow_dec_deferred(struct static_key_deferred *key)
 {
 	STATIC_KEY_CHECK_USE();
-	__static_key_slow_dec(&key->key, key->timeout, &key->work);
+	__static_key_slow_dec_with_lock(&key->key, true, key->timeout, &key->work);
 }
 EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred);
 

-- 
Jazz is not dead. It just smells funny...



More information about the linux-arm-kernel mailing list