[PATCH 1/2] lib: sbi: Simplify wait_for_coldboot() implementation

Anup Patel apatel at ventanamicro.com
Tue Mar 19 07:59:29 PDT 2024


On QEMU virt machine with large number of HARTs, some of the HARTs
randomly fail to come out of wait_for_coldboot() due to one of the
following race-conditions:

1) Failing HARTs are not able to acquire the coldboot_lock and
   update the coldboot_hartmask in wait_for_coldboot() before
   the coldboot HART acquires the coldboot_lock and sends IPI
   in wake_coldboot_harts() hence the failing HARTs never
   receive IPI from the coldboot HART.

2) Failing HARTs acquire the coldbood_lock and update the
   coldboot_hartmask before coldboot HART does sbi_scratch_init()
   so the sbi_hartmask_set_hartid() does not update the
   coldboot_hartmask on the failing HARTs hence they never
   receive IPI from the coldboot HART.

To address this, use a simple busy-loop in wait_for_coldboot() for
polling on coldboot_done flag.

Signed-off-by: Anup Patel <apatel at ventanamicro.com>
---
 include/sbi/riscv_barrier.h |  6 +++-
 lib/sbi/sbi_init.c          | 70 ++-----------------------------------
 2 files changed, 8 insertions(+), 68 deletions(-)

diff --git a/include/sbi/riscv_barrier.h b/include/sbi/riscv_barrier.h
index 1fba8b8..3d4a038 100644
--- a/include/sbi/riscv_barrier.h
+++ b/include/sbi/riscv_barrier.h
@@ -40,7 +40,11 @@
 #define smp_wmb()		RISCV_FENCE(w,w)
 
 /* CPU relax for busy loop */
-#define cpu_relax()		asm volatile ("" : : : "memory")
+#define cpu_relax()						\
+do {								\
+	unsigned long __t;					\
+	__asm__ __volatile__ ("div %0, %0, zero" : "=r" (__t));	\
+} while (0)
 
 /* clang-format on */
 
diff --git a/lib/sbi/sbi_init.c b/lib/sbi/sbi_init.c
index 796cccc..b3f3e38 100644
--- a/lib/sbi/sbi_init.c
+++ b/lib/sbi/sbi_init.c
@@ -10,7 +10,6 @@
 #include <sbi/riscv_asm.h>
 #include <sbi/riscv_atomic.h>
 #include <sbi/riscv_barrier.h>
-#include <sbi/riscv_locks.h>
 #include <sbi/sbi_console.h>
 #include <sbi/sbi_cppc.h>
 #include <sbi/sbi_domain.h>
@@ -190,82 +189,19 @@ static void sbi_boot_print_hart(struct sbi_scratch *scratch, u32 hartid)
 	sbi_hart_delegation_dump(scratch, "Boot HART ", "         ");
 }
 
-static spinlock_t coldboot_lock = SPIN_LOCK_INITIALIZER;
-static struct sbi_hartmask coldboot_wait_hmask = { 0 };
-
 static unsigned long coldboot_done;
 
 static void wait_for_coldboot(struct sbi_scratch *scratch, u32 hartid)
 {
-	unsigned long saved_mie, cmip;
-
-	if (__smp_load_acquire(&coldboot_done))
-		return;
-
-	/* Save MIE CSR */
-	saved_mie = csr_read(CSR_MIE);
-
-	/* Set MSIE and MEIE bits to receive IPI */
-	csr_set(CSR_MIE, MIP_MSIP | MIP_MEIP);
-
-	/* Acquire coldboot lock */
-	spin_lock(&coldboot_lock);
-
-	/* Mark current HART as waiting */
-	sbi_hartmask_set_hartid(hartid, &coldboot_wait_hmask);
-
-	/* Release coldboot lock */
-	spin_unlock(&coldboot_lock);
-
-	/* Wait for coldboot to finish using WFI */
-	while (!__smp_load_acquire(&coldboot_done)) {
-		do {
-			wfi();
-			cmip = csr_read(CSR_MIP);
-		 } while (!(cmip & (MIP_MSIP | MIP_MEIP)));
-	}
-
-	/* Acquire coldboot lock */
-	spin_lock(&coldboot_lock);
-
-	/* Unmark current HART as waiting */
-	sbi_hartmask_clear_hartid(hartid, &coldboot_wait_hmask);
-
-	/* Release coldboot lock */
-	spin_unlock(&coldboot_lock);
-
-	/* Restore MIE CSR */
-	csr_write(CSR_MIE, saved_mie);
-
-	/*
-	 * The wait for coldboot is common for both warm startup and
-	 * warm resume path so clearing IPI here would result in losing
-	 * an IPI in warm resume path.
-	 *
-	 * Also, the sbi_platform_ipi_init() called from sbi_ipi_init()
-	 * will automatically clear IPI for current HART.
-	 */
+	/* Wait for coldboot to finish */
+	while (!__smp_load_acquire(&coldboot_done))
+		cpu_relax();
 }
 
 static void wake_coldboot_harts(struct sbi_scratch *scratch, u32 hartid)
 {
-	u32 i, hartindex = sbi_hartid_to_hartindex(hartid);
-
 	/* Mark coldboot done */
 	__smp_store_release(&coldboot_done, 1);
-
-	/* Acquire coldboot lock */
-	spin_lock(&coldboot_lock);
-
-	/* Send an IPI to all HARTs waiting for coldboot */
-	sbi_hartmask_for_each_hartindex(i, &coldboot_wait_hmask) {
-		if (i == hartindex)
-			continue;
-		sbi_ipi_raw_send(i);
-	}
-
-	/* Release coldboot lock */
-	spin_unlock(&coldboot_lock);
 }
 
 static unsigned long entry_count_offset;
-- 
2.34.1




More information about the opensbi mailing list