From 4140f3d1cb2ae2b7afc4a1c8188a56276193d781 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh@NetBSD.org>
Date: Sat, 13 Nov 2021 13:02:04 +0000
Subject: [PATCH] arm: Fix CPU startup synchronization.

- Use load-acquire instead of membar_consumer followed by the load
  (wrong order: the barrier has to come after the load, not before
  it) in cpu_boot_secondary_processors and cpu_hatched_p.

  => (Could use load then membar_consumer instead but load-acquire is
     shorter.)

- Issue membar_exit before setting or clearing the bit in
  cpu_set_hatched and cpu_clr_mbox.

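This way, everything one CPU does before cpu_set_hatched or
cpu_clr_mbox is guaranteed to happen before everything another CPU
does after it observes the update, i.e. after the wait loop in
cpu_boot_secondary_processors finishes or cpu_hatched_p returns true.
Previously this ordering was not guaranteed.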
---
 sys/arch/arm/arm/cpu_subr.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/sys/arch/arm/arm/cpu_subr.c b/sys/arch/arm/arm/cpu_subr.c
index fcbf500fcbd6..1fca8c3f9d42 100644
--- a/sys/arch/arm/arm/cpu_subr.c
+++ b/sys/arch/arm/arm/cpu_subr.c
@@ -95,7 +95,7 @@ cpu_boot_secondary_processors(void)
 		const size_t off = cpuno / CPUINDEX_DIVISOR;
 		const u_long bit = __BIT(cpuno % CPUINDEX_DIVISOR);
 
-		while (membar_consumer(), arm_cpu_mbox[off] & bit) {
+		while (atomic_load_acquire(&arm_cpu_mbox[off]) & bit) {
 			__asm __volatile ("wfe");
 		}
 		/* Add processor to kcpuset */
@@ -111,8 +111,7 @@ cpu_hatched_p(u_int cpuindex)
 	const u_int off = cpuindex / CPUINDEX_DIVISOR;
 	const u_int bit = cpuindex % CPUINDEX_DIVISOR;
 
-	membar_consumer();
-	return (arm_cpu_hatched[off] & __BIT(bit)) != 0;
+	return (atomic_load_acquire(&arm_cpu_hatched[off]) & __BIT(bit)) != 0;
 }
 
 void
@@ -122,6 +121,7 @@ cpu_set_hatched(int cpuindex)
 	const size_t off = cpuindex / CPUINDEX_DIVISOR;
 	const u_long bit = __BIT(cpuindex % CPUINDEX_DIVISOR);
 
+	membar_exit();		/* store-release */
 	atomic_or_ulong(&arm_cpu_hatched[off], bit);
 	dsb(ishst);
 	sev();
@@ -135,6 +135,7 @@ cpu_clr_mbox(int cpuindex)
 	const u_long bit = __BIT(cpuindex % CPUINDEX_DIVISOR);
 
 	/* Notify cpu_boot_secondary_processors that we're done */
+	membar_exit();		/* store-release */
 	atomic_and_ulong(&arm_cpu_mbox[off], ~bit);
 	dsb(ishst);
 	sev();