From d975b639df8559416b58dba9b8cb0a0b454605fa Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Sat, 12 Feb 2022 03:19:54 +0000 Subject: [PATCH] mips: Eradicate last vestiges of bonkers mb_* barriers. Leave an essay with citations about why we have an apparently pointless syncw _after_ releasing a lock, just for cnmips. --- common/lib/libc/arch/mips/atomic/membar_ops.S | 10 --- sys/arch/mips/include/lock.h | 88 ++++++++----------- 2 files changed, 36 insertions(+), 62 deletions(-) diff --git a/common/lib/libc/arch/mips/atomic/membar_ops.S b/common/lib/libc/arch/mips/atomic/membar_ops.S index 03fcb5a32e0b..ecdc8ecbdf6b 100644 --- a/common/lib/libc/arch/mips/atomic/membar_ops.S +++ b/common/lib/libc/arch/mips/atomic/membar_ops.S @@ -46,16 +46,6 @@ LEAF(_membar_producer) END(_membar_producer) #endif -#ifdef _KERNEL -STRONG_ALIAS(mb_read, _membar_sync) -#ifdef __OCTEON__ -STRONG_ALIAS(mb_write, _membar_producer) -#else -STRONG_ALIAS(mb_write, _membar_sync) -#endif -STRONG_ALIAS(mb_memory, _membar_sync) -#endif - ATOMIC_OP_ALIAS(membar_sync,_membar_sync) ATOMIC_OP_ALIAS(membar_enter,_membar_sync) STRONG_ALIAS(_membar_enter,_membar_sync) diff --git a/sys/arch/mips/include/lock.h b/sys/arch/mips/include/lock.h index 4317bd1af8e8..1ee1bf33977c 100644 --- a/sys/arch/mips/include/lock.h +++ b/sys/arch/mips/include/lock.h @@ -41,6 +41,8 @@ #include +#include + static __inline int __SIMPLELOCK_LOCKED_P(const __cpu_simple_lock_t *__ptr) { @@ -98,58 +100,11 @@ __cpu_simple_lock_try(__cpu_simple_lock_t *lp) return (v0 != 0); } -#ifdef MIPS1 -static __inline void -mb_read(void) -{ - __insn_barrier(); -} - -static __inline void -mb_write(void) -{ - __insn_barrier(); -} - -static __inline void -mb_memory(void) -{ - __insn_barrier(); -} -#else /* MIPS1*/ -static __inline void -mb_read(void) -{ - __asm volatile( - " .set push \n" - " .set mips2 \n" - " sync \n" - " .set pop" - ::: "memory" - ); -} - -static __inline void -mb_write(void) -{ - mb_read(); -} - -static __inline void -mb_memory(void) -{ - mb_read(); -} -#endif /* MIPS1 */ - #else /* !_HARDKERNEL */ u_int _atomic_cas_uint(volatile u_int *, u_int, u_int); u_long _atomic_cas_ulong(volatile u_long *, u_long, u_long); void * _atomic_cas_ptr(volatile void *, void *, void *); -void mb_read(void); -void mb_write(void); -void mb_memory(void); static __inline int __cpu_simple_lock_try(__cpu_simple_lock_t *lp) @@ -167,7 +122,6 @@ __cpu_simple_lock_init(__cpu_simple_lock_t *lp) { *lp = __SIMPLELOCK_UNLOCKED; - mb_memory(); } static __inline void @@ -184,12 +138,42 @@ static __inline void __cpu_simple_unlock(__cpu_simple_lock_t *lp) { -#ifndef _MIPS_ARCH_OCTEONP - mb_memory(); -#endif + membar_exit(); *lp = __SIMPLELOCK_UNLOCKED; #ifdef _MIPS_ARCH_OCTEONP - mb_write(); + /* + * On Cavium's recommendation, we issue an extra SYNCW that is + * not necessary for correct ordering because apparently stores + * can get stuck indefinitely in Octeon store buffers, + * according to the following note: + * + * Programming Notes: + * [...] + * Core A (writer) + * SW R1, DATA + * LI R2, 1 + * SYNCW + * SW R2, FLAG + * SYNCW + * [...] + * + * The second SYNCW instruction executed by core A is not + * necessary for correctness, but has very important + * performance effects on OCTEON. Without it, the store + * to FLAG may linger in core A's write buffer before it + * becomes visible to other cores. (If core A is not + * performing many stores, this may add hundreds of + * thousands of cycles to the flag release time since the + * OCTEON core normally retains stores to attempt to merge + * them before sending the store on the CMB.) + * Applications should include this second SYNCW + * instruction after flag or lock releases. + * + * Cavium Networks OCTEON Plus CN50XX Hardware Reference + * Manual, July 2008, Appendix A, p. 943. + * https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/hactive/CN50XX-HRM-V0.99E.pdf + */ + __asm volatile("syncw"); #endif }