From 5cf7a1c46e2dc956ebabc2f105df8ef33982de80 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Wed, 30 Mar 2022 20:33:03 +0000 Subject: [PATCH 03/49] x86: Every load is a load-acquire, so membar_consumer is a noop. lfence is only needed for MD logic, such as operations on I/O memory rather than normal cacheable memory, or special instructions like RDTSC -- never for MI synchronization between threads/CPUs. No need for hot-patching to do lfence here. (The x86_lfence function might reasonably be patched on i386 to do lfence for MD logic, but it isn't now and this doesn't change that.) --- common/lib/libc/arch/i386/atomic/atomic.S | 17 ++++---------- common/lib/libc/arch/x86_64/atomic/atomic.S | 17 ++++---------- sys/arch/amd64/include/frameasm.h | 3 +-- sys/arch/i386/include/frameasm.h | 9 ++++--- sys/arch/x86/x86/patch.c | 26 +++++++-------------- 5 files changed, 23 insertions(+), 49 deletions(-) diff --git a/common/lib/libc/arch/i386/atomic/atomic.S b/common/lib/libc/arch/i386/atomic/atomic.S index d1fcef92a4fe..8c494ca5f587 100644 --- a/common/lib/libc/arch/i386/atomic/atomic.S +++ b/common/lib/libc/arch/i386/atomic/atomic.S @@ -46,12 +46,10 @@ #include "opt_xen.h" #include <machine/frameasm.h> #define LOCK HOTPATCH(HP_NAME_NOLOCK, 1); lock -#define HOTPATCH_SSE2_LFENCE HOTPATCH(HP_NAME_SSE2_LFENCE, 7); #define HOTPATCH_SSE2_MFENCE HOTPATCH(HP_NAME_SSE2_MFENCE, 7); #define HOTPATCH_CAS_64 HOTPATCH(HP_NAME_CAS_64, 49); #else #define LOCK lock -#define HOTPATCH_SSE2_LFENCE /* nothing */ #define HOTPATCH_SSE2_MFENCE /* nothing */ #define HOTPATCH_CAS_64 /* nothing */ #endif @@ -181,10 +179,11 @@ ENTRY(_atomic_cas_32_ni) END(_atomic_cas_32_ni) ENTRY(_membar_consumer) - HOTPATCH_SSE2_LFENCE - /* 7 bytes of instructions */ - LOCK - addl $0, -4(%esp) + /* + * Every load from normal memory is a load-acquire on x86, so + * there is never any need for explicit barriers to order + * load-before-anything. 
+ */ ret END(_membar_consumer) @@ -396,12 +395,6 @@ STRONG_ALIAS(_membar_exit,_membar_producer) #ifdef _HARDKERNEL .section .rodata -LABEL(sse2_lfence) - lfence - ret - nop; nop; nop; -LABEL(sse2_lfence_end) - LABEL(sse2_mfence) mfence ret diff --git a/common/lib/libc/arch/x86_64/atomic/atomic.S b/common/lib/libc/arch/x86_64/atomic/atomic.S index 0206a746a8f2..a483aa98de1d 100644 --- a/common/lib/libc/arch/x86_64/atomic/atomic.S +++ b/common/lib/libc/arch/x86_64/atomic/atomic.S @@ -41,11 +41,9 @@ #ifdef _HARDKERNEL #include <machine/frameasm.h> #define LOCK HOTPATCH(HP_NAME_NOLOCK, 1); lock -#define HOTPATCH_SSE2_LFENCE HOTPATCH(HP_NAME_SSE2_LFENCE, 8); #define HOTPATCH_SSE2_MFENCE HOTPATCH(HP_NAME_SSE2_MFENCE, 8); #else #define LOCK lock -#define HOTPATCH_SSE2_LFENCE /* nothing */ #define HOTPATCH_SSE2_MFENCE /* nothing */ #endif @@ -256,10 +254,11 @@ END(_atomic_cas_64_ni) /* memory barriers */ ENTRY(_membar_consumer) - HOTPATCH_SSE2_LFENCE - /* 8 bytes of instructions */ - LOCK - addq $0, -8(%rsp) + /* + * Every load from normal memory is a load-acquire on x86, so + * there is never any need for explicit barriers to order + * load-before-anything. 
+ */ ret END(_membar_consumer) @@ -419,12 +418,6 @@ STRONG_ALIAS(_membar_exit,_membar_producer) #ifdef _HARDKERNEL .section .rodata -LABEL(sse2_lfence) - lfence - ret - nop; nop; nop; nop; -LABEL(sse2_lfence_end) - LABEL(sse2_mfence) mfence ret diff --git a/sys/arch/amd64/include/frameasm.h b/sys/arch/amd64/include/frameasm.h index bbd30dd78e57..e82077dd8e03 100644 --- a/sys/arch/amd64/include/frameasm.h +++ b/sys/arch/amd64/include/frameasm.h @@ -63,8 +63,7 @@ #define HP_NAME_SVS_ENTER_NMI 11 #define HP_NAME_SVS_LEAVE_NMI 12 #define HP_NAME_MDS_LEAVE 13 -#define HP_NAME_SSE2_LFENCE 14 -#define HP_NAME_SSE2_MFENCE 15 +#define HP_NAME_SSE2_MFENCE 14 #define HOTPATCH(name, size) \ 123: ; \ diff --git a/sys/arch/i386/include/frameasm.h b/sys/arch/i386/include/frameasm.h index f24d05b164d8..3467fa521046 100644 --- a/sys/arch/i386/include/frameasm.h +++ b/sys/arch/i386/include/frameasm.h @@ -48,11 +48,10 @@ #define HP_NAME_STAC 2 #define HP_NAME_NOLOCK 3 #define HP_NAME_RETFENCE 4 -#define HP_NAME_SSE2_LFENCE 5 -#define HP_NAME_SSE2_MFENCE 6 -#define HP_NAME_CAS_64 7 -#define HP_NAME_SPLLOWER 8 -#define HP_NAME_MUTEX_EXIT 9 +#define HP_NAME_SSE2_MFENCE 5 +#define HP_NAME_CAS_64 6 +#define HP_NAME_SPLLOWER 7 +#define HP_NAME_MUTEX_EXIT 8 #define HOTPATCH(name, size) \ 123: ; \ diff --git a/sys/arch/x86/x86/patch.c b/sys/arch/x86/x86/patch.c index 4b91b67dc668..69efb230b05c 100644 --- a/sys/arch/x86/x86/patch.c +++ b/sys/arch/x86/x86/patch.c @@ -117,19 +117,6 @@ static const struct x86_hotpatch_descriptor hp_nolock_desc = { }; __link_set_add_rodata(x86_hotpatch_descriptors, hp_nolock_desc); -/* Use LFENCE if available, part of SSE2. 
*/ -extern uint8_t sse2_lfence, sse2_lfence_end; -static const struct x86_hotpatch_source hp_sse2_lfence_source = { - .saddr = &sse2_lfence, - .eaddr = &sse2_lfence_end -}; -static const struct x86_hotpatch_descriptor hp_sse2_lfence_desc = { - .name = HP_NAME_SSE2_LFENCE, - .nsrc = 1, - .srcs = { &hp_sse2_lfence_source } -}; -__link_set_add_rodata(x86_hotpatch_descriptors, hp_sse2_lfence_desc); - /* Use MFENCE if available, part of SSE2. */ extern uint8_t sse2_mfence, sse2_mfence_end; static const struct x86_hotpatch_source hp_sse2_mfence_source = { @@ -342,12 +329,15 @@ x86_patch(bool early) if (!early && (cpu_feature[0] & CPUID_SSE2) != 0) { /* - * Faster memory barriers. We do not need to patch - * membar_producer to use SFENCE because on x86 - * ordinary non-temporal stores are always issued in - * program order to main memory and to other CPUs. + * Faster memory barriers. The only barrier x86 ever + * requires for MI synchronization between CPUs is + * MFENCE for store-before-load ordering; all other + * ordering is guaranteed already -- every load is a + * load-acquire and every store is a store-release. + * + * LFENCE and SFENCE are relevant only for MD logic + * involving I/O devices or non-temporal stores. */ - x86_hotpatch(HP_NAME_SSE2_LFENCE, 0); x86_hotpatch(HP_NAME_SSE2_MFENCE, 0); }