#include #include #include #include #include #include #include #include #undef BROKEN #define BROKEN 1 #if defined(__amd64__) #define membar_acquire() asm volatile("" ::: "memory") #define membar_release() asm volatile("" ::: "memory") #ifdef BROKEN /* not really broken because atomic_swap implies seq_cst */ #define membar_dekker() asm volatile("" ::: "memory") #else #define membar_dekker() asm volatile("mfence" ::: "memory") #endif #define noop() asm volatile("pause" ::: "memory") #elif defined(__aarch64__) #define membar_acquire() asm volatile("dmb ishld" ::: "memory") #define membar_release() asm volatile("dmb ish" ::: "memory") #ifdef BROKEN #define membar_dekker() asm volatile("dmb ishld" ::: "memory") #else #define membar_dekker() asm volatile("dmb ish" ::: "memory") #endif #define noop() asm volatile("yield" ::: "memory") #endif volatile struct { unsigned v; uint8_t pad[128 - sizeof(unsigned)]; } waiting[2] __aligned((128)); volatile unsigned turn; volatile unsigned counter; static void lock(unsigned me) { top: atomic_swap_uint(&waiting[me].v, 1); membar_dekker(); while (waiting[1 - me].v) { if (turn != me) { waiting[me].v = 0; while (turn != me) continue; goto top; } } membar_acquire(); } static void unlock(unsigned me) { membar_release(); turn = 1 - me; waiting[me].v = 0; } struct t { pthread_t thread; unsigned i; const char *cpu; }; static void * thread(void *cookie) { const struct t *t = cookie; unsigned i; if (t->cpu) { cpuset_t *cset; int error; printf("bind thread %u to cpu %s\n", t->i, t->cpu); if ((cset = cpuset_create()) == NULL) err(1, "cpuset_create"); cpuset_set(atoi(t->cpu), cset); error = pthread_setaffinity_np(pthread_self(), cpuset_size(cset), cset); if (error) { errc(1, error, "pthread_setaffinity_np: %s", t->cpu); } cpuset_destroy(cset); } for (i = 10000000; i --> 0;) { lock(t->i); counter++; noop(); counter++; unlock(t->i); } return NULL; } int main(int argc, char **argv) { struct t t[2] = {0}; int i; int error; argc--; argv++; for (i = 0; i < 2; i++) { t[i].i = i; if (i < argc) t[i].cpu = argv[i]; } for (i = 0; i < 2; i++) { error = pthread_create(&t[i].thread, NULL, &thread, &t[i]); if (error) errc(1, error, "pthread_create"); } for (i = 0; i < 2; i++) { error = pthread_join(t[i].thread, NULL); if (error) errc(1, error, "pthread_join"); } printf("%u\n", counter); fflush(stdout); return ferror(stdout); }