// Yield
//
// mi_atomic_yield() takes no arguments and returns nothing. Exactly one
// definition is selected at compile time; the branches are tried in this
// order: C++, Windows, x86 with SSE2, GCC/Clang inline assembly for a fixed
// list of architectures, Solaris, WASI, and finally a generic fallback.
#if defined(__cplusplus)

#include <thread>

// C++ builds: delegate to the standard library.
static inline void mi_atomic_yield(void) {
  std::this_thread::yield();
}

#elif defined(_WIN32)

// Guarded so that a build which already defines the macro (for example with
// -DWIN32_LEAN_AND_MEAN=1) does not hit an incompatible redefinition.
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>

// Windows (C builds): use the YieldProcessor() macro from <windows.h>.
static inline void mi_atomic_yield(void) {
  YieldProcessor();
}

#elif defined(__SSE2__)

#include <emmintrin.h>

// x86 with SSE2 available: use the PAUSE intrinsic.
static inline void mi_atomic_yield(void) {
  _mm_pause();
}

#elif (defined(__GNUC__) || defined(__clang__)) && \
      (defined(__x86_64__) || defined(__i386__) || \
       defined(__aarch64__) || defined(__arm__) || \
       defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__))

// GCC/Clang inline-assembly variants. Every statement carries a "memory"
// clobber so that the compiler does not move memory accesses across it.
#if defined(__x86_64__) || defined(__i386__)

// x86 without __SSE2__: emit PAUSE directly.
static inline void mi_atomic_yield(void) {
  __asm__ volatile ("pause" ::: "memory");
}

#elif defined(__aarch64__)

// AArch64: wait-for-event hint. The "memory" clobber matches the sibling
// architecture branches.
static inline void mi_atomic_yield(void) {
  __asm__ volatile ("wfe" ::: "memory");
}

#elif defined(__arm__)

#if __ARM_ARCH >= 7

// 32-bit ARM, architecture version 7 or later: YIELD hint.
static inline void mi_atomic_yield(void) {
  __asm__ volatile ("yield" ::: "memory");
}

#else

// Older 32-bit ARM (or __ARM_ARCH undefined, which evaluates to 0 here):
// plain NOP.
static inline void mi_atomic_yield(void) {
  __asm__ volatile ("nop" ::: "memory");
}

#endif

#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__)

#ifdef __APPLE__

// PowerPC on Apple toolchains: same instruction as below, written with
// r-prefixed register names.
static inline void mi_atomic_yield(void) {
  __asm__ volatile ("or r27,r27,r27" ::: "memory");
}

#else

// PowerPC elsewhere: "or 27,27,27" with bare register numbers.
static inline void mi_atomic_yield(void) {
  __asm__ volatile ("or 27,27,27" ::: "memory");
}

#endif

#endif

#elif defined(__sun)

// From here on: fallbacks for targets not handled by the branches above.
// Solaris: smt_pause() from <synch.h>.
#include <synch.h>

static inline void mi_atomic_yield(void) {
  smt_pause();
}

#elif defined(__wasi__)

#include <sched.h>

// WASI: sched_yield() from <sched.h>.
static inline void mi_atomic_yield(void) {
  sched_yield();
}

#else

#include <unistd.h>

// Last resort for everything else: a zero-second sleep().
static inline void mi_atomic_yield(void) {
  sleep(0);
}

#endif
Proposal:
`_Py_yield()` currently relies on `sched_yield()`/`SwitchToThread()`, which are OS-level syscalls. We can replace these with lightweight CPU pause instructions (x86 `PAUSE`, AArch64 `WFE`, etc.), as CPython's bundled mimalloc already does in `mi_atomic_yield()`: cpython/Include/internal/mimalloc/mimalloc/atomic.h
Lines 323 to 389 in e682141
Has this already been discussed elsewhere?
No response given
Links to previous discussion of this feature:
No response
Linked PRs
`_Py_yield` (AArch64 only) #149784