diff options
author | Thiago Macieira <[email protected]> | 2023-07-06 10:57:35 -0700 |
---|---|---|
committer | Thiago Macieira <[email protected]> | 2023-07-25 07:21:56 -0700 |
commit | a7f227f56cfe562280e89d3c73040f7e8384129e (patch) | |
tree | 70fd65f8f5b0335fd8843d6eeebd0cbcef8f167a | |
parent | aaa8c3835303e6eb3579df300e06ea6696ca769f (diff) |
Make qYieldCpu() public API
Rewritten to be a bit simpler, added a few more yield/YieldProcessor
alternatives, added RISC-V support.
[ChangeLog][QtCore] Added qYieldCpu() function.
Fixes: QTBUG-103014
Change-Id: I53335f845a1345299031fffd176f59032e7400f5
Reviewed-by: Allan Sandfeld Jensen <[email protected]>
-rw-r--r-- | src/concurrent/qtconcurrentthreadengine.cpp | 2 | ||||
-rw-r--r-- | src/corelib/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/corelib/global/qlogging.cpp | 1 | ||||
-rw-r--r-- | src/corelib/global/qsimd_p.h | 43 | ||||
-rw-r--r-- | src/corelib/thread/qatomic_cxx11.h | 1 | ||||
-rw-r--r-- | src/corelib/thread/qfutureinterface.cpp | 1 | ||||
-rw-r--r-- | src/corelib/thread/qyieldcpu.h | 66 | ||||
-rw-r--r-- | src/corelib/thread/qyieldcpu.qdoc | 59 | ||||
-rw-r--r-- | tests/auto/corelib/global/qglobal/qglobal.c | 7 |
9 files changed, 134 insertions, 47 deletions
diff --git a/src/concurrent/qtconcurrentthreadengine.cpp b/src/concurrent/qtconcurrentthreadengine.cpp index 03e018b0a29..ce02d0c2c95 100644 --- a/src/concurrent/qtconcurrentthreadengine.cpp +++ b/src/concurrent/qtconcurrentthreadengine.cpp @@ -3,8 +3,6 @@ #include "qtconcurrentthreadengine.h" -#include <QtCore/private/qsimd_p.h> - #if !defined(QT_NO_CONCURRENT) || defined(Q_QDOC) QT_BEGIN_NAMESPACE diff --git a/src/corelib/CMakeLists.txt b/src/corelib/CMakeLists.txt index a7798a18f77..3333bc1a629 100644 --- a/src/corelib/CMakeLists.txt +++ b/src/corelib/CMakeLists.txt @@ -251,6 +251,7 @@ qt_internal_add_module(Core thread/qthreadstorage.h thread/qtsan_impl.h thread/qwaitcondition.h thread/qwaitcondition_p.h + thread/qyieldcpu.h time/qcalendar.cpp time/qcalendar.h time/qcalendarbackend_p.h time/qcalendarmath_p.h diff --git a/src/corelib/global/qlogging.cpp b/src/corelib/global/qlogging.cpp index d33f0937207..d4a22957787 100644 --- a/src/corelib/global/qlogging.cpp +++ b/src/corelib/global/qlogging.cpp @@ -14,7 +14,6 @@ #include "qdebug.h" #include "qmutex.h" #include <QtCore/private/qlocking_p.h> -#include <QtCore/private/qsimd_p.h> #include "qloggingcategory.h" #ifndef QT_BOOTSTRAPPED #include "qelapsedtimer.h" diff --git a/src/corelib/global/qsimd_p.h b/src/corelib/global/qsimd_p.h index 031ac78c607..55adcef6598 100644 --- a/src/corelib/global/qsimd_p.h +++ b/src/corelib/global/qsimd_p.h @@ -378,49 +378,6 @@ static inline uint64_t qCpuFeatures() #define qCpuHasFeature(feature) (((qCompilerCpuFeatures & CpuFeature ## feature) == CpuFeature ## feature) \ || ((qCpuFeatures() & CpuFeature ## feature) == CpuFeature ## feature)) -/* - Small wrapper around x86's PAUSE and ARM's YIELD instructions. - - This is completely different from QThread::yieldCurrentThread(), which is - an OS-level operation that takes the whole thread off the CPU. 
- - This is just preventing one SMT thread from filling a core's pipeline with - speculated further loop iterations (which need to be expensively flushed on - final success) when it could just give those pipeline slots to a second SMT - thread that can do something useful with the core, such as unblocking this - SMT thread :) - - So, instead of - - while (!condition) - ; - - it's better to use - - while (!condition) - qYieldCpu(); -*/ -static inline void qYieldCpu() -{ -#if defined(Q_PROCESSOR_X86) - _mm_pause(); -#elif defined(Q_PROCESSOR_ARM) && Q_PROCESSOR_ARM >= 7 /* yield was added in ARMv7 */ -# if __has_builtin(__builtin_arm_yield) /* e.g. Clang */ - __builtin_arm_yield(); -# elif defined(Q_OS_INTEGRITY) || defined(Q_CC_GNU_ONLY) - /* - - Integrity is missing the arm_acle.h header - - GCC doesn't have __yield() in arm_acle.h - https://stackoverflow.com/a/70076751/134841 - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105416 - */ - asm volatile("yield"); /* this works everywhere */ -# else - __yield(); /* this is what should work everywhere */ -# endif -#endif -} - #ifdef __cplusplus } // extern "C" diff --git a/src/corelib/thread/qatomic_cxx11.h b/src/corelib/thread/qatomic_cxx11.h index 4f55a53ae3b..47a7bc9a106 100644 --- a/src/corelib/thread/qatomic_cxx11.h +++ b/src/corelib/thread/qatomic_cxx11.h @@ -6,6 +6,7 @@ #define QATOMIC_CXX11_H #include <QtCore/qgenericatomic.h> +#include <QtCore/qyieldcpu.h> #include <atomic> QT_BEGIN_NAMESPACE diff --git a/src/corelib/thread/qfutureinterface.cpp b/src/corelib/thread/qfutureinterface.cpp index de35089b2cc..3cf2566bd6c 100644 --- a/src/corelib/thread/qfutureinterface.cpp +++ b/src/corelib/thread/qfutureinterface.cpp @@ -9,7 +9,6 @@ #include <QtCore/qcoreapplication.h> #include <QtCore/qthread.h> #include <QtCore/qvarlengtharray.h> -#include <QtCore/private/qsimd_p.h> // for qYieldCpu() #include <private/qthreadpool_p.h> #include <private/qobject_p.h> diff --git 
a/src/corelib/thread/qyieldcpu.h b/src/corelib/thread/qyieldcpu.h new file mode 100644 index 00000000000..b54e54568d4 --- /dev/null +++ b/src/corelib/thread/qyieldcpu.h @@ -0,0 +1,66 @@ +// Copyright (C) 2023 The Qt Company Ltd. +// Copyright (C) 2023 Intel Corporation. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only + +#ifndef QYIELDCPU_H +#define QYIELDCPU_H + +#include <QtCore/qcompilerdetection.h> +#include <QtCore/qprocessordetection.h> +#include <QtCore/qtconfigmacros.h> + +#ifdef Q_CC_MSVC_ONLY +// MSVC defines _YIELD_PROCESSOR() in <xatomic.h>, but as that is a private +// header, we include the public ones +# ifdef __cplusplus +# include <atomic> +extern "C" +# endif +void _mm_pause(void); // the compiler recognizes as intrinsic +#endif + +QT_BEGIN_NAMESPACE + +#ifdef Q_CC_GNU +__attribute__((artificial)) +#endif +Q_ALWAYS_INLINE void qYieldCpu(void) Q_DECL_NOEXCEPT; + +void qYieldCpu(void) +#ifdef __cplusplus + noexcept +#endif +{ +#if __has_builtin(__yield) + __yield(); // Generic +#elif defined(_YIELD_PROCESSOR) && defined(Q_CC_MSVC) + _YIELD_PROCESSOR(); // Generic; MSVC's <atomic> + +#elif __has_builtin(__builtin_ia32_pause) + __builtin_ia32_pause(); +#elif defined(Q_PROCESSOR_X86) && defined(Q_CC_GNU) + // GCC < 10 didn't have __has_builtin() + __builtin_ia32_pause(); +#elif defined(Q_PROCESSOR_X86) && defined(Q_CC_MSVC) + _mm_pause(); +#elif defined(Q_PROCESSOR_X86) + asm("pause"); // hopefully asm() works in this compiler + +#elif __has_builtin(__builtin_arm_yield) + __builtin_arm_yield(); +#elif defined(Q_PROCESSOR_ARM) && Q_PROCESSOR_ARM >= 7 + asm("yield"); // this works everywhere + +#elif __has_builtin(__builtin_riscv_pause) + __builtin_riscv_pause(); // Zihintpause extension +#elif defined(Q_PROCESSOR_RISCV) + asm("fence w, 0"); // a.k.a. 
"pause" + +#elif defined(_YIELD_PROCESSOR) && defined(Q_CC_GHS) + _YIELD_PROCESSOR; // Green Hills (INTEGRITY), but only on ARM +#endif +} + +QT_END_NAMESPACE + +#endif // QYIELDCPU_H diff --git a/src/corelib/thread/qyieldcpu.qdoc b/src/corelib/thread/qyieldcpu.qdoc new file mode 100644 index 00000000000..9a5f693cb4f --- /dev/null +++ b/src/corelib/thread/qyieldcpu.qdoc @@ -0,0 +1,59 @@ +// Copyright (C) 2023 The Qt Company Ltd. +// Copyright (C) 2023 Intel Corporation. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only + +/*! + \fn qYieldCpu() + \inmodule QtCore + \ingroup thread + \relates QAtomicInteger + \relatesalso QAtomicPointer + \since 6.7 + + Pauses the execution of the current thread for an unspecified time, using + hardware instructions, without de-scheduling this thread. This function is + meant to be used in high-throughput loops where the code expects another + thread to modify an atomic variable. This is completely different from + QThread::yieldCurrentThread(), which is an OS-level operation that may take + the whole thread off the CPU and allow other threads (possibly belonging to + other processes) to run. + + So, instead of + \code + while (!condition) + ; + \endcode + + one should write + \code + while (!condition) + qYieldCpu(); + \endcode + + This is useful both with and without hardware multithreading on the same + core. In the case of hardware threads, it serves to prevent further + speculative execution filling up the pipeline, which could starve the + sibling thread of resources. Across cores and higher levels of separation, + it allows the cache coherency protocol to allocate the cache line being + modified and inspected to the logical processor whose result this code is + expecting. + + It is also recommended to loop around code that does not modify the global + variable, to avoid contention in exclusively obtaining the memory location. 
+ Therefore, an atomic modification loop such as a spinlock acquisition + should be: + + \code + while (true) { + while (!readOnlyCondition(atomic)) + qYieldCpu(); + if (modify(atomic)) + break; + } + \endcode + + On x86 processors and on RISC-V processors with the \c{Zihintpause} + extension, this will emit the \c PAUSE instruction, which is ignored on + processors that don't support it; on ARMv7 or later ARM processors, it will + emit the \c{YIELD} instruction. +*/ diff --git a/tests/auto/corelib/global/qglobal/qglobal.c b/tests/auto/corelib/global/qglobal/qglobal.c index abe6ec4fde0..8f9d8b52397 100644 --- a/tests/auto/corelib/global/qglobal/qglobal.c +++ b/tests/auto/corelib/global/qglobal/qglobal.c @@ -3,6 +3,7 @@ #include <QtCore/qglobal.h> #include <QtCore/qtversion.h> +#include <QtCore/qyieldcpu.h> #ifdef Q_COMPILER_THREAD_LOCAL # include <threads.h> @@ -62,6 +63,12 @@ const char *tst_qVersion() #endif } +void tst_qYieldCpu(void) Q_DECL_NOEXCEPT; +void tst_qYieldCpu(void) +{ + qYieldCpu(); +} + /* Static assertion */ Q_STATIC_ASSERT(true); Q_STATIC_ASSERT(1); |