summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Thiago Macieira <[email protected]> 2023-07-06 10:57:35 -0700
committer: Thiago Macieira <[email protected]> 2023-07-25 07:21:56 -0700
commit: a7f227f56cfe562280e89d3c73040f7e8384129e (patch)
tree: 70fd65f8f5b0335fd8843d6eeebd0cbcef8f167a
parent: aaa8c3835303e6eb3579df300e06ea6696ca769f (diff)
Make qYieldCpu() public API

Rewritten to be a bit simpler, added a few more yield/YieldProcessor
alternatives, added RISC-V support.

[ChangeLog][QtCore] Added qYieldCpu() function.

Fixes: QTBUG-103014
Change-Id: I53335f845a1345299031fffd176f59032e7400f5
Reviewed-by: Allan Sandfeld Jensen <[email protected]>

-rw-r--r--  src/concurrent/qtconcurrentthreadengine.cpp   2
-rw-r--r--  src/corelib/CMakeLists.txt                    1
-rw-r--r--  src/corelib/global/qlogging.cpp               1
-rw-r--r--  src/corelib/global/qsimd_p.h                 43
-rw-r--r--  src/corelib/thread/qatomic_cxx11.h            1
-rw-r--r--  src/corelib/thread/qfutureinterface.cpp       1
-rw-r--r--  src/corelib/thread/qyieldcpu.h               66
-rw-r--r--  src/corelib/thread/qyieldcpu.qdoc            59
-rw-r--r--  tests/auto/corelib/global/qglobal/qglobal.c   7
9 files changed, 134 insertions, 47 deletions
diff --git a/src/concurrent/qtconcurrentthreadengine.cpp b/src/concurrent/qtconcurrentthreadengine.cpp
index 03e018b0a29..ce02d0c2c95 100644
--- a/src/concurrent/qtconcurrentthreadengine.cpp
+++ b/src/concurrent/qtconcurrentthreadengine.cpp
@@ -3,8 +3,6 @@
#include "qtconcurrentthreadengine.h"
-#include <QtCore/private/qsimd_p.h>
-
#if !defined(QT_NO_CONCURRENT) || defined(Q_QDOC)
QT_BEGIN_NAMESPACE
diff --git a/src/corelib/CMakeLists.txt b/src/corelib/CMakeLists.txt
index a7798a18f77..3333bc1a629 100644
--- a/src/corelib/CMakeLists.txt
+++ b/src/corelib/CMakeLists.txt
@@ -251,6 +251,7 @@ qt_internal_add_module(Core
thread/qthreadstorage.h
thread/qtsan_impl.h
thread/qwaitcondition.h thread/qwaitcondition_p.h
+ thread/qyieldcpu.h
time/qcalendar.cpp time/qcalendar.h
time/qcalendarbackend_p.h
time/qcalendarmath_p.h
diff --git a/src/corelib/global/qlogging.cpp b/src/corelib/global/qlogging.cpp
index d33f0937207..d4a22957787 100644
--- a/src/corelib/global/qlogging.cpp
+++ b/src/corelib/global/qlogging.cpp
@@ -14,7 +14,6 @@
#include "qdebug.h"
#include "qmutex.h"
#include <QtCore/private/qlocking_p.h>
-#include <QtCore/private/qsimd_p.h>
#include "qloggingcategory.h"
#ifndef QT_BOOTSTRAPPED
#include "qelapsedtimer.h"
diff --git a/src/corelib/global/qsimd_p.h b/src/corelib/global/qsimd_p.h
index 031ac78c607..55adcef6598 100644
--- a/src/corelib/global/qsimd_p.h
+++ b/src/corelib/global/qsimd_p.h
@@ -378,49 +378,6 @@ static inline uint64_t qCpuFeatures()
#define qCpuHasFeature(feature) (((qCompilerCpuFeatures & CpuFeature ## feature) == CpuFeature ## feature) \
|| ((qCpuFeatures() & CpuFeature ## feature) == CpuFeature ## feature))
-/*
- Small wrapper around x86's PAUSE and ARM's YIELD instructions.
-
- This is completely different from QThread::yieldCurrentThread(), which is
- an OS-level operation that takes the whole thread off the CPU.
-
- This is just preventing one SMT thread from filling a core's pipeline with
- speculated further loop iterations (which need to be expensively flushed on
- final success) when it could just give those pipeline slots to a second SMT
- thread that can do something useful with the core, such as unblocking this
- SMT thread :)
-
- So, instead of
-
- while (!condition)
- ;
-
- it's better to use
-
- while (!condition)
- qYieldCpu();
-*/
-static inline void qYieldCpu()
-{
-#if defined(Q_PROCESSOR_X86)
- _mm_pause();
-#elif defined(Q_PROCESSOR_ARM) && Q_PROCESSOR_ARM >= 7 /* yield was added in ARMv7 */
-# if __has_builtin(__builtin_arm_yield) /* e.g. Clang */
- __builtin_arm_yield();
-# elif defined(Q_OS_INTEGRITY) || defined(Q_CC_GNU_ONLY)
- /*
- - Integrity is missing the arm_acle.h header
- - GCC doesn't have __yield() in arm_acle.h
- https://stackoverflow.com/a/70076751/134841
- https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105416
- */
- asm volatile("yield"); /* this works everywhere */
-# else
- __yield(); /* this is what should work everywhere */
-# endif
-#endif
-}
-
#ifdef __cplusplus
} // extern "C"
diff --git a/src/corelib/thread/qatomic_cxx11.h b/src/corelib/thread/qatomic_cxx11.h
index 4f55a53ae3b..47a7bc9a106 100644
--- a/src/corelib/thread/qatomic_cxx11.h
+++ b/src/corelib/thread/qatomic_cxx11.h
@@ -6,6 +6,7 @@
#define QATOMIC_CXX11_H
#include <QtCore/qgenericatomic.h>
+#include <QtCore/qyieldcpu.h>
#include <atomic>
QT_BEGIN_NAMESPACE
diff --git a/src/corelib/thread/qfutureinterface.cpp b/src/corelib/thread/qfutureinterface.cpp
index de35089b2cc..3cf2566bd6c 100644
--- a/src/corelib/thread/qfutureinterface.cpp
+++ b/src/corelib/thread/qfutureinterface.cpp
@@ -9,7 +9,6 @@
#include <QtCore/qcoreapplication.h>
#include <QtCore/qthread.h>
#include <QtCore/qvarlengtharray.h>
-#include <QtCore/private/qsimd_p.h> // for qYieldCpu()
#include <private/qthreadpool_p.h>
#include <private/qobject_p.h>
diff --git a/src/corelib/thread/qyieldcpu.h b/src/corelib/thread/qyieldcpu.h
new file mode 100644
index 00000000000..b54e54568d4
--- /dev/null
+++ b/src/corelib/thread/qyieldcpu.h
@@ -0,0 +1,66 @@
+// Copyright (C) 2023 The Qt Company Ltd.
+// Copyright (C) 2023 Intel Corporation.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
+
+#ifndef QYIELDCPU_H
+#define QYIELDCPU_H
+
+#include <QtCore/qcompilerdetection.h>
+#include <QtCore/qprocessordetection.h>
+#include <QtCore/qtconfigmacros.h>
+
+#ifdef Q_CC_MSVC_ONLY
+// MSVC defines _YIELD_PROCESSOR() in <xatomic.h>, but as that is a private
+// header, we include the public ones
+# ifdef __cplusplus
+# include <atomic>
+extern "C"
+# endif
+void _mm_pause(void); // the compiler recognizes as intrinsic
+#endif
+
+QT_BEGIN_NAMESPACE
+
+#ifdef Q_CC_GNU
+__attribute__((artificial))
+#endif
+Q_ALWAYS_INLINE void qYieldCpu(void) Q_DECL_NOEXCEPT;
+
+void qYieldCpu(void)
+#ifdef __cplusplus
+ noexcept
+#endif
+{
+#if __has_builtin(__yield)
+ __yield(); // Generic
+#elif defined(_YIELD_PROCESSOR) && defined(Q_CC_MSVC)
+ _YIELD_PROCESSOR(); // Generic; MSVC's <atomic>
+
+#elif __has_builtin(__builtin_ia32_pause)
+ __builtin_ia32_pause();
+#elif defined(Q_PROCESSOR_X86) && defined(Q_CC_GNU)
+ // GCC < 10 didn't have __has_builtin()
+ __builtin_ia32_pause();
+#elif defined(Q_PROCESSOR_X86) && defined(Q_CC_MSVC)
+ _mm_pause();
+#elif defined(Q_PROCESSOR_X86)
+ asm("pause"); // hopefully asm() works in this compiler
+
+#elif __has_builtin(__builtin_arm_yield)
+ __builtin_arm_yield();
+#elif defined(Q_PROCESSOR_ARM) && Q_PROCESSOR_ARM >= 7
+ asm("yield"); // this works everywhere
+
+#elif __has_builtin(__builtin_riscv_pause)
+ __builtin_riscv_pause(); // Zihintpause extension
+#elif defined(Q_PROCESSOR_RISCV)
+ asm("fence w, 0"); // a.k.a. "pause"
+
+#elif defined(_YIELD_PROCESSOR) && defined(Q_CC_GHS)
+ _YIELD_PROCESSOR; // Green Hills (INTEGRITY), but only on ARM
+#endif
+}
+
+QT_END_NAMESPACE
+
+#endif // QYIELDCPU_H
diff --git a/src/corelib/thread/qyieldcpu.qdoc b/src/corelib/thread/qyieldcpu.qdoc
new file mode 100644
index 00000000000..9a5f693cb4f
--- /dev/null
+++ b/src/corelib/thread/qyieldcpu.qdoc
@@ -0,0 +1,59 @@
+// Copyright (C) 2023 The Qt Company Ltd.
+// Copyright (C) 2023 Intel Corporation.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
+
+/*!
+ \fn qYieldCpu()
+ \inmodule QtCore
+ \ingroup thread
+ \relates QAtomicInteger
+ \relatesalso QAtomicPointer
+ \since 6.7
+
+ Pauses the execution of the current thread for an unspecified time, using
+ hardware instructions, without de-scheduling this thread. This function is
+ meant to be used in high-throughput loops where the code expects another
+ thread to modify an atomic variable. This is completely different from
+ QThread::yieldCurrentThread(), which is an OS-level operation that may take
+ the whole thread off the CPU and allow other threads (possibly belonging to
+ other processes) to run.
+
+ So, instead of
+ \code
+ while (!condition)
+ ;
+ \endcode
+
+ one should write
+ \code
+ while (!condition)
+ qYieldCpu();
+ \endcode
+
+ This is useful both with and without hardware multithreading on the same
+ core. In the case of hardware threads, it serves to prevent further
+ speculative execution filling up the pipeline, which could starve the
+ sibling thread of resources. Across cores and higher levels of separation,
+ it allows the cache coherency protocol to allocate the cache line being
+ modified and inspected to the logical processor whose result this code is
+ expecting.
+
+ It is also recommended to loop around code that does not modify the global
+ variable, to avoid contention in exclusively obtaining the memory location.
+ Therefore, an atomic modification loop such as a spinlock acquisition
+ should be:
+
+ \code
+ while (true) {
+ while (!readOnlyCondition(atomic))
+ qYieldCpu();
+ if (modify(atomic))
+ break;
+ }
+ \endcode
+
+ On x86 processors and on RISC-V processors with the \c{Zihintpause}
+ extension, this will emit the \c PAUSE instruction, which is ignored on
+ processors that don't support it; on ARMv7 or later ARM processors, it will
+ emit the \c{YIELD} instruction.
+*/
diff --git a/tests/auto/corelib/global/qglobal/qglobal.c b/tests/auto/corelib/global/qglobal/qglobal.c
index abe6ec4fde0..8f9d8b52397 100644
--- a/tests/auto/corelib/global/qglobal/qglobal.c
+++ b/tests/auto/corelib/global/qglobal/qglobal.c
@@ -3,6 +3,7 @@
#include <QtCore/qglobal.h>
#include <QtCore/qtversion.h>
+#include <QtCore/qyieldcpu.h>
#ifdef Q_COMPILER_THREAD_LOCAL
# include <threads.h>
@@ -62,6 +63,12 @@ const char *tst_qVersion()
#endif
}
+void tst_qYieldCpu(void) Q_DECL_NOEXCEPT;
+void tst_qYieldCpu(void)
+{
+ qYieldCpu();
+}
+
/* Static assertion */
Q_STATIC_ASSERT(true);
Q_STATIC_ASSERT(1);