diff options
| author | Thiago Macieira <[email protected]> | 2024-05-19 16:43:24 -0500 |
|---|---|---|
| committer | Thiago Macieira <[email protected]> | 2024-06-01 11:27:05 -0300 |
| commit | a98715c127fb569c21a0402d3c0e76ca0bb75c0d (patch) | |
| tree | 61032d382523714564b6269207f815620331447d /src | |
| parent | e673e5a257569eaa816c6acd31dd754efd9f8c75 (diff) | |
QByteArrayView: use memmem() to search for substrings
Let's just use whatever your C library provides, on the assumption that
it is optimized. Because it does the memchr() call when the size of the
needle is 1, we also skip the 1-char findByteArray() call.
It's been available on Linux since glibc 2.0, FreeBSD since 6.0, OpenBSD
since 5.4, NetBSD, Apple OSes, etc., even Solaris. If your OS doesn't have it,
you should ask your vendor to add it or consider upgrading to an OS that
already has it.
The glibc implementation[1] also uses hashing for short needles and
the Two Way string search algorithm it describes as "a bad character
shift table similar to the Boyer-Moore algorithm" for longer ones. The
FreeBSD implementation[2] (which its man page says came from MUSL) uses
1-, 2-, 3-, and 4-byte search specializations before using the Two Way
search algorithm too.
[1] https://codebrowser.dev/glibc/glibc/string/memmem.c.html
[2] https://github.com/freebsd/freebsd-src/blob/main/lib/libc/string/memmem.c
Change-Id: If05cb740b64f42eba21efffd17d101e24528f7fd
Reviewed-by: Ahmad Samir <[email protected]>
Diffstat (limited to 'src')
| -rw-r--r-- | src/corelib/configure.cmake | 20 | ||||
| -rw-r--r-- | src/corelib/global/qconfig-bootstrapped.h | 1 | ||||
| -rw-r--r-- | src/corelib/text/qbytearraymatcher.cpp | 12 |
3 files changed, 33 insertions, 0 deletions
diff --git a/src/corelib/configure.cmake b/src/corelib/configure.cmake index 7c07aa1c88a..b34086c8ada 100644 --- a/src/corelib/configure.cmake +++ b/src/corelib/configure.cmake @@ -324,6 +324,22 @@ linkat(AT_FDCWD, \"foo\", AT_FDCWD, \"bar\", AT_SYMLINK_FOLLOW); } ") +# memmem +qt_config_compile_test(memmem + LABEL "memmem()" + CODE +#define _APPLE_SAUCE 1 /* Apple doesn't require anything */ +"#define _BSD_SOURCE 1 /* For FreeBSD */ +#define _GNU_SOURCE 1 /* For glibc, Bionic */ +#include <string.h> + +int main(void) +{ + const void *r = memmem(\"abc\", 3, \"bc\", 2); + (void)r; + return 0; +}") + # memrchr qt_config_compile_test(memrchr LABEL "memrchr()" @@ -569,6 +585,10 @@ qt_feature("std-atomic64" PUBLIC LABEL "64 bit atomic operations" CONDITION WrapAtomic_FOUND ) +qt_feature("memmem" PRIVATE + LABEL "C library function memmem()" + CONDITION TEST_memmem +) qt_feature("memrchr" PRIVATE LABEL "C library function memrchr()" CONDITION TEST_memrchr diff --git a/src/corelib/global/qconfig-bootstrapped.h b/src/corelib/global/qconfig-bootstrapped.h index 0596b9d9f4c..c2fe4661f6c 100644 --- a/src/corelib/global/qconfig-bootstrapped.h +++ b/src/corelib/global/qconfig-bootstrapped.h @@ -72,6 +72,7 @@ # define QT_FEATURE_linkat -1 #endif #define QT_FEATURE_lttng -1 +#define QT_FEATURE_memmem -1 #define QT_FEATURE_memrchr -1 #define QT_NO_QOBJECT #define QT_FEATURE_process -1 diff --git a/src/corelib/text/qbytearraymatcher.cpp b/src/corelib/text/qbytearraymatcher.cpp index a332f035efa..9f27e10f3d5 100644 --- a/src/corelib/text/qbytearraymatcher.cpp +++ b/src/corelib/text/qbytearraymatcher.cpp @@ -3,6 +3,11 @@ #include "qbytearraymatcher.h" +#include <qtconfiginclude.h> +#ifndef QT_BOOTSTRAPPED +# include <private/qtcore-config_p.h> +#endif + #include <limits.h> QT_BEGIN_NAMESPACE @@ -238,8 +243,10 @@ qsizetype QtPrivate::findByteArray(QByteArrayView haystack, qsizetype from, QByt const auto haystack0 = haystack.data(); const auto l = haystack.size(); const auto sl = 
needle.size(); +#if !QT_CONFIG(memmem) if (sl == 1) return findByteArray(haystack, from, needle.front()); +#endif if (from < 0) from += l; @@ -250,6 +257,11 @@ qsizetype QtPrivate::findByteArray(QByteArrayView haystack, qsizetype from, QByt if (!l) return -1; +#if QT_CONFIG(memmem) + auto where = memmem(haystack0 + from, l - from, needle.data(), sl); + return where ? static_cast<const char *>(where) - haystack0 : -1; +#endif + /* We use the Boyer-Moore algorithm in cases where the overhead for the skip table should pay off, otherwise we use a simple |
