summary refs log tree commit diff stats
path: root/util
diff options
context:
space:
mode:
Diffstat (limited to 'util')
-rw-r--r-- util/unicode/main.cpp 113
-rw-r--r-- util/xkbdatagen/main.cpp 4
2 files changed, 93 insertions, 24 deletions
diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp
index 1f31febeaaf..f83e575d4c3 100644
--- a/util/unicode/main.cpp
+++ b/util/unicode/main.cpp
@@ -16,8 +16,10 @@
#include <private/qunicodetables_p.h>
#endif
+#include <array>
#include <QtCore/qxpfunctional.h>
#include <QtCore/q26numeric.h>
+#include <vector>
#if QT_VERSION < QT_VERSION_CHECK(6, 9, 0)
// QSpan, QIODevice::readLineInto()
@@ -1018,13 +1020,14 @@ static const char *property_string =
" ushort unicodeVersion : 5; /* 5 used */\n"
" ushort eastAsianWidth : 3; /* 3 used */\n"
" ushort nfQuickCheck : 8;\n" // could be narrowed
- " std::array<CaseConversion, NumCases> cases;\n"
+ " ushort caseIndex : 16; /* 9 used */\n"
" ushort graphemeBreakClass : 5; /* 5 used */\n"
" ushort wordBreakClass : 5; /* 5 used */\n"
" ushort lineBreakClass : 6; /* 6 used */\n"
" ushort sentenceBreakClass : 4; /* 4 used */\n"
" ushort idnaStatus : 4; /* 3 used */\n"
" ushort script : 8;\n"
+ " ushort reserved : 16; /* makes sizeof a nice round 16 bytes */\n"
"};\n\n"
"Q_DECL_CONST_FUNCTION\n"
"Q_CORE_EXPORT const Properties * QT_FASTCALL properties(char32_t ucs4) noexcept;\n"
@@ -1063,7 +1066,7 @@ static const char *methods =
"{ return eastAsianWidth(ch.unicode()); }\n"
"\n";
-static const int SizeOfPropertiesStruct = 20;
+static const int SizeOfPropertiesStruct = 16;
static const QByteArray sizeOfPropertiesStructCheck =
"static_assert(sizeof(Properties) == " + QByteArray::number(SizeOfPropertiesStruct) + ");\n\n";
@@ -1096,6 +1099,7 @@ struct PropertyFlags {
&& upperCaseSpecial == o.upperCaseSpecial
&& titleCaseSpecial == o.titleCaseSpecial
&& caseFoldSpecial == o.caseFoldSpecial
+ // caseIndex is _not_ part of equality
&& graphemeBreakClass == o.graphemeBreakClass
&& wordBreakClass == o.wordBreakClass
&& sentenceBreakClass == o.sentenceBreakClass
@@ -1129,6 +1133,7 @@ struct PropertyFlags {
bool upperCaseSpecial = 0;
bool titleCaseSpecial = 0;
bool caseFoldSpecial = 0;
+ int caseIndex = -1; // not part of equality; replaces {lower,upper,title,fold}CaseDiff
GraphemeBreakClass graphemeBreakClass = GraphemeBreak_Any;
WordBreakClass wordBreakClass = WordBreak_Any;
SentenceBreakClass sentenceBreakClass = SentenceBreak_Any;
@@ -1821,6 +1826,8 @@ static void readLineBreak()
loc.die("Unassigned line break class \"%.*s\"", qPrintableView(l[1]));
for (int codepoint = from; codepoint <= to; ++codepoint) {
+ if (QChar::isSurrogate(codepoint) && lb != LineBreak_SG)
+ loc.die("Surrogate with line-break class != SG, fix line-break detection in QUnicodeTools");
UnicodeData &d = UnicodeData::valueRef(codepoint);
d.p.lineBreakClass = lb;
}
@@ -2585,6 +2592,77 @@ static void computeUniqueProperties()
qDebug(" %" PRIdQSIZETYPE " unique unicode properties found", uniqueProperties.size());
}
+// One case mapping of a code point, packed into 16 bits: a 1-bit `special`
+// flag followed by a 15-bit signed `diff`.
+struct CaseConversion {
+    ushort special : 1;
+    signed short diff : 15;
+
+    // Bitwise equality. The static_assert guarantees the type has no padding
+    // bits, so comparing the raw bytes cannot report a spurious difference.
+    friend bool operator==(CaseConversion a, CaseConversion b) noexcept
+    {
+        static_assert(std::has_unique_object_representations_v<CaseConversion>);
+        return !std::memcmp(&a, &b, sizeof(CaseConversion));
+    }
+};
+// A set of four CaseConversion entries, compared element-wise by std::array.
+using CaseConversions = std::array<CaseConversion, 4>;
+
+// Builds the table of distinct case-conversion sets used by the entries of l
+// and stores, in each entry's caseIndex, the position of its set in that table.
+//
+// Each set holds the lower-, upper-, title- and fold-case conversions, in that
+// order. Index 0 is always the all-zero set, so entries whose conversions are
+// all zero share it.
+//
+// Aborts via qFatal() when a diff does not fit the 15-bit signed
+// CaseConversion::diff field, or when the table has more entries than the
+// 16-bit caseIndex field can address; silently truncating either value would
+// produce wrong tables.
+static std::vector<CaseConversions>
+computeUniqueCaseConversions(QList<PropertyFlags> &l)
+{
+    // Converts one (special, diff) pair, refusing values a 15-bit signed
+    // bit-field cannot hold instead of letting short(diff) truncate them.
+    const auto makeConversion = [](bool special, int diff) {
+        constexpr int MinDiff = -(1 << 14);
+        constexpr int MaxDiff = (1 << 14) - 1;
+        if (diff < MinDiff || diff > MaxDiff)
+            qFatal("computeUniqueCaseConversions: case diff %d does not fit into 15 bits", diff);
+        return CaseConversion{ special, short(diff) };
+    };
+
+    std::vector<CaseConversions> result;
+    result.emplace_back(); // all zeros should be at the beginning
+
+    qDebug("computeUniqueCaseConversions:");
+
+    size_t nonNullDuplicates = 0;
+
+    for (auto &e : l) {
+        const CaseConversions candidate = {
+            makeConversion(e.lowerCaseSpecial, e.lowerCaseDiff),
+            makeConversion(e.upperCaseSpecial, e.upperCaseDiff),
+            makeConversion(e.titleCaseSpecial, e.titleCaseDiff),
+            makeConversion(e.caseFoldSpecial, e.caseFoldDiff),
+        };
+        const auto it = std::find(result.begin(), result.end(), candidate);
+        // For a new set, `it` is end(), so this is the index it is about to get.
+        // Must be computed before push_back() invalidates the iterator.
+        e.caseIndex = int(it - result.begin());
+        if (it == result.end())
+            result.push_back(candidate);
+        else if (e.caseIndex != 0)
+            ++nonNullDuplicates;
+    }
+
+    // The generated Properties struct stores caseIndex in 16 bits.
+    constexpr size_t MaxEntries = size_t(1) << 16;
+    if (result.size() > MaxEntries) {
+        qFatal("computeUniqueCaseConversions: %llu entries do not fit into a 16-bit caseIndex",
+               qulonglong(result.size()));
+    }
+
+    qDebug(" %llu unique case conversions found (with %llu non-null duplicates)",
+           qulonglong(result.size()),
+           qulonglong(nonNullDuplicates));
+
+    return result;
+}
+
+// Renders conv as the C++ source text of the `caseConversions[]` table: one
+// brace-enclosed row per entry, each row listing its "{ special, diff }"
+// pairs separated by ", ".
+static QByteArray createCaseConversions(std::vector<CaseConversions> conv)
+{
+    qDebug("createCaseConversions:");
+
+    QByteArray table;
+    table += "static constexpr std::array<CaseConversion, NumCases> caseConversions[] = {\n";
+
+    for (const CaseConversions &row : conv) {
+        table += " { { ";
+        bool needSeparator = false;
+        for (const CaseConversion &c : row) {
+            // Separator goes between pairs only, never after the last one.
+            if (needSeparator)
+                table += ", ";
+            needSeparator = true;
+
+            table += "{ ";
+            table += QByteArray::number(c.special);
+            table += ", ";
+            table += QByteArray::number(c.diff);
+            table += " }";
+        }
+        table += " } },\n";
+    }
+
+    table += "};\n\n";
+    return table;
+}
+
struct UniqueBlock {
inline UniqueBlock() : index(-1) {}
@@ -2773,24 +2851,9 @@ static QByteArray createPropertyInfo()
// " ushort nfQuickCheck : 8;\n"
out += QByteArray::number( p.nfQuickCheck );
out += ", ";
-// " std::array<CaseConversion, NumCases> cases;\n"
- out += "{ { { ";
- out += QByteArray::number( p.lowerCaseSpecial );
- out += ", ";
- out += QByteArray::number( p.lowerCaseDiff );
- out += "}, {";
- out += QByteArray::number( p.upperCaseSpecial );
- out += ", ";
- out += QByteArray::number( p.upperCaseDiff );
- out += "}, {";
- out += QByteArray::number( p.titleCaseSpecial );
+// " ushort caseIndex : 16; /* 9 used */\n"
+ out += QByteArray::number(p.caseIndex);
out += ", ";
- out += QByteArray::number( p.titleCaseDiff );
- out += "}, {";
- out += QByteArray::number( p.caseFoldSpecial );
- out += ", ";
- out += QByteArray::number( p.caseFoldDiff );
- out += "} } }, ";
// " ushort graphemeBreakClass : 5; /* 5 used */\n"
// " ushort wordBreakClass : 5; /* 5 used */\n"
// " ushort lineBreakClass : 6; /* 6 used */\n"
@@ -2808,6 +2871,9 @@ static QByteArray createPropertyInfo()
out += ", ";
// " ushort script : 8;\n"
out += QByteArray::number( p.script );
+ out += ", ";
+// " ushort reserved : 16; /* makes sizeof a nice round 16 bytes */\n"
+ out += '0';
out += " },";
}
if (out.endsWith(','))
@@ -2838,7 +2904,7 @@ static QByteArray createPropertyInfo()
"\n"
"QSpan<const CaseConversion, NumCases> QT_FASTCALL caseConversion(char32_t ucs4) noexcept\n"
"{\n"
- " return qGetProp(ucs4)->cases;\n"
+ " return caseConversions[qGetProp(ucs4)->caseIndex];\n"
"}\n\n";
out += "Q_CORE_EXPORT GraphemeBreakClass QT_FASTCALL graphemeBreakClass(char32_t ucs4) noexcept\n"
@@ -3358,6 +3424,8 @@ int main(int, char **)
resolveIdnaStatus();
computeUniqueProperties();
+
+ const QByteArray caseConv = createCaseConversions(computeUniqueCaseConversions(uniqueProperties));
QByteArray properties = createPropertyInfo();
QByteArray specialCases = createSpecialCaseMap();
QByteArray compositions = createCompositionInfo();
@@ -3365,13 +3433,13 @@ int main(int, char **)
QByteArray normalizationCorrections = createNormalizationCorrections();
QByteArray idnaMapping = createIdnaMapping();
- # REUSE-IgnoreStart
+ // REUSE-IgnoreStart
QByteArray header =
"// Copyright (C) 2020 The Qt Company Ltd.\n"
"// SPDX-License-Identifier: Unicode-3.0\n"
"// Qt-Security score:significant reason:default\n"
"\n";
- # REUSE-IgnoreEnd
+ // REUSE-IgnoreEnd
QByteArray note =
"/* This file is autogenerated from the Unicode " DATA_VERSION_S " database. Do not edit */\n\n";
@@ -3396,6 +3464,7 @@ int main(int, char **)
f.write("#include \"qunicodetables_p.h\"\n\n");
f.write("QT_BEGIN_NAMESPACE\n\n");
f.write("namespace QUnicodeTables {\n");
+ f.write(caseConv.data());
f.write(properties);
f.write(specialCases);
f.write(compositions);
diff --git a/util/xkbdatagen/main.cpp b/util/xkbdatagen/main.cpp
index 4c9227816b1..df0a1be7dbd 100644
--- a/util/xkbdatagen/main.cpp
+++ b/util/xkbdatagen/main.cpp
@@ -374,14 +374,14 @@ int main(int argc, char **argv)
}
QList<XKBLayout> layouts = findLayouts(layoutList);
- # REUSE-IgnoreStart
+ // REUSE-IgnoreStart
// copyright and stuff
printf("// Copyright (C) 2016 The Qt Company Ltd.\n"
"// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only\n"
"// This file is auto-generated, do not edit!\n"
"// (Generated using util/xkbdatagen)\n"
"\n");
- # REUSE-IgnoreEnd
+ // REUSE-IgnoreEnd
// data structure
printf("static struct {\n"