diff options
Diffstat (limited to 'util')
| -rw-r--r-- | util/unicode/main.cpp | 113 | ||||
| -rw-r--r-- | util/xkbdatagen/main.cpp | 4 |
2 files changed, 93 insertions, 24 deletions
diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp index 1f31febeaaf..f83e575d4c3 100644 --- a/util/unicode/main.cpp +++ b/util/unicode/main.cpp @@ -16,8 +16,10 @@ #include <private/qunicodetables_p.h> #endif +#include <array> #include <QtCore/qxpfunctional.h> #include <QtCore/q26numeric.h> +#include <vector> #if QT_VERSION < QT_VERSION_CHECK(6, 9, 0) // QSpan, QIODevice::readLineInto() @@ -1018,13 +1020,14 @@ static const char *property_string = " ushort unicodeVersion : 5; /* 5 used */\n" " ushort eastAsianWidth : 3; /* 3 used */\n" " ushort nfQuickCheck : 8;\n" // could be narrowed - " std::array<CaseConversion, NumCases> cases;\n" + " ushort caseIndex : 16; /* 9 used */\n" " ushort graphemeBreakClass : 5; /* 5 used */\n" " ushort wordBreakClass : 5; /* 5 used */\n" " ushort lineBreakClass : 6; /* 6 used */\n" " ushort sentenceBreakClass : 4; /* 4 used */\n" " ushort idnaStatus : 4; /* 3 used */\n" " ushort script : 8;\n" + " ushort reserved : 16; /* makes sizeof a nice round 16 bytes */\n" "};\n\n" "Q_DECL_CONST_FUNCTION\n" "Q_CORE_EXPORT const Properties * QT_FASTCALL properties(char32_t ucs4) noexcept;\n" @@ -1063,7 +1066,7 @@ static const char *methods = "{ return eastAsianWidth(ch.unicode()); }\n" "\n"; -static const int SizeOfPropertiesStruct = 20; +static const int SizeOfPropertiesStruct = 16; static const QByteArray sizeOfPropertiesStructCheck = "static_assert(sizeof(Properties) == " + QByteArray::number(SizeOfPropertiesStruct) + ");\n\n"; @@ -1096,6 +1099,7 @@ struct PropertyFlags { && upperCaseSpecial == o.upperCaseSpecial && titleCaseSpecial == o.titleCaseSpecial && caseFoldSpecial == o.caseFoldSpecial + // caseIndex is _not_ part of equality && graphemeBreakClass == o.graphemeBreakClass && wordBreakClass == o.wordBreakClass && sentenceBreakClass == o.sentenceBreakClass @@ -1129,6 +1133,7 @@ struct PropertyFlags { bool upperCaseSpecial = 0; bool titleCaseSpecial = 0; bool caseFoldSpecial = 0; + int caseIndex = -1; // not part of equality; replaces {lower,upper,title,fold}CaseDiff GraphemeBreakClass graphemeBreakClass = GraphemeBreak_Any; WordBreakClass wordBreakClass = WordBreak_Any; SentenceBreakClass sentenceBreakClass = SentenceBreak_Any; @@ -1821,6 +1826,8 @@ static void readLineBreak() loc.die("Unassigned line break class \"%.*s\"", qPrintableView(l[1])); for (int codepoint = from; codepoint <= to; ++codepoint) { + if (QChar::isSurrogate(codepoint) && lb != LineBreak_SG) + loc.die("Surrogate with line-break class != SG, fix line-break detection in QUnicodeTools"); UnicodeData &d = UnicodeData::valueRef(codepoint); d.p.lineBreakClass = lb; } @@ -2585,6 +2592,77 @@ static void computeUniqueProperties() qDebug(" %" PRIdQSIZETYPE " unique unicode properties found", uniqueProperties.size()); } +struct CaseConversion { + ushort special : 1; + signed short diff : 15; + + friend bool operator==(CaseConversion lhs, CaseConversion rhs) noexcept + { + static_assert(std::has_unique_object_representations_v<CaseConversion>); + return std::memcmp(&lhs, &rhs, sizeof(lhs)) == 0; + } +}; +using CaseConversions = std::array<CaseConversion, 4>; + +static std::vector<CaseConversions> +computeUniqueCaseConversions(QList<PropertyFlags> &l) +{ + std::vector<CaseConversions> result; + result.emplace_back(); // all zeros should be at the beginning + + qDebug("computeUniqueCaseConversions:"); + + size_t nonNullDuplicates = 0; + + for (auto &e : l) { + CaseConversions candidate = { + CaseConversion{ e.lowerCaseSpecial, short(e.lowerCaseDiff) }, + CaseConversion{ e.upperCaseSpecial, short(e.upperCaseDiff) }, + CaseConversion{ e.titleCaseSpecial, short(e.titleCaseDiff) }, + CaseConversion{ e.caseFoldSpecial, short(e.caseFoldDiff) }, + }; + const auto it = std::find(result.begin(), result.end(), candidate); + if (it == result.end()) { + // new one, add: + e.caseIndex = int(result.size()); + result.push_back(std::move(candidate)); + } else { + e.caseIndex = it - result.begin(); + if (e.caseIndex != 0) + ++nonNullDuplicates; + } + } + + qDebug(" %llu unique case conversions found (with %llu non-null duplicates)", + qulonglong(result.size()), + qulonglong(nonNullDuplicates)); + + return result; +} + +static QByteArray createCaseConversions(std::vector<CaseConversions> conv) +{ + QByteArray out; + + qDebug("createCaseConversions:"); + + out += "static constexpr std::array<CaseConversion, NumCases> caseConversions[] = {\n"; + for (const auto &e : conv) { + out += " { { "; + for (const auto &f : e) { + out += "{ "; + out += QByteArray::number(f.special); + out += ", "; + out += QByteArray::number(f.diff); + out += " }, "; + } + out.chop(2); // removes ", " + out += " } },\n"; + } + out += "};\n\n"; + return out; +} + struct UniqueBlock { inline UniqueBlock() : index(-1) {} @@ -2773,24 +2851,9 @@ static QByteArray createPropertyInfo() // " ushort nfQuickCheck : 8;\n" out += QByteArray::number( p.nfQuickCheck ); out += ", "; -// " std::array<CaseConversion, NumCases> cases;\n" - out += "{ { { "; - out += QByteArray::number( p.lowerCaseSpecial ); - out += ", "; - out += QByteArray::number( p.lowerCaseDiff ); - out += "}, {"; - out += QByteArray::number( p.upperCaseSpecial ); - out += ", "; - out += QByteArray::number( p.upperCaseDiff ); - out += "}, {"; - out += QByteArray::number( p.titleCaseSpecial ); +// " ushort caseIndex; /* 9 used */\n" + out += QByteArray::number(p.caseIndex); out += ", "; - out += QByteArray::number( p.titleCaseDiff ); - out += "}, {"; - out += QByteArray::number( p.caseFoldSpecial ); - out += ", "; - out += QByteArray::number( p.caseFoldDiff ); - out += "} } }, "; // " ushort graphemeBreakClass : 5; /* 5 used */\n" // " ushort wordBreakClass : 5; /* 5 used */\n" // " ushort lineBreakClass : 6; /* 6 used */\n" @@ -2808,6 +2871,9 @@ static QByteArray createPropertyInfo() out += ", "; // " ushort script : 8;\n" out += QByteArray::number( p.script ); + out += ", "; +// " ushort reserved;\n" + out += '0'; out += " },"; } if (out.endsWith(',')) @@ -2838,7 +2904,7 @@ static QByteArray createPropertyInfo() "\n" "QSpan<const CaseConversion, NumCases> QT_FASTCALL caseConversion(char32_t ucs4) noexcept\n" "{\n" - " return qGetProp(ucs4)->cases;\n" + " return caseConversions[qGetProp(ucs4)->caseIndex];\n" "}\n\n"; out += "Q_CORE_EXPORT GraphemeBreakClass QT_FASTCALL graphemeBreakClass(char32_t ucs4) noexcept\n" @@ -3358,6 +3424,8 @@ int main(int, char **) resolveIdnaStatus(); computeUniqueProperties(); + + const QByteArray caseConv = createCaseConversions(computeUniqueCaseConversions(uniqueProperties)); QByteArray properties = createPropertyInfo(); QByteArray specialCases = createSpecialCaseMap(); QByteArray compositions = createCompositionInfo(); @@ -3365,13 +3433,13 @@ int main(int, char **) QByteArray normalizationCorrections = createNormalizationCorrections(); QByteArray idnaMapping = createIdnaMapping(); - # REUSE-IgnoreStart + // REUSE-IgnoreStart QByteArray header = "// Copyright (C) 2020 The Qt Company Ltd.\n" "// SPDX-License-Identifier: Unicode-3.0\n" "// Qt-Security score:significant reason:default\n" "\n"; - # REUSE-IgnoreEnd + // REUSE-IgnoreEnd QByteArray note = "/* This file is autogenerated from the Unicode " DATA_VERSION_S " database. Do not edit */\n\n"; @@ -3396,6 +3464,7 @@ int main(int, char **) f.write("#include \"qunicodetables_p.h\"\n\n"); f.write("QT_BEGIN_NAMESPACE\n\n"); f.write("namespace QUnicodeTables {\n"); + f.write(caseConv.data()); f.write(properties); f.write(specialCases); f.write(compositions); diff --git a/util/xkbdatagen/main.cpp b/util/xkbdatagen/main.cpp index 4c9227816b1..df0a1be7dbd 100644 --- a/util/xkbdatagen/main.cpp +++ b/util/xkbdatagen/main.cpp @@ -374,14 +374,14 @@ int main(int argc, char **argv) } QList<XKBLayout> layouts = findLayouts(layoutList); - # REUSE-IgnoreStart + // REUSE-IgnoreStart // copyright and stuff printf("// Copyright (C) 2016 The Qt Company Ltd.\n" "// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only\n" "// This file is auto-generated, do not edit!\n" "// (Generated using util/xkbdatagen)\n" "\n"); - # REUSE-IgnoreEnd + // REUSE-IgnoreEnd // data structure printf("static struct {\n" |
