summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Christoph Cullmann <[email protected]> 2024-06-11 18:11:36 +0200
committer: Christoph Cullmann <[email protected]> 2024-06-18 15:55:33 +0000
commit: 607b3b2feb1328fdf8cf01768276d615c110e304 (patch)
tree: dc84f9dd6b6bc967b001196c6a5d5b66b8e48887
parent: a8b7da59cba56b535393f50cd7432a412021d8d2 (diff)
rcc: de-duplicate data in resources
Content-based de-duplication using SHA-256 hashing, with a full data comparison whenever candidates with a matching hash value are found.

Task-number: QTBUG-126168
Change-Id: Ifebc8ca322e354d8ea1f701f27f3f65916f7555c
Reviewed-by: hjk <[email protected]>
-rw-r--r-- src/tools/rcc/rcc.cpp 71
-rw-r--r-- tests/auto/tools/rcc/data/deduplication/deduplication.expected 157
-rw-r--r-- tests/auto/tools/rcc/data/deduplication/deduplication.qrc 10
-rw-r--r-- tests/auto/tools/rcc/data/deduplication/files/a.txt 1
-rw-r--r-- tests/auto/tools/rcc/data/deduplication/files/b.txt 1
-rw-r--r-- tests/auto/tools/rcc/data/deduplication/files/c_with_a_content.txt 1
-rw-r--r-- tests/auto/tools/rcc/tst_rcc.cpp 5
7 files changed, 238 insertions, 8 deletions
diff --git a/src/tools/rcc/rcc.cpp b/src/tools/rcc/rcc.cpp
index a1089914fd2..06f9ae1015c 100644
--- a/src/tools/rcc/rcc.cpp
+++ b/src/tools/rcc/rcc.cpp
@@ -1,10 +1,12 @@
// Copyright (C) 2018 The Qt Company Ltd.
// Copyright (C) 2018 Intel Corporation.
+// Copyright (C) 2024 Christoph Cullmann <[email protected]>
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
#include "rcc.h"
#include <qbytearray.h>
+#include <qcryptographichash.h>
#include <qdatetime.h>
#include <qdebug.h>
#include <qdir.h>
@@ -90,8 +92,28 @@ public:
QString resourceName() const;
+ struct DeduplicationKey {
+ RCCResourceLibrary::CompressionAlgorithm compressAlgo;
+ int compressLevel;
+ int compressThreshold;
+ QByteArray hash;
+
+ bool operator==(const DeduplicationKey &other) const
+ {
+ return compressAlgo == other.compressAlgo &&
+ compressLevel == other.compressLevel &&
+ compressThreshold == other.compressThreshold &&
+ hash == other.hash;
+ }
+ };
+
+ typedef QMultiHash<DeduplicationKey, RCCFileInfo*> DeduplicationMultiHash;
+
public:
- qint64 writeDataBlob(RCCResourceLibrary &lib, qint64 offset, QString *errorMessage);
+ qint64 writeDataBlob(RCCResourceLibrary &lib,
+ qint64 offset,
+ DeduplicationMultiHash &dedupByContent,
+ QString *errorMessage);
qint64 writeDataName(RCCResourceLibrary &, qint64 offset);
void writeDataInfo(RCCResourceLibrary &lib);
@@ -114,6 +136,11 @@ public:
qint64 m_childOffset = 0;
};
+static size_t qHash(const RCCFileInfo::DeduplicationKey &key, size_t seed) noexcept
+{
+ return qHashMulti(seed, key.compressAlgo, key.compressLevel, key.compressThreshold, key.hash);
+}
+
RCCFileInfo::RCCFileInfo(const QString &name, const QFileInfo &fileInfo, QLocale::Language language,
QLocale::Territory territory, uint flags,
RCCResourceLibrary::CompressionAlgorithm compressAlgo, int compressLevel,
@@ -217,8 +244,10 @@ void RCCFileInfo::writeDataInfo(RCCResourceLibrary &lib)
}
}
-qint64 RCCFileInfo::writeDataBlob(RCCResourceLibrary &lib, qint64 offset,
- QString *errorMessage)
+qint64 RCCFileInfo::writeDataBlob(RCCResourceLibrary &lib,
+ qint64 offset,
+ DeduplicationMultiHash &dedupByContent,
+ QString *errorMessage)
{
const bool text = lib.m_format == RCCResourceLibrary::C_Code;
const bool pass1 = lib.m_format == RCCResourceLibrary::Pass1;
@@ -231,14 +260,38 @@ qint64 RCCFileInfo::writeDataBlob(RCCResourceLibrary &lib, qint64 offset,
QByteArray data;
if (!m_isEmpty) {
- //find the data to be written
- QFile file(m_fileInfo.absoluteFilePath());
+ // find the data to be written
+ const QString absoluteFilePath = m_fileInfo.absoluteFilePath();
+ QFile file(absoluteFilePath);
if (!file.open(QFile::ReadOnly)) {
- *errorMessage = msgOpenReadFailed(m_fileInfo.absoluteFilePath(), file.errorString());
+ *errorMessage = msgOpenReadFailed(absoluteFilePath, file.errorString());
return 0;
}
-
data = file.readAll();
+
+ // de-duplicate the same file content, we can re-use already written data
+ // we only do that if we have the same compression settings
+ const QByteArray hash = QCryptographicHash::hash(data, QCryptographicHash::Sha256);
+ const DeduplicationKey key{m_compressAlgo, m_compressLevel, m_compressThreshold, hash};
+ const QList<RCCFileInfo *> potentialCandidates = dedupByContent.values(key);
+ for (const RCCFileInfo *candidate : potentialCandidates) {
+ // check real content, we can have collisions
+ QFile candidateFile(candidate->m_fileInfo.absoluteFilePath());
+ if (!candidateFile.open(QFile::ReadOnly)) {
+ *errorMessage = msgOpenReadFailed(candidate->m_fileInfo.absoluteFilePath(),
+ candidateFile.errorString());
+ return 0;
+ }
+ if (data != candidateFile.readAll()) {
+ continue;
+ }
+ // just remember the offset & flags with final compression state
+ // of the already written data and be done
+ m_dataOffset = candidate->m_dataOffset;
+ m_flags = candidate->m_flags;
+ return offset;
+ }
+ dedupByContent.insert(key, this);
}
// Check if compression is useful for this file
@@ -1168,6 +1221,7 @@ bool RCCResourceLibrary::writeDataBlobs()
QStack<RCCFileInfo*> pending;
pending.push(m_root);
qint64 offset = 0;
+ RCCFileInfo::DeduplicationMultiHash dedupByContent;
QString errorMessage;
while (!pending.isEmpty()) {
RCCFileInfo *file = pending.pop();
@@ -1176,7 +1230,8 @@ bool RCCResourceLibrary::writeDataBlobs()
if (child->m_flags & RCCFileInfo::Directory)
pending.push(child);
else {
- offset = child->writeDataBlob(*this, offset, &errorMessage);
+ offset = child->writeDataBlob(*this, offset,
+ dedupByContent, &errorMessage);
if (offset == 0) {
m_errorDevice->write(errorMessage.toUtf8());
return false;
diff --git a/tests/auto/tools/rcc/data/deduplication/deduplication.expected b/tests/auto/tools/rcc/data/deduplication/deduplication.expected
new file mode 100644
index 00000000000..bd873437b46
--- /dev/null
+++ b/tests/auto/tools/rcc/data/deduplication/deduplication.expected
@@ -0,0 +1,157 @@
+/****************************************************************************
+** Resource object code
+**
+IGNORE:** Created by: The Resource Compiler for Qt version 6.9.0
+**
+** WARNING! All changes made in this file will be lost!
+*****************************************************************************/
+
+#ifdef _MSC_VER
+// disable informational message "function ... selected for automatic inline expansion"
+#pragma warning (disable: 4711)
+#endif
+
+static const unsigned char qt_resource_data[] = {
+ // b.txt
+ 0x0,0x0,0x0,0xb,
+ 0x62,
+ 0x20,0x74,0x65,0x73,0x74,0x20,0x66,0x69,0x6c,0x65,
+ // c_with_a_content.txt
+ 0x0,0x0,0x0,0xb,
+ 0x61,
+ 0x20,0x74,0x65,0x73,0x74,0x20,0x66,0x69,0x6c,0x65,
+ // b.txt
+ 0x0,0x0,0x0,0xb,
+ 0x62,
+ 0x20,0x74,0x65,0x73,0x74,0x20,0x66,0x69,0x6c,0x65,
+
+};
+
+static const unsigned char qt_resource_name[] = {
+ // files
+ 0x0,0x5,
+ 0x0,0x6d,0x2,0xc3,
+ 0x0,0x66,
+ 0x0,0x69,0x0,0x6c,0x0,0x65,0x0,0x73,
+ // b.txt
+ 0x0,0x5,
+ 0x0,0x65,0x5b,0xf4,
+ 0x0,0x62,
+ 0x0,0x2e,0x0,0x74,0x0,0x78,0x0,0x74,
+ // c_with_a_content.txt
+ 0x0,0x14,
+ 0x1,0x61,0x1d,0x34,
+ 0x0,0x63,
+ 0x0,0x5f,0x0,0x77,0x0,0x69,0x0,0x74,0x0,0x68,0x0,0x5f,0x0,0x61,0x0,0x5f,0x0,0x63,0x0,0x6f,0x0,0x6e,0x0,0x74,0x0,0x65,0x0,0x6e,0x0,0x74,0x0,0x2e,
+ 0x0,0x74,0x0,0x78,0x0,0x74,
+ // a.txt
+ 0x0,0x5,
+ 0x0,0x64,0x5b,0xf4,
+ 0x0,0x61,
+ 0x0,0x2e,0x0,0x74,0x0,0x78,0x0,0x74,
+ // alias_of_b_compress9.txt
+ 0x0,0x18,
+ 0xb,0x26,0xf,0xb4,
+ 0x0,0x61,
+ 0x0,0x6c,0x0,0x69,0x0,0x61,0x0,0x73,0x0,0x5f,0x0,0x6f,0x0,0x66,0x0,0x5f,0x0,0x62,0x0,0x5f,0x0,0x63,0x0,0x6f,0x0,0x6d,0x0,0x70,0x0,0x72,0x0,0x65,
+ 0x0,0x73,0x0,0x73,0x0,0x39,0x0,0x2e,0x0,0x74,0x0,0x78,0x0,0x74,
+ // alias_of_b.txt
+ 0x0,0xe,
+ 0x1,0xa4,0x6d,0x34,
+ 0x0,0x61,
+ 0x0,0x6c,0x0,0x69,0x0,0x61,0x0,0x73,0x0,0x5f,0x0,0x6f,0x0,0x66,0x0,0x5f,0x0,0x62,0x0,0x2e,0x0,0x74,0x0,0x78,0x0,0x74,
+ // alias_of_b_compress9_dupe.txt
+ 0x0,0x1d,
+ 0x9,0x4,0x7a,0x14,
+ 0x0,0x61,
+ 0x0,0x6c,0x0,0x69,0x0,0x61,0x0,0x73,0x0,0x5f,0x0,0x6f,0x0,0x66,0x0,0x5f,0x0,0x62,0x0,0x5f,0x0,0x63,0x0,0x6f,0x0,0x6d,0x0,0x70,0x0,0x72,0x0,0x65,
+ 0x0,0x73,0x0,0x73,0x0,0x39,0x0,0x5f,0x0,0x64,0x0,0x75,0x0,0x70,0x0,0x65,0x0,0x2e,0x0,0x74,0x0,0x78,0x0,0x74,
+
+};
+
+static const unsigned char qt_resource_struct[] = {
+ // :
+ 0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x1,
+0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,
+ // :/files
+ 0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x0,0x0,0x6,0x0,0x0,0x0,0x2,
+0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,
+ // :/files/a.txt
+ 0x0,0x0,0x0,0x4e,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0xf,
+TIMESTAMP:files/a.txt
+ // :/files/b.txt
+ 0x0,0x0,0x0,0x10,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,
+TIMESTAMP:files/b.txt
+ // :/files/c_with_a_content.txt
+ 0x0,0x0,0x0,0x20,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0xf,
+TIMESTAMP:files/c_with_a_content.txt
+ // :/files/alias_of_b.txt
+ 0x0,0x0,0x0,0x94,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,
+TIMESTAMP:files/b.txt
+ // :/files/alias_of_b_compress9_dupe.txt
+ 0x0,0x0,0x0,0xb6,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x1e,
+TIMESTAMP:files/b.txt
+ // :/files/alias_of_b_compress9.txt
+ 0x0,0x0,0x0,0x5e,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x1e,
+TIMESTAMP:files/b.txt
+
+};
+
+#ifdef QT_NAMESPACE
+# define QT_RCC_PREPEND_NAMESPACE(name) ::QT_NAMESPACE::name
+# define QT_RCC_MANGLE_NAMESPACE0(x) x
+# define QT_RCC_MANGLE_NAMESPACE1(a, b) a##_##b
+# define QT_RCC_MANGLE_NAMESPACE2(a, b) QT_RCC_MANGLE_NAMESPACE1(a,b)
+# define QT_RCC_MANGLE_NAMESPACE(name) QT_RCC_MANGLE_NAMESPACE2( \
+ QT_RCC_MANGLE_NAMESPACE0(name), QT_RCC_MANGLE_NAMESPACE0(QT_NAMESPACE))
+#else
+# define QT_RCC_PREPEND_NAMESPACE(name) name
+# define QT_RCC_MANGLE_NAMESPACE(name) name
+#endif
+
+#if defined(QT_INLINE_NAMESPACE)
+inline namespace QT_NAMESPACE {
+#elif defined(QT_NAMESPACE)
+namespace QT_NAMESPACE {
+#endif
+
+bool qRegisterResourceData(int, const unsigned char *, const unsigned char *, const unsigned char *);
+bool qUnregisterResourceData(int, const unsigned char *, const unsigned char *, const unsigned char *);
+
+#ifdef QT_NAMESPACE
+}
+#endif
+
+int QT_RCC_MANGLE_NAMESPACE(qInitResources)();
+int QT_RCC_MANGLE_NAMESPACE(qInitResources)()
+{
+ int version = 3;
+ QT_RCC_PREPEND_NAMESPACE(qRegisterResourceData)
+ (version, qt_resource_struct, qt_resource_name, qt_resource_data);
+ return 1;
+}
+
+int QT_RCC_MANGLE_NAMESPACE(qCleanupResources)();
+int QT_RCC_MANGLE_NAMESPACE(qCleanupResources)()
+{
+ int version = 3;
+ QT_RCC_PREPEND_NAMESPACE(qUnregisterResourceData)
+ (version, qt_resource_struct, qt_resource_name, qt_resource_data);
+ return 1;
+}
+
+#ifdef __clang__
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wexit-time-destructors"
+#endif
+
+namespace {
+ struct initializer {
+ initializer() { QT_RCC_MANGLE_NAMESPACE(qInitResources)(); }
+ ~initializer() { QT_RCC_MANGLE_NAMESPACE(qCleanupResources)(); }
+ } dummy;
+}
+
+#ifdef __clang__
+# pragma clang diagnostic pop
+#endif
diff --git a/tests/auto/tools/rcc/data/deduplication/deduplication.qrc b/tests/auto/tools/rcc/data/deduplication/deduplication.qrc
new file mode 100644
index 00000000000..fd8a776503e
--- /dev/null
+++ b/tests/auto/tools/rcc/data/deduplication/deduplication.qrc
@@ -0,0 +1,10 @@
+<!DOCTYPE RCC><RCC version="1.0">
+<qresource>
+ <file>files/a.txt</file>
+ <file>files/b.txt</file>
+ <file alias="files/alias_of_b.txt">files/b.txt</file>
+ <file>files/c_with_a_content.txt</file>
+ <file alias="files/alias_of_b_compress9.txt" compress="9">files/b.txt</file>
+ <file alias="files/alias_of_b_compress9_dupe.txt" compress="9">files/b.txt</file>
+</qresource>
+</RCC>
diff --git a/tests/auto/tools/rcc/data/deduplication/files/a.txt b/tests/auto/tools/rcc/data/deduplication/files/a.txt
new file mode 100644
index 00000000000..abd91bd4652
--- /dev/null
+++ b/tests/auto/tools/rcc/data/deduplication/files/a.txt
@@ -0,0 +1 @@
+a test file
\ No newline at end of file
diff --git a/tests/auto/tools/rcc/data/deduplication/files/b.txt b/tests/auto/tools/rcc/data/deduplication/files/b.txt
new file mode 100644
index 00000000000..01e4d76fc57
--- /dev/null
+++ b/tests/auto/tools/rcc/data/deduplication/files/b.txt
@@ -0,0 +1 @@
+b test file
\ No newline at end of file
diff --git a/tests/auto/tools/rcc/data/deduplication/files/c_with_a_content.txt b/tests/auto/tools/rcc/data/deduplication/files/c_with_a_content.txt
new file mode 100644
index 00000000000..abd91bd4652
--- /dev/null
+++ b/tests/auto/tools/rcc/data/deduplication/files/c_with_a_content.txt
@@ -0,0 +1 @@
+a test file
\ No newline at end of file
diff --git a/tests/auto/tools/rcc/tst_rcc.cpp b/tests/auto/tools/rcc/tst_rcc.cpp
index af4a992d5cf..ac024b11d0f 100644
--- a/tests/auto/tools/rcc/tst_rcc.cpp
+++ b/tests/auto/tools/rcc/tst_rcc.cpp
@@ -152,6 +152,11 @@ void tst_rcc::rcc_data()
QTest::newRow("legal") << m_dataPath + QLatin1StringView("/legal")
<< "legal.qrc" << "rcc_legal.cpp";
+
+ if (sizeof(size_t) == 8) {
+ const QString deduplicationPath = m_dataPath + QLatin1String("/deduplication");
+ QTest::newRow("deduplication") << deduplicationPath << "deduplication.qrc" << "deduplication.expected";
+ }
}
static QStringList readLinesFromFile(const QString &fileName,