Skip to content

Commit 2820adf

Browse files
committed
Support long distance matching for zstd compression
zstd compression supports a special mode for finding matches in the distant past, which may result in a better compression ratio, at the expense of using more memory (the window size is 128MB). To enable this optional mode, use the "long" keyword when specifying the compression method (--compress=zstd:long). Author: Justin Pryzby Reviewed-by: Tomas Vondra, Jacob Champion Discussion: https://postgr.es/m/[email protected] Discussion: https://postgr.es/m/[email protected]
1 parent 983ec23 commit 2820adf

File tree

12 files changed

+127
-6
lines changed

12 files changed

+127
-6
lines changed

doc/src/sgml/protocol.sgml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2729,7 +2729,8 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;"
27292729
level. Otherwise, it should be a comma-separated list of items,
27302730
each of the form <replaceable>keyword</replaceable> or
27312731
<replaceable>keyword=value</replaceable>. Currently, the supported
2732-
keywords are <literal>level</literal> and <literal>workers</literal>.
2732+
keywords are <literal>level</literal>, <literal>long</literal>, and
2733+
<literal>workers</literal>.
27332734
</para>
27342735

27352736
<para>
@@ -2746,6 +2747,13 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;"
27462747
<literal>3</literal>).
27472748
</para>
27482749

2750+
<para>
2751+
The <literal>long</literal> keyword enables long-distance matching
2752+
mode, for improved compression ratio, at the expense of higher memory
2753+
use. Long-distance mode is supported only for
2754+
<literal>zstd</literal>.
2755+
</para>
2756+
27492757
<para>
27502758
The <literal>workers</literal> keyword sets the number of threads
27512759
that should be used for parallel compression. Parallel compression

doc/src/sgml/ref/pg_basebackup.sgml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -424,8 +424,8 @@ PostgreSQL documentation
424424
level. Otherwise, it should be a comma-separated list of items,
425425
each of the form <literal>keyword</literal> or
426426
<literal>keyword=value</literal>.
427-
Currently, the supported keywords are <literal>level</literal>
428-
and <literal>workers</literal>.
427+
Currently, the supported keywords are <literal>level</literal>,
428+
<literal>long</literal>, and <literal>workers</literal>.
429429
The detail string cannot be used when the compression method
430430
is specified as a plain integer.
431431
</para>

doc/src/sgml/ref/pg_dump.sgml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -681,6 +681,8 @@ PostgreSQL documentation
681681
as though it had been fed through <application>gzip</application>,
682682
<application>lz4</application>, or <application>zstd</application>;
683683
but the default is not to compress.
684+
With zstd compression, <literal>long</literal> mode may improve the
685+
compression ratio, at the cost of increased memory use.
684686
</para>
685687
<para>
686688
The tar archive format currently does not support compression at all.

src/backend/backup/basebackup_zstd.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,18 @@ bbsink_zstd_begin_backup(bbsink *sink)
118118
compress->workers, ZSTD_getErrorName(ret)));
119119
}
120120

121+
if ((compress->options & PG_COMPRESSION_OPTION_LONG_DISTANCE) != 0)
122+
{
123+
ret = ZSTD_CCtx_setParameter(mysink->cctx,
124+
ZSTD_c_enableLongDistanceMatching,
125+
compress->long_distance);
126+
if (ZSTD_isError(ret))
127+
ereport(ERROR,
128+
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
129+
errmsg("could not set compression flag for %s: %s",
130+
"long", ZSTD_getErrorName(ret)));
131+
}
132+
121133
/*
122134
* We need our own buffer, because we're going to pass different data to
123135
* the next sink than what gets passed to us.

src/bin/pg_basebackup/bbstreamer_zstd.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,19 @@ bbstreamer_zstd_compressor_new(bbstreamer *next, pg_compress_specification *comp
106106
compress->workers, ZSTD_getErrorName(ret));
107107
}
108108

109+
if ((compress->options & PG_COMPRESSION_OPTION_LONG_DISTANCE) != 0)
110+
{
111+
ret = ZSTD_CCtx_setParameter(streamer->cctx,
112+
ZSTD_c_enableLongDistanceMatching,
113+
compress->long_distance);
114+
if (ZSTD_isError(ret))
115+
{
116+
pg_log_error("could not set compression flag for %s: %s",
117+
"long", ZSTD_getErrorName(ret));
118+
exit(1);
119+
}
120+
}
121+
109122
/* Initialize the ZSTD output buffer. */
110123
streamer->zstd_outBuf.dst = streamer->base.bbs_buffer.data;
111124
streamer->zstd_outBuf.size = streamer->base.bbs_buffer.maxlen;

src/bin/pg_basebackup/t/010_pg_basebackup.pl

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,14 @@
139139
'gzip:workers=3',
140140
'invalid compression specification: compression algorithm "gzip" does not accept a worker count',
141141
'failure on worker count for gzip'
142-
],);
142+
],
143+
[
144+
'gzip:long',
145+
'invalid compression specification: compression algorithm "gzip" does not support long-distance mode',
146+
'failure on long mode for gzip'
147+
],
148+
);
149+
143150
for my $cft (@compression_failure_tests)
144151
{
145152
my $cfail = quotemeta($client_fails . $cft->[1]);

src/bin/pg_dump/compress_zstd.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,11 @@ _ZstdCStreamParams(pg_compress_specification compress)
8080
_Zstd_CCtx_setParam_or_die(cstream, ZSTD_c_compressionLevel,
8181
compress.level, "level");
8282

83+
if (compress.options & PG_COMPRESSION_OPTION_LONG_DISTANCE)
84+
_Zstd_CCtx_setParam_or_die(cstream,
85+
ZSTD_c_enableLongDistanceMatching,
86+
compress.long_distance, "long");
87+
8388
return cstream;
8489
}
8590

src/bin/pg_dump/t/002_pg_dump.pl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,11 +267,12 @@
267267
],
268268
},
269269

270+
# Exercise long mode for test coverage
270271
compression_zstd_plain => {
271272
test_key => 'compression',
272273
compile_option => 'zstd',
273274
dump_cmd => [
274-
'pg_dump', '--format=plain', '--compress=zstd',
275+
'pg_dump', '--format=plain', '--compress=zstd:long',
275276
"--file=$tempdir/compression_zstd_plain.sql.zst", 'postgres',
276277
],
277278
# Decompress the generated file to run through the tests.

src/bin/pg_verifybackup/t/008_untar.pl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,14 @@
4949
'decompress_program' => $ENV{'ZSTD'},
5050
'decompress_flags' => ['-d'],
5151
'enabled' => check_pg_config("#define USE_ZSTD 1")
52+
},
53+
{
54+
'compression_method' => 'zstd',
55+
'backup_flags' => [ '--compress', 'server-zstd:level=1,long' ],
56+
'backup_archive' => 'base.tar.zst',
57+
'decompress_program' => $ENV{'ZSTD'},
58+
'decompress_flags' => ['-d'],
59+
'enabled' => check_pg_config("#define USE_ZSTD 1")
5260
});
5361

5462
for my $tc (@test_configuration)

src/bin/pg_verifybackup/t/010_client_untar.pl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,14 @@
5050
'decompress_flags' => ['-d'],
5151
'enabled' => check_pg_config("#define USE_ZSTD 1")
5252
},
53+
{
54+
'compression_method' => 'zstd',
55+
'backup_flags' => ['--compress', 'client-zstd:level=1,long'],
56+
'backup_archive' => 'base.tar.zst',
57+
'decompress_program' => $ENV{'ZSTD'},
58+
'decompress_flags' => [ '-d' ],
59+
'enabled' => check_pg_config("#define USE_ZSTD 1")
60+
},
5361
{
5462
'compression_method' => 'parallel zstd',
5563
'backup_flags' => [ '--compress', 'client-zstd:workers=3' ],

0 commit comments

Comments
 (0)