Skip to content

Commit aef8948

Browse files
committed
Rework refactoring of hex and encoding routines
This commit addresses some issues with c3826f8, which moved the hex decoding routine to src/common/:
- The decoding function lacked overflow checks, so when used for security-related features it was an open door to out-of-bounds writes that, if the function was not used carefully, could remain undetected. Like the base64 routines already in src/common/ used by SCRAM, this routine is reworked to check for overflows by having the size of the destination buffer passed as an argument, with overflows checked before doing any writes.
- The encoding routine was missing. It is moved to src/common/ and gains the same overflow checks as the decoding part.

On failure, the hex routines of src/common/ issue an error, as agreed in the discussion; this makes them usable by frontend tools, but not by shared libraries. Note that this is why ECPG is left out of this commit, and it still includes duplicated logic for hex encoding and decoding.

While at it, this commit uses better variable names for the source and destination buffers in the existing escape and base64 routines in encode.c, and it makes them more robust with regard to overflow detection. The previous core code issued a FATAL after doing out-of-bounds writes when going through the SQL functions, which would be enough to detect problems when working on changes that impacted this area of the code. Instead, an error is now issued before doing an out-of-bounds write. The hex routines were being called directly for bytea conversions and backup manifests without such sanity checks. The current calls happen to not have any problems, but careless use of such APIs could easily lead to CVE-class bugs.

Author: Bruce Momjian, Michael Paquier
Reviewed-by: Sehrope Sarkuni
Discussion: https://postgr.es/m/[email protected]
1 parent 0d56acf commit aef8948

File tree

10 files changed

+304
-182
lines changed

10 files changed

+304
-182
lines changed

src/backend/replication/backup_manifest.c

+18-10
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@
1313
#include "postgres.h"
1414

1515
#include "access/timeline.h"
16+
#include "common/hex.h"
1617
#include "libpq/libpq.h"
1718
#include "libpq/pqformat.h"
1819
#include "mb/pg_wchar.h"
1920
#include "replication/backup_manifest.h"
20-
#include "utils/builtins.h"
2121
#include "utils/json.h"
2222

2323
static void AppendStringToManifest(backup_manifest_info *manifest, char *s);
@@ -150,10 +150,12 @@ AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid,
150150
}
151151
else
152152
{
153+
uint64 dstlen = pg_hex_enc_len(pathlen);
154+
153155
appendStringInfoString(&buf, "{ \"Encoded-Path\": \"");
154-
enlargeStringInfo(&buf, 2 * pathlen);
155-
buf.len += hex_encode(pathname, pathlen,
156-
&buf.data[buf.len]);
156+
enlargeStringInfo(&buf, dstlen);
157+
buf.len += pg_hex_encode(pathname, pathlen,
158+
&buf.data[buf.len], dstlen);
157159
appendStringInfoString(&buf, "\", ");
158160
}
159161

@@ -176,6 +178,7 @@ AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid,
176178
{
177179
uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH];
178180
int checksumlen;
181+
uint64 dstlen;
179182

180183
checksumlen = pg_checksum_final(checksum_ctx, checksumbuf);
181184
if (checksumlen < 0)
@@ -185,9 +188,10 @@ AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid,
185188
appendStringInfo(&buf,
186189
", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"",
187190
pg_checksum_type_name(checksum_ctx->type));
188-
enlargeStringInfo(&buf, 2 * checksumlen);
189-
buf.len += hex_encode((char *) checksumbuf, checksumlen,
190-
&buf.data[buf.len]);
191+
dstlen = pg_hex_enc_len(checksumlen);
192+
enlargeStringInfo(&buf, dstlen);
193+
buf.len += pg_hex_encode((char *) checksumbuf, checksumlen,
194+
&buf.data[buf.len], dstlen);
191195
appendStringInfoChar(&buf, '"');
192196
}
193197

@@ -307,8 +311,9 @@ SendBackupManifest(backup_manifest_info *manifest)
307311
{
308312
StringInfoData protobuf;
309313
uint8 checksumbuf[PG_SHA256_DIGEST_LENGTH];
310-
char checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH];
314+
char *checksumstringbuf;
311315
size_t manifest_bytes_done = 0;
316+
uint64 dstlen;
312317

313318
if (!IsManifestEnabled(manifest))
314319
return;
@@ -328,8 +333,11 @@ SendBackupManifest(backup_manifest_info *manifest)
328333
if (pg_cryptohash_final(manifest->manifest_ctx, checksumbuf) < 0)
329334
elog(ERROR, "failed to finalize checksum of backup manifest");
330335
AppendStringToManifest(manifest, "\"Manifest-Checksum\": \"");
331-
hex_encode((char *) checksumbuf, sizeof checksumbuf, checksumstringbuf);
332-
checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH - 1] = '\0';
336+
dstlen = pg_hex_enc_len(PG_SHA256_DIGEST_LENGTH);
337+
checksumstringbuf = palloc0(dstlen + 1); /* includes \0 */
338+
pg_hex_encode((char *) checksumbuf, sizeof checksumbuf,
339+
checksumstringbuf, dstlen);
340+
checksumstringbuf[dstlen] = '\0';
333341
AppendStringToManifest(manifest, checksumstringbuf);
334342
AppendStringToManifest(manifest, "\"}\n");
335343

src/backend/utils/adt/encode.c

+56-40
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
#include <ctype.h>
1717

18-
#include "common/hex_decode.h"
18+
#include "common/hex.h"
1919
#include "mb/pg_wchar.h"
2020
#include "utils/builtins.h"
2121
#include "utils/memutils.h"
@@ -32,10 +32,12 @@
3232
*/
3333
struct pg_encoding
3434
{
35-
uint64 (*encode_len) (const char *data, size_t dlen);
36-
uint64 (*decode_len) (const char *data, size_t dlen);
37-
uint64 (*encode) (const char *data, size_t dlen, char *res);
38-
uint64 (*decode) (const char *data, size_t dlen, char *res);
35+
uint64 (*encode_len) (const char *src, size_t srclen);
36+
uint64 (*decode_len) (const char *src, size_t srclen);
37+
uint64 (*encode) (const char *src, size_t srclen,
38+
char *dst, size_t dstlen);
39+
uint64 (*decode) (const char *src, size_t srclen,
40+
char *dst, size_t dstlen);
3941
};
4042

4143
static const struct pg_encoding *pg_find_encoding(const char *name);
@@ -81,11 +83,7 @@ binary_encode(PG_FUNCTION_ARGS)
8183

8284
result = palloc(VARHDRSZ + resultlen);
8385

84-
res = enc->encode(dataptr, datalen, VARDATA(result));
85-
86-
/* Make this FATAL 'cause we've trodden on memory ... */
87-
if (res > resultlen)
88-
elog(FATAL, "overflow - encode estimate too small");
86+
res = enc->encode(dataptr, datalen, VARDATA(result), resultlen);
8987

9088
SET_VARSIZE(result, VARHDRSZ + res);
9189

@@ -129,11 +127,7 @@ binary_decode(PG_FUNCTION_ARGS)
129127

130128
result = palloc(VARHDRSZ + resultlen);
131129

132-
res = enc->decode(dataptr, datalen, VARDATA(result));
133-
134-
/* Make this FATAL 'cause we've trodden on memory ... */
135-
if (res > resultlen)
136-
elog(FATAL, "overflow - decode estimate too small");
130+
res = enc->decode(dataptr, datalen, VARDATA(result), resultlen);
137131

138132
SET_VARSIZE(result, VARHDRSZ + res);
139133

@@ -145,32 +139,20 @@ binary_decode(PG_FUNCTION_ARGS)
145139
* HEX
146140
*/
147141

148-
static const char hextbl[] = "0123456789abcdef";
149-
150-
uint64
151-
hex_encode(const char *src, size_t len, char *dst)
152-
{
153-
const char *end = src + len;
154-
155-
while (src < end)
156-
{
157-
*dst++ = hextbl[(*src >> 4) & 0xF];
158-
*dst++ = hextbl[*src & 0xF];
159-
src++;
160-
}
161-
return (uint64) len * 2;
162-
}
163-
142+
/*
143+
* Those two wrappers are still needed to match with the layer of
144+
* src/common/.
145+
*/
164146
static uint64
165147
hex_enc_len(const char *src, size_t srclen)
166148
{
167-
return (uint64) srclen << 1;
149+
return pg_hex_enc_len(srclen);
168150
}
169151

170152
static uint64
171153
hex_dec_len(const char *src, size_t srclen)
172154
{
173-
return (uint64) srclen >> 1;
155+
return pg_hex_dec_len(srclen);
174156
}
175157

176158
/*
@@ -192,12 +174,12 @@ static const int8 b64lookup[128] = {
192174
};
193175

194176
static uint64
195-
pg_base64_encode(const char *src, size_t len, char *dst)
177+
pg_base64_encode(const char *src, size_t srclen, char *dst, size_t dstlen)
196178
{
197179
char *p,
198180
*lend = dst + 76;
199181
const char *s,
200-
*end = src + len;
182+
*end = src + srclen;
201183
int pos = 2;
202184
uint32 buf = 0;
203185

@@ -213,6 +195,8 @@ pg_base64_encode(const char *src, size_t len, char *dst)
213195
/* write it out */
214196
if (pos < 0)
215197
{
198+
if ((p - dst + 4) > dstlen)
199+
elog(ERROR, "overflow of destination buffer in base64 encoding");
216200
*p++ = _base64[(buf >> 18) & 0x3f];
217201
*p++ = _base64[(buf >> 12) & 0x3f];
218202
*p++ = _base64[(buf >> 6) & 0x3f];
@@ -223,25 +207,30 @@ pg_base64_encode(const char *src, size_t len, char *dst)
223207
}
224208
if (p >= lend)
225209
{
210+
if ((p - dst + 1) > dstlen)
211+
elog(ERROR, "overflow of destination buffer in base64 encoding");
226212
*p++ = '\n';
227213
lend = p + 76;
228214
}
229215
}
230216
if (pos != 2)
231217
{
218+
if ((p - dst + 4) > dstlen)
219+
elog(ERROR, "overflow of destination buffer in base64 encoding");
232220
*p++ = _base64[(buf >> 18) & 0x3f];
233221
*p++ = _base64[(buf >> 12) & 0x3f];
234222
*p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '=';
235223
*p++ = '=';
236224
}
237225

226+
Assert((p - dst) <= dstlen);
238227
return p - dst;
239228
}
240229

241230
static uint64
242-
pg_base64_decode(const char *src, size_t len, char *dst)
231+
pg_base64_decode(const char *src, size_t srclen, char *dst, size_t dstlen)
243232
{
244-
const char *srcend = src + len,
233+
const char *srcend = src + srclen,
245234
*s = src;
246235
char *p = dst;
247236
char c;
@@ -289,11 +278,21 @@ pg_base64_decode(const char *src, size_t len, char *dst)
289278
pos++;
290279
if (pos == 4)
291280
{
281+
if ((p - dst + 1) > dstlen)
282+
elog(ERROR, "overflow of destination buffer in base64 decoding");
292283
*p++ = (buf >> 16) & 255;
293284
if (end == 0 || end > 1)
285+
{
286+
if ((p - dst + 1) > dstlen)
287+
elog(ERROR, "overflow of destination buffer in base64 decoding");
294288
*p++ = (buf >> 8) & 255;
289+
}
295290
if (end == 0 || end > 2)
291+
{
292+
if ((p - dst + 1) > dstlen)
293+
elog(ERROR, "overflow of destination buffer in base64 decoding");
296294
*p++ = buf & 255;
295+
}
297296
buf = 0;
298297
pos = 0;
299298
}
@@ -305,6 +304,7 @@ pg_base64_decode(const char *src, size_t len, char *dst)
305304
errmsg("invalid base64 end sequence"),
306305
errhint("Input data is missing padding, is truncated, or is otherwise corrupted.")));
307306

307+
Assert((p - dst) <= dstlen);
308308
return p - dst;
309309
}
310310

@@ -340,7 +340,7 @@ pg_base64_dec_len(const char *src, size_t srclen)
340340
#define DIG(VAL) ((VAL) + '0')
341341

342342
static uint64
343-
esc_encode(const char *src, size_t srclen, char *dst)
343+
esc_encode(const char *src, size_t srclen, char *dst, size_t dstlen)
344344
{
345345
const char *end = src + srclen;
346346
char *rp = dst;
@@ -352,6 +352,8 @@ esc_encode(const char *src, size_t srclen, char *dst)
352352

353353
if (c == '\0' || IS_HIGHBIT_SET(c))
354354
{
355+
if ((rp - dst + 4) > dstlen)
356+
elog(ERROR, "overflow of destination buffer in escape encoding");
355357
rp[0] = '\\';
356358
rp[1] = DIG(c >> 6);
357359
rp[2] = DIG((c >> 3) & 7);
@@ -361,25 +363,30 @@ esc_encode(const char *src, size_t srclen, char *dst)
361363
}
362364
else if (c == '\\')
363365
{
366+
if ((rp - dst + 2) > dstlen)
367+
elog(ERROR, "overflow of destination buffer in escape encoding");
364368
rp[0] = '\\';
365369
rp[1] = '\\';
366370
rp += 2;
367371
len += 2;
368372
}
369373
else
370374
{
375+
if ((rp - dst + 1) > dstlen)
376+
elog(ERROR, "overflow of destination buffer in escape encoding");
371377
*rp++ = c;
372378
len++;
373379
}
374380

375381
src++;
376382
}
377383

384+
Assert((rp - dst) <= dstlen);
378385
return len;
379386
}
380387

381388
static uint64
382-
esc_decode(const char *src, size_t srclen, char *dst)
389+
esc_decode(const char *src, size_t srclen, char *dst, size_t dstlen)
383390
{
384391
const char *end = src + srclen;
385392
char *rp = dst;
@@ -388,7 +395,11 @@ esc_decode(const char *src, size_t srclen, char *dst)
388395
while (src < end)
389396
{
390397
if (src[0] != '\\')
398+
{
399+
if ((rp - dst + 1) > dstlen)
400+
elog(ERROR, "overflow of destination buffer in escape decoding");
391401
*rp++ = *src++;
402+
}
392403
else if (src + 3 < end &&
393404
(src[1] >= '0' && src[1] <= '3') &&
394405
(src[2] >= '0' && src[2] <= '7') &&
@@ -400,12 +411,16 @@ esc_decode(const char *src, size_t srclen, char *dst)
400411
val <<= 3;
401412
val += VAL(src[2]);
402413
val <<= 3;
414+
if ((rp - dst + 1) > dstlen)
415+
elog(ERROR, "overflow of destination buffer in escape decoding");
403416
*rp++ = val + VAL(src[3]);
404417
src += 4;
405418
}
406419
else if (src + 1 < end &&
407420
(src[1] == '\\'))
408421
{
422+
if ((rp - dst + 1) > dstlen)
423+
elog(ERROR, "overflow of destination buffer in escape decoding");
409424
*rp++ = '\\';
410425
src += 2;
411426
}
@@ -423,6 +438,7 @@ esc_decode(const char *src, size_t srclen, char *dst)
423438
len++;
424439
}
425440

441+
Assert((rp - dst) <= dstlen);
426442
return len;
427443
}
428444

@@ -504,7 +520,7 @@ static const struct
504520
{
505521
"hex",
506522
{
507-
hex_enc_len, hex_dec_len, hex_encode, hex_decode
523+
hex_enc_len, hex_dec_len, pg_hex_encode, pg_hex_decode
508524
}
509525
},
510526
{

src/backend/utils/adt/varlena.c

+11-5
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121
#include "catalog/pg_collation.h"
2222
#include "catalog/pg_type.h"
2323
#include "common/hashfn.h"
24+
#include "common/hex.h"
2425
#include "common/int.h"
25-
#include "common/hex_decode.h"
2626
#include "common/unicode_norm.h"
2727
#include "lib/hyperloglog.h"
2828
#include "libpq/pqformat.h"
@@ -304,10 +304,12 @@ byteain(PG_FUNCTION_ARGS)
304304
if (inputText[0] == '\\' && inputText[1] == 'x')
305305
{
306306
size_t len = strlen(inputText);
307+
uint64 dstlen = pg_hex_dec_len(len - 2);
307308

308-
bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
309+
bc = dstlen + VARHDRSZ; /* maximum possible length */
309310
result = palloc(bc);
310-
bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
311+
312+
bc = pg_hex_decode(inputText + 2, len - 2, VARDATA(result), dstlen);
311313
SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
312314

313315
PG_RETURN_BYTEA_P(result);
@@ -396,11 +398,15 @@ byteaout(PG_FUNCTION_ARGS)
396398

397399
if (bytea_output == BYTEA_OUTPUT_HEX)
398400
{
401+
uint64 dstlen = pg_hex_enc_len(VARSIZE_ANY_EXHDR(vlena));
402+
399403
/* Print hex format */
400-
rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
404+
rp = result = palloc(dstlen + 2 + 1);
401405
*rp++ = '\\';
402406
*rp++ = 'x';
403-
rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
407+
408+
rp += pg_hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp,
409+
dstlen);
404410
}
405411
else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
406412
{

0 commit comments

Comments
 (0)