Skip to content

Commit be8b06c

Browse files
committed
Revoke support for strxfrm() implementations that write past the specified array length.
This formalizes a decision implicit in commit 4ea51cd and adds clean detection of affected systems. Vendor updates are available for each such known bug. Back-patch to 9.5, where the aforementioned commit first appeared.
1 parent b28244a commit be8b06c

File tree

5 files changed

+70
-10
lines changed

5 files changed

+70
-10
lines changed

src/backend/main/main.c

+2
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,8 @@ main(int argc, char *argv[])
149149
*/
150150
unsetenv("LC_ALL");
151151

152+
check_strxfrm_bug();
153+
152154
/*
153155
* Catch standard options before doing much else, in particular before we
154156
* insist on not being root.

src/backend/utils/adt/pg_locale.c

+58
Original file line numberDiff line numberDiff line change
@@ -854,6 +854,64 @@ IsoLocaleName(const char *winlocname)
854854
#endif /* WIN32 && LC_MESSAGES */
855855

856856

857+
/*
858+
* Detect aging strxfrm() implementations that, in a subset of locales, write
859+
* past the specified buffer length. Affected users must update OS packages
860+
* before using PostgreSQL 9.5 or later.
861+
*
862+
* Assume that the bug can come and go from one postmaster startup to another
863+
* due to physical replication among diverse machines. Assume that the bug's
864+
* presence will not change during the life of a particular postmaster. Given
865+
* those assumptions, call this no less than once per postmaster startup per
866+
* LC_COLLATE setting used. No known-affected system offers strxfrm_l(), so
867+
* there is no need to consider pg_collation locales.
868+
*/
869+
void
870+
check_strxfrm_bug(void)
871+
{
872+
char buf[32];
873+
const int canary = 0x7F;
874+
bool ok = true;
875+
876+
/*
877+
* Given a two-byte ASCII string and length limit 7, 8 or 9, Solaris 10
878+
* 05/08 returns 18 and modifies 10 bytes. It respects limits above or
879+
* below that range.
880+
*
881+
* The bug is present in Solaris 8 as well; it is absent in Solaris 10
882+
* 01/13 and Solaris 11.2. Affected locales include is_IS.ISO8859-1,
883+
* en_US.UTF-8, en_US.ISO8859-1, and ru_RU.KOI8-R. Unaffected locales
884+
* include de_DE.UTF-8, de_DE.ISO8859-1, zh_TW.UTF-8, and C.
885+
*/
886+
buf[7] = canary;
887+
(void) strxfrm(buf, "ab", 7);
888+
if (buf[7] != canary)
889+
ok = false;
890+
891+
/*
892+
* illumos bug #1594 was present in the source tree from 2010-10-11 to
893+
* 2012-02-01. Given an ASCII string of any length and length limit 1,
894+
* affected systems ignore the length limit and modify a number of bytes
895+
* one less than the return value. The problem inputs for this bug do not
896+
* overlap those for the Solaris bug, hence a distinct test.
897+
*
898+
* Affected systems include smartos-20110926T021612Z. Affected locales
899+
* include en_US.ISO8859-1 and en_US.UTF-8. Unaffected locales include C.
900+
*/
901+
buf[1] = canary;
902+
(void) strxfrm(buf, "a", 1);
903+
if (buf[1] != canary)
904+
ok = false;
905+
906+
if (!ok)
907+
ereport(ERROR,
908+
(errcode(ERRCODE_SYSTEM_ERROR),
909+
errmsg_internal("strxfrm(), in locale \"%s\", writes past the specified array length",
910+
setlocale(LC_COLLATE, NULL)),
911+
errhint("Apply system library package updates.")));
912+
}
913+
914+
857915
/*
858916
* Cache mechanism for collation information.
859917
*

src/backend/utils/adt/selfuncs.c

+7-10
Original file line numberDiff line numberDiff line change
@@ -3932,16 +3932,8 @@ convert_string_datum(Datum value, Oid typid)
39323932
size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
39333933

39343934
/*
3935-
* Note: originally we guessed at a suitable output buffer size, and
3936-
* only needed to call strxfrm twice if our guess was too small.
3937-
* However, it seems that some versions of Solaris have buggy strxfrm
3938-
* that can write past the specified buffer length in that scenario.
3939-
* So, do it the dumb way for portability.
3940-
*
3941-
* Yet other systems (e.g., glibc) sometimes return a smaller value
3942-
* from the second call than the first; thus the Assert must be <= not
3943-
* == as you'd expect. Can't any of these people program their way
3944-
* out of a paper bag?
3935+
* XXX: We could guess at a suitable output buffer size and only call
3936+
* strxfrm twice if our guess is too small.
39453937
*
39463938
* XXX: strxfrm doesn't support UTF-8 encoding on Win32, it can return
39473939
* bogus data or set an error. This is not really a problem unless it
@@ -3974,6 +3966,11 @@ convert_string_datum(Datum value, Oid typid)
39743966
#endif
39753967
xfrmstr = (char *) palloc(xfrmlen + 1);
39763968
xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);
3969+
3970+
/*
3971+
* Some systems (e.g., glibc) can return a smaller value from the
3972+
* second call than the first; thus the Assert must be <= not ==.
3973+
*/
39773974
Assert(xfrmlen2 <= xfrmlen);
39783975
pfree(val);
39793976
val = xfrmstr;

src/backend/utils/init/postinit.c

+2
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,8 @@ CheckMyDatabase(const char *name, bool am_superuser)
391391
SetConfigOption("lc_collate", collate, PGC_INTERNAL, PGC_S_OVERRIDE);
392392
SetConfigOption("lc_ctype", ctype, PGC_INTERNAL, PGC_S_OVERRIDE);
393393

394+
check_strxfrm_bug();
395+
394396
ReleaseSysCache(tup);
395397
}
396398

src/include/utils/pg_locale.h

+1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ extern void assign_locale_time(const char *newval, void *extra);
4444

4545
extern bool check_locale(int category, const char *locale, char **canonname);
4646
extern char *pg_perm_setlocale(int category, const char *locale);
47+
extern void check_strxfrm_bug(void);
4748

4849
extern bool lc_collate_is_c(Oid collation);
4950
extern bool lc_ctype_is_c(Oid collation);

0 commit comments

Comments
 (0)