Skip to content

Commit 03c811a

Browse files
committed
Fix planner's test for case-foldable characters in ILIKE with ICU.
As coded, the ICU-collation path in pattern_char_isalpha() failed to consider regular ASCII letters to be case-varying. This led to like_fixed_prefix treating too much of an ILIKE pattern as being a fixed prefix, so that indexscans derived from an ILIKE clause might miss entries that they should find. Per bug #15892 from James Inform. This is an oversight in the original ICU patch (commit eccfef8), so back-patch to v10 where that came in. Discussion: https://postgr.es/m/[email protected]
1 parent 3c92658 commit 03c811a

File tree

3 files changed

+50
-5
lines changed

3 files changed

+50
-5
lines changed

src/backend/utils/adt/like_support.c

+5-3
Original file line number | Diff line number | Diff line change
@@ -1437,8 +1437,9 @@ regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
14371437
* Check whether char is a letter (and, hence, subject to case-folding)
14381438
*
14391439
* In multibyte character sets or with ICU, we can't use isalpha, and it does
1440-
* not seem worth trying to convert to wchar_t to use iswalpha. Instead, just
1441-
* assume any multibyte char is potentially case-varying.
1440+
* not seem worth trying to convert to wchar_t to use iswalpha or u_isalpha.
1441+
* Instead, just assume any non-ASCII char is potentially case-varying, and
1442+
* hard-wire knowledge of which ASCII chars are letters.
14421443
*/
14431444
static int
14441445
pattern_char_isalpha(char c, bool is_multibyte,
@@ -1449,7 +1450,8 @@ pattern_char_isalpha(char c, bool is_multibyte,
14491450
else if (is_multibyte && IS_HIGHBIT_SET(c))
14501451
return true;
14511452
else if (locale && locale->provider == COLLPROVIDER_ICU)
1452-
return IS_HIGHBIT_SET(c) ? true : false;
1453+
return IS_HIGHBIT_SET(c) ||
1454+
(c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
14531455
#ifdef HAVE_LOCALE_T
14541456
else if (locale && locale->provider == COLLPROVIDER_LIBC)
14551457
return isalpha_l((unsigned char) c, locale->info.lt);

src/test/regress/expected/collate.icu.utf8.out

+34-1
Original file line number | Diff line number | Diff line change
@@ -983,6 +983,38 @@ SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_t
983983
collate_test1_idx4 | CREATE INDEX collate_test1_idx4 ON collate_tests.collate_test1 USING btree (((b || 'foo'::text)) COLLATE "POSIX")
984984
(4 rows)
985985

986+
set enable_seqscan = off;
987+
explain (costs off)
988+
select * from collate_test1 where b ilike 'abc';
989+
QUERY PLAN
990+
-------------------------------
991+
Seq Scan on collate_test1
992+
Filter: (b ~~* 'abc'::text)
993+
(2 rows)
994+
995+
select * from collate_test1 where b ilike 'abc';
996+
a | b
997+
---+-----
998+
1 | abc
999+
4 | ABC
1000+
(2 rows)
1001+
1002+
explain (costs off)
1003+
select * from collate_test1 where b ilike 'ABC';
1004+
QUERY PLAN
1005+
-------------------------------
1006+
Seq Scan on collate_test1
1007+
Filter: (b ~~* 'ABC'::text)
1008+
(2 rows)
1009+
1010+
select * from collate_test1 where b ilike 'ABC';
1011+
a | b
1012+
---+-----
1013+
1 | abc
1014+
4 | ABC
1015+
(2 rows)
1016+
1017+
reset enable_seqscan;
9861018
-- schema manipulation commands
9871019
CREATE ROLE regress_test_role;
9881020
CREATE SCHEMA test_schema;
@@ -1867,8 +1899,9 @@ SELECT (SELECT count(*) FROM test33_0) <> (SELECT count(*) FROM test33_1);
18671899
(1 row)
18681900

18691901
-- cleanup
1902+
RESET search_path;
18701903
SET client_min_messages TO warning;
18711904
DROP SCHEMA collate_tests CASCADE;
1872-
RESET search_path;
1905+
RESET client_min_messages;
18731906
-- leave a collation for pg_upgrade test
18741907
CREATE COLLATION coll_icu_upgrade FROM "und-x-icu";

src/test/regress/sql/collate.icu.utf8.sql

+11-1
Original file line number | Diff line number | Diff line change
@@ -341,6 +341,15 @@ CREATE INDEX collate_test1_idx6 ON collate_test1 ((a COLLATE "C")); -- fail
341341

342342
SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_test%_idx%' ORDER BY 1;
343343

344+
set enable_seqscan = off;
345+
explain (costs off)
346+
select * from collate_test1 where b ilike 'abc';
347+
select * from collate_test1 where b ilike 'abc';
348+
explain (costs off)
349+
select * from collate_test1 where b ilike 'ABC';
350+
select * from collate_test1 where b ilike 'ABC';
351+
reset enable_seqscan;
352+
344353

345354
-- schema manipulation commands
346355

@@ -712,9 +721,10 @@ SELECT (SELECT count(*) FROM test33_0) <> (SELECT count(*) FROM test33_1);
712721

713722

714723
-- cleanup
724+
RESET search_path;
715725
SET client_min_messages TO warning;
716726
DROP SCHEMA collate_tests CASCADE;
717-
RESET search_path;
727+
RESET client_min_messages;
718728

719729
-- leave a collation for pg_upgrade test
720730
CREATE COLLATION coll_icu_upgrade FROM "und-x-icu";

0 commit comments

Comments
 (0)