summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexander Korotkov2020-11-15 05:52:12 +0000
committerAlexander Korotkov2020-11-15 05:52:35 +0000
commit935f6666502250abde8615bc7805a6e5aa05a066 (patch)
tree7a7d841a385fa0c310061a5b45f95af4965e0598
parent92bf7e2d027466d750b4ac5b026f6f4ac29be881 (diff)
Handle equality operator in contrib/pg_trgm
Obviously, in order to equality operator be satisfiable, target string must contain all the trigrams of the search string. On this base, we implement equality operator in GiST/GIN indexes with recheck. Discussion: https://postgr.es/m/CAOBaU_YWwtT7tdggtROacjdOdeYHCz-tmSwuC-j-TOG-g97J0w%40mail.gmail.com Author: Julien Rouhaud Reviewed-by: Tom Lane, Alexander Korotkov, Georgios Kokolatos, Erik Rijkers
-rw-r--r--contrib/pg_trgm/Makefile2
-rw-r--r--contrib/pg_trgm/expected/pg_trgm.out204
-rw-r--r--contrib/pg_trgm/pg_trgm--1.5--1.6.sql10
-rw-r--r--contrib/pg_trgm/pg_trgm.control2
-rw-r--r--contrib/pg_trgm/sql/pg_trgm.sql40
-rw-r--r--contrib/pg_trgm/trgm.h1
-rw-r--r--contrib/pg_trgm/trgm_gin.c3
-rw-r--r--contrib/pg_trgm/trgm_gist.c4
-rw-r--r--doc/src/sgml/pgtrgm.sgml8
9 files changed, 264 insertions, 10 deletions
diff --git a/contrib/pg_trgm/Makefile b/contrib/pg_trgm/Makefile
index d75e9ada2e4..1fbdc9ec1ef 100644
--- a/contrib/pg_trgm/Makefile
+++ b/contrib/pg_trgm/Makefile
@@ -9,7 +9,7 @@ OBJS = \
trgm_regexp.o
EXTENSION = pg_trgm
-DATA = pg_trgm--1.4--1.5.sql pg_trgm--1.3--1.4.sql \
+DATA = pg_trgm--1.5--1.6.sql pg_trgm--1.4--1.5.sql pg_trgm--1.3--1.4.sql \
pg_trgm--1.3.sql pg_trgm--1.2--1.3.sql pg_trgm--1.1--1.2.sql \
pg_trgm--1.0--1.1.sql
PGFILEDESC = "pg_trgm - trigram matching"
diff --git a/contrib/pg_trgm/expected/pg_trgm.out b/contrib/pg_trgm/expected/pg_trgm.out
index 923c326c7bd..20141ce7f3d 100644
--- a/contrib/pg_trgm/expected/pg_trgm.out
+++ b/contrib/pg_trgm/expected/pg_trgm.out
@@ -4761,6 +4761,12 @@ insert into test2 values ('abcdef');
insert into test2 values ('quark');
insert into test2 values (' z foo bar');
insert into test2 values ('/123/-45/');
+insert into test2 values ('line 1');
+insert into test2 values ('%line 2');
+insert into test2 values ('line 3%');
+insert into test2 values ('%line 4%');
+insert into test2 values ('%li%ne 5%');
+insert into test2 values ('li_e 6');
create index test2_idx_gin on test2 using gin (t gin_trgm_ops);
set enable_seqscan=off;
explain (costs off)
@@ -4863,7 +4869,13 @@ select * from test2 where t ~ '(abc)*$';
quark
z foo bar
/123/-45/
-(4 rows)
+ line 1
+ %line 2
+ line 3%
+ %line 4%
+ %li%ne 5%
+ li_e 6
+(10 rows)
select * from test2 where t ~* 'DEF';
t
@@ -4918,7 +4930,11 @@ select * from test2 where t ~ '[a-z]{3}';
abcdef
quark
z foo bar
-(3 rows)
+ line 1
+ %line 2
+ line 3%
+ %line 4%
+(7 rows)
select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
t
@@ -4961,6 +4977,93 @@ select * from test2 where t ~ '/\d+/-\d';
/123/-45/
(1 row)
+-- test = operator
+explain (costs off)
+ select * from test2 where t = 'abcdef';
+ QUERY PLAN
+------------------------------------------
+ Bitmap Heap Scan on test2
+ Recheck Cond: (t = 'abcdef'::text)
+ -> Bitmap Index Scan on test2_idx_gin
+ Index Cond: (t = 'abcdef'::text)
+(4 rows)
+
+select * from test2 where t = 'abcdef';
+ t
+--------
+ abcdef
+(1 row)
+
+explain (costs off)
+ select * from test2 where t = '%line%';
+ QUERY PLAN
+------------------------------------------
+ Bitmap Heap Scan on test2
+ Recheck Cond: (t = '%line%'::text)
+ -> Bitmap Index Scan on test2_idx_gin
+ Index Cond: (t = '%line%'::text)
+(4 rows)
+
+select * from test2 where t = '%line%';
+ t
+---
+(0 rows)
+
+select * from test2 where t = 'li_e 1';
+ t
+---
+(0 rows)
+
+select * from test2 where t = '%line 2';
+ t
+---------
+ %line 2
+(1 row)
+
+select * from test2 where t = 'line 3%';
+ t
+---------
+ line 3%
+(1 row)
+
+select * from test2 where t = '%line 3%';
+ t
+---
+(0 rows)
+
+select * from test2 where t = '%line 4%';
+ t
+----------
+ %line 4%
+(1 row)
+
+select * from test2 where t = '%line 5%';
+ t
+---
+(0 rows)
+
+select * from test2 where t = '%li_ne 5%';
+ t
+---
+(0 rows)
+
+select * from test2 where t = '%li%ne 5%';
+ t
+-----------
+ %li%ne 5%
+(1 row)
+
+select * from test2 where t = 'line 6';
+ t
+---
+(0 rows)
+
+select * from test2 where t = 'li_e 6';
+ t
+--------
+ li_e 6
+(1 row)
+
drop index test2_idx_gin;
create index test2_idx_gist on test2 using gist (t gist_trgm_ops);
set enable_seqscan=off;
@@ -5056,7 +5159,13 @@ select * from test2 where t ~ '(abc)*$';
quark
z foo bar
/123/-45/
-(4 rows)
+ line 1
+ %line 2
+ line 3%
+ %line 4%
+ %li%ne 5%
+ li_e 6
+(10 rows)
select * from test2 where t ~* 'DEF';
t
@@ -5111,7 +5220,11 @@ select * from test2 where t ~ '[a-z]{3}';
abcdef
quark
z foo bar
-(3 rows)
+ line 1
+ %line 2
+ line 3%
+ %line 4%
+(7 rows)
select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
t
@@ -5154,6 +5267,89 @@ select * from test2 where t ~ '/\d+/-\d';
/123/-45/
(1 row)
+-- test = operator
+explain (costs off)
+ select * from test2 where t = 'abcdef';
+ QUERY PLAN
+------------------------------------------
+ Index Scan using test2_idx_gist on test2
+ Index Cond: (t = 'abcdef'::text)
+(2 rows)
+
+select * from test2 where t = 'abcdef';
+ t
+--------
+ abcdef
+(1 row)
+
+explain (costs off)
+ select * from test2 where t = '%line%';
+ QUERY PLAN
+------------------------------------------
+ Index Scan using test2_idx_gist on test2
+ Index Cond: (t = '%line%'::text)
+(2 rows)
+
+select * from test2 where t = '%line%';
+ t
+---
+(0 rows)
+
+select * from test2 where t = 'li_e 1';
+ t
+---
+(0 rows)
+
+select * from test2 where t = '%line 2';
+ t
+---------
+ %line 2
+(1 row)
+
+select * from test2 where t = 'line 3%';
+ t
+---------
+ line 3%
+(1 row)
+
+select * from test2 where t = '%line 3%';
+ t
+---
+(0 rows)
+
+select * from test2 where t = '%line 4%';
+ t
+----------
+ %line 4%
+(1 row)
+
+select * from test2 where t = '%line 5%';
+ t
+---
+(0 rows)
+
+select * from test2 where t = '%li_ne 5%';
+ t
+---
+(0 rows)
+
+select * from test2 where t = '%li%ne 5%';
+ t
+-----------
+ %li%ne 5%
+(1 row)
+
+select * from test2 where t = 'line 6';
+ t
+---
+(0 rows)
+
+select * from test2 where t = 'li_e 6';
+ t
+--------
+ li_e 6
+(1 row)
+
-- Check similarity threshold (bug #14202)
CREATE TEMP TABLE restaurants (city text);
INSERT INTO restaurants SELECT 'Warsaw' FROM generate_series(1, 10000);
diff --git a/contrib/pg_trgm/pg_trgm--1.5--1.6.sql b/contrib/pg_trgm/pg_trgm--1.5--1.6.sql
new file mode 100644
index 00000000000..9e74684eadd
--- /dev/null
+++ b/contrib/pg_trgm/pg_trgm--1.5--1.6.sql
@@ -0,0 +1,10 @@
+/* contrib/pg_trgm/pg_trgm--1.5--1.6.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.6'" to load this file. \quit
+
+ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
+ OPERATOR 11 pg_catalog.= (text, text);
+
+ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD
+ OPERATOR 11 pg_catalog.= (text, text);
diff --git a/contrib/pg_trgm/pg_trgm.control b/contrib/pg_trgm/pg_trgm.control
index ed4487e96b2..1d6a9ddf259 100644
--- a/contrib/pg_trgm/pg_trgm.control
+++ b/contrib/pg_trgm/pg_trgm.control
@@ -1,6 +1,6 @@
# pg_trgm extension
comment = 'text similarity measurement and index searching based on trigrams'
-default_version = '1.5'
+default_version = '1.6'
module_pathname = '$libdir/pg_trgm'
relocatable = true
trusted = true
diff --git a/contrib/pg_trgm/sql/pg_trgm.sql b/contrib/pg_trgm/sql/pg_trgm.sql
index bc2a6d525cc..6a9da24d5a7 100644
--- a/contrib/pg_trgm/sql/pg_trgm.sql
+++ b/contrib/pg_trgm/sql/pg_trgm.sql
@@ -101,6 +101,12 @@ insert into test2 values ('abcdef');
insert into test2 values ('quark');
insert into test2 values (' z foo bar');
insert into test2 values ('/123/-45/');
+insert into test2 values ('line 1');
+insert into test2 values ('%line 2');
+insert into test2 values ('line 3%');
+insert into test2 values ('%line 4%');
+insert into test2 values ('%li%ne 5%');
+insert into test2 values ('li_e 6');
create index test2_idx_gin on test2 using gin (t gin_trgm_ops);
set enable_seqscan=off;
explain (costs off)
@@ -137,6 +143,23 @@ select * from test2 where t ~ ' z foo bar';
select * from test2 where t ~ ' z foo';
select * from test2 where t ~ 'qua(?!foo)';
select * from test2 where t ~ '/\d+/-\d';
+-- test = operator
+explain (costs off)
+ select * from test2 where t = 'abcdef';
+select * from test2 where t = 'abcdef';
+explain (costs off)
+ select * from test2 where t = '%line%';
+select * from test2 where t = '%line%';
+select * from test2 where t = 'li_e 1';
+select * from test2 where t = '%line 2';
+select * from test2 where t = 'line 3%';
+select * from test2 where t = '%line 3%';
+select * from test2 where t = '%line 4%';
+select * from test2 where t = '%line 5%';
+select * from test2 where t = '%li_ne 5%';
+select * from test2 where t = '%li%ne 5%';
+select * from test2 where t = 'line 6';
+select * from test2 where t = 'li_e 6';
drop index test2_idx_gin;
create index test2_idx_gist on test2 using gist (t gist_trgm_ops);
@@ -175,6 +198,23 @@ select * from test2 where t ~ ' z foo bar';
select * from test2 where t ~ ' z foo';
select * from test2 where t ~ 'qua(?!foo)';
select * from test2 where t ~ '/\d+/-\d';
+-- test = operator
+explain (costs off)
+ select * from test2 where t = 'abcdef';
+select * from test2 where t = 'abcdef';
+explain (costs off)
+ select * from test2 where t = '%line%';
+select * from test2 where t = '%line%';
+select * from test2 where t = 'li_e 1';
+select * from test2 where t = '%line 2';
+select * from test2 where t = 'line 3%';
+select * from test2 where t = '%line 3%';
+select * from test2 where t = '%line 4%';
+select * from test2 where t = '%line 5%';
+select * from test2 where t = '%li_ne 5%';
+select * from test2 where t = '%li%ne 5%';
+select * from test2 where t = 'line 6';
+select * from test2 where t = 'li_e 6';
-- Check similarity threshold (bug #14202)
diff --git a/contrib/pg_trgm/trgm.h b/contrib/pg_trgm/trgm.h
index b616953462e..405a1d95528 100644
--- a/contrib/pg_trgm/trgm.h
+++ b/contrib/pg_trgm/trgm.h
@@ -37,6 +37,7 @@
#define WordDistanceStrategyNumber 8
#define StrictWordSimilarityStrategyNumber 9
#define StrictWordDistanceStrategyNumber 10
+#define EqualStrategyNumber 11
typedef char trgm[3];
diff --git a/contrib/pg_trgm/trgm_gin.c b/contrib/pg_trgm/trgm_gin.c
index 4dbf0ffb68a..32fafef203f 100644
--- a/contrib/pg_trgm/trgm_gin.c
+++ b/contrib/pg_trgm/trgm_gin.c
@@ -89,6 +89,7 @@ gin_extract_query_trgm(PG_FUNCTION_ARGS)
case SimilarityStrategyNumber:
case WordSimilarityStrategyNumber:
case StrictWordSimilarityStrategyNumber:
+ case EqualStrategyNumber:
trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
break;
case ILikeStrategyNumber:
@@ -221,6 +222,7 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
#endif
/* FALL THRU */
case LikeStrategyNumber:
+ case EqualStrategyNumber:
/* Check if all extracted trigrams are presented. */
res = true;
for (i = 0; i < nkeys; i++)
@@ -306,6 +308,7 @@ gin_trgm_triconsistent(PG_FUNCTION_ARGS)
#endif
/* FALL THRU */
case LikeStrategyNumber:
+ case EqualStrategyNumber:
/* Check if all extracted trigrams are presented. */
res = GIN_MAYBE;
for (i = 0; i < nkeys; i++)
diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c
index 9c0ed6ed73a..6f28db7d1ed 100644
--- a/contrib/pg_trgm/trgm_gist.c
+++ b/contrib/pg_trgm/trgm_gist.c
@@ -232,6 +232,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
case SimilarityStrategyNumber:
case WordSimilarityStrategyNumber:
case StrictWordSimilarityStrategyNumber:
+ case EqualStrategyNumber:
qtrg = generate_trgm(VARDATA(query),
querysize - VARHDRSZ);
break;
@@ -338,7 +339,8 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
#endif
/* FALL THRU */
case LikeStrategyNumber:
- /* Wildcard search is inexact */
+ case EqualStrategyNumber:
+ /* Wildcard and equal search are inexact */
*recheck = true;
/*
diff --git a/doc/src/sgml/pgtrgm.sgml b/doc/src/sgml/pgtrgm.sgml
index 5365b0681e5..8b3193e12c4 100644
--- a/doc/src/sgml/pgtrgm.sgml
+++ b/doc/src/sgml/pgtrgm.sgml
@@ -419,9 +419,11 @@
the purpose of very fast similarity searches. These index types support
the above-described similarity operators, and additionally support
trigram-based index searches for <literal>LIKE</literal>, <literal>ILIKE</literal>,
- <literal>~</literal> and <literal>~*</literal> queries. (These indexes do not
- support equality nor simple comparison operators, so you may need a
- regular B-tree index too.)
+ <literal>~</literal> and <literal>~*</literal> queries. Beginning in
+ <productname>PostgreSQL</productname> 14, these indexes also support
+ equality operator (simple comparison operators are not supported).
+ Note that those indexes may not be as efficient as regular B-tree indexes
+ for equality operator.
</para>
<para>