Skip to content

Commit 5fcb079

Browse files
committed
Add sample text search dictionary templates and parsers, to replace the
hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov.
1 parent fb631db commit 5fcb079

24 files changed

+1324
-9
lines changed

contrib/Makefile

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# $PostgreSQL: pgsql/contrib/Makefile,v 1.80 2007/10/13 22:59:43 tgl Exp $
1+
# $PostgreSQL: pgsql/contrib/Makefile,v 1.81 2007/10/15 21:36:49 tgl Exp $
22

33
subdir = contrib
44
top_builddir = ..
@@ -10,6 +10,8 @@ WANTED_DIRS = \
1010
chkpass \
1111
cube \
1212
dblink \
13+
dict_int \
14+
dict_xsyn \
1315
earthdistance \
1416
fuzzystrmatch \
1517
hstore \
@@ -31,6 +33,7 @@ WANTED_DIRS = \
3133
seg \
3234
spi \
3335
tablefunc \
36+
test_parser \
3437
vacuumlo
3538

3639
ifeq ($(with_openssl),yes)

contrib/README

+23-8
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
The PostgreSQL contrib tree
32
---------------------------
43

@@ -29,8 +28,8 @@ adminpack -
2928
by Dave Page <[email protected]>
3029

3130
btree_gist -
32-
Support for emulating BTREE indexing in GiST
33-
by Oleg Bartunov <[email protected]> and Teodor Sigaev <[email protected]>
31+
Support for emulating BTREE indexing in GiST
32+
by Oleg Bartunov <[email protected]> and Teodor Sigaev <[email protected]>
3433

3534
chkpass -
3635
An auto-encrypted password datatype
@@ -44,17 +43,25 @@ dblink -
4443
Allows remote query execution
4544
by Joe Conway <[email protected]>
4645

46+
dict_int -
47+
Text search dictionary template for integers
48+
by Sergey Karpov <[email protected]>
49+
50+
dict_xsyn -
51+
Text search dictionary template for extended synonym processing
52+
by Sergey Karpov <[email protected]>
53+
4754
earthdistance -
48-
Operator for computing earth distance for two points
55+
Operator for computing earth distance between two points
4956
by Hal Snyder <[email protected]>
5057

5158
fuzzystrmatch -
5259
Levenshtein, metaphone, and soundex fuzzy string matching
5360
by Joe Conway <[email protected]>, Joel Burton <[email protected]>
5461

5562
hstore -
56-
Hstore - module for storing (key,value) pairs
57-
by Oleg Bartunov <[email protected]> and Teodor Sigaev <[email protected]>
63+
Module for storing (key, value) pairs
64+
by Oleg Bartunov <[email protected]> and Teodor Sigaev <[email protected]>
5865

5966
intagg -
6067
Integer aggregator
@@ -92,6 +99,10 @@ pg_freespacemap -
9299
Displays the contents of the free space map (FSM)
93100
by Mark Kirkwood <[email protected]>
94101

102+
pg_standby -
103+
Sample archive_command for warm standby operation
104+
by Simon Riggs <[email protected]>
105+
95106
pg_trgm -
96107
Functions for determining the similarity of text based on trigram
97108
matching.
@@ -110,7 +121,7 @@ pgrowlocks -
110121
by Tatsuo Ishii <[email protected]>
111122

112123
pgstattuple -
113-
A function to return statistics about "dead" tuples and free
124+
Functions to return statistics about "dead" tuples and free
114125
space within a table
115126
by Tatsuo Ishii <[email protected]>
116127

@@ -126,12 +137,16 @@ sslinfo -
126137
by Victor Wagner <[email protected]>
127138

128139
start-scripts -
129-
Scripts for starting the server at boot time.
140+
Scripts for starting the server at boot time on various platforms.
130141

131142
tablefunc -
132143
Examples of functions returning tables
133144
by Joe Conway <[email protected]>
134145

146+
test_parser -
147+
Sample text search parser
148+
by Sergey Karpov <[email protected]>
149+
135150
tsearch2 -
136151
Full-text-index support using GiST
137152
by Teodor Sigaev <[email protected]> and Oleg Bartunov

contrib/dict_int/Makefile

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# $PostgreSQL: pgsql/contrib/dict_int/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $
2+
3+
# Name of the module and the object file(s) it is linked from.
MODULE_big = dict_int
4+
OBJS = dict_int.o
5+
# SQL install script; presumably generated from dict_int.sql.in at build time.
DATA_built = dict_int.sql
6+
# SQL script shipped as-is.
DATA = uninstall_dict_int.sql
7+
DOCS = README.dict_int
8+
# Regression test name.
REGRESS = dict_int
9+
10+
# With USE_PGXS defined, locate the PGXS makefile through pg_config and
# include it; otherwise include the makefiles from the surrounding source
# tree, two directories up.
ifdef USE_PGXS
11+
PG_CONFIG = pg_config
12+
PGXS := $(shell $(PG_CONFIG) --pgxs)
13+
include $(PGXS)
14+
else
15+
subdir = contrib/dict_int
16+
top_builddir = ../..
17+
include $(top_builddir)/src/Makefile.global
18+
include $(top_srcdir)/contrib/contrib-global.mk
19+
endif

contrib/dict_int/README.dict_int

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
Dictionary for integers
2+
=======================
3+
4+
The motivation for this example dictionary is to control the indexing of
5+
integers (signed and unsigned), and, consequently, to minimize the number of
6+
unique words, which greatly affects the performance of searching.
7+
8+
* Configuration
9+
10+
The dictionary accepts two options:
11+
12+
- The MAXLEN parameter specifies the maximum length (number of digits)
13+
allowed in an integer word. The default value is 6.
14+
15+
- The REJECTLONG parameter specifies whether an overlength integer should be
16+
truncated or ignored. If REJECTLONG=FALSE (default), the dictionary returns
17+
the first MAXLEN digits of the integer. If REJECTLONG=TRUE, the
18+
dictionary treats an overlength integer as a stop word, so that it will
19+
not be indexed.
20+
21+
* Usage
22+
23+
1. Compile and install
24+
25+
2. Load dictionary
26+
27+
psql mydb < dict_int.sql
28+
29+
3. Test it
30+
31+
mydb# select ts_lexize('intdict', '12345678');
32+
ts_lexize
33+
-----------
34+
{123456}
35+
36+
4. Change its options as you wish
37+
38+
mydb# ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = 4, REJECTLONG = true);
39+
ALTER TEXT SEARCH DICTIONARY
40+
41+
That's all.

contrib/dict_int/dict_int.c

+99
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
/*-------------------------------------------------------------------------
2+
*
3+
* dict_int.c
4+
* Text search dictionary for integers
5+
*
6+
* Copyright (c) 2007, PostgreSQL Global Development Group
7+
*
8+
* IDENTIFICATION
9+
* $PostgreSQL: pgsql/contrib/dict_int/dict_int.c,v 1.1 2007/10/15 21:36:50 tgl Exp $
10+
*
11+
*-------------------------------------------------------------------------
12+
*/
13+
#include "postgres.h"
14+
15+
#include "commands/defrem.h"
16+
#include "fmgr.h"
17+
#include "tsearch/ts_public.h"
18+
19+
PG_MODULE_MAGIC;
20+
21+
22+
/*
 * Per-dictionary settings: allocated and filled in by dintdict_init(),
 * read by dintdict_lexize().
 */
typedef struct {
23+
/* longest token length that dintdict_lexize() passes through unchanged */
int maxlen;
24+
/* true: drop over-length tokens; false: truncate them to maxlen */
bool rejectlong;
25+
} DictInt;
26+
27+
28+
/*
 * Declarations for the two functions defined in this file: the dictionary
 * initializer and the lexize routine.
 */
PG_FUNCTION_INFO_V1(dintdict_init);
29+
Datum dintdict_init(PG_FUNCTION_ARGS);
30+
31+
PG_FUNCTION_INFO_V1(dintdict_lexize);
32+
Datum dintdict_lexize(PG_FUNCTION_ARGS);
33+
34+
Datum
35+
dintdict_init(PG_FUNCTION_ARGS)
36+
{
37+
List *dictoptions = (List *) PG_GETARG_POINTER(0);
38+
DictInt *d;
39+
ListCell *l;
40+
41+
d = (DictInt *) palloc0(sizeof(DictInt));
42+
d->maxlen = 6;
43+
d->rejectlong = false;
44+
45+
foreach(l, dictoptions)
46+
{
47+
DefElem *defel = (DefElem *) lfirst(l);
48+
49+
if (pg_strcasecmp(defel->defname, "MAXLEN") == 0)
50+
{
51+
d->maxlen = atoi(defGetString(defel));
52+
}
53+
else if (pg_strcasecmp(defel->defname, "REJECTLONG") == 0)
54+
{
55+
d->rejectlong = defGetBoolean(defel);
56+
}
57+
else
58+
{
59+
ereport(ERROR,
60+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
61+
errmsg("unrecognized intdict parameter: \"%s\"",
62+
defel->defname)));
63+
}
64+
}
65+
66+
PG_RETURN_POINTER(d);
67+
}
68+
69+
Datum
70+
dintdict_lexize(PG_FUNCTION_ARGS)
71+
{
72+
DictInt *d = (DictInt*)PG_GETARG_POINTER(0);
73+
char *in = (char*)PG_GETARG_POINTER(1);
74+
char *txt = pnstrdup(in, PG_GETARG_INT32(2));
75+
TSLexeme *res=palloc(sizeof(TSLexeme)*2);
76+
77+
res[1].lexeme = NULL;
78+
if (PG_GETARG_INT32(2) > d->maxlen)
79+
{
80+
if ( d->rejectlong )
81+
{
82+
/* reject by returning void array */
83+
pfree(txt);
84+
res[0].lexeme = NULL;
85+
}
86+
else
87+
{
88+
/* trim integer */
89+
txt[d->maxlen] = '\0';
90+
res[0].lexeme = txt;
91+
}
92+
}
93+
else
94+
{
95+
res[0].lexeme = txt;
96+
}
97+
98+
PG_RETURN_POINTER(res);
99+
}

contrib/dict_int/dict_int.sql.in

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
-- $PostgreSQL: pgsql/contrib/dict_int/dict_int.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $
2+
3+
-- Adjust this setting to control where the objects get created.
4+
SET search_path = public;
5+
6+
-- Create all objects inside one transaction block.
BEGIN;
7+
8+
-- C function that builds the dictionary state from its options.
CREATE FUNCTION dintdict_init(internal)
9+
RETURNS internal
10+
AS 'MODULE_PATHNAME'
11+
LANGUAGE C STRICT;
12+
13+
-- C function that normalizes a single token.
CREATE FUNCTION dintdict_lexize(internal, internal, internal, internal)
14+
RETURNS internal
15+
AS 'MODULE_PATHNAME'
16+
LANGUAGE C STRICT;
17+
18+
-- Template tying the two C functions together.
CREATE TEXT SEARCH TEMPLATE intdict_template (
19+
LEXIZE = dintdict_lexize,
20+
INIT = dintdict_init
21+
);
22+
23+
-- Dictionary built from the template; no options are given here.
CREATE TEXT SEARCH DICTIONARY intdict (
24+
TEMPLATE = intdict_template
25+
);
26+
27+
COMMENT ON TEXT SEARCH DICTIONARY intdict IS 'dictionary for integers';
28+
29+
END;

0 commit comments

Comments
 (0)