Skip to content

Commit 58fee28

Browse files
author
Alexander Korotkov
committed
Fix shameful bug with maximum item size in entry tree
When RUM posting trees received right-bound keys, RumMaxItemSize was accidentally adjusted to account for them. However, RumMaxItemSize applies only to the entry tree, which was not changed. This caused a long-standing bug in which long lexemes could not fit into the entry tree. This commit fixes the bug and adds a regression test for the longest lexemes with the longest position lists.
1 parent 5da261a commit 58fee28

File tree

4 files changed

+208
-4
lines changed

4 files changed

+208
-4
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ DATA = rum--1.0.sql
1212
PGFILEDESC = "RUM index access method"
1313

1414
REGRESS = rum rum_hash ruminv timestamp orderby orderby_hash altorder \
15-
altorder_hash
15+
altorder_hash limits
1616

1717
LDFLAGS_SL += $(filter -lm, $(LIBS))
1818

expected/limits.out

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
-- Check we can put and query lexemes of maximum size 2046 bytes
2+
-- with maximum posting list size.
3+
CREATE TABLE limits_test (v tsvector);
4+
INSERT INTO limits_test (SELECT (SELECT (repeat(chr(65 + num % 26), 2046) || ':' || string_agg(i::text, ','))::tsvector FROM generate_series(1,1024) i) FROM generate_series(1,1000) num);
5+
CREATE INDEX limits_test_idx ON limits_test USING rum (v);
6+
SET enable_seqscan = off;
7+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('A', 2046)::tsquery;
8+
count
9+
-------
10+
38
11+
(1 row)
12+
13+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('B', 2046)::tsquery;
14+
count
15+
-------
16+
39
17+
(1 row)
18+
19+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('C', 2046)::tsquery;
20+
count
21+
-------
22+
39
23+
(1 row)
24+
25+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('D', 2046)::tsquery;
26+
count
27+
-------
28+
39
29+
(1 row)
30+
31+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('E', 2046)::tsquery;
32+
count
33+
-------
34+
39
35+
(1 row)
36+
37+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('F', 2046)::tsquery;
38+
count
39+
-------
40+
39
41+
(1 row)
42+
43+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('G', 2046)::tsquery;
44+
count
45+
-------
46+
39
47+
(1 row)
48+
49+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('H', 2046)::tsquery;
50+
count
51+
-------
52+
39
53+
(1 row)
54+
55+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('I', 2046)::tsquery;
56+
count
57+
-------
58+
39
59+
(1 row)
60+
61+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('J', 2046)::tsquery;
62+
count
63+
-------
64+
39
65+
(1 row)
66+
67+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('K', 2046)::tsquery;
68+
count
69+
-------
70+
39
71+
(1 row)
72+
73+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('L', 2046)::tsquery;
74+
count
75+
-------
76+
39
77+
(1 row)
78+
79+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('M', 2046)::tsquery;
80+
count
81+
-------
82+
39
83+
(1 row)
84+
85+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('N', 2046)::tsquery;
86+
count
87+
-------
88+
38
89+
(1 row)
90+
91+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('O', 2046)::tsquery;
92+
count
93+
-------
94+
38
95+
(1 row)
96+
97+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('P', 2046)::tsquery;
98+
count
99+
-------
100+
38
101+
(1 row)
102+
103+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('Q', 2046)::tsquery;
104+
count
105+
-------
106+
38
107+
(1 row)
108+
109+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('R', 2046)::tsquery;
110+
count
111+
-------
112+
38
113+
(1 row)
114+
115+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('S', 2046)::tsquery;
116+
count
117+
-------
118+
38
119+
(1 row)
120+
121+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('T', 2046)::tsquery;
122+
count
123+
-------
124+
38
125+
(1 row)
126+
127+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('U', 2046)::tsquery;
128+
count
129+
-------
130+
38
131+
(1 row)
132+
133+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('V', 2046)::tsquery;
134+
count
135+
-------
136+
38
137+
(1 row)
138+
139+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('W', 2046)::tsquery;
140+
count
141+
-------
142+
38
143+
(1 row)
144+
145+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('X', 2046)::tsquery;
146+
count
147+
-------
148+
38
149+
(1 row)
150+
151+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('Y', 2046)::tsquery;
152+
count
153+
-------
154+
38
155+
(1 row)
156+
157+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('Z', 2046)::tsquery;
158+
count
159+
-------
160+
38
161+
(1 row)
162+

sql/limits.sql

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
-- Check we can put and query lexemes of maximum size 2046 bytes
2+
-- with maximum posting list size.
3+
CREATE TABLE limits_test (v tsvector);
4+
INSERT INTO limits_test (SELECT (SELECT (repeat(chr(65 + num % 26), 2046) || ':' || string_agg(i::text, ','))::tsvector FROM generate_series(1,1024) i) FROM generate_series(1,1000) num);
5+
CREATE INDEX limits_test_idx ON limits_test USING rum (v);
6+
7+
SET enable_seqscan = off;
8+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('A', 2046)::tsquery;
9+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('B', 2046)::tsquery;
10+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('C', 2046)::tsquery;
11+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('D', 2046)::tsquery;
12+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('E', 2046)::tsquery;
13+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('F', 2046)::tsquery;
14+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('G', 2046)::tsquery;
15+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('H', 2046)::tsquery;
16+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('I', 2046)::tsquery;
17+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('J', 2046)::tsquery;
18+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('K', 2046)::tsquery;
19+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('L', 2046)::tsquery;
20+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('M', 2046)::tsquery;
21+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('N', 2046)::tsquery;
22+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('O', 2046)::tsquery;
23+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('P', 2046)::tsquery;
24+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('Q', 2046)::tsquery;
25+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('R', 2046)::tsquery;
26+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('S', 2046)::tsquery;
27+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('T', 2046)::tsquery;
28+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('U', 2046)::tsquery;
29+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('V', 2046)::tsquery;
30+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('W', 2046)::tsquery;
31+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('X', 2046)::tsquery;
32+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('Y', 2046)::tsquery;
33+
SELECT COUNT(*) FROM limits_test WHERE v @@ repeat('Z', 2046)::tsquery;

src/rum.h

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -233,10 +233,19 @@ typedef signed char RumNullCategory;
233233
#define RumSetPostingOffset(itup,n) ItemPointerSetBlockNumber(&(itup)->t_tid,n)
234234
#define RumGetPosting(itup) ((Pointer) ((char*)(itup) + RumGetPostingOffset(itup)))
235235

236+
/*
237+
* Maximum size of an item on entry tree page. Make sure that we fit at least
238+
* three items on each page. (On regular B-tree indexes, we must fit at least
239+
* three items: two data items and the "high key". In RUM entry tree, we don't
240+
* currently store the high key explicitly, we just use the rightmost item on
241+
* the page, so it would actually be enough to fit two items.)
242+
*/
236243
#define RumMaxItemSize \
237-
MAXALIGN_DOWN(((BLCKSZ - SizeOfPageHeaderData - \
238-
MAXALIGN(sizeof(RumPageOpaqueData))) / 6 - \
239-
sizeof(RumKey) /* right bound */))
244+
Min(INDEX_SIZE_MASK, \
245+
MAXALIGN_DOWN(((BLCKSZ - \
246+
MAXALIGN(SizeOfPageHeaderData + 3 * sizeof(ItemIdData)) - \
247+
MAXALIGN(sizeof(RumPageOpaqueData))) / 3)))
248+
240249

241250
/*
242251
* Access macros for non-leaf entry tuples

0 commit comments

Comments
 (0)