Skip to content

Commit bde7493

Browse files
committed
Fix overflow check and comment in GIN posting list encoding.
The comment did not match what the code actually did for integers with the 43rd bit set. You get an integer like that, if you have a posting list with two adjacent TIDs that are more than 2^31 blocks apart. According to the comment, we would store that in 6 bytes, with no continuation bit on the 6th byte, but in reality, the code encodes it using 7 bytes, with a continuation bit on the 6th byte as normal. The decoding routine also handled these 7-byte integers correctly, except for an overflow check that assumed that one integer needs at most 6 bytes. Fix the overflow check, and fix the comment to match what the code actually does. Also fix the comment that claimed that there are 17 unused bits in the 64-bit representation of an item pointer. In reality, there are 64-32-11=21. Fitting any item pointer into max 6 bytes was an important property when this was written, because in the old pre-9.4 format, item pointers were stored as plain arrays, with 6 bytes for every item pointer. The maximum of 6 bytes per integer in the new format guaranteed that we could convert any page from the old format to the new format after upgrade, so that the new format was never larger than the old format. But we hardly need to worry about that anymore, and running into that problem during upgrade, where an item pointer is expanded from 6 to 7 bytes such that the data doesn't fit on a page anymore, is implausible in practice anyway. Backpatch to all supported versions. This also includes a little test module to test these large distances between item pointers, without requiring a 16 TB table. It is not backpatched, I'm including it more for the benefit of future development of new posting list formats. Discussion: https://www.postgresql.org/message-id/33bfc20a-5c86-f50c-f5a5-58e9925d05ff%40iki.fi Reviewed-by: Masahiko Sawada, Alexander Korotkov
1 parent 720b59b commit bde7493

File tree

9 files changed

+184
-9
lines changed

9 files changed

+184
-9
lines changed

src/backend/access/gin/ginpostinglist.c

+26-9
Original file line numberDiff line numberDiff line change
@@ -23,25 +23,32 @@
2323
/*
2424
* For encoding purposes, item pointers are represented as 64-bit unsigned
2525
* integers. The lowest 11 bits represent the offset number, and the next
26-
* lowest 32 bits are the block number. That leaves 17 bits unused, i.e.
26+
* lowest 32 bits are the block number. That leaves 21 bits unused, i.e.
2727
* only 43 low bits are used.
2828
*
29+
* 11 bits is enough for the offset number, because MaxHeapTuplesPerPage <
30+
* 2^11 on all supported block sizes. We are frugal with the bits, because
31+
* smaller integers use fewer bytes in the varbyte encoding, saving disk
32+
* space. (If we get a new table AM in the future that wants to use the full
33+
* range of possible offset numbers, we'll need to change this.)
34+
*
2935
* These 43-bit integers are encoded using varbyte encoding. In each byte,
3036
* the 7 low bits contain data, while the highest bit is a continuation bit.
3137
* When the continuation bit is set, the next byte is part of the same
32-
* integer, otherwise this is the last byte of this integer. 43 bits fit
33-
* conveniently in at most 6 bytes when varbyte encoded (the 6th byte does
34-
* not need a continuation bit, because we know the max size to be 43 bits):
38+
* integer, otherwise this is the last byte of this integer. 43 bits need
39+
* at most 7 bytes in this encoding:
3540
*
3641
* 0XXXXXXX
3742
* 1XXXXXXX 0XXXXYYY
3843
* 1XXXXXXX 1XXXXYYY 0YYYYYYY
3944
* 1XXXXXXX 1XXXXYYY 1YYYYYYY 0YYYYYYY
4045
* 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY
41-
* 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY YYYYYYYY
46+
* 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY
47+
* 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0uuuuuuY
4248
*
4349
* X = bits used for offset number
4450
* Y = bits used for block number
51+
* u = unused bit
4552
*
4653
* The bytes are stored in little-endian order.
4754
*
@@ -73,6 +80,9 @@
7380
*/
7481
#define MaxHeapTuplesPerPageBits 11
7582

83+
/* Max. number of bytes needed to encode the largest supported integer. */
84+
#define MaxBytesPerInteger 7
85+
7686
static inline uint64
7787
itemptr_to_uint64(const ItemPointer iptr)
7888
{
@@ -126,33 +136,40 @@ decode_varbyte(unsigned char **ptr)
126136
unsigned char *p = *ptr;
127137
uint64 c;
128138

139+
/* 1st byte */
129140
c = *(p++);
130141
val = c & 0x7F;
131142
if (c & 0x80)
132143
{
144+
/* 2nd byte */
133145
c = *(p++);
134146
val |= (c & 0x7F) << 7;
135147
if (c & 0x80)
136148
{
149+
/* 3rd byte */
137150
c = *(p++);
138151
val |= (c & 0x7F) << 14;
139152
if (c & 0x80)
140153
{
154+
/* 4th byte */
141155
c = *(p++);
142156
val |= (c & 0x7F) << 21;
143157
if (c & 0x80)
144158
{
159+
/* 5th byte */
145160
c = *(p++);
146161
val |= (c & 0x7F) << 28;
147162
if (c & 0x80)
148163
{
164+
/* 6th byte */
149165
c = *(p++);
150166
val |= (c & 0x7F) << 35;
151167
if (c & 0x80)
152168
{
153-
/* last byte, no continuation bit */
169+
/* 7th byte, should not have continuation bit */
154170
c = *(p++);
155171
val |= c << 42;
172+
Assert((c & 0x80) == 0);
156173
}
157174
}
158175
}
@@ -208,15 +225,15 @@ ginCompressPostingList(const ItemPointer ipd, int nipd, int maxsize,
208225

209226
Assert(val > prev);
210227

211-
if (endptr - ptr >= 6)
228+
if (endptr - ptr >= MaxBytesPerInteger)
212229
encode_varbyte(delta, &ptr);
213230
else
214231
{
215232
/*
216-
* There are less than 6 bytes left. Have to check if the next
233+
* There are less than 7 bytes left. Have to check if the next
217234
* item fits in that space before writing it out.
218235
*/
219-
unsigned char buf[6];
236+
unsigned char buf[MaxBytesPerInteger];
220237
unsigned char *p = buf;
221238

222239
encode_varbyte(delta, &p);

src/test/modules/Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ SUBDIRS = \
1212
test_bloomfilter \
1313
test_ddl_deparse \
1414
test_extensions \
15+
test_ginpostinglist \
1516
test_integerset \
1617
test_parser \
1718
test_pg_dump \
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# src/test/modules/test_ginpostinglist/Makefile
2+
3+
MODULE_big = test_ginpostinglist
4+
OBJS = test_ginpostinglist.o $(WIN32RES)
5+
PGFILEDESC = "test_ginpostinglist - test code for src/backend/access/gin//ginpostinglist.c"
6+
7+
EXTENSION = test_ginpostinglist
8+
DATA = test_ginpostinglist--1.0.sql
9+
10+
REGRESS = test_ginpostinglist
11+
12+
ifdef USE_PGXS
13+
PG_CONFIG = pg_config
14+
PGXS := $(shell $(PG_CONFIG) --pgxs)
15+
include $(PGXS)
16+
else
17+
subdir = src/test/modules/test_ginpostinglist
18+
top_builddir = ../../../..
19+
include $(top_builddir)/src/Makefile.global
20+
include $(top_srcdir)/contrib/contrib-global.mk
21+
endif
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
test_ginpostinglist contains unit tests for the GIN posting list code in
2+
src/backend/access/gin/ginpostinglist.c.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
CREATE EXTENSION test_ginpostinglist;
2+
--
3+
-- All the logic is in the test_ginpostinglist() function. It will throw
4+
-- a error if something fails.
5+
--
6+
SELECT test_ginpostinglist();
7+
NOTICE: testing with (0, 1), (0, 2), max 14 bytes
8+
NOTICE: encoded 2 item pointers to 10 bytes
9+
NOTICE: testing with (0, 1), (0, 291), max 14 bytes
10+
NOTICE: encoded 2 item pointers to 10 bytes
11+
NOTICE: testing with (0, 1), (4294967294, 291), max 14 bytes
12+
NOTICE: encoded 1 item pointers to 8 bytes
13+
NOTICE: testing with (0, 1), (4294967294, 291), max 16 bytes
14+
NOTICE: encoded 2 item pointers to 16 bytes
15+
test_ginpostinglist
16+
---------------------
17+
18+
(1 row)
19+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
CREATE EXTENSION test_ginpostinglist;
2+
3+
--
4+
-- All the logic is in the test_ginpostinglist() function. It will throw
5+
-- a error if something fails.
6+
--
7+
SELECT test_ginpostinglist();
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
/* src/test/modules/test_ginpostinglist/test_ginpostinglist--1.0.sql */
2+
3+
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
4+
\echo Use "CREATE EXTENSION test_ginpostinglist" to load this file. \quit
5+
6+
CREATE FUNCTION test_ginpostinglist()
7+
RETURNS pg_catalog.void STRICT
8+
AS 'MODULE_PATHNAME' LANGUAGE C;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
/*--------------------------------------------------------------------------
2+
*
3+
* test_ginpostinglist.c
4+
* Test varbyte-encoding in ginpostinglist.c
5+
*
6+
* Copyright (c) 2019, PostgreSQL Global Development Group
7+
*
8+
* IDENTIFICATION
9+
* src/test/modules/test_ginpostinglist/test_ginpostinglist.c
10+
*
11+
* -------------------------------------------------------------------------
12+
*/
13+
#include "postgres.h"
14+
15+
#include "fmgr.h"
16+
#include "access/ginblock.h"
17+
#include "access/gin_private.h"
18+
#include "access/htup_details.h"
19+
20+
PG_MODULE_MAGIC;
21+
22+
PG_FUNCTION_INFO_V1(test_ginpostinglist);
23+
24+
/*
25+
* Encodes a pair of TIDs, and decodes it back. The first TID is always
26+
* (0, 1), the second one is formed from the blk/off arguments. The 'maxsize'
27+
* argument is passed to ginCompressPostingList(); it can be used to test the
28+
* overflow checks.
29+
*
30+
* The reason that we test a pair, instead of just a single TID, is that
31+
* the GinPostingList stores the first TID as is, and the varbyte-encoding
32+
* is only used for the deltas between TIDs. So testing a single TID would
33+
* not exercise the varbyte encoding at all.
34+
*
35+
* This function prints NOTICEs to describe what is tested, and how large the
36+
* resulting GinPostingList is. Any incorrect results, e.g. if the encode +
37+
* decode round trip doesn't return the original input, are reported as
38+
* ERRORs.
39+
*/
40+
static void
41+
test_itemptr_pair(BlockNumber blk, OffsetNumber off, int maxsize)
42+
{
43+
ItemPointerData orig_itemptrs[2];
44+
ItemPointer decoded_itemptrs;
45+
GinPostingList *pl;
46+
int nwritten;
47+
int ndecoded;
48+
49+
elog(NOTICE, "testing with (%u, %d), (%u, %d), max %d bytes",
50+
0, 1, blk, off, maxsize);
51+
ItemPointerSet(&orig_itemptrs[0], 0, 1);
52+
ItemPointerSet(&orig_itemptrs[1], blk, off);
53+
54+
/* Encode, and decode it back */
55+
pl = ginCompressPostingList(orig_itemptrs, 2, maxsize, &nwritten);
56+
elog(NOTICE, "encoded %d item pointers to %zu bytes",
57+
nwritten, SizeOfGinPostingList(pl));
58+
59+
if (SizeOfGinPostingList(pl) > maxsize)
60+
elog(ERROR, "overflow: result was %zu bytes, max %d",
61+
SizeOfGinPostingList(pl), maxsize);
62+
63+
decoded_itemptrs = ginPostingListDecode(pl, &ndecoded);
64+
if (nwritten != ndecoded)
65+
elog(NOTICE, "encoded %d itemptrs, %d came back", nwritten, ndecoded);
66+
67+
/* Check the result */
68+
if (!ItemPointerEquals(&orig_itemptrs[0], &decoded_itemptrs[0]))
69+
elog(ERROR, "mismatch on first itemptr: (%u, %d) vs (%u, %d)",
70+
0, 1,
71+
ItemPointerGetBlockNumber(&decoded_itemptrs[0]),
72+
ItemPointerGetOffsetNumber(&decoded_itemptrs[0]));
73+
74+
if (ndecoded == 2 &&
75+
!ItemPointerEquals(&orig_itemptrs[0], &decoded_itemptrs[0]))
76+
{
77+
elog(ERROR, "mismatch on second itemptr: (%u, %d) vs (%u, %d)",
78+
0, 1,
79+
ItemPointerGetBlockNumber(&decoded_itemptrs[0]),
80+
ItemPointerGetOffsetNumber(&decoded_itemptrs[0]));
81+
}
82+
}
83+
84+
/*
85+
* SQL-callable entry point to perform all tests.
86+
*/
87+
Datum
88+
test_ginpostinglist(PG_FUNCTION_ARGS)
89+
{
90+
test_itemptr_pair(0, 2, 14);
91+
test_itemptr_pair(0, MaxHeapTuplesPerPage, 14);
92+
test_itemptr_pair(MaxBlockNumber, MaxHeapTuplesPerPage, 14);
93+
test_itemptr_pair(MaxBlockNumber, MaxHeapTuplesPerPage, 16);
94+
95+
PG_RETURN_VOID();
96+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
comment = 'Test code for ginpostinglist.c'
2+
default_version = '1.0'
3+
module_pathname = '$libdir/test_ginpostinglist'
4+
relocatable = true

0 commit comments

Comments
 (0)