Skip to content

Commit f1dadd3

Browse files
committed
Set pd_lower on internal GIN posting tree pages.
This allows squeezing out the unused space in full-page writes. And more importantly, it can be a useful debugging aid. In hindsight we should've done this back when GIN was added - we wouldn't need the 'maxoff' field in the page opaque struct if we had used pd_lower and pd_upper like on normal pages. But as long as there can be pages in the index that have been binary-upgraded from pre-9.4 versions, we can't rely on that, and have to continue using 'maxoff'. Most of the code churn comes from renaming some macros, now that they're used on internal pages, too. This change is completely backwards-compatible, no effect on pg_upgrade.
1 parent 69671ab commit f1dadd3

File tree

4 files changed

+78
-41
lines changed

4 files changed

+78
-41
lines changed

src/backend/access/gin/gindatapage.c

+46-23
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,15 @@ GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset)
390390
}
391391
memcpy(ptr, data, sizeof(PostingItem));
392392

393-
GinPageGetOpaque(page)->maxoff++;
393+
maxoff++;
394+
GinPageGetOpaque(page)->maxoff = maxoff;
395+
396+
/*
397+
* Also set pd_lower to the end of the posting items, to follow the
398+
* "standard" page layout, so that we can squeeze out the unused space
399+
* from full-page images.
400+
*/
401+
GinDataPageSetDataSize(page, maxoff * sizeof(PostingItem));
394402
}
395403

396404
/*
@@ -409,7 +417,10 @@ GinPageDeletePostingItem(Page page, OffsetNumber offset)
409417
GinDataPageGetPostingItem(page, offset + 1),
410418
sizeof(PostingItem) * (maxoff - offset));
411419

412-
GinPageGetOpaque(page)->maxoff--;
420+
maxoff--;
421+
GinPageGetOpaque(page)->maxoff = maxoff;
422+
423+
GinDataPageSetDataSize(page, maxoff * sizeof(PostingItem));
413424
}
414425

415426
/*
@@ -520,7 +531,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
520531
* a single byte, and we can use all the free space on the old page as
521532
* well as the new page. For simplicity, ignore segment overhead etc.
522533
*/
523-
maxitems = Min(maxitems, freespace + GinDataLeafMaxContentSize);
534+
maxitems = Min(maxitems, freespace + GinDataPageMaxDataSize);
524535
}
525536
else
526537
{
@@ -535,7 +546,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
535546
int nnewsegments;
536547

537548
nnewsegments = freespace / GinPostingListSegmentMaxSize;
538-
nnewsegments += GinDataLeafMaxContentSize / GinPostingListSegmentMaxSize;
549+
nnewsegments += GinDataPageMaxDataSize / GinPostingListSegmentMaxSize;
539550
maxitems = Min(maxitems, nnewsegments * MinTuplesPerSegment);
540551
}
541552

@@ -648,8 +659,8 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
648659
leaf->lastleft = dlist_prev_node(&leaf->segments, leaf->lastleft);
649660
}
650661
}
651-
Assert(leaf->lsize <= GinDataLeafMaxContentSize);
652-
Assert(leaf->rsize <= GinDataLeafMaxContentSize);
662+
Assert(leaf->lsize <= GinDataPageMaxDataSize);
663+
Assert(leaf->rsize <= GinDataPageMaxDataSize);
653664

654665
/*
655666
* Fetch the max item in the left page's last segment; it becomes the
@@ -716,7 +727,7 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
716727
if (seginfo->seg)
717728
oldsegsize = SizeOfGinPostingList(seginfo->seg);
718729
else
719-
oldsegsize = GinDataLeafMaxContentSize;
730+
oldsegsize = GinDataPageMaxDataSize;
720731

721732
cleaned = ginVacuumItemPointers(gvs,
722733
seginfo->items,
@@ -987,8 +998,8 @@ dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf)
987998
}
988999
}
9891000

990-
Assert(newsize <= GinDataLeafMaxContentSize);
991-
GinDataLeafPageSetPostingListSize(page, newsize);
1001+
Assert(newsize <= GinDataPageMaxDataSize);
1002+
GinDataPageSetDataSize(page, newsize);
9921003
}
9931004

9941005
/*
@@ -1043,7 +1054,7 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
10431054
}
10441055
}
10451056
Assert(lsize == leaf->lsize);
1046-
GinDataLeafPageSetPostingListSize(lpage, lsize);
1057+
GinDataPageSetDataSize(lpage, lsize);
10471058
*GinDataPageGetRightBound(lpage) = lbound;
10481059

10491060
/* Copy the segments that go to the right page */
@@ -1067,7 +1078,7 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
10671078
break;
10681079
}
10691080
Assert(rsize == leaf->rsize);
1070-
GinDataLeafPageSetPostingListSize(rpage, rsize);
1081+
GinDataPageSetDataSize(rpage, rsize);
10711082
*GinDataPageGetRightBound(rpage) = rbound;
10721083

10731084
/* Create WAL record */
@@ -1139,7 +1150,7 @@ dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
11391150
data.newitem = *pitem;
11401151

11411152
rdata.buffer = buf;
1142-
rdata.buffer_std = false;
1153+
rdata.buffer_std = TRUE;
11431154
rdata.data = (char *) &data;
11441155
rdata.len = sizeof(ginxlogInsertDataInternal);
11451156
rdata.next = NULL;
@@ -1183,6 +1194,8 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
11831194
Page oldpage = BufferGetPage(origbuf);
11841195
OffsetNumber off = stack->off;
11851196
int nitems = GinPageGetOpaque(oldpage)->maxoff;
1197+
int nleftitems;
1198+
int nrightitems;
11861199
Size pageSize = PageGetPageSize(oldpage);
11871200
ItemPointerData oldbound = *GinDataPageGetRightBound(oldpage);
11881201
ItemPointer bound;
@@ -1226,17 +1239,27 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
12261239
separator = GinNonLeafDataPageGetFreeSpace(rpage) / sizeof(PostingItem);
12271240
else
12281241
separator = nitems / 2;
1242+
nleftitems = separator;
1243+
nrightitems = nitems - separator;
12291244

1230-
memcpy(GinDataPageGetPostingItem(lpage, FirstOffsetNumber), allitems, separator * sizeof(PostingItem));
1231-
GinPageGetOpaque(lpage)->maxoff = separator;
1245+
memcpy(GinDataPageGetPostingItem(lpage, FirstOffsetNumber),
1246+
allitems,
1247+
nleftitems * sizeof(PostingItem));
1248+
GinPageGetOpaque(lpage)->maxoff = nleftitems;
12321249
memcpy(GinDataPageGetPostingItem(rpage, FirstOffsetNumber),
1233-
&allitems[separator], (nitems - separator) * sizeof(PostingItem));
1234-
GinPageGetOpaque(rpage)->maxoff = nitems - separator;
1250+
&allitems[separator],
1251+
nrightitems * sizeof(PostingItem));
1252+
GinPageGetOpaque(rpage)->maxoff = nrightitems;
1253+
1254+
/*
1255+
* Also set pd_lower for both pages, like GinDataPageAddPostingItem does.
1256+
*/
1257+
GinDataPageSetDataSize(lpage, nleftitems * sizeof(PostingItem));
1258+
GinDataPageSetDataSize(rpage, nrightitems * sizeof(PostingItem));
12351259

12361260
/* set up right bound for left page */
12371261
bound = GinDataPageGetRightBound(lpage);
1238-
*bound = GinDataPageGetPostingItem(lpage,
1239-
GinPageGetOpaque(lpage)->maxoff)->key;
1262+
*bound = GinDataPageGetPostingItem(lpage, nleftitems)->key;
12401263

12411264
/* set up right bound for right page */
12421265
*GinDataPageGetRightBound(rpage) = oldbound;
@@ -1619,7 +1642,7 @@ leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining)
16191642
* copying to the page. Did we exceed the size that fits on one page?
16201643
*/
16211644
segsize = SizeOfGinPostingList(seginfo->seg);
1622-
if (pgused + segsize > GinDataLeafMaxContentSize)
1645+
if (pgused + segsize > GinDataPageMaxDataSize)
16231646
{
16241647
if (!needsplit)
16251648
{
@@ -1659,8 +1682,8 @@ leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining)
16591682
else
16601683
leaf->rsize = pgused;
16611684

1662-
Assert(leaf->lsize <= GinDataLeafMaxContentSize);
1663-
Assert(leaf->rsize <= GinDataLeafMaxContentSize);
1685+
Assert(leaf->lsize <= GinDataPageMaxDataSize);
1686+
Assert(leaf->rsize <= GinDataPageMaxDataSize);
16641687

16651688
/*
16661689
* Make a palloc'd copy of every segment after the first modified one,
@@ -1735,7 +1758,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
17351758
GinPostingListSegmentMaxSize,
17361759
&npacked);
17371760
segsize = SizeOfGinPostingList(segment);
1738-
if (rootsize + segsize > GinDataLeafMaxContentSize)
1761+
if (rootsize + segsize > GinDataPageMaxDataSize)
17391762
break;
17401763

17411764
memcpy(ptr, segment, segsize);
@@ -1744,7 +1767,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
17441767
nrootitems += npacked;
17451768
pfree(segment);
17461769
}
1747-
GinDataLeafPageSetPostingListSize(tmppage, rootsize);
1770+
GinDataPageSetDataSize(tmppage, rootsize);
17481771

17491772
/*
17501773
* All set. Get a new physical page, and copy the in-memory page to it.

src/backend/access/gin/ginvacuum.c

+8-1
Original file line numberDiff line numberDiff line change
@@ -301,14 +301,21 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
301301
data.leftBlkno = leftBlkno;
302302
data.rightLink = GinPageGetOpaque(page)->rightlink;
303303

304+
/*
305+
* We can't pass buffer_std = TRUE, because we didn't set pd_lower
306+
* on pre-9.4 versions. The page might've been binary-upgraded from
307+
* an older version, and hence not have pd_lower set correctly.
308+
* The same goes for the left page. The parent page is different: removing
309+
* the item from it updated the parent's pd_lower, so we know that's OK at this point.
310+
*/
304311
rdata[0].buffer = dBuffer;
305312
rdata[0].buffer_std = FALSE;
306313
rdata[0].data = NULL;
307314
rdata[0].len = 0;
308315
rdata[0].next = rdata + 1;
309316

310317
rdata[1].buffer = pBuffer;
311-
rdata[1].buffer_std = FALSE;
318+
rdata[1].buffer_std = TRUE;
312319
rdata[1].data = NULL;
313320
rdata[1].len = 0;
314321
rdata[1].next = rdata + 2;

src/backend/access/gin/ginxlog.c

+7-7
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ ginRedoCreatePTree(XLogRecPtr lsn, XLogRecord *record)
9696
/* Place page data */
9797
memcpy(GinDataLeafPageGetPostingList(page), ptr, data->size);
9898

99-
GinDataLeafPageSetPostingListSize(page, data->size);
99+
GinDataPageSetDataSize(page, data->size);
100100

101101
PageSetLSN(page, lsn);
102102

@@ -169,7 +169,7 @@ ginRedoRecompress(Page page, ginxlogRecompressDataLeaf *data)
169169
totalsize = SizeOfGinPostingList(plist);
170170

171171
memcpy(GinDataLeafPageGetPostingList(page), plist, totalsize);
172-
GinDataLeafPageSetPostingListSize(page, totalsize);
172+
GinDataPageSetDataSize(page, totalsize);
173173
GinPageSetCompressed(page);
174174
GinPageGetOpaque(page)->maxoff = InvalidOffsetNumber;
175175
}
@@ -296,7 +296,7 @@ ginRedoRecompress(Page page, ginxlogRecompressDataLeaf *data)
296296
}
297297

298298
totalsize = segmentend - (Pointer) GinDataLeafPageGetPostingList(page);
299-
GinDataLeafPageSetPostingListSize(page, totalsize);
299+
GinDataPageSetDataSize(page, totalsize);
300300
}
301301

302302
static void
@@ -423,14 +423,14 @@ ginRedoSplitData(Page lpage, Page rpage, void *rdata)
423423
Pointer lptr = (Pointer) rdata + sizeof(ginxlogSplitDataLeaf);
424424
Pointer rptr = lptr + data->lsize;
425425

426-
Assert(data->lsize > 0 && data->lsize <= GinDataLeafMaxContentSize);
427-
Assert(data->rsize > 0 && data->rsize <= GinDataLeafMaxContentSize);
426+
Assert(data->lsize > 0 && data->lsize <= GinDataPageMaxDataSize);
427+
Assert(data->rsize > 0 && data->rsize <= GinDataPageMaxDataSize);
428428

429429
memcpy(GinDataLeafPageGetPostingList(lpage), lptr, data->lsize);
430430
memcpy(GinDataLeafPageGetPostingList(rpage), rptr, data->rsize);
431431

432-
GinDataLeafPageSetPostingListSize(lpage, data->lsize);
433-
GinDataLeafPageSetPostingListSize(rpage, data->rsize);
432+
GinDataPageSetDataSize(lpage, data->lsize);
433+
GinDataPageSetDataSize(rpage, data->rsize);
434434
*GinDataPageGetRightBound(lpage) = data->lrightbound;
435435
*GinDataPageGetRightBound(rpage) = data->rrightbound;
436436
}

src/include/access/gin_private.h

+17-10
Original file line numberDiff line numberDiff line change
@@ -257,11 +257,6 @@ typedef signed char GinNullCategory;
257257
(GinPostingList *) ((PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData))))
258258
#define GinDataLeafPageGetPostingListSize(page) \
259259
(((PageHeader) page)->pd_lower - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(ItemPointerData)))
260-
#define GinDataLeafPageSetPostingListSize(page, size) \
261-
{ \
262-
Assert(size <= GinDataLeafMaxContentSize); \
263-
((PageHeader) page)->pd_lower = (size) + MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(ItemPointerData)); \
264-
}
265260

266261
#define GinDataLeafPageIsEmpty(page) \
267262
(GinPageIsCompressed(page) ? (GinDataLeafPageGetPostingListSize(page) == 0) : (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber))
@@ -281,13 +276,25 @@ typedef signed char GinNullCategory;
281276
#define GinDataPageGetPostingItem(page, i) \
282277
((PostingItem *) (GinDataPageGetData(page) + ((i)-1) * sizeof(PostingItem)))
283278

279+
/*
280+
* Note: there is no GinDataPageGetDataSize macro, because before version
281+
* 9.4, we didn't set pd_lower on data pages. There can be pages in the index
282+
* that were binary-upgraded from earlier versions and still have an invalid
283+
* pd_lower, so we cannot trust it in general. Compressed posting tree leaf
284+
* pages are new in 9.4, however, so we can trust them; see
285+
* GinDataLeafPageGetPostingListSize.
286+
*/
287+
#define GinDataPageSetDataSize(page, size) \
288+
{ \
289+
Assert(size <= GinDataPageMaxDataSize); \
290+
((PageHeader) page)->pd_lower = (size) + MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(ItemPointerData)); \
291+
}
292+
284293
#define GinNonLeafDataPageGetFreeSpace(page) \
285-
(BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \
286-
- MAXALIGN(sizeof(ItemPointerData)) \
287-
- GinPageGetOpaque(page)->maxoff * sizeof(PostingItem) \
288-
- MAXALIGN(sizeof(GinPageOpaqueData)))
294+
(GinDataPageMaxDataSize - \
295+
GinPageGetOpaque(page)->maxoff * sizeof(PostingItem))
289296

290-
#define GinDataLeafMaxContentSize \
297+
#define GinDataPageMaxDataSize \
291298
(BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \
292299
- MAXALIGN(sizeof(ItemPointerData)) \
293300
- MAXALIGN(sizeof(GinPageOpaqueData)))

0 commit comments

Comments
 (0)