Skip to content

Commit 3c3b8a4

Browse files
Truncate line pointer array during VACUUM.
Teach VACUUM to truncate the line pointer array of each heap page when a contiguous group of LP_UNUSED line pointers appears at the end of the array -- these unused and unreferenced items are removed from the array. This process occurs during VACUUM's second pass over the heap, right after LP_DEAD line pointers on the page (those encountered/pruned during the first pass) are marked LP_UNUSED. Truncation avoids line pointer bloat with certain workloads, particularly those involving continual range DELETEs and bulk INSERTs against the same table. Also harden heapam code to check for an out-of-range page offset number in places where we weren't already doing so. Author: Matthias van de Meent <[email protected]> Author: Peter Geoghegan <[email protected]> Reviewed-By: Masahiko Sawada <[email protected]> Reviewed-By: Peter Geoghegan <[email protected]> Discussion: https://postgr.es/m/CAEze2WjgaQc55Y5f5CQd3L=eS5CZcff2Obxp=O6pto8-f0hC4w@mail.gmail.com Discussion: https://postgr.es/m/CAH2-Wzn6a64PJM1Ggzm=uvx2otsopJMhFQj_g1rAj4GWr3ZSzw@mail.gmail.com
1 parent 3db826b commit 3c3b8a4

File tree

5 files changed

+144
-11
lines changed

5 files changed

+144
-11
lines changed

src/backend/access/heap/heapam.c

+17-5
Original file line numberDiff line numberDiff line change
@@ -635,8 +635,15 @@ heapgettup(HeapScanDesc scan,
635635
}
636636
else
637637
{
638+
/*
639+
* The previous returned tuple may have been vacuumed since the
640+
* previous scan when we use a non-MVCC snapshot, so we must
641+
* re-establish the lineoff <= PageGetMaxOffsetNumber(dp)
642+
* invariant
643+
*/
638644
lineoff = /* previous offnum */
639-
OffsetNumberPrev(ItemPointerGetOffsetNumber(&(tuple->t_self)));
645+
Min(lines,
646+
OffsetNumberPrev(ItemPointerGetOffsetNumber(&(tuple->t_self))));
640647
}
641648
/* page and lineoff now reference the physically previous tid */
642649

@@ -678,6 +685,13 @@ heapgettup(HeapScanDesc scan,
678685
lpp = PageGetItemId(dp, lineoff);
679686
for (;;)
680687
{
688+
/*
689+
* Only continue scanning the page while we have lines left.
690+
*
691+
* Note that this protects us from accessing line pointers past
692+
* PageGetMaxOffsetNumber(); both for forward scans when we resume the
693+
* table scan, and for when we start scanning a new page.
694+
*/
681695
while (linesleft > 0)
682696
{
683697
if (ItemIdIsNormal(lpp))
@@ -8556,10 +8570,8 @@ heap_xlog_vacuum(XLogReaderState *record)
85568570
ItemIdSetUnused(lp);
85578571
}
85588572

8559-
/*
8560-
* Update the page's hint bit about whether it has free pointers
8561-
*/
8562-
PageSetHasFreeLinePointers(page);
8573+
/* Attempt to truncate line pointer array now */
8574+
PageTruncateLinePointerArray(page);
85638575

85648576
PageSetLSN(page, lsn);
85658577
MarkBufferDirty(buffer);

src/backend/access/heap/pruneheap.c

+4
Original file line numberDiff line numberDiff line change
@@ -962,6 +962,10 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
962962
*/
963963
for (;;)
964964
{
965+
/* Sanity check */
966+
if (nextoffnum < FirstOffsetNumber || nextoffnum > maxoff)
967+
break;
968+
965969
lp = PageGetItemId(page, nextoffnum);
966970

967971
/* Check for broken chains */

src/backend/access/heap/vacuumlazy.c

+14-2
Original file line numberDiff line numberDiff line change
@@ -1444,7 +1444,11 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive)
14441444
if (prunestate.has_lpdead_items && vacrel->do_index_vacuuming)
14451445
{
14461446
/*
1447-
* Wait until lazy_vacuum_heap_rel() to save free space.
1447+
* Wait until lazy_vacuum_heap_rel() to save free space. This
1448+
* doesn't just save us some cycles; it also allows us to record
1449+
* any additional free space that lazy_vacuum_heap_page() will
1450+
* make available in cases where it's possible to truncate the
1451+
* page's line pointer array.
14481452
*
14491453
* Note: The one-pass (no indexes) case is only supposed to make
14501454
* it this far when there were no LP_DEAD items during pruning.
@@ -2033,6 +2037,13 @@ lazy_vacuum_all_indexes(LVRelState *vacrel)
20332037
* Pages that never had lazy_scan_prune record LP_DEAD items are not visited
20342038
* at all.
20352039
*
2040+
* We may also be able to truncate the line pointer array of the heap pages we
2041+
* visit. If there is a contiguous group of LP_UNUSED items at the end of the
2042+
* array, it can be reclaimed as free space. These LP_UNUSED items usually
2043+
* start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2044+
* each page to LP_UNUSED, and then consider if it's possible to truncate the
2045+
* page's line pointer array).
2046+
*
20362047
* Note: the reason for doing this as a second pass is we cannot remove the
20372048
* tuples until we've removed their index entries, and we want to process
20382049
* index entry removal in batches as large as possible.
@@ -2175,7 +2186,8 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
21752186

21762187
Assert(uncnt > 0);
21772188

2178-
PageSetHasFreeLinePointers(page);
2189+
/* Attempt to truncate line pointer array now */
2190+
PageTruncateLinePointerArray(page);
21792191

21802192
/*
21812193
* Mark buffer dirty before we write WAL.

src/backend/storage/page/bufpage.c

+108-4
Original file line numberDiff line numberDiff line change
@@ -250,8 +250,17 @@ PageAddItemExtended(Page page,
250250
/* if no free slot, we'll put it at limit (1st open slot) */
251251
if (PageHasFreeLinePointers(phdr))
252252
{
253-
/* Look for "recyclable" (unused) ItemId */
254-
for (offsetNumber = 1; offsetNumber < limit; offsetNumber++)
253+
/*
254+
* Scan line pointer array to locate a "recyclable" (unused)
255+
* ItemId.
256+
*
257+
* Always use earlier items first. PageTruncateLinePointerArray
258+
* can only truncate unused items when they appear as a contiguous
259+
* group at the end of the line pointer array.
260+
*/
261+
for (offsetNumber = FirstOffsetNumber;
262+
offsetNumber < limit; /* limit is maxoff+1 */
263+
offsetNumber++)
255264
{
256265
itemId = PageGetItemId(phdr, offsetNumber);
257266

@@ -675,11 +684,23 @@ compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorte
675684
/*
676685
* PageRepairFragmentation
677686
*
678-
* Frees fragmented space on a page.
679-
* It doesn't remove unused line pointers! Please don't change this.
687+
* Frees fragmented space on a heap page following pruning.
680688
*
681689
* This routine is usable for heap pages only, but see PageIndexMultiDelete.
682690
*
691+
* Never removes unused line pointers. PageTruncateLinePointerArray can
692+
* safely remove some unused line pointers. It ought to be safe for this
693+
* routine to free unused line pointers in roughly the same way, but it's not
694+
* clear that that would be beneficial.
695+
*
696+
* PageTruncateLinePointerArray is only called during VACUUM's second pass
697+
* over the heap. Any unused line pointers that it sees are likely to have
698+
* been set to LP_UNUSED (from LP_DEAD) immediately before the time it is
699+
* called. On the other hand, many tables have the vast majority of all
700+
* required pruning performed opportunistically (not during VACUUM). And so
701+
* there is, in general, a good chance that even large groups of unused line
702+
* pointers that we see here will be recycled quickly.
703+
*
683704
* Caller had better have a super-exclusive lock on page's buffer. As a side
684705
* effect the page's PD_HAS_FREE_LINES hint bit will be set or unset as
685706
* needed.
@@ -784,6 +805,89 @@ PageRepairFragmentation(Page page)
784805
PageClearHasFreeLinePointers(page);
785806
}
786807

808+
/*
809+
* PageTruncateLinePointerArray
810+
*
811+
* Removes unused line pointers at the end of the line pointer array.
812+
*
813+
* This routine is usable for heap pages only. It is called by VACUUM during
814+
* its second pass over the heap. We expect at least one LP_UNUSED line
815+
* pointer on the page (if VACUUM didn't have an LP_DEAD item on the page that
816+
* it just set to LP_UNUSED then it should not call here).
817+
*
818+
* We avoid truncating the line pointer array to 0 items, if necessary by
819+
* leaving behind a single remaining LP_UNUSED item. This is a little
820+
* arbitrary, but it seems like a good idea to avoid leaving a PageIsEmpty()
821+
* page behind.
822+
*
823+
* Caller can have either an exclusive lock or a super-exclusive lock on
824+
* page's buffer. The page's PD_HAS_FREE_LINES hint bit will be set or unset
825+
* based on whether or not we leave behind any remaining LP_UNUSED items.
826+
*/
827+
void
828+
PageTruncateLinePointerArray(Page page)
829+
{
830+
PageHeader phdr = (PageHeader) page;
831+
bool countdone = false,
832+
sethint = false;
833+
int nunusedend = 0;
834+
835+
/* Scan line pointer array back-to-front */
836+
for (int i = PageGetMaxOffsetNumber(page); i >= FirstOffsetNumber; i--)
837+
{
838+
ItemId lp = PageGetItemId(page, i);
839+
840+
if (!countdone && i > FirstOffsetNumber)
841+
{
842+
/*
843+
* Still determining which line pointers from the end of the array
844+
* will be truncated away. Either count another line pointer as
845+
* safe to truncate, or notice that it's not safe to truncate
846+
* additional line pointers (stop counting line pointers).
847+
*/
848+
if (!ItemIdIsUsed(lp))
849+
nunusedend++;
850+
else
851+
countdone = true;
852+
}
853+
else
854+
{
855+
/*
856+
* Once we've stopped counting we still need to figure out if
857+
* there are any remaining LP_UNUSED line pointers somewhere more
858+
* towards the front of the array.
859+
*/
860+
if (!ItemIdIsUsed(lp))
861+
{
862+
/*
863+
* This is an unused line pointer that we won't be truncating
864+
* away -- so there is at least one. Set hint on page.
865+
*/
866+
sethint = true;
867+
break;
868+
}
869+
}
870+
}
871+
872+
if (nunusedend > 0)
873+
{
874+
phdr->pd_lower -= sizeof(ItemIdData) * nunusedend;
875+
876+
#ifdef CLOBBER_FREED_MEMORY
877+
memset((char *) page + phdr->pd_lower, 0x7F,
878+
sizeof(ItemIdData) * nunusedend);
879+
#endif
880+
}
881+
else
882+
Assert(sethint);
883+
884+
/* Set hint bit for PageAddItemExtended */
885+
if (sethint)
886+
PageSetHasFreeLinePointers(page);
887+
else
888+
PageClearHasFreeLinePointers(page);
889+
}
890+
787891
/*
788892
* PageGetFreeSpace
789893
* Returns the size of the free (allocatable) space on a page,

src/include/storage/bufpage.h

+1
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,7 @@ extern Page PageGetTempPageCopy(Page page);
441441
extern Page PageGetTempPageCopySpecial(Page page);
442442
extern void PageRestoreTempPage(Page tempPage, Page oldPage);
443443
extern void PageRepairFragmentation(Page page);
444+
extern void PageTruncateLinePointerArray(Page page);
444445
extern Size PageGetFreeSpace(Page page);
445446
extern Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups);
446447
extern Size PageGetExactFreeSpace(Page page);

0 commit comments

Comments
 (0)