Skip to content

Commit 73a076b

Browse files
Fix undercounting in VACUUM VERBOSE output.
The logic for determining how many nbtree pages in an index are deleted pages sometimes undercounted pages. Pages that were deleted by the current VACUUM operation (as opposed to some previous VACUUM operation whose deleted pages have yet to be reused) were sometimes overlooked. The final count is exposed to users through VACUUM VERBOSE's "%u index pages have been deleted" output. btvacuumpage() avoided double-counting when _bt_pagedel() deleted more than one page by assuming that only one page was deleted, and that the additional deleted pages would get picked up during a future call to btvacuumpage() by the same VACUUM operation. _bt_pagedel() can legitimately delete pages that the btvacuumscan() scan will not visit again, though, so that assumption was slightly faulty. Fix the accounting by teaching _bt_pagedel() about its caller's requirements. It now only reports on pages that it knows btvacuumscan() won't visit again (including the current btvacuumpage() page), so everything works out in the end. This bug has been around forever. Only backpatch to v11, though, to keep _bt_pagedel() in sync on the branches that have today's bugfix commit b0229f2. Note that this commit changes the signature of _bt_pagedel(), just like commit b0229f2. Author: Peter Geoghegan Reviewed-By: Masahiko Sawada Discussion: https://postgr.es/m/CAH2-WzkrXBcMQWAYUJMFTTvzx_r4q=pYSjDe07JnUXhe+OZnJA@mail.gmail.com Backpatch: 11-
1 parent b0229f2 commit 73a076b

File tree

3 files changed

+38
-18
lines changed

3 files changed

+38
-18
lines changed

src/backend/access/nbtree/nbtpage.c

+30-10
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,10 @@ static BTMetaPageData *_bt_getmeta(Relation rel, Buffer metabuf);
3838
static bool _bt_mark_page_halfdead(Relation rel, Buffer leafbuf,
3939
BTStack stack);
4040
static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf,
41+
BlockNumber scanblkno,
4142
bool *rightsib_empty,
42-
TransactionId *oldestBtpoXact);
43+
TransactionId *oldestBtpoXact,
44+
uint32 *ndeleted);
4345
static TransactionId _bt_xid_horizon(Relation rel, Relation heapRel, Page page,
4446
OffsetNumber *deletable, int ndeletable);
4547
static bool _bt_lock_branch_parent(Relation rel, BlockNumber child,
@@ -1489,7 +1491,9 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
14891491
*
14901492
* Returns the number of pages successfully deleted (zero if page cannot
14911493
* be deleted now; could be more than one if parent or right sibling pages
1492-
* were deleted too).
1494+
* were deleted too). Note that this does not include pages that we delete
1495+
* that the btvacuumscan scan has yet to reach; they'll get counted later
1496+
* instead.
14931497
*
14941498
* Maintains *oldestBtpoXact for any pages that get deleted. Caller is
14951499
* responsible for maintaining *oldestBtpoXact in the case of pages that were
@@ -1499,15 +1503,21 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
14991503
* carefully, it's better to run it in a temp context that can be reset
15001504
* frequently.
15011505
*/
1502-
int
1506+
uint32
15031507
_bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
15041508
{
1505-
int ndeleted = 0;
1509+
uint32 ndeleted = 0;
15061510
BlockNumber rightsib;
15071511
bool rightsib_empty;
15081512
Page page;
15091513
BTPageOpaque opaque;
15101514

1515+
/*
1516+
* Save original leafbuf block number from caller. Only deleted blocks
1517+
* that are <= scanblkno get counted in ndeleted return value.
1518+
*/
1519+
BlockNumber scanblkno = BufferGetBlockNumber(leafbuf);
1520+
15111521
/*
15121522
* "stack" is a search stack leading (approximately) to the target page.
15131523
* It is initially NULL, but when iterating, we keep it to avoid
@@ -1558,8 +1568,9 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
15581568
if (P_ISDELETED(opaque))
15591569
ereport(LOG,
15601570
(errcode(ERRCODE_INDEX_CORRUPTED),
1561-
errmsg_internal("found deleted block %u while following right link in index \"%s\"",
1571+
errmsg_internal("found deleted block %u while following right link from block %u in index \"%s\"",
15621572
BufferGetBlockNumber(leafbuf),
1573+
scanblkno,
15631574
RelationGetRelationName(rel))));
15641575

15651576
_bt_relbuf(rel, leafbuf);
@@ -1709,13 +1720,13 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
17091720
while (P_ISHALFDEAD(opaque))
17101721
{
17111722
/* Check for interrupts in _bt_unlink_halfdead_page */
1712-
if (!_bt_unlink_halfdead_page(rel, leafbuf, &rightsib_empty,
1713-
oldestBtpoXact))
1723+
if (!_bt_unlink_halfdead_page(rel, leafbuf, scanblkno,
1724+
&rightsib_empty, oldestBtpoXact,
1725+
&ndeleted))
17141726
{
17151727
/* _bt_unlink_halfdead_page failed, released buffer */
17161728
return ndeleted;
17171729
}
1718-
ndeleted++;
17191730
}
17201731

17211732
Assert(P_ISLEAF(opaque) && P_ISDELETED(opaque));
@@ -1974,8 +1985,9 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
19741985
* to avoid having to reacquire a lock we already released).
19751986
*/
19761987
static bool
1977-
_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty,
1978-
TransactionId *oldestBtpoXact)
1988+
_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
1989+
bool *rightsib_empty, TransactionId *oldestBtpoXact,
1990+
uint32 *ndeleted)
19791991
{
19801992
BlockNumber leafblkno = BufferGetBlockNumber(leafbuf);
19811993
BlockNumber leafleftsib;
@@ -2370,6 +2382,14 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty,
23702382
TransactionIdPrecedes(opaque->btpo.xact, *oldestBtpoXact))
23712383
*oldestBtpoXact = opaque->btpo.xact;
23722384

2385+
/*
2386+
* If btvacuumscan won't revisit this page in a future btvacuumpage call
2387+
* and count it as deleted then, we count it as deleted by current
2388+
* btvacuumpage call.
2389+
*/
2390+
if (target <= scanblkno)
2391+
(*ndeleted)++;
2392+
23732393
/*
23742394
* Release the target, if it was not the leaf block. The leaf is always
23752395
* kept locked.

src/backend/access/nbtree/nbtree.c

+6-6
Original file line numberDiff line numberDiff line change
@@ -1362,17 +1362,17 @@ btvacuumpage(BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
13621362
if (delete_now)
13631363
{
13641364
MemoryContext oldcontext;
1365-
int ndel;
13661365

13671366
/* Run pagedel in a temp context to avoid memory leakage */
13681367
MemoryContextReset(vstate->pagedelcontext);
13691368
oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
13701369

1371-
ndel = _bt_pagedel(rel, buf, &vstate->oldestBtpoXact);
1372-
1373-
/* count only this page, else may double-count parent */
1374-
if (ndel)
1375-
stats->pages_deleted++;
1370+
/*
1371+
* We trust the _bt_pagedel return value because it does not include
1372+
* any page that a future call here from btvacuumscan is expected to
1373+
* count. There will be no double-counting.
1374+
*/
1375+
stats->pages_deleted += _bt_pagedel(rel, buf, &vstate->oldestBtpoXact);
13761376

13771377
MemoryContextSwitchTo(oldcontext);
13781378
/* pagedel released buffer, so we shouldn't */

src/include/access/nbtree.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -1080,8 +1080,8 @@ extern void _bt_delitems_vacuum(Relation rel, Buffer buf,
10801080
extern void _bt_delitems_delete(Relation rel, Buffer buf,
10811081
OffsetNumber *deletable, int ndeletable,
10821082
Relation heapRel);
1083-
extern int _bt_pagedel(Relation rel, Buffer leafbuf,
1084-
TransactionId *oldestBtpoXact);
1083+
extern uint32 _bt_pagedel(Relation rel, Buffer leafbuf,
1084+
TransactionId *oldestBtpoXact);
10851085

10861086
/*
10871087
* prototypes for functions in nbtsearch.c

0 commit comments

Comments
 (0)