Skip to content

Commit 7c91a03

Browse files
committed
Sync up our various ways of estimating pg_class.reltuples.
VACUUM thought that reltuples represents the total number of tuples in the relation, while ANALYZE counted only live tuples. This can cause "flapping" in the value when background vacuums and analyzes happen separately. The planner's use of reltuples essentially assumes that it's the count of live (visible) tuples, so let's standardize on having it mean live tuples. Another issue is that the definition of "live tuple" isn't totally clear; what should be done with INSERT_IN_PROGRESS or DELETE_IN_PROGRESS tuples? ANALYZE's choices in this regard are made on the assumption that if the originating transaction commits at all, it will happen after ANALYZE finishes, so we should ignore the effects of the in-progress transaction --- unless it is our own transaction, and then we should count it. Let's propagate this definition into VACUUM, too. Likewise propagate this definition into CREATE INDEX, and into contrib/pgstattuple's pgstattuple_approx() function. Tomas Vondra, reviewed by Haribabu Kommi, some corrections by me Discussion: https://postgr.es/m/[email protected]
1 parent cc415a5 commit 7c91a03

File tree

5 files changed

+132
-52
lines changed

5 files changed

+132
-52
lines changed

contrib/pgstattuple/pgstatapprox.c

+21-15
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@ statapprox_heap(Relation rel, output_type *stat)
6868
Buffer vmbuffer = InvalidBuffer;
6969
BufferAccessStrategy bstrategy;
7070
TransactionId OldestXmin;
71-
uint64 misc_count = 0;
7271

7372
OldestXmin = GetOldestXmin(rel, PROCARRAY_FLAGS_VACUUM);
7473
bstrategy = GetAccessStrategy(BAS_BULKREAD);
@@ -114,14 +113,15 @@ statapprox_heap(Relation rel, output_type *stat)
114113
else
115114
stat->free_space += BLCKSZ - SizeOfPageHeaderData;
116115

116+
/* We may count the page as scanned even if it's new/empty */
117+
scanned++;
118+
117119
if (PageIsNew(page) || PageIsEmpty(page))
118120
{
119121
UnlockReleaseBuffer(buf);
120122
continue;
121123
}
122124

123-
scanned++;
124-
125125
/*
126126
* Look at each tuple on the page and decide whether it's live or
127127
* dead, then count it and its size. Unlike lazy_scan_heap, we can
@@ -153,25 +153,23 @@ statapprox_heap(Relation rel, output_type *stat)
153153
tuple.t_tableOid = RelationGetRelid(rel);
154154

155155
/*
156-
* We count live and dead tuples, but we also need to add up
157-
* others in order to feed vac_estimate_reltuples.
156+
* We follow VACUUM's lead in counting INSERT_IN_PROGRESS tuples
157+
* as "dead" while DELETE_IN_PROGRESS tuples are "live". We don't
158+
* bother distinguishing tuples inserted/deleted by our own
159+
* transaction.
158160
*/
159161
switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
160162
{
161-
case HEAPTUPLE_RECENTLY_DEAD:
162-
misc_count++;
163-
/* Fall through */
164-
case HEAPTUPLE_DEAD:
165-
stat->dead_tuple_len += tuple.t_len;
166-
stat->dead_tuple_count++;
167-
break;
168163
case HEAPTUPLE_LIVE:
164+
case HEAPTUPLE_DELETE_IN_PROGRESS:
169165
stat->tuple_len += tuple.t_len;
170166
stat->tuple_count++;
171167
break;
168+
case HEAPTUPLE_DEAD:
169+
case HEAPTUPLE_RECENTLY_DEAD:
172170
case HEAPTUPLE_INSERT_IN_PROGRESS:
173-
case HEAPTUPLE_DELETE_IN_PROGRESS:
174-
misc_count++;
171+
stat->dead_tuple_len += tuple.t_len;
172+
stat->dead_tuple_count++;
175173
break;
176174
default:
177175
elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
@@ -184,8 +182,16 @@ statapprox_heap(Relation rel, output_type *stat)
184182

185183
stat->table_len = (uint64) nblocks * BLCKSZ;
186184

185+
/*
186+
* We don't know how many tuples are in the pages we didn't scan, so
187+
* extrapolate the live-tuple count to the whole table in the same way
188+
* that VACUUM does. (Like VACUUM, we're not taking a random sample, so
189+
* just extrapolating linearly seems unsafe.) There should be no dead
190+
* tuples in all-visible pages, so no correction is needed for that, and
191+
* we already accounted for the space in those pages, too.
192+
*/
187193
stat->tuple_count = vac_estimate_reltuples(rel, nblocks, scanned,
188-
stat->tuple_count + misc_count);
194+
stat->tuple_count);
189195

190196
/*
191197
* Calculate percentages if the relation has one or more pages.

doc/src/sgml/catalogs.sgml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1739,8 +1739,8 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
17391739
<entry><type>float4</type></entry>
17401740
<entry></entry>
17411741
<entry>
1742-
Number of rows in the table. This is only an estimate used by the
1743-
planner. It is updated by <command>VACUUM</command>,
1742+
Number of live rows in the table. This is only an estimate used by
1743+
the planner. It is updated by <command>VACUUM</command>,
17441744
<command>ANALYZE</command>, and a few DDL commands such as
17451745
<command>CREATE INDEX</command>.
17461746
</entry>

src/backend/catalog/index.c

+40-12
Original file line numberDiff line numberDiff line change
@@ -2366,12 +2366,12 @@ index_build(Relation heapRelation,
23662366
* things to add it to the new index. After we return, the AM's index
23672367
* build procedure does whatever cleanup it needs.
23682368
*
2369-
* The total count of heap tuples is returned. This is for updating pg_class
2370-
* statistics. (It's annoying not to be able to do that here, but we want
2371-
* to merge that update with others; see index_update_stats.) Note that the
2372-
* index AM itself must keep track of the number of index tuples; we don't do
2373-
* so here because the AM might reject some of the tuples for its own reasons,
2374-
* such as being unable to store NULLs.
2369+
* The total count of live heap tuples is returned. This is for updating
2370+
* pg_class statistics. (It's annoying not to be able to do that here, but we
2371+
* want to merge that update with others; see index_update_stats.) Note that
2372+
* the index AM itself must keep track of the number of index tuples; we don't
2373+
* do so here because the AM might reject some of the tuples for its own
2374+
* reasons, such as being unable to store NULLs.
23752375
*
23762376
* A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
23772377
* any potentially broken HOT chains. Currently, we set this if there are
@@ -2402,8 +2402,8 @@ IndexBuildHeapScan(Relation heapRelation,
24022402
* to scan cannot be done when requesting syncscan.
24032403
*
24042404
* When "anyvisible" mode is requested, all tuples visible to any transaction
2405-
* are considered, including those inserted or deleted by transactions that are
2406-
* still in progress.
2405+
* are indexed and counted as live, including those inserted or deleted by
2406+
* transactions that are still in progress.
24072407
*/
24082408
double
24092409
IndexBuildHeapRangeScan(Relation heapRelation,
@@ -2599,6 +2599,12 @@ IndexBuildHeapRangeScan(Relation heapRelation,
25992599
*/
26002600
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
26012601

2602+
/*
2603+
* The criteria for counting a tuple as live in this block need to
2604+
* match what analyze.c's acquire_sample_rows() does, otherwise
2605+
* CREATE INDEX and ANALYZE may produce wildly different reltuples
2606+
* values, e.g. when there are many recently-dead tuples.
2607+
*/
26022608
switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
26032609
scan->rs_cbuf))
26042610
{
@@ -2611,6 +2617,8 @@ IndexBuildHeapRangeScan(Relation heapRelation,
26112617
/* Normal case, index and unique-check it */
26122618
indexIt = true;
26132619
tupleIsAlive = true;
2620+
/* Count it as live, too */
2621+
reltuples += 1;
26142622
break;
26152623
case HEAPTUPLE_RECENTLY_DEAD:
26162624

@@ -2624,6 +2632,9 @@ IndexBuildHeapRangeScan(Relation heapRelation,
26242632
* the live tuple at the end of the HOT-chain. Since this
26252633
* breaks semantics for pre-existing snapshots, mark the
26262634
* index as unusable for them.
2635+
*
2636+
* We don't count recently-dead tuples in reltuples, even
2637+
* if we index them; see acquire_sample_rows().
26272638
*/
26282639
if (HeapTupleIsHotUpdated(heapTuple))
26292640
{
@@ -2646,6 +2657,7 @@ IndexBuildHeapRangeScan(Relation heapRelation,
26462657
{
26472658
indexIt = true;
26482659
tupleIsAlive = true;
2660+
reltuples += 1;
26492661
break;
26502662
}
26512663

@@ -2683,6 +2695,15 @@ IndexBuildHeapRangeScan(Relation heapRelation,
26832695
goto recheck;
26842696
}
26852697
}
2698+
else
2699+
{
2700+
/*
2701+
* For consistency with acquire_sample_rows(), count
2702+
* HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
2703+
* when inserted by our own transaction.
2704+
*/
2705+
reltuples += 1;
2706+
}
26862707

26872708
/*
26882709
* We must index such tuples, since if the index build
@@ -2702,6 +2723,7 @@ IndexBuildHeapRangeScan(Relation heapRelation,
27022723
{
27032724
indexIt = true;
27042725
tupleIsAlive = false;
2726+
reltuples += 1;
27052727
break;
27062728
}
27072729

@@ -2745,6 +2767,14 @@ IndexBuildHeapRangeScan(Relation heapRelation,
27452767
* the same as a RECENTLY_DEAD tuple.
27462768
*/
27472769
indexIt = true;
2770+
2771+
/*
2772+
* Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
2773+
* if they were not deleted by the current
2774+
* transaction. That's what acquire_sample_rows()
2775+
* does, and we want the behavior to be consistent.
2776+
*/
2777+
reltuples += 1;
27482778
}
27492779
else if (HeapTupleIsHotUpdated(heapTuple))
27502780
{
@@ -2762,8 +2792,8 @@ IndexBuildHeapRangeScan(Relation heapRelation,
27622792
{
27632793
/*
27642794
* It's a regular tuple deleted by our own xact. Index
2765-
* it but don't check for uniqueness, the same as a
2766-
* RECENTLY_DEAD tuple.
2795+
* it, but don't check for uniqueness nor count in
2796+
* reltuples, the same as a RECENTLY_DEAD tuple.
27672797
*/
27682798
indexIt = true;
27692799
}
@@ -2787,8 +2817,6 @@ IndexBuildHeapRangeScan(Relation heapRelation,
27872817
tupleIsAlive = true;
27882818
}
27892819

2790-
reltuples += 1;
2791-
27922820
MemoryContextReset(econtext->ecxt_per_tuple_memory);
27932821

27942822
/* Set up for predicate or expression evaluation */

src/backend/commands/vacuum.c

+6
Original file line numberDiff line numberDiff line change
@@ -771,6 +771,9 @@ vacuum_set_xid_limits(Relation rel,
771771
* subset of the table. When we have only partial information, we take
772772
* the old value of pg_class.reltuples as a measurement of the
773773
* tuple density in the unscanned pages.
774+
*
775+
* Note: scanned_tuples should count only *live* tuples, since
776+
* pg_class.reltuples is defined that way.
774777
*/
775778
double
776779
vac_estimate_reltuples(Relation relation,
@@ -852,6 +855,9 @@ vac_estimate_reltuples(Relation relation,
852855
* transaction. This is OK since postponing the flag maintenance is
853856
* always allowable.
854857
*
858+
* Note: num_tuples should count only *live* tuples, since
859+
* pg_class.reltuples is defined that way.
860+
*
855861
* This routine is shared by VACUUM and ANALYZE.
856862
*/
857863
void

0 commit comments

Comments
 (0)