Skip to content

Commit 0827e8a

Browse files
committed
autovacuum: handle analyze for partitioned tables
Previously, autovacuum would completely ignore partitioned tables, which is a problem for analyze: if those tables are never analyzed, poor plans may be chosen. Make autovacuum aware of those tables by propagating "changes since analyze" counts from the leaf partitions up the partitioning hierarchy.

This also introduces the necessary reloptions support for partitioned tables (autovacuum_enabled, autovacuum_analyze_scale_factor, autovacuum_analyze_threshold). It's unclear how best to document this aspect.

Author: Yuzuko Hosoya <[email protected]>
Reviewed-by: Kyotaro Horiguchi <[email protected]>
Reviewed-by: Tomas Vondra <[email protected]>
Reviewed-by: Álvaro Herrera <[email protected]>
Discussion: https://postgr.es/m/CAKkQ508_PwVgwJyBY=0Lmkz90j8CmWNPUxgHvCUwGhMrouz6UA@mail.gmail.com
1 parent b3ee4c5 commit 0827e8a

File tree

7 files changed

+257
-44
lines changed

7 files changed

+257
-44
lines changed

src/backend/access/common/reloptions.c

+7-8
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ static relopt_bool boolRelOpts[] =
108108
{
109109
"autovacuum_enabled",
110110
"Enables autovacuum in this relation",
111-
RELOPT_KIND_HEAP | RELOPT_KIND_TOAST,
111+
RELOPT_KIND_HEAP | RELOPT_KIND_TOAST | RELOPT_KIND_PARTITIONED,
112112
ShareUpdateExclusiveLock
113113
},
114114
true
@@ -246,7 +246,7 @@ static relopt_int intRelOpts[] =
246246
{
247247
"autovacuum_analyze_threshold",
248248
"Minimum number of tuple inserts, updates or deletes prior to analyze",
249-
RELOPT_KIND_HEAP,
249+
RELOPT_KIND_HEAP | RELOPT_KIND_PARTITIONED,
250250
ShareUpdateExclusiveLock
251251
},
252252
-1, 0, INT_MAX
@@ -420,7 +420,7 @@ static relopt_real realRelOpts[] =
420420
{
421421
"autovacuum_analyze_scale_factor",
422422
"Number of tuple inserts, updates or deletes prior to analyze as a fraction of reltuples",
423-
RELOPT_KIND_HEAP,
423+
RELOPT_KIND_HEAP | RELOPT_KIND_PARTITIONED,
424424
ShareUpdateExclusiveLock
425425
},
426426
-1, 0.0, 100.0
@@ -1962,12 +1962,11 @@ bytea *
19621962
partitioned_table_reloptions(Datum reloptions, bool validate)
19631963
{
19641964
/*
1965-
* There are no options for partitioned tables yet, but this is able to do
1966-
* some validation.
1965+
* autovacuum_enabled, autovacuum_analyze_threshold and
1966+
* autovacuum_analyze_scale_factor are supported for partitioned tables.
19671967
*/
1968-
return (bytea *) build_reloptions(reloptions, validate,
1969-
RELOPT_KIND_PARTITIONED,
1970-
0, NULL, 0);
1968+
1969+
return default_reloptions(reloptions, validate, RELOPT_KIND_PARTITIONED);
19711970
}
19721971

19731972
/*

src/backend/catalog/system_views.sql

+2-2
Original file line numberDiff line numberDiff line change
@@ -660,7 +660,7 @@ CREATE VIEW pg_stat_all_tables AS
660660
FROM pg_class C LEFT JOIN
661661
pg_index I ON C.oid = I.indrelid
662662
LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace)
663-
WHERE C.relkind IN ('r', 't', 'm')
663+
WHERE C.relkind IN ('r', 't', 'm', 'p')
664664
GROUP BY C.oid, N.nspname, C.relname;
665665

666666
CREATE VIEW pg_stat_xact_all_tables AS
@@ -680,7 +680,7 @@ CREATE VIEW pg_stat_xact_all_tables AS
680680
FROM pg_class C LEFT JOIN
681681
pg_index I ON C.oid = I.indrelid
682682
LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace)
683-
WHERE C.relkind IN ('r', 't', 'm')
683+
WHERE C.relkind IN ('r', 't', 'm', 'p')
684684
GROUP BY C.oid, N.nspname, C.relname;
685685

686686
CREATE VIEW pg_stat_sys_tables AS

src/backend/commands/analyze.c

+28-12
Original file line numberDiff line numberDiff line change
@@ -612,8 +612,8 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
612612
PROGRESS_ANALYZE_PHASE_FINALIZE_ANALYZE);
613613

614614
/*
615-
* Update pages/tuples stats in pg_class, and report ANALYZE to the stats
616-
* collector ... but not if we're doing inherited stats.
615+
* Update pages/tuples stats in pg_class ... but not if we're doing
616+
* inherited stats.
617617
*
618618
* We assume that VACUUM hasn't set pg_class.reltuples already, even
619619
* during a VACUUM ANALYZE. Although VACUUM often updates pg_class,
@@ -655,19 +655,35 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
655655
InvalidMultiXactId,
656656
in_outer_xact);
657657
}
658+
}
658659

659-
/*
660-
* Now report ANALYZE to the stats collector.
661-
*
662-
* We deliberately don't report to the stats collector when doing
663-
* inherited stats, because the stats collector only tracks per-table
664-
* stats.
665-
*
666-
* Reset the changes_since_analyze counter only if we analyzed all
667-
* columns; otherwise, there is still work for auto-analyze to do.
668-
*/
660+
/*
661+
* Now report ANALYZE to the stats collector. For regular tables, we do
662+
* it only if not doing inherited stats. For partitioned tables, we only
663+
* do it for inherited stats. (We're never called for not-inherited stats
664+
* on partitioned tables anyway.)
665+
*
666+
* Reset the changes_since_analyze counter only if we analyzed all
667+
* columns; otherwise, there is still work for auto-analyze to do.
668+
*/
669+
if (!inh || onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
669670
pgstat_report_analyze(onerel, totalrows, totaldeadrows,
670671
(va_cols == NIL));
672+
673+
/*
674+
* If this is a manual analyze of all columns of a permanent leaf
675+
* partition, and not doing inherited stats, also let the collector know
676+
* about the ancestor tables of this partition. Autovacuum does the
677+
* equivalent of this at the start of its run, so there's no reason to do
678+
* it there.
679+
*/
680+
if (!inh && !IsAutoVacuumWorkerProcess() &&
681+
(va_cols == NIL) &&
682+
onerel->rd_rel->relispartition &&
683+
onerel->rd_rel->relkind == RELKIND_RELATION &&
684+
onerel->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT)
685+
{
686+
pgstat_report_anl_ancestors(RelationGetRelid(onerel));
671687
}
672688

673689
/*

src/backend/postmaster/autovacuum.c

+95-10
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
#include "catalog/dependency.h"
7676
#include "catalog/namespace.h"
7777
#include "catalog/pg_database.h"
78+
#include "catalog/pg_inherits.h"
7879
#include "commands/dbcommands.h"
7980
#include "commands/vacuum.h"
8081
#include "lib/ilist.h"
@@ -1969,6 +1970,7 @@ do_autovacuum(void)
19691970
int effective_multixact_freeze_max_age;
19701971
bool did_vacuum = false;
19711972
bool found_concurrent_worker = false;
1973+
bool updated = false;
19721974
int i;
19731975

19741976
/*
@@ -2054,12 +2056,19 @@ do_autovacuum(void)
20542056
/*
20552057
* Scan pg_class to determine which tables to vacuum.
20562058
*
2057-
* We do this in two passes: on the first one we collect the list of plain
2058-
* relations and materialized views, and on the second one we collect
2059-
* TOAST tables. The reason for doing the second pass is that during it we
2060-
* want to use the main relation's pg_class.reloptions entry if the TOAST
2061-
* table does not have any, and we cannot obtain it unless we know
2062-
* beforehand what's the main table OID.
2059+
* We do this in three passes: First we let pgstat collector know about
2060+
* the partitioned table ancestors of all partitions that have recently
2061+
* acquired rows for analyze. This informs the second pass about the
2062+
* total tuple count in partitioning hierarchies.
2063+
*
2064+
* On the second pass, we collect the list of plain relations,
2065+
* materialized views and partitioned tables. On the third one we collect
2066+
* TOAST tables.
2067+
*
2068+
* The reason for doing the third pass is that during it we want to use
2069+
* the main relation's pg_class.reloptions entry if the TOAST table does
2070+
* not have any, and we cannot obtain it unless we know beforehand what's
2071+
* the main table OID.
20632072
*
20642073
* We need to check TOAST tables separately because in cases with short,
20652074
* wide tables there might be proportionally much more activity in the
@@ -2068,7 +2077,44 @@ do_autovacuum(void)
20682077
relScan = table_beginscan_catalog(classRel, 0, NULL);
20692078

20702079
/*
2071-
* On the first pass, we collect main tables to vacuum, and also the main
2080+
* First pass: before collecting the list of tables to vacuum, let stat
2081+
* collector know about partitioned-table ancestors of each partition.
2082+
*/
2083+
while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL)
2084+
{
2085+
Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
2086+
Oid relid = classForm->oid;
2087+
PgStat_StatTabEntry *tabentry;
2088+
2089+
/* Only consider permanent leaf partitions */
2090+
if (!classForm->relispartition ||
2091+
classForm->relkind == RELKIND_PARTITIONED_TABLE ||
2092+
classForm->relpersistence == RELPERSISTENCE_TEMP)
2093+
continue;
2094+
2095+
/*
2096+
* No need to do this for partitions that haven't acquired any rows.
2097+
*/
2098+
tabentry = pgstat_fetch_stat_tabentry(relid);
2099+
if (tabentry &&
2100+
tabentry->changes_since_analyze -
2101+
tabentry->changes_since_analyze_reported > 0)
2102+
{
2103+
pgstat_report_anl_ancestors(relid);
2104+
updated = true;
2105+
}
2106+
}
2107+
2108+
/* Acquire fresh stats for the next passes, if needed */
2109+
if (updated)
2110+
{
2111+
autovac_refresh_stats();
2112+
dbentry = pgstat_fetch_stat_dbentry(MyDatabaseId);
2113+
shared = pgstat_fetch_stat_dbentry(InvalidOid);
2114+
}
2115+
2116+
/*
2117+
* On the second pass, we collect main tables to vacuum, and also the main
20722118
* table relid to TOAST relid mapping.
20732119
*/
20742120
while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL)
@@ -2082,7 +2128,8 @@ do_autovacuum(void)
20822128
bool wraparound;
20832129

20842130
if (classForm->relkind != RELKIND_RELATION &&
2085-
classForm->relkind != RELKIND_MATVIEW)
2131+
classForm->relkind != RELKIND_MATVIEW &&
2132+
classForm->relkind != RELKIND_PARTITIONED_TABLE)
20862133
continue;
20872134

20882135
relid = classForm->oid;
@@ -2157,7 +2204,7 @@ do_autovacuum(void)
21572204

21582205
table_endscan(relScan);
21592206

2160-
/* second pass: check TOAST tables */
2207+
/* third pass: check TOAST tables */
21612208
ScanKeyInit(&key,
21622209
Anum_pg_class_relkind,
21632210
BTEqualStrategyNumber, F_CHAREQ,
@@ -2745,6 +2792,7 @@ extract_autovac_opts(HeapTuple tup, TupleDesc pg_class_desc)
27452792

27462793
Assert(((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_RELATION ||
27472794
((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_MATVIEW ||
2795+
((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_PARTITIONED_TABLE ||
27482796
((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_TOASTVALUE);
27492797

27502798
relopts = extractRelOptions(tup, pg_class_desc, NULL);
@@ -3161,7 +3209,44 @@ relation_needs_vacanalyze(Oid relid,
31613209
*/
31623210
if (PointerIsValid(tabentry) && AutoVacuumingActive())
31633211
{
3164-
reltuples = classForm->reltuples;
3212+
if (classForm->relkind != RELKIND_PARTITIONED_TABLE)
3213+
{
3214+
reltuples = classForm->reltuples;
3215+
}
3216+
else
3217+
{
3218+
/*
3219+
* If the relation is a partitioned table, we must add up
3220+
* children's reltuples.
3221+
*/
3222+
List *children;
3223+
ListCell *lc;
3224+
3225+
reltuples = 0;
3226+
3227+
/* Find all members of inheritance set taking AccessShareLock */
3228+
children = find_all_inheritors(relid, AccessShareLock, NULL);
3229+
3230+
foreach(lc, children)
3231+
{
3232+
Oid childOID = lfirst_oid(lc);
3233+
HeapTuple childtuple;
3234+
Form_pg_class childclass;
3235+
3236+
childtuple = SearchSysCache1(RELOID, ObjectIdGetDatum(childOID));
3237+
childclass = (Form_pg_class) GETSTRUCT(childtuple);
3238+
3239+
/* Skip a partitioned table and foreign partitions */
3240+
if (RELKIND_HAS_STORAGE(childclass->relkind))
3241+
{
3242+
/* Sum up the child's reltuples for its parent table */
3243+
reltuples += childclass->reltuples;
3244+
}
3245+
ReleaseSysCache(childtuple);
3246+
}
3247+
3248+
list_free(children);
3249+
}
31653250
vactuples = tabentry->n_dead_tuples;
31663251
instuples = tabentry->inserts_since_vacuum;
31673252
anltuples = tabentry->changes_since_analyze;

0 commit comments

Comments
 (0)