From 189694d50a09902566a73a7215a0e8548dd98d7a Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Mon, 17 Mar 2025 16:11:19 -0500 Subject: [PATCH 1/3] vacuumdb: Teach vacuum_one_database() to reuse query results. Presently, each call to vacuum_one_database() performs a catalog query to retrieve the list of tables to process. A proposed follow-up commit would add a "missing only" feature to --analyze-in-stages, which requires us to save the results of the catalog query (since tables without statistics would have them after the first stage). This commit adds this ability via a new parameter for vacuum_one_database() that specifies either a previously-retrieved list to process or a place to return the results of the catalog query. Note that this commit does not make use of this new parameter for anything. That is left for a follow-up commit. Author: Corey Huinker Co-authored-by: Nathan Bossart Reviewed-by: John Naylor Discussion: https://postgr.es/m/Z5O1bpcwDrMgyrYy%40nathan --- src/bin/scripts/vacuumdb.c | 330 ++++++++++++++++++++++--------------- 1 file changed, 194 insertions(+), 136 deletions(-) diff --git a/src/bin/scripts/vacuumdb.c b/src/bin/scripts/vacuumdb.c index 982bf070be6f..e28f82a0ebaf 100644 --- a/src/bin/scripts/vacuumdb.c +++ b/src/bin/scripts/vacuumdb.c @@ -62,10 +62,16 @@ typedef enum static VacObjFilter objfilter = OBJFILTER_NONE; +static SimpleStringList *retrieve_objects(PGconn *conn, + vacuumingOptions *vacopts, + SimpleStringList *objects, + bool echo); + static void vacuum_one_database(ConnParams *cparams, vacuumingOptions *vacopts, int stage, SimpleStringList *objects, + SimpleStringList **found_objs, int concurrentCons, const char *progname, bool echo, bool quiet); @@ -405,7 +411,7 @@ main(int argc, char *argv[]) { vacuum_one_database(&cparams, &vacopts, stage, - &objects, + &objects, NULL, concurrentCons, progname, echo, quiet); } @@ -413,7 +419,7 @@ main(int argc, char *argv[]) else vacuum_one_database(&cparams, &vacopts, ANALYZE_NO_STAGE, - &objects, + &objects, NULL, concurrentCons, progname, echo, quiet); } @@ -461,8 +467,36 @@ escape_quotes(const char *src) /* * vacuum_one_database * - * Process tables in the given database. If the 'objects' list is empty, - * process all tables in the database. + * Process tables in the given database. + * + * There are two ways to specify the list of objects to process: + * + * 1) The "found_objs" parameter is a double pointer to a fully qualified list + * of objects to process, as returned by a previous call to + * vacuum_one_database(). + * + * a) If both "found_objs" (the double pointer) and "*found_objs" (the + * once-dereferenced double pointer) are not NULL, this list takes + * priority, and anything specified in "objects" is ignored. + * + * b) If "found_objs" (the double pointer) is not NULL but "*found_objs" + * (the once-dereferenced double pointer) _is_ NULL, the "objects" + * parameter takes priority, and the results of the catalog query + * described in (2) are stored in "found_objs". + * + * c) If "found_objs" (the double pointer) is NULL, the "objects" + * parameter again takes priority, and the results of the catalog query + * are not saved. + * + * 2) The "objects" parameter is a user-specified list of objects to process. + * When (1b) or (1c) applies, this function performs a catalog query to + * retrieve a fully qualified list of objects to process, as described + * below. + * + * a) If "objects" is not NULL, the catalog query gathers only the objects + * listed in "objects". + * + * b) If "objects" is NULL, all tables in the database are gathered. * * Note that this function is only concerned with running exactly one stage * when in analyze-in-stages mode; caller must iterate on us if necessary. @@ -475,22 +509,18 @@ vacuum_one_database(ConnParams *cparams, vacuumingOptions *vacopts, int stage, SimpleStringList *objects, + SimpleStringList **found_objs, int concurrentCons, const char *progname, bool echo, bool quiet) { PQExpBufferData sql; - PQExpBufferData buf; - PQExpBufferData catalog_query; - PGresult *res; PGconn *conn; SimpleStringListCell *cell; ParallelSlotArray *sa; - SimpleStringList dbtables = {NULL, NULL}; - int i; - int ntups; + int ntups = 0; bool failed = false; - bool objects_listed = false; const char *initcmd; + SimpleStringList *ret = NULL; const char *stage_commands[] = { "SET default_statistics_target=1; SET vacuum_cost_delay=0;", "SET default_statistics_target=10; RESET vacuum_cost_delay;", @@ -599,19 +629,155 @@ vacuum_one_database(ConnParams *cparams, } /* - * Prepare the list of tables to process by querying the catalogs. - * - * Since we execute the constructed query with the default search_path - * (which could be unsafe), everything in this query MUST be fully - * qualified. - * - * First, build a WITH clause for the catalog query if any tables were - * specified, with a set of values made of relation names and their - * optional set of columns. This is used to match any provided column - * lists with the generated qualified identifiers and to filter for the - * tables provided via --table. If a listed table does not exist, the - * catalog query will fail. + * If the caller provided the results of a previous catalog query, just + * use that. Otherwise, run the catalog query ourselves and set the + * return variable if provided. + */ + if (found_objs && *found_objs) + ret = *found_objs; + else + { + ret = retrieve_objects(conn, vacopts, objects, echo); + if (found_objs) + *found_objs = ret; + } + + /* + * Count the number of objects in the catalog query result. If there are + * none, we are done. + */ + for (cell = ret ? ret->head : NULL; cell; cell = cell->next) + ntups++; + + if (ntups == 0) + { + PQfinish(conn); + return; + } + + /* + * Ensure concurrentCons is sane. If there are more connections than + * vacuumable relations, we don't need to use them all. */ + if (concurrentCons > ntups) + concurrentCons = ntups; + if (concurrentCons <= 0) + concurrentCons = 1; + + /* + * All slots need to be prepared to run the appropriate analyze stage, if + * caller requested that mode. We have to prepare the initial connection + * ourselves before setting up the slots. + */ + if (stage == ANALYZE_NO_STAGE) + initcmd = NULL; + else + { + initcmd = stage_commands[stage]; + executeCommand(conn, initcmd, echo); + } + + /* + * Setup the database connections. We reuse the connection we already have + * for the first slot. If not in parallel mode, the first slot in the + * array contains the connection. + */ + sa = ParallelSlotsSetup(concurrentCons, cparams, progname, echo, initcmd); + ParallelSlotsAdoptConn(sa, conn); + + initPQExpBuffer(&sql); + + cell = ret->head; + do + { + const char *tabname = cell->val; + ParallelSlot *free_slot; + + if (CancelRequested) + { + failed = true; + goto finish; + } + + free_slot = ParallelSlotsGetIdle(sa, NULL); + if (!free_slot) + { + failed = true; + goto finish; + } + + prepare_vacuum_command(&sql, PQserverVersion(free_slot->connection), + vacopts, tabname); + + /* + * Execute the vacuum. All errors are handled in processQueryResult + * through ParallelSlotsGetIdle. + */ + ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL); + run_vacuum_command(free_slot->connection, sql.data, + echo, tabname); + + cell = cell->next; + } while (cell != NULL); + + if (!ParallelSlotsWaitCompletion(sa)) + { + failed = true; + goto finish; + } + + /* If we used SKIP_DATABASE_STATS, mop up with ONLY_DATABASE_STATS */ + if (vacopts->skip_database_stats && stage == ANALYZE_NO_STAGE) + { + const char *cmd = "VACUUM (ONLY_DATABASE_STATS);"; + ParallelSlot *free_slot = ParallelSlotsGetIdle(sa, NULL); + + if (!free_slot) + { + failed = true; + goto finish; + } + + ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL); + run_vacuum_command(free_slot->connection, cmd, echo, NULL); + + if (!ParallelSlotsWaitCompletion(sa)) + failed = true; + } + +finish: + ParallelSlotsTerminate(sa); + pg_free(sa); + + termPQExpBuffer(&sql); + + if (failed) + exit(1); +} + +/* + * Prepare the list of tables to process by querying the catalogs. + * + * Since we execute the constructed query with the default search_path (which + * could be unsafe), everything in this query MUST be fully qualified. + * + * First, build a WITH clause for the catalog query if any tables were + * specified, with a set of values made of relation names and their optional + * set of columns. This is used to match any provided column lists with the + * generated qualified identifiers and to filter for the tables provided via + * --table. If a listed table does not exist, the catalog query will fail. + */ +static SimpleStringList * +retrieve_objects(PGconn *conn, vacuumingOptions *vacopts, + SimpleStringList *objects, bool echo) +{ + PQExpBufferData buf; + PQExpBufferData catalog_query; + PGresult *res; + SimpleStringListCell *cell; + SimpleStringList *found_objs = palloc0(sizeof(SimpleStringList)); + bool objects_listed = false; + initPQExpBuffer(&catalog_query); for (cell = objects ? objects->head : NULL; cell; cell = cell->next) { @@ -765,23 +931,12 @@ vacuum_one_database(ConnParams *cparams, termPQExpBuffer(&catalog_query); PQclear(executeQuery(conn, ALWAYS_SECURE_SEARCH_PATH_SQL, echo)); - /* - * If no rows are returned, there are no matching tables, so we are done. - */ - ntups = PQntuples(res); - if (ntups == 0) - { - PQclear(res); - PQfinish(conn); - return; - } - /* * Build qualified identifiers for each table, including the column list * if given. */ initPQExpBuffer(&buf); - for (i = 0; i < ntups; i++) + for (int i = 0; i < PQntuples(res); i++) { appendPQExpBufferStr(&buf, fmtQualifiedIdEnc(PQgetvalue(res, i, 1), @@ -791,110 +946,13 @@ vacuum_one_database(ConnParams *cparams, if (objects_listed && !PQgetisnull(res, i, 2)) appendPQExpBufferStr(&buf, PQgetvalue(res, i, 2)); - simple_string_list_append(&dbtables, buf.data); + simple_string_list_append(found_objs, buf.data); resetPQExpBuffer(&buf); } termPQExpBuffer(&buf); PQclear(res); - /* - * Ensure concurrentCons is sane. If there are more connections than - * vacuumable relations, we don't need to use them all. - */ - if (concurrentCons > ntups) - concurrentCons = ntups; - if (concurrentCons <= 0) - concurrentCons = 1; - - /* - * All slots need to be prepared to run the appropriate analyze stage, if - * caller requested that mode. We have to prepare the initial connection - * ourselves before setting up the slots. - */ - if (stage == ANALYZE_NO_STAGE) - initcmd = NULL; - else - { - initcmd = stage_commands[stage]; - executeCommand(conn, initcmd, echo); - } - - /* - * Setup the database connections. We reuse the connection we already have - * for the first slot. If not in parallel mode, the first slot in the - * array contains the connection. - */ - sa = ParallelSlotsSetup(concurrentCons, cparams, progname, echo, initcmd); - ParallelSlotsAdoptConn(sa, conn); - - initPQExpBuffer(&sql); - - cell = dbtables.head; - do - { - const char *tabname = cell->val; - ParallelSlot *free_slot; - - if (CancelRequested) - { - failed = true; - goto finish; - } - - free_slot = ParallelSlotsGetIdle(sa, NULL); - if (!free_slot) - { - failed = true; - goto finish; - } - - prepare_vacuum_command(&sql, PQserverVersion(free_slot->connection), - vacopts, tabname); - - /* - * Execute the vacuum. All errors are handled in processQueryResult - * through ParallelSlotsGetIdle. - */ - ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL); - run_vacuum_command(free_slot->connection, sql.data, - echo, tabname); - - cell = cell->next; - } while (cell != NULL); - - if (!ParallelSlotsWaitCompletion(sa)) - { - failed = true; - goto finish; - } - - /* If we used SKIP_DATABASE_STATS, mop up with ONLY_DATABASE_STATS */ - if (vacopts->skip_database_stats && stage == ANALYZE_NO_STAGE) - { - const char *cmd = "VACUUM (ONLY_DATABASE_STATS);"; - ParallelSlot *free_slot = ParallelSlotsGetIdle(sa, NULL); - - if (!free_slot) - { - failed = true; - goto finish; - } - - ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL); - run_vacuum_command(free_slot->connection, cmd, echo, NULL); - - if (!ParallelSlotsWaitCompletion(sa)) - failed = true; - } - -finish: - ParallelSlotsTerminate(sa); - pg_free(sa); - - termPQExpBuffer(&sql); - - if (failed) - exit(1); + return found_objs; } /* @@ -941,7 +999,7 @@ vacuum_all_databases(ConnParams *cparams, vacuum_one_database(cparams, vacopts, stage, - objects, + objects, NULL, concurrentCons, progname, echo, quiet); } @@ -955,7 +1013,7 @@ vacuum_all_databases(ConnParams *cparams, vacuum_one_database(cparams, vacopts, ANALYZE_NO_STAGE, - objects, + objects, NULL, concurrentCons, progname, echo, quiet); } From 25f1a363b5adf6e4ee65d1ca8d8ade7aee6e0c04 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Tue, 11 Mar 2025 11:32:09 -0500 Subject: [PATCH 2/3] vacuumdb: Add option for analyzing only relations missing stats. This commit adds a new --missing-only option that can be used in conjunction with --analyze-only or --analyze-in-stages. When this option is specified, vacuumdb will generate ANALYZE commands for a relation if it is missing any statistics it should ordinarily have. For example, if a table has statistics for one column but not another, we will analyze the whole table. A similar principle applies to extended statistics, expression indexes, and table inheritance. Author: Corey Huinker Co-authored-by: Nathan Bossart Reviewed-by: John Naylor Discussion: https://postgr.es/m/Z5O1bpcwDrMgyrYy%40nathan --- doc/src/sgml/ref/vacuumdb.sgml | 16 ++++ src/bin/scripts/t/102_vacuumdb_stages.pl | 60 +++++++++++++ src/bin/scripts/vacuumdb.c | 109 ++++++++++++++++++++++- src/test/perl/PostgreSQL/Test/Cluster.pm | 27 ++++++ 4 files changed, 210 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/ref/vacuumdb.sgml b/doc/src/sgml/ref/vacuumdb.sgml index 66fccb30a2d2..91369bf1ffe6 100644 --- a/doc/src/sgml/ref/vacuumdb.sgml +++ b/doc/src/sgml/ref/vacuumdb.sgml @@ -277,6 +277,22 @@ PostgreSQL documentation + + + + + Only analyze relations that are missing statistics for a column, index + expression, or extended statistics object. This option prevents + vacuumdb from deleting existing statistics + so that the query optimizer's choices do not become transiently worse. + + + This option can only be used in conjunction with + or . + + + + diff --git a/src/bin/scripts/t/102_vacuumdb_stages.pl b/src/bin/scripts/t/102_vacuumdb_stages.pl index 984c8d06de60..b216fb0c2c68 100644 --- a/src/bin/scripts/t/102_vacuumdb_stages.pl +++ b/src/bin/scripts/t/102_vacuumdb_stages.pl @@ -21,6 +21,66 @@ .*statement:\ ANALYZE/sx, 'analyze three times'); +$node->safe_psql('postgres', + 'CREATE TABLE regression_vacuumdb_test AS select generate_series(1, 10) a, generate_series(2, 11) b;'); +$node->issues_sql_like( + [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--missing-only with missing stats'); +$node->issues_sql_unlike( + [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--missing-only with no missing stats'); + +$node->safe_psql('postgres', + 'CREATE INDEX regression_vacuumdb_test_idx ON regression_vacuumdb_test (mod(a, 2));'); +$node->issues_sql_like( + [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--missing-only with missing index expression stats'); +$node->issues_sql_unlike( + [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--missing-only with no missing index expression stats'); + +$node->safe_psql('postgres', + 'CREATE STATISTICS regression_vacuumdb_test_stat ON a, b FROM regression_vacuumdb_test;'); +$node->issues_sql_like( + [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--missing-only with missing extended stats'); +$node->issues_sql_unlike( + [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--missing-only with no missing extended stats'); + +$node->safe_psql('postgres', + "CREATE TABLE regression_vacuumdb_child (a INT) INHERITS (regression_vacuumdb_test);\n" + . "INSERT INTO regression_vacuumdb_child VALUES (1, 2);\n" + . "ANALYZE regression_vacuumdb_child;\n"); +$node->issues_sql_like( + [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--missing-only with missing inherited stats'); +$node->issues_sql_unlike( + [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--missing-only with no missing inherited stats'); + +$node->safe_psql('postgres', + "CREATE TABLE regression_vacuumdb_parted (a INT) PARTITION BY LIST (a);\n" + . "CREATE TABLE regression_vacuumdb_part1 PARTITION OF regression_vacuumdb_parted FOR VALUES IN (1);\n" + . "INSERT INTO regression_vacuumdb_parted VALUES (1);\n" + . "ANALYZE regression_vacuumdb_part1;\n"); +$node->issues_sql_like( + [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_parted', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--missing-only with missing partition stats'); +$node->issues_sql_unlike( + [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_parted', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--missing-only with no missing partition stats'); + $node->issues_sql_like( [ 'vacuumdb', '--analyze-in-stages', '--all' ], qr/statement:\ SET\ default_statistics_target=1;\ SET\ vacuum_cost_delay=0; diff --git a/src/bin/scripts/vacuumdb.c b/src/bin/scripts/vacuumdb.c index e28f82a0ebaf..b7c7025cf702 100644 --- a/src/bin/scripts/vacuumdb.c +++ b/src/bin/scripts/vacuumdb.c @@ -47,6 +47,7 @@ typedef struct vacuumingOptions bool process_toast; bool skip_database_stats; char *buffer_usage_limit; + bool missing_only; } vacuumingOptions; /* object filter options */ @@ -134,6 +135,7 @@ main(int argc, char *argv[]) {"no-process-toast", no_argument, NULL, 11}, {"no-process-main", no_argument, NULL, 12}, {"buffer-usage-limit", required_argument, NULL, 13}, + {"missing-only", no_argument, NULL, 14}, {NULL, 0, NULL, 0} }; @@ -281,6 +283,9 @@ main(int argc, char *argv[]) case 13: vacopts.buffer_usage_limit = escape_quotes(optarg); break; + case 14: + vacopts.missing_only = true; + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -366,6 +371,11 @@ main(int argc, char *argv[]) pg_fatal("cannot use the \"%s\" option with the \"%s\" option", "buffer-usage-limit", "full"); + /* Prohibit --missing-only without --analyze-only or --analyze-in-stages */ + if (vacopts.missing_only && !vacopts.analyze_only) + pg_fatal("cannot use the \"%s\" option without \"%s\" or \"%s\"", + "missing-only", "analyze-only", "analyze-in-stages"); + /* fill cparams except for dbname, which is set below */ cparams.pghost = host; cparams.pgport = port; @@ -406,12 +416,14 @@ main(int argc, char *argv[]) if (analyze_in_stages) { int stage; + SimpleStringList *found_objs = NULL; for (stage = 0; stage < ANALYZE_NUM_STAGES; stage++) { vacuum_one_database(&cparams, &vacopts, stage, - &objects, NULL, + &objects, + vacopts.missing_only ? &found_objs : NULL, concurrentCons, progname, echo, quiet); } @@ -614,6 +626,13 @@ vacuum_one_database(ConnParams *cparams, "--buffer-usage-limit", "16"); } + if (vacopts->missing_only && PQserverVersion(conn) < 150000) + { + PQfinish(conn); + pg_fatal("cannot use the \"%s\" option on server versions older than PostgreSQL %s", + "--missing-only", "15"); + } + /* skip_database_stats is used automatically if server supports it */ vacopts->skip_database_stats = (PQserverVersion(conn) >= 160000); @@ -838,6 +857,9 @@ retrieve_objects(PGconn *conn, vacuumingOptions *vacopts, " FROM pg_catalog.pg_class c\n" " JOIN pg_catalog.pg_namespace ns" " ON c.relnamespace OPERATOR(pg_catalog.=) ns.oid\n" + " CROSS JOIN LATERAL (SELECT c.relkind IN (" + CppAsString2(RELKIND_PARTITIONED_TABLE) ", " + CppAsString2(RELKIND_PARTITIONED_INDEX) ")) as p (inherited)\n" " LEFT JOIN pg_catalog.pg_class t" " ON c.reltoastrelid OPERATOR(pg_catalog.=) t.oid\n"); @@ -921,6 +943,82 @@ retrieve_objects(PGconn *conn, vacuumingOptions *vacopts, vacopts->min_mxid_age); } + if (vacopts->missing_only) + { + appendPQExpBufferStr(&catalog_query, " AND (\n"); + + /* regular stats */ + appendPQExpBufferStr(&catalog_query, + " EXISTS (SELECT NULL FROM pg_catalog.pg_attribute a\n" + " WHERE a.attrelid OPERATOR(pg_catalog.=) c.oid\n" + " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n" + " AND a.attnum OPERATOR(pg_catalog.>) 0::pg_catalog.int2\n" + " AND NOT a.attisdropped\n" + " AND a.attstattarget IS DISTINCT FROM 0::pg_catalog.int2\n" + " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic s\n" + " WHERE s.starelid OPERATOR(pg_catalog.=) a.attrelid\n" + " AND s.staattnum OPERATOR(pg_catalog.=) a.attnum\n" + " AND s.stainherit OPERATOR(pg_catalog.=) p.inherited))\n"); + + /* extended stats */ + appendPQExpBufferStr(&catalog_query, + " OR EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext e\n" + " WHERE e.stxrelid OPERATOR(pg_catalog.=) c.oid\n" + " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n" + " AND e.stxstattarget IS DISTINCT FROM 0::pg_catalog.int2\n" + " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext_data d\n" + " WHERE d.stxoid OPERATOR(pg_catalog.=) e.oid\n" + " AND d.stxdinherit OPERATOR(pg_catalog.=) p.inherited))\n"); + + /* expression indexes */ + appendPQExpBufferStr(&catalog_query, + " OR EXISTS (SELECT NULL FROM pg_catalog.pg_attribute a\n" + " JOIN pg_index i ON i.indexrelid OPERATOR(pg_catalog.=) a.attrelid\n" + " WHERE i.indrelid OPERATOR(pg_catalog.=) c.oid\n" + " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n" + " AND i.indkey[a.attnum OPERATOR(pg_catalog.-) 1::int2] OPERATOR(pg_catalog.=) 0::int2\n" + " AND a.attnum OPERATOR(pg_catalog.>) 0::pg_catalog.int2\n" + " AND NOT a.attisdropped\n" + " AND a.attstattarget IS DISTINCT FROM 0::pg_catalog.int2\n" + " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic s\n" + " WHERE s.starelid OPERATOR(pg_catalog.=) a.attrelid\n" + " AND s.staattnum OPERATOR(pg_catalog.=) a.attnum\n" + " AND s.stainherit OPERATOR(pg_catalog.=) p.inherited))\n"); + + /* table inheritance and regular stats */ + appendPQExpBufferStr(&catalog_query, + " OR EXISTS (SELECT NULL FROM pg_catalog.pg_attribute a\n" + " WHERE a.attrelid OPERATOR(pg_catalog.=) c.oid\n" + " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n" + " AND a.attnum OPERATOR(pg_catalog.>) 0::pg_catalog.int2\n" + " AND NOT a.attisdropped\n" + " AND a.attstattarget IS DISTINCT FROM 0::pg_catalog.int2\n" + " AND c.relhassubclass\n" + " AND NOT p.inherited\n" + " AND EXISTS (SELECT NULL FROM pg_catalog.pg_inherits h\n" + " WHERE h.inhparent OPERATOR(pg_catalog.=) c.oid)\n" + " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic s\n" + " WHERE s.starelid OPERATOR(pg_catalog.=) a.attrelid\n" + " AND s.staattnum OPERATOR(pg_catalog.=) a.attnum\n" + " AND s.stainherit))\n"); + + /* table inheritance and extended stats */ + appendPQExpBufferStr(&catalog_query, + " OR EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext e\n" + " WHERE e.stxrelid OPERATOR(pg_catalog.=) c.oid\n" + " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n" + " AND e.stxstattarget IS DISTINCT FROM 0::pg_catalog.int2\n" + " AND c.relhassubclass\n" + " AND NOT p.inherited\n" + " AND EXISTS (SELECT NULL FROM pg_catalog.pg_inherits h\n" + " WHERE h.inhparent OPERATOR(pg_catalog.=) c.oid)\n" + " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext_data d\n" + " WHERE d.stxoid OPERATOR(pg_catalog.=) e.oid\n" + " AND d.stxdinherit))\n"); + + appendPQExpBufferStr(&catalog_query, " )\n"); + } + /* * Execute the catalog query. We use the default search_path for this * query for consistency with table lookups done elsewhere by the user. @@ -983,6 +1081,11 @@ vacuum_all_databases(ConnParams *cparams, if (analyze_in_stages) { + SimpleStringList **found_objs = NULL; + + if (vacopts->missing_only) + found_objs = palloc0(PQntuples(result) * sizeof(SimpleStringList *)); + /* * When analyzing all databases in stages, we analyze them all in the * fastest stage first, so that initial statistics become available @@ -999,7 +1102,8 @@ vacuum_all_databases(ConnParams *cparams, vacuum_one_database(cparams, vacopts, stage, - objects, NULL, + objects, + vacopts->missing_only ? &found_objs[i] : NULL, concurrentCons, progname, echo, quiet); } @@ -1239,6 +1343,7 @@ help(const char *progname) printf(_(" -j, --jobs=NUM use this many concurrent connections to vacuum\n")); printf(_(" --min-mxid-age=MXID_AGE minimum multixact ID age of tables to vacuum\n")); printf(_(" --min-xid-age=XID_AGE minimum transaction ID age of tables to vacuum\n")); + printf(_(" --missing-only only analyze relations with missing statistics\n")); printf(_(" --no-index-cleanup don't remove index entries that point to dead tuples\n")); printf(_(" --no-process-main skip the main relation\n")); printf(_(" --no-process-toast skip the TOAST table associated with the table to vacuum\n")); diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm index bab3f3d2dbef..05bd94609d4f 100644 --- a/src/test/perl/PostgreSQL/Test/Cluster.pm +++ b/src/test/perl/PostgreSQL/Test/Cluster.pm @@ -2849,6 +2849,33 @@ sub issues_sql_like =pod +=item $node->issues_sql_unlike(cmd, unexpected_sql, test_name) + +Run a command on the node, then verify that $unexpected_sql does not appear in +the server log file. + +=cut + +sub issues_sql_unlike +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my ($self, $cmd, $unexpected_sql, $test_name) = @_; + + local %ENV = $self->_get_env(); + + my $log_location = -s $self->logfile; + + my $result = PostgreSQL::Test::Utils::run_log($cmd); + ok($result, "@$cmd exit code 0"); + my $log = + PostgreSQL::Test::Utils::slurp_file($self->logfile, $log_location); + unlike($log, $unexpected_sql, "$test_name: SQL not found in server log"); + return; +} + +=pod + =item $node->log_content() Returns the contents of log of the node From 1eddc1e88802330d3c0ac6ea8e89dc534ca0ca93 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Mon, 17 Mar 2025 20:07:43 -0500 Subject: [PATCH 3/3] Update guidance for running vacuumdb after pg_upgrade. Now that pg_upgrade can carry over most optimizer statistics, we should recommend using vacuumdb's new --missing-only option to only analyze relations that are missing statistics. Reviewed-by: John Naylor Discussion: https://postgr.es/m/Z5O1bpcwDrMgyrYy%40nathan --- doc/src/sgml/ref/pgupgrade.sgml | 9 +++++---- src/bin/pg_upgrade/check.c | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml index 9ef7a84eed04..8e3b01f963d9 100644 --- a/doc/src/sgml/ref/pgupgrade.sgml +++ b/doc/src/sgml/ref/pgupgrade.sgml @@ -807,10 +807,11 @@ psql --username=postgres --file=script.sql postgres - Using vacuumdb --all --analyze-only can efficiently - generate such statistics, and the use of - can speed it up. Option - can be used to generate minimal statistics quickly. + Using vacuumdb --all --analyze-only --missing-only can + efficiently generate such statistics. Alternatively, + vacuumdb --all --analyze-in-stages --missing-only + can be used to generate minimal statistics quickly. For either command, + the use of can speed it up. If vacuum_cost_delay is set to a non-zero value, this can be overridden to speed up statistics generation using PGOPTIONS, e.g., PGOPTIONS='-c diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index d32fc3d88ec7..9f0f219e5471 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -781,7 +781,7 @@ output_completion_banner(char *deletion_script_file_name) pg_log(PG_REPORT, "Some optimizer statistics may not have been transferred by pg_upgrade.\n" "Once you start the new server, consider running:\n" - " %s/vacuumdb %s--all --analyze-in-stages", new_cluster.bindir, user_specification.data); + " %s/vacuumdb %s--all --analyze-in-stages --missing-only", new_cluster.bindir, user_specification.data); if (deletion_script_file_name) pg_log(PG_REPORT,