diff options
author | Tomas Vondra | 2017-10-18 20:22:32 +0000 |
---|---|---|
committer | Tomas Vondra | 2017-10-19 16:00:04 +0000 |
commit | b89e46911d4ee4f61f856d5fddea290e8e511714 (patch) | |
tree | 0c52b994802c98cdba1f2fd7afd395c4ae3e3dbc | |
parent | 1d3fd00e4de01ac34fae9f93febc92b6ba156648 (diff) |
Collect index statistics during ANALYZE on coordinator
ANALYZE was not collecting index statistics, which may have a negative
impact, for example, on selectivity estimates for expressions. This also
fixes some incorrect plan changes in the updatable_views regression test.
Discussion: <[email protected]>
-rw-r--r-- | src/backend/commands/analyze.c | 133 |
1 file changed, 103 insertions, 30 deletions
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index ca48fca486..832d99a065 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -115,7 +115,8 @@ static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); #ifdef XCP static void analyze_rel_coordinator(Relation onerel, bool inh, int attr_cnt, - VacAttrStats **vacattrstats); + VacAttrStats **vacattrstats, int nindexes, + Relation *indexes, AnlIndexData *indexdata); #endif /* @@ -424,30 +425,6 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params, attr_cnt = tcnt; } -#ifdef XCP - if (IS_PGXC_COORDINATOR && onerel->rd_locator_info) - { - /* - * Fetch relation statistics from remote nodes and update - */ - vacuum_rel_coordinator(onerel, in_outer_xact); - - /* - * Fetch attribute statistics from remote nodes. - */ - analyze_rel_coordinator(onerel, inh, attr_cnt, vacattrstats); - - /* - * Skip acquiring local stats. Coordinator does not store data of - * distributed tables. - */ - nindexes = 0; - hasindex = false; - Irel = NULL; - goto cleanup; - } -#endif - /* * Open all indexes of the relation, and see if there are any analyzable * columns in the indexes. We do not analyze index columns if there was @@ -505,6 +482,28 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params, } } +#ifdef XCP + if (IS_PGXC_COORDINATOR && onerel->rd_locator_info) + { + /* + * Fetch relation statistics from remote nodes and update + */ + vacuum_rel_coordinator(onerel, in_outer_xact); + + /* + * Fetch attribute statistics from remote nodes. + */ + analyze_rel_coordinator(onerel, inh, attr_cnt, vacattrstats, + nindexes, Irel, indexdata); + + /* + * Skip acquiring local stats. Coordinator does not store data of + * distributed tables. + */ + goto cleanup; + } +#endif + /* * Determine how many rows we need to sample, using the worst case from * all analyzable columns. 
We use a lower bound of 100 rows to avoid @@ -2944,9 +2943,24 @@ compare_mcvs(const void *a, const void *b) #ifdef XCP +/* + * coord_collect_simple_stats + * Collect simple stats for a relation (pg_statistic contents). + * + * Collects statistics from the datanodes, and then keeps one of the + * received statistics for each attribute (the first one we receive, but + * it's mostly random). + * + * XXX We do not try to build statistics covering data from all the nodes, + * either by collecting fresh sample of rows or merging the statistics + * somehow. The current approach is very simple and cheap, but may have + * negative impact on estimate accuracy as the stats only cover data + * from a single node, and we may end up with stats from a different node + * for each attribute. + */ static void -analyze_rel_coordinator(Relation onerel, bool inh, int attr_cnt, - VacAttrStats **vacattrstats) +coord_collect_simple_stats(Relation onerel, bool inh, int attr_cnt, + VacAttrStats **vacattrstats) { char *nspname; char *relname; @@ -2960,14 +2974,11 @@ analyze_rel_coordinator(Relation onerel, bool inh, int attr_cnt, int i; /* Number of data nodes from which attribute statistics are received. */ int *numnodes; - List *stat_oids; /* Get the relation identifier */ relname = RelationGetRelationName(onerel); nspname = get_namespace_name(RelationGetNamespace(onerel)); - elog(LOG, "Getting detailed statistics for %s.%s", nspname, relname); - /* Make up query string */ initStringInfo(&query); /* Generic statistic fields */ @@ -3367,6 +3378,38 @@ analyze_rel_coordinator(Relation onerel, bool inh, int attr_cnt, } } update_attstats(RelationGetRelid(onerel), inh, attr_cnt, vacattrstats); +} + +/* + * coord_collect_extended_stats + * Collect extended stats for a relation (pg_statistic_ext contents). + * + * Collects statistics from the datanodes, and then keeps one of the + * received statistics for each attribute (the first one we receive, but + * it's mostly random). 
+ * + * XXX This has similar issues as coord_collect_simple_stats. + */ +static void +coord_collect_extended_stats(Relation onerel, int attr_cnt) +{ + char *nspname; + char *relname; + /* Fields to run query to read statistics from data nodes */ + StringInfoData query; + EState *estate; + MemoryContext oldcontext; + RemoteQuery *step; + RemoteQueryState *node; + TupleTableSlot *result; + int i; + /* Number of data nodes from which attribute statistics are received. */ + int *numnodes; + List *stat_oids; + + /* Get the relation identifier */ + relname = RelationGetRelationName(onerel); + nspname = get_namespace_name(RelationGetNamespace(onerel)); /* * Build extended statistics on the coordinator. @@ -3521,4 +3564,34 @@ analyze_rel_coordinator(Relation onerel, bool inh, int attr_cnt, } ExecEndRemoteQuery(node); } + +/* + * analyze_rel_coordinator + * Collect all statistics for a particular relation. + * + * We collect three types of statistics for each table: + * + * - simple statistics (pg_statistic) + * - extended statistics (pg_statistic_ext) + * - index statistics (including expression indexes) + */ +static void +analyze_rel_coordinator(Relation onerel, bool inh, int attr_cnt, + VacAttrStats **vacattrstats, int nindexes, + Relation *indexes, AnlIndexData *indexdata) +{ + int i; + + /* simple statistics (pg_statistic) for the relation */ + coord_collect_simple_stats(onerel, inh, attr_cnt, vacattrstats); + + /* simple statistics (pg_statistic) for all indexes */ + for (i = 0; i < nindexes; i++) + coord_collect_simple_stats(indexes[i], false, + indexdata[i].attr_cnt, + indexdata[i].vacattrstats); + + /* extended statistics (pg_statistic_ext) for the relation */ + coord_collect_extended_stats(onerel, attr_cnt); +} #endif |