Skip to content

Commit 764242f

Browse files
yugo-n authored and Commitfest Bot committed
Allow to collect statistics on virtual generated columns
During ANALYZE, generation expressions are expanded, and statistics are computed using compute_expr_stats(). To support this, both compute_expr_stats() and AnlExprData are now exported from extended_stats.c. To enable the optimizer to make use of these statistics, a new field named virtual_gencols is added to RelOptInfo. This field holds the expressions of virtual generated columns in the table. In examine_variable(), if an expression in a WHERE clause matches a virtual generated column, the corresponding statistics are used for that expression.
1 parent 5487058 commit 764242f

File tree

9 files changed

+166
-28
lines changed

9 files changed

+166
-28
lines changed

doc/src/sgml/ref/alter_table.sgml

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -210,8 +210,6 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
210210
When this form is used, the column's statistics are removed,
211211
so running <link linkend="sql-analyze"><command>ANALYZE</command></link>
212212
on the table afterwards is recommended.
213-
For a virtual generated column, <command>ANALYZE</command>
214-
is not necessary because such columns never have statistics.
215213
</para>
216214
</listitem>
217215
</varlistentry>
@@ -275,12 +273,9 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
275273
</para>
276274

277275
<para>
278-
When this form is used on a stored generated column, its statistics
279-
are removed, so running
280-
<link linkend="sql-analyze"><command>ANALYZE</command></link>
276+
When this form is used, the column's statistics are removed,
277+
so running <link linkend="sql-analyze"><command>ANALYZE</command></link>
281278
on the table afterwards is recommended.
282-
For a virtual generated column, <command>ANALYZE</command>
283-
is not necessary because such columns never have statistics.
284279
</para>
285280
</listitem>
286281
</varlistentry>

src/backend/commands/analyze.c

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include "parser/parse_oper.h"
4141
#include "parser/parse_relation.h"
4242
#include "pgstat.h"
43+
#include "rewrite/rewriteHandler.h"
4344
#include "statistics/extended_stats_internal.h"
4445
#include "statistics/statistics.h"
4546
#include "storage/bufmgr.h"
@@ -558,13 +559,28 @@ do_analyze_rel(Relation onerel, const VacuumParams params,
558559
{
559560
VacAttrStats *stats = vacattrstats[i];
560561
AttributeOpts *aopt;
562+
Form_pg_attribute attr = TupleDescAttr(onerel->rd_att, stats->tupattnum - 1);
561563

562-
stats->rows = rows;
563-
stats->tupDesc = onerel->rd_att;
564-
stats->compute_stats(stats,
565-
std_fetch_func,
566-
numrows,
567-
totalrows);
564+
/*
565+
* For a virtual generated column, compute statistics for the expression value.
566+
*/
567+
if (attr->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
568+
{
569+
AnlExprData *exprdata = (AnlExprData *) palloc0(sizeof(AnlExprData));
570+
571+
exprdata->expr = build_generation_expression(onerel, stats->tupattnum);
572+
exprdata->vacattrstat = stats;
573+
compute_expr_stats(onerel, exprdata, 1, rows, numrows);
574+
}
575+
else
576+
{
577+
stats->rows = rows;
578+
stats->tupDesc = onerel->rd_att;
579+
stats->compute_stats(stats,
580+
std_fetch_func,
581+
numrows,
582+
totalrows);
583+
}
568584

569585
/*
570586
* If the appropriate flavor of the n_distinct option is
@@ -1048,10 +1064,6 @@ examine_attribute(Relation onerel, int attnum, Node *index_expr)
10481064
if (attr->attisdropped)
10491065
return NULL;
10501066

1051-
/* Don't analyze virtual generated columns */
1052-
if (attr->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
1053-
return NULL;
1054-
10551067
/*
10561068
* Get attstattarget value. Set to -1 if null. (Analyze functions expect
10571069
* -1 to mean use default_statistics_target; see for example

src/backend/optimizer/util/plancat.c

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include "parser/parse_relation.h"
4343
#include "parser/parsetree.h"
4444
#include "partitioning/partdesc.h"
45+
#include "rewrite/rewriteHandler.h"
4546
#include "rewrite/rewriteManip.h"
4647
#include "statistics/statistics.h"
4748
#include "storage/bufmgr.h"
@@ -77,6 +78,7 @@ static List *get_relation_constraints(PlannerInfo *root,
7778
bool include_partition);
7879
static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index,
7980
Relation heapRelation);
81+
static List *get_relation_virtual_gencols(RelOptInfo *rel, Relation relation);
8082
static List *get_relation_statistics(RelOptInfo *rel, Relation relation);
8183
static void set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel,
8284
Relation relation);
@@ -508,6 +510,9 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
508510

509511
rel->indexlist = indexinfos;
510512

513+
/* Make list of virtual generated columns */
514+
rel->virtual_gencols = get_relation_virtual_gencols(rel, relation);
515+
511516
rel->statlist = get_relation_statistics(rel, relation);
512517

513518
/* Grab foreign-table info using the relcache, while we have it */
@@ -1482,6 +1487,61 @@ get_relation_constraints(PlannerInfo *root,
14821487
return result;
14831488
}
14841489

1490+
/*
1491+
* get_relation_virtual_gencols
1492+
* Retrieve virtual generated columns defined on the table.
1493+
*
1494+
* Returns a List (possibly empty) of VirtualGeneratedColumnInfo objects
1495+
* containing the generation expressions. Each one has been processed by
1496+
* eval_const_expressions(), and its Vars are changed to have the varno
1497+
* indicated by rel->relid. This allows the expressions to be easily
1498+
* compared to expressions taken from WHERE.
1499+
*/
1500+
static List *get_relation_virtual_gencols(RelOptInfo *rel, Relation relation)
1501+
{
1502+
TupleDesc tupdesc = RelationGetDescr(relation);
1503+
Index varno = rel->relid;
1504+
List *virtual_gencols = NIL;
1505+
1506+
if (tupdesc->constr && tupdesc->constr->has_generated_virtual)
1507+
{
1508+
for (int i = 0; i < tupdesc->natts; i++)
1509+
{
1510+
Form_pg_attribute attr = TupleDescAttr(tupdesc, i);
1511+
1512+
if (attr->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
1513+
{
1514+
VirtualGeneratedColumnInfo *info;
1515+
1516+
info = makeNode(VirtualGeneratedColumnInfo);
1517+
info->attno = attr->attnum;
1518+
info->expr = build_generation_expression(relation, attr->attnum);
1519+
1520+
/*
1521+
* Run the expressions through eval_const_expressions. This is
1522+
* not just an optimization, but is necessary, because the
1523+
* planner will be comparing them to similarly-processed qual
1524+
* clauses, and may fail to detect valid matches without this.
1525+
* We must not use canonicalize_qual, however, since these
1526+
* aren't qual expressions.
1527+
*/
1528+
info->expr = eval_const_expressions(NULL, info->expr);
1529+
1530+
/* May as well fix opfuncids too */
1531+
fix_opfuncids(info->expr);
1532+
1533+
/* Fix Vars to have the desired varno */
1534+
if (varno != 1)
1535+
ChangeVarNodes((Node *) info->expr, 1, varno, 0);
1536+
1537+
virtual_gencols = lappend(virtual_gencols, info);
1538+
}
1539+
}
1540+
}
1541+
1542+
return virtual_gencols;
1543+
}
1544+
14851545
/*
14861546
* Try loading data for the statistics object.
14871547
*

src/backend/statistics/extended_stats.c

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -82,15 +82,6 @@ static void statext_store(Oid statOid, bool inh,
8282
static int statext_compute_stattarget(int stattarget,
8383
int nattrs, VacAttrStats **stats);
8484

85-
/* Information needed to analyze a single simple expression. */
86-
typedef struct AnlExprData
87-
{
88-
Node *expr; /* expression to analyze */
89-
VacAttrStats *vacattrstat; /* statistics attrs to analyze */
90-
} AnlExprData;
91-
92-
static void compute_expr_stats(Relation onerel, AnlExprData *exprdata,
93-
int nexprs, HeapTuple *rows, int numrows);
9485
static Datum serialize_expr_stats(AnlExprData *exprdata, int nexprs);
9586
static Datum expr_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
9687
static AnlExprData *build_expr_data(List *exprs, int stattarget);
@@ -2083,7 +2074,7 @@ examine_opclause_args(List *args, Node **exprp, Const **cstp,
20832074
/*
20842075
* Compute statistics about expressions of a relation.
20852076
*/
2086-
static void
2077+
void
20872078
compute_expr_stats(Relation onerel, AnlExprData *exprdata, int nexprs,
20882079
HeapTuple *rows, int numrows)
20892080
{

src/backend/utils/adt/selfuncs.c

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5408,6 +5408,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
54085408
*/
54095409
ListCell *ilist;
54105410
ListCell *slist;
5411+
ListCell *vlist;
54115412

54125413
/*
54135414
* The nullingrels bits within the expression could prevent us from
@@ -5527,6 +5528,46 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
55275528
break;
55285529
}
55295530

5531+
/*
5532+
* Search virtual generated columns for one with a matching expression
5533+
* and use the statistics collected for it, if any.
5534+
*/
5535+
foreach(vlist, onerel->virtual_gencols)
5536+
{
5537+
VirtualGeneratedColumnInfo *info = (VirtualGeneratedColumnInfo *) lfirst(vlist);
5538+
Node *expr = info->expr;
5539+
5540+
/*
5541+
* Stop once we've found statistics for the expression (either
5542+
* for a virtual generated column or for an index in the preceding
5543+
* loop).
5544+
*/
5545+
if (vardata->statsTuple)
5546+
break;
5547+
5548+
/* strip RelabelType before comparing it */
5549+
if (expr && IsA(expr, RelabelType))
5550+
expr = (Node *) ((RelabelType *) expr)->arg;
5551+
5552+
if (equal(node, expr))
5553+
{
5554+
Var *var = makeVar(onerel->relid,
5555+
info->attno,
5556+
vardata->atttype,
5557+
vardata->atttypmod,
5558+
exprCollation(node),
5559+
0);
5560+
/*
5561+
* There cannot be a unique constraint on a virtual generated column.
5562+
* Fields other than the stats tuple must already be set.
5563+
*/
5564+
vardata->isunique = false;
5565+
5566+
/* Try to locate some stats */
5567+
examine_simple_variable(root, var, vardata);
5568+
}
5569+
}
5570+
55305571
/*
55315572
* Search extended statistics for one with a matching expression.
55325573
* There might be multiple ones, so just grab the first one. In the
@@ -5542,7 +5583,8 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
55425583

55435584
/*
55445585
* Stop once we've found statistics for the expression (either
5545-
* from extended stats, or for an index in the preceding loop).
5586+
* from extended stats, or for an index or a virtual generated
5587+
* column in the preceding loop).
55465588
*/
55475589
if (vardata->statsTuple)
55485590
break;

src/include/nodes/pathnodes.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -982,6 +982,8 @@ typedef struct RelOptInfo
982982
List *indexlist;
983983
/* list of StatisticExtInfo */
984984
List *statlist;
985+
/* list of VirtualGeneratedColumnInfo */
986+
List *virtual_gencols;
985987
/* size estimates derived from pg_class */
986988
BlockNumber pages;
987989
Cardinality tuples;
@@ -1355,6 +1357,23 @@ typedef struct StatisticExtInfo
13551357
List *exprs;
13561358
} StatisticExtInfo;
13571359

1360+
/*
1361+
* VirtualGeneratedColumnInfo
1362+
* Information about virtual generated columns for planning/optimization
1363+
*/
1364+
typedef struct VirtualGeneratedColumnInfo
1365+
{
1366+
pg_node_attr(no_copy_equal, no_read, no_query_jumble)
1367+
1368+
NodeTag type;
1369+
1370+
/* attribute number of virtual generated column */
1371+
AttrNumber attno;
1372+
1373+
/* generation expression */
1374+
Node *expr;
1375+
} VirtualGeneratedColumnInfo;
1376+
13581377
/*
13591378
* JoinDomains
13601379
*

src/include/statistics/extended_stats_internal.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,12 @@ typedef struct StatsBuildData
6868
bool **nulls;
6969
} StatsBuildData;
7070

71+
/* Information needed to analyze a single simple expression. */
72+
typedef struct AnlExprData
73+
{
74+
Node *expr; /* expression to analyze */
75+
VacAttrStats *vacattrstat; /* statistics attrs to analyze */
76+
} AnlExprData;
7177

7278
extern MVNDistinct *statext_ndistinct_build(double totalrows, StatsBuildData *data);
7379
extern bytea *statext_ndistinct_serialize(MVNDistinct *ndistinct);
@@ -127,4 +133,7 @@ extern Selectivity mcv_clause_selectivity_or(PlannerInfo *root,
127133
Selectivity *overlap_basesel,
128134
Selectivity *totalsel);
129135

136+
extern void
137+
compute_expr_stats(Relation onerel, AnlExprData *exprdata, int nexprs,
138+
HeapTuple *rows, int numrows);
130139
#endif /* EXTENDED_STATS_INTERNAL_H */

src/test/regress/expected/generated_virtual.out

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1493,6 +1493,13 @@ create table gtest32 (
14931493
);
14941494
insert into gtest32 values (1), (2);
14951495
analyze gtest32;
1496+
-- Ensure that statistics on virtual generated column are available
1497+
select count(*) from pg_stats where tablename = 'gtest32';
1498+
count
1499+
-------
1500+
5
1501+
(1 row)
1502+
14961503
-- Ensure that nullingrel bits are propagated into the generation expressions
14971504
explain (costs off)
14981505
select sum(t2.b) over (partition by t2.a),

src/test/regress/sql/generated_virtual.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -817,6 +817,9 @@ create table gtest32 (
817817
insert into gtest32 values (1), (2);
818818
analyze gtest32;
819819

820+
-- Ensure that statistics on virtual generated column are available
821+
select count(*) from pg_stats where tablename = 'gtest32';
822+
820823
-- Ensure that nullingrel bits are propagated into the generation expressions
821824
explain (costs off)
822825
select sum(t2.b) over (partition by t2.a),

0 commit comments

Comments
 (0)