From 11319906728a4455e167626f9f3ba5dded1ffc35 Mon Sep 17 00:00:00 2001 From: Alena0704 Date: Tue, 14 Dec 2021 14:15:10 +0300 Subject: [PATCH 001/134] add query_hash as getting query_id, test for joint use pg_stat_statements and aqo, function for ignoring string with Query Identifier in aqo_gucs and aqo_fdw tests --- Makefile | 8 ++-- t/002_pg_stat_statements_aqo.pl | 66 +++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 4 deletions(-) create mode 100644 t/002_pg_stat_statements_aqo.pl diff --git a/Makefile b/Makefile index aedca207..2edb59be 100755 --- a/Makefile +++ b/Makefile @@ -26,9 +26,10 @@ REGRESS = aqo_disabled \ top_queries fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw -PG_CPPFLAGS += -I$(libpq_srcdir) -I$(fdw_srcdir) +stat_srcdir = $(top_srcdir)/contrib/pg_stat_statements +PG_CPPFLAGS += -I$(libpq_srcdir) -I$(fdw_srcdir) -I$(stat_srcdir) EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/conf.add -EXTRA_INSTALL = contrib/postgres_fdw +EXTRA_INSTALL = contrib/postgres_fdw contrib/pg_stat_statements DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql \ aqo--1.2--1.3.sql @@ -42,5 +43,4 @@ subdir = contrib/aqo top_builddir = ../.. include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk -endif - +endif \ No newline at end of file diff --git a/t/002_pg_stat_statements_aqo.pl b/t/002_pg_stat_statements_aqo.pl new file mode 100644 index 00000000..5b6feac5 --- /dev/null +++ b/t/002_pg_stat_statements_aqo.pl @@ -0,0 +1,66 @@ +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More tests => 3; +print "start"; +my $node = PostgreSQL::Test::Cluster->new('profiling'); +$node->init; +print "create conf"; + +$node->append_conf('postgresql.conf', qq{ + aqo.mode = 'disabled' + aqo.profile_classes = -1 + aqo.profile_enable = 'true' + aqo.force_collect_stat = 'false' + aqo.log_ignorance = 'off' + log_statement = 'ddl' # reduce size of logs. 
+ }); +# Test constants. +my $TRANSACTIONS = 100; +my $CLIENTS = 10; +my $THREADS = 10; +my $query_id; + +# General purpose variables. +my $res; +my $total_classes; +$node->start(); + # ERROR: AQO allow to load library only on startup +print "create extantion aqo"; +$node->psql('postgres', "CREATE EXTENSION aqo"); +$node->psql('postgres', "CREATE EXTENSION pg_stat_statements"); +print "create preload libraries"; +$node->append_conf('postgresql.conf', qq{shared_preload_libraries = 'aqo, pg_stat_statements'}); +$node->restart(); +$node->psql('postgres', "CREATE EXTENSION aqo"); +$node->psql('postgres', "CREATE EXTENSION pg_stat_statements"); +$node->psql('postgres', " + ALTER SYSTEM SET aqo.profile_enable = 'true'; + SELECT pg_reload_conf(); +"); + +$node->psql('postgres', "CREATE TABLE aqo_test0(a int, b int, c int, d int); +WITH RECURSIVE t(a, b, c, d) +AS ( + VALUES (0, 0, 0, 0) + UNION ALL + SELECT t.a + 1, t.b + 1, t.c + 1, t.d + 1 FROM t WHERE t.a < 2000 +) INSERT INTO aqo_test0 (SELECT * FROM t); +CREATE INDEX aqo_test0_idx_a ON aqo_test0 (a); +ANALYZE aqo_test0;"); +$node->psql('postgres', " + ALTER SYSTEM SET aqo.mode = 'controlled'; +"); +$res = $node->safe_psql('postgres', "SELECT * FROM aqo_test0"); +$res = $node->safe_psql('postgres', "SELECT count(*) FROM pg_stat_statements where query = 'SELECT * FROM aqo_test0'"); +is($res, 1); # The same query add in pg_stat_statements +$res = $node->safe_psql('postgres', "SELECT count(*) from aqo_query_texts where query_text = 'SELECT * FROM aqo_test0'"); +is($res, 0); # The same query isn't add in aqo_query_texts +$query_id = $node->safe_psql('postgres', "SELECT queryid FROM pg_stat_statements where query = 'SELECT * FROM aqo_test0'"); +$res = $node->safe_psql('postgres', "insert into aqo_queries values ($query_id,'f','f',$query_id,'f')"); +# Add query in aqo_query_texts +$res = $node->safe_psql('postgres', "insert into aqo_query_texts values ($query_id,'SELECT * FROM aqo_test0')"); +$res = 
$node->safe_psql('postgres', "SELECT count(*) from aqo_query_texts where query_text = 'SELECT * FROM aqo_test0'"); # The same query is in aqo_query_texts +is($res, 1); +$node->stop(); \ No newline at end of file From 48e68abdf7e90c947c8cb9f20e5b25d3e27b4ec3 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 23 Dec 2021 17:38:50 +0500 Subject: [PATCH 002/134] Arrange current AQO master with stable version of PG14. --- aqo_master.patch => aqo_pg14.patch | 86 +++++++++++++++--------------- path_utils.c | 4 +- t/001_pgbench.pl | 7 ++- t/002_pg_stat_statements_aqo.pl | 9 ++-- 4 files changed, 53 insertions(+), 53 deletions(-) rename aqo_master.patch => aqo_pg14.patch (91%) diff --git a/aqo_master.patch b/aqo_pg14.patch similarity index 91% rename from aqo_master.patch rename to aqo_pg14.patch index bd166b18..a4a158eb 100644 --- a/aqo_master.patch +++ b/aqo_pg14.patch @@ -11,7 +11,7 @@ index f27e458482..0c62191904 100644 auto_explain \ bloom \ diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index 10644dfac4..16d9e1e915 100644 +index 69821c4631..f1fd5f93c5 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -24,6 +24,7 @@ @@ -57,10 +57,10 @@ index 10644dfac4..16d9e1e915 100644 if (es->format == EXPLAIN_FORMAT_TEXT) appendStringInfoChar(es->str, '\n'); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index 228387eaee..f8de8090f3 100644 +index 5769536c6a..8aae9d5039 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c -@@ -136,6 +136,7 @@ CopyPlanFields(const Plan *from, Plan *newnode) +@@ -129,6 +129,7 @@ CopyPlanFields(const Plan *from, Plan *newnode) COPY_NODE_FIELD(initPlan); COPY_BITMAPSET_FIELD(extParam); COPY_BITMAPSET_FIELD(allParam); @@ -69,10 +69,10 @@ index 228387eaee..f8de8090f3 100644 /* diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c -index 2e5ed77e18..b6cbf11f8f 100644 +index da212d9ddf..78dc137df8 100644 --- 
a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c -@@ -349,6 +349,7 @@ _outPlanInfo(StringInfo str, const Plan *node) +@@ -342,6 +342,7 @@ _outPlanInfo(StringInfo str, const Plan *node) WRITE_NODE_FIELD(initPlan); WRITE_BITMAPSET_FIELD(extParam); WRITE_BITMAPSET_FIELD(allParam); @@ -81,10 +81,10 @@ index 2e5ed77e18..b6cbf11f8f 100644 /* diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c -index abf08b7a2f..d21a662f9c 100644 +index 4c537c30e0..40b1ce29de 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c -@@ -1629,6 +1629,11 @@ ReadCommonPlan(Plan *local_node) +@@ -1628,6 +1628,11 @@ ReadCommonPlan(Plan *local_node) READ_NODE_FIELD(initPlan); READ_BITMAPSET_FIELD(extParam); READ_BITMAPSET_FIELD(allParam); @@ -97,7 +97,7 @@ index abf08b7a2f..d21a662f9c 100644 /* diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index 1e4d404f02..caa00f3716 100644 +index 30c8595f76..18699564b3 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -98,6 +98,11 @@ @@ -120,7 +120,7 @@ index 1e4d404f02..caa00f3716 100644 /* -@@ -4906,6 +4910,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4905,6 +4909,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) } @@ -179,7 +179,7 @@ index 1e4d404f02..caa00f3716 100644 /* * set_baserel_size_estimates * Set the size estimates for the given base relation. 
-@@ -4922,19 +4978,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4921,19 +4977,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) { @@ -200,7 +200,7 @@ index 1e4d404f02..caa00f3716 100644 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); -@@ -4945,13 +4992,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) +@@ -4944,13 +4991,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) * get_parameterized_baserel_size * Make a size estimate for a parameterized scan of a base relation. * @@ -236,7 +236,7 @@ index 1e4d404f02..caa00f3716 100644 { List *allclauses; double nrows; -@@ -4980,6 +5047,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -4979,6 +5046,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates * Set the size estimates for the given join relation. * @@ -273,7 +273,7 @@ index 1e4d404f02..caa00f3716 100644 * The rel's targetlist must have been constructed already, and a * restriction clause list that matches the given component rels must * be provided. -@@ -4999,11 +5096,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -4998,11 +5095,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * build_joinrel_tlist, and baserestrictcost is not used for join rels. */ void @@ -290,7 +290,7 @@ index 1e4d404f02..caa00f3716 100644 { rel->rows = calc_joinrel_size_estimate(root, rel, -@@ -5019,6 +5116,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -5018,6 +5115,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * get_parameterized_joinrel_size * Make a size estimate for a parameterized scan of a join relation. * @@ -326,7 +326,7 @@ index 1e4d404f02..caa00f3716 100644 * 'rel' is the joinrel under consideration. 
* 'outer_path', 'inner_path' are (probably also parameterized) Paths that * produce the relations being joined. -@@ -5031,11 +5157,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -5030,11 +5156,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates must have been applied already. */ double @@ -343,7 +343,7 @@ index 1e4d404f02..caa00f3716 100644 { double nrows; -@@ -5751,7 +5877,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) +@@ -5750,7 +5876,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) /* Should only be applied to base relations */ Assert(rel->relid > 0); @@ -352,7 +352,7 @@ index 1e4d404f02..caa00f3716 100644 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); -@@ -6038,7 +6164,7 @@ page_size(double tuples, int width) +@@ -6036,7 +6162,7 @@ page_size(double tuples, int width) * Estimate the fraction of the work that each worker will do given the * number of workers budgeted for the path. 
*/ @@ -362,7 +362,7 @@ index 1e4d404f02..caa00f3716 100644 { double parallel_divisor = path->parallel_workers; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 3dc0176a51..4afd22392d 100644 +index 5658f24323..3bbfa3c1b5 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -71,6 +71,7 @@ @@ -373,7 +373,7 @@ index 3dc0176a51..4afd22392d 100644 static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path, int flags); -@@ -543,6 +544,10 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) +@@ -544,6 +545,10 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) break; } @@ -384,7 +384,7 @@ index 3dc0176a51..4afd22392d 100644 return plan; } -@@ -5274,6 +5279,7 @@ copy_generic_path_info(Plan *dest, Path *src) +@@ -5278,6 +5283,7 @@ copy_generic_path_info(Plan *dest, Path *src) dest->plan_width = src->pathtarget->width; dest->parallel_aware = src->parallel_aware; dest->parallel_safe = src->parallel_safe; @@ -393,7 +393,7 @@ index 3dc0176a51..4afd22392d 100644 /* diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c -index 1e42d75465..561d5707c7 100644 +index 70899e5430..34075cc87b 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -143,7 +143,8 @@ static List *extract_rollup_sets(List *groupingSets); @@ -406,7 +406,7 @@ index 1e42d75465..561d5707c7 100644 grouping_sets_data *gd, List *target_list); static RelOptInfo *create_grouping_paths(PlannerInfo *root, -@@ -3157,7 +3158,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) +@@ -3151,7 +3152,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) */ static double get_number_of_groups(PlannerInfo *root, @@ -416,7 +416,7 @@ index 1e42d75465..561d5707c7 100644 grouping_sets_data *gd, List *target_list) { -@@ -3194,7 +3196,7 @@ get_number_of_groups(PlannerInfo *root, +@@ -3188,7 +3190,7 @@ 
get_number_of_groups(PlannerInfo *root, GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); double numGroups = estimate_num_groups(root, groupExprs, @@ -425,7 +425,7 @@ index 1e42d75465..561d5707c7 100644 &gset, NULL); -@@ -3220,7 +3222,7 @@ get_number_of_groups(PlannerInfo *root, +@@ -3214,7 +3216,7 @@ get_number_of_groups(PlannerInfo *root, GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); double numGroups = estimate_num_groups(root, groupExprs, @@ -434,7 +434,7 @@ index 1e42d75465..561d5707c7 100644 &gset, NULL); -@@ -3237,8 +3239,8 @@ get_number_of_groups(PlannerInfo *root, +@@ -3231,8 +3233,8 @@ get_number_of_groups(PlannerInfo *root, groupExprs = get_sortgrouplist_exprs(parse->groupClause, target_list); @@ -445,7 +445,7 @@ index 1e42d75465..561d5707c7 100644 } } else if (parse->groupingSets) -@@ -3625,7 +3627,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, +@@ -3619,7 +3621,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, * Estimate number of groups. 
*/ dNumGroups = get_number_of_groups(root, @@ -455,7 +455,7 @@ index 1e42d75465..561d5707c7 100644 gd, extra->targetList); -@@ -6577,13 +6580,15 @@ create_partial_grouping_paths(PlannerInfo *root, +@@ -6425,13 +6428,15 @@ create_partial_grouping_paths(PlannerInfo *root, if (cheapest_total_path != NULL) dNumPartialGroups = get_number_of_groups(root, @@ -474,10 +474,10 @@ index 1e42d75465..561d5707c7 100644 extra->targetList); diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c -index 47769cea45..0498eb900e 100644 +index e105a4d5f1..d821ea63bd 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c -@@ -259,6 +259,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) +@@ -258,6 +258,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->all_partrels = NULL; rel->partexprs = NULL; rel->nullable_partexprs = NULL; @@ -485,7 +485,7 @@ index 47769cea45..0498eb900e 100644 /* * Pass assorted information down the inheritance hierarchy. -@@ -384,7 +385,6 @@ find_base_rel(PlannerInfo *root, int relid) +@@ -383,7 +384,6 @@ find_base_rel(PlannerInfo *root, int relid) if (rel) return rel; } @@ -493,7 +493,7 @@ index 47769cea45..0498eb900e 100644 elog(ERROR, "no relation entry for relid %d", relid); return NULL; /* keep compiler quiet */ -@@ -674,6 +674,7 @@ build_join_rel(PlannerInfo *root, +@@ -672,6 +672,7 @@ build_join_rel(PlannerInfo *root, joinrel->all_partrels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; @@ -501,7 +501,7 @@ index 47769cea45..0498eb900e 100644 /* Compute information relevant to the foreign relations. 
*/ set_foreign_rel_properties(joinrel, outer_rel, inner_rel); -@@ -853,6 +854,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, +@@ -850,6 +851,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, joinrel->all_partrels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; @@ -509,7 +509,7 @@ index 47769cea45..0498eb900e 100644 joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids, inner_rel->top_parent_relids); -@@ -1282,6 +1284,7 @@ find_childrel_parents(PlannerInfo *root, RelOptInfo *rel) +@@ -1279,6 +1281,7 @@ find_childrel_parents(PlannerInfo *root, RelOptInfo *rel) } @@ -517,7 +517,7 @@ index 47769cea45..0498eb900e 100644 /* * get_baserel_parampathinfo * Get the ParamPathInfo for a parameterized path for a base relation, -@@ -1350,6 +1353,10 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, +@@ -1347,6 +1350,10 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, ppi->ppi_req_outer = required_outer; ppi->ppi_rows = rows; ppi->ppi_clauses = pclauses; @@ -528,7 +528,7 @@ index 47769cea45..0498eb900e 100644 baserel->ppilist = lappend(baserel->ppilist, ppi); return ppi; -@@ -1575,6 +1582,10 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, +@@ -1572,6 +1579,10 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, ppi->ppi_req_outer = required_outer; ppi->ppi_rows = rows; ppi->ppi_clauses = NIL; @@ -540,7 +540,7 @@ index 47769cea45..0498eb900e 100644 return ppi; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c -index 0c8c05f6c2..eba4d982b9 100644 +index 10895fb287..e81a6f6896 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -143,6 +143,7 @@ @@ -596,10 +596,10 @@ index e94d9e49cf..49236ced77 100644 extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, ParamListInfo params, DestReceiver *dest); diff --git a/src/include/nodes/pathnodes.h 
b/src/include/nodes/pathnodes.h -index 2a53a6e344..f370b5c694 100644 +index 8ee40cc68c..d7bb9df67c 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h -@@ -756,6 +756,10 @@ typedef struct RelOptInfo +@@ -755,6 +755,10 @@ typedef struct RelOptInfo Relids top_parent_relids; /* Relids of topmost parents (if "other" * rel) */ @@ -610,7 +610,7 @@ index 2a53a6e344..f370b5c694 100644 /* used for partitioned relations: */ PartitionScheme part_scheme; /* Partitioning scheme */ int nparts; /* Number of partitions; -1 if not yet set; in -@@ -773,7 +777,9 @@ typedef struct RelOptInfo +@@ -769,7 +773,9 @@ typedef struct RelOptInfo Relids all_partrels; /* Relids set of all partition relids */ List **partexprs; /* Non-nullable partition key expressions */ List **nullable_partexprs; /* Nullable partition key expressions */ @@ -621,9 +621,9 @@ index 2a53a6e344..f370b5c694 100644 /* * Is given relation partitioned? -@@ -1141,6 +1147,10 @@ typedef struct ParamPathInfo +@@ -1137,6 +1143,10 @@ typedef struct ParamPathInfo Relids ppi_req_outer; /* rels supplying parameters used by path */ - Cardinality ppi_rows; /* estimated number of result tuples */ + double ppi_rows; /* estimated number of result tuples */ List *ppi_clauses; /* join clauses available from outer rels */ + + /* AQO DEBUG purposes */ @@ -633,7 +633,7 @@ index 2a53a6e344..f370b5c694 100644 diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 01a246d50e..e905e54527 100644 +index 5ddf947971..fe9bda387a 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -158,6 +158,9 @@ typedef struct Plan @@ -731,7 +731,7 @@ index 2113bc82de..bcc2520cec 100644 #endif /* COST_H */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h -index f704d39980..2058694c68 100644 +index 2922c0cdc1..c59dce6989 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -18,6 +18,10 @@ diff --git a/path_utils.c 
b/path_utils.c index c529d773..5a8771f0 100644 --- a/path_utils.c +++ b/path_utils.c @@ -589,8 +589,8 @@ aqo_store_upper_signature_hook(PlannerInfo *root, clauses = get_path_clauses(input_rel->cheapest_total_path, root, &selectivities); relids = get_list_of_relids(root, input_rel->relids); - fss_node->val.ival.type = T_Integer; + fss_node->val.type = T_Integer; fss_node->location = -1; - fss_node->val.ival.val = get_fss_for_object(relids, clauses, NIL, NULL, NULL); + fss_node->val.val.ival = get_fss_for_object(relids, clauses, NIL, NULL, NULL); output_rel->private = lappend(output_rel->private, (void *) fss_node); } diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 65cddfb4..6c681f22 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -2,12 +2,11 @@ use warnings; use Config; -use PostgreSQL::Test::Cluster; -use PostgreSQL::Test::Utils; - +use PostgresNode; +use TestLib; use Test::More tests => 21; -my $node = PostgreSQL::Test::Cluster->new('aqotest'); +my $node = get_new_node('aqotest'); $node->init; $node->append_conf('postgresql.conf', qq{ shared_preload_libraries = 'aqo' diff --git a/t/002_pg_stat_statements_aqo.pl b/t/002_pg_stat_statements_aqo.pl index 5b6feac5..202c7873 100644 --- a/t/002_pg_stat_statements_aqo.pl +++ b/t/002_pg_stat_statements_aqo.pl @@ -1,10 +1,11 @@ use strict; use warnings; -use PostgreSQL::Test::Cluster; -use PostgreSQL::Test::Utils; + +use PostgresNode; +use TestLib; use Test::More tests => 3; -print "start"; -my $node = PostgreSQL::Test::Cluster->new('profiling'); + +my $node = get_new_node('profiling'); $node->init; print "create conf"; From 303e3833b8be23e425bbbd08dcc2f926e21072de Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Mon, 21 Feb 2022 13:12:52 +0500 Subject: [PATCH 003/134] Fix the bug with parallel_divisor. Fix the bug with error calculation. 
--- auto_tuning.c | 2 +- path_utils.c | 6 +++--- path_utils.h | 2 +- postprocessing.c | 13 ++++++++++++- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/auto_tuning.c b/auto_tuning.c index d8a42af3..a98578cf 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -202,7 +202,7 @@ automatical_query_tuning(uint64 query_hash, QueryStat * stat) query_context.use_aqo = (random() / ((double) MAX_RANDOM_VALUE + 1)) < p_use; query_context.learn_aqo = query_context.use_aqo; } - + if (num_iterations <= auto_tuning_max_iterations || p_use > 0.5) update_query(query_hash, query_context.fspace_hash, diff --git a/path_utils.c b/path_utils.c index 5a8771f0..3d696341 100644 --- a/path_utils.c +++ b/path_utils.c @@ -40,7 +40,7 @@ static AQOPlanNode DefaultAQOPlanNode = .selectivities = NIL, .grouping_exprs = NIL, .jointype = -1, - .parallel_divisor = -1, + .parallel_divisor = -1., .was_parametrized = false, .fss = INT_MAX, .prediction = -1 @@ -481,7 +481,7 @@ AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) WRITE_NODE_FIELD(grouping_exprs); WRITE_ENUM_FIELD(jointype, JoinType); - WRITE_INT_FIELD(parallel_divisor); + WRITE_FLOAT_FIELD(parallel_divisor, "%.5f"); WRITE_BOOL_FIELD(was_parametrized); /* For Adaptive optimization DEBUG purposes */ @@ -534,7 +534,7 @@ AQOnodeRead(struct ExtensibleNode *enode) READ_NODE_FIELD(grouping_exprs); READ_ENUM_FIELD(jointype, JoinType); - READ_INT_FIELD(parallel_divisor); + READ_FLOAT_FIELD(parallel_divisor); READ_BOOL_FIELD(was_parametrized); /* For Adaptive optimization DEBUG purposes */ diff --git a/path_utils.h b/path_utils.h index 9fd6864e..5ee4bba5 100644 --- a/path_utils.h +++ b/path_utils.h @@ -23,7 +23,7 @@ typedef struct AQOPlanNode List *grouping_exprs; JoinType jointype; - int parallel_divisor; + double parallel_divisor; bool was_parametrized; /* For Adaptive optimization DEBUG purposes */ diff --git a/postprocessing.c b/postprocessing.c index 868e2d08..a48f85e1 100644 --- a/postprocessing.c +++ 
b/postprocessing.c @@ -436,10 +436,21 @@ learnOnPlanState(PlanState *p, void *context) * Need learn. */ - /* It is needed for correct exp(result) calculation. */ + /* + * It is needed for correct exp(result) calculation. + * Do it before cardinality error estimation because we can predict no less + * than 1 tuple, but get zero tuples. + */ predicted = clamp_row_est(predicted); learn_rows = clamp_row_est(learn_rows); + /* Exclude "not executed" nodes from error calculation to reduce fluctuations. */ + if (!notExecuted) + { + cardinality_sum_errors += fabs(predicted - learn_rows); + cardinality_num_objects += 1; + } + /* * Some nodes inserts after planning step (See T_Hash node type). * In this case we have'nt AQO prediction and fss record. From 01770ca279335926686cdc7d16ce6f36383af0db Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 22 Feb 2022 11:01:54 +0500 Subject: [PATCH 004/134] Add AQO_DEBUG_PRINT parameter to add many additional printing to debug learning machinery. Add missed nodes into the get_path_clauses() routine. Introduce 'Appropriate path' concept to avoid duplication of the same clauses at learning phase. TODO: This concept should be rethink. 
--- cardinality_estimation.c | 35 +++++++++++++++++++++++++- path_utils.c | 54 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 87 insertions(+), 2 deletions(-) diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 98b689a3..c3e5d7a4 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -22,6 +22,37 @@ #include "aqo.h" #include "hash.h" +#ifdef AQO_DEBUG_PRINT +static void +predict_debug_output(List *clauses, List *selectivities, + List *relids, int fss_hash, double result) +{ + StringInfoData debug_str; + ListCell *lc; + + initStringInfo(&debug_str); + appendStringInfo(&debug_str, "fss: %d, clausesNum: %d, ", + fss_hash, list_length(clauses)); + + appendStringInfoString(&debug_str, ", selectivities: { "); + foreach(lc, selectivities) + { + Selectivity *s = (Selectivity *) lfirst(lc); + appendStringInfo(&debug_str, "%lf ", *s); + } + + appendStringInfoString(&debug_str, "}, relids: { "); + foreach(lc, relids) + { + int relid = lfirst_int(lc); + appendStringInfo(&debug_str, "%d ", relid); + } + + appendStringInfo(&debug_str, "}, result: %lf", result); + elog(DEBUG1, "Prediction: %s", debug_str.data); + pfree(debug_str.data); +} +#endif /* * General method for prediction the cardinality of given relation. 
@@ -65,7 +96,9 @@ predict_for_relation(List *clauses, List *selectivities, */ result = -1; } - +#ifdef AQO_DEBUG_PRINT + predict_debug_output(clauses, selectivities, relids, *fss_hash, result); +#endif pfree(features); if (nfeatures > 0) { diff --git a/path_utils.c b/path_utils.c index 3d696341..daa2d8f6 100644 --- a/path_utils.c +++ b/path_utils.c @@ -258,14 +258,29 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) return get_path_clauses(((MaterialPath *) path)->subpath, root, selectivities); break; + case T_MemoizePath: + return get_path_clauses(((MemoizePath *) path)->subpath, root, + selectivities); + break; case T_ProjectionPath: return get_path_clauses(((ProjectionPath *) path)->subpath, root, selectivities); break; + case T_ProjectSetPath: + return get_path_clauses(((ProjectSetPath *) path)->subpath, root, + selectivities); + break; case T_SortPath: return get_path_clauses(((SortPath *) path)->subpath, root, selectivities); break; + case T_IncrementalSortPath: + { + IncrementalSortPath *p = (IncrementalSortPath *) path; + return get_path_clauses(p->spath.subpath, root, + selectivities); + } + break; case T_GroupPath: return get_path_clauses(((GroupPath *) path)->subpath, root, selectivities); @@ -302,10 +317,20 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) return get_path_clauses(((SubqueryScanPath *) path)->subpath, root, selectivities); break; + case T_ModifyTablePath: + return get_path_clauses(((ModifyTablePath *) path)->subpath, root, + selectivities); + break; + /* TODO: RecursiveUnionPath */ case T_AppendPath: + case T_MergeAppendPath: { ListCell *lc; + /* + * It isn't a safe style, but we use the only subpaths field that is + * the first at both Append and MergeAppend nodes. + */ foreach (lc, ((AppendPath *) path)->subpaths) { Path *subpath = lfirst(lc); @@ -343,6 +368,33 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) } } +/* + * Some of paths are kind of utility path. 
I mean, It isn't corresponding to + * specific RelOptInfo node. So, it should be omitted in process of clauses + * gathering to avoid duplication of the same clauses. + * XXX: only a dump plug implemented for now. + */ +static bool +is_appropriate_path(Path *path) +{ + bool appropriate = true; + + switch (path->type) + { + case T_SortPath: + case T_IncrementalSortPath: + case T_MemoizePath: + case T_GatherPath: + case T_GatherMergePath: + appropriate = false; + break; + default: + break; + } + + return appropriate; +} + /* * Converts path info into plan node for collecting it after query execution. */ @@ -392,7 +444,7 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) node->relids = get_list_of_relids(root, ap->subpath->parent->relids); node->jointype = JOIN_INNER; } - else + else if (is_appropriate_path(src)) { node->clauses = list_concat( aqo_get_clauses(root, src->parent->baserestrictinfo), From d5ed1620dc6cfaa757e90b02ceff48e6ed4985d1 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 23 Feb 2022 10:57:47 +0500 Subject: [PATCH 005/134] Arrange CI tests in accordance with upstream PG14. 
--- .github/workflows/c-cpp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 8739c73a..3b597fe1 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -19,7 +19,7 @@ jobs: git config --global user.name "CI PgPro admin" git clone https://github.com/postgres/postgres.git pg cd pg - git checkout master + git checkout REL_14_STABLE ./configure --prefix=`pwd`/tmp_install git subtree add --prefix=contrib/aqo https://github.com/postgrespro/aqo.git $GITHUB_REF patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_master.patch From d35cd76607ca6682adaa6702e9ac767073396317 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 24 Feb 2022 09:01:25 +0500 Subject: [PATCH 006/134] Update c-cpp.yml --- .github/workflows/c-cpp.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 3b597fe1..e1bdc9d1 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -1,10 +1,10 @@ -name: C/C++ CI for the master +name: C/C++ CI for the stable14 branch. 
on: push: - branches: [ master ] + branches: [ stable14 ] pull_request: - branches: [ master ] + branches: [ stable14 ] jobs: build: From 00b7c19ea960723185c00db1c1d5d60c810f320a Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 11 Mar 2022 13:40:21 +0300 Subject: [PATCH 007/134] Correct automatic CI-test in aqo 14 version --- .github/workflows/c-cpp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index e1bdc9d1..ae145e13 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -22,6 +22,6 @@ jobs: git checkout REL_14_STABLE ./configure --prefix=`pwd`/tmp_install git subtree add --prefix=contrib/aqo https://github.com/postgrespro/aqo.git $GITHUB_REF - patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_master.patch + patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg14.patch make -j4 > /dev/null && make -j4 -C contrib > /dev/null make -C contrib/aqo check From 89e418f724f7deba1c76e652a4eea8f6799be113 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 11 Mar 2022 14:06:30 +0300 Subject: [PATCH 008/134] Edit documentation for installing aqo extension --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b3c0216e..03582a3a 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,9 @@ To avoid compatibility issues, the following branches in the git-repository are * `stable9_6`. * `stable11` - for PG v10 and v11. * `stable12` - for PG v12. -* the `master` branch of the AQO repository correctly works with PGv13 and the PostgreSQL `master` branch. +* `stable13` - for PG v13 +* `stable14` - for PG v14 +* the `master` branch of the AQO repository correctly works with PGv15 and the PostgreSQL `master` branch. The module contains a patch and an extension. Patch has to be applied to the sources of PostgresSQL. 
Patch affects header files, that is why PostgreSQL From fe5b74c2cd61729b61891ce0c6b2cc23358517ad Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 23 Mar 2022 09:41:28 +0500 Subject: [PATCH 009/134] Bugfix. Recursing into subquery we must use subroot instead of root to transalte relids in this subtree. --- cardinality_hooks.c | 2 +- path_utils.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 1beff225..13a737d4 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -298,7 +298,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, List *inner_selectivities; List *outer_selectivities; List *current_selectivities = NULL; - int fss = 0; + int fss = 0; if (IsQueryDisabled()) /* Fast path */ diff --git a/path_utils.c b/path_utils.c index daa2d8f6..307e0946 100644 --- a/path_utils.c +++ b/path_utils.c @@ -314,7 +314,10 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) selectivities); break; case T_SubqueryScanPath: - return get_path_clauses(((SubqueryScanPath *) path)->subpath, root, + /* Recursing into Subquery we must use subroot */ + Assert(path->parent->subroot != NULL); + return get_path_clauses(((SubqueryScanPath *) path)->subpath, + path->parent->subroot, selectivities); break; case T_ModifyTablePath: From 2d1d2f2e21632c563cee3cb2bccca2d726175226 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 23 Mar 2022 09:50:30 +0500 Subject: [PATCH 010/134] Remove the 'ignorance' feature. It was a good idea, but it reduces stability of the code and isn't used since the implementation. 
--- Makefile | 4 +- aqo.c | 14 --- expected/gucs.out | 58 ---------- ignorance.c | 191 -------------------------------- ignorance.h | 10 -- postprocessing.c | 12 -- preprocessing.c | 3 +- sql/gucs.sql | 26 ----- t/001_pgbench.pl | 1 - t/002_pg_stat_statements_aqo.pl | 1 - 10 files changed, 3 insertions(+), 317 deletions(-) delete mode 100644 ignorance.c delete mode 100644 ignorance.h diff --git a/Makefile b/Makefile index 2edb59be..b351ae0e 100755 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ PGFILEDESC = "AQO - Adaptive Query Optimization" MODULE_big = aqo OBJS = aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ -selectivity_cache.o storage.o utils.o ignorance.o $(WIN32RES) +selectivity_cache.o storage.o utils.o $(WIN32RES) TAP_TESTS = 1 @@ -43,4 +43,4 @@ subdir = contrib/aqo top_builddir = ../.. include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk -endif \ No newline at end of file +endif diff --git a/aqo.c b/aqo.c index bd40b3dc..b6cbb93f 100644 --- a/aqo.c +++ b/aqo.c @@ -19,7 +19,6 @@ #include "aqo.h" #include "cardinality_hooks.h" -#include "ignorance.h" #include "path_utils.h" #include "preprocessing.h" @@ -200,19 +199,6 @@ _PG_init(void) NULL ); - DefineCustomBoolVariable( - "aqo.log_ignorance", - "Log in a special table all feature spaces for which the AQO prediction was not successful.", - NULL, - &aqo_log_ignorance, - false, - PGC_SUSET, - 0, - NULL, - set_ignorance, - NULL - ); - prev_planner_hook = planner_hook; planner_hook = aqo_planner; prev_ExecutorStart_hook = ExecutorStart_hook; diff --git a/expected/gucs.out b/expected/gucs.out index 1c78ab22..6a28de78 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -29,61 +29,3 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) (6 rows) DROP EXTENSION aqo; -SET aqo.log_ignorance = 'on'; -SET aqo.log_ignorance = 'off'; -SET aqo.log_ignorance = 'off'; -SET 
aqo.log_ignorance = 'on'; -CREATE EXTENSION aqo; -SET aqo.log_ignorance = 'off'; -SET aqo.log_ignorance = 'on'; -SET aqo.log_ignorance = 'on'; -\d aqo_ignorance - Table "public.aqo_ignorance" - Column | Type | Collation | Nullable | Default ------------+---------+-----------+----------+--------- - qhash | bigint | | | - fhash | bigint | | | - fss_hash | integer | | | - node_type | integer | | | - node | text | | | -Indexes: - "aqo_ignorance_idx" UNIQUE, btree (qhash, fhash, fss_hash) - -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) -SELECT * FROM t; - QUERY PLAN ------------------------------------------ - Seq Scan on t (actual rows=100 loops=1) - AQO not used - Using aqo: true - AQO mode: LEARN - JOINS: 0 -(5 rows) - -SELECT node_type FROM aqo_ignorance; - node_type ------------ -(0 rows) - -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM t; - QUERY PLAN ------------------------------------------ - Seq Scan on t (actual rows=100 loops=1) - AQO: rows=100, error=0% - Using aqo: true - AQO mode: LEARN - JOINS: 0 -(5 rows) - -SELECT node_type FROM aqo_ignorance; - node_type ------------ -(0 rows) - --- This GUC can be changed by an admin only. -CREATE ROLE noadmin; -SET ROLE noadmin; -SET aqo.log_ignorance = 'off'; -ERROR: permission denied to set parameter "aqo.log_ignorance" -RESET ROLE; -DROP EXTENSION aqo; diff --git a/ignorance.c b/ignorance.c deleted file mode 100644 index 4dd1c1f1..00000000 --- a/ignorance.c +++ /dev/null @@ -1,191 +0,0 @@ -#include "postgres.h" - -#include "access/heapam.h" -#include "access/parallel.h" -#include "executor/spi.h" -#include "utils/lsyscache.h" -#include "miscadmin.h" - -#include "aqo.h" -#include "ignorance.h" -#include "path_utils.h" - - -bool aqo_log_ignorance; - -void -set_ignorance(bool newval, void *extra) -{ - /* - * On postgres start we can't create any table. - * It is not a problem. We will check existence at each update and create this - * table in dynamic mode, if needed. 
- */ - if (IsUnderPostmaster && !IsParallelWorker() && newval && - (aqo_log_ignorance != newval)) - /* Create storage and no error, if it exists already. */ - create_ignorance_table(true); - - aqo_log_ignorance = newval; -} - -bool -create_ignorance_table(bool fail_ok) -{ - Oid nspid = get_aqo_schema(); - char *nspname; - char *sql; - int rc; - - if (nspid == InvalidOid) - { - if (!fail_ok) - ereport(ERROR, - (errmsg("AQO extension is not installed"), - errdetail("AQO shared library is enabled but extension isn't installed."))); - else - return false; - } - - nspname = get_namespace_name(nspid); - Assert(nspname != NULL); - - /* Check the table existence. */ - if (get_relname_relid("aqo_ignorance", nspid) != InvalidOid) - { - if (!fail_ok) - elog(PANIC, "aqo_ignorance table exists yet."); - else - return false; - } - - sql = psprintf("CREATE TABLE %s.aqo_ignorance (qhash bigint, fhash bigint, fss_hash int, node_type int, node text);" - "CREATE UNIQUE INDEX aqo_ignorance_idx ON aqo_ignorance (qhash, fhash, fss_hash);", - nspname); - - SPI_connect(); - rc = SPI_execute(sql, false, 0); - SPI_finish(); - - if (rc < 0) - /* Can't ignore this problem. */ - elog(ERROR, "Failed to create aqo_ignorance table %s. 
status: %d", - sql, rc); - - pfree(nspname); - pfree(sql); - return true; -} - -void -update_ignorance(uint64 qhash, uint64 fhash, int fss_hash, Plan *plan) -{ - RangeVar *rv; - Relation hrel; - Relation irel; - SnapshotData snap; - TupleTableSlot *slot; - TupleDesc tupDesc; - HeapTuple tuple; - Datum values[5]; - bool isnull[5] = { false, false, false, false, false }; - bool shouldFree; - Oid reloid; - IndexScanDesc scan; - ScanKeyData key[3]; - LOCKTAG tag; - Oid nspid = get_aqo_schema(); - char *nspname; - AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); - - if (!OidIsValid(nspid)) - elog(PANIC, "AQO schema does not exists!"); - nspname = get_namespace_name(nspid); - Assert(nspname != 0); - - rv = makeRangeVar(nspname, "aqo_ignorance_idx", -1); - reloid = RangeVarGetRelid(rv, NoLock, true); - if (!OidIsValid(reloid)) - { - elog(LOG, "Create AQO ignorance table."); - - /* This table doesn't created on instance startup. Create now. */ - create_ignorance_table(false); - reloid = RangeVarGetRelid(rv, NoLock, true); - if (!OidIsValid(reloid)) - elog(PANIC, "Ignorance table does not exists!"); - } - - init_lock_tag(&tag, (uint32) fhash, fss_hash);//mycode !!! here half parted 32 bit!!! - LockAcquire(&tag, ExclusiveLock, false, false); - - rv = makeRangeVar(nspname, "aqo_ignorance", -1); - hrel = table_openrv(rv, RowExclusiveLock); - irel = index_open(reloid, RowExclusiveLock); - tupDesc = RelationGetDescr(hrel); - - InitDirtySnapshot(snap); - scan = index_beginscan(hrel, irel, &snap, 3, 0); - - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT4EQ, Int64GetDatum(qhash));//? - ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int64GetDatum(fhash));//? - ScanKeyInit(&key[2], 3, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss_hash)); - index_rescan(scan, key, 3, NULL, 0); - slot = MakeSingleTupleTableSlot(tupDesc, &TTSOpsBufferHeapTuple); - - if (!index_getnext_slot(scan, ForwardScanDirection, slot)) - { - if (aqo_node->prediction < 0.) 
- { - char nodestr[1024]; - char *qplan = nodeToString(plan); - - memset(nodestr, 0, 1024); - strncpy(nodestr, qplan, 1023); - pfree(qplan); - - /* - * AQO failed to predict cardinality for this node. - */ - values[0] = Int64GetDatum(qhash);//? - values[1] = Int64GetDatum(fhash);//? - values[2] = Int32GetDatum(fss_hash); - values[3] = Int32GetDatum(nodeTag(plan)); - values[4] = CStringGetTextDatum(nodestr); - tuple = heap_form_tuple(tupDesc, values, isnull); - - simple_heap_insert(hrel, tuple); - my_index_insert(irel, values, isnull, &(tuple->t_self), - hrel, UNIQUE_CHECK_YES); - } - else - { - /* AQO works as expected. */ - } - } - else if (!TransactionIdIsValid(snap.xmin) && - !TransactionIdIsValid(snap.xmax)) - { - /* - * AQO made prediction for this node. Delete it from the ignorance - * table. - */ - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - simple_heap_delete(hrel, &(tuple->t_self)); - } - else - { - /* - * The data exists. We can't do anything for now. - */ - } - - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, RowExclusiveLock); - table_close(hrel, RowExclusiveLock); - - CommandCounterIncrement(); - LockRelease(&tag, ExclusiveLock, false); -} diff --git a/ignorance.h b/ignorance.h deleted file mode 100644 index 9f7c2096..00000000 --- a/ignorance.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef IGNORANCE_H -#define IGNORANCE_H - -extern bool aqo_log_ignorance; - -extern void set_ignorance(bool newval, void *extra); -extern bool create_ignorance_table(bool fail_ok); -extern void update_ignorance(uint64 qhash, uint64 fhash, int fss_hash, Plan *plan);//? 
- -#endif /* IGNORANCE_H */ diff --git a/postprocessing.c b/postprocessing.c index a48f85e1..6c2b0b82 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -25,7 +25,6 @@ #include "aqo.h" #include "hash.h" -#include "ignorance.h" #include "path_utils.h" #include "preprocessing.h" @@ -172,17 +171,6 @@ learn_sample(List *clauselist, List *selectivities, List *relidslist, if (notExecuted && aqo_node->prediction > 0) return; - if (aqo_log_ignorance && aqo_node->prediction <= 0 && - load_fss(fhash, fss_hash, 0, NULL, NULL, NULL, NULL) ) - { - /* - * If ignorance logging is enabled and the feature space was existed in - * the ML knowledge base, log this issue. - */ - Assert(query_context.query_hash>=0); - update_ignorance(query_context.query_hash, fhash, fss_hash, plan); - } - if (nfeatures > 0) for (i = 0; i < aqo_K; ++i) matrix[i] = palloc(sizeof(double) * nfeatures); diff --git a/preprocessing.c b/preprocessing.c index 30d6431c..cee457d9 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -436,8 +436,7 @@ IsAQORelation(Relation rel) if (strcmp(relname, "aqo_data") == 0 || strcmp(relname, "aqo_query_texts") == 0 || strcmp(relname, "aqo_query_stat") == 0 || - strcmp(relname, "aqo_queries") == 0 || - strcmp(relname, "aqo_ignorance") == 0 + strcmp(relname, "aqo_queries") == 0 ) return true; diff --git a/sql/gucs.sql b/sql/gucs.sql index 99804669..c8cc8f36 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -12,29 +12,3 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; DROP EXTENSION aqo; - -SET aqo.log_ignorance = 'on'; -SET aqo.log_ignorance = 'off'; -SET aqo.log_ignorance = 'off'; -SET aqo.log_ignorance = 'on'; - -CREATE EXTENSION aqo; -SET aqo.log_ignorance = 'off'; -SET aqo.log_ignorance = 'on'; -SET aqo.log_ignorance = 'on'; -\d aqo_ignorance - -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) -SELECT * FROM t; -SELECT node_type FROM aqo_ignorance; - -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM t; -SELECT 
node_type FROM aqo_ignorance; - --- This GUC can be changed by an admin only. -CREATE ROLE noadmin; -SET ROLE noadmin; -SET aqo.log_ignorance = 'off'; -RESET ROLE; - -DROP EXTENSION aqo; diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 6c681f22..cdc18d38 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -11,7 +11,6 @@ $node->append_conf('postgresql.conf', qq{ shared_preload_libraries = 'aqo' aqo.mode = 'intelligent' - aqo.log_ignorance = 'off' # TODO: solve problems with deadlock on the table creation or remove this table at all. log_statement = 'ddl' }); diff --git a/t/002_pg_stat_statements_aqo.pl b/t/002_pg_stat_statements_aqo.pl index 202c7873..c0bc5127 100644 --- a/t/002_pg_stat_statements_aqo.pl +++ b/t/002_pg_stat_statements_aqo.pl @@ -14,7 +14,6 @@ aqo.profile_classes = -1 aqo.profile_enable = 'true' aqo.force_collect_stat = 'false' - aqo.log_ignorance = 'off' log_statement = 'ddl' # reduce size of logs. }); # Test constants. From 4bc045fde99075a45718ea8fe7f7309911a57e00 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 25 Feb 2022 16:36:09 +0300 Subject: [PATCH 011/134] review code --- auto_tuning.c | 1 - path_utils.c | 4 ---- 2 files changed, 5 deletions(-) diff --git a/auto_tuning.c b/auto_tuning.c index a98578cf..94c1641e 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -202,7 +202,6 @@ automatical_query_tuning(uint64 query_hash, QueryStat * stat) query_context.use_aqo = (random() / ((double) MAX_RANDOM_VALUE + 1)) < p_use; query_context.learn_aqo = query_context.use_aqo; } - if (num_iterations <= auto_tuning_max_iterations || p_use > 0.5) update_query(query_hash, query_context.fspace_hash, diff --git a/path_utils.c b/path_utils.c index 307e0946..c2da62ba 100644 --- a/path_utils.c +++ b/path_utils.c @@ -266,10 +266,6 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) return get_path_clauses(((ProjectionPath *) path)->subpath, root, selectivities); break; - case T_ProjectSetPath: - return 
get_path_clauses(((ProjectSetPath *) path)->subpath, root, - selectivities); - break; case T_SortPath: return get_path_clauses(((SortPath *) path)->subpath, root, selectivities); From a8bad9e1d07da19121981d26fdf045de13aaaf07 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Mon, 4 Apr 2022 15:59:13 +0300 Subject: [PATCH 012/134] Remove duplicating definition of prev_create_plan_hook in aqo.c --- aqo.c | 1 - 1 file changed, 1 deletion(-) diff --git a/aqo.c b/aqo.c index b6cbb93f..6ac095e9 100644 --- a/aqo.c +++ b/aqo.c @@ -108,7 +108,6 @@ set_baserel_rows_estimate_hook_type prev_set_baserel_rows_estimate_hook; get_parameterized_baserel_size_hook_type prev_get_parameterized_baserel_size_hook; set_joinrel_size_estimates_hook_type prev_set_joinrel_size_estimates_hook; get_parameterized_joinrel_size_hook_type prev_get_parameterized_joinrel_size_hook; -create_plan_hook_type prev_create_plan_hook; ExplainOnePlan_hook_type prev_ExplainOnePlan_hook; ExplainOneNode_hook_type prev_ExplainOneNode_hook; From c1aad66dc0e7c4975940c5899b138454da532f6a Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 5 Apr 2022 11:28:55 +0300 Subject: [PATCH 013/134] Fix print_node_explain. Avoid situation where an AQO node isn't initialized. --- path_utils.c | 6 ++++++ postprocessing.c | 18 +++++++----------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/path_utils.c b/path_utils.c index c2da62ba..102907a9 100644 --- a/path_utils.c +++ b/path_utils.c @@ -56,6 +56,12 @@ create_aqo_plan_node() return node; } +/* + * Extract an AQO node from the plan private field. + * If no one node was found, return pointer to the default value or allocate new + * node (with default value) according to 'create' field. + * Can't return NULL value at all. 
+ */ AQOPlanNode * get_aqo_plan_node(Plan *plan, bool create) { diff --git a/postprocessing.c b/postprocessing.c index 6c2b0b82..d437a444 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -857,26 +857,22 @@ ExtractFromQueryEnv(QueryDesc *queryDesc) void print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) { - int wrkrs = 1; - double error = -1.; - AQOPlanNode *aqo_node; + int wrkrs = 1; + double error = -1.; + AQOPlanNode *aqo_node; /* Extension, which took a hook early can be executed early too. */ if (prev_ExplainOneNode_hook) prev_ExplainOneNode_hook(es, ps, plan); - if (IsQueryDisabled()) + if (IsQueryDisabled() || !plan || es->format != EXPLAIN_FORMAT_TEXT) return; - if (es->format != EXPLAIN_FORMAT_TEXT) - /* Only text format is supported. */ - return; + aqo_node = get_aqo_plan_node(plan, false); - if (!aqo_show_details || !plan || !ps) + if (!aqo_show_details || !ps) goto explain_end; - aqo_node = get_aqo_plan_node(plan, false); - if (!ps->instrument) /* We can show only prediction, without error calculation */ goto explain_print; @@ -919,7 +915,7 @@ print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) appendStringInfo(es->str, "AQO not used"); explain_end: - /* XXX: Do we really have situations than plan is NULL? */ + /* XXX: Do we really have situations when the plan is a NULL pointer? */ if (plan && aqo_show_hash) appendStringInfo(es->str, ", fss=%d", aqo_node->fss); } From 84bf3bff8d8daac927b949c0f0fbfd79b21f7d71 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 19 Apr 2022 14:21:03 +0500 Subject: [PATCH 014/134] Bugfix. Do not try to open an AQO heap relation if an index does not exists. 
--- aqo--1.2--1.3.sql | 6 +++--- storage.c | 34 +++++++++++++++++++--------------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/aqo--1.2--1.3.sql b/aqo--1.2--1.3.sql index f8bd3e49..605e6b99 100755 --- a/aqo--1.2--1.3.sql +++ b/aqo--1.2--1.3.sql @@ -29,7 +29,7 @@ BEGIN END LOOP; END IF; - FOR aqo_queries_row IN (SELECT * FROM aqo_queries) + FOR aqo_queries_row IN (SELECT * FROM public.aqo_queries) LOOP IF (delete_row = true AND fspace_hash_var <> 0 AND fspace_hash_var = aqo_queries_row.fspace_hash AND @@ -103,7 +103,7 @@ BEGIN aqo_queries.query_hash, to_char(array_avg(execution_time_without_aqo), '9.99EEEE')::float, to_char(array_mse(execution_time_without_aqo), '9.99EEEE')::float - FROM aqo_queries INNER JOIN aqo_query_stat + FROM public.aqo_queries INNER JOIN aqo_query_stat ON aqo_queries.query_hash = aqo_query_stat.query_hash GROUP BY (execution_time_without_aqo, aqo_queries.fspace_hash, aqo_queries.query_hash) ORDER BY execution_time DESC LIMIT n; @@ -129,7 +129,7 @@ BEGIN aqo_queries.query_hash, to_char(array_avg(cardinality_error_without_aqo), '9.99EEEE')::float, to_char(array_mse(cardinality_error_without_aqo), '9.99EEEE')::float - FROM aqo_queries INNER JOIN aqo_query_stat + FROM public.aqo_queries INNER JOIN aqo_query_stat ON aqo_queries.query_hash = aqo_query_stat.query_hash GROUP BY (cardinality_error_without_aqo, aqo_queries.fspace_hash, aqo_queries.query_hash) ORDER BY error DESC LIMIT n; diff --git a/storage.c b/storage.c index 5c62896f..0b7cbf63 100644 --- a/storage.c +++ b/storage.c @@ -56,26 +56,30 @@ open_aqo_relation(char *heaprelnspname, char *heaprelname, RangeVar *rv; reloid = RelnameGetRelid(indrelname); + if (!OidIsValid(reloid)) + goto cleanup; + rv = makeRangeVar(heaprelnspname, heaprelname, -1); *hrel = table_openrv_extended(rv, lockmode, true); - if (!OidIsValid(reloid) || *hrel == NULL) - { - /* - * Absence of any AQO-related table tell us that someone executed - * a 'DROP EXTENSION aqo' command. 
We disable AQO for all future queries - * in this backend. For performance reasons we do it locally. - * Clear profiling hash table. - * Also, we gently disable AQO for the rest of the current query - * execution process. - */ - aqo_enabled = false; - disable_aqo_for_query(); - - return false; - } + if (*hrel == NULL) + goto cleanup; *irel = index_open(reloid, lockmode); return true; + +cleanup: + /* + * Absence of any AQO-related table tell us that someone executed + * a 'DROP EXTENSION aqo' command. We disable AQO for all future queries + * in this backend. For performance reasons we do it locally. + * Clear profiling hash table. + * Also, we gently disable AQO for the rest of the current query + * execution process. + */ + aqo_enabled = false; + disable_aqo_for_query(); + return false; + } /* From 42f71b4bba4a26172093dc59daecd5bfb27a0405 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 19 Apr 2022 16:46:46 +0500 Subject: [PATCH 015/134] Bugfixes: 1. Increase stability of the pgbench test. 2. Open subsidiary AQO relations more carefully. --- storage.c | 9 ++++++--- t/001_pgbench.pl | 11 ++++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/storage.c b/storage.c index 0b7cbf63..259d725b 100644 --- a/storage.c +++ b/storage.c @@ -52,8 +52,8 @@ open_aqo_relation(char *heaprelnspname, char *heaprelname, char *indrelname, LOCKMODE lockmode, Relation *hrel, Relation *irel) { - Oid reloid; - RangeVar *rv; + Oid reloid; + RangeVar *rv; reloid = RelnameGetRelid(indrelname); if (!OidIsValid(reloid)) @@ -64,7 +64,10 @@ open_aqo_relation(char *heaprelnspname, char *heaprelname, if (*hrel == NULL) goto cleanup; - *irel = index_open(reloid, lockmode); + /* Try to open index relation carefully. 
*/ + *irel = try_relation_open(reloid, lockmode); + if (*irel == NULL) + goto cleanup; return true; cleanup: diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index cdc18d38..624e6cdc 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -109,6 +109,10 @@ (SELECT count(aid) AS x FROM pgbench_accounts GROUP BY abalance HAVING abalance < :border) AS q1 WHERE pgbb.bid = q1.x; }); + +# Avoid problems with an error fluctuations during the test above. +$node->safe_psql('postgres', "TRUNCATE aqo_query_stat"); + # Look for top of problematic queries. $node->command_ok([ 'pgbench', '-t', "10", '-c', "$CLIENTS", '-j', "$THREADS", '-f', "$analytics" ], @@ -127,7 +131,7 @@ $res = $node->safe_psql('postgres', "SELECT count(*) FROM top_time_queries(10) v WHERE v.execution_time > 0."); -is($res, 10); +is($res, 5); # ############################################################################## # @@ -281,7 +285,8 @@ SELECT abalance FROM pgbench_accounts WHERE aid = :aid; UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid; UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid; - INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP); + INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) + VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP); END; \endif }); @@ -295,7 +300,7 @@ $node->restart(); $node->command_ok([ 'pgbench', '-T', - "5", '-c', "$CLIENTS", '-j', "$THREADS" , '-f', "$bank"], + "50", '-c', "$CLIENTS", '-j', "$THREADS" , '-f', "$bank"], 'Conflicts with an AQO dropping command.'); $node->stop(); From 1c44a6019575c478148aa155217ba93a74bfa384 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 20 Apr 2022 15:09:00 +0500 Subject: [PATCH 016/134] Parameterize 001_pgbench.pl: allow to define a number of transactions, clients and threads from the environment. 
--- t/001_pgbench.pl | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 624e6cdc..c4ddb7ae 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -14,11 +14,25 @@ log_statement = 'ddl' }); -# Test constants. +# Test constants. Default values. my $TRANSACTIONS = 1000; my $CLIENTS = 10; my $THREADS = 10; +# Change pgbench parameters according to the environment variable. +if (defined $ENV{TRANSACTIONS}) +{ + $TRANSACTIONS = $ENV{TRANSACTIONS}; +} +if (defined $ENV{CLIENTS}) +{ + $CLIENTS = $ENV{CLIENTS}; +} +if (defined $ENV{THREADS}) +{ + $THREADS = $ENV{THREADS}; +} + # General purpose variables. my $res; my $fss_count; From 992c0683163604bb57bfa787afc0e1448f405a18 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 20 Apr 2022 15:23:34 +0500 Subject: [PATCH 017/134] Update c-cpp.yml Change CI to drastically increase concurrency among pgbench clients --- .github/workflows/c-cpp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index ae145e13..0e70fb3c 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -24,4 +24,4 @@ jobs: git subtree add --prefix=contrib/aqo https://github.com/postgrespro/aqo.git $GITHUB_REF patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg14.patch make -j4 > /dev/null && make -j4 -C contrib > /dev/null - make -C contrib/aqo check + env CLIENTS=50 THREADS=50 make -C contrib/aqo check From 0eafe6f7eca75292cf5eb0112f3b10baee885f4a Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 20 Apr 2022 16:21:29 +0500 Subject: [PATCH 018/134] Bugfix. close heap relation in the case of races between backend and 'DROP EXTENSION aqo'. 
--- storage.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/storage.c b/storage.c index 259d725b..cf2ee59e 100644 --- a/storage.c +++ b/storage.c @@ -67,7 +67,10 @@ open_aqo_relation(char *heaprelnspname, char *heaprelname, /* Try to open index relation carefully. */ *irel = try_relation_open(reloid, lockmode); if (*irel == NULL) + { + relation_close(*hrel, lockmode); goto cleanup; + } return true; cleanup: From c6bdaf7da00e01ecbb985e03bb2851aa894f0e73 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 26 Apr 2022 13:34:38 +0500 Subject: [PATCH 019/134] Bugfix: we should use max_parallel_workers_per_gather == 0 to turn off parallel plans by default because of unsteadiness. Use explicit SET in the test that really needed parallel features. --- conf.add | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf.add b/conf.add index 3556e4d6..383554d1 100644 --- a/conf.add +++ b/conf.add @@ -1,3 +1,3 @@ autovacuum = off shared_preload_libraries = 'postgres_fdw, aqo' -max_parallel_workers = 0 # switch off parallel workers because of unsteadiness +max_parallel_workers_per_gather = 0 # switch off parallel workers because of unsteadiness From 1935fd0fc91b9dadf3563ab017632058a4743c9f Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 27 Apr 2022 08:21:57 +0500 Subject: [PATCH 020/134] Fix typos --- storage.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/storage.c b/storage.c index cf2ee59e..3349e25f 100644 --- a/storage.c +++ b/storage.c @@ -85,14 +85,13 @@ open_aqo_relation(char *heaprelnspname, char *heaprelname, aqo_enabled = false; disable_aqo_for_query(); return false; - } /* * Returns whether the query with given hash is in aqo_queries. * If yes, returns the content of the first line with given hash. * - * Use dirty snapshot to see all (include in-progess) data. We want to prevent + * Use dirty snapshot to see all (include in-progress) data. We want to prevent * wait in the XactLockTableWait routine. 
*/ bool From fbc853f3d074952609566c42e7c5bdc124cab34b Mon Sep 17 00:00:00 2001 From: Marina Polyakova Date: Wed, 27 Apr 2022 11:26:40 +0300 Subject: [PATCH 021/134] Suppress parallel plans at each regression test So server regression tests pass with aqo's temporary configuration file loaded. Perhaps we will find a better solution later. --- conf.add | 1 - expected/aqo_CVE-2020-14350.out | 4 ++++ expected/aqo_controlled.out | 4 ++++ expected/aqo_disabled.out | 4 ++++ expected/aqo_fdw.out | 4 ++++ expected/aqo_forced.out | 4 ++++ expected/aqo_intelligent.out | 4 ++++ expected/aqo_learn.out | 4 ++++ expected/clean_aqo_data.out | 4 ++++ expected/forced_stat_collection.out | 4 ++++ expected/gucs.out | 4 ++++ expected/plancache.out | 4 ++++ expected/schema.out | 4 ++++ expected/top_queries.out | 4 ++++ expected/unsupported.out | 4 ++++ sql/aqo_CVE-2020-14350.sql | 5 +++++ sql/aqo_controlled.sql | 5 +++++ sql/aqo_disabled.sql | 5 +++++ sql/aqo_fdw.sql | 5 +++++ sql/aqo_forced.sql | 5 +++++ sql/aqo_intelligent.sql | 5 +++++ sql/aqo_learn.sql | 5 +++++ sql/clean_aqo_data.sql | 5 +++++ sql/forced_stat_collection.sql | 5 +++++ sql/gucs.sql | 5 +++++ sql/plancache.sql | 5 +++++ sql/schema.sql | 5 +++++ sql/top_queries.sql | 5 +++++ sql/unsupported.sql | 5 +++++ 29 files changed, 126 insertions(+), 1 deletion(-) diff --git a/conf.add b/conf.add index 383554d1..705e3dde 100644 --- a/conf.add +++ b/conf.add @@ -1,3 +1,2 @@ autovacuum = off shared_preload_libraries = 'postgres_fdw, aqo' -max_parallel_workers_per_gather = 0 # switch off parallel workers because of unsteadiness diff --git a/expected/aqo_CVE-2020-14350.out b/expected/aqo_CVE-2020-14350.out index de90beaa..46a74be3 100644 --- a/expected/aqo_CVE-2020-14350.out +++ b/expected/aqo_CVE-2020-14350.out @@ -2,6 +2,10 @@ * Check fix for CVE-2020-14350. * See also 7eeb1d986 postgresql commit. */ +-- Switch off parallel workers because of unsteadiness. 
+-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; SET client_min_messages = 'warning'; DROP ROLE IF EXISTS regress_hacker; SET client_min_messages = 'notice'; diff --git a/expected/aqo_controlled.out b/expected/aqo_controlled.out index 316ade00..38f96f63 100644 --- a/expected/aqo_controlled.out +++ b/expected/aqo_controlled.out @@ -1,3 +1,7 @@ +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( diff --git a/expected/aqo_disabled.out b/expected/aqo_disabled.out index 3162fa6a..aeab0161 100644 --- a/expected/aqo_disabled.out +++ b/expected/aqo_disabled.out @@ -1,3 +1,7 @@ +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index 7956f649..2bb43cea 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -3,6 +3,10 @@ -- JOIN push-down (check push of baserestrictinfo and joininfo) -- Aggregate push-down -- Push-down of groupings with HAVING clause. +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. 
+SET max_parallel_workers TO 0; CREATE EXTENSION aqo; CREATE EXTENSION postgres_fdw; SET aqo.mode = 'learn'; diff --git a/expected/aqo_forced.out b/expected/aqo_forced.out index 6da016f2..e6940227 100644 --- a/expected/aqo_forced.out +++ b/expected/aqo_forced.out @@ -1,3 +1,7 @@ +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( diff --git a/expected/aqo_intelligent.out b/expected/aqo_intelligent.out index 1e984a2c..7e53b355 100644 --- a/expected/aqo_intelligent.out +++ b/expected/aqo_intelligent.out @@ -1,3 +1,7 @@ +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 9e6c21ee..df1c66ff 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -1,3 +1,7 @@ +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index bc143be7..91ba7f99 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -1,3 +1,7 @@ +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. 
+SET max_parallel_workers TO 0; CREATE EXTENSION aqo; SET aqo.mode = 'learn'; DROP TABLE IF EXISTS a; diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index fa40fcf6..4f5909af 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -1,3 +1,7 @@ +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; \set citizens 1000 SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; diff --git a/expected/gucs.out b/expected/gucs.out index 6a28de78..b5089f20 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -1,3 +1,7 @@ +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; CREATE EXTENSION aqo; SET aqo.mode = 'learn'; SET aqo.show_details = true; diff --git a/expected/plancache.out b/expected/plancache.out index 64eecf99..a87db8c0 100644 --- a/expected/plancache.out +++ b/expected/plancache.out @@ -1,4 +1,8 @@ -- Tests on interaction of AQO with cached plans. +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; CREATE EXTENSION aqo; SET aqo.mode = 'intelligent'; SET aqo.show_details = 'on'; diff --git a/expected/schema.out b/expected/schema.out index 82ab68e8..cc586233 100644 --- a/expected/schema.out +++ b/expected/schema.out @@ -1,3 +1,7 @@ +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. 
+SET max_parallel_workers TO 0; DROP EXTENSION IF EXISTS aqo CASCADE; NOTICE: extension "aqo" does not exist, skipping DROP SCHEMA IF EXISTS test CASCADE; diff --git a/expected/top_queries.out b/expected/top_queries.out index ebf6d21b..057e1bd4 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -1,3 +1,7 @@ +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; CREATE EXTENSION aqo; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; diff --git a/expected/unsupported.out b/expected/unsupported.out index 30de424d..cc0f6be7 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -1,3 +1,7 @@ +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; CREATE EXTENSION aqo; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; diff --git a/sql/aqo_CVE-2020-14350.sql b/sql/aqo_CVE-2020-14350.sql index f7dd4e23..92c200f3 100644 --- a/sql/aqo_CVE-2020-14350.sql +++ b/sql/aqo_CVE-2020-14350.sql @@ -3,6 +3,11 @@ * See also 7eeb1d986 postgresql commit. */ +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; + SET client_min_messages = 'warning'; DROP ROLE IF EXISTS regress_hacker; SET client_min_messages = 'notice'; diff --git a/sql/aqo_controlled.sql b/sql/aqo_controlled.sql index c4d1db08..30b201ee 100644 --- a/sql/aqo_controlled.sql +++ b/sql/aqo_controlled.sql @@ -1,3 +1,8 @@ +-- Switch off parallel workers because of unsteadiness. 
+-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( diff --git a/sql/aqo_disabled.sql b/sql/aqo_disabled.sql index 7d755be9..350fef13 100644 --- a/sql/aqo_disabled.sql +++ b/sql/aqo_disabled.sql @@ -1,3 +1,8 @@ +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index e31923d9..40c00125 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -4,6 +4,11 @@ -- Aggregate push-down -- Push-down of groupings with HAVING clause. +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; + CREATE EXTENSION aqo; CREATE EXTENSION postgres_fdw; SET aqo.mode = 'learn'; diff --git a/sql/aqo_forced.sql b/sql/aqo_forced.sql index 307c85f1..8cb10261 100644 --- a/sql/aqo_forced.sql +++ b/sql/aqo_forced.sql @@ -1,3 +1,8 @@ +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( diff --git a/sql/aqo_intelligent.sql b/sql/aqo_intelligent.sql index bc3351de..87a82842 100644 --- a/sql/aqo_intelligent.sql +++ b/sql/aqo_intelligent.sql @@ -1,3 +1,8 @@ +-- Switch off parallel workers because of unsteadiness. 
+-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index e1ffe7e5..13fde235 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -1,3 +1,8 @@ +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index acd64b16..9e597e6d 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -1,3 +1,8 @@ +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; + CREATE EXTENSION aqo; SET aqo.mode = 'learn'; diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index 9c169a26..46f3e572 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -1,3 +1,8 @@ +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; + \set citizens 1000 SET aqo.mode = 'disabled'; diff --git a/sql/gucs.sql b/sql/gucs.sql index c8cc8f36..0edf9ef7 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -1,3 +1,8 @@ +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. 
+SET max_parallel_workers TO 0; + CREATE EXTENSION aqo; SET aqo.mode = 'learn'; SET aqo.show_details = true; diff --git a/sql/plancache.sql b/sql/plancache.sql index 8208b1d3..c6cc9072 100644 --- a/sql/plancache.sql +++ b/sql/plancache.sql @@ -1,5 +1,10 @@ -- Tests on interaction of AQO with cached plans. +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; + CREATE EXTENSION aqo; SET aqo.mode = 'intelligent'; SET aqo.show_details = 'on'; diff --git a/sql/schema.sql b/sql/schema.sql index 8e61dedb..d3b1e7af 100644 --- a/sql/schema.sql +++ b/sql/schema.sql @@ -1,3 +1,8 @@ +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; + DROP EXTENSION IF EXISTS aqo CASCADE; DROP SCHEMA IF EXISTS test CASCADE; diff --git a/sql/top_queries.sql b/sql/top_queries.sql index bfacdd38..eb397db8 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -1,3 +1,8 @@ +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. +SET max_parallel_workers TO 0; + CREATE EXTENSION aqo; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 472ea5d9..dc35a4d2 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -1,3 +1,8 @@ +-- Switch off parallel workers because of unsteadiness. +-- Do this in each aqo test separately, so that server regression tests pass +-- with aqo's temporary configuration file loaded. 
+SET max_parallel_workers TO 0; + CREATE EXTENSION aqo; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; From 42a425ad2c610c6920727e142c74cce7c04ebced Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 28 Apr 2022 09:24:30 +0500 Subject: [PATCH 022/134] Bugfix. Fix omissions related to shifting from 32-bit query hash to 64-bit hash --- aqo.c | 8 +++-- aqo.h | 4 +-- auto_tuning.c | 6 ++-- expected/plancache.out | 2 +- postprocessing.c | 9 ++---- preprocessing.c | 22 +++---------- sql/plancache.sql | 2 +- storage.c | 71 ++++++++++++++++++++++++------------------ 8 files changed, 61 insertions(+), 63 deletions(-) diff --git a/aqo.c b/aqo.c index 6ac095e9..99c54fea 100644 --- a/aqo.c +++ b/aqo.c @@ -303,11 +303,13 @@ get_aqo_schema(void) * Init userlock */ void -init_lock_tag(LOCKTAG *tag, uint32 key1, uint32 key2) +init_lock_tag(LOCKTAG *tag, uint64 key1, int32 key2) { + uint32 key = key1 % UINT32_MAX; + tag->locktag_field1 = AQO_MODULE_MAGIC; - tag->locktag_field2 = key1; - tag->locktag_field3 = key2; + tag->locktag_field2 = key; + tag->locktag_field3 = (uint32) key2; tag->locktag_field4 = 0; tag->locktag_type = LOCKTAG_USERLOCK; tag->locktag_lockmethodid = USER_LOCKMETHOD; diff --git a/aqo.h b/aqo.h index b94aaac7..05d11942 100644 --- a/aqo.h +++ b/aqo.h @@ -280,7 +280,7 @@ int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); /* Storage interaction */ -extern bool find_query(uint64 qhash, Datum *search_values, bool *search_nulls); +extern bool find_query(uint64 qhash, QueryContextData *ctx); extern bool update_query(uint64 qhash, uint64 fhash, bool learn_aqo, bool use_aqo, bool auto_tuning); extern bool add_query_text(uint64 query_hash, const char *query_string); @@ -343,7 +343,7 @@ extern double *selectivity_cache_find_global_relid(int clause_hash, extern void selectivity_cache_clear(void); extern Oid get_aqo_schema(void); -extern void init_lock_tag(LOCKTAG *tag, uint32 key1, uint32 key2); +extern void init_lock_tag(LOCKTAG 
*tag, uint64 key1, int32 key2); extern bool IsQueryDisabled(void); extern List *cur_classes; diff --git a/auto_tuning.c b/auto_tuning.c index 94c1641e..8bb024da 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -144,7 +144,7 @@ is_in_infinite_loop_cq(double *elems, int nelems) * this query to false. */ void -automatical_query_tuning(uint64 query_hash, QueryStat * stat) +automatical_query_tuning(uint64 qhash, QueryStat * stat) { double unstability = auto_tuning_exploration; double t_aqo, @@ -203,11 +203,11 @@ automatical_query_tuning(uint64 query_hash, QueryStat * stat) query_context.learn_aqo = query_context.use_aqo; } if (num_iterations <= auto_tuning_max_iterations || p_use > 0.5) - update_query(query_hash, + update_query(qhash, query_context.fspace_hash, query_context.learn_aqo, query_context.use_aqo, true); else - update_query(query_hash, query_context.fspace_hash, false, false, false); + update_query(qhash, query_context.fspace_hash, false, false, false); } diff --git a/expected/plancache.out b/expected/plancache.out index a87db8c0..8d02ef0f 100644 --- a/expected/plancache.out +++ b/expected/plancache.out @@ -37,7 +37,7 @@ BEGIN END $$ LANGUAGE 'plpgsql'; -- The function shows 6 executions without an AQO support (nnex) and -- 4 executions with usage of an AQO knowledge base (nex). Planning time in the --- case of AQO support (pt) is equal to '-1', because the query plan is exracted +-- case of AQO support (pt) is equal to '-1', because the query plan is extracted -- from the plan cache. 
SELECT * FROM f1(); nnex | nex | pt diff --git a/postprocessing.c b/postprocessing.c index d437a444..f9c00b50 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -94,7 +94,7 @@ atomic_fss_learn_step(uint64 fhash, int fss_hash, int ncols, LOCKTAG tag; int nrows; - init_lock_tag(&tag, (uint32) fhash, fss_hash); + init_lock_tag(&tag, fhash, fss_hash); LockAcquire(&tag, ExclusiveLock, false, false); if (!load_fss(fhash, fss_hash, ncols, matrix, targets, &nrows, NULL)) @@ -671,10 +671,9 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) cardinality_error = cardinality_sum_errors / cardinality_num_objects; else cardinality_error = -1; - Assert(query_context.query_hash>=0); + /* Prevent concurrent updates. */ - init_lock_tag(&tag, (uint32) query_context.query_hash,//my code - (uint32) query_context.fspace_hash);//possible here + init_lock_tag(&tag, query_context.query_hash, query_context.fspace_hash); LockAcquire(&tag, ExclusiveLock, false, false); if (stat != NULL) @@ -706,7 +705,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) &stat->executions_without_aqo); /* Store all learn data into the AQO service relations. 
*/ - Assert(query_context.query_hash>=0); if (!query_context.adding_query && query_context.auto_tuning) automatical_query_tuning(query_context.query_hash, stat); @@ -970,7 +968,6 @@ print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, */ if (aqo_mode != AQO_MODE_DISABLED || force_collect_stat) { - Assert(query_context.query_hash>=0); if (aqo_show_hash) ExplainPropertyInteger("Query hash", NULL, query_context.query_hash, es); diff --git a/preprocessing.c b/preprocessing.c index cee457d9..f09e3eaa 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -175,8 +175,6 @@ aqo_planner(Query *parse, ParamListInfo boundParams) { bool query_is_stored = false; - Datum query_params[5]; - bool query_nulls[5] = {false, false, false, false, false}; LOCKTAG tag; MemoryContext oldCxt; @@ -226,7 +224,7 @@ aqo_planner(Query *parse, boundParams); } - elog(DEBUG1, "AQO will be used for query '%s', class %ld", + elog(DEBUG1, "AQO will be used for query '%s', class "UINT64_FORMAT, query_string ? query_string : "null string", query_context.query_hash); oldCxt = MemoryContextSwitchTo(AQOMemoryContext); @@ -240,8 +238,7 @@ aqo_planner(Query *parse, goto ignore_query_settings; } - query_is_stored = find_query(query_context.query_hash, &query_params[0], - &query_nulls[0]); + query_is_stored = find_query(query_context.query_hash, &query_context); if (!query_is_stored) { @@ -295,16 +292,12 @@ aqo_planner(Query *parse, else /* Query class exists in a ML knowledge base. */ { query_context.adding_query = false; - query_context.learn_aqo = DatumGetBool(query_params[1]); - query_context.use_aqo = DatumGetBool(query_params[2]); - query_context.fspace_hash = DatumGetInt64(query_params[3]); - query_context.auto_tuning = DatumGetBool(query_params[4]); - query_context.collect_stat = query_context.auto_tuning; + + /* Other query_context fields filled in the find_query() routine. */ /* * Deactivate query if no one reason exists for usage of an AQO machinery. 
*/ - Assert(query_context.query_hash>=0); if (!query_context.learn_aqo && !query_context.use_aqo && !query_context.auto_tuning && !force_collect_stat) add_deactivated_query(query_context.query_hash); @@ -330,7 +323,6 @@ aqo_planner(Query *parse, * In this mode we want to learn with incoming query (if it is not * suppressed manually) and collect stats. */ - Assert(query_context.query_hash>=0); query_context.collect_stat = true; query_context.fspace_hash = query_context.query_hash; break; @@ -354,15 +346,13 @@ aqo_planner(Query *parse, * find-add query and query text must be atomic operation to prevent * concurrent insertions. */ - Assert(query_context.query_hash>=0); - init_lock_tag(&tag, (uint32) query_context.query_hash, (uint32) 0);//my code + init_lock_tag(&tag, query_context.query_hash, 0); LockAcquire(&tag, ExclusiveLock, false, false); /* * Add query into the AQO knowledge base. To process an error with * concurrent addition from another backend we will try to restart * preprocessing routine. */ - Assert(query_context.query_hash>=0); update_query(query_context.query_hash, query_context.fspace_hash, query_context.learn_aqo, query_context.use_aqo, query_context.auto_tuning); @@ -371,7 +361,6 @@ aqo_planner(Query *parse, * Add query text into the ML-knowledge base. Just for further * analysis. In the case of cached plans we could have NULL query text. */ - Assert(query_context.query_hash>=0); if (query_string != NULL) add_query_text(query_context.query_hash, query_string); @@ -385,7 +374,6 @@ aqo_planner(Query *parse, * query execution statistics in any mode. 
*/ query_context.collect_stat = true; - Assert(query_context.query_hash>=0); query_context.fspace_hash = query_context.query_hash; } diff --git a/sql/plancache.sql b/sql/plancache.sql index c6cc9072..0d90149f 100644 --- a/sql/plancache.sql +++ b/sql/plancache.sql @@ -42,7 +42,7 @@ END $$ LANGUAGE 'plpgsql'; -- The function shows 6 executions without an AQO support (nnex) and -- 4 executions with usage of an AQO knowledge base (nex). Planning time in the --- case of AQO support (pt) is equal to '-1', because the query plan is exracted +-- case of AQO support (pt) is equal to '-1', because the query plan is extracted -- from the plan cache. SELECT * FROM f1(); diff --git a/storage.c b/storage.c index 3349e25f..70fa5abb 100644 --- a/storage.c +++ b/storage.c @@ -93,19 +93,22 @@ open_aqo_relation(char *heaprelnspname, char *heaprelname, * * Use dirty snapshot to see all (include in-progress) data. We want to prevent * wait in the XactLockTableWait routine. + * If query is found in the knowledge base, fill the query context struct. 
*/ bool -find_query(uint64 qhash, Datum *search_values, bool *search_nulls) +find_query(uint64 qhash, QueryContextData *ctx) { - Relation hrel; - Relation irel; - HeapTuple tuple; + Relation hrel; + Relation irel; + HeapTuple tuple; TupleTableSlot *slot; - bool shouldFree; - IndexScanDesc scan; - ScanKeyData key; - SnapshotData snap; - bool find_ok = false; + bool shouldFree = true; + IndexScanDesc scan; + ScanKeyData key; + SnapshotData snap; + bool find_ok = false; + Datum values[5]; + bool nulls[5] = {false, false, false, false, false}; if (!open_aqo_relation("public", "aqo_queries", "aqo_queries_query_hash_idx", AccessShareLock, &hrel, &irel)) @@ -113,24 +116,30 @@ find_query(uint64 qhash, Datum *search_values, bool *search_nulls) InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int64GetDatum(qhash)); + ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); index_rescan(scan, &key, 1, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); - if (find_ok && search_values != NULL) + if (find_ok) { tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); Assert(shouldFree != true); - heap_deform_tuple(tuple, hrel->rd_att, search_values, search_nulls); + heap_deform_tuple(tuple, hrel->rd_att, values, nulls); + + /* Fill query context data */ + ctx->learn_aqo = DatumGetBool(values[1]); + ctx->use_aqo = DatumGetBool(values[2]); + ctx->fspace_hash = DatumGetInt64(values[3]); + ctx->auto_tuning = DatumGetBool(values[4]); + ctx->collect_stat = query_context.auto_tuning; } ExecDropSingleTupleTableSlot(slot); index_endscan(scan); index_close(irel, AccessShareLock); table_close(hrel, AccessShareLock); - return find_ok; } @@ -176,7 +185,7 @@ update_query(uint64 qhash, uint64 fhash, */ InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, 
BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(qhash)); + ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); index_rescan(scan, &key, 1, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); @@ -221,7 +230,8 @@ update_query(uint64 qhash, uint64 fhash, * Ooops, somebody concurrently updated the tuple. It is possible * only in the case of changes made by third-party code. */ - elog(ERROR, "AQO feature space data for signature (%ld, %ld) concurrently" + elog(ERROR, "AQO feature space data for signature ("UINT64_FORMAT \ + ", "UINT64_FORMAT") concurrently" " updated by a stranger backend.", qhash, fhash); result = false; @@ -283,7 +293,7 @@ add_query_text(uint64 qhash, const char *query_string) */ InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(qhash)); + ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); index_rescan(scan, &key, 1, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); @@ -390,7 +400,7 @@ load_fss(uint64 fhash, int fss_hash, return false; scan = index_beginscan(hrel, irel, SnapshotSelf, 2, 0); - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fhash)); + ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fhash)); ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss_hash)); index_rescan(scan, key, 2, NULL, 0); @@ -422,9 +432,10 @@ load_fss(uint64 fhash, int fss_hash, *relids = deform_oids_vector(values[5]); } else - elog(ERROR, "unexpected number of features for hash (%ld, %d):\ - expected %d features, obtained %d", - fhash, fss_hash, ncols, DatumGetInt32(values[2])); + elog(ERROR, "unexpected number of features for hash (" \ + UINT64_FORMAT", %d):\ + expected %d features, obtained %d", + fhash, fss_hash, ncols, DatumGetInt32(values[2])); } else success = false; @@ -483,7 +494,7 @@ 
update_fss(uint64 fhash, int fsshash, int nrows, int ncols, InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 2, 0); - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fhash)); + ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fhash)); ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fsshash)); index_rescan(scan, key, 2, NULL, 0); @@ -493,7 +504,7 @@ update_fss(uint64 fhash, int fsshash, int nrows, int ncols, if (!find_ok) { - values[0] = Int32GetDatum(fhash); + values[0] = Int64GetDatum(fhash); values[1] = Int32GetDatum(fsshash); values[2] = Int32GetDatum(ncols); @@ -548,8 +559,8 @@ update_fss(uint64 fhash, int fsshash, int nrows, int ncols, * Ooops, somebody concurrently updated the tuple. It is possible * only in the case of changes made by third-party code. */ - elog(ERROR, "AQO data piece (%ld %d) concurrently updated" - " by a stranger backend.", + elog(ERROR, "AQO data piece ("UINT64_FORMAT" %d) concurrently" + " updated by a stranger backend.", fhash, fsshash); result = false; } @@ -595,7 +606,7 @@ get_aqo_stat(uint64 qhash) return false; scan = index_beginscan(hrel, irel, SnapshotSelf, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(qhash)); + ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); index_rescan(scan, &key, 1, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); @@ -666,7 +677,7 @@ update_aqo_stat(uint64 qhash, QueryStat *stat) InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int64GetDatum(qhash)); + ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); index_rescan(scan, &key, 1, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); @@ -712,8 +723,8 @@ update_aqo_stat(uint64 qhash, QueryStat *stat) * Ooops, somebody concurrently updated the tuple. 
It is possible * only in the case of changes made by third-party code. */ - elog(ERROR, "AQO statistic data for query signature %ld concurrently" - " updated by a stranger backend.", + elog(ERROR, "AQO statistic data for query signature "UINT64_FORMAT + " concurrently updated by a stranger backend.", qhash); } } @@ -913,8 +924,8 @@ init_deactivated_queries_storage(void) /* Create the hashtable proper */ MemSet(&hash_ctl, 0, sizeof(hash_ctl)); - hash_ctl.keysize = sizeof(int); - hash_ctl.entrysize = sizeof(int); + hash_ctl.keysize = sizeof(uint64); + hash_ctl.entrysize = sizeof(uint64); deactivated_queries = hash_create("aqo_deactivated_queries", 128, /* start small and extend */ &hash_ctl, From 4c65110548d8f0e79e082d8254042abd76e1fd25 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 29 Apr 2022 15:12:42 +0500 Subject: [PATCH 023/134] Bugfix: we can't use C++ reserved words as identifiers for shared variables or routines. --- aqo_pg14.patch | 85 +++++++++++++++++++++++++++----------------------- path_utils.c | 12 +++---- 2 files changed, 52 insertions(+), 45 deletions(-) diff --git a/aqo_pg14.patch b/aqo_pg14.patch index a4a158eb..d119d98c 100644 --- a/aqo_pg14.patch +++ b/aqo_pg14.patch @@ -11,7 +11,7 @@ index f27e458482..0c62191904 100644 auto_explain \ bloom \ diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index 69821c4631..f1fd5f93c5 100644 +index 70551522da..958529fbab 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -24,6 +24,7 @@ @@ -35,7 +35,7 @@ index 69821c4631..f1fd5f93c5 100644 /* OR-able flags for ExplainXMLTag() */ #define X_OPENING 0 -@@ -670,6 +677,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, +@@ -676,6 +683,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, ExplainPropertyFloat("Execution Time", "ms", 1000.0 * totaltime, 3, es); @@ -46,7 +46,7 @@ index 69821c4631..f1fd5f93c5 100644 
ExplainCloseGroup("Query", NULL, true, es); } -@@ -1652,6 +1663,9 @@ ExplainNode(PlanState *planstate, List *ancestors, +@@ -1658,6 +1669,9 @@ ExplainNode(PlanState *planstate, List *ancestors, } } @@ -57,39 +57,39 @@ index 69821c4631..f1fd5f93c5 100644 if (es->format == EXPLAIN_FORMAT_TEXT) appendStringInfoChar(es->str, '\n'); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index 5769536c6a..8aae9d5039 100644 +index a106a2cdf1..7150dccb4d 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c -@@ -129,6 +129,7 @@ CopyPlanFields(const Plan *from, Plan *newnode) +@@ -132,6 +132,7 @@ CopyPlanFields(const Plan *from, Plan *newnode) COPY_NODE_FIELD(initPlan); COPY_BITMAPSET_FIELD(extParam); COPY_BITMAPSET_FIELD(allParam); -+ COPY_NODE_FIELD(private); ++ COPY_NODE_FIELD(ext_nodes); } /* diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c -index da212d9ddf..78dc137df8 100644 +index 0df9be1608..678e1f050f 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -342,6 +342,7 @@ _outPlanInfo(StringInfo str, const Plan *node) WRITE_NODE_FIELD(initPlan); WRITE_BITMAPSET_FIELD(extParam); WRITE_BITMAPSET_FIELD(allParam); -+ /*WRITE_NODE_FIELD(private); */ ++ /*WRITE_NODE_FIELD(ext_nodes); */ } /* diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c -index 4c537c30e0..40b1ce29de 100644 +index eaa51c5c06..6ad8b78c7d 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1628,6 +1628,11 @@ ReadCommonPlan(Plan *local_node) READ_NODE_FIELD(initPlan); READ_BITMAPSET_FIELD(extParam); READ_BITMAPSET_FIELD(allParam); -+ local_node->private = NIL; -+ /* READ_NODE_FIELD(private); ++ local_node->ext_nodes = NIL; ++ /* READ_NODE_FIELD(ext_nodes); + * Don't serialize this field. It is required to serialize RestrictInfo and + * EqualenceClass. 
+ */ @@ -97,7 +97,7 @@ index 4c537c30e0..40b1ce29de 100644 /* diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index 30c8595f76..18699564b3 100644 +index 006f91f0a8..ef9c8ec581 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -98,6 +98,11 @@ @@ -120,7 +120,7 @@ index 30c8595f76..18699564b3 100644 /* -@@ -4905,6 +4909,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4911,6 +4915,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) } @@ -179,7 +179,7 @@ index 30c8595f76..18699564b3 100644 /* * set_baserel_size_estimates * Set the size estimates for the given base relation. -@@ -4921,19 +4977,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4927,19 +4983,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) { @@ -200,7 +200,7 @@ index 30c8595f76..18699564b3 100644 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); -@@ -4944,13 +4991,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) +@@ -4950,13 +4997,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) * get_parameterized_baserel_size * Make a size estimate for a parameterized scan of a base relation. * @@ -236,7 +236,7 @@ index 30c8595f76..18699564b3 100644 { List *allclauses; double nrows; -@@ -4979,6 +5046,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -4985,6 +5052,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates * Set the size estimates for the given join relation. * @@ -273,7 +273,7 @@ index 30c8595f76..18699564b3 100644 * The rel's targetlist must have been constructed already, and a * restriction clause list that matches the given component rels must * be provided. 
-@@ -4998,11 +5095,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -5004,11 +5101,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * build_joinrel_tlist, and baserestrictcost is not used for join rels. */ void @@ -290,7 +290,7 @@ index 30c8595f76..18699564b3 100644 { rel->rows = calc_joinrel_size_estimate(root, rel, -@@ -5018,6 +5115,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -5024,6 +5121,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * get_parameterized_joinrel_size * Make a size estimate for a parameterized scan of a join relation. * @@ -326,7 +326,7 @@ index 30c8595f76..18699564b3 100644 * 'rel' is the joinrel under consideration. * 'outer_path', 'inner_path' are (probably also parameterized) Paths that * produce the relations being joined. -@@ -5030,11 +5156,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -5036,11 +5162,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates must have been applied already. */ double @@ -343,7 +343,7 @@ index 30c8595f76..18699564b3 100644 { double nrows; -@@ -5750,7 +5876,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) +@@ -5756,7 +5882,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) /* Should only be applied to base relations */ Assert(rel->relid > 0); @@ -352,7 +352,7 @@ index 30c8595f76..18699564b3 100644 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); -@@ -6036,7 +6162,7 @@ page_size(double tuples, int width) +@@ -6042,7 +6168,7 @@ page_size(double tuples, int width) * Estimate the fraction of the work that each worker will do given the * number of workers budgeted for the path. 
*/ @@ -362,7 +362,7 @@ index 30c8595f76..18699564b3 100644 { double parallel_divisor = path->parallel_workers; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 5658f24323..3bbfa3c1b5 100644 +index 0ed858f305..9d4a6c5903 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -71,6 +71,7 @@ @@ -373,7 +373,7 @@ index 5658f24323..3bbfa3c1b5 100644 static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path, int flags); -@@ -544,6 +545,10 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) +@@ -545,6 +546,10 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) break; } @@ -384,11 +384,11 @@ index 5658f24323..3bbfa3c1b5 100644 return plan; } -@@ -5278,6 +5283,7 @@ copy_generic_path_info(Plan *dest, Path *src) +@@ -5323,6 +5328,7 @@ copy_generic_path_info(Plan *dest, Path *src) dest->plan_width = src->pathtarget->width; dest->parallel_aware = src->parallel_aware; dest->parallel_safe = src->parallel_safe; -+ dest->private = NIL; ++ dest->ext_nodes = NIL; } /* @@ -474,14 +474,14 @@ index 70899e5430..34075cc87b 100644 extra->targetList); diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c -index e105a4d5f1..d821ea63bd 100644 +index e105a4d5f1..c5bcc9d1d1 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -258,6 +258,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->all_partrels = NULL; rel->partexprs = NULL; rel->nullable_partexprs = NULL; -+ rel->private = NULL; ++ rel->ext_nodes = NULL; /* * Pass assorted information down the inheritance hierarchy. @@ -497,7 +497,7 @@ index e105a4d5f1..d821ea63bd 100644 joinrel->all_partrels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; -+ joinrel->private = NULL; ++ joinrel->ext_nodes = NULL; /* Compute information relevant to the foreign relations. 
*/ set_foreign_rel_properties(joinrel, outer_rel, inner_rel); @@ -505,7 +505,7 @@ index e105a4d5f1..d821ea63bd 100644 joinrel->all_partrels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; -+ joinrel->private = NULL; ++ joinrel->ext_nodes = NULL; joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids, inner_rel->top_parent_relids); @@ -540,7 +540,7 @@ index e105a4d5f1..d821ea63bd 100644 return ppi; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c -index 10895fb287..e81a6f6896 100644 +index abe47dab86..0ef5f2c8da 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -143,6 +143,7 @@ @@ -596,10 +596,10 @@ index e94d9e49cf..49236ced77 100644 extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, ParamListInfo params, DestReceiver *dest); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h -index 8ee40cc68c..d7bb9df67c 100644 +index f16466a0df..c48d969ba8 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h -@@ -755,6 +755,10 @@ typedef struct RelOptInfo +@@ -756,6 +756,10 @@ typedef struct RelOptInfo Relids top_parent_relids; /* Relids of topmost parents (if "other" * rel) */ @@ -610,18 +610,22 @@ index 8ee40cc68c..d7bb9df67c 100644 /* used for partitioned relations: */ PartitionScheme part_scheme; /* Partitioning scheme */ int nparts; /* Number of partitions; -1 if not yet set; in -@@ -769,7 +773,9 @@ typedef struct RelOptInfo +@@ -770,7 +774,13 @@ typedef struct RelOptInfo Relids all_partrels; /* Relids set of all partition relids */ List **partexprs; /* Non-nullable partition key expressions */ List **nullable_partexprs; /* Nullable partition key expressions */ -} RelOptInfo; + -+ List *private; ++ /* ++ * At this list an extension can add additional nodes to pass an info along ++ * the planning and executing stages. ++ */ ++ List *ext_nodes; +} RelOptInfo; /* * Is given relation partitioned? 
-@@ -1137,6 +1143,10 @@ typedef struct ParamPathInfo +@@ -1138,6 +1148,10 @@ typedef struct ParamPathInfo Relids ppi_req_outer; /* rels supplying parameters used by path */ double ppi_rows; /* estimated number of result tuples */ List *ppi_clauses; /* join clauses available from outer rels */ @@ -633,16 +637,19 @@ index 8ee40cc68c..d7bb9df67c 100644 diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 5ddf947971..fe9bda387a 100644 +index 9ac4d9af12..6e20cd28c8 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h -@@ -158,6 +158,9 @@ typedef struct Plan +@@ -158,6 +158,12 @@ typedef struct Plan */ Bitmapset *extParam; Bitmapset *allParam; + -+ /* Additional field for an extension purposes. */ -+ List *private; ++ /* ++ * Additional fields for purposes of an extension. ++ * TODO: allow to serialize/deserialize this list. ++ */ ++ List *ext_nodes; } Plan; /* ---------------- diff --git a/path_utils.c b/path_utils.c index 102907a9..512762ea 100644 --- a/path_utils.c +++ b/path_utils.c @@ -68,7 +68,7 @@ get_aqo_plan_node(Plan *plan, bool create) AQOPlanNode *node = NULL; ListCell *lc; - foreach(lc, plan->private) + foreach(lc, plan->ext_nodes) { AQOPlanNode *candidate = (AQOPlanNode *) lfirst(lc); @@ -88,7 +88,7 @@ get_aqo_plan_node(Plan *plan, bool create) return &DefaultAQOPlanNode; node = create_aqo_plan_node(); - plan->private = lappend(plan->private, node); + plan->ext_nodes = lappend(plan->ext_nodes, node); } Assert(node); @@ -176,10 +176,10 @@ subplan_hunter(Node *node, void *context) splan->plan_id - 1); upper_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL); - Assert(list_length(upper_rel->private) == 1); - Assert(IsA((Node *) linitial(upper_rel->private), A_Const)); + Assert(list_length(upper_rel->ext_nodes) == 1); + Assert(IsA((Node *) linitial(upper_rel->ext_nodes), A_Const)); - fss = (A_Const *) linitial(upper_rel->private); + fss = (A_Const *) linitial(upper_rel->ext_nodes); return (Node *) 
copyObject(fss); } return expression_tree_mutator(node, subplan_hunter, context); @@ -649,5 +649,5 @@ aqo_store_upper_signature_hook(PlannerInfo *root, fss_node->val.type = T_Integer; fss_node->location = -1; fss_node->val.val.ival = get_fss_for_object(relids, clauses, NIL, NULL, NULL); - output_rel->private = lappend(output_rel->private, (void *) fss_node); + output_rel->ext_nodes = lappend(output_rel->ext_nodes, (void *) fss_node); } From 648980c72ee3ca453991022b3c2d5436ef9617db Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 11 May 2022 15:14:12 +0500 Subject: [PATCH 024/134] Bugfix. Normalize cardinality error. --- postprocessing.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/postprocessing.c b/postprocessing.c index f9c00b50..524f41e0 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -435,7 +435,7 @@ learnOnPlanState(PlanState *p, void *context) /* Exclude "not executed" nodes from error calculation to reduce fluctuations. */ if (!notExecuted) { - cardinality_sum_errors += fabs(predicted - learn_rows); + cardinality_sum_errors += fabs(log(predicted) - log(learn_rows)); cardinality_num_objects += 1; } From 3f55c5e2f6051a89dd73b65d47c2d3efad388c89 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 13 Jul 2022 15:02:07 +0500 Subject: [PATCH 025/134] Bugfix. A copy of clauses obtained by aqo_get_clauses() has a specific structure and shouldn't be touched by any postgres machinery except the node hash generator.
--- cardinality_hooks.c | 10 ++++++++-- path_utils.c | 8 +++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 13a737d4..1a281ec3 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -228,10 +228,16 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, { MemoryContext mcxt; + selectivities = list_concat( + get_selectivities(root, param_clauses, rel->relid, + JOIN_INNER, NULL), + get_selectivities(root, rel->baserestrictinfo, + rel->relid, + JOIN_INNER, NULL)); + + /* Make specific copy of clauses with mutated subplans */ allclauses = list_concat(aqo_get_clauses(root, param_clauses), aqo_get_clauses(root, rel->baserestrictinfo)); - selectivities = get_selectivities(root, allclauses, rel->relid, - JOIN_INNER, NULL); relid = planner_rt_fetch(rel->relid, root)->relid; get_eclasses(allclauses, &nargs, &args_hash, &eclass_hash); diff --git a/path_utils.c b/path_utils.c index 512762ea..cac048c2 100644 --- a/path_utils.c +++ b/path_utils.c @@ -356,18 +356,16 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) case T_ForeignPath: /* The same as in the default case */ default: - cur = list_concat(aqo_get_clauses(root, - path->parent->baserestrictinfo), + cur = list_concat(list_copy(path->parent->baserestrictinfo), path->param_info ? - aqo_get_clauses(root, - path->param_info->ppi_clauses) : - NIL); + path->param_info->ppi_clauses : NIL); if (path->param_info) cur_sel = get_selectivities(root, cur, path->parent->relid, JOIN_INNER, NULL); else cur_sel = get_selectivities(root, cur, 0, JOIN_INNER, NULL); *selectivities = cur_sel; + cur = aqo_get_clauses(root, cur); return cur; break; } From 6411fee11a5f306b8f04c4bfcd0b6294ccb30bf4 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Wed, 10 Aug 2022 15:50:45 +0300 Subject: [PATCH 026/134] Clear AQO_cache_mem_ctx memory context. 
--- aqo.c | 4 ++++ aqo.h | 1 + cardinality_hooks.c | 7 ++++--- selectivity_cache.c | 1 + 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/aqo.c b/aqo.c index 99c54fea..4cffc94f 100644 --- a/aqo.c +++ b/aqo.c @@ -94,6 +94,7 @@ double log_selectivity_lower_bound = -30; * after a query parsing and is used during the query planning. */ MemoryContext AQOMemoryContext; +MemoryContext AQO_cache_mem_ctx; QueryContextData query_context; /* Additional plan info */ int njoins; @@ -235,6 +236,9 @@ _PG_init(void) AQOMemoryContext = AllocSetContextCreate(TopMemoryContext, "AQOMemoryContext", ALLOCSET_DEFAULT_SIZES); + AQO_cache_mem_ctx = AllocSetContextCreate(TopMemoryContext, + "AQO_cache_mem_ctx", + ALLOCSET_DEFAULT_SIZES); RegisterResourceReleaseCallback(aqo_free_callback, NULL); RegisterAQOPlanNodeMethods(); } diff --git a/aqo.h b/aqo.h index 05d11942..fff0bb06 100644 --- a/aqo.h +++ b/aqo.h @@ -252,6 +252,7 @@ extern int njoins; /* Memory context for long-live data */ extern MemoryContext AQOMemoryContext; +extern MemoryContext AQO_cache_mem_ctx; /* Saved hook values in case of unload */ extern post_parse_analyze_hook_type prev_post_parse_analyze_hook; diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 1a281ec3..32852b06 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -226,7 +226,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, if (query_context.use_aqo || query_context.learn_aqo) { - MemoryContext mcxt; + MemoryContext old_ctx_m; selectivities = list_concat( get_selectivities(root, param_clauses, rel->relid, @@ -241,7 +241,8 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, relid = planner_rt_fetch(rel->relid, root)->relid; get_eclasses(allclauses, &nargs, &args_hash, &eclass_hash); - mcxt = MemoryContextSwitchTo(CacheMemoryContext); + old_ctx_m = MemoryContextSwitchTo(AQO_cache_mem_ctx); + forboth(l, allclauses, l2, selectivities) { current_hash = get_clause_hash( @@ -251,7 +252,7 @@ 
aqo_get_parameterized_baserel_size(PlannerInfo *root, *((double *) lfirst(l2))); } - MemoryContextSwitchTo(mcxt); + MemoryContextSwitchTo(old_ctx_m); pfree(args_hash); pfree(eclass_hash); } diff --git a/selectivity_cache.c b/selectivity_cache.c index 30b0f887..b59da933 100644 --- a/selectivity_cache.c +++ b/selectivity_cache.c @@ -89,5 +89,6 @@ selectivity_cache_find_global_relid(int clause_hash, int global_relid) void selectivity_cache_clear(void) { + MemoryContextReset(AQO_cache_mem_ctx); objects = NIL; } From 89c834de001c273ba37eb47619a6585962e0cd13 Mon Sep 17 00:00:00 2001 From: "a.pervushina" Date: Tue, 26 Jul 2022 19:13:06 +0300 Subject: [PATCH 027/134] [PGPRO-6755] Refactor machine dependent tests Tags: aqo --- expected/forced_stat_collection.out | 23 +++++++++++++++++------ expected/unsupported.out | 28 ++++++++++++++++++++-------- sql/forced_stat_collection.sql | 10 +++++++++- sql/unsupported.sql | 17 +++++++++++++++-- 4 files changed, 61 insertions(+), 17 deletions(-) diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index 4f5909af..229a2636 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -40,12 +40,19 @@ SELECT * FROM aqo_data; -------------+--------------+-----------+----------+---------+------ (0 rows) -SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$; +SELECT learn_aqo,use_aqo,auto_tuning,round_array(cardinality_error_without_aqo) ce,executions_without_aqo nex FROM aqo_queries JOIN aqo_query_stat USING (query_hash); - learn_aqo | use_aqo | auto_tuning | ce | nex ------------+---------+-------------+----------------------+----- - f | f | f | {0.8637762840285226} | 1 - f | f | f | {2.9634630129852053} | 1 + learn_aqo | use_aqo | 
auto_tuning | ce | nex +-----------+---------+-------------+---------+----- + f | f | f | {0.864} | 1 + f | f | f | {2.963} | 1 (2 rows) SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); @@ -54,6 +61,10 @@ SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); SELECT count(*) FROM person WHERE age<18; COMMON feature space (do not delete!) SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; -(3 rows) + + + SELECT array_agg(round(elem::numeric, 3)) + + FROM unnest($1) as arr(elem); + + +(4 rows) DROP EXTENSION aqo; diff --git a/expected/unsupported.out b/expected/unsupported.out index cc0f6be7..e34914ed 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -470,6 +470,17 @@ SELECT * FROM -- any prediction on number of fetched tuples. -- So, if selectivity was wrong we could make bad choice of Scan operation. -- For example, we could choose suboptimal index. +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; -- Turn off statistics gathering for simple demonstration of filtering problem. 
ALTER TABLE t SET (autovacuum_enabled = 'false'); CREATE INDEX ind1 ON t(x); @@ -505,21 +516,22 @@ SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; 50 (1 row) -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) - SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; - QUERY PLAN +SELECT str AS result +FROM expln('SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str +WHERE str NOT LIKE '%Heap Blocks%'; + result ----------------------------------------------------------------- Aggregate (actual rows=1 loops=1) AQO not used - -> Bitmap Heap Scan on t (actual rows=50 loops=1) + Output: count(*) + -> Bitmap Heap Scan on public.t (actual rows=50 loops=1) AQO: rows=50, error=0% - Recheck Cond: (mod(x, 3) = 1) - Filter: (x < 3) + Recheck Cond: (mod(t.x, 3) = 1) + Filter: (t.x < 3) Rows Removed by Filter: 300 - Heap Blocks: exact=5 -> Bitmap Index Scan on ind2 (actual rows=350 loops=1) AQO not used - Index Cond: (mod(x, 3) = 1) + Index Cond: (mod(t.x, 3) = 1) Using aqo: true AQO mode: LEARN JOINS: 0 diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index 46f3e572..81d37f3b 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -33,7 +33,15 @@ SELECT count(*) FROM person WHERE age<18; SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; SELECT * FROM aqo_data; -SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$; + +SELECT learn_aqo,use_aqo,auto_tuning,round_array(cardinality_error_without_aqo) ce,executions_without_aqo nex FROM aqo_queries JOIN aqo_query_stat USING (query_hash); SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); diff --git a/sql/unsupported.sql b/sql/unsupported.sql index dc35a4d2..accca094 100644 --- a/sql/unsupported.sql +++ 
b/sql/unsupported.sql @@ -139,6 +139,18 @@ SELECT * FROM -- So, if selectivity was wrong we could make bad choice of Scan operation. -- For example, we could choose suboptimal index. +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + -- Turn off statistics gathering for simple demonstration of filtering problem. ALTER TABLE t SET (autovacuum_enabled = 'false'); CREATE INDEX ind1 ON t(x); @@ -151,8 +163,9 @@ EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) -- Here we filter more tuples than with the ind1 index. CREATE INDEX ind2 ON t(mod(x,3)); SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) - SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; +SELECT str AS result +FROM expln('SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str +WHERE str NOT LIKE '%Heap Blocks%'; -- Best choice is ... ANALYZE t; From d26cc4d7e2b328139f7f7797b125b129c8236661 Mon Sep 17 00:00:00 2001 From: "a.pervushina" Date: Mon, 5 Sep 2022 21:23:47 +0300 Subject: [PATCH 028/134] Reduce logging in the pgbench TAP test.Induces by requirement of comfort observation of a contrib-check in browser (By A.Lakhin report). 
--- t/001_pgbench.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index c4ddb7ae..29400582 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -10,6 +10,7 @@ $node->init; $node->append_conf('postgresql.conf', qq{ shared_preload_libraries = 'aqo' + log_statement = 'none' aqo.mode = 'intelligent' log_statement = 'ddl' }); From 33110ca44f1a3119f840f03d55cf395f2d1efe4b Mon Sep 17 00:00:00 2001 From: "a.pervushina" Date: Tue, 6 Sep 2022 03:54:30 +0300 Subject: [PATCH 029/134] PGPRO-6403: fix conf.add so PostgreSQL installchecks pass with aqo loaded --- conf.add | 1 + 1 file changed, 1 insertion(+) diff --git a/conf.add b/conf.add index 705e3dde..42c0a01d 100644 --- a/conf.add +++ b/conf.add @@ -1,2 +1,3 @@ autovacuum = off shared_preload_libraries = 'postgres_fdw, aqo' +max_parallel_workers_per_gather = 1 # switch off parallel workers because of unsteadiness \ No newline at end of file From 7e7a63afe99d21266dcc9b70a2a966f3cdc1bbb4 Mon Sep 17 00:00:00 2001 From: "a.pervushina" Date: Thu, 8 Sep 2022 02:26:56 +0300 Subject: [PATCH 030/134] Introduce usage of a statement timeout. In the case when a user sets a statement timeout, AQO adds one more timeout right before it. If the timeout expires, AQO walks across the PlanState tree and learns on partially executed nodes.
--- Makefile | 2 +- README.md | 2 +- aqo.c | 7 +- aqo.h | 15 ++- auto_tuning.c | 2 +- cardinality_estimation.c | 6 +- cardinality_hooks.c | 5 +- hash.c | 2 +- learn_cache.c | 157 +++++++++++++++++++++++++++++++ learn_cache.h | 15 +++ machine_learning.c | 2 +- path_utils.c | 2 +- postprocessing.c | 193 +++++++++++++++++++++++++++++++++------ preprocessing.c | 2 +- selectivity_cache.c | 2 +- storage.c | 32 ++++++- t/001_pgbench.pl | 2 +- utils.c | 2 +- 18 files changed, 400 insertions(+), 50 deletions(-) create mode 100644 learn_cache.c create mode 100644 learn_cache.h diff --git a/Makefile b/Makefile index b351ae0e..2845854f 100755 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ PGFILEDESC = "AQO - Adaptive Query Optimization" MODULE_big = aqo OBJS = aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ -selectivity_cache.o storage.o utils.o $(WIN32RES) +selectivity_cache.o storage.o utils.o learn_cache.o $(WIN32RES) TAP_TESTS = 1 diff --git a/README.md b/README.md index 03582a3a..169a09d9 100644 --- a/README.md +++ b/README.md @@ -327,7 +327,7 @@ Dynamically generated constants are okay. ## License -© [Postgres Professional](https://postgrespro.com/), 2016-2021. Licensed under +© [Postgres Professional](https://postgrespro.com/), 2016-2022. Licensed under [The PostgreSQL License](LICENSE). 
## Reference diff --git a/aqo.c b/aqo.c index 4cffc94f..2acc4729 100644 --- a/aqo.c +++ b/aqo.c @@ -2,7 +2,7 @@ * aqo.c * Adaptive query optimization extension * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/aqo.c @@ -21,6 +21,7 @@ #include "cardinality_hooks.h" #include "path_utils.h" #include "preprocessing.h" +#include "learn_cache.h" PG_MODULE_MAGIC; @@ -103,6 +104,7 @@ int njoins; post_parse_analyze_hook_type prev_post_parse_analyze_hook; planner_hook_type prev_planner_hook; ExecutorStart_hook_type prev_ExecutorStart_hook; +ExecutorRun_hook_type prev_ExecutorRun; ExecutorEnd_hook_type prev_ExecutorEnd_hook; set_baserel_rows_estimate_hook_type prev_set_foreign_rows_estimate_hook; set_baserel_rows_estimate_hook_type prev_set_baserel_rows_estimate_hook; @@ -203,6 +205,8 @@ _PG_init(void) planner_hook = aqo_planner; prev_ExecutorStart_hook = ExecutorStart_hook; ExecutorStart_hook = aqo_ExecutorStart; + prev_ExecutorRun = ExecutorRun_hook; + ExecutorRun_hook = aqo_ExecutorRun; prev_ExecutorEnd_hook = ExecutorEnd_hook; ExecutorEnd_hook = aqo_ExecutorEnd; @@ -241,6 +245,7 @@ _PG_init(void) ALLOCSET_DEFAULT_SIZES); RegisterResourceReleaseCallback(aqo_free_callback, NULL); RegisterAQOPlanNodeMethods(); + lc_init(); } PG_FUNCTION_INFO_V1(invalidate_deactivated_queries_cache); diff --git a/aqo.h b/aqo.h index fff0bb06..d47a855f 100644 --- a/aqo.h +++ b/aqo.h @@ -105,7 +105,7 @@ * Module storage.c is responsible for storage query settings and models * (i. e. all information which is used in extension). 
* - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/aqo.h @@ -258,6 +258,7 @@ extern MemoryContext AQO_cache_mem_ctx; extern post_parse_analyze_hook_type prev_post_parse_analyze_hook; extern planner_hook_type prev_planner_hook; extern ExecutorStart_hook_type prev_ExecutorStart_hook; +extern ExecutorRun_hook_type prev_ExecutorRun; extern ExecutorEnd_hook_type prev_ExecutorEnd_hook; extern set_baserel_rows_estimate_hook_type prev_set_foreign_rows_estimate_hook; @@ -285,9 +286,15 @@ extern bool find_query(uint64 qhash, QueryContextData *ctx); extern bool update_query(uint64 qhash, uint64 fhash, bool learn_aqo, bool use_aqo, bool auto_tuning); extern bool add_query_text(uint64 query_hash, const char *query_string); +extern bool load_fss_ext(uint64 fs, int fss, + int ncols, double **matrix, double *targets, int *rows, + List **relids, bool isSafe); extern bool load_fss(uint64 fhash, int fss_hash, int ncols, double **matrix, double *targets, int *rows, List **relids); +extern bool update_fss_ext(uint64 fhash, int fsshash, int nrows, int ncols, + double **matrix, double *targets, List *relids, + bool isTimedOut); extern bool update_fss(uint64 fhash, int fss_hash, int nrows, int ncols, double **matrix, double *targets, List *relids); QueryStat *get_aqo_stat(uint64 query_hash); @@ -313,8 +320,10 @@ double predict_for_relation(List *restrict_clauses, List *selectivities, List *relids, int *fss_hash); /* Query execution statistics collecting hooks */ -void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags); -void aqo_ExecutorEnd(QueryDesc *queryDesc); +void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags); +void aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, + uint64 count, bool execute_once); +void aqo_ExecutorEnd(QueryDesc *queryDesc); /* Machine learning techniques */ extern double OkNNr_predict(int nrows, int ncols, diff --git a/auto_tuning.c b/auto_tuning.c index 
8bb024da..fb7e1eed 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/auto_tuning.c diff --git a/cardinality_estimation.c b/cardinality_estimation.c index c3e5d7a4..e5b9f593 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/cardinality_estimation.c @@ -83,8 +83,8 @@ predict_for_relation(List *clauses, List *selectivities, for (i = 0; i < aqo_K; ++i) matrix[i] = palloc0(sizeof(**matrix) * nfeatures); - if (load_fss(query_context.fspace_hash, *fss_hash, nfeatures, matrix, - targets, &rows, NULL)) + if (load_fss_ext(query_context.fspace_hash, *fss_hash, nfeatures, matrix, + targets, &rows, NULL, true)) result = OkNNr_predict(rows, nfeatures, matrix, targets, features); else { diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 32852b06..0e8a5a2c 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -18,7 +18,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/cardinality_hooks.c @@ -439,7 +439,8 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, *fss = get_grouped_exprs_hash(child_fss, group_exprs); - if (!load_fss(query_context.fspace_hash, *fss, 0, NULL, &target, &rows, NULL)) + if (!load_fss_ext(query_context.fspace_hash, *fss, 0, NULL, + &target, &rows, NULL, true)) return -1; Assert(rows == 1); diff --git a/hash.c b/hash.c index 0daad6e6..4510032e 100644 --- a/hash.c +++ b/hash.c @@ -12,7 +12,7 @@ * 
******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/hash.c diff --git a/learn_cache.c b/learn_cache.c new file mode 100644 index 00000000..f2b59323 --- /dev/null +++ b/learn_cache.c @@ -0,0 +1,157 @@ +/* + ******************************************************************************* + * + * + * + ******************************************************************************* + * + * Copyright (c) 2016-2022, Postgres Professional + * + * IDENTIFICATION + * aqo/learn_cache.c + * + */ + +#include "postgres.h" + +#include "aqo.h" +#include "learn_cache.h" + +typedef struct +{ + /* XXX we assume this struct contains no padding bytes */ + uint64 fs; + int64 fss; +} htab_key; + +typedef struct +{ + htab_key key; + + /* Store ML data "AS IS". */ + int nrows; + int ncols; + double *matrix[aqo_K]; + double *targets; + List *relids; +} htab_entry; + +static HTAB *fss_htab = NULL; +MemoryContext LearnCacheMemoryContext = NULL; + +void +lc_init(void) +{ + HASHCTL ctl; + + Assert(!LearnCacheMemoryContext); + LearnCacheMemoryContext = AllocSetContextCreate(TopMemoryContext, + "lcache context", + ALLOCSET_DEFAULT_SIZES); + + ctl.keysize = sizeof(htab_key); + ctl.entrysize = sizeof(htab_entry); + ctl.hcxt = LearnCacheMemoryContext; + + fss_htab = hash_create("Remote Con hash", 32, &ctl, HASH_ELEM | HASH_BLOBS); +} + +bool +lc_update_fss(uint64 fs, int fss, int nrows, int ncols, + double **matrix, double *targets, List *relids) +{ + htab_key key = {fs, fss}; + htab_entry *entry; + bool found; + int i; + MemoryContext memctx = MemoryContextSwitchTo(LearnCacheMemoryContext); + + Assert(fss_htab); + + entry = (htab_entry *) hash_search(fss_htab, &key, HASH_ENTER, &found); + if (found) + { + /* Clear previous version of the cached data. 
*/ + for (i = 0; i < entry->nrows; ++i) + pfree(entry->matrix[i]); + pfree(entry->targets); + list_free(entry->relids); + } + + entry->nrows = nrows; + entry->ncols = ncols; + for (i = 0; i < entry->nrows; ++i) + { + entry->matrix[i] = palloc(sizeof(double) * ncols); + memcpy(entry->matrix[i], matrix[i], sizeof(double) * ncols); + } + entry->targets = palloc(sizeof(double) * nrows); + memcpy(entry->targets, targets, sizeof(double) * nrows); + entry->relids = list_copy(relids); + + MemoryContextSwitchTo(memctx); + return true; +} + +bool +lc_has_fss(uint64 fs, int fss) +{ + htab_key key = {fs, fss}; + bool found; + + Assert(fss_htab); + + (void) hash_search(fss_htab, &key, HASH_FIND, &found); + if (!found) + return false; + return true; +} + +bool +lc_load_fss(uint64 fs, int fss, int ncols, double **matrix, + double *targets, int *nrows, List **relids) +{ + htab_key key = {fs, fss}; + htab_entry *entry; + bool found; + int i; + + Assert(fss_htab); + + entry = (htab_entry *) hash_search(fss_htab, &key, HASH_FIND, &found); + if (!found) + return false; + + *nrows = entry->nrows; + Assert(entry->ncols == ncols); + for (i = 0; i < entry->nrows; ++i) + memcpy(matrix[i], entry->matrix[i], sizeof(double) * ncols); + memcpy(targets, entry->targets, sizeof(double) * entry->nrows); + if (relids) + *relids = list_copy(entry->relids); + return true; +} + +/* + * Remove record from fss cache. Should be done at learning stage of successfully + * finished query execution. 
+*/ +void +lc_remove_fss(uint64 fs, int fss) +{ + htab_key key = {fs, fss}; + htab_entry *entry; + bool found; + int i; + + Assert(fss_htab); + + entry = (htab_entry *) hash_search(fss_htab, &key, HASH_FIND, &found); + if (!found) + return; + + for (i = 0; i < entry->nrows; ++i) + pfree(entry->matrix[i]); + pfree(entry->targets); + hash_search(fss_htab, &key, HASH_REMOVE, NULL); +} diff --git a/learn_cache.h b/learn_cache.h new file mode 100644 index 00000000..876a106e --- /dev/null +++ b/learn_cache.h @@ -0,0 +1,15 @@ +#ifndef LEARN_CACHE_H +#define LEARN_CACHE_H + +#include "nodes/pg_list.h" + +extern void lc_init(void); +extern bool lc_update_fss(uint64 fhash, int fsshash, int nrows, int ncols, + double **matrix, double *targets, List *relids); +extern bool lc_has_fss(uint64 fhash, int fss); +extern bool lc_load_fss(uint64 fhash, int fsshash, int ncols, + double **matrix, double *targets, int *nrows, + List **relids); +extern void lc_remove_fss(uint64 fhash, int fss_hash); + +#endif /* LEARN_CACHE_H */ diff --git a/machine_learning.c b/machine_learning.c index a9889868..91c72d3e 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -12,7 +12,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/machine_learning.c diff --git a/path_utils.c b/path_utils.c index cac048c2..ce616555 100644 --- a/path_utils.c +++ b/path_utils.c @@ -5,7 +5,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/path_utils.c diff --git a/postprocessing.c b/postprocessing.c index 524f41e0..1eab3d95 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -9,7 +9,7 @@ * ******************************************************************************* * - * Copyright (c) 
2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/postprocessing.c @@ -27,6 +27,7 @@ #include "hash.h" #include "path_utils.h" #include "preprocessing.h" +#include "learn_cache.h" typedef struct @@ -35,6 +36,7 @@ typedef struct List *selectivities; List *relidslist; bool learn; + bool isTimedOut; /* Is execution was interrupted by timeout? */ } aqo_obj_stat; static double cardinality_sum_errors; @@ -56,14 +58,13 @@ static char *PlanStateInfo = "PlanStateInfo"; static void atomic_fss_learn_step(uint64 fhash, int fss_hash, int ncols, double **matrix, double *targets, double *features, double target, - List *relids); + List *relids, bool isTimedOut); static bool learnOnPlanState(PlanState *p, void *context); -static void learn_sample(List *clauselist, - List *selectivities, - List *relidslist, - double true_cardinality, - Plan *plan, - bool notExecuted); +static void learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, + double true_cardinality, Plan *plan, + bool notExecuted); +static void learn_sample(aqo_obj_stat *ctx, List *relidslist, + double true_cardinality, Plan *plan, bool notExecuted); static List *restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, @@ -89,7 +90,7 @@ static void atomic_fss_learn_step(uint64 fhash, int fss_hash, int ncols, double **matrix, double *targets, double *features, double target, - List *relids) + List *relids, bool isTimedOut) { LOCKTAG tag; int nrows; @@ -97,17 +98,18 @@ atomic_fss_learn_step(uint64 fhash, int fss_hash, int ncols, init_lock_tag(&tag, fhash, fss_hash); LockAcquire(&tag, ExclusiveLock, false, false); - if (!load_fss(fhash, fss_hash, ncols, matrix, targets, &nrows, NULL)) + if (!load_fss_ext(fhash, fss_hash, ncols, matrix, targets, &nrows, NULL, !isTimedOut)) nrows = 0; nrows = OkNNr_learn(nrows, ncols, matrix, targets, features, target); - update_fss(fhash, fss_hash, nrows, ncols, matrix, targets, relids); + update_fss_ext(fhash, 
fss_hash, nrows, ncols, matrix, targets, relids, + isTimedOut); LockRelease(&tag, ExclusiveLock, false); } static void -learn_agg_sample(List *clauselist, List *selectivities, List *relidslist, +learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, double true_cardinality, Plan *plan, bool notExecuted) { uint64 fhash = query_context.fspace_hash; @@ -123,11 +125,11 @@ learn_agg_sample(List *clauselist, List *selectivities, List *relidslist, * Learn 'not executed' nodes only once, if no one another knowledge exists * for current feature subspace. */ - if (notExecuted && aqo_node->prediction > 0) + if (notExecuted && aqo_node->prediction > 0.) return; target = log(true_cardinality); - child_fss = get_fss_for_object(relidslist, clauselist, NIL, NULL, NULL); + child_fss = get_fss_for_object(relidslist, ctx->clauselist, NIL, NULL, NULL); fss = get_grouped_exprs_hash(child_fss, aqo_node->grouping_exprs); for (i = 0; i < aqo_K; i++) @@ -135,7 +137,7 @@ learn_agg_sample(List *clauselist, List *selectivities, List *relidslist, /* Critical section */ atomic_fss_learn_step(fhash, fss, 0, matrix, targets, NULL, target, - relidslist); + relidslist, ctx->isTimedOut); /* End of critical section */ } @@ -144,7 +146,7 @@ learn_agg_sample(List *clauselist, List *selectivities, List *relidslist, * true cardinalities) performs learning procedure. 
*/ static void -learn_sample(List *clauselist, List *selectivities, List *relidslist, +learn_sample(aqo_obj_stat *ctx, List *relidslist, double true_cardinality, Plan *plan, bool notExecuted) { uint64 fhash = query_context.fspace_hash; @@ -158,8 +160,8 @@ learn_sample(List *clauselist, List *selectivities, List *relidslist, AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); target = log(true_cardinality); - fss_hash = get_fss_for_object(relidslist, clauselist, - selectivities, &nfeatures, &features); + fss_hash = get_fss_for_object(relidslist, ctx->clauselist, + ctx->selectivities, &nfeatures, &features); /* Only Agg nodes can have non-empty a grouping expressions list. */ Assert(!IsA(plan, Agg) || aqo_node->grouping_exprs != NIL); @@ -178,7 +180,7 @@ learn_sample(List *clauselist, List *selectivities, List *relidslist, /* Critical section */ atomic_fss_learn_step(fhash, fss_hash, nfeatures, matrix, targets, features, target, - relidslist); + relidslist, ctx->isTimedOut); /* End of critical section */ if (nfeatures > 0) @@ -264,7 +266,7 @@ IsParallelTuplesProcessing(const Plan *plan, bool IsParallel) /* * learn_subplan_recurse * - * Emphasise recursion operation into separate function because of increasing + * Emphasize recursion operation into separate function because of increasing * complexity of this logic. */ static bool @@ -276,6 +278,13 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) if (!p->instrument) return true; + + if (!INSTR_TIME_IS_ZERO(p->instrument->starttime)) + { + Assert(ctx->isTimedOut); + InstrStopNode(p->instrument, 0); + } + InstrEndLoop(p->instrument); saved_subplan_list = p->subPlan; @@ -286,19 +295,22 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) if (planstate_tree_walker(p, learnOnPlanState, (void *) ctx)) return true; + /* + * Learn on subplans and initplans separately. Discard learn context of these + * subplans because we will use their fss'es directly. 
+ */ foreach(lc, saved_subplan_list) { SubPlanState *sps = lfirst_node(SubPlanState, lc); - aqo_obj_stat SPCtx = {NIL, NIL, NIL, ctx->learn}; + aqo_obj_stat SPCtx = {NIL, NIL, NIL, ctx->learn, ctx->isTimedOut}; if (learnOnPlanState(sps->planstate, (void *) &SPCtx)) return true; } - foreach(lc, saved_initplan_list) { SubPlanState *sps = lfirst_node(SubPlanState, lc); - aqo_obj_stat SPCtx = {NIL, NIL, NIL, ctx->learn}; + aqo_obj_stat SPCtx = {NIL, NIL, NIL, ctx->learn, ctx->isTimedOut}; if (learnOnPlanState(sps->planstate, (void *) &SPCtx)) return true; @@ -309,6 +321,23 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) return false; } +static bool +should_learn(aqo_obj_stat *ctx, double predicted, double *nrows) +{ + if (ctx->isTimedOut) + { + if (ctx->learn && *nrows > predicted * 1.2) + { + *nrows += (*nrows - predicted) * 3.; + return true; + } + } + else if (ctx->learn) + return true; + + return false; +} + /* * Walks over obtained PlanState tree, collects relation objects with their * clauses, selectivities and relids and passes each object to learn_sample. @@ -324,7 +353,7 @@ static bool learnOnPlanState(PlanState *p, void *context) { aqo_obj_stat *ctx = (aqo_obj_stat *) context; - aqo_obj_stat SubplanCtx = {NIL, NIL, NIL, ctx->learn}; + aqo_obj_stat SubplanCtx = {NIL, NIL, NIL, ctx->learn, ctx->isTimedOut}; double predicted = 0.; double learn_rows = 0.; AQOPlanNode *aqo_node; @@ -332,7 +361,7 @@ learnOnPlanState(PlanState *p, void *context) /* Recurse into subtree and collect clauses. */ if (learn_subplan_recurse(p, &SubplanCtx)) - /* If something goes wrong, return quckly. */ + /* If something goes wrong, return quickly. */ return true; aqo_node = get_aqo_plan_node(p->plan, false); @@ -469,18 +498,24 @@ learnOnPlanState(PlanState *p, void *context) { Assert(predicted >= 1. && learn_rows >= 1.); - if (ctx->learn) + if (should_learn(ctx, predicted, &learn_rows)) { + if (ctx->isTimedOut) + elog(DEBUG1, "[AQO] Learn on partially executed plan node. 
fs: %lu, fss: %d, predicted rows: %.0lf, updated prediction: %.0lf", + query_context.query_hash, aqo_node->fss, predicted, learn_rows); + if (IsA(p, AggState)) - learn_agg_sample(SubplanCtx.clauselist, NULL, + learn_agg_sample(&SubplanCtx, aqo_node->relids, learn_rows, p->plan, notExecuted); else - learn_sample(SubplanCtx.clauselist, - SubplanCtx.selectivities, + learn_sample(&SubplanCtx, aqo_node->relids, learn_rows, p->plan, notExecuted); + + if (!ctx->isTimedOut) + lc_remove_fss(query_context.query_hash, aqo_node->fss); } } } @@ -606,6 +641,102 @@ aqo_ExecutorStart(QueryDesc *queryDesc, int eflags) StorePlanInternals(queryDesc); } +#include "utils/timeout.h" + +static struct +{ + TimeoutId id; + QueryDesc *queryDesc; +} timeoutCtl = {0, NULL}; + +static int exec_nested_level = 0; + +static void +aqo_timeout_handler(void) +{ + aqo_obj_stat ctx = {NIL, NIL, NIL, false, false}; + + if (!timeoutCtl.queryDesc || !ExtractFromQueryEnv(timeoutCtl.queryDesc)) + return; + + /* Now we can analyze execution state of the query. */ + + ctx.learn = query_context.learn_aqo; + ctx.isTimedOut = true; + + elog(DEBUG1, "AQO timeout was expired. Try to learn on partial data."); + learnOnPlanState(timeoutCtl.queryDesc->planstate, (void *) &ctx); +} + +static bool +set_timeout_if_need(QueryDesc *queryDesc) +{ + TimestampTz fin_time; + + if (!get_timeout_active(STATEMENT_TIMEOUT)) + return false; + + if (!ExtractFromQueryEnv(queryDesc)) + return false; + + if (IsQueryDisabled() || IsParallelWorker() || + !(query_context.use_aqo || query_context.learn_aqo)) + return false; + + /* + * Statement timeout exists. AQO should create user timeout right before the + * statement timeout. + */ + + if (timeoutCtl.id < USER_TIMEOUT) + /* Register once per backend, because of timeouts implementation. 
*/ + timeoutCtl.id = RegisterTimeout(USER_TIMEOUT, aqo_timeout_handler); + else + Assert(!get_timeout_active(timeoutCtl.id)); + + fin_time = get_timeout_finish_time(STATEMENT_TIMEOUT); + enable_timeout_at(timeoutCtl.id, fin_time - 1); + + /* Save pointer to queryDesc to use at learning after a timeout interruption. */ + timeoutCtl.queryDesc = queryDesc; + return true; +} + +/* + * ExecutorRun hook. + */ +void +aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, + bool execute_once) +{ + bool timeout_enabled = false; + + if (exec_nested_level <= 0) + timeout_enabled = set_timeout_if_need(queryDesc); + + Assert(!timeout_enabled || + (timeoutCtl.queryDesc && timeoutCtl.id >= USER_TIMEOUT)); + + exec_nested_level++; + + PG_TRY(); + { + if (prev_ExecutorRun) + prev_ExecutorRun(queryDesc, direction, count, execute_once); + else + standard_ExecutorRun(queryDesc, direction, count, execute_once); + } + PG_FINALLY(); + { + exec_nested_level--; + timeoutCtl.queryDesc = NULL; + + if (timeout_enabled) + disable_timeout(timeoutCtl.id, false); + } + PG_END_TRY(); +} + /* * General hook which runs before ExecutorEnd and collects query execution * cardinality statistics. @@ -647,7 +778,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) if (query_context.learn_aqo || (!query_context.learn_aqo && query_context.collect_stat)) { - aqo_obj_stat ctx = {NIL, NIL, NIL, query_context.learn_aqo}; + aqo_obj_stat ctx = {NIL, NIL, NIL, query_context.learn_aqo, false}; /* * Analyze plan if AQO need to learn or need to collect statistics only. @@ -730,6 +861,8 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) * standard_ExecutorEnd clears the queryDesc->planstate. After this point no * one operation with the plan can be made. 
*/ + + timeoutCtl.queryDesc = NULL; } /* diff --git a/preprocessing.c b/preprocessing.c index f09e3eaa..ae992041 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -49,7 +49,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/preprocessing.c diff --git a/selectivity_cache.c b/selectivity_cache.c index b59da933..0b354ba0 100644 --- a/selectivity_cache.c +++ b/selectivity_cache.c @@ -9,7 +9,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/selectivity_cache.c diff --git a/storage.c b/storage.c index 70fa5abb..61de8a29 100644 --- a/storage.c +++ b/storage.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/storage.c @@ -23,6 +23,7 @@ #include "aqo.h" #include "preprocessing.h" +#include "learn_cache.h" HTAB *deactivated_queries = NULL; @@ -363,6 +364,23 @@ deform_oids_vector(Datum datum) return relids; } +bool +load_fss_ext(uint64 fs, int fss, + int ncols, double **matrix, double *targets, int *rows, + List **relids, bool isSafe) +{ + if (isSafe && !lc_has_fss(fs, fss)) + return load_fss(fs, fss, ncols, matrix, targets, rows, relids); + else + { + if (matrix == NULL && targets == NULL && rows == NULL) + return true; + + elog(DEBUG1, "Load ML data for fs %lu, fss %d", fs, fss); + return lc_load_fss(fs, fss, ncols, matrix, targets, rows, relids); + } +} + /* * Loads feature subspace (fss) from table aqo_data into memory. * The last column of the returned matrix is for target values of objects. 
@@ -448,6 +466,18 @@ load_fss(uint64 fhash, int fss_hash, return success; } +bool +update_fss_ext(uint64 fhash, int fsshash, int nrows, int ncols, + double **matrix, double *targets, List *relids, bool isTimedOut) +{ + if (!isTimedOut) + return update_fss(fhash, fsshash, nrows, ncols, matrix, targets, + relids); + else + return lc_update_fss(fhash, fsshash, nrows, ncols, matrix, targets, + relids); +} + /* * Updates the specified line in the specified feature subspace. * Returns false if the operation failed, true otherwise. diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 29400582..a3f35191 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -98,7 +98,7 @@ "); $node->command_ok([ 'pgbench', '-t', "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], - 'pgbench in disabled mode'); + 'pgbench in disabled mode - 2'); # Check: no any tuples added into the aqo_data table in this mode. $fss_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_data;"); diff --git a/utils.c b/utils.c index 34bcd2f9..8fc0d186 100644 --- a/utils.c +++ b/utils.c @@ -5,7 +5,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/utils.c From 2a2ddf9b10282c277cb2d96dc802fd2321731399 Mon Sep 17 00:00:00 2001 From: "a.pervushina" Date: Thu, 8 Sep 2022 02:28:52 +0300 Subject: [PATCH 031/134] Resolve a problem with gathering of instrumentation data on a partially executed query plan. Fix some issues. 
--- postprocessing.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/postprocessing.c b/postprocessing.c index 1eab3d95..ff466542 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -279,13 +279,24 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) if (!p->instrument) return true; - if (!INSTR_TIME_IS_ZERO(p->instrument->starttime)) + if (!ctx->isTimedOut) + InstrEndLoop(p->instrument); + else if (p->instrument->running) { - Assert(ctx->isTimedOut); - InstrStopNode(p->instrument, 0); - } + /* + * We can't use node instrumentation functions because after the end + * of this timeout handler query can work for some time. + * We change ntuples and nloops to unify walking logic and because we + * know that the query execution results meaningless. + */ + p->instrument->ntuples += p->instrument->tuplecount; + p->instrument->nloops += 1; - InstrEndLoop(p->instrument); + /* + * TODO: can we simply use ExecParallelCleanup to implement gathering of + * instrument data in the case of parallel workers? + */ + } saved_subplan_list = p->subPlan; saved_initplan_list = p->initPlan; @@ -328,7 +339,7 @@ should_learn(aqo_obj_stat *ctx, double predicted, double *nrows) { if (ctx->learn && *nrows > predicted * 1.2) { - *nrows += (*nrows - predicted) * 3.; + *nrows += (*nrows - predicted) * 10.; return true; } } @@ -500,8 +511,8 @@ learnOnPlanState(PlanState *p, void *context) if (should_learn(ctx, predicted, &learn_rows)) { - if (ctx->isTimedOut) - elog(DEBUG1, "[AQO] Learn on partially executed plan node. fs: %lu, fss: %d, predicted rows: %.0lf, updated prediction: %.0lf", + if (ctx->isTimedOut && aqo_show_details) + elog(NOTICE, "[AQO] Learn on partially executed plan node. 
fs: %lu, fss: %d, predicted rows: %.0lf, updated prediction: %.0lf", query_context.query_hash, aqo_node->fss, predicted, learn_rows); if (IsA(p, AggState)) @@ -664,7 +675,7 @@ aqo_timeout_handler(void) ctx.learn = query_context.learn_aqo; ctx.isTimedOut = true; - elog(DEBUG1, "AQO timeout was expired. Try to learn on partial data."); + elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. Try to learn on partial data."); learnOnPlanState(timeoutCtl.queryDesc->planstate, (void *) &ctx); } From 8ec446366de09a3d8df75632749fc6fcb8017a5d Mon Sep 17 00:00:00 2001 From: "a.pervushina" Date: Thu, 8 Sep 2022 02:30:22 +0300 Subject: [PATCH 032/134] An iteration of the code improvement. --- learn_cache.c | 16 +++++++++++----- storage.c | 1 - 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/learn_cache.c b/learn_cache.c index f2b59323..0feeb5dc 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -53,7 +53,7 @@ lc_init(void) ctl.entrysize = sizeof(htab_entry); ctl.hcxt = LearnCacheMemoryContext; - fss_htab = hash_create("Remote Con hash", 32, &ctl, HASH_ELEM | HASH_BLOBS); + fss_htab = hash_create("ML AQO cache", 256, &ctl, HASH_ELEM | HASH_BLOBS); } bool @@ -102,11 +102,13 @@ lc_has_fss(uint64 fs, int fss) Assert(fss_htab); (void) hash_search(fss_htab, &key, HASH_FIND, &found); - if (!found) - return false; - return true; + return found; } +/* + * Load ML data from a memory cache, not from a table. + * XXX That to do with learning tails, living in the cache? 
+ */ bool lc_load_fss(uint64 fs, int fss, int ncols, double **matrix, double *targets, int *nrows, List **relids) @@ -122,11 +124,15 @@ lc_load_fss(uint64 fs, int fss, int ncols, double **matrix, if (!found) return false; + if (aqo_show_details) + elog(NOTICE, "[AQO] Load ML data for fs %lu, fss %d from the cache", + fs, fss); + *nrows = entry->nrows; Assert(entry->ncols == ncols); for (i = 0; i < entry->nrows; ++i) memcpy(matrix[i], entry->matrix[i], sizeof(double) * ncols); - memcpy(targets, entry->targets, sizeof(double) * entry->nrows); + memcpy(targets, entry->targets, sizeof(double) * entry->nrows); if (relids) *relids = list_copy(entry->relids); return true; diff --git a/storage.c b/storage.c index 61de8a29..88ac89af 100644 --- a/storage.c +++ b/storage.c @@ -376,7 +376,6 @@ load_fss_ext(uint64 fs, int fss, if (matrix == NULL && targets == NULL && rows == NULL) return true; - elog(DEBUG1, "Load ML data for fs %lu, fss %d", fs, fss); return lc_load_fss(fs, fss, ncols, matrix, targets, rows, relids); } } From b011f958eb23bbce7cf27575d7ad85e43ab0f2be Mon Sep 17 00:00:00 2001 From: "a.pervushina" Date: Mon, 12 Sep 2022 16:42:16 +0300 Subject: [PATCH 033/134] Hide the AQO Statement Timeout feature under a GUC. Use aqo.learn_statement_timeout to enable this feature. On more function here is to do cleanup on this cache and memory context. 
--- aqo.c | 13 +++++++++++++ learn_cache.c | 41 +++++++++++++++++++++++++++++++++++++++-- learn_cache.h | 3 +++ postprocessing.c | 4 ++-- storage.c | 4 +++- 5 files changed, 60 insertions(+), 5 deletions(-) diff --git a/aqo.c b/aqo.c index 2acc4729..146cca54 100644 --- a/aqo.c +++ b/aqo.c @@ -201,6 +201,19 @@ _PG_init(void) NULL ); + DefineCustomBoolVariable( + "aqo.learn_statement_timeout", + "Learn on a plan interrupted by statement timeout.", + "ML data stored in a backend cache, so it works only locally.", + &aqo_learn_statement_timeout, + false, + PGC_USERSET, + 0, + NULL, + lc_assign_hook, + NULL + ); + prev_planner_hook = planner_hook; planner_hook = aqo_planner; prev_ExecutorStart_hook = ExecutorStart_hook; diff --git a/learn_cache.c b/learn_cache.c index 0feeb5dc..bc7bf935 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -13,6 +13,7 @@ */ #include "postgres.h" +#include "miscadmin.h" #include "aqo.h" #include "learn_cache.h" @@ -39,6 +40,8 @@ typedef struct static HTAB *fss_htab = NULL; MemoryContext LearnCacheMemoryContext = NULL; +bool aqo_learn_statement_timeout = false; + void lc_init(void) { @@ -66,7 +69,7 @@ lc_update_fss(uint64 fs, int fss, int nrows, int ncols, int i; MemoryContext memctx = MemoryContextSwitchTo(LearnCacheMemoryContext); - Assert(fss_htab); + Assert(fss_htab && aqo_learn_statement_timeout); entry = (htab_entry *) hash_search(fss_htab, &key, HASH_ENTER, &found); if (found) @@ -99,6 +102,9 @@ lc_has_fss(uint64 fs, int fss) htab_key key = {fs, fss}; bool found; + if (!aqo_learn_statement_timeout) + return false; + Assert(fss_htab); (void) hash_search(fss_htab, &key, HASH_FIND, &found); @@ -118,7 +124,7 @@ lc_load_fss(uint64 fs, int fss, int ncols, double **matrix, bool found; int i; - Assert(fss_htab); + Assert(fss_htab && aqo_learn_statement_timeout); entry = (htab_entry *) hash_search(fss_htab, &key, HASH_FIND, &found); if (!found) @@ -150,6 +156,9 @@ lc_remove_fss(uint64 fs, int fss) bool found; int i; + if 
(!aqo_learn_statement_timeout) + return; + Assert(fss_htab); entry = (htab_entry *) hash_search(fss_htab, &key, HASH_FIND, &found); @@ -161,3 +170,31 @@ lc_remove_fss(uint64 fs, int fss) pfree(entry->targets); hash_search(fss_htab, &key, HASH_REMOVE, NULL); } + +/* + * Main purpose of this hook is to cleanup a backend cache in some way to prevent + * memory leaks - in large queries we could have many unused fss nodes. + */ +void +lc_assign_hook(bool newval, void *extra) +{ + HASH_SEQ_STATUS status; + htab_entry *entry; + + if (!fss_htab || !IsUnderPostmaster) + return; + + /* Remove all entries, reset memory context. */ + + elog(DEBUG5, "[AQO] Cleanup local cache of ML data."); + + /* Remove all frozen plans from a plancache. */ + hash_seq_init(&status, fss_htab); + while ((entry = (htab_entry *) hash_seq_search(&status)) != NULL) + { + if (!hash_search(fss_htab, (void *) &entry->key, HASH_REMOVE, NULL)) + elog(ERROR, "[AQO] The local ML cache is corrupted."); + } + + MemoryContextReset(LearnCacheMemoryContext); +} \ No newline at end of file diff --git a/learn_cache.h b/learn_cache.h index 876a106e..e597c0f1 100644 --- a/learn_cache.h +++ b/learn_cache.h @@ -3,6 +3,8 @@ #include "nodes/pg_list.h" +extern bool aqo_learn_statement_timeout; + extern void lc_init(void); extern bool lc_update_fss(uint64 fhash, int fsshash, int nrows, int ncols, double **matrix, double *targets, List *relids); @@ -11,5 +13,6 @@ extern bool lc_load_fss(uint64 fhash, int fsshash, int ncols, double **matrix, double *targets, int *nrows, List **relids); extern void lc_remove_fss(uint64 fhash, int fss_hash); +extern void lc_assign_hook(bool newval, void *extra); #endif /* LEARN_CACHE_H */ diff --git a/postprocessing.c b/postprocessing.c index ff466542..d7a6e572 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -684,7 +684,7 @@ set_timeout_if_need(QueryDesc *queryDesc) { TimestampTz fin_time; - if (!get_timeout_active(STATEMENT_TIMEOUT)) + if (!get_timeout_active(STATEMENT_TIMEOUT) || 
!aqo_learn_statement_timeout) return false; if (!ExtractFromQueryEnv(queryDesc)) @@ -696,7 +696,7 @@ set_timeout_if_need(QueryDesc *queryDesc) /* * Statement timeout exists. AQO should create user timeout right before the - * statement timeout. + * timeout. */ if (timeoutCtl.id < USER_TIMEOUT) diff --git a/storage.c b/storage.c index 88ac89af..5f7c44d9 100644 --- a/storage.c +++ b/storage.c @@ -369,10 +369,12 @@ load_fss_ext(uint64 fs, int fss, int ncols, double **matrix, double *targets, int *rows, List **relids, bool isSafe) { - if (isSafe && !lc_has_fss(fs, fss)) + if (isSafe && (!aqo_learn_statement_timeout || !lc_has_fss(fs, fss))) return load_fss(fs, fss, ncols, matrix, targets, rows, relids); else { + Assert(aqo_learn_statement_timeout); + if (matrix == NULL && targets == NULL && rows == NULL) return true; From fbbf5ce8ccda568158a9927602099abf4265205d Mon Sep 17 00:00:00 2001 From: "a.pervushina" Date: Mon, 12 Sep 2022 16:45:22 +0300 Subject: [PATCH 034/134] Distinguish finished and running plan nodes. --- postprocessing.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/postprocessing.c b/postprocessing.c index d7a6e572..8b9f0e3a 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -333,13 +333,34 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) } static bool -should_learn(aqo_obj_stat *ctx, double predicted, double *nrows) +should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, + double predicted, double *nrows) { if (ctx->isTimedOut) { if (ctx->learn && *nrows > predicted * 1.2) { - *nrows += (*nrows - predicted) * 10.; + /* This node s*/ + if (aqo_show_details) + elog(NOTICE, + "[AQO] Learn on a plan node (%lu, %d), " + "predicted rows: %.0lf, updated prediction: %.0lf", + query_context.query_hash, node->fss, predicted, *nrows); + + return true; + } + + /* Has the executor finished its work? */ + if (TupIsNull(ps->ps_ResultTupleSlot) && + ps->instrument->nloops > 0.) 
/* Node was visited by executor at least once. */ + { + /* This is much more reliable data. So we can correct our prediction. */ + if (ctx->learn && aqo_show_details && fabs(*nrows - predicted) / predicted > 0.2) + elog(NOTICE, + "[AQO] Learn on a finished plan node (%lu, %d), " + "predicted rows: %.0lf, updated prediction: %.0lf", + query_context.query_hash, node->fss, predicted, *nrows); + return true; } } @@ -509,12 +530,8 @@ learnOnPlanState(PlanState *p, void *context) { Assert(predicted >= 1. && learn_rows >= 1.); - if (should_learn(ctx, predicted, &learn_rows)) + if (should_learn(p, aqo_node, ctx, predicted, &learn_rows)) { - if (ctx->isTimedOut && aqo_show_details) - elog(NOTICE, "[AQO] Learn on partially executed plan node. fs: %lu, fss: %d, predicted rows: %.0lf, updated prediction: %.0lf", - query_context.query_hash, aqo_node->fss, predicted, learn_rows); - if (IsA(p, AggState)) learn_agg_sample(&SubplanCtx, aqo_node->relids, learn_rows, From 5a45c393477c33f92c838e03eccbcf993de4dfa1 Mon Sep 17 00:00:00 2001 From: "a.pervushina" Date: Wed, 14 Sep 2022 00:45:37 +0300 Subject: [PATCH 035/134] Add reliability factor (rfactor) into interface of learning procedures. 
--- aqo.c | 15 ------ aqo.h | 31 +++-------- cardinality_estimation.c | 31 +++++------ cardinality_hooks.c | 28 +++++----- learn_cache.c | 26 +++++---- learn_cache.h | 11 ++-- machine_learning.c | 97 +++++++++++++++++++-------------- machine_learning.h | 29 ++++++++++ postprocessing.c | 112 ++++++++++++++++++++------------------- storage.c | 71 ++++++++++--------------- 10 files changed, 225 insertions(+), 226 deletions(-) create mode 100644 machine_learning.h diff --git a/aqo.c b/aqo.c index 146cca54..b5520a66 100644 --- a/aqo.c +++ b/aqo.c @@ -71,21 +71,6 @@ int auto_tuning_infinite_loop = 8; /* Machine learning parameters */ -/* - * Defines where we do not perform learning procedure - */ -const double object_selection_prediction_threshold = 0.3; - -/* - * This parameter tell us that the new learning sample object has very small - * distance from one whose features stored in matrix already. - * In this case we will not to add new line in matrix, but will modify this - * nearest neighbor features and cardinality with linear smoothing by - * learning_rate coefficient. - */ -const double object_selection_threshold = 0.1; -const double learning_rate = 1e-1; - /* The number of nearest neighbors which will be chosen for ML-operations */ int aqo_k = 3; double log_selectivity_lower_bound = -30; diff --git a/aqo.h b/aqo.h index d47a855f..6f3f9018 100644 --- a/aqo.h +++ b/aqo.h @@ -144,6 +144,7 @@ #include "utils/fmgroids.h" #include "utils/snapmgr.h" +#include "machine_learning.h" /* Check PostgreSQL version (9.6.0 contains important changes in planner) */ #if PG_VERSION_NUM < 90600 @@ -237,12 +238,6 @@ extern double auto_tuning_convergence_error; /* Machine learning parameters */ -/* Max number of matrix rows - max number of possible neighbors. 
*/ -#define aqo_K (30) - -extern const double object_selection_prediction_threshold; -extern const double object_selection_threshold; -extern const double learning_rate; extern int aqo_k; extern double log_selectivity_lower_bound; @@ -286,17 +281,13 @@ extern bool find_query(uint64 qhash, QueryContextData *ctx); extern bool update_query(uint64 qhash, uint64 fhash, bool learn_aqo, bool use_aqo, bool auto_tuning); extern bool add_query_text(uint64 query_hash, const char *query_string); -extern bool load_fss_ext(uint64 fs, int fss, - int ncols, double **matrix, double *targets, int *rows, +extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relids, bool isSafe); -extern bool load_fss(uint64 fhash, int fss_hash, - int ncols, double **matrix, double *targets, int *rows, - List **relids); -extern bool update_fss_ext(uint64 fhash, int fsshash, int nrows, int ncols, - double **matrix, double *targets, List *relids, - bool isTimedOut); -extern bool update_fss(uint64 fhash, int fss_hash, int nrows, int ncols, - double **matrix, double *targets, List *relids); +extern bool load_fss(uint64 fhash, int fss_hash, OkNNrdata *data, List **relids); +extern bool update_fss_ext(uint64 fhash, int fsshash, OkNNrdata *data, + List *relids, bool isTimedOut); +extern bool update_fss(uint64 fhash, int fss_hash, OkNNrdata *data, + List *relids); QueryStat *get_aqo_stat(uint64 query_hash); void update_aqo_stat(uint64 query_hash, QueryStat * stat); extern bool my_index_insert(Relation indexRelation, Datum *values, bool *isnull, @@ -325,14 +316,6 @@ void aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once); void aqo_ExecutorEnd(QueryDesc *queryDesc); -/* Machine learning techniques */ -extern double OkNNr_predict(int nrows, int ncols, - double **matrix, const double *targets, - double *features); -extern int OkNNr_learn(int matrix_rows, int matrix_cols, - double **matrix, double *targets, - double *features, double target); - /* 
Automatic query tuning */ extern void automatical_query_tuning(uint64 query_hash, QueryStat * stat); diff --git a/cardinality_estimation.c b/cardinality_estimation.c index e5b9f593..9bdaff5d 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -21,6 +21,7 @@ #include "aqo.h" #include "hash.h" +#include "machine_learning.h" #ifdef AQO_DEBUG_PRINT static void @@ -59,15 +60,12 @@ predict_debug_output(List *clauses, List *selectivities, */ double predict_for_relation(List *clauses, List *selectivities, - List *relids, int *fss_hash) + List *relids, int *fss) { - int nfeatures; - double *matrix[aqo_K]; - double targets[aqo_K]; - double *features; - double result; - int rows; - int i; + double *features; + double result; + int i; + OkNNrdata data; if (relids == NIL) /* @@ -76,16 +74,15 @@ predict_for_relation(List *clauses, List *selectivities, */ return -4.; - *fss_hash = get_fss_for_object(relids, clauses, - selectivities, &nfeatures, &features); + *fss = get_fss_for_object(relids, clauses, + selectivities, &data.cols, &features); - if (nfeatures > 0) + if (data.cols > 0) for (i = 0; i < aqo_K; ++i) - matrix[i] = palloc0(sizeof(**matrix) * nfeatures); + data.matrix[i] = palloc0(sizeof(double) * data.cols); - if (load_fss_ext(query_context.fspace_hash, *fss_hash, nfeatures, matrix, - targets, &rows, NULL, true)) - result = OkNNr_predict(rows, nfeatures, matrix, targets, features); + if (load_fss_ext(query_context.fspace_hash, *fss, &data, NULL, true)) + result = OkNNr_predict(&data, features); else { /* @@ -100,10 +97,10 @@ predict_for_relation(List *clauses, List *selectivities, predict_debug_output(clauses, selectivities, relids, *fss_hash, result); #endif pfree(features); - if (nfeatures > 0) + if (data.cols > 0) { for (i = 0; i < aqo_K; ++i) - pfree(matrix[i]); + pfree(data.matrix[i]); } if (result < 0) diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 0e8a5a2c..b15012ca 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ 
-30,6 +30,7 @@ #include "aqo.h" #include "cardinality_hooks.h" #include "hash.h" +#include "machine_learning.h" #include "path_utils.h" estimate_num_groups_hook_type prev_estimate_num_groups_hook = NULL; @@ -137,12 +138,12 @@ default_estimate_num_groups(PlannerInfo *root, List *groupExprs, void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) { - double predicted; - Oid relid; - List *relids = NIL; - List *selectivities = NULL; - List *clauses; - int fss = 0; + double predicted; + Oid relid; + List *relids = NIL; + List *selectivities = NULL; + List *clauses; + int fss = 0; if (IsQueryDisabled()) /* Fast path. */ @@ -418,10 +419,9 @@ static double predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, int *fss) { - int child_fss = 0; - double prediction; - int rows; - double target; + int child_fss = 0; + double prediction; + OkNNrdata data; if (subpath->parent->predicted_cardinality > 0.) /* A fast path. Here we can use a fss hash of a leaf. */ @@ -438,13 +438,13 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, } *fss = get_grouped_exprs_hash(child_fss, group_exprs); + memset(&data, 0, sizeof(OkNNrdata)); - if (!load_fss_ext(query_context.fspace_hash, *fss, 0, NULL, - &target, &rows, NULL, true)) + if (!load_fss_ext(query_context.fspace_hash, *fss, &data, NULL, true)) return -1; - Assert(rows == 1); - prediction = exp(target); + Assert(data.rows == 1); + prediction = exp(data.targets[0]); return (prediction <= 0) ? 
-1 : prediction; } diff --git a/learn_cache.c b/learn_cache.c index bc7bf935..156f04a5 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -60,8 +60,7 @@ lc_init(void) } bool -lc_update_fss(uint64 fs, int fss, int nrows, int ncols, - double **matrix, double *targets, List *relids) +lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) { htab_key key = {fs, fss}; htab_entry *entry; @@ -81,15 +80,15 @@ lc_update_fss(uint64 fs, int fss, int nrows, int ncols, list_free(entry->relids); } - entry->nrows = nrows; - entry->ncols = ncols; + entry->nrows = data->rows; + entry->ncols = data->cols; for (i = 0; i < entry->nrows; ++i) { - entry->matrix[i] = palloc(sizeof(double) * ncols); - memcpy(entry->matrix[i], matrix[i], sizeof(double) * ncols); + entry->matrix[i] = palloc(sizeof(double) * data->cols); + memcpy(entry->matrix[i], data->matrix[i], sizeof(double) * data->cols); } - entry->targets = palloc(sizeof(double) * nrows); - memcpy(entry->targets, targets, sizeof(double) * nrows); + entry->targets = palloc(sizeof(double) * data->rows); + memcpy(entry->targets, data->targets, sizeof(double) * data->rows); entry->relids = list_copy(relids); MemoryContextSwitchTo(memctx); @@ -116,8 +115,7 @@ lc_has_fss(uint64 fs, int fss) * XXX That to do with learning tails, living in the cache? 
*/ bool -lc_load_fss(uint64 fs, int fss, int ncols, double **matrix, - double *targets, int *nrows, List **relids) +lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) { htab_key key = {fs, fss}; htab_entry *entry; @@ -134,11 +132,11 @@ lc_load_fss(uint64 fs, int fss, int ncols, double **matrix, elog(NOTICE, "[AQO] Load ML data for fs %lu, fss %d from the cache", fs, fss); - *nrows = entry->nrows; - Assert(entry->ncols == ncols); + data->rows = entry->nrows; + Assert(entry->ncols == data->cols); for (i = 0; i < entry->nrows; ++i) - memcpy(matrix[i], entry->matrix[i], sizeof(double) * ncols); - memcpy(targets, entry->targets, sizeof(double) * entry->nrows); + memcpy(data->matrix[i], entry->matrix[i], sizeof(double) * data->cols); + memcpy(data->targets, entry->targets, sizeof(double) * entry->nrows); if (relids) *relids = list_copy(entry->relids); return true; diff --git a/learn_cache.h b/learn_cache.h index e597c0f1..52e4bec2 100644 --- a/learn_cache.h +++ b/learn_cache.h @@ -3,15 +3,16 @@ #include "nodes/pg_list.h" +#include "machine_learning.h" + extern bool aqo_learn_statement_timeout; extern void lc_init(void); -extern bool lc_update_fss(uint64 fhash, int fsshash, int nrows, int ncols, - double **matrix, double *targets, List *relids); +extern bool lc_update_fss(uint64 fhash, int fsshash, OkNNrdata *data, + List *relids); extern bool lc_has_fss(uint64 fhash, int fss); -extern bool lc_load_fss(uint64 fhash, int fsshash, int ncols, - double **matrix, double *targets, int *nrows, - List **relids); +extern bool lc_load_fss(uint64 fhash, int fsshash, OkNNrdata *data, + List **relids); extern void lc_remove_fss(uint64 fhash, int fss_hash); extern void lc_assign_hook(bool newval, void *extra); diff --git a/machine_learning.c b/machine_learning.c index 91c72d3e..380c9e42 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -22,6 +22,19 @@ #include "postgres.h" #include "aqo.h" +#include "machine_learning.h" + + +/* + * This parameter tell us that the 
new learning sample object has very small + * distance from one whose features stored in matrix already. + * In this case we will not to add new line in matrix, but will modify this + * nearest neighbor features and cardinality with linear smoothing by + * learning_rate coefficient. + */ +const double object_selection_threshold = 0.1; +const double learning_rate = 1e-1; + static double fs_distance(double *a, double *b, int len); static double fs_similarity(double dist); @@ -31,7 +44,7 @@ static double compute_weights(double *distances, int nrows, double *w, int *idx) /* * Computes L2-distance between two given vectors. */ -double +static double fs_distance(double *a, double *b, int len) { double res = 0; @@ -47,7 +60,7 @@ fs_distance(double *a, double *b, int len) /* * Returns similarity between objects based on distance between them. */ -double +static double fs_similarity(double dist) { return 1.0 / (0.001 + dist); @@ -60,7 +73,7 @@ fs_similarity(double dist) * Appeared as a separate function because of "don't repeat your code" * principle. */ -double +static double compute_weights(double *distances, int nrows, double *w, int *idx) { int i, @@ -103,31 +116,30 @@ compute_weights(double *distances, int nrows, double *w, int *idx) * positive targets are assumed. 
*/ double -OkNNr_predict(int nrows, int ncols, double **matrix, const double *targets, - double *features) +OkNNr_predict(OkNNrdata *data, double *features) { double distances[aqo_K]; int i; int idx[aqo_K]; /* indexes of nearest neighbors */ double w[aqo_K]; double w_sum; - double result = 0; + double result = 0.; - for (i = 0; i < nrows; ++i) - distances[i] = fs_distance(matrix[i], features, ncols); + for (i = 0; i < data->rows; ++i) + distances[i] = fs_distance(data->matrix[i], features, data->cols); - w_sum = compute_weights(distances, nrows, w, idx); + w_sum = compute_weights(distances, data->rows, w, idx); for (i = 0; i < aqo_k; ++i) if (idx[i] != -1) - result += targets[idx[i]] * w[i] / w_sum; + result += data->targets[idx[i]] * w[i] / w_sum; - if (result < 0) - result = 0; + if (result < 0.) + result = 0.; /* this should never happen */ if (idx[0] == -1) - result = -1; + result = -1.; return result; } @@ -139,23 +151,26 @@ OkNNr_predict(int nrows, int ncols, double **matrix, const double *targets, * updates this line in database, otherwise adds new line with given index. * It is supposed that indexes of new lines are consequent numbers * starting from matrix_rows. + * reliability: 1 - value after normal end of a query; 0.1 - data from partially + * executed node (we don't want this part); 0.9 - from finished node, but + * partially executed statement. */ int -OkNNr_learn(int nrows, int nfeatures, double **matrix, double *targets, - double *features, double target) +OkNNr_learn(OkNNrdata *data, + double *features, double target, double rfactor) { - double distances[aqo_K]; - int i, - j; - int mid = 0; /* index of row with minimum distance value */ - int idx[aqo_K]; + double distances[aqo_K]; + int i; + int j; + int mid = 0; /* index of row with minimum distance value */ + int idx[aqo_K]; /* * For each neighbor compute distance and search for nearest object. 
*/ - for (i = 0; i < nrows; ++i) + for (i = 0; i < data->rows; ++i) { - distances[i] = fs_distance(matrix[i], features, nfeatures); + distances[i] = fs_distance(data->matrix[i], features, data->cols); if (distances[i] < distances[mid]) mid = i; } @@ -165,16 +180,16 @@ OkNNr_learn(int nrows, int nfeatures, double **matrix, double *targets, * replace data for the neighbor to avoid some fluctuations. * We will change it's row with linear smoothing by learning_rate. */ - if (nrows > 0 && distances[mid] < object_selection_threshold) + if (data->rows > 0 && distances[mid] < object_selection_threshold) { - for (j = 0; j < nfeatures; ++j) - matrix[mid][j] += learning_rate * (features[j] - matrix[mid][j]); - targets[mid] += learning_rate * (target - targets[mid]); + for (j = 0; j < data->cols; ++j) + data->matrix[mid][j] += learning_rate * (features[j] - data->matrix[mid][j]); + data->targets[mid] += learning_rate * (target - data->targets[mid]); - return nrows; + return data->rows; } - if (nrows < aqo_K) + if (data->rows < aqo_K) { /* We can't reached limit of stored neighbors */ @@ -182,11 +197,12 @@ OkNNr_learn(int nrows, int nfeatures, double **matrix, double *targets, * Add new line into the matrix. We can do this because matrix_rows * is not the boundary of matrix. Matrix has aqo_K free lines */ - for (j = 0; j < nfeatures; ++j) - matrix[nrows][j] = features[j]; - targets[nrows] = target; + for (j = 0; j < data->cols; ++j) + data->matrix[data->rows][j] = features[j]; + data->targets[data->rows] = target; + data->rfactors[data->rows] = rfactor; - return nrows+1; + return data->rows + 1; } else { @@ -208,7 +224,7 @@ OkNNr_learn(int nrows, int nfeatures, double **matrix, double *targets, * idx array. Compute weight for each nearest neighbor and total weight * of all nearest neighbor. */ - w_sum = compute_weights(distances, nrows, w, idx); + w_sum = compute_weights(distances, data->rows, w, idx); /* * Compute average value for target by nearest neighbors. 
We need to @@ -216,26 +232,27 @@ OkNNr_learn(int nrows, int nfeatures, double **matrix, double *targets, * neighbors than aqo_k. * Semantics of coef1: it is defined distance between new object and * this superposition value (with linear smoothing). + * fc_coef - feature changing rate. * */ for (i = 0; i < aqo_k && idx[i] != -1; ++i) - avg_target += targets[idx[i]] * w[i] / w_sum; + avg_target += data->targets[idx[i]] * w[i] / w_sum; tc_coef = learning_rate * (avg_target - target); /* Modify targets and features of each nearest neighbor row. */ for (i = 0; i < aqo_k && idx[i] != -1; ++i) { - fc_coef = tc_coef * (targets[idx[i]] - avg_target) * w[i] * w[i] / - sqrt(nfeatures) / w_sum; + fc_coef = tc_coef * (data->targets[idx[i]] - avg_target) * w[i] * w[i] / + sqrt(data->cols) / w_sum; - targets[idx[i]] -= tc_coef * w[i] / w_sum; - for (j = 0; j < nfeatures; ++j) + data->targets[idx[i]] -= tc_coef * w[i] / w_sum; + for (j = 0; j < data->cols; ++j) { - feature = matrix[idx[i]]; + feature = data->matrix[idx[i]]; feature[j] -= fc_coef * (features[j] - feature[j]) / distances[idx[i]]; } } } - return nrows; + return data->rows; } diff --git a/machine_learning.h b/machine_learning.h new file mode 100644 index 00000000..a09b3102 --- /dev/null +++ b/machine_learning.h @@ -0,0 +1,29 @@ +#ifndef MACHINE_LEARNING_H +#define MACHINE_LEARNING_H + +/* Max number of matrix rows - max number of possible neighbors. */ +#define aqo_K (30) + +extern const double object_selection_threshold; +extern const double learning_rate; + +#define RELIABILITY_MIN (0.1) +#define RELIABILITY_MAX (1.0) + +typedef struct OkNNrdata +{ + int rows; /* Number of filled rows in the matrix */ + int cols; /* Number of columns in the matrix */ + + double *matrix[aqo_K]; /* Contains the matrix - learning data for the same + * value of (fs, fss), but different features. 
*/ + double targets[aqo_K]; /* Right side of the equations system */ + double rfactors[aqo_K]; +} OkNNrdata; + +/* Machine learning techniques */ +extern double OkNNr_predict(OkNNrdata *data, double *features); +extern int OkNNr_learn(OkNNrdata *data, + double *features, double target, double rfactor); + +#endif /* MACHINE_LEARNING_H */ diff --git a/postprocessing.c b/postprocessing.c index 8b9f0e3a..9a8ab192 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -26,6 +26,7 @@ #include "aqo.h" #include "hash.h" #include "path_utils.h" +#include "machine_learning.h" #include "preprocessing.h" #include "learn_cache.h" @@ -55,16 +56,17 @@ static char *PlanStateInfo = "PlanStateInfo"; /* Query execution statistics collecting utilities */ -static void atomic_fss_learn_step(uint64 fhash, int fss_hash, int ncols, - double **matrix, double *targets, - double *features, double target, +static void atomic_fss_learn_step(uint64 fhash, int fss_hash, OkNNrdata *data, + double *features, + double target, double rfactor, List *relids, bool isTimedOut); static bool learnOnPlanState(PlanState *p, void *context); static void learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, - double true_cardinality, Plan *plan, + double learned, double rfactor, Plan *plan, bool notExecuted); static void learn_sample(aqo_obj_stat *ctx, List *relidslist, - double true_cardinality, Plan *plan, bool notExecuted); + double learned, double rfactor, + Plan *plan, bool notExecuted); static List *restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, @@ -87,39 +89,35 @@ static bool ExtractFromQueryEnv(QueryDesc *queryDesc); * matrix and targets are just preallocated memory for computations. 
*/ static void -atomic_fss_learn_step(uint64 fhash, int fss_hash, int ncols, - double **matrix, double *targets, - double *features, double target, +atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, + double *features, double target, double rfactor, List *relids, bool isTimedOut) { - LOCKTAG tag; - int nrows; + LOCKTAG tag; - init_lock_tag(&tag, fhash, fss_hash); + init_lock_tag(&tag, fs, fss); LockAcquire(&tag, ExclusiveLock, false, false); - if (!load_fss_ext(fhash, fss_hash, ncols, matrix, targets, &nrows, NULL, !isTimedOut)) - nrows = 0; + if (!load_fss_ext(fs, fss, data, NULL, !isTimedOut)) + data->rows = 0; - nrows = OkNNr_learn(nrows, ncols, matrix, targets, features, target); - update_fss_ext(fhash, fss_hash, nrows, ncols, matrix, targets, relids, - isTimedOut); + data->rows = OkNNr_learn(data, features, target, rfactor); + update_fss_ext(fs, fss, data, relids, isTimedOut); LockRelease(&tag, ExclusiveLock, false); } static void learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, - double true_cardinality, Plan *plan, bool notExecuted) + double learned, double rfactor, Plan *plan, bool notExecuted) { - uint64 fhash = query_context.fspace_hash; - int child_fss; - int fss; - double target; - double *matrix[aqo_K]; - double targets[aqo_K]; - AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); - int i; + AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); + uint64 fhash = query_context.fspace_hash; + int child_fss; + double target; + OkNNrdata data; + int fss; + int i; /* * Learn 'not executed' nodes only once, if no one another knowledge exists @@ -128,16 +126,17 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, if (notExecuted && aqo_node->prediction > 0.) 
return; - target = log(true_cardinality); + target = log(learned); child_fss = get_fss_for_object(relidslist, ctx->clauselist, NIL, NULL, NULL); fss = get_grouped_exprs_hash(child_fss, aqo_node->grouping_exprs); + memset(&data, 0, sizeof(OkNNrdata)); for (i = 0; i < aqo_K; i++) - matrix[i] = NULL; + data.matrix[i] = NULL; + /* Critical section */ - atomic_fss_learn_step(fhash, fss, - 0, matrix, targets, NULL, target, - relidslist, ctx->isTimedOut); + atomic_fss_learn_step(fhash, fss, &data, NULL, + target, rfactor, relidslist, ctx->isTimedOut); /* End of critical section */ } @@ -147,21 +146,20 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, */ static void learn_sample(aqo_obj_stat *ctx, List *relidslist, - double true_cardinality, Plan *plan, bool notExecuted) + double learned, double rfactor, Plan *plan, bool notExecuted) { - uint64 fhash = query_context.fspace_hash; - int fss_hash; - int nfeatures; - double *matrix[aqo_K]; - double targets[aqo_K]; - double *features; - double target; - int i; - AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); - - target = log(true_cardinality); - fss_hash = get_fss_for_object(relidslist, ctx->clauselist, - ctx->selectivities, &nfeatures, &features); + AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); + uint64 fs = query_context.fspace_hash; + double *features; + double target; + OkNNrdata data; + int fss; + int i; + + memset(&data, 0, sizeof(OkNNrdata)); + target = log(learned); + fss = get_fss_for_object(relidslist, ctx->clauselist, + ctx->selectivities, &data.cols, &features); /* Only Agg nodes can have non-empty a grouping expressions list. 
*/ Assert(!IsA(plan, Agg) || aqo_node->grouping_exprs != NIL); @@ -173,19 +171,18 @@ learn_sample(aqo_obj_stat *ctx, List *relidslist, if (notExecuted && aqo_node->prediction > 0) return; - if (nfeatures > 0) + if (data.cols > 0) for (i = 0; i < aqo_K; ++i) - matrix[i] = palloc(sizeof(double) * nfeatures); + data.matrix[i] = palloc(sizeof(double) * data.cols); /* Critical section */ - atomic_fss_learn_step(fhash, fss_hash, - nfeatures, matrix, targets, features, target, + atomic_fss_learn_step(fs, fss, &data, features, target, rfactor, relidslist, ctx->isTimedOut); /* End of critical section */ - if (nfeatures > 0) + if (data.cols > 0) for (i = 0; i < aqo_K; ++i) - pfree(matrix[i]); + pfree(data.matrix[i]); pfree(features); } @@ -334,7 +331,7 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) static bool should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, - double predicted, double *nrows) + double predicted, double *nrows, double *rfactor) { if (ctx->isTimedOut) { @@ -347,6 +344,7 @@ should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, "predicted rows: %.0lf, updated prediction: %.0lf", query_context.query_hash, node->fss, predicted, *nrows); + *rfactor = RELIABILITY_MIN; return true; } @@ -361,11 +359,15 @@ should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, "predicted rows: %.0lf, updated prediction: %.0lf", query_context.query_hash, node->fss, predicted, *nrows); + *rfactor = 0.9 * (RELIABILITY_MAX - RELIABILITY_MIN); return true; } } else if (ctx->learn) + { + *rfactor = RELIABILITY_MAX; return true; + } return false; } @@ -528,18 +530,20 @@ learnOnPlanState(PlanState *p, void *context) if (p->instrument) { + double rfactor = 1.; + Assert(predicted >= 1. 
&& learn_rows >= 1.); - if (should_learn(p, aqo_node, ctx, predicted, &learn_rows)) + if (should_learn(p, aqo_node, ctx, predicted, &learn_rows, &rfactor)) { if (IsA(p, AggState)) learn_agg_sample(&SubplanCtx, - aqo_node->relids, learn_rows, + aqo_node->relids, learn_rows, rfactor, p->plan, notExecuted); else learn_sample(&SubplanCtx, - aqo_node->relids, learn_rows, + aqo_node->relids, learn_rows, rfactor, p->plan, notExecuted); if (!ctx->isTimedOut) diff --git a/storage.c b/storage.c index 5f7c44d9..37ccfa16 100644 --- a/storage.c +++ b/storage.c @@ -22,6 +22,7 @@ #include "access/tableam.h" #include "aqo.h" +#include "machine_learning.h" #include "preprocessing.h" #include "learn_cache.h" @@ -347,11 +348,11 @@ form_oids_vector(List *relids) static List * deform_oids_vector(Datum datum) { - ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); - Datum *values; + ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); + Datum *values; int i; int nelems = 0; - List *relids = NIL; + List *relids = NIL; deconstruct_array(array, OIDOID, sizeof(Oid), true, TYPALIGN_INT, @@ -365,20 +366,14 @@ deform_oids_vector(Datum datum) } bool -load_fss_ext(uint64 fs, int fss, - int ncols, double **matrix, double *targets, int *rows, - List **relids, bool isSafe) +load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relids, bool isSafe) { if (isSafe && (!aqo_learn_statement_timeout || !lc_has_fss(fs, fss))) - return load_fss(fs, fss, ncols, matrix, targets, rows, relids); + return load_fss(fs, fss, data, relids); else { Assert(aqo_learn_statement_timeout); - - if (matrix == NULL && targets == NULL && rows == NULL) - return true; - - return lc_load_fss(fs, fss, ncols, matrix, targets, rows, relids); + return lc_load_fss(fs, fss, data, relids); } } @@ -397,9 +392,7 @@ load_fss_ext(uint64 fs, int fss, * objects in the given feature space */ bool -load_fss(uint64 fhash, int fss_hash, - int ncols, double **matrix, double *targets, int *rows, - List **relids) 
+load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) { Relation hrel; Relation irel; @@ -419,33 +412,28 @@ load_fss(uint64 fhash, int fss_hash, return false; scan = index_beginscan(hrel, irel, SnapshotSelf, 2, 0); - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fhash)); - ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss_hash)); + ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fs)); + ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss)); index_rescan(scan, key, 2, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); - if (matrix == NULL && targets == NULL && rows == NULL) - { - /* Just check availability */ - success = find_ok; - } - else if (find_ok) + if (find_ok) { tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); Assert(shouldFree != true); heap_deform_tuple(tuple, hrel->rd_att, values, isnull); - if (DatumGetInt32(values[2]) == ncols) + if (DatumGetInt32(values[2]) == data->cols) { - if (ncols > 0) + if (data->cols > 0) /* * The case than an object has not any filters and selectivities */ - deform_matrix(values[3], matrix); + deform_matrix(values[3], data->matrix); - deform_vector(values[4], targets, rows); + deform_vector(values[4], data->targets, &(data->rows)); if (relids != NULL) *relids = deform_oids_vector(values[5]); @@ -454,7 +442,7 @@ load_fss(uint64 fhash, int fss_hash, elog(ERROR, "unexpected number of features for hash (" \ UINT64_FORMAT", %d):\ expected %d features, obtained %d", - fhash, fss_hash, ncols, DatumGetInt32(values[2])); + fs, fss, ncols, DatumGetInt32(values[2])); } else success = false; @@ -468,15 +456,13 @@ load_fss(uint64 fhash, int fss_hash, } bool -update_fss_ext(uint64 fhash, int fsshash, int nrows, int ncols, - double **matrix, double *targets, List *relids, bool isTimedOut) +update_fss_ext(uint64 fs, int fsshash, OkNNrdata 
*data, List *relids, + bool isTimedOut) { if (!isTimedOut) - return update_fss(fhash, fsshash, nrows, ncols, matrix, targets, - relids); + return update_fss(fs, fsshash, data, relids); else - return lc_update_fss(fhash, fsshash, nrows, ncols, matrix, targets, - relids); + return lc_update_fss(fs, fsshash, data, relids); } /* @@ -492,8 +478,7 @@ update_fss_ext(uint64 fhash, int fsshash, int nrows, int ncols, * Caller guaranteed that no one AQO process insert or update this data row. */ bool -update_fss(uint64 fhash, int fsshash, int nrows, int ncols, - double **matrix, double *targets, List *relids) +update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) { Relation hrel; Relation irel; @@ -537,14 +522,14 @@ update_fss(uint64 fhash, int fsshash, int nrows, int ncols, { values[0] = Int64GetDatum(fhash); values[1] = Int32GetDatum(fsshash); - values[2] = Int32GetDatum(ncols); + values[2] = Int32GetDatum(data->cols); - if (ncols > 0) - values[3] = PointerGetDatum(form_matrix(matrix, nrows, ncols)); + if (data->cols > 0) + values[3] = PointerGetDatum(form_matrix(data->matrix, data->rows, data->cols)); else isnull[3] = true; - values[4] = PointerGetDatum(form_vector(targets, nrows)); + values[4] = PointerGetDatum(form_vector(data->targets, data->rows)); /* Form array of relids. Only once. 
*/ values[5] = PointerGetDatum(form_oids_vector(relids)); @@ -567,12 +552,12 @@ update_fss(uint64 fhash, int fsshash, int nrows, int ncols, Assert(shouldFree != true); heap_deform_tuple(tuple, hrel->rd_att, values, isnull); - if (ncols > 0) - values[3] = PointerGetDatum(form_matrix(matrix, nrows, ncols)); + if (data->cols > 0) + values[3] = PointerGetDatum(form_matrix(data->matrix, data->rows, data->cols)); else isnull[3] = true; - values[4] = PointerGetDatum(form_vector(targets, nrows)); + values[4] = PointerGetDatum(form_vector(data->targets, data->rows)); nw_tuple = heap_modify_tuple(tuple, tupDesc, values, isnull, replace); if (my_simple_heap_update(hrel, &(nw_tuple->t_self), nw_tuple, From 805b729e8e912b422cc207d9fb897a8c495759b6 Mon Sep 17 00:00:00 2001 From: "a.pervushina" Date: Wed, 14 Sep 2022 00:50:22 +0300 Subject: [PATCH 036/134] Introduce AQO v.1.4. Add reliability field into the aqo_data table. --- Makefile | 4 ++-- aqo--1.3--1.4.sql | 6 ++++++ aqo.control | 2 +- expected/forced_stat_collection.out | 4 ++-- learn_cache.c | 10 ++++++---- machine_learning.c | 9 ++++----- storage.c | 18 +++++++++++------- 7 files changed, 32 insertions(+), 21 deletions(-) create mode 100755 aqo--1.3--1.4.sql diff --git a/Makefile b/Makefile index 2845854f..766c98ca 100755 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # contrib/aqo/Makefile EXTENSION = aqo -EXTVERSION = 1.2 +EXTVERSION = 1.4 PGFILEDESC = "AQO - Adaptive Query Optimization" MODULE_big = aqo OBJS = aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ @@ -32,7 +32,7 @@ EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/conf.add EXTRA_INSTALL = contrib/postgres_fdw contrib/pg_stat_statements DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql \ - aqo--1.2--1.3.sql + aqo--1.2--1.3.sql aqo--1.3--1.4.sql ifdef USE_PGXS PG_CONFIG ?= pg_config diff --git a/aqo--1.3--1.4.sql b/aqo--1.3--1.4.sql new file mode 100755 index 00000000..517a6911 --- /dev/null +++ b/aqo--1.3--1.4.sql 
@@ -0,0 +1,6 @@ +/* contrib/aqo/aqo--1.3--1.4.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION aqo UPDATE TO '1.4'" to load this file. \quit + +ALTER TABLE public.aqo_data ADD COLUMN reliability double precision []; diff --git a/aqo.control b/aqo.control index 14bb3b50..dfdd815d 100644 --- a/aqo.control +++ b/aqo.control @@ -1,5 +1,5 @@ # AQO extension comment = 'machine learning for cardinality estimation in optimizer' -default_version = '1.3' +default_version = '1.4' module_pathname = '$libdir/aqo' relocatable = false diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index 229a2636..5c05d499 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -36,8 +36,8 @@ SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; (1 row) SELECT * FROM aqo_data; - fspace_hash | fsspace_hash | nfeatures | features | targets | oids --------------+--------------+-----------+----------+---------+------ + fspace_hash | fsspace_hash | nfeatures | features | targets | oids | reliability +-------------+--------------+-----------+----------+---------+------+------------- (0 rows) CREATE OR REPLACE FUNCTION round_array (double precision[]) diff --git a/learn_cache.c b/learn_cache.c index 156f04a5..471ea058 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -33,7 +33,8 @@ typedef struct int nrows; int ncols; double *matrix[aqo_K]; - double *targets; + double targets[aqo_K]; + double rfactors[aqo_K]; List *relids; } htab_entry; @@ -76,7 +77,6 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) /* Clear previous version of the cached data. 
*/ for (i = 0; i < entry->nrows; ++i) pfree(entry->matrix[i]); - pfree(entry->targets); list_free(entry->relids); } @@ -87,8 +87,9 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) entry->matrix[i] = palloc(sizeof(double) * data->cols); memcpy(entry->matrix[i], data->matrix[i], sizeof(double) * data->cols); } - entry->targets = palloc(sizeof(double) * data->rows); + memcpy(entry->targets, data->targets, sizeof(double) * data->rows); + memcpy(entry->rfactors, data->rfactors, sizeof(double) * data->rows); entry->relids = list_copy(relids); MemoryContextSwitchTo(memctx); @@ -137,6 +138,7 @@ lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) for (i = 0; i < entry->nrows; ++i) memcpy(data->matrix[i], entry->matrix[i], sizeof(double) * data->cols); memcpy(data->targets, entry->targets, sizeof(double) * entry->nrows); + memcpy(data->rfactors, entry->rfactors, sizeof(double) * entry->nrows); if (relids) *relids = list_copy(entry->relids); return true; @@ -165,7 +167,7 @@ lc_remove_fss(uint64 fs, int fss) for (i = 0; i < entry->nrows; ++i) pfree(entry->matrix[i]); - pfree(entry->targets); + hash_search(fss_htab, &key, HASH_REMOVE, NULL); } diff --git a/machine_learning.c b/machine_learning.c index 380c9e42..d0683334 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -156,8 +156,7 @@ OkNNr_predict(OkNNrdata *data, double *features) * partially executed statement. */ int -OkNNr_learn(OkNNrdata *data, - double *features, double target, double rfactor) +OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor) { double distances[aqo_K]; int i; @@ -191,10 +190,10 @@ OkNNr_learn(OkNNrdata *data, if (data->rows < aqo_K) { - /* We can't reached limit of stored neighbors */ + /* We don't reach a limit of stored neighbors */ /* - * Add new line into the matrix. We can do this because matrix_rows + * Add new line into the matrix. We can do this because data->rows * is not the boundary of matrix. 
Matrix has aqo_K free lines */ for (j = 0; j < data->cols; ++j) @@ -206,7 +205,7 @@ OkNNr_learn(OkNNrdata *data, } else { - double *feature; + double *feature; double avg_target = 0; double tc_coef; /* Target correction coefficient */ double fc_coef; /* Feature correction coefficient */ diff --git a/storage.c b/storage.c index 37ccfa16..efedac1d 100644 --- a/storage.c +++ b/storage.c @@ -27,6 +27,7 @@ #include "learn_cache.h" +#define AQO_DATA_COLUMNS (7) HTAB *deactivated_queries = NULL; static ArrayType *form_matrix(double **matrix, int nrows, int ncols); @@ -402,8 +403,8 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) bool find_ok = false; IndexScanDesc scan; ScanKeyData key[2]; - Datum values[6]; - bool isnull[6]; + Datum values[AQO_DATA_COLUMNS]; + bool isnull[AQO_DATA_COLUMNS]; bool success = true; if (!open_aqo_relation("public", "aqo_data", @@ -434,6 +435,7 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) deform_matrix(values[3], data->matrix); deform_vector(values[4], data->targets, &(data->rows)); + deform_vector(values[6], data->rfactors, &(data->rows)); if (relids != NULL) *relids = deform_oids_vector(values[5]); @@ -487,9 +489,9 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) TupleDesc tupDesc; HeapTuple tuple, nw_tuple; - Datum values[6]; - bool isnull[6] = { false, false, false, false, false, false }; - bool replace[6] = { false, false, false, true, true, false }; + Datum values[AQO_DATA_COLUMNS]; + bool isnull[AQO_DATA_COLUMNS]; + bool replace[AQO_DATA_COLUMNS] = { false, false, false, true, true, false, true }; bool shouldFree; bool find_ok = false; bool update_indexes; @@ -506,6 +508,7 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) RowExclusiveLock, &hrel, &irel)) return false; + memset(isnull, 0, sizeof(bool) * AQO_DATA_COLUMNS); tupDesc = RelationGetDescr(hrel); InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 2, 0); @@ -535,6 +538,7 @@ 
update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) values[5] = PointerGetDatum(form_oids_vector(relids)); if ((void *) values[5] == NULL) isnull[5] = true; + values[6] = PointerGetDatum(form_vector(data->rfactors, data->rows)); tuple = heap_form_tuple(tupDesc, values, isnull); /* @@ -558,8 +562,8 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) isnull[3] = true; values[4] = PointerGetDatum(form_vector(data->targets, data->rows)); - nw_tuple = heap_modify_tuple(tuple, tupDesc, - values, isnull, replace); + values[6] = PointerGetDatum(form_vector(data->rfactors, data->rows)); + nw_tuple = heap_modify_tuple(tuple, tupDesc, values, isnull, replace); if (my_simple_heap_update(hrel, &(nw_tuple->t_self), nw_tuple, &update_indexes)) { From d173d454e34f01279795db00cd18cfaa5442098c Mon Sep 17 00:00:00 2001 From: "a.pervushina" Date: Wed, 14 Sep 2022 00:51:06 +0300 Subject: [PATCH 037/134] Add reliability into the ML model. --- machine_learning.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/machine_learning.c b/machine_learning.c index d0683334..1894a266 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -181,9 +181,21 @@ OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor) */ if (data->rows > 0 && distances[mid] < object_selection_threshold) { + double lr = learning_rate * rfactor / data->rfactors[mid]; + + if (lr > 1.) + { + elog(WARNING, "[AQO] Something goes wrong in the ML core: learning rate = %lf", lr); + lr = 1.; + } + + Assert(lr > 0.); + Assert(data->rfactors[mid] > 0. 
&& data->rfactors[mid] <= 1.); + for (j = 0; j < data->cols; ++j) - data->matrix[mid][j] += learning_rate * (features[j] - data->matrix[mid][j]); - data->targets[mid] += learning_rate * (target - data->targets[mid]); + data->matrix[mid][j] += lr * (features[j] - data->matrix[mid][j]); + data->targets[mid] += lr * (target - data->targets[mid]); + data->rfactors[mid] += lr * (rfactor - data->rfactors[mid]); return data->rows; } @@ -229,7 +241,7 @@ OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor) * Compute average value for target by nearest neighbors. We need to * check idx[i] != -1 because we may have smaller value of nearest * neighbors than aqo_k. - * Semantics of coef1: it is defined distance between new object and + * Semantics of tc_coef: it is defined distance between new object and * this superposition value (with linear smoothing). * fc_coef - feature changing rate. * */ @@ -240,10 +252,21 @@ OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor) /* Modify targets and features of each nearest neighbor row. */ for (i = 0; i < aqo_k && idx[i] != -1; ++i) { - fc_coef = tc_coef * (data->targets[idx[i]] - avg_target) * w[i] * w[i] / - sqrt(data->cols) / w_sum; + double lr = learning_rate * rfactor / data->rfactors[mid]; + + if (lr > 1.) + { + elog(WARNING, "[AQO] Something goes wrong in the ML core: learning rate = %lf", lr); + lr = 1.; + } + + Assert(lr > 0.); + Assert(data->rfactors[mid] > 0. && data->rfactors[mid] <= 1.); + + fc_coef = tc_coef * lr * (data->targets[idx[i]] - avg_target) * + w[i] * w[i] / sqrt(data->cols) / w_sum; - data->targets[idx[i]] -= tc_coef * w[i] / w_sum; + data->targets[idx[i]] -= tc_coef * lr * w[i] / w_sum; for (j = 0; j < data->cols; ++j) { feature = data->matrix[idx[i]]; From 447108f28ee79aed955bb87384ffc534e4959c54 Mon Sep 17 00:00:00 2001 From: "a.pervushina" Date: Wed, 14 Sep 2022 00:52:33 +0300 Subject: [PATCH 038/134] Add basic code for support of DSM cache. 
--- Makefile | 2 +- aqo.c | 9 +++++++- aqo_shared.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++ aqo_shared.h | 22 +++++++++++++++++++ 4 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 aqo_shared.c create mode 100644 aqo_shared.h diff --git a/Makefile b/Makefile index 766c98ca..d5dfd1c5 100755 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ PGFILEDESC = "AQO - Adaptive Query Optimization" MODULE_big = aqo OBJS = aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ -selectivity_cache.o storage.o utils.o learn_cache.o $(WIN32RES) +selectivity_cache.o storage.o utils.o learn_cache.o aqo_shared.o $(WIN32RES) TAP_TESTS = 1 diff --git a/aqo.c b/aqo.c index b5520a66..35bff2e8 100644 --- a/aqo.c +++ b/aqo.c @@ -18,6 +18,7 @@ #include "utils/selfuncs.h" #include "aqo.h" +#include "aqo_shared.h" #include "cardinality_hooks.h" #include "path_utils.h" #include "preprocessing.h" @@ -126,7 +127,7 @@ _PG_init(void) { /* * In order to create our shared memory area, we have to be loaded via - * shared_preload_libraries. If not, report an ERROR. + * shared_preload_libraries. If not, report an ERROR. 
*/ if (!process_shared_preload_libraries_in_progress) ereport(ERROR, @@ -199,6 +200,8 @@ _PG_init(void) NULL ); + prev_shmem_startup_hook = shmem_startup_hook; + shmem_startup_hook = aqo_init_shmem; prev_planner_hook = planner_hook; planner_hook = aqo_planner; prev_ExecutorStart_hook = ExecutorStart_hook; @@ -243,6 +246,10 @@ _PG_init(void) ALLOCSET_DEFAULT_SIZES); RegisterResourceReleaseCallback(aqo_free_callback, NULL); RegisterAQOPlanNodeMethods(); + + MarkGUCPrefixReserved("aqo"); + RequestAddinShmemSpace(MAXALIGN(sizeof(AQOSharedState))); + lc_init(); } diff --git a/aqo_shared.c b/aqo_shared.c new file mode 100644 index 00000000..1d6983f0 --- /dev/null +++ b/aqo_shared.c @@ -0,0 +1,61 @@ +/* + * + */ + +#include "postgres.h" + +#include "storage/shmem.h" + +#include "aqo_shared.h" + +shmem_startup_hook_type prev_shmem_startup_hook = NULL; +static AQOSharedState *aqo_state = NULL; +unsigned long temp_storage_size = 1024 * 1024; /* Storage size, in bytes */ +void *temp_storage = NULL; + +static void +attach_dsm_segment(void) +{ + dsm_segment *seg; + + LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); + + if (aqo_state->dsm_handler != DSM_HANDLE_INVALID) + { + seg = dsm_attach(aqo_state->dsm_handler); + } + else + { + seg = dsm_create(temp_storage_size, 0); + aqo_state->dsm_handler = dsm_segment_handle(seg); + } + + temp_storage = dsm_segment_address(seg); + LWLockRelease(&aqo_state->lock); +} + +static void +aqo_detach_shmem(int code, Datum arg) +{ + dsm_handle handler = *(dsm_handle *) arg; + dsm_detach(dsm_find_mapping(handler)); +} + +void +aqo_init_shmem(void) +{ + bool found; + + LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); + aqo_state = ShmemInitStruct("aqo", sizeof(AQOSharedState), &found); + if (!found) + { + /* First time through ... 
*/ + LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId()); + aqo_state->dsm_handler = DSM_HANDLE_INVALID; + } + LWLockRelease(AddinShmemInitLock); + + LWLockRegisterTranche(aqo_state->lock.tranche, "aqo"); + on_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); +} diff --git a/aqo_shared.h b/aqo_shared.h new file mode 100644 index 00000000..ce5b436f --- /dev/null +++ b/aqo_shared.h @@ -0,0 +1,22 @@ +#ifndef AQO_SHARED_H +#define AQO_SHARED_H + + +#include "storage/dsm.h" +#include "storage/ipc.h" +#include "storage/lwlock.h" + + +typedef struct AQOSharedState +{ + LWLock lock; /* mutual exclusion */ + dsm_handle dsm_handler; +} AQOSharedState; + + +extern shmem_startup_hook_type prev_shmem_startup_hook; + + +extern void aqo_init_shmem(void); + +#endif /* AQO_SHARED_H */ From 38840b222fba9c95838bbc2528a2e9ca460b8fcd Mon Sep 17 00:00:00 2001 From: "a.pervushina" Date: Thu, 15 Sep 2022 11:53:36 +0300 Subject: [PATCH 039/134] Cumulative commit on the 'learn on statement timeout' feature. 
Now it works quite stable, merge it into master branch --- aqo.c | 4 +- aqo_shared.c | 170 +++++++++++++++++++++++++++--- aqo_shared.h | 21 ++++ learn_cache.c | 261 +++++++++++++++++++++++++++++++++-------------- learn_cache.h | 2 +- postprocessing.c | 8 +- storage.c | 1 + t/001_pgbench.pl | 5 + 8 files changed, 373 insertions(+), 99 deletions(-) diff --git a/aqo.c b/aqo.c index 35bff2e8..a0b2dccf 100644 --- a/aqo.c +++ b/aqo.c @@ -248,9 +248,7 @@ _PG_init(void) RegisterAQOPlanNodeMethods(); MarkGUCPrefixReserved("aqo"); - RequestAddinShmemSpace(MAXALIGN(sizeof(AQOSharedState))); - - lc_init(); + RequestAddinShmemSpace(aqo_memsize()); } PG_FUNCTION_INFO_V1(invalidate_deactivated_queries_cache); diff --git a/aqo_shared.c b/aqo_shared.c index 1d6983f0..5d4edb6f 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -8,43 +8,169 @@ #include "aqo_shared.h" + +typedef struct +{ + int magic; + uint32 total_size; + uint32 delta; +} dsm_seg_hdr; + +#define free_space(hdr) (uint32) (temp_storage_size - sizeof(dsm_seg_hdr) - hdr->delta) +#define addr(delta) ((char *) dsm_segment_address(seg) + sizeof(dsm_seg_hdr) + delta) + shmem_startup_hook_type prev_shmem_startup_hook = NULL; -static AQOSharedState *aqo_state = NULL; -unsigned long temp_storage_size = 1024 * 1024; /* Storage size, in bytes */ -void *temp_storage = NULL; +AQOSharedState *aqo_state = NULL; +HTAB *fss_htab = NULL; +static int aqo_htab_max_items = 1000; +static uint32 temp_storage_size = 1024 * 1024 * 10; /* Storage size, in bytes */ +static dsm_segment *seg = NULL; -static void -attach_dsm_segment(void) + +static void aqo_detach_shmem(int code, Datum arg); + + +void * +get_dsm_all(uint32 *size) { - dsm_segment *seg; + dsm_seg_hdr *hdr; - LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); + Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE)); - if (aqo_state->dsm_handler != DSM_HANDLE_INVALID) + if (aqo_state->dsm_handler == DSM_HANDLE_INVALID) + { + /* Fast path. No any cached data exists. 
 */ + *size = 0; + return NULL; + } + + if (!seg) { + /* if the segment already exists, attach to it */ seg = dsm_attach(aqo_state->dsm_handler); + Assert(seg); + dsm_pin_mapping(seg); + on_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); + } + + hdr = (dsm_seg_hdr *) dsm_segment_address(seg); + *size = hdr->delta; + return (char *) hdr + sizeof(dsm_seg_hdr); +} + +/* + * Cleanup of DSM cache: set header into default state and zero the memory block. + * This operation can be coupled with the cache dump, so we do it under an external + * hold of the lock. + */ +void +reset_dsm_cache(void) +{ + dsm_seg_hdr *hdr; + char *start; + + Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE)); + + if (aqo_state->dsm_handler == DSM_HANDLE_INVALID) + /* Fast path. No cached data exists. */ + return; + + Assert(seg); + + hdr = (dsm_seg_hdr *) dsm_segment_address(seg); + start = (char *) hdr + sizeof(dsm_seg_hdr); + + /* Reset the cache */ + memset(start, 0, hdr->delta); + + hdr->delta = 0; + hdr->total_size = temp_storage_size - sizeof(dsm_seg_hdr); +} + +char * +get_cache_address(void) +{ + dsm_seg_hdr *hdr; + + Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || + LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); + + if (aqo_state->dsm_handler != DSM_HANDLE_INVALID) + { + if (!seg) + { + /* Another process has already created the segment. Just attach to it. */ + seg = dsm_attach(aqo_state->dsm_handler); + dsm_pin_mapping(seg); + on_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); + } + + hdr = (dsm_seg_hdr *) dsm_segment_address(seg); } else { + /* + * First request for DSM cache in this instance. + * Create the DSM segment. Pin it so it lives until instance shutdown. + * Don't forget to detach DSM segment before an exit. 
+ */ + seg = dsm_create(temp_storage_size, 0); + dsm_pin_mapping(seg); + dsm_pin_segment(seg); + aqo_state->dsm_handler = dsm_segment_handle(seg); + on_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); + + hdr = (dsm_seg_hdr *) dsm_segment_address(seg); + hdr->magic = AQO_SHARED_MAGIC; + hdr->delta = 0; + hdr->total_size = temp_storage_size - sizeof(dsm_seg_hdr); } - temp_storage = dsm_segment_address(seg); - LWLockRelease(&aqo_state->lock); + Assert(seg); + Assert(hdr->magic == AQO_SHARED_MAGIC && hdr->total_size > 0); + + return (char *) hdr + sizeof(dsm_seg_hdr); +} + +uint32 +get_dsm_cache_pos(uint32 size) +{ + dsm_seg_hdr *hdr; + uint32 pos; + + Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || + LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); + + (void) get_cache_address(); + hdr = (dsm_seg_hdr *) dsm_segment_address(seg); + + if (free_space(hdr) < size || size == 0) + elog(ERROR, + "DSM cache can't allocate a mem block. Required: %u, free: %u", + size, free_space(hdr)); + + pos = hdr->delta; + hdr->delta += size; + Assert(free_space(hdr) >= 0); + return pos; } static void aqo_detach_shmem(int code, Datum arg) { - dsm_handle handler = *(dsm_handle *) arg; - dsm_detach(dsm_find_mapping(handler)); + if (seg != NULL) + dsm_detach(seg); + seg = NULL; } void aqo_init_shmem(void) { bool found; + HASHCTL info; + + aqo_state = NULL; + fss_htab = NULL; LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); aqo_state = ShmemInitStruct("aqo", sizeof(AQOSharedState), &found); @@ -54,8 +180,26 @@ aqo_init_shmem(void) LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId()); aqo_state->dsm_handler = DSM_HANDLE_INVALID; } + + info.keysize = sizeof(htab_key); + info.entrysize = sizeof(htab_entry); + fss_htab = ShmemInitHash("aqo hash", + aqo_htab_max_items, aqo_htab_max_items, + &info, + HASH_ELEM | HASH_BLOBS); + + LWLockRelease(AddinShmemInitLock); LWLockRegisterTranche(aqo_state->lock.tranche, "aqo"); - on_shmem_exit(aqo_detach_shmem, (Datum) 
&aqo_state->dsm_handler); +} + +Size +aqo_memsize(void) +{ + Size size; + + size = MAXALIGN(sizeof(AQOSharedState)); + size = add_size(size, hash_estimate_size(aqo_htab_max_items, sizeof(htab_entry))); + + return size; } diff --git a/aqo_shared.h b/aqo_shared.h index ce5b436f..eb5323e0 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -6,6 +6,20 @@ #include "storage/ipc.h" #include "storage/lwlock.h" +#define AQO_SHARED_MAGIC 0x053163 + +typedef struct +{ + /* XXX we assume this struct contains no padding bytes */ + uint64 fs; + int64 fss; +} htab_key; + +typedef struct +{ + htab_key key; + uint32 hdr_off; /* offset of data in DSM cache */ +} htab_entry; typedef struct AQOSharedState { @@ -15,8 +29,15 @@ typedef struct AQOSharedState extern shmem_startup_hook_type prev_shmem_startup_hook; +extern AQOSharedState *aqo_state; +extern HTAB *fss_htab; +extern Size aqo_memsize(void); +extern void reset_dsm_cache(void); +extern void *get_dsm_all(uint32 *size); +extern char *get_cache_address(void); +extern uint32 get_dsm_cache_pos(uint32 size); extern void aqo_init_shmem(void); #endif /* AQO_SHARED_H */ diff --git a/learn_cache.c b/learn_cache.c index 471ea058..dc07c959 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -16,48 +16,43 @@ #include "miscadmin.h" #include "aqo.h" +#include "aqo_shared.h" #include "learn_cache.h" -typedef struct -{ - /* XXX we assume this struct contains no padding bytes */ - uint64 fs; - int64 fss; -} htab_key; typedef struct { + int magic; htab_key key; + int rows; + int cols; + int nrelids; - /* Store ML data "AS IS". 
*/ - int nrows; - int ncols; - double *matrix[aqo_K]; - double targets[aqo_K]; - double rfactors[aqo_K]; - List *relids; -} htab_entry; + /* + * Links to variable data: + * double *matrix[aqo_K]; + * double *targets; + * double *rfactors; + * int *relids; + */ +} dsm_block_hdr; -static HTAB *fss_htab = NULL; -MemoryContext LearnCacheMemoryContext = NULL; bool aqo_learn_statement_timeout = false; -void -lc_init(void) -{ - HASHCTL ctl; +static uint32 init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relids); - Assert(!LearnCacheMemoryContext); - LearnCacheMemoryContext = AllocSetContextCreate(TopMemoryContext, - "lcache context", - ALLOCSET_DEFAULT_SIZES); - ctl.keysize = sizeof(htab_key); - ctl.entrysize = sizeof(htab_entry); - ctl.hcxt = LearnCacheMemoryContext; +/* Calculate, how many data we need to store an ML record. */ +static uint32 +calculate_size(int cols, int nrelids) +{ + uint32 size = sizeof(dsm_block_hdr); /* header's size */ - fss_htab = hash_create("ML AQO cache", 256, &ctl, HASH_ELEM | HASH_BLOBS); + size += sizeof(double) * cols * aqo_K; /* matrix */ + size += 2 * sizeof(double) * aqo_K; /* targets, rfactors */ + size += sizeof(int) * nrelids; /* relids */ + return size; } bool @@ -65,34 +60,81 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) { htab_key key = {fs, fss}; htab_entry *entry; + dsm_block_hdr *hdr; + char *ptr; bool found; int i; - MemoryContext memctx = MemoryContextSwitchTo(LearnCacheMemoryContext); + ListCell *lc; + uint32 size; Assert(fss_htab && aqo_learn_statement_timeout); + size = calculate_size(data->cols, list_length(relids)); + LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); + entry = (htab_entry *) hash_search(fss_htab, &key, HASH_ENTER, &found); if (found) { - /* Clear previous version of the cached data. 
*/ - for (i = 0; i < entry->nrows; ++i) - pfree(entry->matrix[i]); - list_free(entry->relids); + hdr = (dsm_block_hdr *) (get_cache_address() + entry->hdr_off); + + Assert(hdr->magic == AQO_SHARED_MAGIC); + Assert(hdr->key.fs == fs && hdr->key.fss == fss); + + if (data->cols != hdr->cols || list_length(relids) != hdr->nrelids) + { + /* + * Collision found: the same {fs,fss}, but something different. + * For simplicity - just don't update. + */ + LWLockRelease(&aqo_state->lock); + return false; + } + } + else + { + /* Get new block of DSM */ + entry->hdr_off = get_dsm_cache_pos(size); + hdr = (dsm_block_hdr *) (get_cache_address() + entry->hdr_off); + + /* These fields shouldn't change */ + hdr->magic = AQO_SHARED_MAGIC; + hdr->key.fs = fs; + hdr->key.fss = fss; + hdr->cols = data->cols; + hdr->nrelids = list_length(relids); } - entry->nrows = data->rows; - entry->ncols = data->cols; - for (i = 0; i < entry->nrows; ++i) + hdr->rows = data->rows; + ptr = (char *) hdr + sizeof(dsm_block_hdr); /* start point of variable data */ + + /* copy the matrix into DSM storage */ + for (i = 0; i < aqo_K; ++i) { - entry->matrix[i] = palloc(sizeof(double) * data->cols); - memcpy(entry->matrix[i], data->matrix[i], sizeof(double) * data->cols); + if (i < hdr->rows) + memcpy(ptr, data->matrix[i], sizeof(double) * hdr->cols); + ptr += sizeof(double) * data->cols; } - memcpy(entry->targets, data->targets, sizeof(double) * data->rows); - memcpy(entry->rfactors, data->rfactors, sizeof(double) * data->rows); - entry->relids = list_copy(relids); + /* copy targets into DSM storage */ + memcpy(ptr, data->targets, sizeof(double) * hdr->rows); + ptr += sizeof(double) * aqo_K; + + /* copy rfactors into DSM storage */ + memcpy(ptr, data->rfactors, sizeof(double) * hdr->rows); + ptr += sizeof(double) * aqo_K; - MemoryContextSwitchTo(memctx); + /* store relids */ + i = 0; + foreach(lc, relids) + { + memcpy(ptr, &lfirst_int(lc), sizeof(int)); + ptr += sizeof(int); + } + + /* Check the invariant */ 
+ Assert((uint32)(ptr - (char *) hdr) == size); + + LWLockRelease(&aqo_state->lock); return true; } @@ -107,68 +149,129 @@ lc_has_fss(uint64 fs, int fss) Assert(fss_htab); + LWLockAcquire(&aqo_state->lock, LW_SHARED); (void) hash_search(fss_htab, &key, HASH_FIND, &found); + LWLockRelease(&aqo_state->lock); + return found; } /* * Load ML data from a memory cache, not from a table. - * XXX That to do with learning tails, living in the cache? */ bool lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) { - htab_key key = {fs, fss}; - htab_entry *entry; - bool found; - int i; + htab_key key = {fs, fss}; + htab_entry *entry; + bool found; + dsm_block_hdr *hdr; Assert(fss_htab && aqo_learn_statement_timeout); + if (aqo_show_details) + elog(NOTICE, "[AQO] Load ML data for fs %lu, fss %d from the cache", + fs, fss); + + LWLockAcquire(&aqo_state->lock, LW_SHARED); entry = (htab_entry *) hash_search(fss_htab, &key, HASH_FIND, &found); if (!found) + { + LWLockRelease(&aqo_state->lock); return false; + } - if (aqo_show_details) - elog(NOTICE, "[AQO] Load ML data for fs %lu, fss %d from the cache", - fs, fss); + hdr = (dsm_block_hdr *) (get_cache_address() + entry->hdr_off); + Assert(hdr->magic == AQO_SHARED_MAGIC); + Assert(hdr->key.fs == fs && hdr->key.fss == fss); - data->rows = entry->nrows; - Assert(entry->ncols == data->cols); - for (i = 0; i < entry->nrows; ++i) - memcpy(data->matrix[i], entry->matrix[i], sizeof(double) * data->cols); - memcpy(data->targets, entry->targets, sizeof(double) * entry->nrows); - memcpy(data->rfactors, entry->rfactors, sizeof(double) * entry->nrows); - if (relids) - *relids = list_copy(entry->relids); + /* XXX */ + if (hdr->cols != data->cols) + { + LWLockRelease(&aqo_state->lock); + return false; + } + + init_with_dsm(data, hdr, relids); + LWLockRelease(&aqo_state->lock); return true; } -/* - * Remove record from fss cache. Should be done at learning stage of successfully - * finished query execution. 
-*/ -void -lc_remove_fss(uint64 fs, int fss) +static uint32 +init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relids) { - htab_key key = {fs, fss}; - htab_entry *entry; - bool found; - int i; + int i; + char *ptr = (char *) hdr + sizeof(dsm_block_hdr); - if (!aqo_learn_statement_timeout) - return; + Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || + LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); + Assert(hdr->magic == AQO_SHARED_MAGIC); - Assert(fss_htab); + data->rows = hdr->rows; + data->cols = hdr->cols; - entry = (htab_entry *) hash_search(fss_htab, &key, HASH_FIND, &found); - if (!found) + if (data->cols > 0) + { + for (i = 0; i < aqo_K; ++i) + { + if (i < data->rows) + { + data->matrix[i] = palloc(sizeof(double) * data->cols); + memcpy(data->matrix[i], ptr, sizeof(double) * data->cols); + } + ptr += sizeof(double) * data->cols; + } + } + memcpy(data->targets, ptr, sizeof(double) * hdr->rows); + ptr += sizeof(double) * aqo_K; + memcpy(data->rfactors, ptr, sizeof(double) * hdr->rows); + ptr += sizeof(double) * aqo_K; + + if (relids) + { + *relids = NIL; + for (i = 0; i < hdr->nrelids; i++) + { + *relids = lappend_int(*relids, *((int *)ptr)); + ptr += sizeof(int); + } + } + + return calculate_size(hdr->cols, hdr->nrelids); +} + +void +lc_flush_data(void) +{ + char *ptr; + uint32 size; + + if (aqo_state->dsm_handler == DSM_HANDLE_INVALID) + /* Fast path. No any cached data exists. 
*/ return; - for (i = 0; i < entry->nrows; ++i) - pfree(entry->matrix[i]); + LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); + ptr = get_dsm_all(&size); - hash_search(fss_htab, &key, HASH_REMOVE, NULL); + /* Iterate through records and store them into the aqo_data table */ + while(size > 0) + { + dsm_block_hdr *hdr = (dsm_block_hdr *) ptr; + OkNNrdata data; + List *relids; + uint32 delta = 0; + + delta = init_with_dsm(&data, hdr, &relids); + ptr += delta; + size -= delta; + update_fss(hdr->key.fs, hdr->key.fss, &data, relids); + + if (!hash_search(fss_htab, (void *) &hdr->key, HASH_REMOVE, NULL)) + elog(ERROR, "[AQO] Flush: local ML cache is corrupted."); + } + + reset_dsm_cache(); + LWLockRelease(&aqo_state->lock); } /* @@ -189,12 +292,12 @@ lc_assign_hook(bool newval, void *extra) elog(DEBUG5, "[AQO] Cleanup local cache of ML data."); /* Remove all frozen plans from a plancache. */ + LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); hash_seq_init(&status, fss_htab); while ((entry = (htab_entry *) hash_seq_search(&status)) != NULL) { if (!hash_search(fss_htab, (void *) &entry->key, HASH_REMOVE, NULL)) elog(ERROR, "[AQO] The local ML cache is corrupted."); } - - MemoryContextReset(LearnCacheMemoryContext); -} \ No newline at end of file + LWLockRelease(&aqo_state->lock); +} diff --git a/learn_cache.h b/learn_cache.h index 52e4bec2..194f92c2 100644 --- a/learn_cache.h +++ b/learn_cache.h @@ -7,13 +7,13 @@ extern bool aqo_learn_statement_timeout; -extern void lc_init(void); extern bool lc_update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids); extern bool lc_has_fss(uint64 fhash, int fss); extern bool lc_load_fss(uint64 fhash, int fsshash, OkNNrdata *data, List **relids); extern void lc_remove_fss(uint64 fhash, int fss_hash); +extern void lc_flush_data(void); extern void lc_assign_hook(bool newval, void *extra); #endif /* LEARN_CACHE_H */ diff --git a/postprocessing.c b/postprocessing.c index 9a8ab192..7237102f 100644 --- a/postprocessing.c +++ 
b/postprocessing.c @@ -545,9 +545,6 @@ learnOnPlanState(PlanState *p, void *context) learn_sample(&SubplanCtx, aqo_node->relids, learn_rows, rfactor, p->plan, notExecuted); - - if (!ctx->isTimedOut) - lc_remove_fss(query_context.query_hash, aqo_node->fss); } } } @@ -812,6 +809,11 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) { aqo_obj_stat ctx = {NIL, NIL, NIL, query_context.learn_aqo, false}; + /* + * Before learn phase, flush all cached data down to ML base. + */ + lc_flush_data(); + /* * Analyze plan if AQO need to learn or need to collect statistics only. */ diff --git a/storage.c b/storage.c index efedac1d..38db75b9 100644 --- a/storage.c +++ b/storage.c @@ -74,6 +74,7 @@ open_aqo_relation(char *heaprelnspname, char *heaprelname, relation_close(*hrel, lockmode); goto cleanup; } + return true; cleanup: diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index a3f35191..39eebee2 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -138,6 +138,11 @@ JOIN aqo_query_texts t ON (t.query_hash = v.fspace_hash) WHERE v.error > 0. AND t.query_text LIKE '%pgbench_accounts%'"); is($res, 3); +$res = $node->safe_psql('postgres', + "SELECT * FROM top_error_queries(10) v + JOIN aqo_query_texts t ON (t.query_hash = v.fspace_hash) + WHERE v.error > 0. AND t.query_text LIKE '%pgbench_accounts%'"); +note("\n TopN: \n $res \n"); $res = $node->safe_psql('postgres', "SELECT v.error, t.query_text FROM top_error_queries(10) v JOIN aqo_query_texts t ON (t.query_hash = v.fspace_hash) From 5e6cbdb38ba4226b8d5a890c3890b460c2726973 Mon Sep 17 00:00:00 2001 From: "a.pervushina" Date: Thu, 15 Sep 2022 12:19:16 +0300 Subject: [PATCH 040/134] Add tests for the 'Learn after a query interruption by timeout' feature. Fix the bug with false finished node. Add some DEBUG messages. Just for convenience. 
--- Makefile | 1 + expected/statement_timeout.out | 109 +++++++++++++++++++++++++++++++++ learn_cache.c | 3 + machine_learning.c | 4 +- postprocessing.c | 7 ++- sql/statement_timeout.sql | 64 +++++++++++++++++++ 6 files changed, 182 insertions(+), 6 deletions(-) create mode 100644 expected/statement_timeout.out create mode 100644 sql/statement_timeout.sql diff --git a/Makefile b/Makefile index d5dfd1c5..0a03ac48 100755 --- a/Makefile +++ b/Makefile @@ -23,6 +23,7 @@ REGRESS = aqo_disabled \ unsupported \ clean_aqo_data \ plancache \ + statement_timeout \ top_queries fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out new file mode 100644 index 00000000..9d91de22 --- /dev/null +++ b/expected/statement_timeout.out @@ -0,0 +1,109 @@ +-- Check the learning-on-timeout feature +-- For stabilized reproduction autovacuum must be disabled. +CREATE FUNCTION check_estimated_rows(text) RETURNS TABLE (estimated int) +LANGUAGE plpgsql AS $$ +DECLARE + ln text; + tmp text[]; + first_row bool := true; +BEGIN + FOR ln IN + execute format('explain %s', $1) + LOOP + IF first_row THEN + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*)'); + RETURN QUERY SELECT tmp[1]::int; + END IF; + END LOOP; +END; $$; +CREATE TABLE t AS SELECT * FROM generate_series(1,100) AS x; +ANALYZE t; +DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'off'; +SET aqo.learn_statement_timeout = 'on'; +SET statement_timeout = 800; -- [0.8s] +SELECT *, pg_sleep(1) FROM t; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. 
+ERROR: canceling statement due to statement timeout +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- haven't any partial data + check_estimated_rows +---------------------- + 100 +(1 row) + +-- Don't learn because running node has smaller cardinality than an optimizer prediction +SET statement_timeout = 3500; +SELECT *, pg_sleep(1) FROM t; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. +ERROR: canceling statement due to statement timeout +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + check_estimated_rows +---------------------- + 100 +(1 row) + +-- We have a real learning data. +SET statement_timeout = 10000; +SELECT *, pg_sleep(1) FROM t; + x | pg_sleep +---+---------- + 1 | + 2 | + 3 | + 4 | + 5 | +(5 rows) + +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + check_estimated_rows +---------------------- + 5 +(1 row) + +-- Force to make an underestimated prediction +DELETE FROM t WHERE x > 2; +ANALYZE t; +INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); +TRUNCATE aqo_data; +SET statement_timeout = 800; +SELECT *, pg_sleep(1) FROM t; -- Not learned +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. +ERROR: canceling statement due to statement timeout +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + check_estimated_rows +---------------------- + 2 +(1 row) + +SET statement_timeout = 3500; +SELECT *, pg_sleep(1) FROM t; -- Learn! +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. 
+ERROR: canceling statement due to statement timeout +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + check_estimated_rows +---------------------- + 3 +(1 row) + +SET statement_timeout = 5500; +SELECT *, pg_sleep(1) FROM t; -- Get reliable data + x | pg_sleep +---+---------- + 1 | + 2 | + 3 | + 4 | + 5 | +(5 rows) + +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + check_estimated_rows +---------------------- + 5 +(1 row) + +DROP TABLE t; +DROP EXTENSION aqo; diff --git a/learn_cache.c b/learn_cache.c index dc07c959..c3f65d3f 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -86,6 +86,7 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) * Collision found: the same {fs,fss}, but something different. * For simplicity - just don't update. */ + elog(DEBUG5, "[AQO]: A collision found in the temporary storage."); LWLockRelease(&aqo_state->lock); return false; } @@ -134,6 +135,8 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) /* Check the invariant */ Assert((uint32)(ptr - (char *) hdr) == size); + elog(DEBUG5, "DSM entry: %s, targets: %d.", + found ? "Reused" : "New entry", hdr->rows); LWLockRelease(&aqo_state->lock); return true; } diff --git a/machine_learning.c b/machine_learning.c index 1894a266..52c1ab40 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -199,8 +199,7 @@ OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor) return data->rows; } - - if (data->rows < aqo_K) + else if (data->rows < aqo_K) { /* We don't reach a limit of stored neighbors */ @@ -275,6 +274,5 @@ OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor) } } } - return data->rows; } diff --git a/postprocessing.c b/postprocessing.c index 7237102f..d2eee036 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -349,11 +349,12 @@ should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, } /* Has the executor finished its work? 
*/ - if (TupIsNull(ps->ps_ResultTupleSlot) && + if (!ps->instrument->running && TupIsNull(ps->ps_ResultTupleSlot) && ps->instrument->nloops > 0.) /* Node was visited by executor at least once. */ { /* This is much more reliable data. So we can correct our prediction. */ - if (ctx->learn && aqo_show_details && fabs(*nrows - predicted) / predicted > 0.2) + if (ctx->learn && aqo_show_details && + fabs(*nrows - predicted) / predicted > 0.2) elog(NOTICE, "[AQO] Learn on a finished plan node (%lu, %d), " "predicted rows: %.0lf, updated prediction: %.0lf", @@ -693,7 +694,7 @@ aqo_timeout_handler(void) ctx.learn = query_context.learn_aqo; ctx.isTimedOut = true; - elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. Try to learn on partial data."); + elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data."); learnOnPlanState(timeoutCtl.queryDesc->planstate, (void *) &ctx); } diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql new file mode 100644 index 00000000..419d85de --- /dev/null +++ b/sql/statement_timeout.sql @@ -0,0 +1,64 @@ +-- Check the learning-on-timeout feature +-- For stabilized reproduction autovacuum must be disabled. + +CREATE FUNCTION check_estimated_rows(text) RETURNS TABLE (estimated int) +LANGUAGE plpgsql AS $$ +DECLARE + ln text; + tmp text[]; + first_row bool := true; +BEGIN + FOR ln IN + execute format('explain %s', $1) + LOOP + IF first_row THEN + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*)'); + RETURN QUERY SELECT tmp[1]::int; + END IF; + END LOOP; +END; $$; + +CREATE TABLE t AS SELECT * FROM generate_series(1,100) AS x; +ANALYZE t; +DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. 
+ +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'off'; +SET aqo.learn_statement_timeout = 'on'; + +SET statement_timeout = 800; -- [0.8s] +SELECT *, pg_sleep(1) FROM t; +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- haven't any partial data + +-- Don't learn because running node has smaller cardinality than an optimizer prediction +SET statement_timeout = 3500; +SELECT *, pg_sleep(1) FROM t; +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + +-- We have a real learning data. +SET statement_timeout = 10000; +SELECT *, pg_sleep(1) FROM t; +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + +-- Force to make an underestimated prediction +DELETE FROM t WHERE x > 2; +ANALYZE t; +INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); +TRUNCATE aqo_data; + +SET statement_timeout = 800; +SELECT *, pg_sleep(1) FROM t; -- Not learned +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + +SET statement_timeout = 3500; +SELECT *, pg_sleep(1) FROM t; -- Learn! +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + +SET statement_timeout = 5500; +SELECT *, pg_sleep(1) FROM t; -- Get reliable data +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + +DROP TABLE t; +DROP EXTENSION aqo; From bd638da3d4782db0e3ad39b18efaf8625c85b1df Mon Sep 17 00:00:00 2001 From: "a.pervushina" Date: Thu, 15 Sep 2022 14:28:50 +0300 Subject: [PATCH 041/134] Move AQO from a relid based approach to a relation name based approach. It allows us to reuse ML data at different instance and learn on temporary tables. 
--- aqo--1.2--1.3.sql | 6 +-- aqo.h | 30 +++++++-------- cardinality_estimation.c | 20 +++++----- cardinality_hooks.c | 65 ++++++++++++++++++------------- expected/clean_aqo_data.out | 61 +++++++++++++++-------------- hash.c | 50 +++++++++++++++++------- hash.h | 2 +- learn_cache.c | 60 ++++++++++++++++++----------- learn_cache.h | 10 ++--- path_utils.c | 33 +++++++++------- path_utils.h | 3 +- postprocessing.c | 28 +++++++------- sql/clean_aqo_data.sql | 61 +++++++++++++++-------------- storage.c | 77 ++++++++++++++++++++----------------- t/001_pgbench.pl | 28 +++++++++----- utils.c | 16 ++++++++ 16 files changed, 314 insertions(+), 236 deletions(-) diff --git a/aqo--1.2--1.3.sql b/aqo--1.2--1.3.sql index 605e6b99..c29a6f10 100755 --- a/aqo--1.2--1.3.sql +++ b/aqo--1.2--1.3.sql @@ -1,4 +1,4 @@ -ALTER TABLE public.aqo_data ADD COLUMN oids OID [] DEFAULT NULL; +ALTER TABLE public.aqo_data ADD COLUMN oids text [] DEFAULT NULL; -- -- Remove data, related to previously dropped tables, from the AQO tables. 
@@ -9,7 +9,7 @@ DECLARE aqo_queries_row aqo_queries%ROWTYPE; aqo_query_texts_row aqo_query_texts%ROWTYPE; aqo_query_stat_row aqo_query_stat%ROWTYPE; - oid_var oid; + oid_var text; fspace_hash_var bigint; delete_row boolean DEFAULT false; BEGIN @@ -23,7 +23,7 @@ BEGIN IF (aqo_data_row.oids IS NOT NULL) THEN FOREACH oid_var IN ARRAY aqo_data_row.oids LOOP - IF NOT EXISTS (SELECT relname FROM pg_class WHERE oid = oid_var) THEN + IF NOT EXISTS (SELECT relname FROM pg_class WHERE oid::regclass::text = oid_var) THEN delete_row = true; END IF; END LOOP; diff --git a/aqo.h b/aqo.h index 6f3f9018..b43e01a9 100644 --- a/aqo.h +++ b/aqo.h @@ -281,13 +281,12 @@ extern bool find_query(uint64 qhash, QueryContextData *ctx); extern bool update_query(uint64 qhash, uint64 fhash, bool learn_aqo, bool use_aqo, bool auto_tuning); extern bool add_query_text(uint64 query_hash, const char *query_string); -extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, - List **relids, bool isSafe); -extern bool load_fss(uint64 fhash, int fss_hash, OkNNrdata *data, List **relids); -extern bool update_fss_ext(uint64 fhash, int fsshash, OkNNrdata *data, - List *relids, bool isTimedOut); -extern bool update_fss(uint64 fhash, int fss_hash, OkNNrdata *data, - List *relids); +extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relnames, + bool isSafe); +extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames); +extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, + List *relnames, bool isTimedOut); +extern bool update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames); QueryStat *get_aqo_stat(uint64 query_hash); void update_aqo_stat(uint64 query_hash, QueryStat * stat); extern bool my_index_insert(Relation indexRelation, Datum *values, bool *isnull, @@ -308,7 +307,7 @@ extern void print_node_explain(ExplainState *es, PlanState *ps, Plan *plan); /* Cardinality estimation */ double predict_for_relation(List *restrict_clauses, List *selectivities, - 
List *relids, int *fss_hash); + List *relnames, int *fss); /* Query execution statistics collecting hooks */ void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags); @@ -320,13 +319,14 @@ void aqo_ExecutorEnd(QueryDesc *queryDesc); extern void automatical_query_tuning(uint64 query_hash, QueryStat * stat); /* Utilities */ -int int_cmp(const void *a, const void *b); -int double_cmp(const void *a, const void *b); -int *argsort(void *a, int n, size_t es, - int (*cmp) (const void *, const void *)); -int *inverse_permutation(int *a, int n); -QueryStat *palloc_query_stat(void); -void pfree_query_stat(QueryStat *stat); +extern int int64_compare(const void *a, const void *b); +extern int int_cmp(const void *a, const void *b); +extern int double_cmp(const void *a, const void *b); +extern int *argsort(void *a, int n, size_t es, + int (*cmp) (const void *, const void *)); +extern int *inverse_permutation(int *a, int n); +extern QueryStat *palloc_query_stat(void); +extern void pfree_query_stat(QueryStat *stat); /* Selectivity cache for parametrized baserels */ extern void cache_selectivity(int clause_hash, int relid, int global_relid, diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 9bdaff5d..f5202f22 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -26,7 +26,7 @@ #ifdef AQO_DEBUG_PRINT static void predict_debug_output(List *clauses, List *selectivities, - List *relids, int fss_hash, double result) + List *relnames, int fss, double result) { StringInfoData debug_str; ListCell *lc; @@ -42,11 +42,11 @@ predict_debug_output(List *clauses, List *selectivities, appendStringInfo(&debug_str, "%lf ", *s); } - appendStringInfoString(&debug_str, "}, relids: { "); - foreach(lc, relids) + appendStringInfoString(&debug_str, "}, relnames: { "); + foreach(lc, relnames) { - int relid = lfirst_int(lc); - appendStringInfo(&debug_str, "%d ", relid); + String *relname = lfirst_node(String, lc); + appendStringInfo(&debug_str, "%s ", relname->sval); } 
appendStringInfo(&debug_str, "}, result: %lf", result); @@ -60,22 +60,22 @@ predict_debug_output(List *clauses, List *selectivities, */ double predict_for_relation(List *clauses, List *selectivities, - List *relids, int *fss) + List *relnames, int *fss) { double *features; double result; int i; OkNNrdata data; - if (relids == NIL) + if (relnames == NIL) /* * Don't make prediction for query plans without any underlying plane * tables. Use return value -4 for debug purposes. */ return -4.; - *fss = get_fss_for_object(relids, clauses, - selectivities, &data.cols, &features); + *fss = get_fss_for_object(relnames, clauses, selectivities, + &data.cols, &features); if (data.cols > 0) for (i = 0; i < aqo_K; ++i) @@ -94,7 +94,7 @@ predict_for_relation(List *clauses, List *selectivities, result = -1; } #ifdef AQO_DEBUG_PRINT - predict_debug_output(clauses, selectivities, relids, *fss_hash, result); + predict_debug_output(clauses, selectivities, relnames, *fss, result); #endif pfree(features); if (data.cols > 0) diff --git a/cardinality_hooks.c b/cardinality_hooks.c index b15012ca..44240c5d 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -139,8 +139,8 @@ void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) { double predicted; - Oid relid; - List *relids = NIL; + RangeTblEntry *rte; + List *relnames = NIL; List *selectivities = NULL; List *clauses; int fss = 0; @@ -161,19 +161,24 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) goto default_estimator; } - relid = planner_rt_fetch(rel->relid, root)->relid; - if (OidIsValid(relid)) - /* Predict for a plane table only. */ - relids = list_make1_int(relid); + rte = planner_rt_fetch(rel->relid, root); + if (rte && OidIsValid(rte->relid)) + { + String *s = makeNode(String); + + /* Predict for a plane table. 
*/ + Assert(rte->eref && rte->eref->aliasname); + s->sval = pstrdup(rte->eref->aliasname); + relnames = list_make1(s); + } clauses = aqo_get_clauses(root, rel->baserestrictinfo); - predicted = predict_for_relation(clauses, selectivities, - relids, &fss); + predicted = predict_for_relation(clauses, selectivities, relnames, &fss); rel->fss_hash = fss; list_free_deep(selectivities); list_free(clauses); - list_free(relids); + list_free(relnames); if (predicted >= 0) { @@ -209,8 +214,8 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, List *param_clauses) { double predicted; - Oid relid = InvalidOid; - List *relids = NIL; + RangeTblEntry *rte = NULL; + List *relnames = NIL; List *allclauses = NULL; List *selectivities = NULL; ListCell *l; @@ -219,7 +224,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, int *args_hash; int *eclass_hash; int current_hash; - int fss = 0; + int fss = 0; if (IsQueryDisabled()) /* Fast path */ @@ -239,7 +244,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, /* Make specific copy of clauses with mutated subplans */ allclauses = list_concat(aqo_get_clauses(root, param_clauses), aqo_get_clauses(root, rel->baserestrictinfo)); - relid = planner_rt_fetch(rel->relid, root)->relid; + rte = planner_rt_fetch(rel->relid, root); get_eclasses(allclauses, &nargs, &args_hash, &eclass_hash); old_ctx_m = MemoryContextSwitchTo(AQO_cache_mem_ctx); @@ -249,7 +254,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, current_hash = get_clause_hash( ((RestrictInfo *) lfirst(l))->clause, nargs, args_hash, eclass_hash); - cache_selectivity(current_hash, rel->relid, relid, + cache_selectivity(current_hash, rel->relid, rte->relid, *((double *) lfirst(l2))); } @@ -269,11 +274,17 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, goto default_estimator; } - if (OidIsValid(relid)) - /* Predict for a plane table only. 
*/ - relids = list_make1_int(relid); + if (rte && OidIsValid(rte->relid)) + { + String *s = makeNode(String); + + /* Predict for a plane table. */ + Assert(rte->eref && rte->eref->aliasname); + s->sval = pstrdup(rte->eref->aliasname); + relnames = list_make1(s); + } - predicted = predict_for_relation(allclauses, selectivities, relids, &fss); + predicted = predict_for_relation(allclauses, selectivities, relnames, &fss); predicted_ppi_rows = predicted; fss_ppi_hash = fss; @@ -298,7 +309,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, List *restrictlist) { double predicted; - List *relids; + List *relnames; List *outer_clauses; List *inner_clauses; List *allclauses; @@ -324,7 +335,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, goto default_estimator; } - relids = get_list_of_relids(root, rel->relids); + relnames = get_relnames(root, rel->relids); outer_clauses = get_path_clauses(outer_rel->cheapest_total_path, root, &outer_selectivities); inner_clauses = get_path_clauses(inner_rel->cheapest_total_path, root, @@ -335,7 +346,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, list_concat(outer_selectivities, inner_selectivities)); - predicted = predict_for_relation(allclauses, selectivities, relids, &fss); + predicted = predict_for_relation(allclauses, selectivities, relnames, &fss); rel->fss_hash = fss; if (predicted >= 0) @@ -366,7 +377,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, List *clauses) { double predicted; - List *relids; + List *relnames; List *outer_clauses; List *inner_clauses; List *allclauses; @@ -392,7 +403,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, goto default_estimator; } - relids = get_list_of_relids(root, rel->relids); + relnames = get_relnames(root, rel->relids); outer_clauses = get_path_clauses(outer_path, root, &outer_selectivities); inner_clauses = get_path_clauses(inner_path, root, &inner_selectivities); allclauses = 
list_concat(aqo_get_clauses(root, clauses), @@ -401,7 +412,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, list_concat(outer_selectivities, inner_selectivities)); - predicted = predict_for_relation(allclauses, selectivities, relids, &fss); + predicted = predict_for_relation(allclauses, selectivities, relnames, &fss); predicted_ppi_rows = predicted; fss_ppi_hash = fss; @@ -428,13 +439,13 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, child_fss = subpath->parent->fss_hash; else { - List *relids; + List *relnames; List *clauses; List *selectivities = NIL; - relids = get_list_of_relids(root, subpath->parent->relids); + relnames = get_relnames(root, subpath->parent->relids); clauses = get_path_clauses(subpath, root, &selectivities); - (void) predict_for_relation(clauses, selectivities, relids, &child_fss); + (void) predict_for_relation(clauses, selectivities, relnames, &child_fss); } *fss = get_grouped_exprs_hash(child_fss, group_exprs); diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index 91ba7f99..f9288b85 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -13,7 +13,6 @@ SELECT * FROM a; -- (0 rows) -SELECT 'a'::regclass::oid AS a_oid \gset SELECT clean_aqo_data(); NOTICE: Cleaning aqo_data records clean_aqo_data @@ -28,14 +27,14 @@ NOTICE: Cleaning aqo_data records * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat * should remain */ -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); count ------- 1 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); count ------- 1 @@ -43,7 +42,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT 
aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); count ------- 1 @@ -51,7 +50,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); count ------- 1 @@ -72,14 +71,14 @@ NOTICE: Cleaning aqo_data records * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat, * should be deleted */ -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); count ------- 0 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -88,7 +87,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -97,7 +96,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) 
AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -141,17 +140,17 @@ SELECT * FROM b CROSS JOIN a; -- (0 rows) -SELECT 'a'::regclass::oid AS a_oid \gset -SELECT 'b'::regclass::oid AS b_oid \gset +-- SELECT 'a'::regclass::oid AS a_oid \gset +-- SELECT 'b'::regclass::oid AS b_oid \gset -- new lines added to aqo_data -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); count ------- 3 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); count ------- 2 @@ -159,7 +158,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); count ------- 2 @@ -167,20 +166,20 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); count ------- 2 (1 row) -SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); count ------- 3 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = 
ANY(oids)); count ------- 2 @@ -188,7 +187,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); count ------- 2 @@ -196,7 +195,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); count ------- 2 @@ -216,14 +215,14 @@ NOTICE: Cleaning aqo_data records * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_texts, * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_stat, */ -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); count ------- 0 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -232,7 +231,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -241,7 +240,7 @@ SELECT 
count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -249,14 +248,14 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) -- lines corresponding to b_oid in all theese tables should remain -SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); count ------- 1 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -265,7 +264,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -274,7 +273,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -290,14 +289,14 @@ NOTICE: Cleaning aqo_data records (1 row) -- lines 
corresponding to b_oid in theese tables deleted -SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); count ------- 0 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -306,7 +305,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -315,7 +314,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- diff --git a/hash.c b/hash.c index 4510032e..d8083fce 100644 --- a/hash.c +++ b/hash.c @@ -31,7 +31,7 @@ static int get_node_hash(Node *node); static int get_unsorted_unsafe_int_array_hash(int *arr, int len); static int get_unordered_int_list_hash(List *lst); -static int get_relidslist_hash(List *relidslist); +static int64 get_relations_hash(List *relnames); static int get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash); @@ -149,7 +149,7 @@ get_grouped_exprs_hash(int child_fss, List *group_exprs) } /* - * For given object (clauselist, selectivities, relidslist) creates 
feature + * For given object (clauselist, selectivities, relnames) creates feature * subspace: * sets nfeatures * creates and computes fss_hash @@ -158,7 +158,7 @@ get_grouped_exprs_hash(int child_fss, List *group_exprs) * Special case for nfeatures == NULL: don't calculate features. */ int -get_fss_for_object(List *relidslist, List *clauselist, +get_fss_for_object(List *relnames, List *clauselist, List *selectivities, int *nfeatures, double **features) { int n; @@ -172,7 +172,7 @@ get_fss_for_object(List *relidslist, List *clauselist, int *eclass_hash; int clauses_hash; int eclasses_hash; - int relidslist_hash; + int relnames_hash; List **args; ListCell *lc; int i, @@ -181,7 +181,7 @@ get_fss_for_object(List *relidslist, List *clauselist, m; int sh = 0, old_sh; - int fss_hash; + int fss_hash; n = list_length(clauselist); @@ -259,13 +259,11 @@ get_fss_for_object(List *relidslist, List *clauselist, /* * Generate feature subspace hash. - * XXX: Remember! that relidslist_hash isn't portable between postgres - * instances. */ clauses_hash = get_int_array_hash(sorted_clauses, n - sh); eclasses_hash = get_int_array_hash(eclass_hash, nargs); - relidslist_hash = get_relidslist_hash(relidslist); - fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relidslist_hash); + relnames_hash = (int) get_relations_hash(relnames); + fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relnames_hash); pfree(clause_hashes); pfree(sorted_clauses); @@ -436,13 +434,37 @@ get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash) } /* - * Computes hash for given list of relids. - * Hash is supposed to be relids-order-insensitive. + * Computes hash for given list of relations. + * Hash is supposed to be relations-order-insensitive. 
+ * Each element of the list must be of String type. */ -int -get_relidslist_hash(List *relidslist) +static int64 +get_relations_hash(List *relnames) { - return get_unordered_int_list_hash(relidslist); + int64 *hashes = palloc(list_length(relnames) * sizeof(int64)); + ListCell *lc; + int64 hash = 0; + int i = 0; + + /* generate array of hashes. */ + foreach(lc, relnames) + { + String *relname = lfirst_node(String, lc); + + hashes[i++] = DatumGetInt64(hash_any_extended( + (unsigned char *) relname->sval, + strlen(relname->sval), 0)); + } + + /* Sort the array to make query insensitive to input order of relations. */ + qsort(hashes, i, sizeof(int64), int64_compare); + + /* Make a final hash value */ + hash = DatumGetInt64(hash_any_extended((unsigned char *) hashes, + i * sizeof(int64), 0)); + + pfree(hashes); + return hash; } /* diff --git a/hash.h b/hash.h index 0a98814b..b33b1990 100644 --- a/hash.h +++ b/hash.h @@ -7,7 +7,7 @@ extern uint64 get_query_hash(Query *parse, const char *query_text); extern bool list_member_uint64(const List *list, uint64 datum); extern List *lappend_uint64(List *list, uint64 datum); extern List *ldelete_uint64(List *list, uint64 datum); -extern int get_fss_for_object(List *relidslist, List *clauselist, +extern int get_fss_for_object(List *relnames, List *clauselist, List *selectivities, int *nfeatures, double **features); extern int get_int_array_hash(int *arr, int len); diff --git a/learn_cache.c b/learn_cache.c index c3f65d3f..f2bbeca5 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -45,18 +45,25 @@ static uint32 init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relids); /* Calculate, how many data we need to store an ML record. 
*/ static uint32 -calculate_size(int cols, int nrelids) +calculate_size(int cols, List *relnames) { - uint32 size = sizeof(dsm_block_hdr); /* header's size */ + uint32 size = sizeof(dsm_block_hdr); /* header's size */ + ListCell *lc; size += sizeof(double) * cols * aqo_K; /* matrix */ size += 2 * sizeof(double) * aqo_K; /* targets, rfactors */ - size += sizeof(int) * nrelids; /* relids */ + + /* Calculate memory size needed to store relation names */ + foreach(lc, relnames) + { + size += strlen(lfirst_node(String, lc)->sval) + 1; + } + return size; } bool -lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) +lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) { htab_key key = {fs, fss}; htab_entry *entry; @@ -69,7 +76,7 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) Assert(fss_htab && aqo_learn_statement_timeout); - size = calculate_size(data->cols, list_length(relids)); + size = calculate_size(data->cols, relnames); LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); entry = (htab_entry *) hash_search(fss_htab, &key, HASH_ENTER, &found); @@ -80,7 +87,7 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) Assert(hdr->magic == AQO_SHARED_MAGIC); Assert(hdr->key.fs == fs && hdr->key.fss == fss); - if (data->cols != hdr->cols || list_length(relids) != hdr->nrelids) + if (data->cols != hdr->cols || list_length(relnames) != hdr->nrelids) { /* * Collision found: the same {fs,fss}, but something different. @@ -102,7 +109,7 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) hdr->key.fs = fs; hdr->key.fss = fss; hdr->cols = data->cols; - hdr->nrelids = list_length(relids); + hdr->nrelids = list_length(relnames); } hdr->rows = data->rows; @@ -124,12 +131,14 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) memcpy(ptr, data->rfactors, sizeof(double) * hdr->rows); ptr += sizeof(double) * aqo_K; - /* store relids */ - i = 0; - foreach(lc, relids) + /* store strings of relation names. 
Each string ends with 0-byte */ + foreach(lc, relnames) { - memcpy(ptr, &lfirst_int(lc), sizeof(int)); - ptr += sizeof(int); + char *relname = lfirst_node(String, lc)->sval; + int len = strlen(relname) + 1; + + memcpy(ptr, relname, len); + ptr += len; } /* Check the invariant */ @@ -163,7 +172,7 @@ lc_has_fss(uint64 fs, int fss) * Load ML data from a memory cache, not from a table. */ bool -lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) +lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) { htab_key key = {fs, fss}; htab_entry *entry; @@ -195,13 +204,13 @@ lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) return false; } - init_with_dsm(data, hdr, relids); + init_with_dsm(data, hdr, relnames); LWLockRelease(&aqo_state->lock); return true; } static uint32 -init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relids) +init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relnames) { int i; char *ptr = (char *) hdr + sizeof(dsm_block_hdr); @@ -225,22 +234,27 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relids) ptr += sizeof(double) * data->cols; } } + memcpy(data->targets, ptr, sizeof(double) * hdr->rows); ptr += sizeof(double) * aqo_K; memcpy(data->rfactors, ptr, sizeof(double) * hdr->rows); ptr += sizeof(double) * aqo_K; - if (relids) + if (relnames) { - *relids = NIL; + *relnames = NIL; for (i = 0; i < hdr->nrelids; i++) { - *relids = lappend_int(*relids, *((int *)ptr)); - ptr += sizeof(int); + String *s = makeNode(String); + int len = strlen(ptr) + 1; + + s->sval = pstrdup(ptr); + *relnames = lappend(*relnames, s); + ptr += len; } } - return calculate_size(hdr->cols, hdr->nrelids); + return calculate_size(hdr->cols, *relnames); } void @@ -261,13 +275,13 @@ lc_flush_data(void) { dsm_block_hdr *hdr = (dsm_block_hdr *) ptr; OkNNrdata data; - List *relids; + List *relnames; uint32 delta = 0; - delta = init_with_dsm(&data, hdr, &relids); + delta = init_with_dsm(&data, hdr, &relnames); ptr += delta; 
size -= delta; - update_fss(hdr->key.fs, hdr->key.fss, &data, relids); + update_fss(hdr->key.fs, hdr->key.fss, &data, relnames); if (!hash_search(fss_htab, (void *) &hdr->key, HASH_REMOVE, NULL)) elog(ERROR, "[AQO] Flush: local ML cache is corrupted."); diff --git a/learn_cache.h b/learn_cache.h index 194f92c2..eccca22a 100644 --- a/learn_cache.h +++ b/learn_cache.h @@ -7,12 +7,10 @@ extern bool aqo_learn_statement_timeout; -extern bool lc_update_fss(uint64 fhash, int fsshash, OkNNrdata *data, - List *relids); -extern bool lc_has_fss(uint64 fhash, int fss); -extern bool lc_load_fss(uint64 fhash, int fsshash, OkNNrdata *data, - List **relids); -extern void lc_remove_fss(uint64 fhash, int fss_hash); +extern bool lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames); +extern bool lc_has_fss(uint64 fs, int fss); +extern bool lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames); +extern void lc_remove_fss(uint64 fs, int fss); extern void lc_flush_data(void); extern void lc_assign_hook(bool newval, void *extra); diff --git a/path_utils.c b/path_utils.c index ce616555..860cc832 100644 --- a/path_utils.c +++ b/path_utils.c @@ -125,14 +125,14 @@ get_selectivities(PlannerInfo *root, /* * Transforms given relids from path optimization stage format to list of - * an absolute (independent on query optimization context) relids. + * an absolute (independent on query optimization context) relnames. 
*/ List * -get_list_of_relids(PlannerInfo *root, Relids relids) +get_relnames(PlannerInfo *root, Relids relids) { - int i; - RangeTblEntry *entry; - List *l = NIL; + int i; + RangeTblEntry *rte; + List *l = NIL; if (relids == NULL) return NIL; @@ -146,9 +146,14 @@ get_list_of_relids(PlannerInfo *root, Relids relids) i = -1; while ((i = bms_next_member(relids, i)) >= 0) { - entry = planner_rt_fetch(i, root); - if (OidIsValid(entry->relid)) - l = lappend_int(l, entry->relid); + rte = planner_rt_fetch(i, root); + if (OidIsValid(rte->relid)) + { + String *s = makeNode(String); + + s->sval = pstrdup(rte->eref->aliasname); + l = lappend(l, s); + } } return l; } @@ -404,9 +409,9 @@ is_appropriate_path(Path *path) void aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) { - bool is_join_path; - Plan *plan = *dest; - AQOPlanNode *node; + bool is_join_path; + Plan *plan = *dest; + AQOPlanNode *node; if (prev_create_plan_hook) prev_create_plan_hook(root, src, dest); @@ -444,7 +449,7 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) (*dest)->lefttree->targetlist); /* Copy bare expressions for further AQO learning case. 
*/ node->grouping_exprs = copyObject(groupExprs); - node->relids = get_list_of_relids(root, ap->subpath->parent->relids); + node->relids = get_relnames(root, ap->subpath->parent->relids); node->jointype = JOIN_INNER; } else if (is_appropriate_path(src)) @@ -456,7 +461,7 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) } node->relids = list_concat(node->relids, - get_list_of_relids(root, src->parent->relids)); + get_relnames(root, src->parent->relids)); if (src->parallel_workers > 0) node->parallel_divisor = get_parallel_divisor(src); @@ -626,7 +631,7 @@ aqo_store_upper_signature_hook(PlannerInfo *root, void *extra) { A_Const *fss_node = makeNode(A_Const); - List *relids; + List *relnames; List *clauses; List *selectivities; diff --git a/path_utils.h b/path_utils.h index 5ee4bba5..54ee181d 100644 --- a/path_utils.h +++ b/path_utils.h @@ -16,6 +16,7 @@ typedef struct AQOPlanNode ExtensibleNode node; bool had_path; List *relids; + List *temp_relnames; /* We store the name of a temporary table because its OID, by default, has no meaning at other backends. 
*/ List *clauses; List *selectivities; @@ -47,7 +48,7 @@ extern List *get_selectivities(PlannerInfo *root, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo); -extern List *get_list_of_relids(PlannerInfo *root, Relids relids); +extern List *get_relnames(PlannerInfo *root, Relids relids); extern List *get_path_clauses(Path *path, PlannerInfo *root, diff --git a/postprocessing.c b/postprocessing.c index d2eee036..dd420bce 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -56,10 +56,10 @@ static char *PlanStateInfo = "PlanStateInfo"; /* Query execution statistics collecting utilities */ -static void atomic_fss_learn_step(uint64 fhash, int fss_hash, OkNNrdata *data, +static void atomic_fss_learn_step(uint64 fhash, int fss, OkNNrdata *data, double *features, double target, double rfactor, - List *relids, bool isTimedOut); + List *relnames, bool isTimedOut); static bool learnOnPlanState(PlanState *p, void *context); static void learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, double learned, double rfactor, Plan *plan, @@ -90,8 +90,8 @@ static bool ExtractFromQueryEnv(QueryDesc *queryDesc); */ static void atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, - double *features, double target, double rfactor, - List *relids, bool isTimedOut) + double *features, double target, double rfactor, + List *relnames, bool isTimedOut) { LOCKTAG tag; @@ -102,13 +102,13 @@ atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, data->rows = 0; data->rows = OkNNr_learn(data, features, target, rfactor); - update_fss_ext(fs, fss, data, relids, isTimedOut); + update_fss_ext(fs, fss, data, relnames, isTimedOut); LockRelease(&tag, ExclusiveLock, false); } static void -learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, +learn_agg_sample(aqo_obj_stat *ctx, List *relnames, double learned, double rfactor, Plan *plan, bool notExecuted) { AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); @@ -127,7 +127,7 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, 
return; target = log(learned); - child_fss = get_fss_for_object(relidslist, ctx->clauselist, NIL, NULL, NULL); + child_fss = get_fss_for_object(relnames, ctx->clauselist, NIL, NULL, NULL); fss = get_grouped_exprs_hash(child_fss, aqo_node->grouping_exprs); memset(&data, 0, sizeof(OkNNrdata)); @@ -136,7 +136,7 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, /* Critical section */ atomic_fss_learn_step(fhash, fss, &data, NULL, - target, rfactor, relidslist, ctx->isTimedOut); + target, rfactor, relnames, ctx->isTimedOut); /* End of critical section */ } @@ -145,7 +145,7 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, * true cardinalities) performs learning procedure. */ static void -learn_sample(aqo_obj_stat *ctx, List *relidslist, +learn_sample(aqo_obj_stat *ctx, List *relnames, double learned, double rfactor, Plan *plan, bool notExecuted) { AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); @@ -158,8 +158,8 @@ learn_sample(aqo_obj_stat *ctx, List *relidslist, memset(&data, 0, sizeof(OkNNrdata)); target = log(learned); - fss = get_fss_for_object(relidslist, ctx->clauselist, - ctx->selectivities, &data.cols, &features); + fss = get_fss_for_object(relnames, ctx->clauselist, + ctx->selectivities, &data.cols, &features); /* Only Agg nodes can have non-empty a grouping expressions list. */ Assert(!IsA(plan, Agg) || aqo_node->grouping_exprs != NIL); @@ -177,7 +177,7 @@ learn_sample(aqo_obj_stat *ctx, List *relidslist, /* Critical section */ atomic_fss_learn_step(fs, fss, &data, features, target, rfactor, - relidslist, ctx->isTimedOut); + relnames, ctx->isTimedOut); /* End of critical section */ if (data.cols > 0) @@ -192,9 +192,7 @@ learn_sample(aqo_obj_stat *ctx, List *relidslist, * the same selectivities of clauses as were used at query optimization stage. 
*/ List * -restore_selectivities(List *clauselist, - List *relidslist, - JoinType join_type, +restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, bool was_parametrized) { List *lst = NIL; diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index 9e597e6d..b869c037 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -10,7 +10,6 @@ DROP TABLE IF EXISTS a; DROP TABLE IF EXISTS b; CREATE TABLE a(); SELECT * FROM a; -SELECT 'a'::regclass::oid AS a_oid \gset SELECT clean_aqo_data(); /* @@ -20,15 +19,15 @@ SELECT clean_aqo_data(); * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat * should remain */ -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); DROP TABLE a; SELECT clean_aqo_data(); @@ -40,17 +39,17 @@ SELECT clean_aqo_data(); * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat, * should be deleted */ -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); SELECT count(*) FROM 
aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); CREATE TABLE a(); @@ -68,29 +67,29 @@ CREATE TABLE b(); SELECT * FROM a; SELECT * FROM b; SELECT * FROM b CROSS JOIN a; -SELECT 'a'::regclass::oid AS a_oid \gset -SELECT 'b'::regclass::oid AS b_oid \gset +-- SELECT 'a'::regclass::oid AS a_oid \gset +-- SELECT 'b'::regclass::oid AS b_oid \gset -- new lines added to aqo_data -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM 
aqo_data WHERE 'a' = ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); -SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); DROP TABLE a; SELECT clean_aqo_data(); @@ -101,48 +100,48 @@ SELECT clean_aqo_data(); * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_texts, * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_stat, */ -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash 
= aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); -- lines corresponding to b_oid in all theese tables should remain -SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = 
aqo_queries.query_hash); DROP TABLE b; SELECT clean_aqo_data(); -- lines corresponding to b_oid in theese tables deleted -SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); DROP EXTENSION aqo; \ No newline at end of file diff --git a/storage.c b/storage.c index 38db75b9..4593dfb9 100644 --- a/storage.c +++ b/storage.c @@ -322,60 +322,63 @@ add_query_text(uint64 qhash, const char *query_string) static ArrayType * -form_oids_vector(List *relids) +form_strings_vector(List *relnames) { - Datum *oids; + Datum *rels; ArrayType *array; ListCell *lc; int i = 0; - if (relids == NIL) + if (relnames == NIL) return NULL; - oids = (Datum *) palloc(list_length(relids) * sizeof(Datum)); + rels = (Datum *) palloc(list_length(relnames) * sizeof(Datum)); - foreach(lc, relids) + foreach(lc, relnames) { - Oid relid = lfirst_oid(lc); + char *relname = (lfirst_node(String, lc))->sval; - 
oids[i++] = ObjectIdGetDatum(relid); + rels[i++] = CStringGetTextDatum(relname); } - Assert(i == list_length(relids)); - array = construct_array(oids, i, OIDOID, sizeof(Oid), true, TYPALIGN_INT); - pfree(oids); + array = construct_array(rels, i, TEXTOID, -1, false, TYPALIGN_INT); + pfree(rels); return array; } static List * -deform_oids_vector(Datum datum) +deform_strings_vector(Datum datum) { ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); Datum *values; int i; int nelems = 0; - List *relids = NIL; + List *relnames = NIL; - deconstruct_array(array, - OIDOID, sizeof(Oid), true, TYPALIGN_INT, + deconstruct_array(array, TEXTOID, -1, false, TYPALIGN_INT, &values, NULL, &nelems); for (i = 0; i < nelems; ++i) - relids = lappend_oid(relids, DatumGetObjectId(values[i])); + { + String *s = makeNode(String); + + s->sval = pstrdup(TextDatumGetCString(values[i])); + relnames = lappend(relnames, s); + } pfree(values); pfree(array); - return relids; + return relnames; } bool -load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relids, bool isSafe) +load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relnames, bool isSafe) { if (isSafe && (!aqo_learn_statement_timeout || !lc_has_fss(fs, fss))) - return load_fss(fs, fss, data, relids); + return load_fss(fs, fss, data, relnames); else { Assert(aqo_learn_statement_timeout); - return lc_load_fss(fs, fss, data, relids); + return lc_load_fss(fs, fss, data, relnames); } } @@ -394,7 +397,7 @@ load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relids, bool isSafe) * objects in the given feature space */ bool -load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) +load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) { Relation hrel; Relation irel; @@ -438,8 +441,8 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) deform_vector(values[4], data->targets, &(data->rows)); deform_vector(values[6], data->rfactors, &(data->rows)); - if (relids != NULL) - *relids = 
deform_oids_vector(values[5]); + if (relnames != NULL) + *relnames = deform_strings_vector(values[5]); } else elog(ERROR, "unexpected number of features for hash (" \ @@ -459,13 +462,13 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) } bool -update_fss_ext(uint64 fs, int fsshash, OkNNrdata *data, List *relids, +update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *relnames, bool isTimedOut) { if (!isTimedOut) - return update_fss(fs, fsshash, data, relids); + return update_fss(fs, fss, data, relnames); else - return lc_update_fss(fs, fsshash, data, relids); + return lc_update_fss(fs, fss, data, relnames); } /* @@ -481,7 +484,7 @@ update_fss_ext(uint64 fs, int fsshash, OkNNrdata *data, List *relids, * Caller guaranteed that no one AQO process insert or update this data row. */ bool -update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) +update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) { Relation hrel; Relation irel; @@ -513,9 +516,8 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) tupDesc = RelationGetDescr(hrel); InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 2, 0); - - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fhash)); - ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fsshash)); + ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fs)); + ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss)); index_rescan(scan, key, 2, NULL, 0); @@ -524,8 +526,8 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) if (!find_ok) { - values[0] = Int64GetDatum(fhash); - values[1] = Int32GetDatum(fsshash); + values[0] = Int64GetDatum(fs); + values[1] = Int32GetDatum(fss); values[2] = Int32GetDatum(data->cols); if (data->cols > 0) @@ -536,7 +538,7 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) values[4] = PointerGetDatum(form_vector(data->targets, 
data->rows)); /* Form array of relids. Only once. */ - values[5] = PointerGetDatum(form_oids_vector(relids)); + values[5] = PointerGetDatum(form_strings_vector(relnames)); if ((void *) values[5] == NULL) isnull[5] = true; values[6] = PointerGetDatum(form_vector(data->rfactors, data->rows)); @@ -549,7 +551,7 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) */ simple_heap_insert(hrel, tuple); my_index_insert(irel, values, isnull, &(tuple->t_self), - hrel, UNIQUE_CHECK_YES); + hrel, UNIQUE_CHECK_YES); } else if (!TransactionIdIsValid(snap.xmin) && !TransactionIdIsValid(snap.xmax)) { @@ -569,8 +571,7 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) &update_indexes)) { if (update_indexes) - my_index_insert(irel, values, isnull, - &(nw_tuple->t_self), + my_index_insert(irel, values, isnull, &(nw_tuple->t_self), hrel, UNIQUE_CHECK_YES); result = true; } @@ -580,9 +581,15 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) * Ooops, somebody concurrently updated the tuple. It is possible * only in the case of changes made by third-party code. */ +<<<<<<< HEAD elog(ERROR, "AQO data piece ("UINT64_FORMAT" %d) concurrently" " updated by a stranger backend.", fhash, fsshash); +======= + elog(ERROR, "AQO data piece (%ld %d) concurrently updated" + " by a stranger backend.", + fs, fss); +>>>>>>> ecac693 (Move AQO from a relid based approach to a relation name based approach.) result = false; } } diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 39eebee2..91bae965 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -211,10 +211,10 @@ # Number of rows in aqo_data: related to pgbench test and total value. 
my $pgb_fss_count = $node->safe_psql('postgres', " SELECT count(*) FROM aqo_data - WHERE $aoid = ANY(oids) OR - $boid = ANY(oids) OR - $toid = ANY(oids) OR - $hoid = ANY(oids) + WHERE $aoid ::regclass::text = ANY(oids) OR + $boid ::regclass::text = ANY(oids) OR + $toid ::regclass::text = ANY(oids) OR + $hoid ::regclass::text = ANY(oids) "); $fss_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_data;"); @@ -224,10 +224,10 @@ WHERE fspace_hash IN ( SELECT fspace_hash FROM aqo_data WHERE - $aoid = ANY(oids) OR - $boid = ANY(oids) OR - $toid = ANY(oids) OR - $hoid = ANY(oids) + $aoid ::regclass::text = ANY(oids) OR + $boid ::regclass::text = ANY(oids) OR + $toid ::regclass::text = ANY(oids) OR + $hoid ::regclass::text = ANY(oids) ) "); $fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries;"); @@ -237,7 +237,11 @@ SELECT count(*) FROM aqo_query_texts WHERE query_hash IN ( SELECT fspace_hash FROM aqo_data - WHERE $aoid = ANY(oids) OR $boid = ANY(oids) OR $toid = ANY(oids) OR $hoid = ANY(oids) + WHERE + $aoid ::regclass::text = ANY(oids) OR + $boid ::regclass::text = ANY(oids) OR + $toid ::regclass::text = ANY(oids) OR + $hoid ::regclass::text = ANY(oids) ) "); $fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); @@ -247,7 +251,11 @@ SELECT count(*) FROM aqo_query_texts WHERE query_hash IN ( SELECT fspace_hash FROM aqo_data - WHERE $aoid = ANY(oids) OR $boid = ANY(oids) OR $toid = ANY(oids) OR $hoid = ANY(oids) + WHERE + $aoid ::regclass::text = ANY(oids) OR + $boid ::regclass::text = ANY(oids) OR + $toid ::regclass::text = ANY(oids) OR + $hoid ::regclass::text = ANY(oids) ) "); $stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); diff --git a/utils.c b/utils.c index 8fc0d186..3fda40d6 100644 --- a/utils.c +++ b/utils.c @@ -24,6 +24,22 @@ static int (*argsort_value_cmp) (const void *, const void *); static int argsort_cmp(const void *a, const void *b); +/* + * qsort 
comparator functions + */ + +/* int64 comparator for pg_qsort. */ +int +int64_compare(const void *va, const void *vb) +{ + int64 a = *((const int64 *) va); + int64 b = *((const int64 *) vb); + + if (a == b) + return 0; + return (a > b) ? 1 : -1; +} + /* * Function for qsorting an integer arrays */ From 8ca6d9c751a71baf469ac6f463ab1193792ad33e Mon Sep 17 00:00:00 2001 From: "a.pervushina" Date: Thu, 15 Sep 2022 14:33:51 +0300 Subject: [PATCH 042/134] Bugfix. Detach DSM segment earlier, before cleaning of memory context. Bugfix. Small mistake during calculation of DSM segment size. --- aqo_shared.c | 7 +++---- learn_cache.c | 7 +++++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/aqo_shared.c b/aqo_shared.c index 5d4edb6f..84e6eadb 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -50,7 +50,7 @@ get_dsm_all(uint32 *size) seg = dsm_attach(aqo_state->dsm_handler); Assert(seg); dsm_pin_mapping(seg); - on_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); + before_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); } hdr = (dsm_seg_hdr *) dsm_segment_address(seg); @@ -102,7 +102,7 @@ get_cache_address(void) /* Another process created the segment yet. Just attach to. 
*/ seg = dsm_attach(aqo_state->dsm_handler); dsm_pin_mapping(seg); - on_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); + before_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); } hdr = (dsm_seg_hdr *) dsm_segment_address(seg); @@ -118,7 +118,7 @@ get_cache_address(void) dsm_pin_mapping(seg); dsm_pin_segment(seg); aqo_state->dsm_handler = dsm_segment_handle(seg); - on_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); + before_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); hdr = (dsm_seg_hdr *) dsm_segment_address(seg); hdr->magic = AQO_SHARED_MAGIC; @@ -189,7 +189,6 @@ aqo_init_shmem(void) HASH_ELEM | HASH_BLOBS); LWLockRelease(AddinShmemInitLock); - LWLockRegisterTranche(aqo_state->lock.tranche, "aqo"); } diff --git a/learn_cache.c b/learn_cache.c index f2bbeca5..35cfd57a 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -252,9 +252,11 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relnames) *relnames = lappend(*relnames, s); ptr += len; } + return calculate_size(hdr->cols, *relnames); } - return calculate_size(hdr->cols, *relnames); + /* It is just read operation. No any interest in size calculation. */ + return 0; } void @@ -275,10 +277,11 @@ lc_flush_data(void) { dsm_block_hdr *hdr = (dsm_block_hdr *) ptr; OkNNrdata data; - List *relnames; + List *relnames = NIL; uint32 delta = 0; delta = init_with_dsm(&data, hdr, &relnames); + Assert(delta > 0); ptr += delta; size -= delta; update_fss(hdr->key.fs, hdr->key.fss, &data, relnames); From 93a614a576d0d249a62594ba5716121ccb934e8e Mon Sep 17 00:00:00 2001 From: "a.pervushina" Date: Thu, 15 Sep 2022 14:35:18 +0300 Subject: [PATCH 043/134] Add the show_cardinality_errors routine. Add into AQO SQL interface one more function for an quick check of cardinality errors of last execution of each controlled query. 
--- aqo--1.3--1.4.sql | 29 ++++++++++++++++++++++++ expected/gucs.out | 7 ++++++ expected/unsupported.out | 49 ++++++++++++++++++++++++++++++++++++++++ sql/gucs.sql | 3 +++ sql/unsupported.sql | 16 +++++++++++++ 5 files changed, 104 insertions(+) diff --git a/aqo--1.3--1.4.sql b/aqo--1.3--1.4.sql index 517a6911..16891d34 100755 --- a/aqo--1.3--1.4.sql +++ b/aqo--1.3--1.4.sql @@ -4,3 +4,32 @@ \echo Use "ALTER EXTENSION aqo UPDATE TO '1.4'" to load this file. \quit ALTER TABLE public.aqo_data ADD COLUMN reliability double precision []; + +-- +-- Get IDs of queries having the largest cardinality error when last executed. +-- num - sequental number. Smaller number corresponds to higher error. +-- qhash - ID of a query. +-- error - AQO error calculated over plan nodes of the query. +-- +CREATE OR REPLACE FUNCTION public.show_cardinality_errors() +RETURNS TABLE(num bigint, id bigint, error float) +AS $$ +BEGIN + RETURN QUERY + SELECT + row_number() OVER (ORDER BY (cerror, qhash) DESC) AS nn, + qhash, cerror + FROM ( + SELECT + aq.query_hash AS qhash, + cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + ON aq.query_hash = aqs.query_hash + WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) + ) AS q1 + ORDER BY nn ASC; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION public.show_cardinality_errors() IS +'Get cardinality error of last query execution. Return queries having the largest error.'; diff --git a/expected/gucs.out b/expected/gucs.out index b5089f20..abecf7da 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -32,4 +32,11 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) JOINS: 0 (6 rows) +-- Check existence of the interface functions. 
+SELECT obj_description('public.show_cardinality_errors'::regproc::oid); + obj_description +----------------------------------------------------------------------------------------- + Get cardinality error of last query execution. Return queries having the largest error. +(1 row) + DROP EXTENSION aqo; diff --git a/expected/unsupported.out b/expected/unsupported.out index e34914ed..f766d18b 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -554,5 +554,54 @@ EXPLAIN (COSTS OFF) JOINS: 0 (9 rows) +-- XXX: Do we stuck into an unstable behavior of an error value? +-- Live with this variant of the test for some time. +SELECT + num, + to_char(error, '9.99EEEE')::text AS error +FROM public.show_cardinality_errors() +WHERE error > 0.; + num | error +-----+----------- + 1 | 9.69e+02 + 2 | 1.15e+02 + 3 | 3.00e+01 + 4 | 3.00e+01 + 5 | 3.00e+01 + 6 | 1.33e+00 +(6 rows) + DROP TABLE t,t1 CASCADE; +SELECT public.clean_aqo_data(); +NOTICE: Cleaning aqo_data records + clean_aqo_data +---------------- + +(1 row) + +-- TODO: figure out with remaining queries in the ML storage. 
+SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text +FROM public.show_cardinality_errors() cef, aqo_query_texts aqt +WHERE aqt.query_hash = cef.id; + num | error | query_text +-----+-----------+------------------------------------------------------------------------------------------- + 1 | 9.69e+02 | SELECT str FROM expln(' + + | | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | | SELECT count(*) FROM + + | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + + | | JOIN + + | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2+ + | | ON q1.x = q2.x+1; + + | | ') AS str WHERE str NOT LIKE '%Memory Usage%'; + 2 | 3.27e+02 | SELECT + + | | num, + + | | to_char(error, '9.99EEEE')::text AS error + + | | FROM public.show_cardinality_errors() + + | | WHERE error > 0.; + 3 | 0.00e+00 | CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; + 4 | 0.00e+00 | SELECT public.clean_aqo_data(); + 5 | 0.00e+00 | CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + + | | FROM generate_series(1,1000) AS gs; +(5 rows) + DROP EXTENSION aqo; diff --git a/sql/gucs.sql b/sql/gucs.sql index 0edf9ef7..c51c3699 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -16,4 +16,7 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; +-- Check existence of the interface functions. +SELECT obj_description('public.show_cardinality_errors'::regproc::oid); + DROP EXTENSION aqo; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index accca094..7a21df91 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -172,5 +172,21 @@ ANALYZE t; EXPLAIN (COSTS OFF) SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; +-- XXX: Do we stuck into an unstable behavior of an error value? +-- Live with this variant of the test for some time. 
+SELECT + num, + to_char(error, '9.99EEEE')::text AS error +FROM public.show_cardinality_errors() +WHERE error > 0.; + DROP TABLE t,t1 CASCADE; + +SELECT public.clean_aqo_data(); + +-- TODO: figure out with remaining queries in the ML storage. +SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text +FROM public.show_cardinality_errors() cef, aqo_query_texts aqt +WHERE aqt.query_hash = cef.id; + DROP EXTENSION aqo; From c387db71ad36f6dc455253958be93676a1606cda Mon Sep 17 00:00:00 2001 From: "a.pervushina" Date: Thu, 15 Sep 2022 14:36:23 +0300 Subject: [PATCH 044/134] Bugfixes: 1. Increase stability of the pgbench test. 2. Open subsidiary AQO relations more carefully. --- storage.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/storage.c b/storage.c index 4593dfb9..5b5c33ca 100644 --- a/storage.c +++ b/storage.c @@ -581,15 +581,9 @@ update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) * Ooops, somebody concurrently updated the tuple. It is possible * only in the case of changes made by third-party code. */ -<<<<<<< HEAD elog(ERROR, "AQO data piece ("UINT64_FORMAT" %d) concurrently" " updated by a stranger backend.", fhash, fsshash); -======= - elog(ERROR, "AQO data piece (%ld %d) concurrently updated" - " by a stranger backend.", - fs, fss); ->>>>>>> ecac693 (Move AQO from a relid based approach to a relation name based approach.) result = false; } } From 9c1bede2f166c4b16dd1c17c5dcca50ce977507c Mon Sep 17 00:00:00 2001 From: "a.pervushina" Date: Thu, 15 Sep 2022 14:37:26 +0300 Subject: [PATCH 045/134] Reconcile backpatched (PG 15 -> 13) features with the code of PG13. 
--- aqo.c | 2 +- cardinality_estimation.c | 4 ++-- cardinality_hooks.c | 10 ++-------- expected/gucs.out | 2 +- expected/unsupported.out | 7 ++++--- hash.c | 6 +++--- learn_cache.c | 8 +++----- path_utils.c | 11 +++-------- sql/unsupported.sql | 3 ++- storage.c | 13 ++++++++----- 10 files changed, 29 insertions(+), 37 deletions(-) diff --git a/aqo.c b/aqo.c index a0b2dccf..3e0210e8 100644 --- a/aqo.c +++ b/aqo.c @@ -247,7 +247,7 @@ _PG_init(void) RegisterResourceReleaseCallback(aqo_free_callback, NULL); RegisterAQOPlanNodeMethods(); - MarkGUCPrefixReserved("aqo"); + EmitWarningsOnPlaceholders("aqo"); RequestAddinShmemSpace(aqo_memsize()); } diff --git a/cardinality_estimation.c b/cardinality_estimation.c index f5202f22..7740528a 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -45,8 +45,8 @@ predict_debug_output(List *clauses, List *selectivities, appendStringInfoString(&debug_str, "}, relnames: { "); foreach(lc, relnames) { - String *relname = lfirst_node(String, lc); - appendStringInfo(&debug_str, "%s ", relname->sval); + Value *relname = lfirst_node(String, lc); + appendStringInfo(&debug_str, "%s ", valStr(relname)); } appendStringInfo(&debug_str, "}, result: %lf", result); diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 44240c5d..aa198cc9 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -164,12 +164,9 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) rte = planner_rt_fetch(rel->relid, root); if (rte && OidIsValid(rte->relid)) { - String *s = makeNode(String); - /* Predict for a plane table. 
*/ Assert(rte->eref && rte->eref->aliasname); - s->sval = pstrdup(rte->eref->aliasname); - relnames = list_make1(s); + relnames = list_make1(makeString(pstrdup(rte->eref->aliasname))); } clauses = aqo_get_clauses(root, rel->baserestrictinfo); @@ -276,12 +273,9 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, if (rte && OidIsValid(rte->relid)) { - String *s = makeNode(String); - /* Predict for a plane table. */ Assert(rte->eref && rte->eref->aliasname); - s->sval = pstrdup(rte->eref->aliasname); - relnames = list_make1(s); + relnames = list_make1(makeString(pstrdup(rte->eref->aliasname))); } predicted = predict_for_relation(allclauses, selectivities, relnames, &fss); diff --git a/expected/gucs.out b/expected/gucs.out index abecf7da..a9df2926 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -34,7 +34,7 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) -- Check existence of the interface functions. SELECT obj_description('public.show_cardinality_errors'::regproc::oid); - obj_description + obj_description ----------------------------------------------------------------------------------------- Get cardinality error of last query execution. Return queries having the largest error. (1 row) diff --git a/expected/unsupported.out b/expected/unsupported.out index f766d18b..ad61852e 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -582,7 +582,8 @@ NOTICE: Cleaning aqo_data records -- TODO: figure out with remaining queries in the ML storage. 
SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text FROM public.show_cardinality_errors() cef, aqo_query_texts aqt -WHERE aqt.query_hash = cef.id; +WHERE aqt.query_hash = cef.id +ORDER BY (error, md5(query_text)) DESC; num | error | query_text -----+-----------+------------------------------------------------------------------------------------------- 1 | 9.69e+02 | SELECT str FROM expln(' + @@ -598,9 +599,9 @@ WHERE aqt.query_hash = cef.id; | | to_char(error, '9.99EEEE')::text AS error + | | FROM public.show_cardinality_errors() + | | WHERE error > 0.; - 3 | 0.00e+00 | CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; + 5 | 0.00e+00 | CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; 4 | 0.00e+00 | SELECT public.clean_aqo_data(); - 5 | 0.00e+00 | CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + + 3 | 0.00e+00 | CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + | | FROM generate_series(1,1000) AS gs; (5 rows) diff --git a/hash.c b/hash.c index d8083fce..1f8f8112 100644 --- a/hash.c +++ b/hash.c @@ -449,11 +449,11 @@ get_relations_hash(List *relnames) /* generate array of hashes. */ foreach(lc, relnames) { - String *relname = lfirst_node(String, lc); + Value *relname = (Value *) lfirst(lc); hashes[i++] = DatumGetInt64(hash_any_extended( - (unsigned char *) relname->sval, - strlen(relname->sval), 0)); + (unsigned char *) strVal(relname), + strlen(strVal(relname)), 0)); } /* Sort the array to make query insensitive to input order of relations. 
*/ diff --git a/learn_cache.c b/learn_cache.c index 35cfd57a..316968b0 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -56,7 +56,7 @@ calculate_size(int cols, List *relnames) /* Calculate memory size needed to store relation names */ foreach(lc, relnames) { - size += strlen(lfirst_node(String, lc)->sval) + 1; + size += strlen(strVal(lfirst(lc))) + 1; } return size; @@ -134,7 +134,7 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) /* store strings of relation names. Each string ends with 0-byte */ foreach(lc, relnames) { - char *relname = lfirst_node(String, lc)->sval; + char *relname = strVal(lfirst(lc)); int len = strlen(relname) + 1; memcpy(ptr, relname, len); @@ -245,11 +245,9 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relnames) *relnames = NIL; for (i = 0; i < hdr->nrelids; i++) { - String *s = makeNode(String); int len = strlen(ptr) + 1; - s->sval = pstrdup(ptr); - *relnames = lappend(*relnames, s); + *relnames = lappend(*relnames, makeString(pstrdup(ptr))); ptr += len; } return calculate_size(hdr->cols, *relnames); diff --git a/path_utils.c b/path_utils.c index 860cc832..d6463bfb 100644 --- a/path_utils.c +++ b/path_utils.c @@ -148,12 +148,7 @@ get_relnames(PlannerInfo *root, Relids relids) { rte = planner_rt_fetch(i, root); if (OidIsValid(rte->relid)) - { - String *s = makeNode(String); - - s->sval = pstrdup(rte->eref->aliasname); - l = lappend(l, s); - } + l = lappend(l, makeString(pstrdup(rte->eref->aliasname))); } return l; } @@ -648,9 +643,9 @@ aqo_store_upper_signature_hook(PlannerInfo *root, set_cheapest(input_rel); clauses = get_path_clauses(input_rel->cheapest_total_path, root, &selectivities); - relids = get_list_of_relids(root, input_rel->relids); + relnames = get_relnames(root, input_rel->relids); fss_node->val.type = T_Integer; fss_node->location = -1; - fss_node->val.val.ival = get_fss_for_object(relids, clauses, NIL, NULL, NULL); + fss_node->val.val.ival = get_fss_for_object(relnames, clauses, NIL, NULL, 
NULL); output_rel->ext_nodes = lappend(output_rel->ext_nodes, (void *) fss_node); } diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 7a21df91..19e00767 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -187,6 +187,7 @@ SELECT public.clean_aqo_data(); -- TODO: figure out with remaining queries in the ML storage. SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text FROM public.show_cardinality_errors() cef, aqo_query_texts aqt -WHERE aqt.query_hash = cef.id; +WHERE aqt.query_hash = cef.id +ORDER BY (error, md5(query_text)) DESC; DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index 5b5c33ca..4fcb8ece 100644 --- a/storage.c +++ b/storage.c @@ -17,6 +17,9 @@ #include "postgres.h" +#include "nodes/value.h" +#include "postgres.h" + #include "access/heapam.h" #include "access/table.h" #include "access/tableam.h" @@ -336,7 +339,7 @@ form_strings_vector(List *relnames) foreach(lc, relnames) { - char *relname = (lfirst_node(String, lc))->sval; + char *relname = strVal(lfirst(lc)); rels[i++] = CStringGetTextDatum(relname); } @@ -359,9 +362,9 @@ deform_strings_vector(Datum datum) &values, NULL, &nelems); for (i = 0; i < nelems; ++i) { - String *s = makeNode(String); + Value *s; - s->sval = pstrdup(TextDatumGetCString(values[i])); + s = makeString(pstrdup(TextDatumGetCString(values[i]))); relnames = lappend(relnames, s); } @@ -448,7 +451,7 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) elog(ERROR, "unexpected number of features for hash (" \ UINT64_FORMAT", %d):\ expected %d features, obtained %d", - fs, fss, ncols, DatumGetInt32(values[2])); + fs, fss, data->cols, DatumGetInt32(values[2])); } else success = false; @@ -583,7 +586,7 @@ update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) */ elog(ERROR, "AQO data piece ("UINT64_FORMAT" %d) concurrently" " updated by a stranger backend.", - fhash, fsshash); + fs, fss); result = false; } } From 1c3c08852eab1063b55168ad1a45cab264ef31c4 Mon Sep 17 00:00:00 2001 
From: "a.pervushina" Date: Thu, 15 Sep 2022 15:31:11 +0300 Subject: [PATCH 046/134] Fix some problems found during underwent of the AQO by Join-Order-Benchmark: 1. Minor code improvements 2. Introduce the show_cardinality_errors(bool) routine that can show cardinality errors detected by the AQO that made during last execution under or without AQO control. 3. Ignore queries that don't touch any database relations. --- aqo--1.3--1.4.sql | 67 +++++++++++++++++++++++---------- expected/gucs.out | 6 +-- expected/top_queries.out | 56 +++++++++++++++++---------- expected/unsupported.out | 81 ++++++++++++++++++++++------------------ preprocessing.c | 14 ++++++- sql/top_queries.sql | 29 +++++++++----- sql/unsupported.sql | 12 +++--- t/001_pgbench.pl | 14 +++---- 8 files changed, 176 insertions(+), 103 deletions(-) diff --git a/aqo--1.3--1.4.sql b/aqo--1.3--1.4.sql index 16891d34..f6df0263 100755 --- a/aqo--1.3--1.4.sql +++ b/aqo--1.3--1.4.sql @@ -5,31 +5,60 @@ ALTER TABLE public.aqo_data ADD COLUMN reliability double precision []; +DROP FUNCTION public.top_error_queries(int); + +-- +-- Get cardinality error of queries the last time they were executed. +-- IN: +-- controlled - show queries executed under a control of AQO (true); +-- executed without an AQO control, but AQO has a stat on the query (false). -- --- Get IDs of queries having the largest cardinality error when last executed. +-- OUT: -- num - sequental number. Smaller number corresponds to higher error. --- qhash - ID of a query. --- error - AQO error calculated over plan nodes of the query. +-- id - ID of a query. +-- fshash - feature space. Usually equal to zero or ID. +-- error - AQO error that calculated on plan nodes of the query. +-- nexecs - number of executions of queries associated with this ID. 
-- -CREATE OR REPLACE FUNCTION public.show_cardinality_errors() -RETURNS TABLE(num bigint, id bigint, error float) +CREATE OR REPLACE FUNCTION public.show_cardinality_errors(controlled boolean) +RETURNS TABLE(num bigint, id bigint, fshash bigint, error float, nexecs bigint) AS $$ BEGIN +IF (controlled) THEN + RETURN QUERY + SELECT + row_number() OVER (ORDER BY (cerror, query_id, fs_hash) DESC) AS nn, + query_id, fs_hash, cerror, execs + FROM ( + SELECT + aq.query_hash AS query_id, + aq.fspace_hash AS fs_hash, + cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror, + executions_with_aqo AS execs + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + ON aq.query_hash = aqs.query_hash + WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) + ) AS q1 + ORDER BY nn ASC; +ELSE RETURN QUERY - SELECT - row_number() OVER (ORDER BY (cerror, qhash) DESC) AS nn, - qhash, cerror - FROM ( - SELECT - aq.query_hash AS qhash, - cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror - FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs - ON aq.query_hash = aqs.query_hash - WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) - ) AS q1 - ORDER BY nn ASC; + SELECT + row_number() OVER (ORDER BY (cerror, query_id, fs_hash) DESC) AS nn, + query_id, fs_hash, cerror, execs + FROM ( + SELECT + aq.query_hash AS query_id, + aq.fspace_hash AS fs_hash, + array_avg(cardinality_error_without_aqo) AS cerror, + executions_without_aqo AS execs + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + ON aq.query_hash = aqs.query_hash + WHERE TRUE = ANY (SELECT unnest(cardinality_error_without_aqo) IS NOT NULL) + ) AS q1 + ORDER BY (nn) ASC; +END IF; END; $$ LANGUAGE plpgsql; -COMMENT ON FUNCTION public.show_cardinality_errors() IS -'Get cardinality error of last query execution. 
Return queries having the largest error.'; +COMMENT ON FUNCTION public.show_cardinality_errors(boolean) IS +'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; diff --git a/expected/gucs.out b/expected/gucs.out index a9df2926..fe46e53c 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -34,9 +34,9 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) -- Check existence of the interface functions. SELECT obj_description('public.show_cardinality_errors'::regproc::oid); - obj_description ------------------------------------------------------------------------------------------ - Get cardinality error of last query execution. Return queries having the largest error. + obj_description +--------------------------------------------------------------------------------------------------------------- + Get cardinality error of queries the last time they were executed. Order queries according to an error value. (1 row) DROP EXTENSION aqo; diff --git a/expected/top_queries.out b/expected/top_queries.out index 057e1bd4..77a7e280 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -6,23 +6,31 @@ CREATE EXTENSION aqo; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; -- --- num of generate_series(1,1000000) query should be the first +-- Dummy test. CREATE TABLE shouldn't find in the ML storage. But a simple +-- select must be in. Also here we test on gathering a stat on temp and plain +-- relations. 
-- -SELECT count(*) FROM generate_series(1,1000000); - count ---------- - 1000000 +CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); +CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); +SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; + cnt +----- + 0 +(1 row) + +SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; + cnt +----- + 0 (1 row) -SELECT num FROM top_time_queries(10) AS tt WHERE - tt.fspace_hash = (SELECT fspace_hash FROM aqo_queries WHERE - aqo_queries.query_hash = (SELECT aqo_query_texts.query_hash FROM aqo_query_texts - WHERE query_text = 'SELECT count(*) FROM generate_series(1,1000000);')); -NOTICE: Top 10 execution time queries +SELECT num FROM top_time_queries(3); +NOTICE: Top 3 execution time queries num ----- 1 -(1 row) + 2 +(2 rows) -- -- num of query uses table t2 should be bigger than num of query uses table t1 and be the first @@ -43,13 +51,23 @@ SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y 31 (1 row) -SELECT num FROM top_error_queries(10) AS te WHERE - te.fspace_hash = (SELECT fspace_hash FROM aqo_queries WHERE - aqo_queries.query_hash = (SELECT aqo_query_texts.query_hash FROM aqo_query_texts - WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;')); -NOTICE: Top 10 cardinality error queries - num ------ - 1 +SELECT num, to_char(error, '9.99EEEE') FROM show_cardinality_errors(false) AS te +WHERE te.fshash = ( + SELECT fspace_hash FROM aqo_queries + WHERE aqo_queries.query_hash = ( + SELECT aqo_query_texts.query_hash FROM aqo_query_texts + WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' + ) +); + num | to_char +-----+----------- + 1 | 1.94e+00 +(1 row) + +-- Should return zero +SELECT count(*) FROM show_cardinality_errors(true); + count +------- + 0 (1 row) diff --git a/expected/unsupported.out b/expected/unsupported.out index 
ad61852e..c26c1d72 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -557,52 +557,59 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? -- Live with this variant of the test for some time. SELECT - num, - to_char(error, '9.99EEEE')::text AS error -FROM public.show_cardinality_errors() -WHERE error > 0.; - num | error ------+----------- - 1 | 9.69e+02 - 2 | 1.15e+02 - 3 | 3.00e+01 - 4 | 3.00e+01 - 5 | 3.00e+01 - 6 | 1.33e+00 -(6 rows) + num, to_char(error, '9.99EEEE')::text AS error, query_text +FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt +WHERE aqt.query_hash = cef.id +ORDER BY (error, md5(query_text)) DESC; + num | error | query_text +-----+-----------+------------------------------------------------------------------------------------------------ + 1 | 1.15e+02 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + 3 | 3.00e+01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + 4 | 3.00e+01 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + 2 | 3.00e+01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + 5 | 1.33e+00 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + 11 | 0.00e+00 | SELECT * FROM + + | | (SELECT * FROM t WHERE x < 0) AS t0 + + | | JOIN + + | | (SELECT * FROM t WHERE x > 20) AS t1 + + | | USING(x); + 10 | 0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 12 | 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | | SELECT count(*) FROM t WHERE + + | | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + + | | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + 8 | 0.00e+00 | SELECT count(*) FROM ( + + | | SELECT count(*) AS x FROM ( + + | | SELECT count(*) FROM t1 GROUP BY (x,y) + + | | ) AS q1 + + | | ) AS q2 + + | | WHERE q2.x > 1; + 9 | 0.00e+00 | SELECT count(*) FROM t WHERE x = 
(SELECT avg(x) FROM t WHERE x = 1); + 6 | 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + + | | (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 7 | 0.00e+00 | SELECT count(*) FROM + + | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + + | | JOIN + + | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + + | | ON q1.x = q2.x+1; +(12 rows) DROP TABLE t,t1 CASCADE; SELECT public.clean_aqo_data(); NOTICE: Cleaning aqo_data records - clean_aqo_data + clean_aqo_data ---------------- - + (1 row) --- TODO: figure out with remaining queries in the ML storage. +-- Look for any remaining queries in the ML storage. SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.show_cardinality_errors() cef, aqo_query_texts aqt +FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id ORDER BY (error, md5(query_text)) DESC; - num | error | query_text ------+-----------+------------------------------------------------------------------------------------------- - 1 | 9.69e+02 | SELECT str FROM expln(' + - | | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + - | | SELECT count(*) FROM + - | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + - | | JOIN + - | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2+ - | | ON q1.x = q2.x+1; + - | | ') AS str WHERE str NOT LIKE '%Memory Usage%'; - 2 | 3.27e+02 | SELECT + - | | num, + - | | to_char(error, '9.99EEEE')::text AS error + - | | FROM public.show_cardinality_errors() + - | | WHERE error > 0.; - 5 | 0.00e+00 | CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; - 4 | 0.00e+00 | SELECT public.clean_aqo_data(); - 3 | 0.00e+00 | CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + - | | FROM 
generate_series(1,1000) AS gs; -(5 rows) + num | error | query_text +-----+-------+------------ +(0 rows) DROP EXTENSION aqo; diff --git a/preprocessing.c b/preprocessing.c index ae992041..af10ae7f 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -407,12 +407,19 @@ disable_aqo_for_query(void) /* * Examine a fully-parsed query, and return TRUE iff any relation underlying - * the query is a system relation. + * the query is a system relation or no one relation touched by the query. */ static bool isQueryUsingSystemRelation(Query *query) { - return isQueryUsingSystemRelation_walker((Node *) query, NULL); + bool trivQuery = true; + bool result; + + result = isQueryUsingSystemRelation_walker((Node *) query, &trivQuery); + + if (result || trivQuery) + return true; + return false; } @@ -451,10 +458,13 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) Relation rel = table_open(rte->relid, AccessShareLock); bool is_catalog = IsCatalogRelation(rel); bool is_aqo_rel = IsAQORelation(rel); + bool *trivQuery = (bool *) context; table_close(rel, AccessShareLock); if (is_catalog || is_aqo_rel) return true; + + *trivQuery = false; } else if (rte->rtekind == RTE_FUNCTION) { diff --git a/sql/top_queries.sql b/sql/top_queries.sql index eb397db8..520f3ce3 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -8,13 +8,15 @@ SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; -- --- num of generate_series(1,1000000) query should be the first +-- Dummy test. CREATE TABLE shouldn't find in the ML storage. But a simple +-- select must be in. Also here we test on gathering a stat on temp and plain +-- relations. 
-- -SELECT count(*) FROM generate_series(1,1000000); -SELECT num FROM top_time_queries(10) AS tt WHERE - tt.fspace_hash = (SELECT fspace_hash FROM aqo_queries WHERE - aqo_queries.query_hash = (SELECT aqo_query_texts.query_hash FROM aqo_query_texts - WHERE query_text = 'SELECT count(*) FROM generate_series(1,1000000);')); +CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); +CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); +SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; +SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; +SELECT num FROM top_time_queries(3); -- -- num of query uses table t2 should be bigger than num of query uses table t1 and be the first @@ -26,7 +28,14 @@ CREATE TABLE t2 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; -SELECT num FROM top_error_queries(10) AS te WHERE - te.fspace_hash = (SELECT fspace_hash FROM aqo_queries WHERE - aqo_queries.query_hash = (SELECT aqo_query_texts.query_hash FROM aqo_query_texts - WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;')); \ No newline at end of file +SELECT num, to_char(error, '9.99EEEE') FROM show_cardinality_errors(false) AS te +WHERE te.fshash = ( + SELECT fspace_hash FROM aqo_queries + WHERE aqo_queries.query_hash = ( + SELECT aqo_query_texts.query_hash FROM aqo_query_texts + WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' + ) +); + +-- Should return zero +SELECT count(*) FROM show_cardinality_errors(true); diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 19e00767..335d8ad2 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -175,18 +175,18 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of 
an error value? -- Live with this variant of the test for some time. SELECT - num, - to_char(error, '9.99EEEE')::text AS error -FROM public.show_cardinality_errors() -WHERE error > 0.; + num, to_char(error, '9.99EEEE')::text AS error, query_text +FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt +WHERE aqt.query_hash = cef.id +ORDER BY (error, md5(query_text)) DESC; DROP TABLE t,t1 CASCADE; SELECT public.clean_aqo_data(); --- TODO: figure out with remaining queries in the ML storage. +-- Look for any remaining queries in the ML storage. SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.show_cardinality_errors() cef, aqo_query_texts aqt +FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id ORDER BY (error, md5(query_text)) DESC; diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 91bae965..b4445d12 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -134,24 +134,24 @@ 'analytical queries in pgbench (disabled mode)'); $res = $node->safe_psql('postgres', - "SELECT count(*) FROM top_error_queries(10) v - JOIN aqo_query_texts t ON (t.query_hash = v.fspace_hash) + "SELECT count(*) FROM show_cardinality_errors(false) v + JOIN aqo_query_texts t ON (t.query_hash = v.id) WHERE v.error > 0. AND t.query_text LIKE '%pgbench_accounts%'"); is($res, 3); $res = $node->safe_psql('postgres', - "SELECT * FROM top_error_queries(10) v - JOIN aqo_query_texts t ON (t.query_hash = v.fspace_hash) + "SELECT * FROM show_cardinality_errors(false) v + JOIN aqo_query_texts t ON (t.query_hash = v.id) WHERE v.error > 0. 
AND t.query_text LIKE '%pgbench_accounts%'"); note("\n TopN: \n $res \n"); $res = $node->safe_psql('postgres', - "SELECT v.error, t.query_text FROM top_error_queries(10) v - JOIN aqo_query_texts t ON (t.query_hash = v.fspace_hash) + "SELECT v.error, t.query_text FROM show_cardinality_errors(false) v + JOIN aqo_query_texts t ON (t.query_hash = v.id) WHERE v.error > 0."); note("\n Queries: \n $res \n"); $res = $node->safe_psql('postgres', "SELECT count(*) FROM top_time_queries(10) v WHERE v.execution_time > 0."); -is($res, 5); +is($res, 3); # ############################################################################## # From 031c0c8e12bef28542b927c4bcaf5f615eacdc4c Mon Sep 17 00:00:00 2001 From: Alexandra Pervushina Date: Fri, 30 Sep 2022 13:41:49 +0000 Subject: [PATCH 047/134] Port remaining functions from stable13 Currently has problems with aqo_cleanup() function --- .github/workflows/c-cpp.yml | 10 +- Makefile | 17 +- README.md | 8 +- aqo--1.4--1.5.sql | 160 ++ aqo.c | 216 +- aqo.control | 4 +- aqo.h | 70 +- aqo_pg14.patch | 179 +- aqo_shared.c | 84 +- aqo_shared.h | 20 + auto_tuning.c | 60 +- cardinality_estimation.c | 63 +- cardinality_hooks.c | 121 +- cardinality_hooks.h | 3 +- conf.add | 2 +- expected/aqo_CVE-2020-14350.out | 5 +- expected/aqo_controlled.out | 34 +- expected/aqo_disabled.out | 100 +- expected/aqo_fdw.out | 34 +- expected/aqo_forced.out | 12 +- expected/aqo_intelligent.out | 32 +- expected/aqo_learn.out | 403 +++- expected/clean_aqo_data.out | 199 +- expected/feature_subspace.out | 84 + expected/forced_stat_collection.out | 29 +- expected/gucs.out | 93 +- expected/look_a_like.out | 238 +++ expected/plancache.out | 9 +- expected/relocatable.out | 122 ++ expected/schema.out | 21 +- expected/statement_timeout.out | 15 +- expected/temp_tables.out | 195 ++ expected/top_queries.out | 71 +- expected/unsupported.out | 123 +- hash.c | 107 +- hash.h | 3 +- learn_cache.c | 72 +- learn_cache.h | 4 +- machine_learning.c | 23 + machine_learning.h | 3 + 
path_utils.c | 158 +- path_utils.h | 27 +- postprocessing.c | 243 +-- preprocessing.c | 249 ++- selectivity_cache.c | 21 +- sql/aqo_CVE-2020-14350.sql | 6 +- sql/aqo_controlled.sql | 21 +- sql/aqo_disabled.sql | 39 +- sql/aqo_fdw.sql | 14 +- sql/aqo_forced.sql | 9 +- sql/aqo_intelligent.sql | 19 +- sql/aqo_learn.sql | 184 +- sql/clean_aqo_data.sql | 157 +- sql/feature_subspace.sql | 45 + sql/forced_stat_collection.sql | 15 +- sql/gucs.sql | 27 +- sql/look_a_like.sql | 84 + sql/plancache.sql | 10 +- sql/relocatable.sql | 54 + sql/schema.sql | 12 +- sql/statement_timeout.sql | 6 +- sql/temp_tables.sql | 97 + sql/top_queries.sql | 44 +- sql/unsupported.sql | 32 +- storage.c | 2992 ++++++++++++++++++++------- storage.h | 122 ++ t/001_pgbench.pl | 164 +- t/002_pg_stat_statements_aqo.pl | 13 +- utils.c | 43 - 69 files changed, 5762 insertions(+), 2193 deletions(-) create mode 100644 aqo--1.4--1.5.sql create mode 100644 expected/feature_subspace.out create mode 100644 expected/look_a_like.out create mode 100644 expected/relocatable.out create mode 100644 expected/temp_tables.out create mode 100644 sql/feature_subspace.sql create mode 100644 sql/look_a_like.sql create mode 100644 sql/relocatable.sql create mode 100644 sql/temp_tables.sql create mode 100644 storage.h diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 0e70fb3c..3c987855 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -1,10 +1,10 @@ -name: C/C++ CI for the stable14 branch. 
+name: 'C/C++ CI for the stable13' on: push: - branches: [ stable14 ] + branches: [ stable13 ] pull_request: - branches: [ stable14 ] + branches: [ stable13 ] jobs: build: @@ -19,9 +19,9 @@ jobs: git config --global user.name "CI PgPro admin" git clone https://github.com/postgres/postgres.git pg cd pg - git checkout REL_14_STABLE + git checkout REL_13_STABLE ./configure --prefix=`pwd`/tmp_install git subtree add --prefix=contrib/aqo https://github.com/postgrespro/aqo.git $GITHUB_REF - patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg14.patch + patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg13.patch make -j4 > /dev/null && make -j4 -C contrib > /dev/null env CLIENTS=50 THREADS=50 make -C contrib/aqo check diff --git a/Makefile b/Makefile index 0a03ac48..1ef23b54 100755 --- a/Makefile +++ b/Makefile @@ -1,12 +1,13 @@ # contrib/aqo/Makefile EXTENSION = aqo -EXTVERSION = 1.4 +EXTVERSION = 1.5 PGFILEDESC = "AQO - Adaptive Query Optimization" MODULE_big = aqo -OBJS = aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ -hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ -selectivity_cache.o storage.o utils.o learn_cache.o aqo_shared.o $(WIN32RES) +OBJS = $(WIN32RES) \ + aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ + hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ + selectivity_cache.o storage.o utils.o learn_cache.o aqo_shared.o TAP_TESTS = 1 @@ -24,7 +25,11 @@ REGRESS = aqo_disabled \ clean_aqo_data \ plancache \ statement_timeout \ - top_queries + temp_tables \ + top_queries \ + relocatable\ + look_a_like \ + feature_subspace fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw stat_srcdir = $(top_srcdir)/contrib/pg_stat_statements @@ -33,7 +38,7 @@ EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/conf.add EXTRA_INSTALL = contrib/postgres_fdw contrib/pg_stat_statements DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql 
aqo--1.2.sql \ - aqo--1.2--1.3.sql aqo--1.3--1.4.sql + aqo--1.2--1.3.sql aqo--1.3--1.4.sql aqo--1.4--1.5.sql ifdef USE_PGXS PG_CONFIG ?= pg_config diff --git a/README.md b/README.md index 169a09d9..e28ac89c 100644 --- a/README.md +++ b/README.md @@ -13,9 +13,7 @@ To avoid compatibility issues, the following branches in the git-repository are * `stable9_6`. * `stable11` - for PG v10 and v11. * `stable12` - for PG v12. -* `stable13` - for PG v13 -* `stable14` - for PG v14 -* the `master` branch of the AQO repository correctly works with PGv15 and the PostgreSQL `master` branch. +* the `master` branch of the AQO repository correctly works with PGv13 and the PostgreSQL `master` branch. The module contains a patch and an extension. Patch has to be applied to the sources of PostgresSQL. Patch affects header files, that is why PostgreSQL @@ -214,7 +212,7 @@ execution of such query type. Disabling of AQO usage is reasonable for that cases in which query execution time increases after applying AQO. It happens sometimes because of cost models incompleteness. -`Fspace_hash` setting is for extra advanced AQO tuning. It may be changed manually +`fs` setting is for extra advanced AQO tuning. It may be changed manually to optimize a number of queries using the same model. It may decrease the amount of memory for models and even the query execution time, but also it may cause the bad AQO's behavior, so please use it only if you know exactly @@ -232,7 +230,7 @@ ignored. If `aqo.mode` is `'learn'`, then the normalized query hash appends to aqo_queries with the default settings `learn_aqo=true`, `use_aqo=true`, `auto_tuning=false`, and -`fspace_hash = query_hash` which means that AQO uses separate machine learning +`fs = queryid` which means that AQO uses separate machine learning model for this query type optimization. After that the query is processed as if it already was in aqo_queries. 
diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql new file mode 100644 index 00000000..3244a721 --- /dev/null +++ b/aqo--1.4--1.5.sql @@ -0,0 +1,160 @@ +/* contrib/aqo/aqo--1.4--1.5.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION aqo UPDATE TO '1.5'" to load this file. \quit + +/* Remove old interface of the extension */ +DROP FUNCTION array_mse; +DROP FUNCTION array_avg; +DROP FUNCTION public.aqo_clear_hist; -- Should be renamed and reworked +DROP FUNCTION public.aqo_disable_query; +DROP FUNCTION public.aqo_drop; +DROP FUNCTION public.aqo_enable_query; +DROP FUNCTION public.aqo_ne_queries; -- Not needed anymore due to changing in the logic +DROP FUNCTION public.aqo_status; +DROP FUNCTION public.clean_aqo_data; +DROP FUNCTION public.show_cardinality_errors; +DROP FUNCTION public.top_time_queries; +DROP TABLE public.aqo_data CASCADE; +DROP TABLE public.aqo_queries CASCADE; +DROP TABLE public.aqo_query_texts CASCADE; +DROP TABLE public.aqo_query_stat CASCADE; + + +/* + * VIEWs to discover AQO data. 
+ */ +CREATE FUNCTION aqo_queries ( + OUT queryid bigint, + OUT fs bigint, + OUT learn_aqo boolean, + OUT use_aqo boolean, + OUT auto_tuning boolean +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_queries' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_query_texts(OUT queryid bigint, OUT query_text text) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_query_texts' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_query_stat ( + OUT queryid bigint, + OUT execution_time_with_aqo double precision[], + OUT execution_time_without_aqo double precision[], + OUT planning_time_with_aqo double precision[], + OUT planning_time_without_aqo double precision[], + OUT cardinality_error_with_aqo double precision[], + OUT cardinality_error_without_aqo double precision[], + OUT executions_with_aqo bigint, + OUT executions_without_aqo bigint +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_query_stat' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_data ( + OUT fs bigint, + OUT fss integer, + OUT nfeatures integer, + OUT features double precision[][], + OUT targets double precision[], + OUT reliability double precision[], + OUT oids Oid[] +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_data' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE VIEW aqo_query_stat AS SELECT * FROM aqo_query_stat(); +CREATE VIEW aqo_query_texts AS SELECT * FROM aqo_query_texts(); +CREATE VIEW aqo_data AS SELECT * FROM aqo_data(); +CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); + +/* UI functions */ + + +CREATE FUNCTION aqo_enable_query(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_enable_query' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION aqo_disable_query(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_enable_query' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION aqo_queries_update( + queryid bigint, fs bigint, learn_aqo bool, use_aqo bool, auto_tuning bool) +RETURNS bool +AS 'MODULE_PATHNAME', 
'aqo_queries_update' +LANGUAGE C VOLATILE; + +-- +-- Get cardinality error of queries the last time they were executed. +-- IN: +-- controlled - show queries executed under a control of AQO (true); +-- executed without an AQO control, but AQO has a stat on the query (false). +-- +-- OUT: +-- num - sequental number. Smaller number corresponds to higher error. +-- id - ID of a query. +-- fshash - feature space. Usually equal to zero or ID. +-- error - AQO error that calculated on plan nodes of the query. +-- nexecs - number of executions of queries associated with this ID. +-- +CREATE OR REPLACE FUNCTION aqo_cardinality_error(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, error double precision, nexecs bigint) +AS 'MODULE_PATHNAME', 'aqo_cardinality_error' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS +'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; + +-- +-- Show execution time of queries, for which AQO has statistics. +-- controlled - show stat on executions where AQO was used for cardinality +-- estimations, or not used (controlled = false). +-- Last case is possible in disabled mode with aqo.force_collect_stat = 'on'. +-- +CREATE OR REPLACE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) +AS 'MODULE_PATHNAME', 'aqo_execution_time' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_execution_time(boolean) IS +'Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions.'; + +-- +-- Remove query class settings, text, statistics and ML data from AQO storage. +-- Return number of FSS records, removed from the storage. 
+-- +CREATE OR REPLACE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer +AS 'MODULE_PATHNAME', 'aqo_drop_class' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_drop_class(bigint) IS +'Remove info about an query class from AQO ML knowledge base.'; + +-- +-- Remove unneeded rows from the AQO ML storage. +-- For common feature space, remove rows from aqo_data only. +-- For custom feature space - remove all rows related to the space from all AQO +-- tables even if only one oid for one feature subspace of the space is illegal. +-- Returns number of deleted rows from aqo_queries and aqo_data tables. +-- +CREATE OR REPLACE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) +AS 'MODULE_PATHNAME', 'aqo_cleanup' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cleanup() IS +'Remove unneeded rows from the AQO ML storage'; + +-- +-- Remove all records in the AQO storage. +-- Return number of rows removed. +-- +CREATE FUNCTION aqo_reset() RETURNS bigint +AS 'MODULE_PATHNAME', 'aqo_reset' +LANGUAGE C PARALLEL SAFE; +COMMENT ON FUNCTION aqo_reset() IS +'Reset all data gathered by AQO'; diff --git a/aqo.c b/aqo.c index 3e0210e8..dcd130da 100644 --- a/aqo.c +++ b/aqo.c @@ -23,6 +23,7 @@ #include "path_utils.h" #include "preprocessing.h" #include "learn_cache.h" +#include "storage.h" PG_MODULE_MAGIC; @@ -33,8 +34,6 @@ void _PG_init(void); /* Strategy of determining feature space for new queries. */ int aqo_mode; -bool aqo_enabled = false; /* Signals that CREATE EXTENSION have executed and - all extension tables is ready for use. 
*/ bool force_collect_stat; /* @@ -62,7 +61,7 @@ static const struct config_enum_entry format_options[] = { }; /* Parameters of autotuning */ -int aqo_stat_size = 20; +int aqo_stat_size = STAT_SAMPLE_SIZE; int auto_tuning_window_size = 5; double auto_tuning_exploration = 0.1; int auto_tuning_max_iterations = 50; @@ -80,9 +79,23 @@ double log_selectivity_lower_bound = -30; * Currently we use it only to store query_text string which is initialized * after a query parsing and is used during the query planning. */ -MemoryContext AQOMemoryContext; -MemoryContext AQO_cache_mem_ctx; + QueryContextData query_context; + +MemoryContext AQOTopMemCtx = NULL; + +/* Is released at the end of transaction */ +MemoryContext AQOCacheMemCtx = NULL; + +/* Should be released in-place, just after a huge calculation */ +MemoryContext AQOUtilityMemCtx = NULL; + +/* Is released at the end of planning */ +MemoryContext AQOPredictMemCtx = NULL; + +/* Is released at the end of learning */ +MemoryContext AQOLearnMemCtx = NULL; + /* Additional plan info */ int njoins; @@ -117,7 +130,7 @@ aqo_free_callback(ResourceReleasePhase phase, if (isTopLevel) { - list_free_deep(cur_classes); + MemoryContextReset(AQOCacheMemCtx); cur_classes = NIL; } } @@ -200,6 +213,71 @@ _PG_init(void) NULL ); + DefineCustomIntVariable("aqo.join_threshold", + "Sets the threshold of number of JOINs in query beyond which AQO is used.", + NULL, + &aqo_join_threshold, + 3, + 0, INT_MAX / 1000, + PGC_USERSET, + 0, + NULL, + NULL, + NULL + ); + + DefineCustomIntVariable("aqo.fs_max_items", + "Max number of feature spaces that AQO can operate with.", + NULL, + &fs_max_items, + 10000, + 1, INT_MAX, + PGC_SUSET, + 0, + NULL, + NULL, + NULL + ); + + DefineCustomIntVariable("aqo.fss_max_items", + "Max number of feature subspaces that AQO can operate with.", + NULL, + &fss_max_items, + 100000, + 0, INT_MAX, + PGC_SUSET, + 0, + NULL, + NULL, + NULL + ); + + DefineCustomIntVariable("aqo.querytext_max_size", + "Query max size in 
aqo_query_texts.", + NULL, + &querytext_max_size, + 1000, + 0, INT_MAX, + PGC_SUSET, + 0, + NULL, + NULL, + NULL + ); + + DefineCustomIntVariable("aqo.dsm_size_max", + "Maximum size of dynamic shared memory which AQO could allocate to store learning data.", + NULL, + &dsm_size_max, + 100, + 0, INT_MAX, + PGC_SUSET, + 0, + NULL, + NULL, + NULL + ); + prev_shmem_startup_hook = shmem_startup_hook; shmem_startup_hook = aqo_init_shmem; prev_planner_hook = planner_hook; @@ -238,11 +316,41 @@ _PG_init(void) create_upper_paths_hook = aqo_store_upper_signature_hook; init_deactivated_queries_storage(); - AQOMemoryContext = AllocSetContextCreate(TopMemoryContext, - "AQOMemoryContext", + + /* + * Create own Top memory Context for reporting AQO memory in the future. + */ + AQOTopMemCtx = AllocSetContextCreate(TopMemoryContext, + "AQOTopMemoryContext", + ALLOCSET_DEFAULT_SIZES); + /* + * AQO Cache Memory Context containe environment data. + */ + AQOCacheMemCtx = AllocSetContextCreate(AQOTopMemCtx, + "AQOCacheMemCtx", + ALLOCSET_DEFAULT_SIZES); + /* + * AQOUtilityMemoryContext containe short-lived information which + * is appeared from having got clause, selectivity arrays and relid lists + * while calculating hashes. It clean up inside calculated + * function or immediately after her having completed. + */ + AQOUtilityMemCtx = AllocSetContextCreate(AQOTopMemCtx, + "AQOUtilityMemoryContext", + ALLOCSET_DEFAULT_SIZES); + /* + * AQOPredictMemoryContext save necessary information for making predict of plan nodes + * and clean up in the execution stage of query. + */ + AQOPredictMemCtx = AllocSetContextCreate(AQOTopMemCtx, + "AQOPredictMemoryContext", ALLOCSET_DEFAULT_SIZES); - AQO_cache_mem_ctx = AllocSetContextCreate(TopMemoryContext, - "AQO_cache_mem_ctx", + /* + * AQOLearnMemoryContext save necessary information for writing down to AQO knowledge table + * and clean up after doing this operation. 
+ */ + AQOLearnMemCtx = AllocSetContextCreate(AQOTopMemCtx, + "AQOLearnMemoryContext", ALLOCSET_DEFAULT_SIZES); RegisterResourceReleaseCallback(aqo_free_callback, NULL); RegisterAQOPlanNodeMethods(); @@ -251,82 +359,6 @@ _PG_init(void) RequestAddinShmemSpace(aqo_memsize()); } -PG_FUNCTION_INFO_V1(invalidate_deactivated_queries_cache); - -/* - * Clears the cache of deactivated queries if the user changed aqo_queries - * manually. - */ -Datum -invalidate_deactivated_queries_cache(PG_FUNCTION_ARGS) -{ - fini_deactivated_queries_storage(); - init_deactivated_queries_storage(); - PG_RETURN_POINTER(NULL); -} - -/* - * Return AQO schema's Oid or InvalidOid if that's not possible. - */ -Oid -get_aqo_schema(void) -{ - Oid result; - Relation rel; - SysScanDesc scandesc; - HeapTuple tuple; - ScanKeyData entry[1]; - Oid ext_oid; - - /* It's impossible to fetch pg_aqo's schema now */ - if (!IsTransactionState()) - return InvalidOid; - - ext_oid = get_extension_oid("aqo", true); - if (ext_oid == InvalidOid) - return InvalidOid; /* exit if pg_aqo does not exist */ - - ScanKeyInit(&entry[0], -#if PG_VERSION_NUM >= 120000 - Anum_pg_extension_oid, -#else - ObjectIdAttributeNumber, -#endif - BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(ext_oid)); - - rel = relation_open(ExtensionRelationId, AccessShareLock); - scandesc = systable_beginscan(rel, ExtensionOidIndexId, true, - NULL, 1, entry); - tuple = systable_getnext(scandesc); - - /* We assume that there can be at most one matching tuple */ - if (HeapTupleIsValid(tuple)) - result = ((Form_pg_extension) GETSTRUCT(tuple))->extnamespace; - else - result = InvalidOid; - - systable_endscan(scandesc); - relation_close(rel, AccessShareLock); - return result; -} - -/* - * Init userlock - */ -void -init_lock_tag(LOCKTAG *tag, uint64 key1, int32 key2) -{ - uint32 key = key1 % UINT32_MAX; - - tag->locktag_field1 = AQO_MODULE_MAGIC; - tag->locktag_field2 = key; - tag->locktag_field3 = (uint32) key2; - tag->locktag_field4 = 0; - 
tag->locktag_type = LOCKTAG_USERLOCK; - tag->locktag_lockmethodid = USER_LOCKMETHOD; -} - /* * AQO is really needed for any activity? */ @@ -342,3 +374,15 @@ IsQueryDisabled(void) return false; } + +PG_FUNCTION_INFO_V1(invalidate_deactivated_queries_cache); + +/* + * Clears the cache of deactivated queries if the user changed aqo_queries + * manually. + */ +Datum +invalidate_deactivated_queries_cache(PG_FUNCTION_ARGS) +{ + PG_RETURN_POINTER(NULL); +} diff --git a/aqo.control b/aqo.control index dfdd815d..5507effb 100644 --- a/aqo.control +++ b/aqo.control @@ -1,5 +1,5 @@ # AQO extension comment = 'machine learning for cardinality estimation in optimizer' -default_version = '1.4' +default_version = '1.5' module_pathname = '$libdir/aqo' -relocatable = false +relocatable = true diff --git a/aqo.h b/aqo.h index b43e01a9..64092b94 100644 --- a/aqo.h +++ b/aqo.h @@ -135,7 +135,6 @@ #include "optimizer/cost.h" #include "parser/analyze.h" #include "parser/parsetree.h" -#include "utils/array.h" #include "utils/builtins.h" #include "utils/guc.h" #include "utils/hsearch.h" @@ -145,6 +144,7 @@ #include "utils/snapmgr.h" #include "machine_learning.h" +//#include "storage.h" /* Check PostgreSQL version (9.6.0 contains important changes in planner) */ #if PG_VERSION_NUM < 90600 @@ -169,36 +169,10 @@ typedef enum } AQO_MODE; extern int aqo_mode; -extern bool aqo_enabled; extern bool force_collect_stat; extern bool aqo_show_hash; extern bool aqo_show_details; - -/* - * It is mostly needed for auto tuning of query. with auto tuning mode aqo - * checks stability of last executions of the query, bad influence of strong - * cardinality estimation on query execution (planner bug?) and so on. - * It can induce aqo to suppress machine learning for this query. 
- */ -typedef struct -{ - double *execution_time_with_aqo; - double *execution_time_without_aqo; - double *planning_time_with_aqo; - double *planning_time_without_aqo; - double *cardinality_error_with_aqo; - double *cardinality_error_without_aqo; - - int execution_time_with_aqo_size; - int execution_time_without_aqo_size; - int planning_time_with_aqo_size; - int planning_time_without_aqo_size; - int cardinality_error_with_aqo_size; - int cardinality_error_without_aqo_size; - - int64 executions_with_aqo; - int64 executions_without_aqo; -} QueryStat; +extern int aqo_join_threshold; /* Parameters for current query */ typedef struct QueryContextData @@ -225,6 +199,8 @@ typedef struct QueryContextData double planning_time; } QueryContextData; +struct StatEntry; + extern double predicted_ppi_rows; extern double fss_ppi_hash; @@ -245,9 +221,12 @@ extern double log_selectivity_lower_bound; extern QueryContextData query_context; extern int njoins; -/* Memory context for long-live data */ -extern MemoryContext AQOMemoryContext; -extern MemoryContext AQO_cache_mem_ctx; +/* AQO Memory contexts */ +extern MemoryContext AQOTopMemCtx; +extern MemoryContext AQOCacheMemCtx; +extern MemoryContext AQOUtilityMemCtx; +extern MemoryContext AQOPredictMemCtx; +extern MemoryContext AQOLearnMemCtx; /* Saved hook values in case of unload */ extern post_parse_analyze_hook_type prev_post_parse_analyze_hook; @@ -277,25 +256,10 @@ int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); /* Storage interaction */ -extern bool find_query(uint64 qhash, QueryContextData *ctx); -extern bool update_query(uint64 qhash, uint64 fhash, - bool learn_aqo, bool use_aqo, bool auto_tuning); -extern bool add_query_text(uint64 query_hash, const char *query_string); -extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relnames, +extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe); -extern bool load_fss(uint64 fs, int fss, OkNNrdata 
*data, List **relnames); extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, - List *relnames, bool isTimedOut); -extern bool update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames); -QueryStat *get_aqo_stat(uint64 query_hash); -void update_aqo_stat(uint64 query_hash, QueryStat * stat); -extern bool my_index_insert(Relation indexRelation, Datum *values, bool *isnull, - ItemPointer heap_t_ctid, Relation heapRelation, - IndexUniqueCheck checkUnique); -void init_deactivated_queries_storage(void); -void fini_deactivated_queries_storage(void); -extern bool query_is_deactivated(uint64 query_hash); -extern void add_deactivated_query(uint64 query_hash); + List *reloids, bool isTimedOut); /* Query preprocessing hooks */ extern void print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, @@ -306,8 +270,8 @@ extern void print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, extern void print_node_explain(ExplainState *es, PlanState *ps, Plan *plan); /* Cardinality estimation */ -double predict_for_relation(List *restrict_clauses, List *selectivities, - List *relnames, int *fss); +extern double predict_for_relation(List *restrict_clauses, List *selectivities, + List *relsigns, int *fss); /* Query execution statistics collecting hooks */ void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags); @@ -316,7 +280,7 @@ void aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, void aqo_ExecutorEnd(QueryDesc *queryDesc); /* Automatic query tuning */ -extern void automatical_query_tuning(uint64 query_hash, QueryStat * stat); +extern void automatical_query_tuning(uint64 query_hash, struct StatEntry *stat); /* Utilities */ extern int int64_compare(const void *a, const void *b); @@ -325,8 +289,6 @@ extern int double_cmp(const void *a, const void *b); extern int *argsort(void *a, int n, size_t es, int (*cmp) (const void *, const void *)); extern int *inverse_permutation(int *a, int n); -extern QueryStat *palloc_query_stat(void); -extern void 
pfree_query_stat(QueryStat *stat); /* Selectivity cache for parametrized baserels */ extern void cache_selectivity(int clause_hash, int relid, int global_relid, @@ -335,8 +297,6 @@ extern double *selectivity_cache_find_global_relid(int clause_hash, int global_relid); extern void selectivity_cache_clear(void); -extern Oid get_aqo_schema(void); -extern void init_lock_tag(LOCKTAG *tag, uint64 key1, int32 key2); extern bool IsQueryDisabled(void); extern List *cur_classes; diff --git a/aqo_pg14.patch b/aqo_pg14.patch index d119d98c..d43e24f4 100644 --- a/aqo_pg14.patch +++ b/aqo_pg14.patch @@ -1,5 +1,5 @@ diff --git a/contrib/Makefile b/contrib/Makefile -index f27e458482..0c62191904 100644 +index 1846d415b6..95519ac11d 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -7,6 +7,7 @@ include $(top_builddir)/src/Makefile.global @@ -11,7 +11,7 @@ index f27e458482..0c62191904 100644 auto_explain \ bloom \ diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index 70551522da..958529fbab 100644 +index bc05c96b4c..b6a3abe0d2 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -24,6 +24,7 @@ @@ -19,10 +19,10 @@ index 70551522da..958529fbab 100644 #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "optimizer/cost.h" - #include "parser/analyze.h" #include "parser/parsetree.h" #include "rewrite/rewriteHandler.h" -@@ -47,6 +48,12 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; + #include "storage/bufmgr.h" +@@ -46,6 +47,12 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; /* Hook for plugins to get control in explain_get_index_name() */ explain_get_index_name_hook_type explain_get_index_name_hook = NULL; @@ -35,7 +35,7 @@ index 70551522da..958529fbab 100644 /* OR-able flags for ExplainXMLTag() */ #define X_OPENING 0 -@@ -676,6 +683,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, +@@ -638,6 +645,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause 
*into, ExplainState *es, ExplainPropertyFloat("Execution Time", "ms", 1000.0 * totaltime, 3, es); @@ -46,7 +46,7 @@ index 70551522da..958529fbab 100644 ExplainCloseGroup("Query", NULL, true, es); } -@@ -1658,6 +1669,9 @@ ExplainNode(PlanState *planstate, List *ancestors, +@@ -1612,6 +1623,9 @@ ExplainNode(PlanState *planstate, List *ancestors, } } @@ -57,7 +57,7 @@ index 70551522da..958529fbab 100644 if (es->format == EXPLAIN_FORMAT_TEXT) appendStringInfoChar(es->str, '\n'); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index a106a2cdf1..7150dccb4d 100644 +index 682b28ed72..3a5c615deb 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -132,6 +132,7 @@ CopyPlanFields(const Plan *from, Plan *newnode) @@ -69,7 +69,7 @@ index a106a2cdf1..7150dccb4d 100644 /* diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c -index 0df9be1608..678e1f050f 100644 +index 7237b52e96..5e2ee2732a 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -342,6 +342,7 @@ _outPlanInfo(StringInfo str, const Plan *node) @@ -81,10 +81,10 @@ index 0df9be1608..678e1f050f 100644 /* diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c -index eaa51c5c06..6ad8b78c7d 100644 +index 62c945b6c5..a39046ca56 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c -@@ -1628,6 +1628,11 @@ ReadCommonPlan(Plan *local_node) +@@ -1580,6 +1580,11 @@ ReadCommonPlan(Plan *local_node) READ_NODE_FIELD(initPlan); READ_BITMAPSET_FIELD(extParam); READ_BITMAPSET_FIELD(allParam); @@ -97,22 +97,23 @@ index eaa51c5c06..6ad8b78c7d 100644 /* diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index 006f91f0a8..ef9c8ec581 100644 +index 4edc859cb5..988f2e6ab7 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c -@@ -98,6 +98,11 @@ - #include "utils/spccache.h" +@@ -98,6 +98,12 @@ #include 
"utils/tuplesort.h" + +set_baserel_rows_estimate_hook_type set_baserel_rows_estimate_hook = NULL; +set_foreign_rows_estimate_hook_type set_foreign_rows_estimate_hook = NULL; +get_parameterized_baserel_size_hook_type get_parameterized_baserel_size_hook = NULL; +get_parameterized_joinrel_size_hook_type get_parameterized_joinrel_size_hook = NULL; +set_joinrel_size_estimates_hook_type set_joinrel_size_estimates_hook = NULL; ++ + /* source-code-compatibility hacks for pull_varnos() API change */ + #define pull_varnos(a,b) pull_varnos_new(a,b) - #define LOG2(x) (log(x) / 0.693147180559945) - -@@ -188,7 +193,6 @@ static Cost append_nonpartial_cost(List *subpaths, int numpaths, +@@ -181,7 +187,6 @@ static Cost append_nonpartial_cost(List *subpaths, int numpaths, static void set_rel_width(PlannerInfo *root, RelOptInfo *rel); static double relation_byte_size(double tuples, int width); static double page_size(double tuples, int width); @@ -120,7 +121,7 @@ index 006f91f0a8..ef9c8ec581 100644 /* -@@ -4911,6 +4915,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4632,6 +4637,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) } @@ -179,7 +180,7 @@ index 006f91f0a8..ef9c8ec581 100644 /* * set_baserel_size_estimates * Set the size estimates for the given base relation. -@@ -4927,19 +4983,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4648,19 +4705,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) { @@ -200,7 +201,7 @@ index 006f91f0a8..ef9c8ec581 100644 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); -@@ -4950,13 +4997,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) +@@ -4671,13 +4719,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) * get_parameterized_baserel_size * Make a size estimate for a parameterized scan of a base relation. 
* @@ -236,7 +237,7 @@ index 006f91f0a8..ef9c8ec581 100644 { List *allclauses; double nrows; -@@ -4985,6 +5052,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -4706,6 +4774,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates * Set the size estimates for the given join relation. * @@ -273,7 +274,7 @@ index 006f91f0a8..ef9c8ec581 100644 * The rel's targetlist must have been constructed already, and a * restriction clause list that matches the given component rels must * be provided. -@@ -5004,11 +5101,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -4725,11 +4823,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * build_joinrel_tlist, and baserestrictcost is not used for join rels. */ void @@ -290,7 +291,7 @@ index 006f91f0a8..ef9c8ec581 100644 { rel->rows = calc_joinrel_size_estimate(root, rel, -@@ -5024,6 +5121,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -4745,6 +4843,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * get_parameterized_joinrel_size * Make a size estimate for a parameterized scan of a join relation. * @@ -326,7 +327,7 @@ index 006f91f0a8..ef9c8ec581 100644 * 'rel' is the joinrel under consideration. * 'outer_path', 'inner_path' are (probably also parameterized) Paths that * produce the relations being joined. -@@ -5036,11 +5162,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -4757,11 +4884,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates must have been applied already. 
*/ double @@ -343,7 +344,7 @@ index 006f91f0a8..ef9c8ec581 100644 { double nrows; -@@ -5756,7 +5882,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) +@@ -5430,7 +5557,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) /* Should only be applied to base relations */ Assert(rel->relid > 0); @@ -352,7 +353,7 @@ index 006f91f0a8..ef9c8ec581 100644 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); -@@ -6042,7 +6168,7 @@ page_size(double tuples, int width) +@@ -5716,7 +5843,7 @@ page_size(double tuples, int width) * Estimate the fraction of the work that each worker will do given the * number of workers budgeted for the path. */ @@ -362,10 +363,10 @@ index 006f91f0a8..ef9c8ec581 100644 { double parallel_divisor = path->parallel_workers; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 0ed858f305..9d4a6c5903 100644 +index 917713c163..5b7bf1cec6 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c -@@ -71,6 +71,7 @@ +@@ -70,6 +70,7 @@ #define CP_LABEL_TLIST 0x0004 /* tlist must contain sortgrouprefs */ #define CP_IGNORE_TLIST 0x0008 /* caller will replace tlist */ @@ -373,7 +374,7 @@ index 0ed858f305..9d4a6c5903 100644 static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path, int flags); -@@ -545,6 +546,10 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) +@@ -524,6 +525,10 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) break; } @@ -384,7 +385,7 @@ index 0ed858f305..9d4a6c5903 100644 return plan; } -@@ -5323,6 +5328,7 @@ copy_generic_path_info(Plan *dest, Path *src) +@@ -5163,6 +5168,7 @@ copy_generic_path_info(Plan *dest, Path *src) dest->plan_width = src->pathtarget->width; dest->parallel_aware = src->parallel_aware; dest->parallel_safe = src->parallel_safe; @@ -393,10 +394,10 @@ index 0ed858f305..9d4a6c5903 100644 /* diff --git a/src/backend/optimizer/plan/planner.c 
b/src/backend/optimizer/plan/planner.c -index 70899e5430..34075cc87b 100644 +index 60e7fda6a9..5732c7a685 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c -@@ -143,7 +143,8 @@ static List *extract_rollup_sets(List *groupingSets); +@@ -145,7 +145,8 @@ static List *extract_rollup_sets(List *groupingSets); static List *reorder_grouping_sets(List *groupingSets, List *sortclause); static void standard_qp_callback(PlannerInfo *root, void *extra); static double get_number_of_groups(PlannerInfo *root, @@ -406,7 +407,7 @@ index 70899e5430..34075cc87b 100644 grouping_sets_data *gd, List *target_list); static RelOptInfo *create_grouping_paths(PlannerInfo *root, -@@ -3151,7 +3152,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) +@@ -3682,7 +3683,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) */ static double get_number_of_groups(PlannerInfo *root, @@ -416,36 +417,36 @@ index 70899e5430..34075cc87b 100644 grouping_sets_data *gd, List *target_list) { -@@ -3188,7 +3190,7 @@ get_number_of_groups(PlannerInfo *root, +@@ -3719,7 +3721,7 @@ get_number_of_groups(PlannerInfo *root, GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); double numGroups = estimate_num_groups(root, groupExprs, - path_rows, + subpath->rows, - &gset, - NULL); + &gset); -@@ -3214,7 +3216,7 @@ get_number_of_groups(PlannerInfo *root, + gs->numGroups = numGroups; +@@ -3744,7 +3746,7 @@ get_number_of_groups(PlannerInfo *root, GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); double numGroups = estimate_num_groups(root, groupExprs, - path_rows, + subpath->rows, - &gset, - NULL); + &gset); -@@ -3231,8 +3233,8 @@ get_number_of_groups(PlannerInfo *root, + gs->numGroups = numGroups; +@@ -3760,8 +3762,8 @@ get_number_of_groups(PlannerInfo *root, groupExprs = get_sortgrouplist_exprs(parse->groupClause, target_list); - dNumGroups = estimate_num_groups(root, groupExprs, path_rows, -- NULL, NULL); +- NULL); + dNumGroups = 
estimate_num_groups_ext(root, groupExprs, subpath, -+ grouped_rel, NULL, NULL); ++ grouped_rel, NULL); } } else if (parse->groupingSets) -@@ -3619,7 +3621,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, +@@ -4147,7 +4149,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, * Estimate number of groups. */ dNumGroups = get_number_of_groups(root, @@ -455,7 +456,7 @@ index 70899e5430..34075cc87b 100644 gd, extra->targetList); -@@ -6425,13 +6428,15 @@ create_partial_grouping_paths(PlannerInfo *root, +@@ -6931,13 +6934,15 @@ create_partial_grouping_paths(PlannerInfo *root, if (cheapest_total_path != NULL) dNumPartialGroups = get_number_of_groups(root, @@ -474,13 +475,13 @@ index 70899e5430..34075cc87b 100644 extra->targetList); diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c -index e105a4d5f1..c5bcc9d1d1 100644 +index a203e6f1ff..d31bf5bae6 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -258,6 +258,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) - rel->all_partrels = NULL; rel->partexprs = NULL; rel->nullable_partexprs = NULL; + rel->partitioned_child_rels = NIL; + rel->ext_nodes = NULL; /* @@ -493,23 +494,23 @@ index e105a4d5f1..c5bcc9d1d1 100644 elog(ERROR, "no relation entry for relid %d", relid); return NULL; /* keep compiler quiet */ -@@ -672,6 +672,7 @@ build_join_rel(PlannerInfo *root, - joinrel->all_partrels = NULL; +@@ -673,6 +673,7 @@ build_join_rel(PlannerInfo *root, joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; + joinrel->partitioned_child_rels = NIL; + joinrel->ext_nodes = NULL; /* Compute information relevant to the foreign relations. 
*/ set_foreign_rel_properties(joinrel, outer_rel, inner_rel); -@@ -850,6 +851,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, - joinrel->all_partrels = NULL; +@@ -851,6 +852,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; + joinrel->partitioned_child_rels = NIL; + joinrel->ext_nodes = NULL; joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids, inner_rel->top_parent_relids); -@@ -1279,6 +1281,7 @@ find_childrel_parents(PlannerInfo *root, RelOptInfo *rel) +@@ -1264,6 +1266,7 @@ find_childrel_parents(PlannerInfo *root, RelOptInfo *rel) } @@ -517,7 +518,7 @@ index e105a4d5f1..c5bcc9d1d1 100644 /* * get_baserel_parampathinfo * Get the ParamPathInfo for a parameterized path for a base relation, -@@ -1347,6 +1350,10 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, +@@ -1332,6 +1335,10 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, ppi->ppi_req_outer = required_outer; ppi->ppi_rows = rows; ppi->ppi_clauses = pclauses; @@ -528,7 +529,7 @@ index e105a4d5f1..c5bcc9d1d1 100644 baserel->ppilist = lappend(baserel->ppilist, ppi); return ppi; -@@ -1572,6 +1579,10 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, +@@ -1557,6 +1564,10 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, ppi->ppi_req_outer = required_outer; ppi->ppi_rows = rows; ppi->ppi_clauses = NIL; @@ -540,10 +541,10 @@ index e105a4d5f1..c5bcc9d1d1 100644 return ppi; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c -index abe47dab86..0ef5f2c8da 100644 +index 821844ada3..85b2482114 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c -@@ -143,6 +143,7 @@ +@@ -147,6 +147,7 @@ /* Hooks for plugins to get control when we ask for stats */ get_relation_stats_hook_type get_relation_stats_hook = NULL; get_index_stats_hook_type get_index_stats_hook = NULL; @@ -551,29 
+552,28 @@ index abe47dab86..0ef5f2c8da 100644 static double eqsel_internal(PG_FUNCTION_ARGS, bool negate); static double eqjoinsel_inner(Oid opfuncoid, Oid collation, -@@ -3293,6 +3294,20 @@ add_unique_group_var(PlannerInfo *root, List *varinfos, +@@ -3295,6 +3296,19 @@ add_unique_group_var(PlannerInfo *root, List *varinfos, return varinfos; } +double +estimate_num_groups_ext(PlannerInfo *root, List *groupExprs, Path *subpath, -+ RelOptInfo *grouped_rel, List **pgset, -+ EstimationInfo *estinfo) ++ RelOptInfo *grouped_rel, List **pgset) +{ + double input_rows = subpath->rows; + + if (estimate_num_groups_hook != NULL) -+ return (*estimate_num_groups_hook)(root, groupExprs, subpath, grouped_rel, -+ pgset, estinfo); ++ return (*estimate_num_groups_hook)(root, groupExprs, subpath, ++ grouped_rel, pgset); + -+ return estimate_num_groups(root, groupExprs, input_rows, pgset, estinfo); ++ return estimate_num_groups(root, groupExprs, input_rows, pgset); +} + /* * estimate_num_groups - Estimate number of groups in a grouped query * diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h -index e94d9e49cf..49236ced77 100644 +index ba661d32a6..09d0abe58b 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h @@ -75,6 +75,18 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; @@ -596,10 +596,10 @@ index e94d9e49cf..49236ced77 100644 extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, ParamListInfo params, DestReceiver *dest); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h -index f16466a0df..c48d969ba8 100644 +index 5ebf070979..5b2acd7de2 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h -@@ -756,6 +756,10 @@ typedef struct RelOptInfo +@@ -739,6 +739,10 @@ typedef struct RelOptInfo Relids top_parent_relids; /* Relids of topmost parents (if "other" * rel) */ @@ -610,22 +610,20 @@ index f16466a0df..c48d969ba8 100644 /* used for partitioned 
relations: */ PartitionScheme part_scheme; /* Partitioning scheme */ int nparts; /* Number of partitions; -1 if not yet set; in -@@ -770,7 +774,13 @@ typedef struct RelOptInfo - Relids all_partrels; /* Relids set of all partition relids */ +@@ -754,6 +758,12 @@ typedef struct RelOptInfo List **partexprs; /* Non-nullable partition key expressions */ List **nullable_partexprs; /* Nullable partition key expressions */ --} RelOptInfo; + List *partitioned_child_rels; /* List of RT indexes */ + + /* + * At this list an extension can add additional nodes to pass an info along + * the planning and executing stages. + */ -+ List *ext_nodes; -+} RelOptInfo; ++ List *ext_nodes; + } RelOptInfo; /* - * Is given relation partitioned? -@@ -1138,6 +1148,10 @@ typedef struct ParamPathInfo +@@ -1105,6 +1115,10 @@ typedef struct ParamPathInfo Relids ppi_req_outer; /* rels supplying parameters used by path */ double ppi_rows; /* estimated number of result tuples */ List *ppi_clauses; /* join clauses available from outer rels */ @@ -637,24 +635,24 @@ index f16466a0df..c48d969ba8 100644 diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 9ac4d9af12..6e20cd28c8 100644 +index 90f02ce6fd..f3e2138ee2 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h -@@ -158,6 +158,12 @@ typedef struct Plan +@@ -159,6 +159,12 @@ typedef struct Plan */ Bitmapset *extParam; Bitmapset *allParam; + + /* -+ * Additional fields for purposes of an extension. ++ * Additional fields for an extension purposes. + * TODO: allow to serialize/deserialize this list. 
+ */ -+ List *ext_nodes; ++ List *ext_nodes; } Plan; /* ---------------- diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h -index 2113bc82de..bcc2520cec 100644 +index 6141654e47..e6b28cbb05 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -39,6 +39,37 @@ typedef enum @@ -695,7 +693,7 @@ index 2113bc82de..bcc2520cec 100644 /* * prototypes for costsize.c * routines to compute costs and sizes -@@ -180,10 +211,22 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, +@@ -175,10 +206,22 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, SpecialJoinInfo *sjinfo, List *restrictlist, SemiAntiJoinFactors *semifactors); @@ -718,7 +716,7 @@ index 2113bc82de..bcc2520cec 100644 extern double get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, Path *outer_path, -@@ -195,6 +238,11 @@ extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -190,6 +233,11 @@ extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *inner_rel, SpecialJoinInfo *sjinfo, List *restrictlist); @@ -730,7 +728,7 @@ index 2113bc82de..bcc2520cec 100644 extern void set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern void set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern void set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel); -@@ -207,5 +255,6 @@ extern void set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel); +@@ -202,5 +250,6 @@ extern void set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern PathTarget *set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target); extern double compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, int loop_count, Cost *cost, double *tuple); @@ -738,7 +736,7 @@ index 2113bc82de..bcc2520cec 100644 #endif /* COST_H */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h -index 
2922c0cdc1..c59dce6989 100644 +index 3bd7072ae8..21bbaba11c 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -18,6 +18,10 @@ @@ -753,7 +751,7 @@ index 2922c0cdc1..c59dce6989 100644 * prototypes for pathnode.c */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h -index bf1adfc52a..9c78e0f4e0 100644 +index 8ce60e202e..75415102c2 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -24,6 +24,12 @@ extern double cursor_tuple_fraction; @@ -770,10 +768,10 @@ index bf1adfc52a..9c78e0f4e0 100644 * prototypes for plan/planmain.c */ diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h -index 9dd444e1ff..b0b5a65618 100644 +index 7ac4a06391..def3522881 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h -@@ -144,6 +144,13 @@ typedef bool (*get_index_stats_hook_type) (PlannerInfo *root, +@@ -127,6 +127,12 @@ typedef bool (*get_index_stats_hook_type) (PlannerInfo *root, AttrNumber indexattnum, VariableStatData *vardata); extern PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook; @@ -781,19 +779,18 @@ index 9dd444e1ff..b0b5a65618 100644 + List *groupExprs, + Path *subpath, + RelOptInfo *grouped_rel, -+ List **pgset, -+ EstimationInfo *estinfo); ++ List **pgset); +extern PGDLLIMPORT estimate_num_groups_hook_type estimate_num_groups_hook; /* Functions in selfuncs.c */ -@@ -210,6 +217,9 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause, - Selectivity *leftstart, Selectivity *leftend, - Selectivity *rightstart, Selectivity *rightend); +@@ -195,6 +201,9 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause, + extern double estimate_num_groups(PlannerInfo *root, List *groupExprs, + double input_rows, List **pgset); +extern double estimate_num_groups_ext(PlannerInfo *root, List *groupExprs, + Path *subpath, RelOptInfo *grouped_rel, -+ List **pgset, EstimationInfo *estinfo); - extern double 
estimate_num_groups(PlannerInfo *root, List *groupExprs, - double input_rows, List **pgset, - EstimationInfo *estinfo); ++ List **pgset); + + extern void estimate_hash_bucket_stats(PlannerInfo *root, + Node *hashkey, double nbuckets, diff --git a/aqo_shared.c b/aqo_shared.c index 84e6eadb..2ec063e7 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -4,9 +4,12 @@ #include "postgres.h" +#include "lib/dshash.h" +#include "miscadmin.h" #include "storage/shmem.h" #include "aqo_shared.h" +#include "storage.h" typedef struct @@ -23,11 +26,14 @@ shmem_startup_hook_type prev_shmem_startup_hook = NULL; AQOSharedState *aqo_state = NULL; HTAB *fss_htab = NULL; static int aqo_htab_max_items = 1000; +int fs_max_items = 1; /* Max number of different feature spaces in ML model */ +int fss_max_items = 1; /* Max number of different feature subspaces in ML model */ static uint32 temp_storage_size = 1024 * 1024 * 10; /* Storage size, in bytes */ static dsm_segment *seg = NULL; static void aqo_detach_shmem(int code, Datum arg); +static void on_shmem_shutdown(int code, Datum arg); void * @@ -169,27 +175,94 @@ aqo_init_shmem(void) bool found; HASHCTL info; + if (prev_shmem_startup_hook) + prev_shmem_startup_hook(); + aqo_state = NULL; fss_htab = NULL; + stat_htab = NULL; + qtexts_htab = NULL; + data_htab = NULL; + queries_htab = NULL; LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); - aqo_state = ShmemInitStruct("aqo", sizeof(AQOSharedState), &found); + aqo_state = ShmemInitStruct("AQO", sizeof(AQOSharedState), &found); if (!found) { /* First time through ... 
*/ + LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId()); aqo_state->dsm_handler = DSM_HANDLE_INVALID; + + aqo_state->qtexts_dsa_handler = DSM_HANDLE_INVALID; + aqo_state->qtext_trancheid = LWLockNewTrancheId(); + aqo_state->qtexts_changed = false; + aqo_state->data_dsa_handler = DSM_HANDLE_INVALID; + aqo_state->data_changed = false; + aqo_state->queries_changed = false; + + LWLockInitialize(&aqo_state->stat_lock, LWLockNewTrancheId()); + LWLockInitialize(&aqo_state->qtexts_lock, LWLockNewTrancheId()); + LWLockInitialize(&aqo_state->data_lock, LWLockNewTrancheId()); + LWLockInitialize(&aqo_state->queries_lock, LWLockNewTrancheId()); } info.keysize = sizeof(htab_key); info.entrysize = sizeof(htab_entry); - fss_htab = ShmemInitHash("aqo hash", + fss_htab = ShmemInitHash("AQO hash", aqo_htab_max_items, aqo_htab_max_items, &info, HASH_ELEM | HASH_BLOBS); + info.keysize = sizeof(((StatEntry *) 0)->queryid); + info.entrysize = sizeof(StatEntry); + stat_htab = ShmemInitHash("AQO Stat HTAB", fs_max_items, fs_max_items, + &info, HASH_ELEM | HASH_BLOBS); + + /* Init shared memory table for query texts */ + info.keysize = sizeof(((QueryTextEntry *) 0)->queryid); + info.entrysize = sizeof(QueryTextEntry); + qtexts_htab = ShmemInitHash("AQO Query Texts HTAB", fs_max_items, fs_max_items, + &info, HASH_ELEM | HASH_BLOBS); + + /* Shared memory hash table for the data */ + info.keysize = sizeof(data_key); + info.entrysize = sizeof(DataEntry); + data_htab = ShmemInitHash("AQO Data HTAB", fss_max_items, fss_max_items, + &info, HASH_ELEM | HASH_BLOBS); + + /* Shared memory hash table for queries */ + info.keysize = sizeof(((QueriesEntry *) 0)->queryid); + info.entrysize = sizeof(QueriesEntry); + queries_htab = ShmemInitHash("AQO Queries HTAB", fs_max_items, fs_max_items, + &info, HASH_ELEM | HASH_BLOBS); + LWLockRelease(AddinShmemInitLock); - LWLockRegisterTranche(aqo_state->lock.tranche, "aqo"); + LWLockRegisterTranche(aqo_state->lock.tranche, "AQO"); + 
LWLockRegisterTranche(aqo_state->stat_lock.tranche, "AQO Stat Lock Tranche"); + LWLockRegisterTranche(aqo_state->qtexts_lock.tranche, "AQO QTexts Lock Tranche"); + LWLockRegisterTranche(aqo_state->qtext_trancheid, "AQO Query Texts Tranche"); + LWLockRegisterTranche(aqo_state->data_lock.tranche, "AQO Data Lock Tranche"); + LWLockRegisterTranche(aqo_state->queries_lock.tranche, "AQO Queries Lock Tranche"); + + if (!IsUnderPostmaster) + { + before_shmem_exit(on_shmem_shutdown, (Datum) 0); + + /* Doesn't use DSA, so can be loaded in postmaster */ + aqo_stat_load(); + aqo_queries_load(); + } +} + +/* + * Main idea here is to store all ML data in temp files on postmaster shutdown. + */ +static void +on_shmem_shutdown(int code, Datum arg) +{ + aqo_stat_flush(); + aqo_queries_flush(); } Size @@ -199,6 +272,11 @@ aqo_memsize(void) size = MAXALIGN(sizeof(AQOSharedState)); size = add_size(size, hash_estimate_size(aqo_htab_max_items, sizeof(htab_entry))); + size = add_size(size, hash_estimate_size(fs_max_items, sizeof(AQOSharedState))); + size = add_size(size, hash_estimate_size(fs_max_items, sizeof(StatEntry))); + size = add_size(size, hash_estimate_size(fs_max_items, sizeof(QueryTextEntry))); + size = add_size(size, hash_estimate_size(fss_max_items, sizeof(DataEntry))); + size = add_size(size, hash_estimate_size(fs_max_items, sizeof(QueriesEntry))); return size; } diff --git a/aqo_shared.h b/aqo_shared.h index eb5323e0..61c0d3d0 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -5,6 +5,8 @@ #include "storage/dsm.h" #include "storage/ipc.h" #include "storage/lwlock.h" +#include "utils/dsa.h" +#include "lib/dshash.h" #define AQO_SHARED_MAGIC 0x053163 @@ -25,6 +27,22 @@ typedef struct AQOSharedState { LWLock lock; /* mutual exclusion */ dsm_handle dsm_handler; + + /* Storage fields */ + LWLock stat_lock; /* lock for access to stat storage */ + bool stat_changed; + + LWLock qtexts_lock; /* Lock for shared fields below */ + dsa_handle qtexts_dsa_handler; /* DSA area for storing of 
query texts */ + int qtext_trancheid; + bool qtexts_changed; + + LWLock data_lock; /* Lock for shared fields below */ + dsa_handle data_dsa_handler; + bool data_changed; + + LWLock queries_lock; /* lock for access to queries storage */ + bool queries_changed; } AQOSharedState; @@ -32,6 +50,8 @@ extern shmem_startup_hook_type prev_shmem_startup_hook; extern AQOSharedState *aqo_state; extern HTAB *fss_htab; +extern int fs_max_items; /* Max number of feature spaces that AQO can operate */ +extern int fss_max_items; extern Size aqo_memsize(void); extern void reset_dsm_cache(void); diff --git a/auto_tuning.c b/auto_tuning.c index fb7e1eed..7a15e516 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -18,6 +18,7 @@ #include "postgres.h" #include "aqo.h" +#include "storage.h" /* * Auto tuning criteria criteria of an query convergence by overall cardinality @@ -35,7 +36,7 @@ static bool is_in_infinite_loop_cq(double *elems, int nelems); /* * Returns mean value of the array of doubles. */ -double +static double get_mean(double *elems, int nelems) { double sum = 0; @@ -52,7 +53,7 @@ get_mean(double *elems, int nelems) * Having a time series it tries to predict its next value. * Now it do simple window averaging. */ -double +static double get_estimation(double *elems, int nelems) { int start; @@ -70,7 +71,7 @@ get_estimation(double *elems, int nelems) /* * Checks whether the series is stable with absolute or relative error. */ -bool +static bool is_stable(double *elems, int nelems) { double est, @@ -91,7 +92,7 @@ is_stable(double *elems, int nelems) * Now it checks whether the cardinality quality stopped decreasing with * absolute or relative error. */ -bool +static bool converged_cq(double *elems, int nelems) { if (nelems < auto_tuning_window_size + 2) @@ -107,7 +108,7 @@ converged_cq(double *elems, int nelems) * Now it checks whether the cardinality quality stopped decreasing with * absolute or relative error 0.1. 
*/ -bool +static bool is_in_infinite_loop_cq(double *elems, int nelems) { if (nelems - auto_tuning_infinite_loop < auto_tuning_window_size + 2) @@ -144,22 +145,21 @@ is_in_infinite_loop_cq(double *elems, int nelems) * this query to false. */ void -automatical_query_tuning(uint64 qhash, QueryStat * stat) +automatical_query_tuning(uint64 queryid, StatEntry *stat) { - double unstability = auto_tuning_exploration; - double t_aqo, - t_not_aqo; - double p_use = -1; - int64 num_iterations; + double unstability = auto_tuning_exploration; + double t_aqo, + t_not_aqo; + double p_use = -1; + int64 num_iterations; - num_iterations = stat->executions_with_aqo + stat->executions_without_aqo; + num_iterations = stat->execs_with_aqo + stat->execs_without_aqo; query_context.learn_aqo = true; - if (stat->executions_without_aqo < auto_tuning_window_size + 1) + if (stat->execs_without_aqo < auto_tuning_window_size + 1) query_context.use_aqo = false; - else if (!converged_cq(stat->cardinality_error_with_aqo, - stat->cardinality_error_with_aqo_size) && - !is_in_infinite_loop_cq(stat->cardinality_error_with_aqo, - stat->cardinality_error_with_aqo_size)) + else if (!converged_cq(stat->est_error_aqo, stat->cur_stat_slot_aqo) && + !is_in_infinite_loop_cq(stat->est_error_aqo, + stat->cur_stat_slot_aqo)) query_context.use_aqo = true; else { @@ -168,15 +168,11 @@ automatical_query_tuning(uint64 qhash, QueryStat * stat) * by execution time. It is volatile, probabilistic part of code. * XXX: this logic of auto tuning may be reworked later. 
*/ - t_aqo = get_estimation(stat->execution_time_with_aqo, - stat->execution_time_with_aqo_size) + - get_estimation(stat->planning_time_with_aqo, - stat->planning_time_with_aqo_size); + t_aqo = get_estimation(stat->exec_time_aqo, stat->cur_stat_slot_aqo) + + get_estimation(stat->plan_time_aqo, stat->cur_stat_slot_aqo); - t_not_aqo = get_estimation(stat->execution_time_without_aqo, - stat->execution_time_without_aqo_size) + - get_estimation(stat->planning_time_without_aqo, - stat->planning_time_without_aqo_size); + t_not_aqo = get_estimation(stat->exec_time, stat->cur_stat_slot) + + get_estimation(stat->plan_time, stat->cur_stat_slot); p_use = t_not_aqo / (t_not_aqo + t_aqo); @@ -199,15 +195,15 @@ automatical_query_tuning(uint64 qhash, QueryStat * stat) * If our decision is using AQO for this query class, then learn on new * queries of this type. Otherwise, turn off. */ - query_context.use_aqo = (random() / ((double) MAX_RANDOM_VALUE + 1)) < p_use; + query_context.use_aqo = + (random() / ((double) MAX_RANDOM_VALUE + 1)) < p_use; query_context.learn_aqo = query_context.use_aqo; } + if (num_iterations <= auto_tuning_max_iterations || p_use > 0.5) - update_query(qhash, - query_context.fspace_hash, - query_context.learn_aqo, - query_context.use_aqo, - true); + aqo_queries_store(queryid, query_context.fspace_hash, + query_context.learn_aqo, query_context.use_aqo, true); else - update_query(qhash, query_context.fspace_hash, false, false, false); + aqo_queries_store(queryid, + query_context.fspace_hash, false, false, false); } diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 7740528a..4baba286 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -22,18 +22,19 @@ #include "aqo.h" #include "hash.h" #include "machine_learning.h" +#include "storage.h" #ifdef AQO_DEBUG_PRINT static void predict_debug_output(List *clauses, List *selectivities, - List *relnames, int fss, double result) + List *reloids, int fss, double result) { 
StringInfoData debug_str; ListCell *lc; initStringInfo(&debug_str); appendStringInfo(&debug_str, "fss: %d, clausesNum: %d, ", - fss_hash, list_length(clauses)); + fss, list_length(clauses)); appendStringInfoString(&debug_str, ", selectivities: { "); foreach(lc, selectivities) @@ -42,16 +43,15 @@ predict_debug_output(List *clauses, List *selectivities, appendStringInfo(&debug_str, "%lf ", *s); } - appendStringInfoString(&debug_str, "}, relnames: { "); - foreach(lc, relnames) + appendStringInfoString(&debug_str, "}, reloids: { "); + foreach(lc, reloids) { - Value *relname = lfirst_node(String, lc); - appendStringInfo(&debug_str, "%s ", valStr(relname)); + Oid relname = lfirst_oid(lc); + appendStringInfo(&debug_str, "%d ", relname); } appendStringInfo(&debug_str, "}, result: %lf", result); elog(DEBUG1, "Prediction: %s", debug_str.data); - pfree(debug_str.data); } #endif @@ -59,49 +59,50 @@ predict_debug_output(List *clauses, List *selectivities, * General method for prediction the cardinality of given relation. */ double -predict_for_relation(List *clauses, List *selectivities, - List *relnames, int *fss) +predict_for_relation(List *clauses, List *selectivities, List *relsigns, + int *fss) { double *features; double result; - int i; - OkNNrdata data; + int ncols; + OkNNrdata *data; - if (relnames == NIL) + if (relsigns == NIL) /* * Don't make prediction for query plans without any underlying plane * tables. Use return value -4 for debug purposes. 
*/ return -4.; - *fss = get_fss_for_object(relnames, clauses, selectivities, - &data.cols, &features); + *fss = get_fss_for_object(relsigns, clauses, selectivities, + &ncols, &features); + data = OkNNr_allocate(ncols); - if (data.cols > 0) - for (i = 0; i < aqo_K; ++i) - data.matrix[i] = palloc0(sizeof(double) * data.cols); - - if (load_fss_ext(query_context.fspace_hash, *fss, &data, NULL, true)) - result = OkNNr_predict(&data, features); + if (load_fss_ext(query_context.fspace_hash, *fss, data, NULL, true)) + result = OkNNr_predict(data, features); else { /* * Due to planning optimizer tries to build many alternate paths. Many - * of these not used in final query execution path. Consequently, only - * small part of paths was used for AQO learning and fetch into the AQO - * knowledge base. + * of them aren't used in final query execution path. Consequently, only + * small part of paths was used for AQO learning and stored into + * the AQO knowledge base. */ - result = -1; + + /* Try to search in surrounding feature spaces for the same node */ + if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, true)) + result = -1; + else + { + elog(DEBUG5, "[AQO] Make prediction for fss %d by a neighbour " + "includes %d feature(s) and %d fact(s).", + *fss, data->cols, data->rows); + result = OkNNr_predict(data, features); + } } #ifdef AQO_DEBUG_PRINT - predict_debug_output(clauses, selectivities, relnames, *fss, result); + predict_debug_output(clauses, selectivities, relsigns, *fss, result); #endif - pfree(features); - if (data.cols > 0) - { - for (i = 0; i < aqo_K; ++i) - pfree(data.matrix[i]); - } if (result < 0) return -1; diff --git a/cardinality_hooks.c b/cardinality_hooks.c index aa198cc9..fe55b992 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -117,7 +117,7 @@ default_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, static double default_estimate_num_groups(PlannerInfo *root, List *groupExprs, Path *subpath, RelOptInfo 
*grouped_rel, - List **pgset, EstimationInfo *estinfo) + List **pgset) { double input_rows = subpath->rows; @@ -125,9 +125,9 @@ default_estimate_num_groups(PlannerInfo *root, List *groupExprs, return (*prev_estimate_num_groups_hook)(root, groupExprs, subpath, grouped_rel, - pgset, estinfo); + pgset); else - return estimate_num_groups(root, groupExprs, input_rows, pgset, estinfo); + return estimate_num_groups(root, groupExprs, input_rows, pgset, NULL); } /* @@ -138,26 +138,27 @@ default_estimate_num_groups(PlannerInfo *root, List *groupExprs, void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) { - double predicted; - RangeTblEntry *rte; - List *relnames = NIL; - List *selectivities = NULL; - List *clauses; - int fss = 0; + double predicted; + RangeTblEntry *rte; + RelSortOut rels = {NIL, NIL}; + List *selectivities = NULL; + List *clauses; + int fss = 0; + MemoryContext old_ctx_m; if (IsQueryDisabled()) /* Fast path. */ goto default_estimator; + old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); + if (query_context.use_aqo || query_context.learn_aqo) selectivities = get_selectivities(root, rel->baserestrictinfo, 0, JOIN_INNER, NULL); if (!query_context.use_aqo) { - if (query_context.learn_aqo) - list_free_deep(selectivities); - + MemoryContextSwitchTo(old_ctx_m); goto default_estimator; } @@ -166,16 +167,16 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) { /* Predict for a plane table. */ Assert(rte->eref && rte->eref->aliasname); - relnames = list_make1(makeString(pstrdup(rte->eref->aliasname))); + get_list_of_relids(root, rel->relids, &rels); } clauses = aqo_get_clauses(root, rel->baserestrictinfo); - predicted = predict_for_relation(clauses, selectivities, relnames, &fss); + predicted = predict_for_relation(clauses, selectivities, rels.signatures, + &fss); rel->fss_hash = fss; - list_free_deep(selectivities); - list_free(clauses); - list_free(relnames); + /* Return to the caller's memory context. 
*/ + MemoryContextSwitchTo(old_ctx_m); if (predicted >= 0) { @@ -212,7 +213,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, { double predicted; RangeTblEntry *rte = NULL; - List *relnames = NIL; + RelSortOut rels = {NIL, NIL}; List *allclauses = NULL; List *selectivities = NULL; ListCell *l; @@ -222,14 +223,16 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, int *eclass_hash; int current_hash; int fss = 0; + MemoryContext oldctx; if (IsQueryDisabled()) /* Fast path */ goto default_estimator; + oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); + if (query_context.use_aqo || query_context.learn_aqo) { - MemoryContext old_ctx_m; selectivities = list_concat( get_selectivities(root, param_clauses, rel->relid, @@ -241,11 +244,10 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, /* Make specific copy of clauses with mutated subplans */ allclauses = list_concat(aqo_get_clauses(root, param_clauses), aqo_get_clauses(root, rel->baserestrictinfo)); + rte = planner_rt_fetch(rel->relid, root); get_eclasses(allclauses, &nargs, &args_hash, &eclass_hash); - old_ctx_m = MemoryContextSwitchTo(AQO_cache_mem_ctx); - forboth(l, allclauses, l2, selectivities) { current_hash = get_clause_hash( @@ -254,19 +256,11 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, cache_selectivity(current_hash, rel->relid, rte->relid, *((double *) lfirst(l2))); } - - MemoryContextSwitchTo(old_ctx_m); - pfree(args_hash); - pfree(eclass_hash); } if (!query_context.use_aqo) { - if (query_context.learn_aqo) - { - list_free_deep(selectivities); - list_free(allclauses); - } + MemoryContextSwitchTo(oldctx); goto default_estimator; } @@ -275,10 +269,13 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, { /* Predict for a plane table. 
*/ Assert(rte->eref && rte->eref->aliasname); - relnames = list_make1(makeString(pstrdup(rte->eref->aliasname))); + get_list_of_relids(root, rel->relids, &rels); } - predicted = predict_for_relation(allclauses, selectivities, relnames, &fss); + predicted = predict_for_relation(allclauses, selectivities, rels.signatures, &fss); + + /* Return to the caller's memory context */ + MemoryContextSwitchTo(oldctx); predicted_ppi_rows = predicted; fss_ppi_hash = fss; @@ -303,7 +300,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, List *restrictlist) { double predicted; - List *relnames; + RelSortOut rels = {NIL, NIL}; List *outer_clauses; List *inner_clauses; List *allclauses; @@ -312,24 +309,24 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, List *outer_selectivities; List *current_selectivities = NULL; int fss = 0; + MemoryContext old_ctx_m; if (IsQueryDisabled()) /* Fast path */ goto default_estimator; + old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); + if (query_context.use_aqo || query_context.learn_aqo) current_selectivities = get_selectivities(root, restrictlist, 0, sjinfo->jointype, sjinfo); - if (!query_context.use_aqo) { - if (query_context.learn_aqo) - list_free_deep(current_selectivities); - + MemoryContextSwitchTo(old_ctx_m); goto default_estimator; } - relnames = get_relnames(root, rel->relids); + get_list_of_relids(root, rel->relids, &rels); outer_clauses = get_path_clauses(outer_rel->cheapest_total_path, root, &outer_selectivities); inner_clauses = get_path_clauses(inner_rel->cheapest_total_path, root, @@ -340,7 +337,12 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, list_concat(outer_selectivities, inner_selectivities)); - predicted = predict_for_relation(allclauses, selectivities, relnames, &fss); + predicted = predict_for_relation(allclauses, selectivities, rels.signatures, + &fss); + + /* Return to the caller's memory context */ + MemoryContextSwitchTo(old_ctx_m); + rel->fss_hash = 
fss; if (predicted >= 0) @@ -371,7 +373,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, List *clauses) { double predicted; - List *relnames; + RelSortOut rels = {NIL, NIL}; List *outer_clauses; List *inner_clauses; List *allclauses; @@ -380,24 +382,25 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, List *outer_selectivities; List *current_selectivities = NULL; int fss = 0; + MemoryContext old_ctx_m; if (IsQueryDisabled()) /* Fast path */ goto default_estimator; + old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); + if (query_context.use_aqo || query_context.learn_aqo) current_selectivities = get_selectivities(root, clauses, 0, sjinfo->jointype, sjinfo); if (!query_context.use_aqo) { - if (query_context.learn_aqo) - list_free_deep(current_selectivities); - + MemoryContextSwitchTo(old_ctx_m); goto default_estimator; } - relnames = get_relnames(root, rel->relids); + get_list_of_relids(root, rel->relids, &rels); outer_clauses = get_path_clauses(outer_path, root, &outer_selectivities); inner_clauses = get_path_clauses(inner_path, root, &inner_selectivities); allclauses = list_concat(aqo_get_clauses(root, clauses), @@ -406,7 +409,10 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, list_concat(outer_selectivities, inner_selectivities)); - predicted = predict_for_relation(allclauses, selectivities, relnames, &fss); + predicted = predict_for_relation(allclauses, selectivities, rels.signatures, + &fss); + /* Return to the caller's memory context */ + MemoryContextSwitchTo(old_ctx_m); predicted_ppi_rows = predicted; fss_ppi_hash = fss; @@ -433,13 +439,14 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, child_fss = subpath->parent->fss_hash; else { - List *relnames; - List *clauses; - List *selectivities = NIL; + RelSortOut rels = {NIL, NIL}; + List *clauses; + List *selectivities = NIL; - relnames = get_relnames(root, subpath->parent->relids); + get_list_of_relids(root, subpath->parent->relids, &rels); clauses = 
get_path_clauses(subpath, root, &selectivities); - (void) predict_for_relation(clauses, selectivities, relnames, &child_fss); + (void) predict_for_relation(clauses, selectivities, rels.signatures, + &child_fss); } *fss = get_grouped_exprs_hash(child_fss, group_exprs); @@ -456,10 +463,11 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, double aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, Path *subpath, RelOptInfo *grouped_rel, - List **pgset, EstimationInfo *estinfo) + List **pgset) { int fss; double predicted; + MemoryContext old_ctx_m; if (!query_context.use_aqo) goto default_estimator; @@ -471,19 +479,18 @@ aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, if (prev_estimate_num_groups_hook != NULL) elog(WARNING, "AQO replaced another estimator of a groups number"); - /* Zero the estinfo output parameter, if non-NULL */ - if (estinfo != NULL) - memset(estinfo, 0, sizeof(EstimationInfo)); - if (groupExprs == NIL) return 1.0; + old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); + predicted = predict_num_groups(root, subpath, groupExprs, &fss); if (predicted > 0.) 
{ grouped_rel->predicted_cardinality = predicted; grouped_rel->rows = predicted; grouped_rel->fss_hash = fss; + MemoryContextSwitchTo(old_ctx_m); return predicted; } else @@ -493,7 +500,9 @@ aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, */ grouped_rel->predicted_cardinality = -1; + MemoryContextSwitchTo(old_ctx_m); + default_estimator: return default_estimate_num_groups(root, groupExprs, subpath, grouped_rel, - pgset, estinfo); + pgset); } diff --git a/cardinality_hooks.h b/cardinality_hooks.h index c34f9315..0e8c65c0 100644 --- a/cardinality_hooks.h +++ b/cardinality_hooks.h @@ -26,7 +26,6 @@ extern double aqo_get_parameterized_joinrel_size(PlannerInfo *root, extern double aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, Path *subpath, RelOptInfo *grouped_rel, - List **pgset, - EstimationInfo *estinfo); + List **pgset); #endif /* CARDINALITY_HOOKS_H */ diff --git a/conf.add b/conf.add index 42c0a01d..ed455870 100644 --- a/conf.add +++ b/conf.add @@ -1,3 +1,3 @@ autovacuum = off shared_preload_libraries = 'postgres_fdw, aqo' -max_parallel_workers_per_gather = 1 # switch off parallel workers because of unsteadiness \ No newline at end of file +max_parallel_workers_per_gather = 1 # switch off parallel workers because of unsteadiness diff --git a/expected/aqo_CVE-2020-14350.out b/expected/aqo_CVE-2020-14350.out index 46a74be3..ccdc4694 100644 --- a/expected/aqo_CVE-2020-14350.out +++ b/expected/aqo_CVE-2020-14350.out @@ -2,10 +2,6 @@ * Check fix for CVE-2020-14350. * See also 7eeb1d986 postgresql commit. */ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. 
-SET max_parallel_workers TO 0; SET client_min_messages = 'warning'; DROP ROLE IF EXISTS regress_hacker; SET client_min_messages = 'notice'; @@ -31,6 +27,7 @@ END $$ LANGUAGE plpgsql RETURNS NULL ON NULL INPUT; RESET ROLE; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; -- Test result (must be 'off') SET ROLE regress_hacker; SHOW is_superuser; diff --git a/expected/aqo_controlled.out b/expected/aqo_controlled.out index 38f96f63..cf88bf42 100644 --- a/expected/aqo_controlled.out +++ b/expected/aqo_controlled.out @@ -1,7 +1,3 @@ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. -SET max_parallel_workers TO 0; CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -30,6 +26,7 @@ AS ( CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); ANALYZE aqo_test2; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 @@ -110,9 +107,15 @@ SELECT count(*) FROM tmp1; DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET auto_tuning=false; -UPDATE aqo_queries SET learn_aqo=true; -UPDATE aqo_queries SET use_aqo=false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, true, false, false) +; -- learn = true, use = false, tuning = false + count +------- + 12 +(1 row) + EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -194,7 +197,15 @@ WHERE t1.a = t2.b AND t2.a = t3.b; -> Seq Scan on aqo_test1 t3 (9 rows) -UPDATE aqo_queries SET use_aqo=true; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, NULL, true, NULL) +; -- set use = true + count +------- + 12 +(1 row) + EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -300,4 +311,11 
@@ DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; DROP INDEX aqo_test2_idx_a; DROP TABLE aqo_test2; +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + DROP EXTENSION aqo; diff --git a/expected/aqo_disabled.out b/expected/aqo_disabled.out index aeab0161..606d258e 100644 --- a/expected/aqo_disabled.out +++ b/expected/aqo_disabled.out @@ -1,7 +1,3 @@ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. -SET max_parallel_workers TO 0; CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -20,6 +16,60 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; +CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; +SET aqo.mode = 'controlled'; +CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 +WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; +SELECT count(*) FROM tmp1; + count +------- + 3 +(1 row) + +DROP TABLE tmp1; +CREATE TABLE tmp1 AS SELECT t1.a, t2.b, t3.c +FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM tmp1; + count +------- + 0 +(1 row) + +DROP TABLE tmp1; +EXPLAIN SELECT * FROM aqo_test0 +WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; + QUERY PLAN +---------------------------------------------------------------------------------- + Index Scan using aqo_test0_idx_a on aqo_test0 (cost=0.28..8.35 rows=1 width=16) + Index Cond: (a < 3) + Filter: ((b < 3) AND (c < 3) AND (d < 3)) +(3 rows) + +EXPLAIN SELECT t1.a, t2.b, t3.c +FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; + QUERY PLAN 
+------------------------------------------------------------------------------------------------ + Nested Loop (cost=0.28..50.59 rows=1 width=12) + Join Filter: (t1.b = t3.b) + -> Nested Loop (cost=0.28..9.56 rows=1 width=12) + -> Seq Scan on aqo_test1 t1 (cost=0.00..1.25 rows=1 width=8) + Filter: (a < 1) + -> Index Scan using aqo_test0_idx_a on aqo_test0 t2 (cost=0.28..8.30 rows=1 width=8) + Index Cond: (a = t1.a) + Filter: (c < 1) + -> Seq Scan on aqo_test0 t3 (cost=0.00..41.02 rows=1 width=8) + Filter: ((b < 1) AND (d < 0)) +(10 rows) + +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero + count +------- + 0 +(1 row) + SET aqo.mode = 'disabled'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -66,7 +116,12 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -CREATE EXTENSION aqo; +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero + count +------- + 0 +(1 row) + SET aqo.mode = 'intelligent'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -87,8 +142,22 @@ SELECT count(*) FROM tmp1; (1 row) DROP TABLE tmp1; +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero + count +------- + 0 +(1 row) + SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = true, use_aqo = true, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, true, true, false) +; -- Enable all disabled query classes + count +------- + 5 +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN @@ -115,6 +184,12 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero + count +------- + 0 +(1 row) + SET aqo.mode = 
'disabled'; EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -142,6 +217,19 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero + count +------- + 0 +(1 row) + +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + DROP EXTENSION aqo; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index 2bb43cea..36af3bd6 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -3,15 +3,12 @@ -- JOIN push-down (check push of baserestrictinfo and joininfo) -- Aggregate push-down -- Push-down of groupings with HAVING clause. --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. -SET max_parallel_workers TO 0; CREATE EXTENSION aqo; CREATE EXTENSION postgres_fdw; SET aqo.mode = 'learn'; SET aqo.show_details = 'true'; -- show AQO info for each node and entire query. SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. +SET aqo.join_threshold = 0; DO $d$ BEGIN EXECUTE $$CREATE SERVER loopback FOREIGN DATA WRAPPER postgres_fdw @@ -88,35 +85,26 @@ SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants -- Trivial JOIN push-down. 
SELECT str FROM expln(' -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) -SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; ') AS str WHERE str NOT LIKE '%Sort Method%'; - str ------------------------------------------------------------- - Merge Join (actual rows=1 loops=1) + str +------------------------------------------- + Foreign Scan (actual rows=1 loops=1) AQO not used - Merge Cond: (a.x = b.x) - -> Sort (actual rows=1 loops=1) - AQO not used - Sort Key: a.x - -> Foreign Scan on frgn a (actual rows=1 loops=1) - AQO not used - -> Sort (actual rows=1 loops=1) - AQO not used - Sort Key: b.x - -> Foreign Scan on frgn b (actual rows=1 loops=1) - AQO not used + Relations: (frgn a) INNER JOIN (frgn b) Using aqo: true AQO mode: LEARN JOINS: 0 -(16 rows) +(6 rows) +-- TODO: Should learn on postgres_fdw nodes EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) -SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; QUERY PLAN -------------------------------------------------------------------------------------------------------- Foreign Scan (actual rows=1 loops=1) - AQO: rows=1, error=0% + AQO not used Output: a.x, b.x Relations: (public.frgn a) INNER JOIN (public.frgn b) Remote SQL: SELECT r1.x, r2.x FROM (public.local r1 INNER JOIN public.local r2 ON (((r1.x = r2.x)))) diff --git a/expected/aqo_forced.out b/expected/aqo_forced.out index e6940227..091ead32 100644 --- a/expected/aqo_forced.out +++ b/expected/aqo_forced.out @@ -1,7 +1,3 @@ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. 
-SET max_parallel_workers TO 0; CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -21,6 +17,7 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 @@ -85,4 +82,11 @@ DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + DROP EXTENSION aqo; diff --git a/expected/aqo_intelligent.out b/expected/aqo_intelligent.out index 7e53b355..7ec943f5 100644 --- a/expected/aqo_intelligent.out +++ b/expected/aqo_intelligent.out @@ -1,7 +1,3 @@ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. -SET max_parallel_workers TO 0; CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -21,6 +17,7 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -292,7 +289,15 @@ SELECT count(*) FROM tmp1; DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = false, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, false, false) +; -- Disable all query classes + count +------- + 25 +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN @@ -397,7 +402,15 @@ WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -> Seq Scan on aqo_test1 t4 (cost=0.00..1.20 rows=20 width=8) (13 rows) -UPDATE aqo_queries SET learn_aqo = false, 
use_aqo = true, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, true, false) +; -- set learn = false, use = true, tuning = false + count +------- + 25 +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN @@ -506,4 +519,11 @@ DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + DROP EXTENSION aqo; diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index df1c66ff..aed72fc9 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -1,7 +1,23 @@ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. -SET max_parallel_workers TO 0; +-- The function just copied from stats_ext.sql +create function check_estimated_rows(text) returns table (estimated int, actual int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain analyze %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)'); + return query select tmp[1]::int, tmp[2]::int; + end if; + end loop; +end; +$$; CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -21,6 +37,7 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -218,9 +235,57 @@ SELECT count(*) FROM tmp1; 17 (1 row) +-- Remove data on some unneeded instances of tmp1 table. 
+SELECT * FROM aqo_cleanup(); + nfs | nfss +-----+------ + 9 | 18 +(1 row) + +-- Result of the query below should be empty +SELECT * FROM aqo_query_texts aqt1, aqo_query_texts aqt2 +WHERE aqt1.query_text = aqt2.query_text AND aqt1.queryid <> aqt2.queryid; + queryid | query_text | queryid | query_text +---------+------------+---------+------------ +(0 rows) + +-- Fix the state of the AQO data +SELECT min(reliability),sum(nfeatures),query_text +FROM aqo_data ad, aqo_query_texts aqt +WHERE aqt.queryid = ad.fs +GROUP BY (query_text) ORDER BY (md5(query_text)) +; + min | sum | query_text +---------+-----+---------------------------------------------------------------------------------------- + {1} | 10 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; + {1,1,1} | 4 | EXPLAIN SELECT * FROM aqo_test0 + + | | WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; + {1} | 0 | SELECT count(*) FROM tmp1; + {1} | 14 | EXPlAIN SELECT t1.a, t2.b, t3.c + + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; + {1} | 8 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; + {1} | 3 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + + | | WHERE t1.a = t2.b AND t2.a = t3.b; + {1} | 6 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +(7 rows) + DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = false, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, false, false) +; -- Disable all 
AQO query classes + count +------- + 8 +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN @@ -272,7 +337,7 @@ WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; Filter: ((b < 1) AND (c < 1) AND (d < 1)) (6 rows) -EXPlAIN SELECT t1.a, t2.b, t3.c +EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; QUERY PLAN @@ -289,21 +354,15 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 -WHERE t1.a = t2.b AND t2.a = t3.b; - QUERY PLAN -------------------------------------------------------------------------------- - Hash Join (cost=2.90..4.65 rows=20 width=12) - Hash Cond: (t2.a = t3.b) - -> Hash Join (cost=1.45..2.92 rows=20 width=8) - Hash Cond: (t1.a = t2.b) - -> Seq Scan on aqo_test1 t1 (cost=0.00..1.20 rows=20 width=4) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t2 (cost=0.00..1.20 rows=20 width=8) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t3 (cost=0.00..1.20 rows=20 width=8) -(9 rows) +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + WHERE t1.a = t2.b AND t2.a = t3.b; +'); + estimated | actual +-----------+-------- + 20 | 18 +(1 row) EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 @@ -325,7 +384,15 @@ WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -> Seq Scan on aqo_test1 t4 (cost=0.00..1.20 rows=20 width=8) (13 rows) -UPDATE aqo_queries SET learn_aqo = false, use_aqo = true, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL 
aqo_queries_update(q1.id, NULL, false, true, false) +; -- learn = false, use = true, tuning = false + count +------- + 8 +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN @@ -377,7 +444,7 @@ WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; Filter: ((b < 1) AND (c < 1) AND (d < 1)) (6 rows) -EXPlAIN SELECT t1.a, t2.b, t3.c +EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; QUERY PLAN @@ -394,44 +461,266 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 -WHERE t1.a = t2.b AND t2.a = t3.b; - QUERY PLAN -------------------------------------------------------------------------------- - Hash Join (cost=2.90..4.64 rows=18 width=12) - Hash Cond: (t2.a = t3.b) - -> Hash Join (cost=1.45..2.92 rows=19 width=8) - Hash Cond: (t1.a = t2.b) - -> Seq Scan on aqo_test1 t1 (cost=0.00..1.20 rows=20 width=4) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t2 (cost=0.00..1.20 rows=20 width=8) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t3 (cost=0.00..1.20 rows=20 width=8) -(9 rows) +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + WHERE t1.a = t2.b AND t2.a = t3.b; +'); + estimated | actual +-----------+-------- + 18 | 18 +(1 row) -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 -WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; - QUERY PLAN -------------------------------------------------------------------------------------- - Hash Join (cost=4.35..6.33 rows=17 width=16) - Hash Cond: (t3.a = t4.b) - -> 
Hash Join (cost=2.90..4.64 rows=18 width=12) - Hash Cond: (t2.a = t3.b) - -> Hash Join (cost=1.45..2.92 rows=19 width=8) - Hash Cond: (t1.a = t2.b) - -> Seq Scan on aqo_test1 t1 (cost=0.00..1.20 rows=20 width=4) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t2 (cost=0.00..1.20 rows=20 width=8) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t3 (cost=0.00..1.20 rows=20 width=8) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t4 (cost=0.00..1.20 rows=20 width=8) -(13 rows) +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +'); + estimated | actual +-----------+-------- + 17 | 17 +(1 row) + +-- Test limit on number of joins +SET aqo.mode = 'learn'; +SELECT * FROM aqo_drop_class(0); +ERROR: [AQO] Cannot remove basic class 0. +SELECT * FROM aqo_drop_class(42); +ERROR: [AQO] Nothing to remove for the class 42. 
+-- Remove all data from ML knowledge base +SELECT count(*) FROM ( +SELECT aqo_drop_class(q1.id::bigint) FROM ( + SELECT queryid AS id + FROM aqo_queries WHERE queryid <> 0) AS q1 +) AS q2; + count +------- + 7 +(1 row) + +SELECT count(*) FROM aqo_data; + count +------- + 0 +(1 row) + +SET aqo.join_threshold = 3; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); + estimated | actual +-----------+-------- + 20 | 19 +(1 row) + +SELECT count(*) FROM aqo_data; -- Return 0 - do not learn on the queries above + count +------- + 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT * + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +'); -- Learn on the query + estimated | actual +-----------+-------- + 20 | 17 +(1 row) + +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; + count +------- + 1 +(1 row) + +SELECT query_text FROM aqo_query_texts WHERE queryid <> 0; -- Check query + query_text +---------------------------------------------------------------------------- + explain analyze + + SELECT * + + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4+ + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; + + +(1 row) + +SET aqo.join_threshold = 1; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT * FROM check_estimated_rows( + 'SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); + estimated | actual +-----------+-------- + 19 | 19 +(1 row) + +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; -- Learn on a query with one join + count +------- + 2 +(1 row) + +SET aqo.join_threshold = 0; +SELECT * FROM check_estimated_rows('SELECT * FROM 
aqo_test1;'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; -- Learn on the query without any joins now + count +------- + 3 +(1 row) + +SET aqo.join_threshold = 1; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a)'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; -- See one more query in the AQO knowledge base + count +------- + 4 +(1 row) + +SELECT * FROM check_estimated_rows('WITH selected AS (SELECT * FROM aqo_test1 t1) SELECT count(*) FROM selected'); + estimated | actual +-----------+-------- + 1 | 1 +(1 row) + +SELECT * FROM check_estimated_rows(' + WITH selected AS ( + SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) + ) SELECT count(*) FROM selected') +; + estimated | actual +-----------+-------- + 1 | 1 +(1 row) +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + count +------- + 5 +(1 row) + +-- InitPlan +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1 WHERE t1.a IN ( + SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) + )'); + estimated | actual +-----------+-------- + 20 | 19 +(1 row) + +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + count +------- + 6 +(1 row) + +-- SubPlan +SELECT * FROM check_estimated_rows(' + SELECT ( + SELECT avg(t2.a) FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) AND (t2.a = t1.a) + ) FROM aqo_test1 AS t1; +'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + count +------- + 7 +(1 row) + +-- Subquery +SET aqo.join_threshold = 3; +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, + (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 + WHERE q1.a*t1.a = t1.a 
+ 15; +'); -- Two JOINs, ignore it + estimated | actual +-----------+-------- + 2 | 4 +(1 row) + +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + count +------- + 7 +(1 row) + +SET aqo.join_threshold = 2; +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, + (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 + WHERE q1.a*t1.a = t1.a + 15; +'); -- One JOIN from subquery, another one from the query + estimated | actual +-----------+-------- + 2 | 4 +(1 row) + +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + count +------- + 8 +(1 row) + +SELECT * FROM check_estimated_rows(' + WITH selected AS ( + SELECT t2.a FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) + ) SELECT count(*) FROM aqo_test1 t3, selected WHERE selected.a = t3.a') +; -- One JOIN extracted from CTE, another - from a FROM part of the query + estimated | actual +-----------+-------- + 1 | 1 +(1 row) + +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + count +------- + 9 +(1 row) + +DROP FUNCTION check_estimated_rows; +RESET aqo.join_threshold; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + DROP EXTENSION aqo; diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index f9288b85..a954bac3 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -1,8 +1,5 @@ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. 
-SET max_parallel_workers TO 0; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; DROP TABLE IF EXISTS a; NOTICE: table "a" does not exist, skipping @@ -13,119 +10,95 @@ SELECT * FROM a; -- (0 rows) -SELECT clean_aqo_data(); -NOTICE: Cleaning aqo_data records - clean_aqo_data ----------------- - +SELECT 'a'::regclass::oid AS a_oid \gset +SELECT true FROM aqo_cleanup(); + bool +------ + t (1 row) /* * lines with a_oid in aqo_data, - * lines with fspace_hash corresponding to a_oid in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat + * lines with fs corresponding to a_oid in aqo_queries, + * lines with queryid corresponding to a_oid's fs in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs in aqo_query_stat * should remain */ -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); count ------- 1 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); count ------- 1 (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 1 (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM 
aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 1 (1 row) DROP TABLE a; -SELECT clean_aqo_data(); -NOTICE: Cleaning aqo_data records - clean_aqo_data ----------------- - +SELECT true FROM aqo_cleanup(); + bool +------ + t (1 row) /* * lines with a_oid in aqo_data, - * lines with a_oid's fspace_hash EQUAL TO query_hash in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat, + * lines with a_oid's fs EQUAL TO queryid in aqo_queries, + * lines with queryid corresponding to a_oid's fs in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs in aqo_query_stat, * should be deleted */ -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); count ------- 0 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; count ------- 0 (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = 
ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 (1 row) -CREATE TABLE a(); -SELECT * FROM a; --- -(0 rows) - -SELECT 'a'::regclass::oid AS a_oid \gset --- add manually line with different fspace_hash and query_hash to aqo_queries -INSERT INTO aqo_queries VALUES (:a_oid + 1, 't', 't', :a_oid, 'f'); -DROP TABLE a; -SELECT clean_aqo_data(); -NOTICE: Cleaning aqo_data records - clean_aqo_data ----------------- - -(1 row) - --- this line should remain -SELECT count(*) FROM aqo_queries WHERE (fspace_hash = :a_oid AND query_hash = :a_oid + 1); - count -------- - 1 -(1 row) - CREATE TABLE a(); CREATE TABLE b(); SELECT * FROM a; @@ -140,182 +113,180 @@ SELECT * FROM b CROSS JOIN a; -- (0 rows) --- SELECT 'a'::regclass::oid AS a_oid \gset --- SELECT 'b'::regclass::oid AS b_oid \gset +SELECT 'a'::regclass::oid AS a_oid \gset +SELECT 'b'::regclass::oid AS b_oid \gset -- new lines added to aqo_data -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); count ------- - 3 + 2 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); count ------- 2 (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 2 (1 row) SELECT count(*) FROM aqo_query_stat WHERE - 
aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 2 (1 row) -SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); count ------- 3 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)); count ------- 2 (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- 2 (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- 2 (1 row) DROP TABLE a; -SELECT clean_aqo_data(); -NOTICE: Cleaning aqo_data records - clean_aqo_data ----------------- - +SELECT true FROM aqo_cleanup(); + bool +------ + t (1 row) /* - * lines corresponding to a_oid and both a_oid's fspace_hash deleted in aqo_data, - * lines with fspace_hash corresponding to a_oid deleted in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash deleted in 
aqo_query_texts, - * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_stat, + * lines corresponding to a_oid and both a_oid's fs deleted in aqo_data, + * lines with fs corresponding to a_oid deleted in aqo_queries, + * lines with queryid corresponding to a_oid's fs deleted in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs deleted in aqo_query_stat, */ -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); count ------- 0 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; count ------- 0 (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 (1 row) -- lines corresponding to b_oid in all theese tables should remain -SELECT count(*) FROM 
aqo_data WHERE 'b' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); count ------- 1 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; count ------- 1 (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 1 (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 1 (1 row) DROP TABLE b; -SELECT clean_aqo_data(); -NOTICE: Cleaning aqo_data records - clean_aqo_data ----------------- - +SELECT true FROM aqo_cleanup(); + bool +------ + t (1 row) -- lines corresponding to b_oid in theese tables deleted -SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); count ------- 0 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND - 
aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; count ------- 0 (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 diff --git a/expected/feature_subspace.out b/expected/feature_subspace.out new file mode 100644 index 00000000..185bede0 --- /dev/null +++ b/expected/feature_subspace.out @@ -0,0 +1,84 @@ +-- This test related to some issues on feature subspace calculation +CREATE EXTENSION aqo; +SET aqo.mode = 'learn'; +SET aqo.join_threshold = 0; +SET aqo.show_details = 'on'; +CREATE TABLE a AS (SELECT gs AS x FROM generate_series(1,10) AS gs); +CREATE TABLE b AS (SELECT gs AS x FROM generate_series(1,100) AS gs); +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. 
+-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +-- +-- A LEFT JOIN B isn't equal B LEFT JOIN A. +-- +SELECT str AS result +FROM expln(' +SELECT * FROM a LEFT JOIN b USING (x);') AS str +WHERE str NOT LIKE '%Memory%'; + result +----------------------------------------------------- + Merge Left Join (actual rows=10 loops=1) + AQO not used + Merge Cond: (a.x = b.x) + -> Sort (actual rows=10 loops=1) + AQO not used + Sort Key: a.x + -> Seq Scan on a (actual rows=10 loops=1) + AQO not used + -> Sort (actual rows=11 loops=1) + AQO not used + Sort Key: b.x + -> Seq Scan on b (actual rows=100 loops=1) + AQO not used + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(16 rows) + +-- TODO: Using method of other classes neighbours we get a bad estimation. +SELECT str AS result +FROM expln(' +SELECT * FROM b LEFT JOIN a USING (x);') AS str +WHERE str NOT LIKE '%Memory%'; + result +---------------------------------------------------- + Hash Left Join (actual rows=100 loops=1) + AQO: rows=10, error=-900% + Hash Cond: (b.x = a.x) + -> Seq Scan on b (actual rows=100 loops=1) + AQO: rows=100, error=0% + -> Hash (actual rows=10 loops=1) + AQO not used + -> Seq Scan on a (actual rows=10 loops=1) + AQO: rows=10, error=0% + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(12 rows) + +-- Look into the reason: two JOINs from different classes have the same FSS. 
+SELECT to_char(d1.targets[1], 'FM999.00') AS target FROM aqo_data d1 +JOIN aqo_data d2 ON (d1.fs <> d2.fs AND d1.fss = d2.fss) +WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids) order by target; + target +-------- + 2.30 + 4.61 +(2 rows) + +DROP TABLE a,b CASCADE; +SELECT true FROM aqo_reset(); + bool +------ + t +(1 row) + +DROP EXTENSION aqo; diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index 5c05d499..f635fbcc 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -1,10 +1,7 @@ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. -SET max_parallel_workers TO 0; \set citizens 1000 +SET aqo.join_threshold = 0; SET aqo.mode = 'disabled'; -SET aqo.force_collect_stat = 'on'; +SET aqo.force_collect_stat = 'off'; CREATE TABLE person ( id serial PRIMARY KEY, age integer, @@ -23,6 +20,7 @@ INSERT INTO person (id,age,gender,passport) FROM (SELECT *, 14+(id % 60) AS age FROM generate_series(1, :citizens) id) AS q1 ); CREATE EXTENSION aqo; +SET aqo.force_collect_stat = 'on'; SELECT count(*) FROM person WHERE age<18; count ------- @@ -36,8 +34,8 @@ SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; (1 row) SELECT * FROM aqo_data; - fspace_hash | fsspace_hash | nfeatures | features | targets | oids | reliability --------------+--------------+-----------+----------+---------+------+------------- + fs | fss | nfeatures | features | targets | reliability | oids +----+-----+-----------+----------+---------+-------------+------ (0 rows) CREATE OR REPLACE FUNCTION round_array (double precision[]) @@ -48,7 +46,9 @@ AS $$ FROM unnest($1) as arr(elem); $$; SELECT learn_aqo,use_aqo,auto_tuning,round_array(cardinality_error_without_aqo) ce,executions_without_aqo nex -FROM aqo_queries JOIN aqo_query_stat USING (query_hash); +FROM 
aqo_queries AS aq JOIN aqo_query_stat AS aqs +ON aq.queryid = aqs.queryid +ORDER BY (cardinality_error_without_aqo); learn_aqo | use_aqo | auto_tuning | ce | nex -----------+---------+-------------+---------+----- f | f | f | {0.864} | 1 @@ -61,10 +61,13 @@ SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); SELECT count(*) FROM person WHERE age<18; COMMON feature space (do not delete!) SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; - + - SELECT array_agg(round(elem::numeric, 3)) + - FROM unnest($1) as arr(elem); + - -(4 rows) +(3 rows) + +DROP TABLE person; +SELECT 1 FROM aqo_reset(); -- Full remove of ML data before the end + ?column? +---------- + 1 +(1 row) DROP EXTENSION aqo; diff --git a/expected/gucs.out b/expected/gucs.out index fe46e53c..bbfd8001 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -1,13 +1,17 @@ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. -SET max_parallel_workers TO 0; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = true; CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; +SELECT true FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. + bool +------ + t +(1 row) + +-- Check AQO addons to explain (the only stable data) EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; QUERY PLAN @@ -32,11 +36,90 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) JOINS: 0 (6 rows) +SET aqo.mode = 'disabled'; -- Check existence of the interface functions. 
-SELECT obj_description('public.show_cardinality_errors'::regproc::oid); +SELECT obj_description('aqo_cardinality_error'::regproc::oid); obj_description --------------------------------------------------------------------------------------------------------------- Get cardinality error of queries the last time they were executed. Order queries according to an error value. (1 row) +SELECT obj_description('aqo_execution_time'::regproc::oid); + obj_description +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions. +(1 row) + +SELECT obj_description('aqo_drop_class'::regproc::oid); + obj_description +-------------------------------------------------------------- + Remove info about an query class from AQO ML knowledge base. 
+(1 row) + +SELECT obj_description('aqo_cleanup'::regproc::oid); + obj_description +---------------------------------------------- + Remove unneeded rows from the AQO ML storage +(1 row) + +SELECT obj_description('aqo_reset'::regproc::oid); + obj_description +-------------------------------- + Reset all data gathered by AQO +(1 row) + +\df aqo_cardinality_error + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-----------------------+-------------------------------------------------------------------------------------+---------------------+------ + public | aqo_cardinality_error | TABLE(num integer, id bigint, fshash bigint, error double precision, nexecs bigint) | controlled boolean | func +(1 row) + +\df aqo_execution_time + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+--------------------+-----------------------------------------------------------------------------------------+---------------------+------ + public | aqo_execution_time | TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) | controlled boolean | func +(1 row) + +\df aqo_drop_class + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+----------------+------------------+---------------------+------ + public | aqo_drop_class | integer | queryid bigint | func +(1 row) + +\df aqo_cleanup + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-------------+------------------+-----------------------------------+------ + public | aqo_cleanup | record | OUT nfs integer, OUT nfss integer | func +(1 row) + +\df aqo_reset + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-----------+------------------+---------------------+------ + public | aqo_reset | bigint | | func +(1 row) + +-- Check stat reset +SELECT count(*) FROM aqo_query_stat; + count +------- + 1 +(1 
row) + +SELECT true FROM aqo_reset(); -- Remove one record from all tables + bool +------ + t +(1 row) + +SELECT count(*) FROM aqo_query_stat; + count +------- + 0 +(1 row) + DROP EXTENSION aqo; diff --git a/expected/look_a_like.out b/expected/look_a_like.out new file mode 100644 index 00000000..9cba2c48 --- /dev/null +++ b/expected/look_a_like.out @@ -0,0 +1,238 @@ +CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'on'; +SET enable_material = 'off'; +DROP TABLE IF EXISTS a,b CASCADE; +NOTICE: table "a" does not exist, skipping +NOTICE: table "b" does not exist, skipping +CREATE TABLE a (x int); +INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival; +CREATE TABLE b (y int); +INSERT INTO b (y) SELECT mod(ival + 1,10) FROM generate_series(1,1000) As ival; +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +-- no one predicted rows. 
we use knowledge cardinalities of the query +-- in the next queries with the same fss_hash +SELECT str AS result +FROM expln(' +SELECT x FROM A where x = 5;') AS str +WHERE str NOT LIKE 'Query Identifier%'; + result +------------------------------------------------ + Seq Scan on public.a (actual rows=100 loops=1) + AQO not used + Output: x + Filter: (a.x = 5) + Rows Removed by Filter: 900 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(8 rows) + +SELECT str AS result +FROM expln(' +SELECT x FROM A,B WHERE x = 5 AND A.x = B.y;') AS str +WHERE str NOT LIKE 'Query Identifier%' +; -- Find cardinality for SCAN A(x=5) from a neighbour class, created by the + result +-------------------------------------------------------- + Nested Loop (actual rows=10000 loops=1) + AQO not used + Output: a.x + -> Seq Scan on public.b (actual rows=100 loops=1) + AQO not used + Output: b.y + Filter: (b.y = 5) + Rows Removed by Filter: 900 + -> Seq Scan on public.a (actual rows=100 loops=100) + AQO: rows=100, error=0% + Output: a.x + Filter: (a.x = 5) + Rows Removed by Filter: 900 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(16 rows) + +-- query, executed above. +SELECT str AS result +FROM expln(' +SELECT x, sum(x) FROM A,B WHERE y = 5 AND A.x = B.y group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%' +; -- Find the JOIN cardinality from a neighbour class. 
+ result +-------------------------------------------------------------- + GroupAggregate (actual rows=1 loops=1) + AQO not used + Output: a.x, sum(a.x) + Group Key: a.x + -> Nested Loop (actual rows=10000 loops=1) + AQO: rows=10000, error=0% + Output: a.x + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: a.x + Filter: (a.x = 5) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=100 loops=100) + AQO: rows=100, error=0% + Output: b.y + Filter: (b.y = 5) + Rows Removed by Filter: 900 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(20 rows) + +-- cardinality 100 in the first Seq Scan on a +SELECT str AS result +FROM expln(' +SELECT x, sum(x) FROM A WHERE x = 5 group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%'; + result +------------------------------------------------------ + GroupAggregate (actual rows=1 loops=1) + AQO not used + Output: x, sum(x) + Group Key: a.x + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: x + Filter: (a.x = 5) + Rows Removed by Filter: 900 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(12 rows) + +-- no one predicted rows. 
we use knowledge cardinalities of the query +-- in the next queries with the same fss_hash +SELECT str AS result +FROM expln(' +SELECT x FROM A where x < 10 group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------- + HashAggregate (actual rows=10 loops=1) + AQO not used + Output: x + Group Key: a.x + -> Seq Scan on public.a (actual rows=1000 loops=1) + AQO not used + Output: x + Filter: (a.x < 10) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(11 rows) + +-- cardinality 1000 in Seq Scan on a +SELECT str AS result +FROM expln(' +SELECT x,y FROM A,B WHERE x < 10 AND A.x = B.y;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------- + Merge Join (actual rows=100000 loops=1) + AQO not used + Output: a.x, b.y + Merge Cond: (a.x = b.y) + -> Sort (actual rows=1000 loops=1) + AQO not used + Output: a.x + Sort Key: a.x + -> Seq Scan on public.a (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: a.x + Filter: (a.x < 10) + -> Sort (actual rows=99901 loops=1) + AQO not used + Output: b.y + Sort Key: b.y + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO not used + Output: b.y + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(22 rows) + +-- cardinality 100 in Seq Scan on a and Seq Scan on b +SELECT str AS result +FROM expln(' +SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +---------------------------------------------------------- + HashAggregate (actual rows=0 loops=1) + AQO not used + Output: a.x + Group Key: a.x + -> Nested Loop (actual rows=0 loops=1) + AQO not used + Output: a.x + -> Seq Scan on public.b (actual rows=0 loops=1) + AQO not used + Output: b.y + Filter: (b.y > 10) + Rows Removed by Filter: 1000 + -> Seq Scan on public.a (never executed) + AQO: rows=1000 + 
Output: a.x + Filter: (a.x < 10) + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(19 rows) + +-- +-- TODO: +-- Not executed case. What could we do better here? +-- +SELECT str AS result +FROM expln(' +SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' +; + result +---------------------------------------------------------- + Hash Join (actual rows=0 loops=1) + AQO not used + Output: a.x, b.y + Hash Cond: (a.x = b.y) + -> Seq Scan on public.a (actual rows=1 loops=1) + AQO: rows=1000, error=100% + Output: a.x + Filter: (a.x < 10) + -> Hash (actual rows=0 loops=1) + AQO not used + Output: b.y + -> Seq Scan on public.b (actual rows=0 loops=1) + AQO: rows=1, error=100% + Output: b.y + Filter: (b.y > 10) + Rows Removed by Filter: 1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(19 rows) + +RESET enable_material; +DROP TABLE a,b CASCADE; +SELECT true FROM aqo_reset(); + bool +------ + t +(1 row) + +DROP EXTENSION aqo CASCADE; diff --git a/expected/plancache.out b/expected/plancache.out index 8d02ef0f..edcf30e7 100644 --- a/expected/plancache.out +++ b/expected/plancache.out @@ -1,9 +1,6 @@ -- Tests on interaction of AQO with cached plans. --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. 
-SET max_parallel_workers TO 0; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; SET aqo.show_details = 'on'; SET aqo.show_hash = 'off'; @@ -27,13 +24,13 @@ BEGIN execute 'EXECUTE fooplan(1)'; END LOOP; - SELECT query_hash FROM aqo_query_texts + SELECT queryid FROM aqo_query_texts WHERE query_text LIKE '%count(*) FROM test WHERE x%' INTO qhash; RETURN QUERY SELECT executions_without_aqo nnex, executions_with_aqo nex, planning_time_with_aqo pt - FROM aqo_query_stat WHERE query_hash = qhash; + FROM aqo_query_stat WHERE queryid = qhash; END $$ LANGUAGE 'plpgsql'; -- The function shows 6 executions without an AQO support (nnex) and -- 4 executions with usage of an AQO knowledge base (nex). Planning time in the diff --git a/expected/relocatable.out b/expected/relocatable.out new file mode 100644 index 00000000..5fcf06e6 --- /dev/null +++ b/expected/relocatable.out @@ -0,0 +1,122 @@ +CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; +SET aqo.mode = 'learn'; -- use this mode for unconditional learning +CREATE TABLE test AS (SELECT id, 'payload' || id FROM generate_series(1,100) id); +ANALYZE test; +-- Learn on a query +SELECT count(*) FROM test; + count +------- + 100 +(1 row) + +SELECT query_text, learn_aqo, use_aqo, auto_tuning +FROM aqo_query_texts aqt JOIN aqo_queries aq ON (aqt.queryid = aq.queryid) +ORDER BY (md5(query_text)) +; -- Check result. TODO: use aqo_status() + query_text | learn_aqo | use_aqo | auto_tuning +---------------------------------------+-----------+---------+------------- + SELECT count(*) FROM test; | t | t | f + COMMON feature space (do not delete!) | f | f | f +(2 rows) + +-- Create a schema and move AQO into it. 
+CREATE SCHEMA IF NOT EXISTS test; +ALTER EXTENSION aqo SET SCHEMA test; +-- Do something to be confident that AQO works +SELECT count(*) FROM test; + count +------- + 100 +(1 row) + +SELECT count(*) FROM test WHERE id < 10; + count +------- + 9 +(1 row) + +SELECT query_text, learn_aqo, use_aqo, auto_tuning +FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) +ORDER BY (md5(query_text)) +; -- Find out both queries executed above + query_text | learn_aqo | use_aqo | auto_tuning +------------------------------------------+-----------+---------+------------- + SELECT count(*) FROM test; | t | t | f + COMMON feature space (do not delete!) | f | f | f + SELECT count(*) FROM test WHERE id < 10; | t | t | f +(3 rows) + +-- Add schema which contains AQO to the end of search_path +SELECT set_config('search_path', current_setting('search_path') || ', test', false); + set_config +----------------------- + "$user", public, test +(1 row) + +SELECT count(*) FROM test; + count +------- + 100 +(1 row) + +SELECT count(*) FROM test WHERE id < 10; + count +------- + 9 +(1 row) + +SELECT query_text, learn_aqo, use_aqo, auto_tuning +FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) +ORDER BY (md5(query_text)) +; -- Check result. + query_text | learn_aqo | use_aqo | auto_tuning +------------------------------------------+-----------+---------+------------- + SELECT count(*) FROM test; | t | t | f + COMMON feature space (do not delete!) 
| f | f | f + SELECT count(*) FROM test WHERE id < 10; | t | t | f +(3 rows) + +/* + * Below, we should check each UI function + */ +SELECT aqo_disable_query(id) FROM ( + SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; + aqo_disable_query +------------------- + + +(2 rows) + +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); + learn_aqo | use_aqo | auto_tuning +-----------+---------+------------- + f | f | f + t | t | f + t | t | f +(3 rows) + +SELECT aqo_enable_query(id) FROM ( + SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; + aqo_enable_query +------------------ + + +(2 rows) + +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); + learn_aqo | use_aqo | auto_tuning +-----------+---------+------------- + f | f | f + t | t | f + t | t | f +(3 rows) + +RESET search_path; +DROP TABLE test CASCADE; +DROP SCHEMA IF EXISTS test CASCADE; +NOTICE: drop cascades to extension aqo +DROP EXTENSION IF EXISTS aqo CASCADE; +NOTICE: extension "aqo" does not exist, skipping diff --git a/expected/schema.out b/expected/schema.out index cc586233..0b5a5c07 100644 --- a/expected/schema.out +++ b/expected/schema.out @@ -1,7 +1,3 @@ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. 
-SET max_parallel_workers TO 0; DROP EXTENSION IF EXISTS aqo CASCADE; NOTICE: extension "aqo" does not exist, skipping DROP SCHEMA IF EXISTS test CASCADE; @@ -16,6 +12,7 @@ ERROR: no schema has been selected to create in CREATE SCHEMA IF NOT EXISTS test1; SET search_path TO test1, public; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; CREATE TABLE test (id SERIAL, data TEXT); INSERT INTO test (data) VALUES ('string'); @@ -28,21 +25,21 @@ SELECT * FROM test; -- Check AQO service relations state after some manipulations -- Exclude fields with hash values from the queries. Hash is depend on -- nodefuncs code which is highly PostgreSQL version specific. -SELECT query_text FROM public.aqo_query_texts; - query_text --------------------------------------------- +SELECT query_text FROM aqo_query_texts +ORDER BY (md5(query_text)) DESC; + query_text +--------------------------------------- COMMON feature space (do not delete!) - INSERT INTO test (data) VALUES ('string'); SELECT * FROM test; -(3 rows) +(2 rows) -SELECT learn_aqo, use_aqo, auto_tuning FROM public.aqo_queries; +SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f t | f | t - t | f | t -(3 rows) +(2 rows) DROP SCHEMA IF EXISTS test1 CASCADE; NOTICE: drop cascades to 2 other objects diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index 9d91de22..302b9b43 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -21,6 +21,7 @@ CREATE TABLE t AS SELECT * FROM generate_series(1,100) AS x; ANALYZE t; DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. 
CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'off'; SET aqo.learn_statement_timeout = 'on'; @@ -67,7 +68,12 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); DELETE FROM t WHERE x > 2; ANALYZE t; INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); -TRUNCATE aqo_data; +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + SET statement_timeout = 800; SELECT *, pg_sleep(1) FROM t; -- Not learned NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. @@ -106,4 +112,11 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); (1 row) DROP TABLE t; +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + DROP EXTENSION aqo; +DROP FUNCTION check_estimated_rows; diff --git a/expected/temp_tables.out b/expected/temp_tables.out new file mode 100644 index 00000000..d0656056 --- /dev/null +++ b/expected/temp_tables.out @@ -0,0 +1,195 @@ +CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; +SET aqo.mode = 'learn'; +CREATE TEMP TABLE tt(); +CREATE TABLE pt(); +-- Ignore queries with the only temp tables +SELECT count(*) FROM tt; + count +------- + 0 +(1 row) + +SELECT count(*) FROM tt AS t1, tt AS t2; + count +------- + 0 +(1 row) + +SELECT query_text FROM aqo_query_texts; -- Default row should be returned + query_text +--------------------------------------- + COMMON feature space (do not delete!) 
+(1 row) + +-- Should be stored in the ML base +SELECT count(*) FROM pt; + count +------- + 0 +(1 row) + +SELECT count(*) FROM pt, tt; + count +------- + 0 +(1 row) + +SELECT count(*) FROM pt AS pt1, tt AS tt1, tt AS tt2, pt AS pt2; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_data; -- Don't bother about false negatives because of trivial query plans + count +------- + 10 +(1 row) + +DROP TABLE tt; +SELECT * FROM aqo_cleanup(); + nfs | nfss +-----+------ + 0 | 0 +(1 row) + +SELECT count(*) FROM aqo_data; -- Should return the same as previous call above + count +------- + 10 +(1 row) + +DROP TABLE pt; +SELECT * FROM aqo_cleanup(); + nfs | nfss +-----+------ + 3 | 10 +(1 row) + +SELECT count(*) FROM aqo_data; -- Should be 0 + count +------- + 0 +(1 row) + +SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt +ON aq.queryid = aqt.queryid +ORDER BY (md5(query_text)); -- The only the common class is returned + query_text +--------------------------------------- + COMMON feature space (do not delete!) 
+(1 row) + +-- Test learning on temporary table +CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE tt AS SELECT -x AS x, (x % 7) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE ttd AS -- the same structure as tt + SELECT -(x*3) AS x, (x % 9) AS y FROM generate_series(1,100) AS x; +ANALYZE pt,tt,ttd; +create function check_estimated_rows(text) returns table (estimated int, actual int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain analyze %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)'); + return query select tmp[1]::int, tmp[2]::int; + end if; + end loop; +end; +$$; +-- Check: AQO learns on queries with temp tables +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Estimation failed. Learn. + estimated | actual +-----------+-------- + 100 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Should use AQO estimation + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); +'); -- Should use AQO estimation with another temp table of the same structure + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SET aqo.mode = 'forced'; -- Now we use all fss records for each query +DROP TABLE pt; +SELECT * FROM aqo_cleanup(); + nfs | nfss +-----+------ + 2 | 5 +(1 row) + +CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE ttd1 AS + SELECT -(x*3) AS x, (x % 9) AS y1 FROM generate_series(1,100) AS x; +ANALYZE; +-- Check: use AQO knowledge with different temp table of the same structure +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt 
WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Estimation failed. Learn. + estimated | actual +-----------+-------- + 100 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Should use AQO estimation + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); +'); -- Should use AQO estimation with another temp table of the same structure + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt1.x, avg(pt1.y) FROM pt AS pt1,ttd WHERE pt1.x = ttd.x GROUP BY (pt1.x); +'); -- Alias doesn't influence feature space + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd1 WHERE pt.x = ttd1.x GROUP BY (pt.x); +'); -- Don't use AQO for temp table because of different attname + estimated | actual +-----------+-------- + 100 | 0 +(1 row) + +DROP TABLE pt CASCADE; +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + +DROP EXTENSION aqo; +DROP FUNCTION check_estimated_rows; diff --git a/expected/top_queries.out b/expected/top_queries.out index 77a7e280..ba72d7c8 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -1,18 +1,16 @@ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. -SET max_parallel_workers TO 0; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; -- --- Dummy test. CREATE TABLE shouldn't find in the ML storage. But a simple --- select must be in. Also here we test on gathering a stat on temp and plain +-- Dummy test. CREATE TABLE shouldn't be found in the ML storage. But a simple +-- select must recorded. 
Also here we test on gathering a stat on temp and plain -- relations. +-- XXX: Right now we ignore queries if no one permanent table is touched. -- CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); -SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; +SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; -- Ignore it cnt ----- 0 @@ -24,16 +22,30 @@ SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; 0 (1 row) -SELECT num FROM top_time_queries(3); -NOTICE: Top 3 execution time queries +SELECT num FROM aqo_execution_time(true); -- Just for checking, return zero. + num +----- +(0 rows) + +SELECT num FROM aqo_execution_time(false); num ----- 1 - 2 -(2 rows) +(1 row) + +-- Without the AQO control queries with and without temp tables are logged. +SELECT query_text,nexecs +FROM aqo_execution_time(false) ce, aqo_query_texts aqt +WHERE ce.id = aqt.queryid +ORDER BY (md5(query_text)); + query_text | nexecs +------------------------------------------------------+-------- + SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; | 1 +(1 row) -- --- num of query uses table t2 should be bigger than num of query uses table t1 and be the first +-- num of query which uses the table t2 should be bigger than num of query which +-- uses the table t1 and must be the first -- CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y FROM generate_series(1,1000) AS gs; @@ -51,23 +63,42 @@ SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y 31 (1 row) -SELECT num, to_char(error, '9.99EEEE') FROM show_cardinality_errors(false) AS te +SELECT to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te WHERE te.fshash = ( - SELECT fspace_hash FROM aqo_queries - WHERE aqo_queries.query_hash = ( - SELECT aqo_query_texts.query_hash FROM aqo_query_texts + SELECT fs FROM aqo_queries + WHERE aqo_queries.queryid = ( + SELECT aqo_query_texts.queryid FROM 
aqo_query_texts WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' ) ); - num | to_char ------+----------- - 1 | 1.94e+00 + to_char +----------- + 1.94e+00 (1 row) -- Should return zero -SELECT count(*) FROM show_cardinality_errors(true); +SELECT count(*) FROM aqo_cardinality_error(true); count ------- 0 (1 row) +-- Fix list of logged queries +SELECT query_text,nexecs +FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt +WHERE ce.id = aqt.queryid +ORDER BY (md5(query_text)); + query_text | nexecs +------------------------------------------------------------------------------------------------+-------- + SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 + SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; | 1 + SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 +(3 rows) + +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + +DROP EXTENSION aqo; diff --git a/expected/unsupported.out b/expected/unsupported.out index c26c1d72..a28db16c 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -1,8 +1,5 @@ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. -SET max_parallel_workers TO 0; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; DROP TABLE IF EXISTS t; @@ -384,7 +381,7 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) -> Aggregate (actual rows=1 loops=1000) AQO not used -> Seq Scan on t t0 (actual rows=50 loops=1000) - AQO not used + AQO: rows=50, error=0% Filter: (x = t.x) Rows Removed by Filter: 950 SubPlan 2 @@ -463,7 +460,7 @@ SELECT * FROM JOINS: 0 (13 rows) --- AQO need to predict total fetched tuples in a table. 
+-- AQO needs to predict total fetched tuples in a table. -- -- At a non-leaf node we have prediction about input tuples - is a number of -- predicted output rows in underlying node. But for Scan nodes we don't have @@ -556,60 +553,76 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? -- Live with this variant of the test for some time. -SELECT - num, to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt -WHERE aqt.query_hash = cef.id -ORDER BY (error, md5(query_text)) DESC; - num | error | query_text ------+-----------+------------------------------------------------------------------------------------------------ - 1 | 1.15e+02 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; - 3 | 3.00e+01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; - 4 | 3.00e+01 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; - 2 | 3.00e+01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; - 5 | 1.33e+00 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; - 11 | 0.00e+00 | SELECT * FROM + - | | (SELECT * FROM t WHERE x < 0) AS t0 + - | | JOIN + - | | (SELECT * FROM t WHERE x > 20) AS t1 + - | | USING(x); - 10 | 0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); - 12 | 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + - | | SELECT count(*) FROM t WHERE + - | | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + - | | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); - 8 | 0.00e+00 | SELECT count(*) FROM ( + - | | SELECT count(*) AS x FROM ( + - | | SELECT count(*) FROM t1 GROUP BY (x,y) + - | | ) AS q1 + - | | ) AS q2 + - | | WHERE q2.x > 1; - 9 | 0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); - 6 | 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + - | | 
SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + - | | (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); - 7 | 0.00e+00 | SELECT count(*) FROM + - | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + - | | JOIN + - | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + - | | ON q1.x = q2.x+1; +SELECT to_char(error, '9.99EEEE')::text AS error, query_text +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt +WHERE aqt.queryid = cef.id +ORDER BY (md5(query_text),error) DESC; + error | query_text +-----------+------------------------------------------------------------------------------------------------ + 7.68e-01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + 7.04e-02 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + 1.42e+00 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + 0.00e+00 | SELECT * FROM + + | (SELECT * FROM t WHERE x < 0) AS t0 + + | JOIN + + | (SELECT * FROM t WHERE x > 20) AS t1 + + | USING(x); + 0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT count(*) FROM t WHERE + + | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + + | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + 4.54e-01 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + 0.00e+00 | SELECT count(*) FROM ( + + | SELECT count(*) AS x FROM ( + + | SELECT count(*) FROM t1 GROUP BY (x,y) + + | ) AS q1 + + | ) AS q2 + + | WHERE q2.x > 1; + 7.68e-01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + 0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); + 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + + | (SELECT avg(x) FROM 
t t0 WHERE t0.x = t.x); + 0.00e+00 | SELECT count(*) FROM + + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + + | JOIN + + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + + | ON q1.x = q2.x+1; (12 rows) -DROP TABLE t,t1 CASCADE; -SELECT public.clean_aqo_data(); -NOTICE: Cleaning aqo_data records - clean_aqo_data ----------------- - +DROP TABLE t,t1 CASCADE; -- delete all tables used in the test +SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May some false positives really bother us here? + count +------- + 42 +(1 row) + +SELECT * FROM aqo_cleanup(); + nfs | nfss +-----+------ + 12 | 42 +(1 row) + +SELECT count(*) FROM aqo_data; -- No one row should be returned + count +------- + 0 (1 row) -- Look for any remaining queries in the ML storage. -SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt -WHERE aqt.query_hash = cef.id -ORDER BY (error, md5(query_text)) DESC; - num | error | query_text ------+-------+------------ +SELECT to_char(error, '9.99EEEE')::text AS error, query_text +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt +WHERE aqt.queryid = cef.id +ORDER BY (md5(query_text),error) DESC; + error | query_text +-------+------------ (0 rows) +SELECT 1 FROM aqo_reset(); + ?column? 
+---------- + 1 +(1 row) + DROP EXTENSION aqo; diff --git a/hash.c b/hash.c index 1f8f8112..d4866448 100644 --- a/hash.c +++ b/hash.c @@ -18,9 +18,11 @@ * aqo/hash.c * */ - #include "postgres.h" +#include "access/htup.h" +#include "common/fe_memutils.h" + #include "math.h" #include "aqo.h" @@ -31,7 +33,7 @@ static int get_node_hash(Node *node); static int get_unsorted_unsafe_int_array_hash(int *arr, int len); static int get_unordered_int_list_hash(List *lst); -static int64 get_relations_hash(List *relnames); +static int64 get_relations_hash(List *relsigns); static int get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash); @@ -69,7 +71,6 @@ get_query_hash(Query *parse, const char *query_text) /* XXX: remove_locations and remove_consts are heavy routines. */ str_repr = remove_locations(remove_consts(nodeToString(parse))); hash = DatumGetUInt64(hash_any_extended((void *) str_repr, strlen(str_repr),0)); - pfree(str_repr); return hash; } @@ -96,6 +97,29 @@ list_member_uint64(const List *list, uint64 datum) return false; } +/* + * Deep copy of uint64 list. + * Each element here is dynamically allocated in some memory context. + * If we copy the list in another memctx we should allocate memory for new + * elements too. + */ +List * +list_copy_uint64(List *list) +{ + ListCell *lc; + List *nlist = NIL; + + foreach(lc, list) + { + uint64 *val = palloc(sizeof(uint64)); + + *val = *(uint64 *) lfirst(lc); + nlist = lappend(nlist, (void *) val); + } + + return nlist; +} + List * lappend_uint64(List *list, uint64 datum) { @@ -106,6 +130,11 @@ lappend_uint64(List *list, uint64 datum) return list; } +/* + * Remove element from a list and free the memory which was allocated to it. + * Looks unconventional, but we unconventionally allocate memory on append, so + * it maybe ok. 
+ */ List * ldelete_uint64(List *list, uint64 datum) { @@ -145,11 +174,12 @@ get_grouped_exprs_hash(int child_fss, List *group_exprs) final_hashes[0] = child_fss; final_hashes[1] = get_int_array_hash(hashes, i); + return get_int_array_hash(final_hashes, 2); } /* - * For given object (clauselist, selectivities, relnames) creates feature + * For given object (clauselist, selectivities, reloids) creates feature * subspace: * sets nfeatures * creates and computes fss_hash @@ -158,7 +188,7 @@ get_grouped_exprs_hash(int child_fss, List *group_exprs) * Special case for nfeatures == NULL: don't calculate features. */ int -get_fss_for_object(List *relnames, List *clauselist, +get_fss_for_object(List *relsigns, List *clauselist, List *selectivities, int *nfeatures, double **features) { int n; @@ -172,7 +202,7 @@ get_fss_for_object(List *relnames, List *clauselist, int *eclass_hash; int clauses_hash; int eclasses_hash; - int relnames_hash; + int relations_hash; List **args; ListCell *lc; int i, @@ -182,6 +212,7 @@ get_fss_for_object(List *relnames, List *clauselist, int sh = 0, old_sh; int fss_hash; + MemoryContext old_ctx_m; n = list_length(clauselist); @@ -190,14 +221,15 @@ get_fss_for_object(List *relnames, List *clauselist, (nfeatures == NULL && features == NULL)); get_eclasses(clauselist, &nargs, &args_hash, &eclass_hash); + if (nfeatures != NULL) + *features = palloc0(sizeof(**features) * n); + + old_ctx_m = MemoryContextSwitchTo(AQOUtilityMemCtx); clause_hashes = palloc(sizeof(*clause_hashes) * n); clause_has_consts = palloc(sizeof(*clause_has_consts) * n); sorted_clauses = palloc(sizeof(*sorted_clauses) * n); - if (nfeatures != NULL) - *features = palloc0(sizeof(**features) * n); - i = 0; foreach(lc, clauselist) { @@ -228,6 +260,7 @@ get_fss_for_object(List *relnames, List *clauselist, if (nfeatures != NULL) { (*features)[inverse_idx[i]] = log(*s); + Assert(!isnan(log(*s))); if ((*features)[inverse_idx[i]] < log_selectivity_lower_bound) (*features)[inverse_idx[i]] = 
log_selectivity_lower_bound; } @@ -260,18 +293,14 @@ get_fss_for_object(List *relnames, List *clauselist, /* * Generate feature subspace hash. */ + clauses_hash = get_int_array_hash(sorted_clauses, n - sh); eclasses_hash = get_int_array_hash(eclass_hash, nargs); - relnames_hash = (int) get_relations_hash(relnames); - fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relnames_hash); + relations_hash = (int) get_relations_hash(relsigns); + fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relations_hash); - pfree(clause_hashes); - pfree(sorted_clauses); - pfree(idx); - pfree(inverse_idx); - pfree(clause_has_consts); - pfree(args_hash); - pfree(eclass_hash); + MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOUtilityMemCtx); if (nfeatures != NULL) { @@ -439,32 +468,27 @@ get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash) * Each element of a list must have a String type, */ static int64 -get_relations_hash(List *relnames) +get_relations_hash(List *relsigns) { - int64 *hashes = palloc(list_length(relnames) * sizeof(int64)); + int nhashes = 0; + int64 *hashes = palloc(list_length(relsigns) * sizeof(uint64)); ListCell *lc; - int64 hash = 0; - int i = 0; + int64 result; - /* generate array of hashes. */ - foreach(lc, relnames) + foreach(lc, relsigns) { - Value *relname = (Value *) lfirst(lc); - - hashes[i++] = DatumGetInt64(hash_any_extended( - (unsigned char *) strVal(relname), - strlen(strVal(relname)), 0)); + hashes[nhashes++] = *(int64 *) lfirst(lc); } /* Sort the array to make query insensitive to input order of relations. 
*/ - qsort(hashes, i, sizeof(int64), int64_compare); + qsort(hashes, nhashes, sizeof(int64), int64_compare); /* Make a final hash value */ - hash = DatumGetInt64(hash_any_extended((unsigned char *) hashes, - i * sizeof(int64), 0)); - pfree(hashes); - return hash; + result = DatumGetInt64(hash_any_extended((const unsigned char *) hashes, + nhashes * sizeof(int64), 0)); + + return result; } /* @@ -658,13 +682,19 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) int i, v; int *e_hashes; + MemoryContext old_ctx_m; get_clauselist_args(clauselist, nargs, args_hash); + *eclass_hash = palloc((*nargs) * sizeof(**eclass_hash)); - p = perform_eclasses_join(clauselist, *nargs, *args_hash); + old_ctx_m = MemoryContextSwitchTo(AQOUtilityMemCtx); + p = perform_eclasses_join(clauselist, *nargs, *args_hash); lsts = palloc((*nargs) * sizeof(*lsts)); e_hashes = palloc((*nargs) * sizeof(*e_hashes)); + + MemoryContextSwitchTo(old_ctx_m); + for (i = 0; i < *nargs; ++i) lsts[i] = NIL; @@ -676,15 +706,10 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) for (i = 0; i < *nargs; ++i) e_hashes[i] = get_unordered_int_list_hash(lsts[i]); - *eclass_hash = palloc((*nargs) * sizeof(**eclass_hash)); for (i = 0; i < *nargs; ++i) (*eclass_hash)[i] = e_hashes[disjoint_set_get_parent(p, i)]; - for (i = 0; i < *nargs; ++i) - list_free(lsts[i]); - pfree(lsts); - pfree(p); - pfree(e_hashes); + MemoryContextReset(AQOUtilityMemCtx); } /* diff --git a/hash.h b/hash.h index b33b1990..01c90bed 100644 --- a/hash.h +++ b/hash.h @@ -5,9 +5,10 @@ extern uint64 get_query_hash(Query *parse, const char *query_text); extern bool list_member_uint64(const List *list, uint64 datum); +extern List *list_copy_uint64(List *list); extern List *lappend_uint64(List *list, uint64 datum); extern List *ldelete_uint64(List *list, uint64 datum); -extern int get_fss_for_object(List *relnames, List *clauselist, +extern int get_fss_for_object(List *relsigns, List 
*clauselist, List *selectivities, int *nfeatures, double **features); extern int get_int_array_hash(int *arr, int len); diff --git a/learn_cache.c b/learn_cache.c index 316968b0..74b72249 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -18,6 +18,7 @@ #include "aqo.h" #include "aqo_shared.h" #include "learn_cache.h" +#include "storage.h" typedef struct @@ -45,25 +46,20 @@ static uint32 init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relids); /* Calculate, how many data we need to store an ML record. */ static uint32 -calculate_size(int cols, List *relnames) +calculate_size(int cols, List *reloids) { uint32 size = sizeof(dsm_block_hdr); /* header's size */ - ListCell *lc; size += sizeof(double) * cols * aqo_K; /* matrix */ size += 2 * sizeof(double) * aqo_K; /* targets, rfactors */ /* Calculate memory size needed to store relation names */ - foreach(lc, relnames) - { - size += strlen(strVal(lfirst(lc))) + 1; - } - + size += list_length(reloids) * sizeof(Oid); return size; } bool -lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) +lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids) { htab_key key = {fs, fss}; htab_entry *entry; @@ -76,7 +72,7 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) Assert(fss_htab && aqo_learn_statement_timeout); - size = calculate_size(data->cols, relnames); + size = calculate_size(data->cols, reloids); LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); entry = (htab_entry *) hash_search(fss_htab, &key, HASH_ENTER, &found); @@ -87,7 +83,7 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) Assert(hdr->magic == AQO_SHARED_MAGIC); Assert(hdr->key.fs == fs && hdr->key.fss == fss); - if (data->cols != hdr->cols || list_length(relnames) != hdr->nrelids) + if (data->cols != hdr->cols || list_length(reloids) != hdr->nrelids) { /* * Collision found: the same {fs,fss}, but something different. 
@@ -109,18 +105,26 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) hdr->key.fs = fs; hdr->key.fss = fss; hdr->cols = data->cols; - hdr->nrelids = list_length(relnames); + hdr->nrelids = list_length(reloids); } hdr->rows = data->rows; ptr = (char *) hdr + sizeof(dsm_block_hdr); /* start point of variable data */ /* copy the matrix into DSM storage */ - for (i = 0; i < aqo_K; ++i) + + if (hdr->cols > 0) { - if (i < hdr->rows) + for (i = 0; i < aqo_K; ++i) + { + if (i >= hdr->rows) + break; + + if (!ptr || !data->matrix[i]) + elog(PANIC, "Something disruptive have happened! %d, %d (%d %d)", i, hdr->rows, found, hdr->cols); memcpy(ptr, data->matrix[i], sizeof(double) * hdr->cols); - ptr += sizeof(double) * data->cols; + ptr += sizeof(double) * data->cols; + } } /* copy targets into DSM storage */ @@ -131,14 +135,13 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) memcpy(ptr, data->rfactors, sizeof(double) * hdr->rows); ptr += sizeof(double) * aqo_K; - /* store strings of relation names. Each string ends with 0-byte */ - foreach(lc, relnames) + /* store list of relations */ + foreach(lc, reloids) { - char *relname = strVal(lfirst(lc)); - int len = strlen(relname) + 1; + Oid reloid = lfirst_oid(lc); - memcpy(ptr, relname, len); - ptr += len; + memcpy(ptr, &reloid, sizeof(Oid)); + ptr += sizeof(Oid); } /* Check the invariant */ @@ -172,7 +175,7 @@ lc_has_fss(uint64 fs, int fss) * Load ML data from a memory cache, not from a table. 
*/ bool -lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) +lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids) { htab_key key = {fs, fss}; htab_entry *entry; @@ -182,7 +185,7 @@ lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) Assert(fss_htab && aqo_learn_statement_timeout); if (aqo_show_details) - elog(NOTICE, "[AQO] Load ML data for fs %lu, fss %d from the cache", + elog(NOTICE, "[AQO] Load ML data for fs "UINT64_FORMAT", fss %d from the cache", fs, fss); LWLockAcquire(&aqo_state->lock, LW_SHARED); @@ -204,13 +207,13 @@ lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) return false; } - init_with_dsm(data, hdr, relnames); + init_with_dsm(data, hdr, reloids); LWLockRelease(&aqo_state->lock); return true; } static uint32 -init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relnames) +init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **reloids) { int i; char *ptr = (char *) hdr + sizeof(dsm_block_hdr); @@ -218,6 +221,7 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relnames) Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); Assert(hdr->magic == AQO_SHARED_MAGIC); + Assert(hdr && ptr); data->rows = hdr->rows; data->cols = hdr->cols; @@ -240,17 +244,15 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relnames) memcpy(data->rfactors, ptr, sizeof(double) * hdr->rows); ptr += sizeof(double) * aqo_K; - if (relnames) + if (reloids) { - *relnames = NIL; + *reloids = NIL; for (i = 0; i < hdr->nrelids; i++) { - int len = strlen(ptr) + 1; - - *relnames = lappend(*relnames, makeString(pstrdup(ptr))); - ptr += len; + *reloids = lappend_oid(*reloids, *(Oid *)(ptr)); + ptr += sizeof(Oid); } - return calculate_size(hdr->cols, *relnames); + return calculate_size(hdr->cols, *reloids); } /* It is just read operation. No any interest in size calculation. 
*/ @@ -271,18 +273,18 @@ lc_flush_data(void) ptr = get_dsm_all(&size); /* Iterate through records and store them into the aqo_data table */ - while(size > 0) + while (size > 0) { dsm_block_hdr *hdr = (dsm_block_hdr *) ptr; OkNNrdata data; - List *relnames = NIL; + List *reloids = NIL; uint32 delta = 0; - delta = init_with_dsm(&data, hdr, &relnames); + delta = init_with_dsm(&data, hdr, &reloids); Assert(delta > 0); ptr += delta; size -= delta; - update_fss(hdr->key.fs, hdr->key.fss, &data, relnames); + aqo_data_store(hdr->key.fs, hdr->key.fss, &data, reloids); if (!hash_search(fss_htab, (void *) &hdr->key, HASH_REMOVE, NULL)) elog(ERROR, "[AQO] Flush: local ML cache is corrupted."); diff --git a/learn_cache.h b/learn_cache.h index eccca22a..df61700e 100644 --- a/learn_cache.h +++ b/learn_cache.h @@ -7,9 +7,9 @@ extern bool aqo_learn_statement_timeout; -extern bool lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames); +extern bool lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids); extern bool lc_has_fss(uint64 fs, int fss); -extern bool lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames); +extern bool lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids); extern void lc_remove_fss(uint64 fs, int fss); extern void lc_flush_data(void); extern void lc_assign_hook(bool newval, void *extra); diff --git a/machine_learning.c b/machine_learning.c index 52c1ab40..7138db38 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -41,6 +41,24 @@ static double fs_similarity(double dist); static double compute_weights(double *distances, int nrows, double *w, int *idx); +OkNNrdata* +OkNNr_allocate(int ncols) +{ + OkNNrdata *data = palloc(sizeof(OkNNrdata)); + int i; + + if (ncols > 0) + for (i = 0; i < aqo_K; i++) + data->matrix[i] = palloc0(ncols * sizeof(double)); + else + for (i = 0; i < aqo_K; i++) + data->matrix[i] = NULL; + + data->cols = ncols; + data->rows = -1; + return data; +} + /* * Computes L2-distance between 
two given vectors. */ @@ -51,7 +69,10 @@ fs_distance(double *a, double *b, int len) int i; for (i = 0; i < len; ++i) + { + Assert(!isnan(a[i])); res += (a[i] - b[i]) * (a[i] - b[i]); + } if (len != 0) res = sqrt(res / len); return res; @@ -125,6 +146,8 @@ OkNNr_predict(OkNNrdata *data, double *features) double w_sum; double result = 0.; + Assert(data != NULL); + for (i = 0; i < data->rows; ++i) distances[i] = fs_distance(data->matrix[i], features, data->cols); diff --git a/machine_learning.h b/machine_learning.h index a09b3102..b114cade 100644 --- a/machine_learning.h +++ b/machine_learning.h @@ -21,6 +21,9 @@ typedef struct OkNNrdata double rfactors[aqo_K]; } OkNNrdata; +extern OkNNrdata* OkNNr_allocate(int ncols); +extern void OkNNr_free(OkNNrdata *data); + /* Machine learning techniques */ extern double OkNNr_predict(OkNNrdata *data, double *features); extern int OkNNr_learn(OkNNrdata *data, diff --git a/path_utils.c b/path_utils.c index d6463bfb..7f30a7e2 100644 --- a/path_utils.c +++ b/path_utils.c @@ -11,16 +11,22 @@ * aqo/path_utils.c * */ - #include "postgres.h" +#include "access/relation.h" #include "nodes/readfuncs.h" #include "optimizer/optimizer.h" #include "path_utils.h" +#include "utils/syscache.h" +#include "utils/lsyscache.h" #include "aqo.h" #include "hash.h" +#ifdef PGPRO_STD +# define expression_tree_mutator(node, mutator, context) \ + expression_tree_mutator(node, mutator, context, 0) +#endif /* * Hook on creation of a plan node. 
We need to store AQO-specific data to @@ -35,7 +41,7 @@ static AQOPlanNode DefaultAQOPlanNode = .node.type = T_ExtensibleNode, .node.extnodename = AQO_PLAN_NODE, .had_path = false, - .relids = NIL, + .rels = NULL, .clauses = NIL, .selectivities = NIL, .grouping_exprs = NIL, @@ -51,8 +57,11 @@ create_aqo_plan_node() { AQOPlanNode *node = (AQOPlanNode *) newNode(sizeof(AQOPlanNode), T_ExtensibleNode); - + Assert(node != NULL); memcpy(node, &DefaultAQOPlanNode, sizeof(AQOPlanNode)); + node->rels = palloc(sizeof(RelSortOut)); + node->rels->hrels = NIL; + node->rels->signatures = NIL; return node; } @@ -124,33 +133,97 @@ get_selectivities(PlannerInfo *root, } /* - * Transforms given relids from path optimization stage format to list of - * an absolute (independent on query optimization context) relnames. + * Based on the hashTupleDesc() routine */ -List * -get_relnames(PlannerInfo *root, Relids relids) +static uint64 +hashTempTupleDesc(TupleDesc desc) { - int i; - RangeTblEntry *rte; - List *l = NIL; + uint64 s; + int i; - if (relids == NULL) - return NIL; + s = hash_combine(0, hash_uint32(desc->natts)); - /* - * Check: don't take into account relations without underlying plane - * source table. - */ - Assert(!bms_is_member(0, relids)); + for (i = 0; i < desc->natts; ++i) + { + const char *attname = NameStr(TupleDescAttr(desc, i)->attname); + uint64 s1; - i = -1; - while ((i = bms_next_member(relids, i)) >= 0) + s = hash_combine64(s, hash_uint32(TupleDescAttr(desc, i)->atttypid)); + s1 = hash_bytes_extended((const unsigned char *) attname, strlen(attname), 0); + s = hash_combine64(s, s1); + } + return s; +} + +/* + * Get list of relation indexes and prepare list of permanent table reloids, + * list of temporary table reloids (can be changed between query launches) and + * array of table signatures. 
+ */ +void +get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) +{ + int index; + RangeTblEntry *entry; + List *hrels = NIL; + List *hashes = NIL; + + if (relids == NULL) + return; + + index = -1; + while ((index = bms_next_member(relids, index)) >= 0) { - rte = planner_rt_fetch(i, root); - if (OidIsValid(rte->relid)) - l = lappend(l, makeString(pstrdup(rte->eref->aliasname))); + HeapTuple htup; + Form_pg_class classForm; + char *relname = NULL; + + entry = planner_rt_fetch(index, root); + + if (!OidIsValid(entry->relid)) + { + /* Invalid oid */ + hashes = lappend_uint64(hashes, (UINT64_MAX / 7)); + continue; + } + + htup = SearchSysCache1(RELOID, ObjectIdGetDatum(entry->relid)); + if (!HeapTupleIsValid(htup)) + elog(PANIC, "cache lookup failed for reloid %u", entry->relid); + + classForm = (Form_pg_class) GETSTRUCT(htup); + + if (classForm->relpersistence == RELPERSISTENCE_TEMP) + { + /* The case of temporary table */ + + Relation trel = relation_open(entry->relid, NoLock); + TupleDesc tdesc = RelationGetDescr(trel); + + hashes = lappend_uint64(hashes, hashTempTupleDesc(tdesc)); + relation_close(trel, NoLock); + } + else + { + /* The case of regular table */ + relname = quote_qualified_identifier( + get_namespace_name(get_rel_namespace(entry->relid)), + classForm->relrewrite ? 
+ get_rel_name(classForm->relrewrite) : + NameStr(classForm->relname)); + hashes = lappend_uint64(hashes, DatumGetInt64(hash_any_extended( + (unsigned char *) relname, + strlen(relname), 0))); + + hrels = lappend_oid(hrels, entry->relid); + } + + ReleaseSysCache(htup); } - return l; + + rels->hrels = list_concat(rels->hrels, hrels); + rels->signatures = list_concat(rels->signatures, hashes); + return; } /* @@ -264,14 +337,14 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) return get_path_clauses(((MaterialPath *) path)->subpath, root, selectivities); break; - case T_MemoizePath: - return get_path_clauses(((MemoizePath *) path)->subpath, root, - selectivities); - break; case T_ProjectionPath: return get_path_clauses(((ProjectionPath *) path)->subpath, root, selectivities); break; + case T_ProjectSetPath: + return get_path_clauses(((ProjectSetPath *) path)->subpath, root, + selectivities); + break; case T_SortPath: return get_path_clauses(((SortPath *) path)->subpath, root, selectivities); @@ -386,7 +459,6 @@ is_appropriate_path(Path *path) { case T_SortPath: case T_IncrementalSortPath: - case T_MemoizePath: case T_GatherPath: case T_GatherMergePath: appropriate = false; @@ -400,6 +472,8 @@ is_appropriate_path(Path *path) /* * Converts path info into plan node for collecting it after query execution. + * Don't switch here to any AQO-specific memory contexts, because we should + * store AQO prediction in the same context, as the plan. */ void aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) @@ -444,7 +518,7 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) (*dest)->lefttree->targetlist); /* Copy bare expressions for further AQO learning case. 
*/ node->grouping_exprs = copyObject(groupExprs); - node->relids = get_relnames(root, ap->subpath->parent->relids); + get_list_of_relids(root, ap->subpath->parent->relids, node->rels); node->jointype = JOIN_INNER; } else if (is_appropriate_path(src)) @@ -455,8 +529,7 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) node->jointype = JOIN_INNER; } - node->relids = list_concat(node->relids, - get_relnames(root, src->parent->relids)); + get_list_of_relids(root, src->parent->relids, node->rels); if (src->parallel_workers > 0) node->parallel_divisor = get_parallel_divisor(src); @@ -484,12 +557,16 @@ AQOnodeCopy(struct ExtensibleNode *enew, const struct ExtensibleNode *eold) Assert(IsA(old, ExtensibleNode)); Assert(strcmp(old->node.extnodename, AQO_PLAN_NODE) == 0); + Assert(new && old); /* Copy static fields in one command */ memcpy(new, old, sizeof(AQOPlanNode)); /* These lists couldn't contain AQO nodes. Use basic machinery */ - new->relids = copyObject(old->relids); + new->rels = palloc(sizeof(RelSortOut)); + new->rels->hrels = list_copy(old->rels->hrels); + new->rels->signatures = list_copy_uint64(old->rels->signatures); + new->clauses = copyObject(old->clauses); new->grouping_exprs = copyObject(old->grouping_exprs); new->selectivities = copyObject(old->selectivities); @@ -530,7 +607,7 @@ AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) Assert(0); WRITE_BOOL_FIELD(had_path); - WRITE_NODE_FIELD(relids); + WRITE_NODE_FIELD(rels); WRITE_NODE_FIELD(clauses); WRITE_NODE_FIELD(selectivities); WRITE_NODE_FIELD(grouping_exprs); @@ -583,7 +660,7 @@ AQOnodeRead(struct ExtensibleNode *enode) Assert(0); READ_BOOL_FIELD(had_path); - READ_NODE_FIELD(relids); + READ_NODE_FIELD(rels); READ_NODE_FIELD(clauses); READ_NODE_FIELD(selectivities); READ_NODE_FIELD(grouping_exprs); @@ -625,10 +702,10 @@ aqo_store_upper_signature_hook(PlannerInfo *root, RelOptInfo *output_rel, void *extra) { - A_Const *fss_node = makeNode(A_Const); - List 
*relnames; - List *clauses; - List *selectivities; + A_Const *fss_node = makeNode(A_Const); + RelSortOut rels = {NIL, NIL}; + List *clauses; + List *selectivities; if (prev_create_upper_paths_hook) (*prev_create_upper_paths_hook)(root, stage, input_rel, output_rel, extra); @@ -643,9 +720,10 @@ aqo_store_upper_signature_hook(PlannerInfo *root, set_cheapest(input_rel); clauses = get_path_clauses(input_rel->cheapest_total_path, root, &selectivities); - relnames = get_relnames(root, input_rel->relids); + get_list_of_relids(root, input_rel->relids, &rels); fss_node->val.type = T_Integer; fss_node->location = -1; - fss_node->val.val.ival = get_fss_for_object(relnames, clauses, NIL, NULL, NULL); + fss_node->val.val.ival = get_fss_for_object(rels.signatures, clauses, NIL, + NULL, NULL); output_rel->ext_nodes = lappend(output_rel->ext_nodes, (void *) fss_node); } diff --git a/path_utils.h b/path_utils.h index 54ee181d..1803e08d 100644 --- a/path_utils.h +++ b/path_utils.h @@ -8,17 +8,29 @@ #define AQO_PLAN_NODE "AQOPlanNode" +/* + * Find and sort out relations that used in the query: + * Use oids of relations to store dependency of ML row on a set of tables. + * Use oids of temporary tables to get access to these structure for preparing + * a kind of signature. + */ +typedef struct +{ + List *hrels; /* oids of persistent relations */ + List *signatures; /* list of hashes: on qualified name of a persistent + * table or on a table structure for temp table */ +} RelSortOut; + /* * information for adaptive query optimization */ typedef struct AQOPlanNode { - ExtensibleNode node; - bool had_path; - List *relids; - List *temp_relnames; /* We store name of temporary table because OID by-default haven't sense at other backends. */ - List *clauses; - List *selectivities; + ExtensibleNode node; + bool had_path; + RelSortOut *rels; + List *clauses; + List *selectivities; /* Grouping expressions from a target list. 
*/ List *grouping_exprs; @@ -48,7 +60,8 @@ extern List *get_selectivities(PlannerInfo *root, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo); -extern List *get_relnames(PlannerInfo *root, Relids relids); +extern void get_list_of_relids(PlannerInfo *root, Relids relids, + RelSortOut *rels); extern List *get_path_clauses(Path *path, PlannerInfo *root, diff --git a/postprocessing.c b/postprocessing.c index dd420bce..0202239b 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -29,6 +29,7 @@ #include "machine_learning.h" #include "preprocessing.h" #include "learn_cache.h" +#include "storage.h" typedef struct @@ -59,25 +60,18 @@ static char *PlanStateInfo = "PlanStateInfo"; static void atomic_fss_learn_step(uint64 fhash, int fss, OkNNrdata *data, double *features, double target, double rfactor, - List *relnames, bool isTimedOut); + List *reloids, bool isTimedOut); static bool learnOnPlanState(PlanState *p, void *context); -static void learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, +static void learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, double learned, double rfactor, Plan *plan, bool notExecuted); -static void learn_sample(aqo_obj_stat *ctx, List *relidslist, +static void learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, double learned, double rfactor, Plan *plan, bool notExecuted); static List *restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, bool was_parametrized); -static void update_query_stat_row(double *et, int *et_size, - double *pt, int *pt_size, - double *ce, int *ce_size, - double planning_time, - double execution_time, - double cardinality_error, - int64 *n_exec); static void StoreToQueryEnv(QueryDesc *queryDesc); static void StorePlanInternals(QueryDesc *queryDesc); static bool ExtractFromQueryEnv(QueryDesc *queryDesc); @@ -91,33 +85,25 @@ static bool ExtractFromQueryEnv(QueryDesc *queryDesc); static void atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, double *features, double target, 
double rfactor, - List *relnames, bool isTimedOut) + List *reloids, bool isTimedOut) { - LOCKTAG tag; - - init_lock_tag(&tag, fs, fss); - LockAcquire(&tag, ExclusiveLock, false, false); - if (!load_fss_ext(fs, fss, data, NULL, !isTimedOut)) data->rows = 0; data->rows = OkNNr_learn(data, features, target, rfactor); - update_fss_ext(fs, fss, data, relnames, isTimedOut); - - LockRelease(&tag, ExclusiveLock, false); + update_fss_ext(fs, fss, data, reloids, isTimedOut); } static void -learn_agg_sample(aqo_obj_stat *ctx, List *relnames, +learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, double learned, double rfactor, Plan *plan, bool notExecuted) { AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); - uint64 fhash = query_context.fspace_hash; + uint64 fs = query_context.fspace_hash; int child_fss; double target; - OkNNrdata data; + OkNNrdata *data = OkNNr_allocate(0); int fss; - int i; /* * Learn 'not executed' nodes only once, if no one another knowledge exists @@ -127,16 +113,13 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relnames, return; target = log(learned); - child_fss = get_fss_for_object(relnames, ctx->clauselist, NIL, NULL, NULL); + child_fss = get_fss_for_object(rels->signatures, ctx->clauselist, + NIL, NULL,NULL); fss = get_grouped_exprs_hash(child_fss, aqo_node->grouping_exprs); - memset(&data, 0, sizeof(OkNNrdata)); - for (i = 0; i < aqo_K; i++) - data.matrix[i] = NULL; - /* Critical section */ - atomic_fss_learn_step(fhash, fss, &data, NULL, - target, rfactor, relnames, ctx->isTimedOut); + atomic_fss_learn_step(fs, fss, data, NULL, + target, rfactor, rels->hrels, ctx->isTimedOut); /* End of critical section */ } @@ -145,21 +128,20 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relnames, * true cardinalities) performs learning procedure. 
*/ static void -learn_sample(aqo_obj_stat *ctx, List *relnames, +learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, double learned, double rfactor, Plan *plan, bool notExecuted) { AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); uint64 fs = query_context.fspace_hash; double *features; double target; - OkNNrdata data; + OkNNrdata *data; int fss; - int i; + int ncols; - memset(&data, 0, sizeof(OkNNrdata)); target = log(learned); - fss = get_fss_for_object(relnames, ctx->clauselist, - ctx->selectivities, &data.cols, &features); + fss = get_fss_for_object(rels->signatures, ctx->clauselist, + ctx->selectivities, &ncols, &features); /* Only Agg nodes can have non-empty a grouping expressions list. */ Assert(!IsA(plan, Agg) || aqo_node->grouping_exprs != NIL); @@ -171,20 +153,12 @@ learn_sample(aqo_obj_stat *ctx, List *relnames, if (notExecuted && aqo_node->prediction > 0) return; - if (data.cols > 0) - for (i = 0; i < aqo_K; ++i) - data.matrix[i] = palloc(sizeof(double) * data.cols); + data = OkNNr_allocate(ncols); /* Critical section */ - atomic_fss_learn_step(fs, fss, &data, features, target, rfactor, - relnames, ctx->isTimedOut); + atomic_fss_learn_step(fs, fss, data, features, target, rfactor, + rels->hrels, ctx->isTimedOut); /* End of critical section */ - - if (data.cols > 0) - for (i = 0; i < aqo_K; ++i) - pfree(data.matrix[i]); - - pfree(features); } /* @@ -205,12 +179,16 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, double *cur_sel; int cur_hash; int cur_relid; + MemoryContext old_ctx_m; parametrized_sel = was_parametrized && (list_length(relidslist) == 1); if (parametrized_sel) { cur_relid = linitial_int(relidslist); + + old_ctx_m = MemoryContextSwitchTo(AQOUtilityMemCtx); get_eclasses(clauselist, &nargs, &args_hash, &eclass_hash); + MemoryContextSwitchTo(old_ctx_m); } foreach(l, clauselist) @@ -236,14 +214,18 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, else cur_sel = 
&rinfo->outer_selec; + if (*cur_sel < 0) + *cur_sel = 0; + + Assert(cur_sel > 0); + lst = lappend(lst, cur_sel); i++; } if (parametrized_sel) { - pfree(args_hash); - pfree(eclass_hash); + MemoryContextReset(AQOUtilityMemCtx); } return lst; @@ -338,7 +320,7 @@ should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, /* This node s*/ if (aqo_show_details) elog(NOTICE, - "[AQO] Learn on a plan node (%lu, %d), " + "[AQO] Learn on a plan node ("UINT64_FORMAT", %d), " "predicted rows: %.0lf, updated prediction: %.0lf", query_context.query_hash, node->fss, predicted, *nrows); @@ -354,7 +336,7 @@ should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, if (ctx->learn && aqo_show_details && fabs(*nrows - predicted) / predicted > 0.2) elog(NOTICE, - "[AQO] Learn on a finished plan node (%lu, %d), " + "[AQO] Learn on a finished plan node ("UINT64_FORMAT", %d), " "predicted rows: %.0lf, updated prediction: %.0lf", query_context.query_hash, node->fss, predicted, *nrows); @@ -510,7 +492,7 @@ learnOnPlanState(PlanState *p, void *context) List *cur_selectivities; cur_selectivities = restore_selectivities(aqo_node->clauses, - aqo_node->relids, + aqo_node->rels->hrels, aqo_node->jointype, aqo_node->was_parametrized); SubplanCtx.selectivities = list_concat(SubplanCtx.selectivities, @@ -518,14 +500,14 @@ learnOnPlanState(PlanState *p, void *context) SubplanCtx.clauselist = list_concat(SubplanCtx.clauselist, list_copy(aqo_node->clauses)); - if (aqo_node->relids != NIL) + if (aqo_node->rels->hrels != NIL) { /* * This plan can be stored as a cached plan. In the case we will have * bogus path_relids field (changed by list_concat routine) at the * next usage (and aqo-learn) of this plan. 
*/ - ctx->relidslist = list_copy(aqo_node->relids); + ctx->relidslist = list_copy(aqo_node->rels->hrels); if (p->instrument) { @@ -537,12 +519,12 @@ learnOnPlanState(PlanState *p, void *context) { if (IsA(p, AggState)) learn_agg_sample(&SubplanCtx, - aqo_node->relids, learn_rows, rfactor, + aqo_node->rels, learn_rows, rfactor, p->plan, notExecuted); else learn_sample(&SubplanCtx, - aqo_node->relids, learn_rows, rfactor, + aqo_node->rels, learn_rows, rfactor, p->plan, notExecuted); } } @@ -555,50 +537,6 @@ learnOnPlanState(PlanState *p, void *context) return false; } -/* - * Updates given row of query statistics: - * et - execution time - * pt - planning time - * ce - cardinality error - */ -void -update_query_stat_row(double *et, int *et_size, - double *pt, int *pt_size, - double *ce, int *ce_size, - double planning_time, - double execution_time, - double cardinality_error, - int64 *n_exec) -{ - int i; - - /* - * If plan contains one or more "never visited" nodes, cardinality_error - * have -1 value and will be written to the knowledge base. User can use it - * as a sign that AQO ignores this query. - */ - if (*ce_size >= aqo_stat_size) - for (i = 1; i < aqo_stat_size; ++i) - ce[i - 1] = ce[i]; - *ce_size = (*ce_size >= aqo_stat_size) ? aqo_stat_size : (*ce_size + 1); - ce[*ce_size - 1] = cardinality_error; - - if (*et_size >= aqo_stat_size) - for (i = 1; i < aqo_stat_size; ++i) - et[i - 1] = et[i]; - - *et_size = (*et_size >= aqo_stat_size) ? aqo_stat_size : (*et_size + 1); - et[*et_size - 1] = execution_time; - - if (*pt_size >= aqo_stat_size) - for (i = 1; i < aqo_stat_size; ++i) - pt[i - 1] = pt[i]; - - *pt_size = (*pt_size >= aqo_stat_size) ? aqo_stat_size : (*pt_size + 1); - pt[*pt_size - 1] = planning_time; /* Just remember: planning time can be negative. 
*/ - (*n_exec)++; -} - /***************************************************************************** * * QUERY EXECUTION STATISTICS COLLECTING HOOKS @@ -682,6 +620,7 @@ static int exec_nested_level = 0; static void aqo_timeout_handler(void) { + MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); aqo_obj_stat ctx = {NIL, NIL, NIL, false, false}; if (!timeoutCtl.queryDesc || !ExtractFromQueryEnv(timeoutCtl.queryDesc)) @@ -694,6 +633,7 @@ aqo_timeout_handler(void) elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data."); learnOnPlanState(timeoutCtl.queryDesc->planstate, (void *) &ctx); + MemoryContextSwitchTo(oldctx); } static bool @@ -773,12 +713,12 @@ aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, void aqo_ExecutorEnd(QueryDesc *queryDesc) { - double execution_time; - double cardinality_error; - QueryStat *stat = NULL; - instr_time endtime; - EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); - LOCKTAG tag; + double execution_time; + double cardinality_error; + StatEntry *stat; + instr_time endtime; + EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); + MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); cardinality_sum_errors = 0.; cardinality_num_objects = 0; @@ -817,74 +757,42 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) * Analyze plan if AQO need to learn or need to collect statistics only. */ learnOnPlanState(queryDesc->planstate, (void *) &ctx); - list_free(ctx.clauselist); - list_free(ctx.relidslist); - list_free(ctx.selectivities); } - if (query_context.collect_stat) - stat = get_aqo_stat(query_context.query_hash); - - { - /* Calculate execution time. */ - INSTR_TIME_SET_CURRENT(endtime); - INSTR_TIME_SUBTRACT(endtime, query_context.start_execution_time); - execution_time = INSTR_TIME_GET_DOUBLE(endtime); + /* Calculate execution time. 
*/ + INSTR_TIME_SET_CURRENT(endtime); + INSTR_TIME_SUBTRACT(endtime, query_context.start_execution_time); + execution_time = INSTR_TIME_GET_DOUBLE(endtime); - if (cardinality_num_objects > 0) - cardinality_error = cardinality_sum_errors / cardinality_num_objects; - else - cardinality_error = -1; + if (cardinality_num_objects > 0) + cardinality_error = cardinality_sum_errors / cardinality_num_objects; + else + cardinality_error = -1; - /* Prevent concurrent updates. */ - init_lock_tag(&tag, query_context.query_hash, query_context.fspace_hash); - LockAcquire(&tag, ExclusiveLock, false, false); + if (query_context.collect_stat) + { + /* Write AQO statistics to the aqo_query_stat table */ + stat = aqo_stat_store(query_context.query_hash, + query_context.use_aqo, + query_context.planning_time, execution_time, + cardinality_error); if (stat != NULL) { - /* Calculate AQO statistics. */ - if (query_context.use_aqo) - /* For the case, when query executed with AQO predictions. */ - update_query_stat_row(stat->execution_time_with_aqo, - &stat->execution_time_with_aqo_size, - stat->planning_time_with_aqo, - &stat->planning_time_with_aqo_size, - stat->cardinality_error_with_aqo, - &stat->cardinality_error_with_aqo_size, - query_context.planning_time, - execution_time, - cardinality_error, - &stat->executions_with_aqo); - else - /* For the case, when query executed without AQO predictions. */ - update_query_stat_row(stat->execution_time_without_aqo, - &stat->execution_time_without_aqo_size, - stat->planning_time_without_aqo, - &stat->planning_time_without_aqo_size, - stat->cardinality_error_without_aqo, - &stat->cardinality_error_without_aqo_size, - query_context.planning_time, - execution_time, - cardinality_error, - &stat->executions_without_aqo); - /* Store all learn data into the AQO service relations. 
*/ if (!query_context.adding_query && query_context.auto_tuning) automatical_query_tuning(query_context.query_hash, stat); - - /* Write AQO statistics to the aqo_query_stat table */ - update_aqo_stat(query_context.fspace_hash, stat); - pfree_query_stat(stat); } - - /* Allow concurrent queries to update this feature space. */ - LockRelease(&tag, ExclusiveLock, false); } selectivity_cache_clear(); cur_classes = ldelete_uint64(cur_classes, query_context.query_hash); end: + /* Release all AQO-specific memory, allocated during learning procedure */ + MemoryContextSwitchTo(oldctx); + MemoryContextReset(AQOLearnMemCtx); + if (prev_ExecutorEnd_hook) prev_ExecutorEnd_hook(queryDesc); else @@ -910,14 +818,13 @@ StoreToQueryEnv(QueryDesc *queryDesc) { EphemeralNamedRelation enr; int qcsize = sizeof(QueryContextData); - MemoryContext oldCxt; bool newentry = false; - - oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc->plannedstmt)); + MemoryContext oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); if (queryDesc->queryEnv == NULL) - queryDesc->queryEnv = create_queryEnv(); + queryDesc->queryEnv = create_queryEnv(); + Assert(queryDesc->queryEnv); enr = get_ENR(queryDesc->queryEnv, AQOPrivateData); if (enr == NULL) { @@ -932,12 +839,13 @@ StoreToQueryEnv(QueryDesc *queryDesc) enr->md.reliddesc = InvalidOid; enr->md.tupdesc = NULL; enr->reldata = palloc0(qcsize); + Assert(enr->reldata != NULL); memcpy(enr->reldata, &query_context, qcsize); if (newentry) register_ENR(queryDesc->queryEnv, enr); - MemoryContextSwitchTo(oldCxt); + MemoryContextSwitchTo(oldctx); } static bool @@ -959,17 +867,16 @@ static void StorePlanInternals(QueryDesc *queryDesc) { EphemeralNamedRelation enr; - MemoryContext oldCxt; bool newentry = false; + MemoryContext oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); njoins = 0; planstate_tree_walker(queryDesc->planstate, calculateJoinNum, &njoins); - oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc->plannedstmt)); - if (queryDesc->queryEnv 
== NULL) - queryDesc->queryEnv = create_queryEnv(); + queryDesc->queryEnv = create_queryEnv(); + Assert(queryDesc->queryEnv); enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); if (enr == NULL) { @@ -984,12 +891,13 @@ StorePlanInternals(QueryDesc *queryDesc) enr->md.reliddesc = InvalidOid; enr->md.tupdesc = NULL; enr->reldata = palloc0(sizeof(int)); + Assert(enr->reldata != NULL); memcpy(enr->reldata, &njoins, sizeof(int)); if (newentry) register_ENR(queryDesc->queryEnv, enr); - MemoryContextSwitchTo(oldCxt); + MemoryContextSwitchTo(oldctx); } /* @@ -1013,6 +921,7 @@ ExtractFromQueryEnv(QueryDesc *queryDesc) if (enr == NULL) return false; + Assert(enr->reldata != NULL); memcpy(&query_context, enr->reldata, sizeof(QueryContextData)); return true; diff --git a/preprocessing.c b/preprocessing.c index af10ae7f..55000e79 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -11,7 +11,7 @@ * 'use_aqo': whether to use AQO estimations in query optimization * 'learn_aqo': whether to update AQO data based on query execution * statistics - * 'fspace_hash': hash of feature space to use with given query + * 'fs': hash of feature space to use with given query * 'auto_tuning': whether AQO may change use_aqo and learn_aqo values * for the next execution of such type of query using * its self-tuning algorithm @@ -65,50 +65,14 @@ #include "aqo.h" #include "hash.h" #include "preprocessing.h" +#include "storage.h" -const char * -CleanQuerytext(const char *query, int *location, int *len) -{ - int query_location = *location; - int query_len = *len; - - /* First apply starting offset, unless it's -1 (unknown). 
*/ - if (query_location >= 0) - { - Assert(query_location <= strlen(query)); - query += query_location; - /* Length of 0 (or -1) means "rest of string" */ - if (query_len <= 0) - query_len = strlen(query); - else - Assert(query_len <= strlen(query)); - } - else - { - /* If query location is unknown, distrust query_len as well */ - query_location = 0; - query_len = strlen(query); - } - - /* - * Discard leading and trailing whitespace, too. Use scanner_isspace() - * not libc's isspace(), because we want to match the lexer's behavior. - */ - while (query_len > 0 && scanner_isspace(query[0])) - query++, query_location++, query_len--; - while (query_len > 0 && scanner_isspace(query[query_len - 1])) - query_len--; - - *location = query_location; - *len = query_len; - - return query; -} - /* List of feature spaces, that are processing in this backend. */ List *cur_classes = NIL; +int aqo_join_threshold = 0; + static bool isQueryUsingSystemRelation(Query *query); static bool isQueryUsingSystemRelation_walker(Node *node, void *context); @@ -134,30 +98,18 @@ call_default_planner(Query *parse, } /* - * Check, that a 'CREATE EXTENSION aqo' command has been executed. - * This function allows us to execute the get_extension_oid routine only once - * at each backend. - * If any AQO-related table is missed we will set aqo_enabled to false (see - * a storage implementation module). + * Can AQO be used for the query? */ static bool -aqoIsEnabled(void) +aqoIsEnabled(Query *parse) { - if (creating_extension) - /* Nothing to tell in this mode. */ + if (creating_extension || + (aqo_mode == AQO_MODE_DISABLED && !force_collect_stat) || + (parse->commandType != CMD_SELECT && parse->commandType != CMD_INSERT && + parse->commandType != CMD_UPDATE && parse->commandType != CMD_DELETE)) return false; - if (aqo_enabled) - /* - * Fast path. Dropping should be detected by absence of any AQO-related - * table. 
- */ - return true; - - if (get_extension_oid("aqo", true) != InvalidOid) - aqo_enabled = true; - - return aqo_enabled; + return true; } /* @@ -174,21 +126,17 @@ aqo_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) { - bool query_is_stored = false; - LOCKTAG tag; - MemoryContext oldCxt; + bool query_is_stored = false; + MemoryContext oldctx; + oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); /* * We do not work inside an parallel worker now by reason of insert into - * the heap during planning. Transactions is synchronized between parallel + * the heap during planning. Transactions are synchronized between parallel * sections. See GetCurrentCommandId() comments also. */ - if (!aqoIsEnabled() || - (parse->commandType != CMD_SELECT && parse->commandType != CMD_INSERT && - parse->commandType != CMD_UPDATE && parse->commandType != CMD_DELETE) || - creating_extension || + if (!aqoIsEnabled(parse) || IsInParallelMode() || IsParallelWorker() || - (aqo_mode == AQO_MODE_DISABLED && !force_collect_stat) || strstr(application_name, "postgres_fdw") != NULL || /* Prevent distributed deadlocks */ strstr(application_name, "pgfdw:") != NULL || /* caused by fdw */ isQueryUsingSystemRelation(parse) || @@ -198,6 +146,7 @@ aqo_planner(Query *parse, * We should disable AQO for this query to remember this decision along * all execution stages. 
*/ + MemoryContextSwitchTo(oldctx); disable_aqo_for_query(); return call_default_planner(parse, @@ -207,7 +156,18 @@ aqo_planner(Query *parse, } selectivity_cache_clear(); + MemoryContextSwitchTo(oldctx); + + oldctx = MemoryContextSwitchTo(AQOUtilityMemCtx); query_context.query_hash = get_query_hash(parse, query_string); + MemoryContextSwitchTo(oldctx); + + MemoryContextReset(AQOUtilityMemCtx); + + oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); + + /* By default, they should be equal */ + query_context.fspace_hash = query_context.query_hash; if (query_is_deactivated(query_context.query_hash) || list_member_uint64(cur_classes,query_context.query_hash)) @@ -217,19 +177,24 @@ aqo_planner(Query *parse, * feature space, that is processing yet (disallow invalidation * recursion, as an example). */ + MemoryContextSwitchTo(oldctx); disable_aqo_for_query(); + return call_default_planner(parse, query_string, cursorOptions, boundParams); } + MemoryContextSwitchTo(oldctx); elog(DEBUG1, "AQO will be used for query '%s', class "UINT64_FORMAT, query_string ? 
query_string : "null string", query_context.query_hash); - oldCxt = MemoryContextSwitchTo(AQOMemoryContext); + oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); cur_classes = lappend_uint64(cur_classes, query_context.query_hash); - MemoryContextSwitchTo(oldCxt); + MemoryContextSwitchTo(oldctx); + + oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); if (aqo_mode == AQO_MODE_DISABLED) { @@ -238,7 +203,7 @@ aqo_planner(Query *parse, goto ignore_query_settings; } - query_is_stored = find_query(query_context.query_hash, &query_context); + query_is_stored = aqo_queries_find(query_context.query_hash, &query_context); if (!query_is_stored) { @@ -248,7 +213,6 @@ aqo_planner(Query *parse, query_context.adding_query = true; query_context.learn_aqo = true; query_context.use_aqo = false; - query_context.fspace_hash = query_context.query_hash; query_context.auto_tuning = true; query_context.collect_stat = true; break; @@ -257,7 +221,7 @@ aqo_planner(Query *parse, query_context.learn_aqo = true; query_context.use_aqo = true; query_context.auto_tuning = false; - query_context.fspace_hash = 0; + query_context.fspace_hash = 0; /* Use common feature space */ query_context.collect_stat = false; break; case AQO_MODE_CONTROLLED: @@ -276,7 +240,6 @@ aqo_planner(Query *parse, query_context.adding_query = true; query_context.learn_aqo = true; query_context.use_aqo = true; - query_context.fspace_hash = query_context.query_hash; query_context.auto_tuning = false; query_context.collect_stat = true; break; @@ -324,7 +287,6 @@ aqo_planner(Query *parse, * suppressed manually) and collect stats. */ query_context.collect_stat = true; - query_context.fspace_hash = query_context.query_hash; break; case AQO_MODE_INTELLIGENT: @@ -342,49 +304,61 @@ aqo_planner(Query *parse, ignore_query_settings: if (!query_is_stored && (query_context.adding_query || force_collect_stat)) { - /* - * find-add query and query text must be atomic operation to prevent - * concurrent insertions. 
- */ - init_lock_tag(&tag, query_context.query_hash, 0); - LockAcquire(&tag, ExclusiveLock, false, false); /* * Add query into the AQO knowledge base. To process an error with * concurrent addition from another backend we will try to restart * preprocessing routine. */ - update_query(query_context.query_hash, query_context.fspace_hash, - query_context.learn_aqo, query_context.use_aqo, - query_context.auto_tuning); - - /* - * Add query text into the ML-knowledge base. Just for further - * analysis. In the case of cached plans we could have NULL query text. - */ - if (query_string != NULL) - add_query_text(query_context.query_hash, query_string); + if (aqo_queries_store(query_context.query_hash, query_context.fspace_hash, + query_context.learn_aqo, query_context.use_aqo, + query_context.auto_tuning)) + { + /* + * Add query text into the ML-knowledge base. Just for further + * analysis. In the case of cached plans we may have NULL query text. + */ + if (!aqo_qtext_store(query_context.query_hash, query_string)) + { + Assert(0); /* panic only on debug installation */ + elog(ERROR, "[AQO] Impossible situation was detected. Maybe not enough of shared memory?"); + } + } + else + { + /* + * In the case of problems (shmem overflow, as a typical issue) - + * disable AQO for the query class. + */ + disable_aqo_for_query(); - LockRelease(&tag, ExclusiveLock, false); + /* + * Switch AQO to controlled mode. In this mode we wouldn't add new + * query classes, just use and learn on existed set. + */ + aqo_mode = AQO_MODE_CONTROLLED; + } } if (force_collect_stat) - { /* * If this GUC is set, AQO will analyze query results and collect * query execution statistics in any mode. */ query_context.collect_stat = true; - query_context.fspace_hash = query_context.query_hash; - } if (!IsQueryDisabled()) /* It's good place to set timestamp of start of a planning process. 
*/ INSTR_TIME_SET_CURRENT(query_context.start_planning_time); - - return call_default_planner(parse, - query_string, - cursorOptions, - boundParams); + { + PlannedStmt *stmt; + MemoryContextSwitchTo(oldctx); + stmt = call_default_planner(parse, query_string, + cursorOptions, boundParams); + + /* Release the memory, allocated for AQO predictions */ + MemoryContextReset(AQOPredictMemCtx); + return stmt; + } } /* @@ -393,7 +367,6 @@ aqo_planner(Query *parse, void disable_aqo_for_query(void) { - query_context.learn_aqo = false; query_context.use_aqo = false; query_context.auto_tuning = false; @@ -405,19 +378,28 @@ disable_aqo_for_query(void) query_context.planning_time = -1.; } +typedef struct AQOPreWalkerCtx +{ + bool trivQuery; + int njoins; +} AQOPreWalkerCtx; + /* * Examine a fully-parsed query, and return TRUE iff any relation underlying - * the query is a system relation or no one relation touched by the query. + * the query is a system relation or no one permanent (non-temporary) relation + * touched by the query. 
*/ static bool isQueryUsingSystemRelation(Query *query) { - bool trivQuery = true; + AQOPreWalkerCtx ctx; bool result; - result = isQueryUsingSystemRelation_walker((Node *) query, &trivQuery); + ctx.trivQuery = true; + ctx.njoins = 0; + result = isQueryUsingSystemRelation_walker((Node *) query, &ctx); - if (result || trivQuery) + if (result || ctx.trivQuery || ctx.njoins < aqo_join_threshold) return true; return false; } @@ -438,16 +420,54 @@ IsAQORelation(Relation rel) return false; } +/* + * Walk through jointree and calculate number of potential joins + */ +static void +jointree_walker(Node *jtnode, void *context) +{ + AQOPreWalkerCtx *ctx = (AQOPreWalkerCtx *) context; + + if (jtnode == NULL || IsA(jtnode, RangeTblRef)) + return; + else if (IsA(jtnode, FromExpr)) + { + FromExpr *f = (FromExpr *) jtnode; + ListCell *l; + + /* Count number of potential joins by number of sources in FROM list */ + ctx->njoins += list_length(f->fromlist) - 1; + + foreach(l, f->fromlist) + jointree_walker(lfirst(l), context); + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + + /* Don't forget about explicit JOIN statement */ + ctx->njoins++; + jointree_walker(j->larg, context); + jointree_walker(j->rarg, context); + } + else + elog(ERROR, "unrecognized node type: %d", (int) nodeTag(jtnode)); + return; +} + static bool isQueryUsingSystemRelation_walker(Node *node, void *context) { + MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); + AQOPreWalkerCtx *ctx = (AQOPreWalkerCtx *) context; + if (node == NULL) return false; if (IsA(node, Query)) { - Query *query = (Query *) node; - ListCell *rtable; + Query *query = (Query *) node; + ListCell *rtable; foreach(rtable, query->rtable) { @@ -458,13 +478,18 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) Relation rel = table_open(rte->relid, AccessShareLock); bool is_catalog = IsCatalogRelation(rel); bool is_aqo_rel = IsAQORelation(rel); - bool *trivQuery = (bool *) context; - 
table_close(rel, AccessShareLock); if (is_catalog || is_aqo_rel) + { + table_close(rel, AccessShareLock); return true; + } + + if (rel->rd_rel->relpersistence != RELPERSISTENCE_TEMP) + /* Plane non TEMP table */ + ctx->trivQuery = false; - *trivQuery = false; + table_close(rel, AccessShareLock); } else if (rte->rtekind == RTE_FUNCTION) { @@ -474,6 +499,10 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) } } + jointree_walker((Node *) query->jointree, context); + MemoryContextSwitchTo(oldctx); + + /* Recursively plunge into subqueries and CTEs */ return query_tree_walker(query, isQueryUsingSystemRelation_walker, context, diff --git a/selectivity_cache.c b/selectivity_cache.c index 0b354ba0..fbaa8829 100644 --- a/selectivity_cache.c +++ b/selectivity_cache.c @@ -30,6 +30,9 @@ typedef struct List *objects = NIL; +/* Specific memory context for selectivity objects */ +MemoryContext AQOCacheSelectivity = NULL; + /* * Stores the given selectivity for clause_hash, relid and global_relid * of the clause. 
@@ -42,6 +45,13 @@ cache_selectivity(int clause_hash, { ListCell *l; Entry *cur_element; + MemoryContext old_ctx; + + if (!AQOCacheSelectivity) + AQOCacheSelectivity = AllocSetContextCreate(AQOTopMemCtx, + "AQOCacheSelectivity", + ALLOCSET_DEFAULT_SIZES); + foreach(l, objects) { @@ -53,13 +63,14 @@ cache_selectivity(int clause_hash, return; } } - + old_ctx = MemoryContextSwitchTo(AQOCacheSelectivity); cur_element = palloc(sizeof(*cur_element)); cur_element->clause_hash = clause_hash; cur_element->relid = relid; cur_element->global_relid = global_relid; cur_element->selectivity = selectivity; objects = lappend(objects, cur_element); + MemoryContextSwitchTo(old_ctx); } /* @@ -89,6 +100,12 @@ selectivity_cache_find_global_relid(int clause_hash, int global_relid) void selectivity_cache_clear(void) { - MemoryContextReset(AQO_cache_mem_ctx); + if (!AQOCacheSelectivity) + { + Assert(objects == NIL); + return; + } + + MemoryContextReset(AQOCacheSelectivity); objects = NIL; } diff --git a/sql/aqo_CVE-2020-14350.sql b/sql/aqo_CVE-2020-14350.sql index 92c200f3..1b36b50b 100644 --- a/sql/aqo_CVE-2020-14350.sql +++ b/sql/aqo_CVE-2020-14350.sql @@ -3,11 +3,6 @@ * See also 7eeb1d986 postgresql commit. */ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. -SET max_parallel_workers TO 0; - SET client_min_messages = 'warning'; DROP ROLE IF EXISTS regress_hacker; SET client_min_messages = 'notice'; @@ -32,6 +27,7 @@ $$ LANGUAGE plpgsql RETURNS NULL ON NULL INPUT; RESET ROLE; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; -- Test result (must be 'off') SET ROLE regress_hacker; diff --git a/sql/aqo_controlled.sql b/sql/aqo_controlled.sql index 30b201ee..0ba88e56 100644 --- a/sql/aqo_controlled.sql +++ b/sql/aqo_controlled.sql @@ -1,8 +1,3 @@ --- Switch off parallel workers because of unsteadiness. 
--- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. -SET max_parallel_workers TO 0; - CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -34,6 +29,7 @@ CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); ANALYZE aqo_test2; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; @@ -81,10 +77,11 @@ SELECT count(*) FROM tmp1; DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET auto_tuning=false; -UPDATE aqo_queries SET learn_aqo=true; -UPDATE aqo_queries SET use_aqo=false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, true, false, false) +; -- learn = true, use = false, tuning = false EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 @@ -112,7 +109,10 @@ SELECT t1.a AS a, t2.a AS b, t3.a AS c FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 WHERE t1.a = t2.b AND t2.a = t3.b; -UPDATE aqo_queries SET use_aqo=true; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, NULL, true, NULL) +; -- set use = true EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 @@ -154,4 +154,7 @@ DROP TABLE aqo_test1; DROP INDEX aqo_test2_idx_a; DROP TABLE aqo_test2; +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT 1 FROM aqo_reset(); + DROP EXTENSION aqo; diff --git a/sql/aqo_disabled.sql b/sql/aqo_disabled.sql index 350fef13..fd709cf3 100644 --- a/sql/aqo_disabled.sql +++ b/sql/aqo_disabled.sql @@ -1,8 +1,3 @@ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. 
-SET max_parallel_workers TO 0; - CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -22,6 +17,26 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; +CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; + +SET aqo.mode = 'controlled'; + +CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 +WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; +SELECT count(*) FROM tmp1; +DROP TABLE tmp1; +CREATE TABLE tmp1 AS SELECT t1.a, t2.b, t3.c +FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM tmp1; +DROP TABLE tmp1; +EXPLAIN SELECT * FROM aqo_test0 +WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; +EXPLAIN SELECT t1.a, t2.b, t3.c +FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero SET aqo.mode = 'disabled'; @@ -43,8 +58,7 @@ EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -CREATE EXTENSION aqo; - +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero SET aqo.mode = 'intelligent'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 @@ -58,9 +72,13 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = SELECT count(*) FROM tmp1; DROP TABLE tmp1; +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = true, use_aqo = true, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, true, true, false) +; -- Enable all disabled query classes EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 
3; @@ -69,6 +87,7 @@ EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero SET aqo.mode = 'disabled'; EXPLAIN SELECT * FROM aqo_test0 @@ -77,6 +96,10 @@ WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero + +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index 40c00125..2d71a20d 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -4,16 +4,12 @@ -- Aggregate push-down -- Push-down of groupings with HAVING clause. --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. -SET max_parallel_workers TO 0; - CREATE EXTENSION aqo; CREATE EXTENSION postgres_fdw; SET aqo.mode = 'learn'; SET aqo.show_details = 'true'; -- show AQO info for each node and entire query. SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. +SET aqo.join_threshold = 0; DO $d$ BEGIN @@ -56,11 +52,13 @@ SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants -- Trivial JOIN push-down. 
SELECT str FROM expln(' -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) -SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; ') AS str WHERE str NOT LIKE '%Sort Method%'; + +-- TODO: Should learn on postgres_fdw nodes EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) -SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; -- TODO: Non-mergejoinable join condition. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) diff --git a/sql/aqo_forced.sql b/sql/aqo_forced.sql index 8cb10261..92a26564 100644 --- a/sql/aqo_forced.sql +++ b/sql/aqo_forced.sql @@ -1,8 +1,3 @@ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. -SET max_parallel_workers TO 0; - CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -24,6 +19,7 @@ CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; @@ -61,4 +57,7 @@ DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT 1 FROM aqo_reset(); + DROP EXTENSION aqo; diff --git a/sql/aqo_intelligent.sql b/sql/aqo_intelligent.sql index 87a82842..545325c1 100644 --- a/sql/aqo_intelligent.sql +++ b/sql/aqo_intelligent.sql @@ -1,8 +1,3 @@ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. 
-SET max_parallel_workers TO 0; - CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -24,6 +19,7 @@ CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; @@ -149,7 +145,10 @@ DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = false, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, false, false) +; -- Disable all query classes EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -178,7 +177,10 @@ EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = true, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, true, false) +; -- set learn = false, use = true, tuning = false EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -213,4 +215,7 @@ DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT 1 FROM aqo_reset(); + DROP EXTENSION aqo; diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index 13fde235..8b57972e 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -1,7 +1,23 @@ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. 
-SET max_parallel_workers TO 0; +-- The function just copied from stats_ext.sql +create function check_estimated_rows(text) returns table (estimated int, actual int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain analyze %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)'); + return query select tmp[1]::int, tmp[2]::int; + end if; + end loop; +end; +$$; CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) @@ -24,6 +40,7 @@ CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; @@ -105,11 +122,29 @@ CREATE TABLE tmp1 AS SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; SELECT count(*) FROM tmp1; + +-- Remove data on some unneeded instances of tmp1 table. 
+SELECT * FROM aqo_cleanup(); + +-- Result of the query below should be empty +SELECT * FROM aqo_query_texts aqt1, aqo_query_texts aqt2 +WHERE aqt1.query_text = aqt2.query_text AND aqt1.queryid <> aqt2.queryid; + +-- Fix the state of the AQO data +SELECT min(reliability),sum(nfeatures),query_text +FROM aqo_data ad, aqo_query_texts aqt +WHERE aqt.queryid = ad.fs +GROUP BY (query_text) ORDER BY (md5(query_text)) +; + DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = false, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, false, false) +; -- Disable all AQO query classes EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -126,19 +161,24 @@ WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; -EXPlAIN SELECT t1.a, t2.b, t3.c +EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 -WHERE t1.a = t2.b AND t2.a = t3.b; +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + WHERE t1.a = t2.b AND t2.a = t3.b; +'); EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = true, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, true, false) +; -- learn = false, use = true, tuning = false EXPLAIN SELECT * FROM 
aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -155,22 +195,126 @@ WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; -EXPlAIN SELECT t1.a, t2.b, t3.c +EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 -WHERE t1.a = t2.b AND t2.a = t3.b; - -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 -WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; - +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + WHERE t1.a = t2.b AND t2.a = t3.b; +'); + +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +'); + +-- Test limit on number of joins +SET aqo.mode = 'learn'; + +SELECT * FROM aqo_drop_class(0); +SELECT * FROM aqo_drop_class(42); + +-- Remove all data from ML knowledge base +SELECT count(*) FROM ( +SELECT aqo_drop_class(q1.id::bigint) FROM ( + SELECT queryid AS id + FROM aqo_queries WHERE queryid <> 0) AS q1 +) AS q2; +SELECT count(*) FROM aqo_data; + +SET aqo.join_threshold = 3; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); +SELECT count(*) FROM aqo_data; -- Return 0 - do not learn on the queries above + +SELECT * FROM check_estimated_rows(' + SELECT * + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + WHERE t1.a = t2.b AND t2.a = 
t3.b AND t3.a = t4.b; +'); -- Learn on the query +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; +SELECT query_text FROM aqo_query_texts WHERE queryid <> 0; -- Check query + +SET aqo.join_threshold = 1; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); +SELECT * FROM check_estimated_rows( + 'SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; -- Learn on a query with one join + +SET aqo.join_threshold = 0; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; -- Learn on the query without any joins now + +SET aqo.join_threshold = 1; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a)'); +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; -- See one more query in the AQO knowledge base + +SELECT * FROM check_estimated_rows('WITH selected AS (SELECT * FROM aqo_test1 t1) SELECT count(*) FROM selected'); +SELECT * FROM check_estimated_rows(' + WITH selected AS ( + SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) + ) SELECT count(*) FROM selected') +; +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + +-- InitPlan +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1 WHERE t1.a IN ( + SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) + )'); +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + +-- SubPlan +SELECT * FROM check_estimated_rows(' + SELECT ( + SELECT avg(t2.a) FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) AND (t2.a = t1.a) + ) FROM aqo_test1 AS t1; +'); +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + +-- Subquery +SET aqo.join_threshold = 3; +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, + (SELECT t2.a FROM aqo_test1 AS t2 JOIN 
aqo_test1 AS t3 ON (t2.b = t3.a)) q1 + WHERE q1.a*t1.a = t1.a + 15; +'); -- Two JOINs, ignore it +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 +SET aqo.join_threshold = 2; +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, + (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 + WHERE q1.a*t1.a = t1.a + 15; +'); -- One JOIN from subquery, another one from the query +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + +SELECT * FROM check_estimated_rows(' + WITH selected AS ( + SELECT t2.a FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) + ) SELECT count(*) FROM aqo_test1 t3, selected WHERE selected.a = t3.a') +; -- One JOIN extracted from CTE, another - from a FROM part of the query +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + +DROP FUNCTION check_estimated_rows; +RESET aqo.join_threshold; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; - DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT 1 FROM aqo_reset(); + DROP EXTENSION aqo; diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index b869c037..d2abeb93 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -1,147 +1,134 @@ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. 
-SET max_parallel_workers TO 0; - CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; DROP TABLE IF EXISTS a; DROP TABLE IF EXISTS b; CREATE TABLE a(); SELECT * FROM a; -SELECT clean_aqo_data(); +SELECT 'a'::regclass::oid AS a_oid \gset +SELECT true FROM aqo_cleanup(); /* * lines with a_oid in aqo_data, - * lines with fspace_hash corresponding to a_oid in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat + * lines with fs corresponding to a_oid in aqo_queries, + * lines with queryid corresponding to a_oid's fs in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs in aqo_query_stat * should remain */ -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); DROP TABLE a; -SELECT clean_aqo_data(); +SELECT true FROM aqo_cleanup(); /* * lines with a_oid in aqo_data, - * 
lines with a_oid's fspace_hash EQUAL TO query_hash in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat, + * lines with a_oid's fs EQUAL TO queryid in aqo_queries, + * lines with queryid corresponding to a_oid's fs in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs in aqo_query_stat, * should be deleted */ -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); - -CREATE TABLE a(); -SELECT * FROM a; -SELECT 'a'::regclass::oid AS a_oid \gset --- add manually line with different fspace_hash and query_hash to aqo_queries -INSERT INTO aqo_queries VALUES (:a_oid + 1, 't', 't', :a_oid, 'f'); -DROP TABLE a; -SELECT clean_aqo_data(); --- this line should remain -SELECT count(*) FROM aqo_queries WHERE 
(fspace_hash = :a_oid AND query_hash = :a_oid + 1); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); CREATE TABLE a(); CREATE TABLE b(); SELECT * FROM a; SELECT * FROM b; SELECT * FROM b CROSS JOIN a; --- SELECT 'a'::regclass::oid AS a_oid \gset --- SELECT 'b'::regclass::oid AS b_oid \gset +SELECT 'a'::regclass::oid AS a_oid \gset +SELECT 'b'::regclass::oid AS b_oid \gset -- new lines added to aqo_data -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); -SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE 
:b_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); DROP TABLE a; -SELECT clean_aqo_data(); +SELECT true FROM aqo_cleanup(); /* - * lines corresponding to a_oid and both a_oid's fspace_hash deleted in aqo_data, - * lines with fspace_hash corresponding to a_oid deleted in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_texts, - * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_stat, + * lines corresponding to a_oid and both a_oid's fs deleted in aqo_data, + * lines with fs corresponding to a_oid deleted in aqo_queries, + * lines with queryid corresponding to a_oid's fs deleted in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs deleted in aqo_query_stat, */ -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE - 
aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); -- lines corresponding to b_oid in all theese tables should remain -SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = 
ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); DROP TABLE b; -SELECT clean_aqo_data(); +SELECT true FROM aqo_cleanup(); -- lines corresponding to b_oid in theese tables deleted -SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); DROP EXTENSION aqo; \ No newline at end of 
file diff --git a/sql/feature_subspace.sql b/sql/feature_subspace.sql new file mode 100644 index 00000000..0176a700 --- /dev/null +++ b/sql/feature_subspace.sql @@ -0,0 +1,45 @@ +-- This test related to some issues on feature subspace calculation + +CREATE EXTENSION aqo; + +SET aqo.mode = 'learn'; +SET aqo.join_threshold = 0; +SET aqo.show_details = 'on'; + +CREATE TABLE a AS (SELECT gs AS x FROM generate_series(1,10) AS gs); +CREATE TABLE b AS (SELECT gs AS x FROM generate_series(1,100) AS gs); + +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +-- +-- A LEFT JOIN B isn't equal B LEFT JOIN A. +-- +SELECT str AS result +FROM expln(' +SELECT * FROM a LEFT JOIN b USING (x);') AS str +WHERE str NOT LIKE '%Memory%'; + +-- TODO: Using method of other classes neighbours we get a bad estimation. +SELECT str AS result +FROM expln(' +SELECT * FROM b LEFT JOIN a USING (x);') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Look into the reason: two JOINs from different classes have the same FSS. +SELECT to_char(d1.targets[1], 'FM999.00') AS target FROM aqo_data d1 +JOIN aqo_data d2 ON (d1.fs <> d2.fs AND d1.fss = d2.fss) +WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids) order by target; + +DROP TABLE a,b CASCADE; +SELECT true FROM aqo_reset(); +DROP EXTENSION aqo; diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index 81d37f3b..d9fac51a 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -1,12 +1,8 @@ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. 
-SET max_parallel_workers TO 0; - \set citizens 1000 +SET aqo.join_threshold = 0; SET aqo.mode = 'disabled'; -SET aqo.force_collect_stat = 'on'; +SET aqo.force_collect_stat = 'off'; CREATE TABLE person ( id serial PRIMARY KEY, @@ -28,6 +24,7 @@ INSERT INTO person (id,age,gender,passport) ); CREATE EXTENSION aqo; +SET aqo.force_collect_stat = 'on'; SELECT count(*) FROM person WHERE age<18; SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; @@ -42,8 +39,12 @@ AS $$ $$; SELECT learn_aqo,use_aqo,auto_tuning,round_array(cardinality_error_without_aqo) ce,executions_without_aqo nex -FROM aqo_queries JOIN aqo_query_stat USING (query_hash); +FROM aqo_queries AS aq JOIN aqo_query_stat AS aqs +ON aq.queryid = aqs.queryid +ORDER BY (cardinality_error_without_aqo); SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); +DROP TABLE person; +SELECT 1 FROM aqo_reset(); -- Full remove of ML data before the end DROP EXTENSION aqo; diff --git a/sql/gucs.sql b/sql/gucs.sql index c51c3699..9cb13e00 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -1,9 +1,6 @@ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. -SET max_parallel_workers TO 0; - CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; + SET aqo.mode = 'learn'; SET aqo.show_details = true; @@ -11,12 +8,30 @@ CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; +SELECT true FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. +-- Check AQO addons to explain (the only stable data) EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; +SET aqo.mode = 'disabled'; -- Check existence of the interface functions. 
-SELECT obj_description('public.show_cardinality_errors'::regproc::oid); +SELECT obj_description('aqo_cardinality_error'::regproc::oid); +SELECT obj_description('aqo_execution_time'::regproc::oid); +SELECT obj_description('aqo_drop_class'::regproc::oid); +SELECT obj_description('aqo_cleanup'::regproc::oid); +SELECT obj_description('aqo_reset'::regproc::oid); + +\df aqo_cardinality_error +\df aqo_execution_time +\df aqo_drop_class +\df aqo_cleanup +\df aqo_reset + +-- Check stat reset +SELECT count(*) FROM aqo_query_stat; +SELECT true FROM aqo_reset(); -- Remove one record from all tables +SELECT count(*) FROM aqo_query_stat; DROP EXTENSION aqo; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql new file mode 100644 index 00000000..07aff8a7 --- /dev/null +++ b/sql/look_a_like.sql @@ -0,0 +1,84 @@ +CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'on'; + +SET enable_material = 'off'; + +DROP TABLE IF EXISTS a,b CASCADE; +CREATE TABLE a (x int); +INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival; + +CREATE TABLE b (y int); +INSERT INTO b (y) SELECT mod(ival + 1,10) FROM generate_series(1,1000) As ival; + +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +-- no one predicted rows. 
we use knowledge cardinalities of the query +-- in the next queries with the same fss_hash +SELECT str AS result +FROM expln(' +SELECT x FROM A where x = 5;') AS str +WHERE str NOT LIKE 'Query Identifier%'; + +SELECT str AS result +FROM expln(' +SELECT x FROM A,B WHERE x = 5 AND A.x = B.y;') AS str +WHERE str NOT LIKE 'Query Identifier%' +; -- Find cardinality for SCAN A(x=5) from a neighbour class, created by the +-- query, executed above. + +SELECT str AS result +FROM expln(' +SELECT x, sum(x) FROM A,B WHERE y = 5 AND A.x = B.y group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%' +; -- Find the JOIN cardinality from a neighbour class. + +-- cardinality 100 in the first Seq Scan on a +SELECT str AS result +FROM expln(' +SELECT x, sum(x) FROM A WHERE x = 5 group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%'; + +-- no one predicted rows. we use knowledge cardinalities of the query +-- in the next queries with the same fss_hash +SELECT str AS result +FROM expln(' +SELECT x FROM A where x < 10 group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +-- cardinality 1000 in Seq Scan on a +SELECT str AS result +FROM expln(' +SELECT x,y FROM A,B WHERE x < 10 AND A.x = B.y;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +-- cardinality 100 in Seq Scan on a and Seq Scan on b +SELECT str AS result +FROM expln(' +SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +-- +-- TODO: +-- Not executed case. What could we do better here? 
+-- +SELECT str AS result +FROM expln(' +SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' +; + +RESET enable_material; +DROP TABLE a,b CASCADE; +SELECT true FROM aqo_reset(); +DROP EXTENSION aqo CASCADE; diff --git a/sql/plancache.sql b/sql/plancache.sql index 0d90149f..3b074b90 100644 --- a/sql/plancache.sql +++ b/sql/plancache.sql @@ -1,11 +1,7 @@ -- Tests on interaction of AQO with cached plans. --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. -SET max_parallel_workers TO 0; - CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; SET aqo.show_details = 'on'; SET aqo.show_hash = 'off'; @@ -31,13 +27,13 @@ BEGIN execute 'EXECUTE fooplan(1)'; END LOOP; - SELECT query_hash FROM aqo_query_texts + SELECT queryid FROM aqo_query_texts WHERE query_text LIKE '%count(*) FROM test WHERE x%' INTO qhash; RETURN QUERY SELECT executions_without_aqo nnex, executions_with_aqo nex, planning_time_with_aqo pt - FROM aqo_query_stat WHERE query_hash = qhash; + FROM aqo_query_stat WHERE queryid = qhash; END $$ LANGUAGE 'plpgsql'; -- The function shows 6 executions without an AQO support (nnex) and diff --git a/sql/relocatable.sql b/sql/relocatable.sql new file mode 100644 index 00000000..e8cc57c3 --- /dev/null +++ b/sql/relocatable.sql @@ -0,0 +1,54 @@ +CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; +SET aqo.mode = 'learn'; -- use this mode for unconditional learning + +CREATE TABLE test AS (SELECT id, 'payload' || id FROM generate_series(1,100) id); +ANALYZE test; + +-- Learn on a query +SELECT count(*) FROM test; +SELECT query_text, learn_aqo, use_aqo, auto_tuning +FROM aqo_query_texts aqt JOIN aqo_queries aq ON (aqt.queryid = aq.queryid) +ORDER BY (md5(query_text)) +; -- Check result. 
TODO: use aqo_status() + +-- Create a schema and move AQO into it. +CREATE SCHEMA IF NOT EXISTS test; +ALTER EXTENSION aqo SET SCHEMA test; + +-- Do something to be confident that AQO works +SELECT count(*) FROM test; +SELECT count(*) FROM test WHERE id < 10; + +SELECT query_text, learn_aqo, use_aqo, auto_tuning +FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) +ORDER BY (md5(query_text)) +; -- Find out both queries executed above + +-- Add schema which contains AQO to the end of search_path +SELECT set_config('search_path', current_setting('search_path') || ', test', false); + +SELECT count(*) FROM test; +SELECT count(*) FROM test WHERE id < 10; + +SELECT query_text, learn_aqo, use_aqo, auto_tuning +FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) +ORDER BY (md5(query_text)) +; -- Check result. + +/* + * Below, we should check each UI function + */ +SELECT aqo_disable_query(id) FROM ( + SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); +SELECT aqo_enable_query(id) FROM ( + SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); + +RESET search_path; +DROP TABLE test CASCADE; +DROP SCHEMA IF EXISTS test CASCADE; +DROP EXTENSION IF EXISTS aqo CASCADE; diff --git a/sql/schema.sql b/sql/schema.sql index d3b1e7af..6f5f4454 100644 --- a/sql/schema.sql +++ b/sql/schema.sql @@ -1,8 +1,3 @@ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. 
-SET max_parallel_workers TO 0; - DROP EXTENSION IF EXISTS aqo CASCADE; DROP SCHEMA IF EXISTS test CASCADE; @@ -16,6 +11,7 @@ CREATE EXTENSION aqo; -- fail CREATE SCHEMA IF NOT EXISTS test1; SET search_path TO test1, public; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; CREATE TABLE test (id SERIAL, data TEXT); @@ -25,6 +21,8 @@ SELECT * FROM test; -- Check AQO service relations state after some manipulations -- Exclude fields with hash values from the queries. Hash is depend on -- nodefuncs code which is highly PostgreSQL version specific. -SELECT query_text FROM public.aqo_query_texts; -SELECT learn_aqo, use_aqo, auto_tuning FROM public.aqo_queries; +SELECT query_text FROM aqo_query_texts +ORDER BY (md5(query_text)) DESC; +SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); DROP SCHEMA IF EXISTS test1 CASCADE; diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql index 419d85de..9666c1de 100644 --- a/sql/statement_timeout.sql +++ b/sql/statement_timeout.sql @@ -1,6 +1,5 @@ -- Check the learning-on-timeout feature -- For stabilized reproduction autovacuum must be disabled. - CREATE FUNCTION check_estimated_rows(text) RETURNS TABLE (estimated int) LANGUAGE plpgsql AS $$ DECLARE @@ -24,6 +23,7 @@ ANALYZE t; DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. 
CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'off'; SET aqo.learn_statement_timeout = 'on'; @@ -46,7 +46,7 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); DELETE FROM t WHERE x > 2; ANALYZE t; INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); -TRUNCATE aqo_data; +SELECT 1 FROM aqo_reset(); SET statement_timeout = 800; SELECT *, pg_sleep(1) FROM t; -- Not learned @@ -61,4 +61,6 @@ SELECT *, pg_sleep(1) FROM t; -- Get reliable data SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); DROP TABLE t; +SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; +DROP FUNCTION check_estimated_rows; diff --git a/sql/temp_tables.sql b/sql/temp_tables.sql new file mode 100644 index 00000000..aba78aba --- /dev/null +++ b/sql/temp_tables.sql @@ -0,0 +1,97 @@ +CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; +SET aqo.mode = 'learn'; + +CREATE TEMP TABLE tt(); +CREATE TABLE pt(); + +-- Ignore queries with the only temp tables +SELECT count(*) FROM tt; +SELECT count(*) FROM tt AS t1, tt AS t2; +SELECT query_text FROM aqo_query_texts; -- Default row should be returned + +-- Should be stored in the ML base +SELECT count(*) FROM pt; +SELECT count(*) FROM pt, tt; +SELECT count(*) FROM pt AS pt1, tt AS tt1, tt AS tt2, pt AS pt2; +SELECT count(*) FROM aqo_data; -- Don't bother about false negatives because of trivial query plans + +DROP TABLE tt; +SELECT * FROM aqo_cleanup(); +SELECT count(*) FROM aqo_data; -- Should return the same as previous call above +DROP TABLE pt; +SELECT * FROM aqo_cleanup(); +SELECT count(*) FROM aqo_data; -- Should be 0 +SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt +ON aq.queryid = aqt.queryid +ORDER BY (md5(query_text)); -- The only the common class is returned + +-- Test learning on temporary table +CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE tt AS SELECT -x AS x, (x % 7) AS y FROM 
generate_series(1,100) AS x; +CREATE TEMP TABLE ttd AS -- the same structure as tt + SELECT -(x*3) AS x, (x % 9) AS y FROM generate_series(1,100) AS x; +ANALYZE pt,tt,ttd; + +create function check_estimated_rows(text) returns table (estimated int, actual int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain analyze %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)'); + return query select tmp[1]::int, tmp[2]::int; + end if; + end loop; +end; +$$; + +-- Check: AQO learns on queries with temp tables + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Estimation failed. Learn. +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Should use AQO estimation +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); +'); -- Should use AQO estimation with another temp table of the same structure + +SET aqo.mode = 'forced'; -- Now we use all fss records for each query +DROP TABLE pt; +SELECT * FROM aqo_cleanup(); +CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE ttd1 AS + SELECT -(x*3) AS x, (x % 9) AS y1 FROM generate_series(1,100) AS x; +ANALYZE; + +-- Check: use AQO knowledge with different temp table of the same structure + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Estimation failed. Learn. 
+SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Should use AQO estimation +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); +'); -- Should use AQO estimation with another temp table of the same structure +SELECT * FROM check_estimated_rows(' + SELECT pt1.x, avg(pt1.y) FROM pt AS pt1,ttd WHERE pt1.x = ttd.x GROUP BY (pt1.x); +'); -- Alias doesn't influence feature space +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd1 WHERE pt.x = ttd1.x GROUP BY (pt.x); +'); -- Don't use AQO for temp table because of different attname + +DROP TABLE pt CASCADE; +SELECT 1 FROM aqo_reset(); +DROP EXTENSION aqo; +DROP FUNCTION check_estimated_rows; diff --git a/sql/top_queries.sql b/sql/top_queries.sql index 520f3ce3..da3817a0 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -1,25 +1,30 @@ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. -SET max_parallel_workers TO 0; - CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; -- --- Dummy test. CREATE TABLE shouldn't find in the ML storage. But a simple --- select must be in. Also here we test on gathering a stat on temp and plain +-- Dummy test. CREATE TABLE shouldn't be found in the ML storage. But a simple +-- select must recorded. Also here we test on gathering a stat on temp and plain -- relations. +-- XXX: Right now we ignore queries if no one permanent table is touched. 
-- CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); -SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; +SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; -- Ignore it SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; -SELECT num FROM top_time_queries(3); +SELECT num FROM aqo_execution_time(true); -- Just for checking, return zero. +SELECT num FROM aqo_execution_time(false); + +-- Without the AQO control queries with and without temp tables are logged. +SELECT query_text,nexecs +FROM aqo_execution_time(false) ce, aqo_query_texts aqt +WHERE ce.id = aqt.queryid +ORDER BY (md5(query_text)); -- --- num of query uses table t2 should be bigger than num of query uses table t1 and be the first +-- num of query which uses the table t2 should be bigger than num of query which +-- uses the table t1 and must be the first -- CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y FROM generate_series(1,1000) AS gs; @@ -28,14 +33,23 @@ CREATE TABLE t2 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; -SELECT num, to_char(error, '9.99EEEE') FROM show_cardinality_errors(false) AS te +SELECT to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te WHERE te.fshash = ( - SELECT fspace_hash FROM aqo_queries - WHERE aqo_queries.query_hash = ( - SELECT aqo_query_texts.query_hash FROM aqo_query_texts + SELECT fs FROM aqo_queries + WHERE aqo_queries.queryid = ( + SELECT aqo_query_texts.queryid FROM aqo_query_texts WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' ) ); -- Should return zero -SELECT count(*) FROM show_cardinality_errors(true); +SELECT count(*) FROM aqo_cardinality_error(true); + +-- Fix list of logged queries 
+SELECT query_text,nexecs +FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt +WHERE ce.id = aqt.queryid +ORDER BY (md5(query_text)); + +SELECT 1 FROM aqo_reset(); +DROP EXTENSION aqo; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 335d8ad2..6446b741 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -1,9 +1,5 @@ --- Switch off parallel workers because of unsteadiness. --- Do this in each aqo test separately, so that server regression tests pass --- with aqo's temporary configuration file loaded. -SET max_parallel_workers TO 0; - CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; @@ -131,7 +127,7 @@ SELECT * FROM (SELECT * FROM t WHERE x > 20) AS t1 USING(x); --- AQO need to predict total fetched tuples in a table. +-- AQO needs to predict total fetched tuples in a table. -- -- At a non-leaf node we have prediction about input tuples - is a number of -- predicted output rows in underlying node. But for Scan nodes we don't have @@ -174,20 +170,22 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? -- Live with this variant of the test for some time. -SELECT - num, to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt -WHERE aqt.query_hash = cef.id -ORDER BY (error, md5(query_text)) DESC; +SELECT to_char(error, '9.99EEEE')::text AS error, query_text +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt +WHERE aqt.queryid = cef.id +ORDER BY (md5(query_text),error) DESC; -DROP TABLE t,t1 CASCADE; +DROP TABLE t,t1 CASCADE; -- delete all tables used in the test -SELECT public.clean_aqo_data(); +SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May some false positives really bother us here? +SELECT * FROM aqo_cleanup(); +SELECT count(*) FROM aqo_data; -- No one row should be returned -- Look for any remaining queries in the ML storage. 
-SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt -WHERE aqt.query_hash = cef.id -ORDER BY (error, md5(query_text)) DESC; +SELECT to_char(error, '9.99EEEE')::text AS error, query_text +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt +WHERE aqt.queryid = cef.id +ORDER BY (md5(query_text),error) DESC; +SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index 4fcb8ece..47369c20 100644 --- a/storage.c +++ b/storage.c @@ -17,967 +17,2421 @@ #include "postgres.h" -#include "nodes/value.h" -#include "postgres.h" +#include -#include "access/heapam.h" -#include "access/table.h" -#include "access/tableam.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "pgstat.h" #include "aqo.h" +#include "aqo_shared.h" #include "machine_learning.h" #include "preprocessing.h" #include "learn_cache.h" +#include "storage.h" + + +/* AQO storage file names */ +#define PGAQO_STAT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_statistics.stat" +#define PGAQO_TEXT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_query_texts.stat" +#define PGAQO_DATA_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_data.stat" +#define PGAQO_QUERIES_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_queries.stat" + +#define AQO_DATA_COLUMNS (7) +#define FormVectorSz(v_name) (form_vector((v_name), (v_name ## _size))) + + +typedef enum { + QUERYID = 0, EXEC_TIME_AQO, EXEC_TIME, PLAN_TIME_AQO, PLAN_TIME, + EST_ERROR_AQO, EST_ERROR, NEXECS_AQO, NEXECS, TOTAL_NCOLS +} aqo_stat_cols; + +typedef enum { + QT_QUERYID = 0, QT_QUERY_STRING, QT_TOTAL_NCOLS +} aqo_qtexts_cols; + +typedef enum { + AD_FS = 0, AD_FSS, AD_NFEATURES, AD_FEATURES, AD_TARGETS, AD_RELIABILITY, + AD_OIDS, AD_TOTAL_NCOLS +} aqo_data_cols; +typedef enum { + AQ_QUERYID = 0, AQ_FS, AQ_LEARN_AQO, AQ_USE_AQO, AQ_AUTO_TUNING, + AQ_TOTAL_NCOLS +} aqo_queries_cols; -#define AQO_DATA_COLUMNS (7) +typedef void* (*form_record_t) (void *ctx, size_t 
*size); +typedef bool (*deform_record_t) (void *data, size_t size); + + +int querytext_max_size = 1000; +int dsm_size_max = 100; /* in MB */ + +HTAB *stat_htab = NULL; +HTAB *queries_htab = NULL; +HTAB *qtexts_htab = NULL; +dsa_area *qtext_dsa = NULL; +HTAB *data_htab = NULL; +dsa_area *data_dsa = NULL; HTAB *deactivated_queries = NULL; -static ArrayType *form_matrix(double **matrix, int nrows, int ncols); -static void deform_matrix(Datum datum, double **matrix); +/* Used to check data file consistency */ +static const uint32 PGAQO_FILE_HEADER = 123467589; +static const uint32 PGAQO_PG_MAJOR_VERSION = PG_VERSION_NUM / 100; + + +static ArrayType *form_matrix(double *matrix, int nrows, int ncols); +static void dsa_init(void); +static int data_store(const char *filename, form_record_t callback, + long nrecs, void *ctx); +static void data_load(const char *filename, deform_record_t callback, void *ctx); +static size_t _compute_data_dsa(const DataEntry *entry); + +static bool _aqo_stat_remove(uint64 queryid); +static bool _aqo_queries_remove(uint64 queryid); +static bool _aqo_qtexts_remove(uint64 queryid); +static bool _aqo_data_remove(data_key *key); + +PG_FUNCTION_INFO_V1(aqo_query_stat); +PG_FUNCTION_INFO_V1(aqo_query_texts); +PG_FUNCTION_INFO_V1(aqo_data); +PG_FUNCTION_INFO_V1(aqo_queries); +PG_FUNCTION_INFO_V1(aqo_enable_query); +PG_FUNCTION_INFO_V1(aqo_disable_query); +PG_FUNCTION_INFO_V1(aqo_queries_update); +PG_FUNCTION_INFO_V1(aqo_reset); +PG_FUNCTION_INFO_V1(aqo_cleanup); +PG_FUNCTION_INFO_V1(aqo_drop_class); +PG_FUNCTION_INFO_V1(aqo_cardinality_error); +PG_FUNCTION_INFO_V1(aqo_execution_time); -static ArrayType *form_vector(double *vector, int nrows); -static void deform_vector(Datum datum, double *vector, int *nelems); -#define FormVectorSz(v_name) (form_vector((v_name), (v_name ## _size))) -#define DeformVectorSz(datum, v_name) (deform_vector((datum), (v_name), &(v_name ## _size))) +bool +load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool 
isSafe) +{ + if (isSafe && (!aqo_learn_statement_timeout || !lc_has_fss(fs, fss))) + return load_aqo_data(fs, fss, data, reloids, false); + else + { + Assert(aqo_learn_statement_timeout); + return lc_load_fss(fs, fss, data, reloids); + } +} +bool +update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids, + bool isTimedOut) +{ + if (!isTimedOut) + return aqo_data_store(fs, fss, data, reloids); + else + return lc_update_fss(fs, fss, data, reloids); +} -static bool my_simple_heap_update(Relation relation, - ItemPointer otid, - HeapTuple tup, - bool *update_indexes); +/* + * Forms ArrayType object for storage from simple C-array matrix. + */ +ArrayType * +form_matrix(double *matrix, int nrows, int ncols) +{ + Datum *elems; + ArrayType *array; + int dims[2] = {nrows, ncols}; + int lbs[2]; + int i, + j; + + lbs[0] = lbs[1] = 1; + elems = palloc(sizeof(*elems) * nrows * ncols); + for (i = 0; i < nrows; ++i) + for (j = 0; j < ncols; ++j) + { + elems[i * ncols + j] = Float8GetDatum(matrix[i * ncols + j]); + Assert(!isnan(matrix[i * ncols + j])); + } + + array = construct_md_array(elems, NULL, 2, dims, lbs, + FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd'); + return array; +} /* - * Open an AQO-related relation. - * It should be done carefully because of a possible concurrent DROP EXTENSION - * command. In such case AQO must be disabled in this backend. + * Forms ArrayType object for storage from simple C-array vector. 
*/ -static bool -open_aqo_relation(char *heaprelnspname, char *heaprelname, - char *indrelname, LOCKMODE lockmode, - Relation *hrel, Relation *irel) +static ArrayType * +form_vector(double *vector, int nrows) +{ + Datum *elems; + ArrayType *array; + int dims[1]; + int lbs[1]; + int i; + + dims[0] = nrows; + lbs[0] = 1; + elems = palloc(sizeof(*elems) * nrows); + for (i = 0; i < nrows; ++i) + elems[i] = Float8GetDatum(vector[i]); + array = construct_md_array(elems, NULL, 1, dims, lbs, + FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd'); + return array; +} + +/* Creates a storage for hashes of deactivated queries */ +void +init_deactivated_queries_storage(void) +{ + HASHCTL hash_ctl; + + /* Create the hashtable proper */ + MemSet(&hash_ctl, 0, sizeof(hash_ctl)); + hash_ctl.keysize = sizeof(uint64); + hash_ctl.entrysize = sizeof(uint64); + deactivated_queries = hash_create("aqo_deactivated_queries", + 128, /* start small and extend */ + &hash_ctl, + HASH_ELEM | HASH_BLOBS); +} + +/* Checks whether the query with given hash is deactivated */ +bool +query_is_deactivated(uint64 queryid) +{ + bool found; + + hash_search(deactivated_queries, &queryid, HASH_FIND, &found); + return found; +} + +/* Adds given query hash into the set of hashes of deactivated queries */ +void +add_deactivated_query(uint64 queryid) +{ + hash_search(deactivated_queries, &queryid, HASH_ENTER, NULL); +} + +/* + * Update AQO statistics. + * + * Add a record (or update an existed) to stat storage for the query class. + * Returns a copy of stat entry, allocated in current memory context. Caller is + * in charge to free this struct after usage. + * If stat hash table is full, return NULL and log this fact. 
+ */ +StatEntry * +aqo_stat_store(uint64 queryid, bool use_aqo, + double plan_time, double exec_time, double est_error) +{ + StatEntry *entry; + bool found; + int pos; + bool tblOverflow; + HASHACTION action; + + Assert(stat_htab); + + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + tblOverflow = hash_get_num_entries(stat_htab) < fs_max_items ? false : true; + action = tblOverflow ? HASH_FIND : HASH_ENTER; + entry = (StatEntry *) hash_search(stat_htab, &queryid, action, &found); + + /* Initialize entry on first usage */ + if (!found) + { + uint64 qid; + + if (action == HASH_FIND) + { + /* + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit + */ + LWLockRelease(&aqo_state->stat_lock); + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[AQO] Stat storage is full. No more feature spaces can be added."), + errhint("Increase value of aqo.fs_max_items on restart of the instance"))); + return NULL; + } + + qid = entry->queryid; + memset(entry, 0, sizeof(StatEntry)); + entry->queryid = qid; + } + + /* Update the entry data */ + + if (use_aqo) + { + Assert(entry->cur_stat_slot_aqo >= 0); + pos = entry->cur_stat_slot_aqo; + if (entry->cur_stat_slot_aqo < STAT_SAMPLE_SIZE - 1) + entry->cur_stat_slot_aqo++; + else + { + size_t sz = (STAT_SAMPLE_SIZE - 1) * sizeof(entry->est_error_aqo[0]); + + Assert(entry->cur_stat_slot_aqo = STAT_SAMPLE_SIZE - 1); + memmove(entry->plan_time_aqo, &entry->plan_time_aqo[1], sz); + memmove(entry->exec_time_aqo, &entry->exec_time_aqo[1], sz); + memmove(entry->est_error_aqo, &entry->est_error_aqo[1], sz); + } + + entry->execs_with_aqo++; + entry->plan_time_aqo[pos] = plan_time; + entry->exec_time_aqo[pos] = exec_time; + entry->est_error_aqo[pos] = est_error; + } + else + { + Assert(entry->cur_stat_slot >= 0); + pos = entry->cur_stat_slot; + if (entry->cur_stat_slot < STAT_SAMPLE_SIZE - 1) + entry->cur_stat_slot++; + else + { + size_t sz = (STAT_SAMPLE_SIZE - 1) * sizeof(entry->est_error[0]); + + 
Assert(entry->cur_stat_slot = STAT_SAMPLE_SIZE - 1); + memmove(entry->plan_time, &entry->plan_time[1], sz); + memmove(entry->exec_time, &entry->exec_time[1], sz); + memmove(entry->est_error, &entry->est_error[1], sz); + } + + entry->execs_without_aqo++; + entry->plan_time[pos] = plan_time; + entry->exec_time[pos] = exec_time; + entry->est_error[pos] = est_error; + } + entry = memcpy(palloc(sizeof(StatEntry)), entry, sizeof(StatEntry)); + LWLockRelease(&aqo_state->stat_lock); + return entry; +} + +/* + * Returns AQO statistics on controlled query classes. + */ +Datum +aqo_query_stat(PG_FUNCTION_ARGS) { - Oid reloid; - RangeVar *rv; + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[TOTAL_NCOLS + 1]; + bool nulls[TOTAL_NCOLS + 1]; + HASH_SEQ_STATUS hash_seq; + StatEntry *entry; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + Assert(tupDesc->natts == TOTAL_NCOLS); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + 
memset(nulls, 0, TOTAL_NCOLS + 1); + LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + hash_seq_init(&hash_seq, stat_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + memset(nulls, 0, TOTAL_NCOLS + 1); + + values[QUERYID] = Int64GetDatum(entry->queryid); + values[NEXECS] = Int64GetDatum(entry->execs_without_aqo); + values[NEXECS_AQO] = Int64GetDatum(entry->execs_with_aqo); + values[EXEC_TIME_AQO] = PointerGetDatum(form_vector(entry->exec_time_aqo, entry->cur_stat_slot_aqo)); + values[EXEC_TIME] = PointerGetDatum(form_vector(entry->exec_time, entry->cur_stat_slot)); + values[PLAN_TIME_AQO] = PointerGetDatum(form_vector(entry->plan_time_aqo, entry->cur_stat_slot_aqo)); + values[PLAN_TIME] = PointerGetDatum(form_vector(entry->plan_time, entry->cur_stat_slot)); + values[EST_ERROR_AQO] = PointerGetDatum(form_vector(entry->est_error_aqo, entry->cur_stat_slot_aqo)); + values[EST_ERROR] = PointerGetDatum(form_vector(entry->est_error, entry->cur_stat_slot)); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } - reloid = RelnameGetRelid(indrelname); - if (!OidIsValid(reloid)) - goto cleanup; + LWLockRelease(&aqo_state->stat_lock); + tuplestore_donestoring(tupstore); + return (Datum) 0; +} - rv = makeRangeVar(heaprelnspname, heaprelname, -1); - *hrel = table_openrv_extended(rv, lockmode, true); - if (*hrel == NULL) - goto cleanup; +static long +aqo_stat_reset(void) +{ + HASH_SEQ_STATUS hash_seq; + StatEntry *entry; + long num_remove = 0; + long num_entries; + + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + num_entries = hash_get_num_entries(stat_htab); + hash_seq_init(&hash_seq, stat_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + if (hash_search(stat_htab, &entry->queryid, HASH_REMOVE, NULL) == NULL) + elog(ERROR, "[AQO] hash table corrupted"); + num_remove++; + } + aqo_state->stat_changed = true; + LWLockRelease(&aqo_state->stat_lock); + + if (num_remove != num_entries) + elog(ERROR, "[AQO] Stat memory storage is 
corrupted or parallel access without a lock was detected."); + + aqo_stat_flush(); + + return num_remove; +} + +static void * +_form_stat_record_cb(void *ctx, size_t *size) +{ + HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; + StatEntry *entry; + + *size = sizeof(StatEntry); + entry = hash_seq_search(hash_seq); + if (entry == NULL) + return NULL; + + return memcpy(palloc(*size), entry, *size); +} + +/* Implement data flushing according to pgss_shmem_shutdown() */ + +void +aqo_stat_flush(void) +{ + HASH_SEQ_STATUS hash_seq; + int ret; + long entries; + + LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + entries = hash_get_num_entries(stat_htab); + hash_seq_init(&hash_seq, stat_htab); + ret = data_store(PGAQO_STAT_FILE, _form_stat_record_cb, entries, + (void *) &hash_seq); + if (ret != 0) + hash_seq_term(&hash_seq); + + LWLockRelease(&aqo_state->stat_lock); +} + +static void * +_form_qtext_record_cb(void *ctx, size_t *size) +{ + HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; + QueryTextEntry *entry; + void *data; + char *query_string; + char *ptr; + + entry = hash_seq_search(hash_seq); + if (entry == NULL) + return NULL; + + Assert(DsaPointerIsValid(entry->qtext_dp)); + query_string = dsa_get_address(qtext_dsa, entry->qtext_dp); + Assert(query_string != NULL); + *size = sizeof(entry->queryid) + strlen(query_string) + 1; + ptr = data = palloc(*size); + Assert(ptr != NULL); + memcpy(ptr, &entry->queryid, sizeof(entry->queryid)); + ptr += sizeof(entry->queryid); + memcpy(ptr, query_string, strlen(query_string) + 1); + return data; +} + +void +aqo_qtexts_flush(void) +{ + HASH_SEQ_STATUS hash_seq; + int ret; + long entries; + + dsa_init(); + LWLockAcquire(&aqo_state->qtexts_lock, LW_SHARED); + + if (!aqo_state->qtexts_changed) + /* XXX: mull over forced mode. 
*/ + goto end; + + entries = hash_get_num_entries(qtexts_htab); + hash_seq_init(&hash_seq, qtexts_htab); + ret = data_store(PGAQO_TEXT_FILE, _form_qtext_record_cb, entries, + (void *) &hash_seq); + if (ret != 0) + hash_seq_term(&hash_seq); + aqo_state->qtexts_changed = false; + +end: + LWLockRelease(&aqo_state->qtexts_lock); +} + +/* + * Getting a hash table iterator, return a newly allocated memory chunk and its + * size for subsequent writing into storage. + */ +static void * +_form_data_record_cb(void *ctx, size_t *size) +{ + HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; + DataEntry *entry; + char *data; + char *ptr, + *dsa_ptr; + size_t sz; + + entry = hash_seq_search(hash_seq); + if (entry == NULL) + return NULL; + + /* Size of data is DataEntry (without DSA pointer) plus size of DSA chunk */ + sz = offsetof(DataEntry, data_dp) + _compute_data_dsa(entry); + ptr = data = palloc(sz); + + /* Put the data into the chunk */ + + /* Plane copy of all bytes of hash table entry */ + memcpy(ptr, entry, offsetof(DataEntry, data_dp)); + ptr += offsetof(DataEntry, data_dp); + + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert((sz - (ptr - data)) == _compute_data_dsa(entry)); + memcpy(ptr, dsa_ptr, sz - (ptr - data)); + *size = sz; + return data; +} + +void +aqo_data_flush(void) +{ + HASH_SEQ_STATUS hash_seq; + int ret; + long entries; + + dsa_init(); + LWLockAcquire(&aqo_state->data_lock, LW_SHARED); + + if (!aqo_state->data_changed) + /* XXX: mull over forced mode. */ + goto end; + + entries = hash_get_num_entries(data_htab); + hash_seq_init(&hash_seq, data_htab); + ret = data_store(PGAQO_DATA_FILE, _form_data_record_cb, entries, + (void *) &hash_seq); + if (ret != 0) + /* + * Something happened and storing procedure hasn't finished walking + * along all records of the hash table. 
+ */ + hash_seq_term(&hash_seq); + else + aqo_state->data_changed = false; +end: + LWLockRelease(&aqo_state->data_lock); +} + +static void * +_form_queries_record_cb(void *ctx, size_t *size) +{ + HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; + QueriesEntry *entry; - /* Try to open index relation carefully. */ - *irel = try_relation_open(reloid, lockmode); - if (*irel == NULL) + *size = sizeof(QueriesEntry); + entry = hash_seq_search(hash_seq); + if (entry == NULL) + return NULL; + + return memcpy(palloc(*size), entry, *size); +} + +void +aqo_queries_flush(void) +{ + HASH_SEQ_STATUS hash_seq; + int ret; + long entries; + + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + entries = hash_get_num_entries(queries_htab); + hash_seq_init(&hash_seq, queries_htab); + ret = data_store(PGAQO_QUERIES_FILE, _form_queries_record_cb, entries, + (void *) &hash_seq); + if (ret != 0) + hash_seq_term(&hash_seq); + + LWLockRelease(&aqo_state->queries_lock); +} + +static int +data_store(const char *filename, form_record_t callback, + long nrecs, void *ctx) +{ + FILE *file; + size_t size; + uint32 counter = 0; + void *data; + char *tmpfile; + + tmpfile = psprintf("%s.tmp", filename); + file = AllocateFile(tmpfile, PG_BINARY_W); + if (file == NULL) + goto error; + + if (fwrite(&PGAQO_FILE_HEADER, sizeof(uint32), 1, file) != 1 || + fwrite(&PGAQO_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1 || + fwrite(&nrecs, sizeof(long), 1, file) != 1) + goto error; + + while ((data = callback(ctx, &size)) != NULL) + { + /* TODO: Add CRC code ? 
*/ + if (fwrite(&size, sizeof(size), 1, file) != 1 || + fwrite(data, size, 1, file) != 1) + goto error; + counter++; + } + + Assert(counter == nrecs); + if (FreeFile(file)) { - relation_close(*hrel, lockmode); - goto cleanup; + file = NULL; + goto error; } + (void) durable_rename(tmpfile, filename, LOG); + elog(LOG, "[AQO] %d records stored in file %s.", counter, filename); + return 0; + +error: + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not write AQO file \"%s\": %m", tmpfile))); + + if (file) + FreeFile(file); + unlink(tmpfile); + pfree(tmpfile); + return -1; +} + +static bool +_deform_stat_record_cb(void *data, size_t size) +{ + bool found; + StatEntry *entry; + uint64 queryid; + + Assert(LWLockHeldByMeInMode(&aqo_state->stat_lock, LW_EXCLUSIVE)); + Assert(size == sizeof(StatEntry)); + + queryid = ((StatEntry *) data)->queryid; + entry = (StatEntry *) hash_search(stat_htab, &queryid, HASH_ENTER, &found); + Assert(!found && entry); + memcpy(entry, data, sizeof(StatEntry)); return true; +} -cleanup: - /* - * Absence of any AQO-related table tell us that someone executed - * a 'DROP EXTENSION aqo' command. We disable AQO for all future queries - * in this backend. For performance reasons we do it locally. - * Clear profiling hash table. - * Also, we gently disable AQO for the rest of the current query - * execution process. - */ - aqo_enabled = false; - disable_aqo_for_query(); +void +aqo_stat_load(void) +{ + Assert(!LWLockHeldByMe(&aqo_state->stat_lock)); + + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + + /* Load on postmaster sturtup. So no any concurrent actions possible here. */ + Assert(hash_get_num_entries(stat_htab) == 0); + + data_load(PGAQO_STAT_FILE, _deform_stat_record_cb, NULL); + + LWLockRelease(&aqo_state->stat_lock); +} + +static bool +_check_dsa_validity(dsa_pointer ptr) +{ + if (DsaPointerIsValid(ptr)) + return true; + + elog(LOG, "[AQO] DSA Pointer isn't valid. 
Is the memory limit exceeded?"); return false; } +static bool +_deform_qtexts_record_cb(void *data, size_t size) +{ + bool found; + QueryTextEntry *entry; + uint64 queryid = *(uint64 *) data; + char *query_string = (char *) data + sizeof(queryid); + size_t len = size - sizeof(queryid); + char *strptr; + + Assert(LWLockHeldByMeInMode(&aqo_state->qtexts_lock, LW_EXCLUSIVE)); + Assert(strlen(query_string) + 1 == len); + entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, + HASH_ENTER, &found); + Assert(!found); + + entry->qtext_dp = dsa_allocate(qtext_dsa, len); + if (!_check_dsa_validity(entry->qtext_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. + */ + (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, NULL); + return false; + } + + strptr = (char *) dsa_get_address(qtext_dsa, entry->qtext_dp); + strlcpy(strptr, query_string, len); + return true; +} + +void +aqo_qtexts_load(void) +{ + uint64 queryid = 0; + bool found; + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + Assert(qtext_dsa != NULL); + + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); + + if (hash_get_num_entries(qtexts_htab) != 0) + { + /* Someone have done it concurrently. */ + elog(LOG, "[AQO] Another backend have loaded query texts concurrently."); + LWLockRelease(&aqo_state->qtexts_lock); + return; + } + + data_load(PGAQO_TEXT_FILE, _deform_qtexts_record_cb, NULL); + + /* Check existence of default feature space */ + (void) hash_search(qtexts_htab, &queryid, HASH_FIND, &found); + + aqo_state->qtexts_changed = false; /* mem data consistent with disk */ + LWLockRelease(&aqo_state->qtexts_lock); + + if (!found) + { + if (!aqo_qtext_store(0, "COMMON feature space (do not delete!)")) + elog(PANIC, "[AQO] DSA Initialization was unsuccessful"); + } +} + /* - * Returns whether the query with given hash is in aqo_queries. - * If yes, returns the content of the first line with given hash. 
- * - * Use dirty snapshot to see all (include in-progress) data. We want to prevent - * wait in the XactLockTableWait routine. - * If query is found in the knowledge base, fill the query context struct. + * Getting a data chunk from a caller, add a record into the 'ML data' + * shmem hash table. Allocate and fill DSA chunk for variadic part of the data. */ -bool -find_query(uint64 qhash, QueryContextData *ctx) -{ - Relation hrel; - Relation irel; - HeapTuple tuple; - TupleTableSlot *slot; - bool shouldFree = true; - IndexScanDesc scan; - ScanKeyData key; - SnapshotData snap; - bool find_ok = false; - Datum values[5]; - bool nulls[5] = {false, false, false, false, false}; - - if (!open_aqo_relation("public", "aqo_queries", "aqo_queries_query_hash_idx", - AccessShareLock, &hrel, &irel)) +static bool +_deform_data_record_cb(void *data, size_t size) +{ + bool found; + DataEntry *fentry = (DataEntry *) data; /*Depends on a platform? */ + DataEntry *entry; + size_t sz; + char *ptr = (char *) data, + *dsa_ptr; + + Assert(ptr != NULL); + Assert(LWLockHeldByMeInMode(&aqo_state->data_lock, LW_EXCLUSIVE)); + + entry = (DataEntry *) hash_search(data_htab, &fentry->key, + HASH_ENTER, &found); + Assert(!found); + + /* Copy fixed-size part of entry byte-by-byte even with caves */ + memcpy(entry, fentry, offsetof(DataEntry, data_dp)); + ptr += offsetof(DataEntry, data_dp); + + sz = _compute_data_dsa(entry); + Assert(sz + offsetof(DataEntry, data_dp) == size); + entry->data_dp = dsa_allocate(data_dsa, sz); + + if (!_check_dsa_validity(entry->data_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. 
+ */ + (void) hash_search(data_htab, &fentry->key, HASH_REMOVE, NULL); return false; + } - InitDirtySnapshot(snap); - scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); + dsa_ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert(dsa_ptr != NULL); + memcpy(dsa_ptr, ptr, sz); + return true; +} + +void +aqo_data_load(void) +{ + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + Assert(data_dsa != NULL); - index_rescan(scan, &key, 1, NULL, 0); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); - if (find_ok) + if (hash_get_num_entries(data_htab) != 0) { - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - heap_deform_tuple(tuple, hrel->rd_att, values, nulls); + /* Someone have done it concurrently. */ + elog(LOG, "[AQO] Another backend have loaded query data concurrently."); + LWLockRelease(&aqo_state->data_lock); + return; + } + + data_load(PGAQO_DATA_FILE, _deform_data_record_cb, NULL); - /* Fill query context data */ - ctx->learn_aqo = DatumGetBool(values[1]); - ctx->use_aqo = DatumGetBool(values[2]); - ctx->fspace_hash = DatumGetInt64(values[3]); - ctx->auto_tuning = DatumGetBool(values[4]); - ctx->collect_stat = query_context.auto_tuning; + aqo_state->data_changed = false; /* mem data is consistent with disk */ + LWLockRelease(&aqo_state->data_lock); +} + +static bool +_deform_queries_record_cb(void *data, size_t size) +{ + bool found; + QueriesEntry *entry; + uint64 queryid; + + Assert(LWLockHeldByMeInMode(&aqo_state->queries_lock, LW_EXCLUSIVE)); + Assert(size == sizeof(QueriesEntry)); + + queryid = ((QueriesEntry *) data)->queryid; + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_ENTER, &found); + Assert(!found); + memcpy(entry, data, sizeof(QueriesEntry)); + return true; +} + 
+void +aqo_queries_load(void) +{ + bool found; + uint64 queryid = 0; + + Assert(!LWLockHeldByMe(&aqo_state->queries_lock)); + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + + /* Load on postmaster sturtup. So no any concurrent actions possible here. */ + Assert(hash_get_num_entries(queries_htab) == 0); + + data_load(PGAQO_QUERIES_FILE, _deform_queries_record_cb, NULL); + + /* Check existence of default feature space */ + (void) hash_search(queries_htab, &queryid, HASH_FIND, &found); + + LWLockRelease(&aqo_state->queries_lock); + if (!found) + { + if (!aqo_queries_store(0, 0, 0, 0, 0)) + elog(PANIC, "[AQO] aqo_queries initialization was unsuccessful"); + } +} + +static void +data_load(const char *filename, deform_record_t callback, void *ctx) +{ + FILE *file; + long i; + uint32 header; + int32 pgver; + long num; + + file = AllocateFile(filename, PG_BINARY_R); + if (file == NULL) + { + if (errno != ENOENT) + goto read_error; + return; } - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, AccessShareLock); - table_close(hrel, AccessShareLock); - return find_ok; + if (fread(&header, sizeof(uint32), 1, file) != 1 || + fread(&pgver, sizeof(uint32), 1, file) != 1 || + fread(&num, sizeof(long), 1, file) != 1) + goto read_error; + + if (header != PGAQO_FILE_HEADER || pgver != PGAQO_PG_MAJOR_VERSION) + goto data_error; + + for (i = 0; i < num; i++) + { + void *data; + size_t size; + bool res; + + if (fread(&size, sizeof(size), 1, file) != 1) + goto read_error; + data = palloc(size); + if (fread(data, size, 1, file) != 1) + goto read_error; + res = callback(data, size); + + if (!res) + { + /* Error detected. Do not try to read tails of the storage. 
*/ + elog(LOG, "[AQO] Because of an error skip %ld storage records.", + num - i); + break; + } + } + + FreeFile(file); + unlink(filename); + + elog(LOG, "[AQO] %ld records loaded from file %s.", num, filename); + return; + +read_error: + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", filename))); + goto fail; +data_error: + ereport(LOG, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ignoring invalid data in file \"%s\"", filename))); +fail: + if (file) + FreeFile(file); + unlink(filename); +} + +static void +on_shmem_shutdown(int code, Datum arg) +{ + aqo_qtexts_flush(); + aqo_data_flush(); } /* - * Update query status in intelligent mode. - * - * Do it gently: to prevent possible deadlocks, revert this update if any - * concurrent transaction is doing it. - * - * Such logic is possible, because this update is performed by AQO itself. It is - * not break any learning logic besides possible additional learning iterations. + * Initialize DSA memory for AQO shared data with variable length. + * On first call, create DSA segments and load data into hash table and DSA + * from disk. 
+ */ +static void +dsa_init() +{ + MemoryContext old_context; + + if (qtext_dsa) + return; + + Assert(data_dsa == NULL && data_dsa == NULL); + old_context = MemoryContextSwitchTo(TopMemoryContext); + LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); + + if (aqo_state->qtexts_dsa_handler == DSM_HANDLE_INVALID) + { + Assert(aqo_state->data_dsa_handler == DSM_HANDLE_INVALID); + + qtext_dsa = dsa_create(aqo_state->qtext_trancheid); + Assert(qtext_dsa != NULL); + + if (dsm_size_max > 0) + dsa_set_size_limit(qtext_dsa, dsm_size_max * 1024 * 1024); + + dsa_pin(qtext_dsa); + aqo_state->qtexts_dsa_handler = dsa_get_handle(qtext_dsa); + + data_dsa = qtext_dsa; + aqo_state->data_dsa_handler = dsa_get_handle(data_dsa); + + /* Load and initialize query texts hash table */ + aqo_qtexts_load(); + aqo_data_load(); + } + else + { + qtext_dsa = dsa_attach(aqo_state->qtexts_dsa_handler); + data_dsa = qtext_dsa; + } + + dsa_pin_mapping(qtext_dsa); + MemoryContextSwitchTo(old_context); + LWLockRelease(&aqo_state->lock); + + before_shmem_exit(on_shmem_shutdown, (Datum) 0); +} + +/* ************************************************************************** */ + +/* + * XXX: Maybe merge with aqo_queries ? */ bool -update_query(uint64 qhash, uint64 fhash, - bool learn_aqo, bool use_aqo, bool auto_tuning) -{ - Relation hrel; - Relation irel; - TupleTableSlot *slot; - HeapTuple tuple, - nw_tuple; - Datum values[5]; - bool isnull[5] = { false, false, false, false, false }; - bool replace[5] = { false, true, true, true, true }; - bool shouldFree; - bool result = true; - bool update_indexes; - IndexScanDesc scan; - ScanKeyData key; - SnapshotData snap; - - /* Couldn't allow to write if xact must be read-only. 
*/ - if (XactReadOnly) - return false; +aqo_qtext_store(uint64 queryid, const char *query_string) +{ + QueryTextEntry *entry; + bool found; + bool tblOverflow; + HASHACTION action; + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); - if (!open_aqo_relation("public", "aqo_queries", "aqo_queries_query_hash_idx", - RowExclusiveLock, &hrel, &irel)) + if (query_string == NULL || querytext_max_size == 0) return false; + dsa_init(); + + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); + + /* Check hash table overflow */ + tblOverflow = hash_get_num_entries(qtexts_htab) < fs_max_items ? false : true; + action = tblOverflow ? HASH_FIND : HASH_ENTER; + + entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, action, + &found); + + /* Initialize entry on first usage */ + if (!found) + { + size_t size = strlen(query_string) + 1; + char *strptr; + + if (action == HASH_FIND) + { + /* + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit + */ + LWLockRelease(&aqo_state->qtexts_lock); + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[AQO] Query texts storage is full. No more feature spaces can be added."), + errhint("Increase value of aqo.fs_max_items on restart of the instance"))); + return false; + } + + entry->queryid = queryid; + size = size > querytext_max_size ? querytext_max_size : size; + entry->qtext_dp = dsa_allocate(qtext_dsa, size); + + if (!_check_dsa_validity(entry->qtext_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. 
+ */ + (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, NULL); + LWLockRelease(&aqo_state->qtexts_lock); + return false; + } + + strptr = (char *) dsa_get_address(qtext_dsa, entry->qtext_dp); + strlcpy(strptr, query_string, size); + aqo_state->qtexts_changed = true; + } + LWLockRelease(&aqo_state->qtexts_lock); + return true; +} + +Datum +aqo_query_texts(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[QT_TOTAL_NCOLS]; + bool nulls[QT_TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + QueryTextEntry *entry; + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + Assert(tupDesc->natts == QT_TOTAL_NCOLS); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + dsa_init(); + memset(nulls, 0, QT_TOTAL_NCOLS); + LWLockAcquire(&aqo_state->qtexts_lock, LW_SHARED); + hash_seq_init(&hash_seq, qtexts_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + 
char *ptr; + + Assert(DsaPointerIsValid(entry->qtext_dp)); + ptr = dsa_get_address(qtext_dsa, entry->qtext_dp); + values[QT_QUERYID] = Int64GetDatum(entry->queryid); + values[QT_QUERY_STRING] = CStringGetTextDatum(ptr); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } + + LWLockRelease(&aqo_state->qtexts_lock); + tuplestore_donestoring(tupstore); + return (Datum) 0; +} + +static bool +_aqo_stat_remove(uint64 queryid) +{ + bool found; + + Assert(!LWLockHeldByMe(&aqo_state->stat_lock)); + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + (void) hash_search(stat_htab, &queryid, HASH_FIND, &found); + + if (found) + { + (void) hash_search(stat_htab, &queryid, HASH_REMOVE, NULL); + aqo_state->stat_changed = true; + } + + LWLockRelease(&aqo_state->stat_lock); + return found; +} + +static bool +_aqo_queries_remove(uint64 queryid) +{ + bool found; + + Assert(!LWLockHeldByMe(&aqo_state->queries_lock)); + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + (void) hash_search(queries_htab, &queryid, HASH_FIND, &found); + + if (found) + { + (void) hash_search(queries_htab, &queryid, HASH_REMOVE, NULL); + aqo_state->queries_changed = true; + } + + LWLockRelease(&aqo_state->queries_lock); + return found; +} + +static bool +_aqo_qtexts_remove(uint64 queryid) +{ + bool found = false; + QueryTextEntry *entry; + + dsa_init(); + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); + /* - * Start an index scan. Use dirty snapshot to check concurrent updates that - * can be made before, but still not visible. + * Look for a record with this queryid. DSA fields must be freed before + * deletion of the record. 
*/ - InitDirtySnapshot(snap); - scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); + entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, HASH_FIND, + &found); + if (found) + { + /* Free DSA memory, allocated for this record */ + Assert(DsaPointerIsValid(entry->qtext_dp)); + dsa_free(qtext_dsa, entry->qtext_dp); - index_rescan(scan, &key, 1, NULL, 0); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); + (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, NULL); + aqo_state->qtexts_changed = true; + } - values[0] = Int64GetDatum(qhash); - values[1] = BoolGetDatum(learn_aqo); - values[2] = BoolGetDatum(use_aqo); - values[3] = Int64GetDatum(fhash); - values[4] = BoolGetDatum(auto_tuning); + LWLockRelease(&aqo_state->qtexts_lock); + return found; +} - if (!index_getnext_slot(scan, ForwardScanDirection, slot)) +static bool +_aqo_data_remove(data_key *key) +{ + DataEntry *entry; + bool found; + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + + entry = (DataEntry *) hash_search(data_htab, key, HASH_FIND, &found); + if (found) { - /* New tuple for the ML knowledge base */ - tuple = heap_form_tuple(RelationGetDescr(hrel), values, isnull); - simple_heap_insert(hrel, tuple); - my_index_insert(irel, values, isnull, &(tuple->t_self), - hrel, UNIQUE_CHECK_YES); + /* Free DSA memory, allocated for this record */ + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_free(data_dsa, entry->data_dp); + entry->data_dp = InvalidDsaPointer; + + if (hash_search(data_htab, key, HASH_REMOVE, NULL) == NULL) + elog(PANIC, "[AQO] Inconsistent data hash table"); + aqo_state->data_changed = true; } - else if (!TransactionIdIsValid(snap.xmin) && - !TransactionIdIsValid(snap.xmax)) + + LWLockRelease(&aqo_state->data_lock); + return found; +} + +static long +aqo_qtexts_reset(void) +{ + HASH_SEQ_STATUS hash_seq; + QueryTextEntry 
*entry; + long num_remove = 0; + long num_entries; + + dsa_init(); + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); + num_entries = hash_get_num_entries(qtexts_htab); + hash_seq_init(&hash_seq, qtexts_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) { - /* - * Update existed data. No one concurrent transaction doesn't update this - * right now. - */ - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - nw_tuple = heap_modify_tuple(tuple, hrel->rd_att, values, isnull, replace); + if (entry->queryid == 0) + continue; + + Assert(DsaPointerIsValid(entry->qtext_dp)); + dsa_free(qtext_dsa, entry->qtext_dp); + if (hash_search(qtexts_htab, &entry->queryid, HASH_REMOVE, NULL) == NULL) + elog(ERROR, "[AQO] hash table corrupted"); + num_remove++; + } + aqo_state->qtexts_changed = true; + LWLockRelease(&aqo_state->qtexts_lock); + if (num_remove != num_entries - 1) + elog(ERROR, "[AQO] Query texts memory storage is corrupted or parallel access without a lock was detected."); + + aqo_qtexts_flush(); + + return num_remove; +} + +static size_t +_compute_data_dsa(const DataEntry *entry) +{ + size_t size = sizeof(data_key); /* header's size */ + + size += sizeof(double) * entry->rows * entry->cols; /* matrix */ + size += 2 * sizeof(double) * entry->rows; /* targets, rfactors */ + + /* Calculate memory size needed to store relation names */ + size += entry->nrels * sizeof(Oid); + return size; +} - if (my_simple_heap_update(hrel, &(nw_tuple->t_self), nw_tuple, - &update_indexes)) +/* + * Insert new record or update existed in the AQO data storage. + * Return true if data was changed. 
+ */ +bool +aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) +{ + DataEntry *entry; + bool found; + data_key key = {.fs = fs, .fss = fss}; + int i; + char *ptr; + ListCell *lc; + size_t size; + bool tblOverflow; + HASHACTION action; + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + + dsa_init(); + + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + + /* Check hash table overflow */ + tblOverflow = hash_get_num_entries(data_htab) < fss_max_items ? false : true; + action = tblOverflow ? HASH_FIND : HASH_ENTER; + + entry = (DataEntry *) hash_search(data_htab, &key, action, &found); + + /* Initialize entry on first usage */ + if (!found) + { + if (action == HASH_FIND) { - if (update_indexes) - my_index_insert(irel, values, isnull, - &(nw_tuple->t_self), - hrel, UNIQUE_CHECK_YES); - result = true; + /* + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit + */ + LWLockRelease(&aqo_state->data_lock); + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[AQO] Data storage is full. No more data can be added."), + errhint("Increase value of aqo.fss_max_items on restart of the instance"))); + return false; } - else + + entry->cols = data->cols; + entry->rows = data->rows; + entry->nrels = list_length(reloids); + + size = _compute_data_dsa(entry); + entry->data_dp = dsa_allocate0(data_dsa, size); + + if (!_check_dsa_validity(entry->data_dp)) { /* - * Ooops, somebody concurrently updated the tuple. It is possible - * only in the case of changes made by third-party code. + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. 
*/ - elog(ERROR, "AQO feature space data for signature ("UINT64_FORMAT \ - ", "UINT64_FORMAT") concurrently" - " updated by a stranger backend.", - qhash, fhash); - result = false; + (void) hash_search(data_htab, &key, HASH_REMOVE, NULL); + LWLockRelease(&aqo_state->data_lock); + return false; } } - else + + Assert(DsaPointerIsValid(entry->data_dp)); + Assert(entry->rows <= data->rows); /* Reserved for the future features */ + + if (entry->cols != data->cols || entry->nrels != list_length(reloids)) + { + /* Collision happened? */ + elog(LOG, "[AQO] Does a collision happened? Check it if possible (fs: " + UINT64_FORMAT", fss: %d).", + fs, fss); + goto end; + } + + if (entry->rows < data->rows) + { + entry->rows = data->rows; + size = _compute_data_dsa(entry); + + /* Need to re-allocate DSA chunk */ + dsa_free(data_dsa, entry->data_dp); + entry->data_dp = dsa_allocate0(data_dsa, size); + + if (!_check_dsa_validity(entry->data_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. + */ + (void) hash_search(data_htab, &key, HASH_REMOVE, NULL); + LWLockRelease(&aqo_state->data_lock); + return false; + } + } + ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert(ptr != NULL); + + /* + * Copy AQO data into allocated DSA segment + */ + + memcpy(ptr, &key, sizeof(data_key)); /* Just for debug */ + ptr += sizeof(data_key); + if (entry->cols > 0) + { + for (i = 0; i < entry->rows; i++) + { + Assert(data->matrix[i]); + memcpy(ptr, data->matrix[i], sizeof(double) * data->cols); + ptr += sizeof(double) * data->cols; + } + } + /* copy targets into DSM storage */ + memcpy(ptr, data->targets, sizeof(double) * entry->rows); + ptr += sizeof(double) * entry->rows; + /* copy rfactors into DSM storage */ + memcpy(ptr, data->rfactors, sizeof(double) * entry->rows); + ptr += sizeof(double) * entry->rows; + /* store list of relations. XXX: optimize ? 
*/ + foreach(lc, reloids) + { + Oid reloid = lfirst_oid(lc); + + memcpy(ptr, &reloid, sizeof(Oid)); + ptr += sizeof(Oid); + } + + aqo_state->data_changed = true; +end: + LWLockRelease(&aqo_state->data_lock); + return aqo_state->data_changed; +} + +static void +build_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data) +{ + Assert(data->cols == temp_data->cols); + Assert(data->matrix); + + if (data->rows > 0) + /* trivial strategy - use first suitable record and ignore others */ + return; + + memcpy(data, temp_data, sizeof(OkNNrdata)); + if (data->cols > 0) + { + int i; + + for (i = 0; i < data->rows; i++) + { + Assert(data->matrix[i]); + memcpy(data->matrix[i], temp_data->matrix[i], data->cols * sizeof(double)); + } + } +} + +static OkNNrdata * +_fill_knn_data(const DataEntry *entry, List **reloids) +{ + OkNNrdata *data; + char *ptr; + int i; + size_t offset; + size_t sz = _compute_data_dsa(entry); + + data = OkNNr_allocate(entry->cols); + data->rows = entry->rows; + + ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + + /* Check invariants */ + Assert(entry->rows <= aqo_K); + Assert(ptr != NULL); + Assert(entry->key.fss == ((data_key *)ptr)->fss); + Assert(data->matrix); + + ptr += sizeof(data_key); + + if (entry->cols > 0) + { + for (i = 0; i < entry->rows; i++) + { + Assert(data->matrix[i]); + memcpy(data->matrix[i], ptr, sizeof(double) * data->cols); + ptr += sizeof(double) * data->cols; + } + } + + /* copy targets from DSM storage */ + memcpy(data->targets, ptr, sizeof(double) * entry->rows); + ptr += sizeof(double) * entry->rows; + offset = ptr - (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert(offset < sz); + + /* copy rfactors from DSM storage */ + memcpy(data->rfactors, ptr, sizeof(double) * entry->rows); + ptr += sizeof(double) * entry->rows; + offset = ptr - (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert(offset <= sz); + + if (reloids == NULL) + /* Isn't needed to load reloids list */ + return data; + + /* store list 
of relations. XXX: optimize ? */ + for (i = 0; i < entry->nrels; i++) { - /* - * Concurrent update was made. To prevent deadlocks refuse to update. - */ - result = false; + *reloids = lappend_oid(*reloids, ObjectIdGetDatum(*(Oid*)ptr)); + ptr += sizeof(Oid); } - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, RowExclusiveLock); - table_close(hrel, RowExclusiveLock); + offset = ptr - (char *) dsa_get_address(data_dsa, entry->data_dp); + if (offset != sz) + elog(PANIC, "[AQO] Shared memory ML storage is corrupted."); - CommandCounterIncrement(); - return result; + return data; } /* - * Creates entry for new query in aqo_query_texts table with given fields. - * Returns false if the operation failed, true otherwise. + * Return on feature subspace, unique defined by its class (fs) and hash value + * (fss). + * If reloids is NULL, skip loading of this list. + * If wideSearch is true - make seqscan on the hash table to see for relevant + * data across neighbours. */ bool -add_query_text(uint64 qhash, const char *query_string) +load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, + bool wideSearch) { - Relation hrel; - Relation irel; - HeapTuple tuple; - Datum values[2]; - bool isnull[2] = {false, false}; + DataEntry *entry; + bool found; + data_key key = {.fs = fs, .fss = fss}; + OkNNrdata *temp_data; - /* Variables for checking of concurrent writings. */ - TupleTableSlot *slot; - IndexScanDesc scan; - ScanKeyData key; - SnapshotData snap; + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); - values[0] = Int64GetDatum(qhash); - values[1] = CStringGetTextDatum(query_string); + dsa_init(); - /* Couldn't allow to write if xact must be read-only. 
*/ - if (XactReadOnly) - return false; + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); - if (!open_aqo_relation("public", "aqo_query_texts", - "aqo_query_texts_query_hash_idx", - RowExclusiveLock, &hrel, &irel)) - return false; + if (!wideSearch) + { + entry = (DataEntry *) hash_search(data_htab, &key, HASH_FIND, &found); - tuple = heap_form_tuple(RelationGetDescr(hrel), values, isnull); + if (!found) + goto end; - /* - * Start an index scan. Use dirty snapshot to check concurrent updates that - * can be made before, but still not visible. - */ - InitDirtySnapshot(snap); - scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); + /* One entry with all correctly filled fields is found */ + Assert(entry); + Assert(DsaPointerIsValid(entry->data_dp)); - index_rescan(scan, &key, 1, NULL, 0); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); + if (entry->cols != data->cols) + { + /* Collision happened? */ + elog(LOG, "[AQO] Does a collision happened? Check it if possible " + "(fs: "UINT64_FORMAT", fss: %d).", + fs, fss); + found = false; + goto end; + } - if (!index_getnext_slot(scan, ForwardScanDirection, slot)) + temp_data = _fill_knn_data(entry, reloids); + build_knn_matrix(data, temp_data); + } + else + /* Iterate across all elements of the table. XXX: Maybe slow. */ { - tuple = heap_form_tuple(RelationGetDescr(hrel), values, isnull); + HASH_SEQ_STATUS hash_seq; + int noids = -1; + + found = false; + hash_seq_init(&hash_seq, data_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + List *tmp_oids = NIL; + + if (entry->key.fss != fss || entry->cols != data->cols) + continue; + + temp_data = _fill_knn_data(entry, &tmp_oids); + + if (data->rows > 0 && list_length(tmp_oids) != noids) + { + /* Dubious case. 
So log it and skip these data */ + elog(LOG, + "[AQO] different number depended oids for the same fss %d: " + "%d and %d correspondingly.", + fss, list_length(tmp_oids), noids); + Assert(noids >= 0); + list_free(tmp_oids); + continue; + } - simple_heap_insert(hrel, tuple); - my_index_insert(irel, values, isnull, &(tuple->t_self), hrel, - UNIQUE_CHECK_YES); + noids = list_length(tmp_oids); + + if (reloids != NULL && *reloids == NIL) + *reloids = tmp_oids; + else + list_free(tmp_oids); + + build_knn_matrix(data, temp_data); + found = true; + } } - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, RowExclusiveLock); - table_close(hrel, RowExclusiveLock); + Assert(!found || (data->rows > 0 && data->rows <= aqo_K)); +end: + LWLockRelease(&aqo_state->data_lock); - CommandCounterIncrement(); - return true; + return found; } - -static ArrayType * -form_strings_vector(List *relnames) +Datum +aqo_data(PG_FUNCTION_ARGS) { - Datum *rels; - ArrayType *array; - ListCell *lc; - int i = 0; + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[AD_TOTAL_NCOLS]; + bool nulls[AD_TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + DataEntry *entry; + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* 
Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + Assert(tupDesc->natts == AD_TOTAL_NCOLS); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + dsa_init(); + LWLockAcquire(&aqo_state->data_lock, LW_SHARED); + hash_seq_init(&hash_seq, data_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + char *ptr; - if (relnames == NIL) - return NULL; + memset(nulls, 0, AD_TOTAL_NCOLS); - rels = (Datum *) palloc(list_length(relnames) * sizeof(Datum)); + values[AD_FS] = Int64GetDatum(entry->key.fs); + values[AD_FSS] = Int32GetDatum((int) entry->key.fss); + values[AD_NFEATURES] = Int32GetDatum(entry->cols); - foreach(lc, relnames) - { - char *relname = strVal(lfirst(lc)); + /* Fill values from the DSA data chunk */ + Assert(DsaPointerIsValid(entry->data_dp)); + ptr = dsa_get_address(data_dsa, entry->data_dp); + Assert(entry->key.fs == ((data_key*)ptr)->fs && entry->key.fss == ((data_key*)ptr)->fss); + ptr += sizeof(data_key); + + if (entry->cols > 0) + values[AD_FEATURES] = PointerGetDatum(form_matrix((double *)ptr, entry->rows, entry->cols)); + else + nulls[AD_FEATURES] = true; + + ptr += sizeof(double) * entry->rows * entry->cols; + values[AD_TARGETS] = PointerGetDatum(form_vector((double *)ptr, entry->rows)); + ptr += sizeof(double) * entry->rows; + values[AD_RELIABILITY] = PointerGetDatum(form_vector((double *)ptr, entry->rows)); + ptr += sizeof(double) * entry->rows; + + if (entry->nrels > 0) + { + Datum *elems; + ArrayType *array; + int i; + + elems = palloc(sizeof(*elems) * entry->nrels); + for(i = 0; i < entry->nrels; i++) + { + elems[i] = ObjectIdGetDatum(*(Oid *)ptr); + ptr += sizeof(Oid); + } + + array = construct_array(elems, entry->nrels, OIDOID, + sizeof(Oid), true, 
TYPALIGN_INT); + values[AD_OIDS] = PointerGetDatum(array); + } + else + nulls[AD_OIDS] = true; - rels[i++] = CStringGetTextDatum(relname); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); } - array = construct_array(rels, i, TEXTOID, -1, false, TYPALIGN_INT); - pfree(rels); - return array; + LWLockRelease(&aqo_state->data_lock); + tuplestore_donestoring(tupstore); + return (Datum) 0; } -static List * -deform_strings_vector(Datum datum) +static long +_aqo_data_clean(uint64 fs) { - ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); - Datum *values; - int i; - int nelems = 0; - List *relnames = NIL; + HASH_SEQ_STATUS hash_seq; + DataEntry *entry; + long removed = 0; - deconstruct_array(array, TEXTOID, -1, false, TYPALIGN_INT, - &values, NULL, &nelems); - for (i = 0; i < nelems; ++i) - { - Value *s; + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); - s = makeString(pstrdup(TextDatumGetCString(values[i]))); - relnames = lappend(relnames, s); + hash_seq_init(&hash_seq, data_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + if (entry->key.fs != fs) + continue; + + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_free(data_dsa, entry->data_dp); + entry->data_dp = InvalidDsaPointer; + if (hash_search(data_htab, &entry->key, HASH_REMOVE, NULL) == NULL) + elog(ERROR, "[AQO] hash table corrupted"); + removed++; } - pfree(values); - pfree(array); - return relnames; + LWLockRelease(&aqo_state->data_lock); + return removed; } -bool -load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relnames, bool isSafe) +static long +aqo_data_reset(void) { - if (isSafe && (!aqo_learn_statement_timeout || !lc_has_fss(fs, fss))) - return load_fss(fs, fss, data, relnames); - else + HASH_SEQ_STATUS hash_seq; + DataEntry *entry; + long num_remove = 0; + long num_entries; + + dsa_init(); + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + 
num_entries = hash_get_num_entries(data_htab); + hash_seq_init(&hash_seq, data_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) { - Assert(aqo_learn_statement_timeout); - return lc_load_fss(fs, fss, data, relnames); + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_free(data_dsa, entry->data_dp); + if (hash_search(data_htab, &entry->key, HASH_REMOVE, NULL) == NULL) + elog(ERROR, "[AQO] hash table corrupted"); + num_remove++; } -} - -/* - * Loads feature subspace (fss) from table aqo_data into memory. - * The last column of the returned matrix is for target values of objects. - * Returns false if the operation failed, true otherwise. - * - * 'fss_hash' is the hash of feature subspace which is supposed to be loaded - * 'ncols' is the number of clauses in the feature subspace - * 'matrix' is an allocated memory for matrix with the size of aqo_K rows - * and nhashes columns - * 'targets' is an allocated memory with size aqo_K for target values - * of the objects - * 'rows' is the pointer in which the function stores actual number of - * objects in the given feature space - */ -bool -load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) -{ - Relation hrel; - Relation irel; - HeapTuple tuple; - TupleTableSlot *slot; - bool shouldFree; - bool find_ok = false; - IndexScanDesc scan; - ScanKeyData key[2]; - Datum values[AQO_DATA_COLUMNS]; - bool isnull[AQO_DATA_COLUMNS]; - bool success = true; - - if (!open_aqo_relation("public", "aqo_data", - "aqo_fss_access_idx", - AccessShareLock, &hrel, &irel)) - return false; + aqo_state->data_changed = true; + LWLockRelease(&aqo_state->data_lock); + if (num_remove != num_entries) + elog(ERROR, "[AQO] Query ML memory storage is corrupted or parallel access without a lock has detected."); - scan = index_beginscan(hrel, irel, SnapshotSelf, 2, 0); - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fs)); - ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss)); - 
index_rescan(scan, key, 2, NULL, 0); + aqo_data_flush(); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); + return num_remove; +} - if (find_ok) +Datum +aqo_queries(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[AQ_TOTAL_NCOLS + 1]; + bool nulls[AQ_TOTAL_NCOLS + 1]; + HASH_SEQ_STATUS hash_seq; + QueriesEntry *entry; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + Assert(tupDesc->natts == AQ_TOTAL_NCOLS); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + hash_seq_init(&hash_seq, queries_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) { - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - heap_deform_tuple(tuple, hrel->rd_att, values, isnull); - - if (DatumGetInt32(values[2]) == data->cols) - { - if 
(data->cols > 0) - /* - * The case than an object has not any filters and selectivities - */ - deform_matrix(values[3], data->matrix); - - deform_vector(values[4], data->targets, &(data->rows)); - deform_vector(values[6], data->rfactors, &(data->rows)); - - if (relnames != NULL) - *relnames = deform_strings_vector(values[5]); - } - else - elog(ERROR, "unexpected number of features for hash (" \ - UINT64_FORMAT", %d):\ - expected %d features, obtained %d", - fs, fss, data->cols, DatumGetInt32(values[2])); + memset(nulls, 0, AQ_TOTAL_NCOLS + 1); + + values[AQ_QUERYID] = Int64GetDatum(entry->queryid); + values[AQ_FS] = Int64GetDatum(entry->fs); + values[AQ_LEARN_AQO] = BoolGetDatum(entry->learn_aqo); + values[AQ_USE_AQO] = BoolGetDatum(entry->use_aqo); + values[AQ_AUTO_TUNING] = BoolGetDatum(entry->auto_tuning); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); } - else - success = false; - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, AccessShareLock); - table_close(hrel, AccessShareLock); - - return success; + LWLockRelease(&aqo_state->queries_lock); + tuplestore_donestoring(tupstore); + return (Datum) 0; } bool -update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *relnames, - bool isTimedOut) +aqo_queries_store(uint64 queryid, + uint64 fs, bool learn_aqo, bool use_aqo, bool auto_tuning) { - if (!isTimedOut) - return update_fss(fs, fss, data, relnames); - else - return lc_update_fss(fs, fss, data, relnames); -} + QueriesEntry *entry; + bool found; + bool tblOverflow; + HASHACTION action; -/* - * Updates the specified line in the specified feature subspace. - * Returns false if the operation failed, true otherwise. - * - * 'fss_hash' specifies the feature subspace 'nrows' x 'ncols' is the shape - * of 'matrix' 'targets' is vector of size 'nrows' - * - * Necessary to prevent waiting for another transaction to commit in index - * insertion or heap update. 
- * - * Caller guaranteed that no one AQO process insert or update this data row. - */ -bool -update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) -{ - Relation hrel; - Relation irel; - SnapshotData snap; - TupleTableSlot *slot; - TupleDesc tupDesc; - HeapTuple tuple, - nw_tuple; - Datum values[AQO_DATA_COLUMNS]; - bool isnull[AQO_DATA_COLUMNS]; - bool replace[AQO_DATA_COLUMNS] = { false, false, false, true, true, false, true }; - bool shouldFree; - bool find_ok = false; - bool update_indexes; - IndexScanDesc scan; - ScanKeyData key[2]; - bool result = true; - - /* Couldn't allow to write if xact must be read-only. */ - if (XactReadOnly) - return false; + Assert(queries_htab); - if (!open_aqo_relation("public", "aqo_data", - "aqo_fss_access_idx", - RowExclusiveLock, &hrel, &irel)) - return false; + /* Guard for default feature space */ + Assert(queryid != 0 || (fs == 0 && learn_aqo == false && + use_aqo == false && auto_tuning == false)); - memset(isnull, 0, sizeof(bool) * AQO_DATA_COLUMNS); - tupDesc = RelationGetDescr(hrel); - InitDirtySnapshot(snap); - scan = index_beginscan(hrel, irel, &snap, 2, 0); - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fs)); - ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss)); + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); - index_rescan(scan, key, 2, NULL, 0); + /* Check hash table overflow */ + tblOverflow = hash_get_num_entries(queries_htab) < fs_max_items ? false : true; + action = tblOverflow ? 
HASH_FIND : HASH_ENTER; - slot = MakeSingleTupleTableSlot(tupDesc, &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, action, + &found); - if (!find_ok) + /* Initialize entry on first usage */ + if (!found && action == HASH_FIND) { - values[0] = Int64GetDatum(fs); - values[1] = Int32GetDatum(fss); - values[2] = Int32GetDatum(data->cols); - - if (data->cols > 0) - values[3] = PointerGetDatum(form_matrix(data->matrix, data->rows, data->cols)); - else - isnull[3] = true; - - values[4] = PointerGetDatum(form_vector(data->targets, data->rows)); - - /* Form array of relids. Only once. */ - values[5] = PointerGetDatum(form_strings_vector(relnames)); - if ((void *) values[5] == NULL) - isnull[5] = true; - values[6] = PointerGetDatum(form_vector(data->rfactors, data->rows)); - tuple = heap_form_tuple(tupDesc, values, isnull); - /* - * Don't use PG_TRY() section because of dirty snapshot and caller atomic - * prerequisities guarantees to us that no one concurrent insertion can - * exists. + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit */ - simple_heap_insert(hrel, tuple); - my_index_insert(irel, values, isnull, &(tuple->t_self), - hrel, UNIQUE_CHECK_YES); + LWLockRelease(&aqo_state->queries_lock); + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[AQO] Queries storage is full. 
No more feature spaces can be added."), + errhint("Increase value of aqo.fs_max_items on restart of the instance"))); + return false; } - else if (!TransactionIdIsValid(snap.xmin) && !TransactionIdIsValid(snap.xmax)) - { - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - heap_deform_tuple(tuple, hrel->rd_att, values, isnull); - if (data->cols > 0) - values[3] = PointerGetDatum(form_matrix(data->matrix, data->rows, data->cols)); - else - isnull[3] = true; + entry->fs = fs; + entry->learn_aqo = learn_aqo; + entry->use_aqo = use_aqo; + entry->auto_tuning = auto_tuning; - values[4] = PointerGetDatum(form_vector(data->targets, data->rows)); - values[6] = PointerGetDatum(form_vector(data->rfactors, data->rows)); - nw_tuple = heap_modify_tuple(tuple, tupDesc, values, isnull, replace); - if (my_simple_heap_update(hrel, &(nw_tuple->t_self), nw_tuple, - &update_indexes)) - { - if (update_indexes) - my_index_insert(irel, values, isnull, &(nw_tuple->t_self), - hrel, UNIQUE_CHECK_YES); - result = true; - } - else - { - /* - * Ooops, somebody concurrently updated the tuple. It is possible - * only in the case of changes made by third-party code. - */ - elog(ERROR, "AQO data piece ("UINT64_FORMAT" %d) concurrently" - " updated by a stranger backend.", - fs, fss); - result = false; - } - } - else + LWLockRelease(&aqo_state->queries_lock); + return true; +} + +static long +aqo_queries_reset(void) +{ + HASH_SEQ_STATUS hash_seq; + QueriesEntry *entry; + long num_remove = 0; + long num_entries; + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + num_entries = hash_get_num_entries(queries_htab); + hash_seq_init(&hash_seq, queries_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) { - /* - * Concurrent update was made. To prevent deadlocks refuse to update. 
- */ - result = false; + if (entry->queryid == 0) + /* Don't remove default feature space */ + continue; + + if (hash_search(queries_htab, &entry->queryid, HASH_REMOVE, NULL) == NULL) + elog(ERROR, "[AQO] hash table corrupted"); + num_remove++; } + aqo_state->queries_changed = true; + LWLockRelease(&aqo_state->queries_lock); - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, RowExclusiveLock); - table_close(hrel, RowExclusiveLock); + if (num_remove != num_entries - 1) + elog(ERROR, "[AQO] Queries memory storage is corrupted or parallel access without a lock has detected."); - CommandCounterIncrement(); - return result; + aqo_queries_flush(); + + return num_remove; } -/* - * Returns QueryStat for the given query_hash. Returns empty QueryStat if - * no statistics is stored for the given query_hash in table aqo_query_stat. - * Returns NULL and executes disable_aqo_for_query if aqo_query_stat - * is not found. - */ -QueryStat * -get_aqo_stat(uint64 qhash) +Datum +aqo_enable_query(PG_FUNCTION_ARGS) { - Relation hrel; - Relation irel; - TupleTableSlot *slot; - IndexScanDesc scan; - ScanKeyData key; - QueryStat *stat = palloc_query_stat(); - bool shouldFree; + uint64 queryid = (uint64) PG_GETARG_INT64(0); + QueriesEntry *entry; + bool found; + Assert(queries_htab); - if (!open_aqo_relation("public", "aqo_query_stat", - "aqo_query_stat_idx", - AccessShareLock, &hrel, &irel)) - return false; + if (queryid == 0) + elog(ERROR, "[AQO] Default class can't be updated."); - scan = index_beginscan(hrel, irel, SnapshotSelf, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); - index_rescan(scan, &key, 1, NULL, 0); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, &found); - if (index_getnext_slot(scan, ForwardScanDirection, slot)) + if (found) { - HeapTuple tuple; - 
Datum values[9]; - bool nulls[9]; - - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - heap_deform_tuple(tuple, hrel->rd_att, values, nulls); - - DeformVectorSz(values[1], stat->execution_time_with_aqo); - DeformVectorSz(values[2], stat->execution_time_without_aqo); - DeformVectorSz(values[3], stat->planning_time_with_aqo); - DeformVectorSz(values[4], stat->planning_time_without_aqo); - DeformVectorSz(values[5], stat->cardinality_error_with_aqo); - DeformVectorSz(values[6], stat->cardinality_error_without_aqo); - - stat->executions_with_aqo = DatumGetInt64(values[7]); - stat->executions_without_aqo = DatumGetInt64(values[8]); + entry->learn_aqo = true; + entry->use_aqo = true; + if (aqo_mode == AQO_MODE_INTELLIGENT) + entry->auto_tuning = true; } + else + elog(ERROR, "[AQO] Entry with queryid "INT64_FORMAT + " not contained in table", (int64) queryid); - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, AccessShareLock); - table_close(hrel, AccessShareLock); - return stat; + hash_search(deactivated_queries, &queryid, HASH_REMOVE, NULL); + LWLockRelease(&aqo_state->queries_lock); + PG_RETURN_VOID(); } -/* - * Saves given QueryStat for the given query_hash. - * Executes disable_aqo_for_query if aqo_query_stat is not found. - */ -void -update_aqo_stat(uint64 qhash, QueryStat *stat) -{ - Relation hrel; - Relation irel; - SnapshotData snap; - TupleTableSlot *slot; - TupleDesc tupDesc; - HeapTuple tuple, - nw_tuple; - Datum values[9]; - bool isnull[9] = { false, false, false, - false, false, false, - false, false, false }; - bool replace[9] = { false, true, true, - true, true, true, - true, true, true }; - bool shouldFree; - bool update_indexes; - IndexScanDesc scan; - ScanKeyData key; - - /* Couldn't allow to write if xact must be read-only. 
*/ - if (XactReadOnly) - return; +Datum +aqo_disable_query(PG_FUNCTION_ARGS) +{ + uint64 queryid = (uint64) PG_GETARG_INT64(0); + QueriesEntry *entry; + bool found; - if (!open_aqo_relation("public", "aqo_query_stat", - "aqo_query_stat_idx", - RowExclusiveLock, &hrel, &irel)) - return; + Assert(queries_htab); - tupDesc = RelationGetDescr(hrel); - - InitDirtySnapshot(snap); - scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); - index_rescan(scan, &key, 1, NULL, 0); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - - /*values[0] will be initialized later */ - values[1] = PointerGetDatum(FormVectorSz(stat->execution_time_with_aqo)); - values[2] = PointerGetDatum(FormVectorSz(stat->execution_time_without_aqo)); - values[3] = PointerGetDatum(FormVectorSz(stat->planning_time_with_aqo)); - values[4] = PointerGetDatum(FormVectorSz(stat->planning_time_without_aqo)); - values[5] = PointerGetDatum(FormVectorSz(stat->cardinality_error_with_aqo)); - values[6] = PointerGetDatum(FormVectorSz(stat->cardinality_error_without_aqo)); - - values[7] = Int64GetDatum(stat->executions_with_aqo); - values[8] = Int64GetDatum(stat->executions_without_aqo); - - if (!index_getnext_slot(scan, ForwardScanDirection, slot)) - { - /* Such signature (hash) doesn't yet exist in the ML knowledge base. */ - values[0] = Int64GetDatum(qhash); - tuple = heap_form_tuple(tupDesc, values, isnull); - simple_heap_insert(hrel, tuple); - my_index_insert(irel, values, isnull, &(tuple->t_self), - hrel, UNIQUE_CHECK_YES); - } - else if (!TransactionIdIsValid(snap.xmin) && !TransactionIdIsValid(snap.xmax)) - { - /* Need to update ML data row and no one backend concurrently doing it. 
*/ - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - values[0] = heap_getattr(tuple, 1, tupDesc, &isnull[0]); - nw_tuple = heap_modify_tuple(tuple, tupDesc, values, isnull, replace); - if (my_simple_heap_update(hrel, &(nw_tuple->t_self), nw_tuple, - &update_indexes)) - { - /* NOTE: insert index tuple iff heap update succeeded! */ - if (update_indexes) - my_index_insert(irel, values, isnull, - &(nw_tuple->t_self), - hrel, UNIQUE_CHECK_YES); - } - else - { - /* - * Ooops, somebody concurrently updated the tuple. It is possible - * only in the case of changes made by third-party code. - */ - elog(ERROR, "AQO statistic data for query signature "UINT64_FORMAT - " concurrently updated by a stranger backend.", - qhash); - } + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, &found); + + if(found) + { + entry->learn_aqo = false; + entry->use_aqo = false; + entry->auto_tuning = false; } else { - /* - * Concurrent update was made. To prevent deadlocks refuse to update. - */ + elog(ERROR, "[AQO] Entry with "INT64_FORMAT" not contained in table", + (int64) queryid); } - - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, RowExclusiveLock); - table_close(hrel, RowExclusiveLock); - - CommandCounterIncrement(); + LWLockRelease(&aqo_state->queries_lock); + PG_RETURN_VOID(); } -/* - * Expands matrix from storage into simple C-array. 
- */ -void -deform_matrix(Datum datum, double **matrix) +bool +aqo_queries_find(uint64 queryid, QueryContextData *ctx) { - ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); - int nelems; - Datum *values; - int rows; - int cols; - int i, - j; + bool found; + QueriesEntry *entry; + + Assert(queries_htab); - deconstruct_array(array, - FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd', - &values, NULL, &nelems); - if (nelems != 0) + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, &found); + if (found) { - rows = ARR_DIMS(array)[0]; - cols = ARR_DIMS(array)[1]; - for (i = 0; i < rows; ++i) - for (j = 0; j < cols; ++j) - matrix[i][j] = DatumGetFloat8(values[i * cols + j]); + ctx->query_hash = entry->queryid; + ctx->learn_aqo = entry->learn_aqo; + ctx->use_aqo = entry->use_aqo; + ctx->auto_tuning = entry->auto_tuning; } - pfree(values); - pfree(array); + LWLockRelease(&aqo_state->queries_lock); + return found; } /* - * Expands vector from storage into simple C-array. - * Also returns its number of elements. + * Update AQO preferences for a given queryid value. + * if incoming param is null - leave it unchanged. + * if forced is false, do nothing if query with such ID isn't exists yet. + * Return true if operation have done some changes. 
*/ -void -deform_vector(Datum datum, double *vector, int *nelems) +Datum +aqo_queries_update(PG_FUNCTION_ARGS) { - ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); - Datum *values; - int i; - - deconstruct_array(array, - FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd', - &values, NULL, nelems); - for (i = 0; i < *nelems; ++i) - vector[i] = DatumGetFloat8(values[i]); - pfree(values); - pfree(array); + QueriesEntry *entry; + uint64 queryid = PG_GETARG_INT64(AQ_QUERYID); + bool found; + + if (queryid == 0) + /* Do nothing for default feature space */ + PG_RETURN_BOOL(false); + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, + &found); + + if (!PG_ARGISNULL(AQ_FS)) + entry->fs = PG_GETARG_INT64(AQ_FS); + if (!PG_ARGISNULL(AQ_LEARN_AQO)) + entry->learn_aqo = PG_GETARG_BOOL(AQ_LEARN_AQO); + if (!PG_ARGISNULL(AQ_USE_AQO)) + entry->use_aqo = PG_GETARG_BOOL(AQ_USE_AQO); + if (!PG_ARGISNULL(AQ_AUTO_TUNING)) + entry->auto_tuning = PG_GETARG_BOOL(AQ_AUTO_TUNING); + + /* Remove the class from cache of deactivated queries */ + hash_search(deactivated_queries, &queryid, HASH_REMOVE, NULL); + + LWLockRelease(&aqo_state->queries_lock); + PG_RETURN_BOOL(true); } -/* - * Forms ArrayType object for storage from simple C-array matrix. 
- */ -ArrayType * -form_matrix(double **matrix, int nrows, int ncols) +Datum +aqo_reset(PG_FUNCTION_ARGS) { - Datum *elems; - ArrayType *array; - int dims[2]; - int lbs[2]; - int i, - j; - - dims[0] = nrows; - dims[1] = ncols; - lbs[0] = lbs[1] = 1; - elems = palloc(sizeof(*elems) * nrows * ncols); - for (i = 0; i < nrows; ++i) - for (j = 0; j < ncols; ++j) - elems[i * ncols + j] = Float8GetDatum(matrix[i][j]); + long counter = 0; - array = construct_md_array(elems, NULL, 2, dims, lbs, - FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd'); - pfree(elems); - return array; + counter += aqo_stat_reset(); + counter += aqo_qtexts_reset(); + counter += aqo_data_reset(); + counter += aqo_queries_reset(); + PG_RETURN_INT64(counter); } +#include "utils/syscache.h" + /* - * Forms ArrayType object for storage from simple C-array vector. + * Scan aqo_queries. For each FS lookup aqo_data records: detect a record, where + * list of oids links to deleted tables. + * If + * + * Scan aqo_data hash table. Detect a record, where list of oids links to + * deleted tables. If gentle is TRUE, remove this record only. Another case, + * remove all records with the same (not default) fs from aqo_data. + * Scan aqo_queries. If no one record in aqo_data exists for this fs - remove + * the record from aqo_queries, aqo_query_stat and aqo_query_texts. 
*/ -ArrayType * -form_vector(double *vector, int nrows) +static void +cleanup_aqo_database(bool gentle, int *fs_num, int *fss_num) { - Datum *elems; - ArrayType *array; - int dims[1]; - int lbs[1]; - int i; + HASH_SEQ_STATUS hash_seq; + QueriesEntry *entry; - dims[0] = nrows; - lbs[0] = 1; - elems = palloc(sizeof(*elems) * nrows); - for (i = 0; i < nrows; ++i) - elems[i] = Float8GetDatum(vector[i]); - array = construct_md_array(elems, NULL, 1, dims, lbs, - FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd'); - pfree(elems); - return array; -} + /* Call it because we might touch DSA segments during the cleanup */ + dsa_init(); -/* - * Returns true if updated successfully, false if updated concurrently by - * another session, error otherwise. - */ -static bool -my_simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup, - bool *update_indexes) -{ - TM_Result result; - TM_FailureData hufd; - LockTupleMode lockmode; - - Assert(update_indexes != NULL); - result = heap_update(relation, otid, tup, - GetCurrentCommandId(true), InvalidSnapshot, - true /* wait for commit */ , - &hufd, &lockmode); - switch (result) - { - case TM_SelfModified: - /* Tuple was already updated in current command? */ - elog(ERROR, "tuple already updated by self"); - break; + *fs_num = 0; + *fss_num = 0; - case TM_Ok: - /* done successfully */ - if (!HeapTupleIsHeapOnly(tup)) - *update_indexes = true; + /* + * It's a long haul. So, make seq scan without any lock. It is possible + * because only this operation can delete data from hash table. 
+ */ + hash_seq_init(&hash_seq, queries_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + HASH_SEQ_STATUS hash_seq2; + DataEntry *dentry; + List *junk_fss = NIL; + List *actual_fss = NIL; + ListCell *lc; + + /* Scan aqo_data for any junk records related to this FS */ + hash_seq_init(&hash_seq2, data_htab); + while ((dentry = hash_seq_search(&hash_seq2)) != NULL) + { + char *ptr; + + if (entry->fs != dentry->key.fs) + /* Another FS */ + continue; + + LWLockAcquire(&aqo_state->data_lock, LW_SHARED); + + Assert(DsaPointerIsValid(dentry->data_dp)); + ptr = dsa_get_address(data_dsa, dentry->data_dp); + + ptr += sizeof(data_key); + ptr += sizeof(double) * dentry->rows * dentry->cols; + ptr += sizeof(double) * 2 * dentry->rows; + + if (dentry->nrels > 0) + { + int i; + + /* Check each OID to be existed. */ + for(i = 0; i < dentry->nrels; i++) + { + Oid reloid = ObjectIdGetDatum(*(Oid *)ptr); + MemoryContext oldctx = MemoryContextSwitchTo(AQOUtilityMemCtx); + + if (!SearchSysCacheExists1(RELOID, reloid)) + /* Remember this value */ + junk_fss = list_append_unique_int(junk_fss, + dentry->key.fss); + else + actual_fss = list_append_unique_int(actual_fss, + dentry->key.fss); + MemoryContextSwitchTo(oldctx); + + ptr += sizeof(Oid); + } + } else - *update_indexes = false; - return true; + { + /* + * Impossible case. We don't use AQO for so simple or synthetic + * data. Just detect errors in this logic. + */ + ereport(PANIC, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("AQO detected incorrect behaviour: fs=" + UINT64_FORMAT" fss=%d", + dentry->key.fs, (int32) dentry->key.fss))); + } + + LWLockRelease(&aqo_state->data_lock); + } - case TM_Updated: - return false; - break; + /* + * In forced mode remove all child FSSes even some of them are still + * link to existed tables. 
+ */ + if (junk_fss != NIL && !gentle) + junk_fss = list_concat(junk_fss, actual_fss); - case TM_BeingModified: - return false; - break; + /* Remove junk records from aqo_data */ + foreach(lc, junk_fss) + { + data_key key = {.fs = entry->fs, .fss = lfirst_int(lc)}; + (*fss_num) += (int) _aqo_data_remove(&key); + } - default: - elog(ERROR, "unrecognized heap_update status: %u", result); - break; - } - return false; -} + /* + * If no one live FSS exists, remove the class totally. Don't touch + * default query class. + */ + if (entry->fs != 0 && (actual_fss == NIL || (junk_fss != NIL && !gentle))) + { + /* Query Stat */ + _aqo_stat_remove(entry->queryid); + /* Query text */ + _aqo_qtexts_remove(entry->queryid); -/* Provides correct insert in both PostgreQL 9.6.X and 10.X.X */ -bool -my_index_insert(Relation indexRelation, - Datum *values, bool *isnull, - ItemPointer heap_t_ctid, - Relation heapRelation, - IndexUniqueCheck checkUnique) -{ - /* Index must be UNIQUE to support uniqueness checks */ - Assert(checkUnique == UNIQUE_CHECK_NO || - indexRelation->rd_index->indisunique); - -#if PG_VERSION_NUM < 100000 - return index_insert(indexRelation, values, isnull, heap_t_ctid, - heapRelation, checkUnique); -#elif PG_VERSION_NUM < 140000 - return index_insert(indexRelation, values, isnull, heap_t_ctid, - heapRelation, checkUnique, - BuildIndexInfo(indexRelation)); -#else - return index_insert(indexRelation, values, isnull, heap_t_ctid, - heapRelation, checkUnique, false, - BuildIndexInfo(indexRelation)); -#endif + /* Query class preferences */ + (*fs_num) += (int) _aqo_queries_remove(entry->queryid); + } + + MemoryContextReset(AQOUtilityMemCtx); + } + + /* + * The best place to flush updated AQO storage: calling the routine, user + * realizes how heavy it is. 
+ */ + aqo_stat_flush(); + aqo_data_flush(); + aqo_qtexts_flush(); + aqo_queries_flush(); } -/* Creates a storage for hashes of deactivated queries */ -void -init_deactivated_queries_storage(void) +Datum +aqo_cleanup(PG_FUNCTION_ARGS) { - HASHCTL hash_ctl; + int fs_num; + int fss_num; + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[2]; + bool nulls[2] = {0, 0}; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + Assert(tupDesc->natts == 2); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); - /* Create the hashtable proper */ - MemSet(&hash_ctl, 0, sizeof(hash_ctl)); - hash_ctl.keysize = sizeof(uint64); - hash_ctl.entrysize = sizeof(uint64); - deactivated_queries = hash_create("aqo_deactivated_queries", - 128, /* start small and extend */ - &hash_ctl, - HASH_ELEM | HASH_BLOBS); + /* + * Make forced cleanup: if at least one fss isn't actual, remove parent FS + * and all its FSSes. 
+ * Main idea of such behaviour here is, if a table was deleted, we have + * little chance to use this class in future. Only one use case here can be + * a reason: to use it as a base for search data in a set of neighbours. + * But, invent another UI function for such logic. + */ + cleanup_aqo_database(false, &fs_num, &fss_num); + + values[0] = Int32GetDatum(fs_num); + values[1] = Int32GetDatum(fss_num); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + tuplestore_donestoring(tupstore); + return (Datum) 0; } -/* Destroys the storage for hash of deactivated queries */ -void -fini_deactivated_queries_storage(void) +/* + * XXX: Maybe to allow usage of NULL value to make a reset? + */ +Datum +aqo_drop_class(PG_FUNCTION_ARGS) { - hash_destroy(deactivated_queries); - deactivated_queries = NULL; + uint64 queryid = PG_GETARG_INT64(0); + bool found; + QueriesEntry *entry; + uint64 fs; + long cnt; + + if (queryid == 0) + elog(ERROR, "[AQO] Cannot remove basic class "INT64_FORMAT".", + (int64) queryid); + + /* Extract FS value for the queryid */ + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, + &found); + if (!found) + elog(ERROR, "[AQO] Nothing to remove for the class "INT64_FORMAT".", + (int64) queryid); + + fs = entry->fs; + LWLockRelease(&aqo_state->queries_lock); + + if (fs == 0) + elog(ERROR, "[AQO] Cannot remove class "INT64_FORMAT" with default FS.", + (int64) queryid); + if (fs != queryid) + elog(WARNING, + "[AQO] Removing query class has non-generic feature space value: " + "id = "INT64_FORMAT", fs = "UINT64_FORMAT".", (int64) queryid, fs); + + /* Now, remove all data related to the class */ + _aqo_queries_remove(queryid); + _aqo_stat_remove(queryid); + _aqo_qtexts_remove(queryid); + cnt = _aqo_data_clean(fs); + + /* Immediately save changes to permanent storage. 
*/ + aqo_stat_flush(); + aqo_data_flush(); + aqo_qtexts_flush(); + aqo_queries_flush(); + + PG_RETURN_INT32(cnt); } -/* Checks whether the query with given hash is deactivated */ -bool -query_is_deactivated(uint64 query_hash) +typedef enum { + AQE_NN = 0, AQE_QUERYID, AQE_FS, AQE_CERROR, AQE_NEXECS, AQE_TOTAL_NCOLS +} ce_output_order; + +/* + * Show cardinality error gathered on last execution. + * Skip entries with empty stat slots. XXX: is it possible? + */ +Datum +aqo_cardinality_error(PG_FUNCTION_ARGS) { - bool found; + bool controlled = PG_GETARG_BOOL(0); + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[AQE_TOTAL_NCOLS]; + bool nulls[AQE_TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + QueriesEntry *qentry; + StatEntry *sentry; + int counter = 0; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + Assert(tupDesc->natts == AQE_TOTAL_NCOLS); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + 
LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + + hash_seq_init(&hash_seq, queries_htab); + while ((qentry = hash_seq_search(&hash_seq)) != NULL) + { + bool found; + double *ce; + int64 nexecs; + int nvals; + + memset(nulls, 0, AQE_TOTAL_NCOLS * sizeof(nulls[0])); + + sentry = (StatEntry *) hash_search(stat_htab, &qentry->queryid, + HASH_FIND, &found); + if (!found) + /* Statistics not found by some reason. Just go further */ + continue; + + nvals = controlled ? sentry->cur_stat_slot_aqo : sentry->cur_stat_slot; + if (nvals == 0) + /* No one stat slot filled */ + continue; + + nexecs = controlled ? sentry->execs_with_aqo : sentry->execs_without_aqo; + ce = controlled ? sentry->est_error_aqo : sentry->est_error; + + values[AQE_NN] = Int32GetDatum(++counter); + values[AQE_QUERYID] = Int64GetDatum(qentry->queryid); + values[AQE_FS] = Int64GetDatum(qentry->fs); + values[AQE_NEXECS] = Int64GetDatum(nexecs); + values[AQE_CERROR] = Float8GetDatum(ce[nvals - 1]); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } - hash_search(deactivated_queries, &query_hash, HASH_FIND, &found); - return found; + LWLockRelease(&aqo_state->stat_lock); + LWLockRelease(&aqo_state->queries_lock); + + tuplestore_donestoring(tupstore); + return (Datum) 0; } -/* Adds given query hash into the set of hashes of deactivated queries*/ -void -add_deactivated_query(uint64 query_hash) +typedef enum { + ET_NN = 0, ET_QUERYID, ET_FS, ET_EXECTIME, ET_NEXECS, ET_TOTAL_NCOLS +} et_output_order; + +/* + * XXX: maybe to merge with aqo_cardinality_error ? + * XXX: Do we really want sequental number ? 
+ */ +Datum +aqo_execution_time(PG_FUNCTION_ARGS) { - hash_search(deactivated_queries, &query_hash, HASH_ENTER, NULL); + bool controlled = PG_GETARG_BOOL(0); + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[AQE_TOTAL_NCOLS]; + bool nulls[AQE_TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + QueriesEntry *qentry; + StatEntry *sentry; + int counter = 0; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + Assert(tupDesc->natts == ET_TOTAL_NCOLS); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + + hash_seq_init(&hash_seq, queries_htab); + while ((qentry = hash_seq_search(&hash_seq)) != NULL) + { + bool found; + double *et; + int64 nexecs; + int nvals; + double tm = 0; + + memset(nulls, 0, ET_TOTAL_NCOLS * sizeof(nulls[0])); + + sentry = (StatEntry *) hash_search(stat_htab, &qentry->queryid, + HASH_FIND, &found); + if (!found) + 
/* Statistics not found by some reason. Just go further */ + continue; + + nvals = controlled ? sentry->cur_stat_slot_aqo : sentry->cur_stat_slot; + if (nvals == 0) + /* No one stat slot filled */ + continue; + + nexecs = controlled ? sentry->execs_with_aqo : sentry->execs_without_aqo; + et = controlled ? sentry->exec_time_aqo : sentry->exec_time; + + if (!controlled) + { + int i; + /* Calculate average execution time */ + for (i = 0; i < nvals; i++) + tm += et[i]; + tm /= nvals; + } + else + tm = et[nvals - 1]; + + values[ET_NN] = Int32GetDatum(++counter); + values[ET_QUERYID] = Int64GetDatum(qentry->queryid); + values[ET_FS] = Int64GetDatum(qentry->fs); + values[ET_NEXECS] = Int64GetDatum(nexecs); + values[ET_EXECTIME] = Float8GetDatum(tm); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } + + LWLockRelease(&aqo_state->stat_lock); + LWLockRelease(&aqo_state->queries_lock); + + tuplestore_donestoring(tupstore); + return (Datum) 0; } diff --git a/storage.h b/storage.h new file mode 100644 index 00000000..94891c5d --- /dev/null +++ b/storage.h @@ -0,0 +1,122 @@ +#ifndef STORAGE_H +#define STORAGE_H + +#include "nodes/pg_list.h" +#include "utils/array.h" +#include "utils/dsa.h" /* Public structs have links to DSA memory blocks */ + +#include "aqo.h" +#include "machine_learning.h" + +#define STAT_SAMPLE_SIZE (20) + +/* + * Storage struct for AQO statistics + * It is mostly needed for auto tuning feature. With auto tuning mode aqo + * analyzes stability of last executions of the query, negative influence of + * strong cardinality estimation on a query execution (planner bug?) and so on. + * It can motivate aqo to suppress machine learning for this query class. + * Also, it can be used for an analytics. 
+ */ +typedef struct StatEntry +{ + uint64 queryid; /* The key in the hash table, should be the first field ever */ + + int64 execs_with_aqo; + int64 execs_without_aqo; + + int cur_stat_slot; + double exec_time[STAT_SAMPLE_SIZE]; + double plan_time[STAT_SAMPLE_SIZE]; + double est_error[STAT_SAMPLE_SIZE]; + + int cur_stat_slot_aqo; + double exec_time_aqo[STAT_SAMPLE_SIZE]; + double plan_time_aqo[STAT_SAMPLE_SIZE]; + double est_error_aqo[STAT_SAMPLE_SIZE]; +} StatEntry; + +/* + * Storage entry for query texts. + * Query strings may have very different sizes. So, in hash table we store only + * link to DSA-allocated memory. + */ +typedef struct QueryTextEntry +{ + uint64 queryid; + + /* Link to DSA-allocated memory block. Can be shared across backends */ + dsa_pointer qtext_dp; +} QueryTextEntry; + +typedef struct data_key +{ + uint64 fs; + int64 fss; /* just for alignment */ +} data_key; + +typedef struct DataEntry +{ + data_key key; + + /* defines a size and data placement in the DSA memory block */ + int cols; /* aka nfeatures */ + int rows; /* aka number of equations */ + int nrels; + + /* + * Link to DSA-allocated memory block. Can be shared across backends. + * Contains: + * matrix[][], targets[], reliability[], oids. 
+ */ + dsa_pointer data_dp; +} DataEntry; + +typedef struct QueriesEntry +{ + uint64 queryid; + + uint64 fs; + bool learn_aqo; + bool use_aqo; + bool auto_tuning; +} QueriesEntry; + +extern int querytext_max_size; +extern int dsm_size_max; + +extern HTAB *stat_htab; +extern HTAB *qtexts_htab; +extern HTAB *queries_htab; /* TODO */ +extern HTAB *data_htab; /* TODO */ + +extern StatEntry *aqo_stat_store(uint64 queryid, bool use_aqo, double plan_time, + double exec_time, double est_error); +extern void aqo_stat_flush(void); +extern void aqo_stat_load(void); + +extern bool aqo_qtext_store(uint64 queryid, const char *query_string); +extern void aqo_qtexts_flush(void); +extern void aqo_qtexts_load(void); + +extern bool aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids); +extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, + bool wideSearch); +extern void aqo_data_flush(void); +extern void aqo_data_load(void); + +extern bool aqo_queries_find(uint64 queryid, QueryContextData *ctx); +extern bool aqo_queries_store(uint64 queryid, uint64 fs, bool learn_aqo, + bool use_aqo, bool auto_tuning); +extern void aqo_queries_flush(void); +extern void aqo_queries_load(void); + +/* + * Machinery for deactivated queries cache. + * TODO: Should live in a custom memory context + */ +extern void init_deactivated_queries_storage(void); +extern bool query_is_deactivated(uint64 query_hash); +extern void add_deactivated_query(uint64 query_hash); + +#endif /* STORAGE_H */ diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index b4445d12..eae0c829 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -4,15 +4,15 @@ use Config; use PostgresNode; use TestLib; -use Test::More tests => 21; +use Test::More tests => 27; my $node = get_new_node('aqotest'); $node->init; $node->append_conf('postgresql.conf', qq{ shared_preload_libraries = 'aqo' - log_statement = 'none' aqo.mode = 'intelligent' log_statement = 'ddl' + aqo.join_threshold = 0 }); # Test constants. 
Default values. @@ -78,6 +78,7 @@ $node->safe_psql('postgres', " ALTER SYSTEM SET aqo.mode = 'disabled'; SELECT pg_reload_conf(); + SELECT * FROM aqo_reset(); -- Remove old data "); $node->command_ok([ 'pgbench', '-t', "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], @@ -89,6 +90,7 @@ $fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries;"); $fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); $stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); +note("counter: $fss_count, $fs_count, $fs_samples_count, $stat_count"); is( (($fss_count == 0) and ($fs_count == 1) and ($fs_samples_count == 1) and ($stat_count == 0)), 1); # Check: no problems with stats collection in highly concurrent environment. @@ -112,7 +114,8 @@ $stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts"); # This constants looks like magic numbers. But query set of the pgbench test # is fixed for a long time. -is( (($fs_count == 7) and ($fs_samples_count == 6) and ($stat_count == 7)), 1); +note("fs: $fs_count, $fs_samples_count, $stat_count"); +is( (($fs_count == 6) and ($fs_samples_count == 5) and ($stat_count == 6)), 1); my $analytics = File::Temp->new(); append_to_file($analytics, q{ @@ -126,7 +129,7 @@ }); # Avoid problems with an error fluctuations during the test above. -$node->safe_psql('postgres', "TRUNCATE aqo_query_stat"); +$node->safe_psql('postgres', "SELECT aqo_reset()"); # Look for top of problematic queries. $node->command_ok([ 'pgbench', '-t', "10", '-c', "$CLIENTS", '-j', "$THREADS", @@ -134,23 +137,26 @@ 'analytical queries in pgbench (disabled mode)'); $res = $node->safe_psql('postgres', - "SELECT count(*) FROM show_cardinality_errors(false) v - JOIN aqo_query_texts t ON (t.query_hash = v.id) + "SELECT count(*) FROM aqo_cardinality_error(false) v + JOIN aqo_query_texts t ON (t.queryid = v.id) WHERE v.error > 0. 
AND t.query_text LIKE '%pgbench_accounts%'"); is($res, 3); $res = $node->safe_psql('postgres', - "SELECT * FROM show_cardinality_errors(false) v - JOIN aqo_query_texts t ON (t.query_hash = v.id) + "SELECT * FROM aqo_cardinality_error(false) v + JOIN aqo_query_texts t ON (t.queryid = v.id) WHERE v.error > 0. AND t.query_text LIKE '%pgbench_accounts%'"); note("\n TopN: \n $res \n"); $res = $node->safe_psql('postgres', - "SELECT v.error, t.query_text FROM show_cardinality_errors(false) v - JOIN aqo_query_texts t ON (t.query_hash = v.id) + "SELECT v.error, t.query_text FROM aqo_cardinality_error(false) v + JOIN aqo_query_texts t ON (t.queryid = v.id) WHERE v.error > 0."); note("\n Queries: \n $res \n"); +$res = $node->safe_psql('postgres', "SELECT * FROM public.aqo_execution_time(false) v"); +note("\n TIMES: \n $res \n"); + $res = $node->safe_psql('postgres', - "SELECT count(*) FROM top_time_queries(10) v - WHERE v.execution_time > 0."); + "SELECT count(*) FROM public.aqo_execution_time(false) v + WHERE v.exec_time > 0."); is($res, 3); # ############################################################################## @@ -159,6 +165,7 @@ # # ############################################################################## +$node->safe_psql('postgres', "SELECT aqo_reset()"); $node->safe_psql('postgres', "DROP EXTENSION aqo"); $node->safe_psql('postgres', "CREATE EXTENSION aqo"); @@ -204,30 +211,31 @@ # New queries won't add rows into AQO knowledge base. $node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'disabled'"); -$node->restart(); +$node->safe_psql('postgres', "SELECT pg_reload_conf()"); +$node->restart(); # AQO data storage should survive after a restart $res = $node->safe_psql('postgres', "SHOW aqo.mode"); is($res, 'disabled'); # Number of rows in aqo_data: related to pgbench test and total value. 
my $pgb_fss_count = $node->safe_psql('postgres', " SELECT count(*) FROM aqo_data - WHERE $aoid ::regclass::text = ANY(oids) OR - $boid ::regclass::text = ANY(oids) OR - $toid ::regclass::text = ANY(oids) OR - $hoid ::regclass::text = ANY(oids) + WHERE $aoid = ANY(oids) OR + $boid = ANY(oids) OR + $toid = ANY(oids) OR + $hoid = ANY(oids) "); $fss_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_data;"); # Number of rows in aqo_queries: related to pgbench test and total value. my $pgb_fs_count = $node->safe_psql('postgres', " SELECT count(*) FROM aqo_queries - WHERE fspace_hash IN ( - SELECT fspace_hash FROM aqo_data + WHERE fs IN ( + SELECT fs FROM aqo_data WHERE - $aoid ::regclass::text = ANY(oids) OR - $boid ::regclass::text = ANY(oids) OR - $toid ::regclass::text = ANY(oids) OR - $hoid ::regclass::text = ANY(oids) + $aoid = ANY(oids) OR + $boid = ANY(oids) OR + $toid = ANY(oids) OR + $hoid = ANY(oids) ) "); $fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries;"); @@ -235,57 +243,113 @@ # Number of rows in aqo_query_texts: related to pgbench test and total value. my $pgb_fs_samples_count = $node->safe_psql('postgres', " SELECT count(*) FROM aqo_query_texts - WHERE query_hash IN ( - SELECT fspace_hash FROM aqo_data + WHERE queryid IN ( + SELECT fs FROM aqo_data WHERE - $aoid ::regclass::text = ANY(oids) OR - $boid ::regclass::text = ANY(oids) OR - $toid ::regclass::text = ANY(oids) OR - $hoid ::regclass::text = ANY(oids) + $aoid = ANY(oids) OR + $boid = ANY(oids) OR + $toid = ANY(oids) OR + $hoid = ANY(oids) ) "); $fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); +is($pgb_fs_samples_count > 0, 1, "AQO query texts exists"); # Number of rows in aqo_query_stat: related to pgbench test and total value. 
my $pgb_stat_count = $node->safe_psql('postgres', " - SELECT count(*) FROM aqo_query_texts - WHERE query_hash IN ( - SELECT fspace_hash FROM aqo_data + SELECT count(*) FROM aqo_query_stat + WHERE queryid IN ( + SELECT fs FROM aqo_data WHERE - $aoid ::regclass::text = ANY(oids) OR - $boid ::regclass::text = ANY(oids) OR - $toid ::regclass::text = ANY(oids) OR - $hoid ::regclass::text = ANY(oids) + $aoid = ANY(oids) OR + $boid = ANY(oids) OR + $toid = ANY(oids) OR + $hoid = ANY(oids) ) "); $stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); -note("pgbench-related rows: aqo_data - $pgb_fss_count/$fss_count, - aqo_queries: $pgb_fs_count/$fs_count, aqo_query_texts: $pgb_fs_samples_count/$fs_samples_count, - aqo_query_stat: $pgb_stat_count/$stat_count"); - $node->safe_psql('postgres', " DROP TABLE pgbench_accounts, pgbench_branches, pgbench_tellers, pgbench_history CASCADE;"); -# Clean unneeded AQO knowledge -$node->safe_psql('postgres', "SELECT clean_aqo_data()"); +# Remove unnecessary AQO knowledge +$node->safe_psql('postgres', "SELECT * FROM aqo_cleanup()"); # Calculate total number of rows in AQO-related tables. 
my $new_fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries;"); my $new_fss_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_data;"); my $new_fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); my $new_stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); -note("Total AQO rows after dropping pgbench-related tables: - aqo_queries: $new_fs_count, aqo_data: $new_fss_count, - aqo_query_texts: $new_fs_samples_count, aqo_query_stat: $new_stat_count"); +note("Total AQO rows after dropping pgbench-related tables: + aqo_queries: ($new_fs_count, $fs_count, $pgb_fs_count), + aqo_data: ($new_fss_count, $fss_count, $pgb_fss_count), + aqo_query_texts: ($new_fs_samples_count, $fs_samples_count, $pgb_fs_samples_count), + aqo_query_stat: ($new_stat_count, $stat_count, $pgb_stat_count)"); # Check total number of rows in AQO knowledge base after removing of # pgbench-related data. -is($new_fs_count == $fs_count - $pgb_fs_count, 1, 'Total number of feature spaces'); -is($new_fss_count == $fss_count - $pgb_fss_count, 1, 'Total number of feature subspaces'); -is($new_fs_samples_count == $fs_samples_count - $pgb_fs_samples_count, 1, 'Total number of samples in aqo_query_texts'); -is($new_stat_count == $stat_count - $pgb_stat_count, 1, 'Total number of samples in aqo_query_texts'); +is($new_fs_count == $fs_count - $pgb_fs_count, 1, + 'Total number of feature spaces'); +is($new_fss_count == $fss_count - $pgb_fss_count, 1, + 'Total number of feature subspaces'); +is($new_fs_samples_count == $fs_samples_count - $pgb_fs_samples_count, 1, + 'Total number of samples in aqo_query_texts'); +is($new_stat_count == $stat_count - $pgb_stat_count, 1, + 'Total number of samples in aqo_query_stat'); + +# ############################################################################## +# +# AQO works after moving to another schema +# +# 
############################################################################## + +# Move the extension to not-in-search-path schema +# use LEARN mode to guarantee that AQO will be triggered on each query. +$node->safe_psql('postgres', "CREATE SCHEMA test; ALTER EXTENSION aqo SET SCHEMA test"); +$node->safe_psql('postgres', "SELECT * FROM test.aqo_reset()"); # Clear data + +$res = $node->safe_psql('postgres', "SELECT count(*) FROM test.aqo_queries"); +is($res, 1, 'The extension data was reset'); + +$node->command_ok([ 'pgbench', '-i', '-s', '1' ], 'init pgbench tables'); +$node->safe_psql('postgres', " + ALTER SYSTEM SET aqo.mode = 'learn'; + ALTER SYSTEM SET log_statement = 'ddl'; + SELECT pg_reload_conf(); +"); +$node->restart(); + +$node->command_ok([ 'pgbench', '-t', "25", '-c', "$CLIENTS", '-j', "$THREADS" ], + 'pgbench should work with moved AQO.'); + +# DEBUG +$res = $node->safe_psql('postgres', " + SELECT executions_with_aqo, query_text + FROM test.aqo_query_stat a, test.aqo_query_texts b + WHERE a.queryid = b.queryid +"); +note("executions:\n$res\n"); + +$res = $node->safe_psql('postgres', + "SELECT sum(executions_with_aqo) FROM test.aqo_query_stat"); + +# 25 trans * 10 clients * 4 query classes = 1000 + unique SELECT to pgbench_branches +is($res, 1001, 'Each query should be logged in LEARN mode'); +$res = $node->safe_psql('postgres', + "SELECT sum(executions_without_aqo) FROM test.aqo_query_stat"); +is($res, 0, 'AQO has learned on the queries - 2'); + +# Try to call UI functions. 
Break the test on an error +$res = $node->safe_psql('postgres', " + SELECT * FROM test.aqo_cardinality_error(true); + SELECT * FROM test.aqo_execution_time(true); + SELECT * FROM + (SELECT queryid FROM test.aqo_queries WHERE queryid<>0 LIMIT 1) q, + LATERAL test.aqo_drop_class(queryid); + SELECT * FROM test.aqo_cleanup(); +"); +note("OUTPUT:\n$res\n"); $node->safe_psql('postgres', "DROP EXTENSION aqo"); @@ -322,7 +386,7 @@ $node->safe_psql('postgres', " CREATE EXTENSION aqo; ALTER SYSTEM SET aqo.mode = 'intelligent'; - ALTER SYSTEM SET log_statement = 'all'; + ALTER SYSTEM SET log_statement = 'none'; SELECT pg_reload_conf(); "); $node->restart(); diff --git a/t/002_pg_stat_statements_aqo.pl b/t/002_pg_stat_statements_aqo.pl index c0bc5127..dfa84b3a 100644 --- a/t/002_pg_stat_statements_aqo.pl +++ b/t/002_pg_stat_statements_aqo.pl @@ -3,7 +3,7 @@ use PostgresNode; use TestLib; -use Test::More tests => 3; +use Test::More tests => 2; my $node = get_new_node('profiling'); $node->init; @@ -15,6 +15,7 @@ aqo.profile_enable = 'true' aqo.force_collect_stat = 'false' log_statement = 'ddl' # reduce size of logs. + aqo.join_threshold = 0 }); # Test constants. 
my $TRANSACTIONS = 100; @@ -27,7 +28,7 @@ my $total_classes; $node->start(); # ERROR: AQO allow to load library only on startup -print "create extantion aqo"; +print "Create extension aqo"; $node->psql('postgres', "CREATE EXTENSION aqo"); $node->psql('postgres', "CREATE EXTENSION pg_stat_statements"); print "create preload libraries"; @@ -56,11 +57,5 @@ $res = $node->safe_psql('postgres', "SELECT count(*) FROM pg_stat_statements where query = 'SELECT * FROM aqo_test0'"); is($res, 1); # The same query add in pg_stat_statements $res = $node->safe_psql('postgres', "SELECT count(*) from aqo_query_texts where query_text = 'SELECT * FROM aqo_test0'"); -is($res, 0); # The same query isn't add in aqo_query_texts -$query_id = $node->safe_psql('postgres', "SELECT queryid FROM pg_stat_statements where query = 'SELECT * FROM aqo_test0'"); -$res = $node->safe_psql('postgres', "insert into aqo_queries values ($query_id,'f','f',$query_id,'f')"); -# Add query in aqo_query_texts -$res = $node->safe_psql('postgres', "insert into aqo_query_texts values ($query_id,'SELECT * FROM aqo_test0')"); -$res = $node->safe_psql('postgres', "SELECT count(*) from aqo_query_texts where query_text = 'SELECT * FROM aqo_test0'"); # The same query is in aqo_query_texts -is($res, 1); +is($res, 0); # The same query isn't added into aqo_query_texts $node->stop(); \ No newline at end of file diff --git a/utils.c b/utils.c index 3fda40d6..029af9ab 100644 --- a/utils.c +++ b/utils.c @@ -114,46 +114,3 @@ inverse_permutation(int *idx, int n) inv[idx[i]] = i; return inv; } - -/* - * Allocates empty QueryStat object. 
- */ -QueryStat * -palloc_query_stat(void) -{ - QueryStat *res; - MemoryContext oldCxt; - - oldCxt = MemoryContextSwitchTo(AQOMemoryContext); - res = palloc0(sizeof(QueryStat)); - res->execution_time_with_aqo = palloc0(aqo_stat_size * - sizeof(res->execution_time_with_aqo[0])); - res->execution_time_without_aqo = palloc0(aqo_stat_size * - sizeof(res->execution_time_without_aqo[0])); - res->planning_time_with_aqo = palloc0(aqo_stat_size * - sizeof(res->planning_time_with_aqo[0])); - res->planning_time_without_aqo = palloc0(aqo_stat_size * - sizeof(res->planning_time_without_aqo[0])); - res->cardinality_error_with_aqo = palloc0(aqo_stat_size * - sizeof(res->cardinality_error_with_aqo[0])); - res->cardinality_error_without_aqo = palloc0(aqo_stat_size * - sizeof(res->cardinality_error_without_aqo[0])); - MemoryContextSwitchTo(oldCxt); - - return res; -} - -/* - * Frees QueryStat object. - */ -void -pfree_query_stat(QueryStat * stat) -{ - pfree(stat->execution_time_with_aqo); - pfree(stat->execution_time_without_aqo); - pfree(stat->planning_time_with_aqo); - pfree(stat->planning_time_without_aqo); - pfree(stat->cardinality_error_with_aqo); - pfree(stat->cardinality_error_without_aqo); - pfree(stat); -} From 9e06e1d0e0151f65fc112cf993c40ecb819f1941 Mon Sep 17 00:00:00 2001 From: Alexandra Pervushina Date: Fri, 30 Sep 2022 13:43:29 +0000 Subject: [PATCH 048/134] Fix version in c-cpp.yml --- .github/workflows/c-cpp.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 3c987855..8a83eb53 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -1,10 +1,10 @@ -name: 'C/C++ CI for the stable13' +name: 'C/C++ CI for the stable14' on: push: - branches: [ stable13 ] + branches: [ stable14 ] pull_request: - branches: [ stable13 ] + branches: [ stable14 ] jobs: build: @@ -19,9 +19,9 @@ jobs: git config --global user.name "CI PgPro admin" git clone 
https://github.com/postgres/postgres.git pg cd pg - git checkout REL_13_STABLE + git checkout REL_14_STABLE ./configure --prefix=`pwd`/tmp_install git subtree add --prefix=contrib/aqo https://github.com/postgrespro/aqo.git $GITHUB_REF - patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg13.patch + patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg14.patch make -j4 > /dev/null && make -j4 -C contrib > /dev/null env CLIENTS=50 THREADS=50 make -C contrib/aqo check From d7e18d5bbc6c388d9b593661e38dc282ff6478af Mon Sep 17 00:00:00 2001 From: Alexandra Pervushina Date: Thu, 6 Oct 2022 11:59:25 +0000 Subject: [PATCH 049/134] Fix aqo_cleanup() return value; Fix tests that use aqo_cleanup --- aqo--1.4--1.5.sql | 1 + expected/aqo_learn.out | 2 +- expected/clean_aqo_data.out | 8 ++++---- expected/gucs.out | 2 +- expected/temp_tables.out | 8 ++++---- expected/unsupported.out | 2 +- storage.c | 3 +-- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 3244a721..86f9cc98 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -144,6 +144,7 @@ COMMENT ON FUNCTION aqo_drop_class(bigint) IS -- Returns number of deleted rows from aqo_queries and aqo_data tables. -- CREATE OR REPLACE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) +RETURNS SETOF record AS 'MODULE_PATHNAME', 'aqo_cleanup' LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_cleanup() IS diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index aed72fc9..db117a0c 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -237,7 +237,7 @@ SELECT count(*) FROM tmp1; -- Remove data on some unneeded instances of tmp1 table. 
SELECT * FROM aqo_cleanup(); - nfs | nfss + nfs | nfss -----+------ 9 | 18 (1 row) diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index a954bac3..e66f274b 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -12,7 +12,7 @@ SELECT * FROM a; SELECT 'a'::regclass::oid AS a_oid \gset SELECT true FROM aqo_cleanup(); - bool + bool ------ t (1 row) @@ -55,7 +55,7 @@ SELECT count(*) FROM aqo_query_stat WHERE DROP TABLE a; SELECT true FROM aqo_cleanup(); - bool + bool ------ t (1 row) @@ -176,7 +176,7 @@ SELECT count(*) FROM aqo_query_stat WHERE DROP TABLE a; SELECT true FROM aqo_cleanup(); - bool + bool ------ t (1 row) @@ -254,7 +254,7 @@ SELECT count(*) FROM aqo_query_stat WHERE DROP TABLE b; SELECT true FROM aqo_cleanup(); - bool + bool ------ t (1 row) diff --git a/expected/gucs.out b/expected/gucs.out index bbfd8001..7528c67b 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -93,7 +93,7 @@ SELECT obj_description('aqo_reset'::regproc::oid); List of functions Schema | Name | Result data type | Argument data types | Type --------+-------------+------------------+-----------------------------------+------ - public | aqo_cleanup | record | OUT nfs integer, OUT nfss integer | func + public | aqo_cleanup | SETOF record | OUT nfs integer, OUT nfss integer | func (1 row) \df aqo_reset diff --git a/expected/temp_tables.out b/expected/temp_tables.out index d0656056..cb1da23f 100644 --- a/expected/temp_tables.out +++ b/expected/temp_tables.out @@ -49,20 +49,20 @@ SELECT count(*) FROM aqo_data; -- Don't bother about false negatives because of DROP TABLE tt; SELECT * FROM aqo_cleanup(); - nfs | nfss + nfs | nfss -----+------ 0 | 0 (1 row) SELECT count(*) FROM aqo_data; -- Should return the same as previous call above - count + count ------- 10 (1 row) DROP TABLE pt; SELECT * FROM aqo_cleanup(); - nfs | nfss + nfs | nfss -----+------ 3 | 10 (1 row) @@ -134,7 +134,7 @@ SELECT * FROM check_estimated_rows(' SET aqo.mode = 
'forced'; -- Now we use all fss records for each query DROP TABLE pt; SELECT * FROM aqo_cleanup(); - nfs | nfss + nfs | nfss -----+------ 2 | 5 (1 row) diff --git a/expected/unsupported.out b/expected/unsupported.out index a28db16c..8e29b597 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -599,7 +599,7 @@ SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May (1 row) SELECT * FROM aqo_cleanup(); - nfs | nfss + nfs | nfss -----+------ 12 | 42 (1 row) diff --git a/storage.c b/storage.c index 47369c20..b71d0e90 100644 --- a/storage.c +++ b/storage.c @@ -105,7 +105,6 @@ PG_FUNCTION_INFO_V1(aqo_drop_class); PG_FUNCTION_INFO_V1(aqo_cardinality_error); PG_FUNCTION_INFO_V1(aqo_execution_time); - bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe) { @@ -2187,7 +2186,7 @@ aqo_cleanup(PG_FUNCTION_ARGS) values[1] = Int32GetDatum(fss_num); tuplestore_putvalues(tupstore, tupDesc, values, nulls); tuplestore_donestoring(tupstore); - return (Datum) 0; + PG_RETURN_VOID(); } /* From f02343210d0a1dcfb9a4c4bbc7f13fe00e4e2ed8 Mon Sep 17 00:00:00 2001 From: Alexandra Pervushina Date: Thu, 6 Oct 2022 12:03:10 +0000 Subject: [PATCH 050/134] Fix patch to apply cleanly --- aqo_pg14.patch | 161 +++++++++++++++++++++++++------------------------ 1 file changed, 83 insertions(+), 78 deletions(-) diff --git a/aqo_pg14.patch b/aqo_pg14.patch index d43e24f4..b211df01 100644 --- a/aqo_pg14.patch +++ b/aqo_pg14.patch @@ -1,5 +1,5 @@ diff --git a/contrib/Makefile b/contrib/Makefile -index 1846d415b6..95519ac11d 100644 +index f27e458482e..0c621919045 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -7,6 +7,7 @@ include $(top_builddir)/src/Makefile.global @@ -11,18 +11,18 @@ index 1846d415b6..95519ac11d 100644 auto_explain \ bloom \ diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index bc05c96b4c..b6a3abe0d2 100644 +index 70551522dac..d9cca82fe84 100644 --- 
a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c -@@ -24,6 +24,7 @@ - #include "nodes/extensible.h" +@@ -25,6 +25,7 @@ #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" + #include "parser/analyze.h" +#include "optimizer/cost.h" #include "parser/parsetree.h" #include "rewrite/rewriteHandler.h" #include "storage/bufmgr.h" -@@ -46,6 +47,12 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; +@@ -47,6 +48,12 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; /* Hook for plugins to get control in explain_get_index_name() */ explain_get_index_name_hook_type explain_get_index_name_hook = NULL; @@ -35,7 +35,7 @@ index bc05c96b4c..b6a3abe0d2 100644 /* OR-able flags for ExplainXMLTag() */ #define X_OPENING 0 -@@ -638,6 +645,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, +@@ -676,6 +683,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, ExplainPropertyFloat("Execution Time", "ms", 1000.0 * totaltime, 3, es); @@ -46,7 +46,7 @@ index bc05c96b4c..b6a3abe0d2 100644 ExplainCloseGroup("Query", NULL, true, es); } -@@ -1612,6 +1623,9 @@ ExplainNode(PlanState *planstate, List *ancestors, +@@ -1658,6 +1669,9 @@ ExplainNode(PlanState *planstate, List *ancestors, } } @@ -57,7 +57,7 @@ index bc05c96b4c..b6a3abe0d2 100644 if (es->format == EXPLAIN_FORMAT_TEXT) appendStringInfoChar(es->str, '\n'); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index 682b28ed72..3a5c615deb 100644 +index 4d9746d54a0..6fa85d1c71f 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -132,6 +132,7 @@ CopyPlanFields(const Plan *from, Plan *newnode) @@ -69,7 +69,7 @@ index 682b28ed72..3a5c615deb 100644 /* diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c -index 7237b52e96..5e2ee2732a 100644 +index 58c2590698c..1e06738a137 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -342,6 +342,7 @@ 
_outPlanInfo(StringInfo str, const Plan *node) @@ -81,10 +81,10 @@ index 7237b52e96..5e2ee2732a 100644 /* diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c -index 62c945b6c5..a39046ca56 100644 +index eaa51c5c062..6ad8b78c7d5 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c -@@ -1580,6 +1580,11 @@ ReadCommonPlan(Plan *local_node) +@@ -1628,6 +1628,11 @@ ReadCommonPlan(Plan *local_node) READ_NODE_FIELD(initPlan); READ_BITMAPSET_FIELD(extParam); READ_BITMAPSET_FIELD(allParam); @@ -97,23 +97,22 @@ index 62c945b6c5..a39046ca56 100644 /* diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index 4edc859cb5..988f2e6ab7 100644 +index 006f91f0a87..ef9c8ec5817 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c -@@ -98,6 +98,12 @@ +@@ -98,6 +98,11 @@ + #include "utils/spccache.h" #include "utils/tuplesort.h" - +set_baserel_rows_estimate_hook_type set_baserel_rows_estimate_hook = NULL; +set_foreign_rows_estimate_hook_type set_foreign_rows_estimate_hook = NULL; +get_parameterized_baserel_size_hook_type get_parameterized_baserel_size_hook = NULL; +get_parameterized_joinrel_size_hook_type get_parameterized_joinrel_size_hook = NULL; +set_joinrel_size_estimates_hook_type set_joinrel_size_estimates_hook = NULL; -+ - /* source-code-compatibility hacks for pull_varnos() API change */ - #define pull_varnos(a,b) pull_varnos_new(a,b) -@@ -181,7 +187,6 @@ static Cost append_nonpartial_cost(List *subpaths, int numpaths, + #define LOG2(x) (log(x) / 0.693147180559945) + +@@ -188,7 +193,6 @@ static Cost append_nonpartial_cost(List *subpaths, int numpaths, static void set_rel_width(PlannerInfo *root, RelOptInfo *rel); static double relation_byte_size(double tuples, int width); static double page_size(double tuples, int width); @@ -121,7 +120,7 @@ index 4edc859cb5..988f2e6ab7 100644 /* -@@ -4632,6 +4637,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath 
*path, List *quals) +@@ -4911,6 +4915,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) } @@ -180,7 +179,7 @@ index 4edc859cb5..988f2e6ab7 100644 /* * set_baserel_size_estimates * Set the size estimates for the given base relation. -@@ -4648,19 +4705,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4927,19 +4983,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) { @@ -201,7 +200,7 @@ index 4edc859cb5..988f2e6ab7 100644 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); -@@ -4671,13 +4719,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) +@@ -4950,13 +4997,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) * get_parameterized_baserel_size * Make a size estimate for a parameterized scan of a base relation. * @@ -237,7 +236,7 @@ index 4edc859cb5..988f2e6ab7 100644 { List *allclauses; double nrows; -@@ -4706,6 +4774,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -4985,6 +5052,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates * Set the size estimates for the given join relation. * @@ -274,7 +273,7 @@ index 4edc859cb5..988f2e6ab7 100644 * The rel's targetlist must have been constructed already, and a * restriction clause list that matches the given component rels must * be provided. -@@ -4725,11 +4823,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -5004,11 +5101,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * build_joinrel_tlist, and baserestrictcost is not used for join rels. 
*/ void @@ -291,7 +290,7 @@ index 4edc859cb5..988f2e6ab7 100644 { rel->rows = calc_joinrel_size_estimate(root, rel, -@@ -4745,6 +4843,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -5024,6 +5121,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * get_parameterized_joinrel_size * Make a size estimate for a parameterized scan of a join relation. * @@ -327,7 +326,7 @@ index 4edc859cb5..988f2e6ab7 100644 * 'rel' is the joinrel under consideration. * 'outer_path', 'inner_path' are (probably also parameterized) Paths that * produce the relations being joined. -@@ -4757,11 +4884,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -5036,11 +5162,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates must have been applied already. */ double @@ -344,7 +343,7 @@ index 4edc859cb5..988f2e6ab7 100644 { double nrows; -@@ -5430,7 +5557,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) +@@ -5756,7 +5882,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) /* Should only be applied to base relations */ Assert(rel->relid > 0); @@ -353,7 +352,7 @@ index 4edc859cb5..988f2e6ab7 100644 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); -@@ -5716,7 +5843,7 @@ page_size(double tuples, int width) +@@ -6042,7 +6168,7 @@ page_size(double tuples, int width) * Estimate the fraction of the work that each worker will do given the * number of workers budgeted for the path. 
*/ @@ -363,10 +362,10 @@ index 4edc859cb5..988f2e6ab7 100644 { double parallel_divisor = path->parallel_workers; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 917713c163..5b7bf1cec6 100644 +index 0ed858f305a..9d4a6c59030 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c -@@ -70,6 +70,7 @@ +@@ -71,6 +71,7 @@ #define CP_LABEL_TLIST 0x0004 /* tlist must contain sortgrouprefs */ #define CP_IGNORE_TLIST 0x0008 /* caller will replace tlist */ @@ -374,7 +373,7 @@ index 917713c163..5b7bf1cec6 100644 static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path, int flags); -@@ -524,6 +525,10 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) +@@ -545,6 +546,10 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) break; } @@ -385,7 +384,7 @@ index 917713c163..5b7bf1cec6 100644 return plan; } -@@ -5163,6 +5168,7 @@ copy_generic_path_info(Plan *dest, Path *src) +@@ -5323,6 +5328,7 @@ copy_generic_path_info(Plan *dest, Path *src) dest->plan_width = src->pathtarget->width; dest->parallel_aware = src->parallel_aware; dest->parallel_safe = src->parallel_safe; @@ -394,10 +393,10 @@ index 917713c163..5b7bf1cec6 100644 /* diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c -index 60e7fda6a9..5732c7a685 100644 +index 70899e5430e..dac6132af54 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c -@@ -145,7 +145,8 @@ static List *extract_rollup_sets(List *groupingSets); +@@ -143,7 +143,8 @@ static List *extract_rollup_sets(List *groupingSets); static List *reorder_grouping_sets(List *groupingSets, List *sortclause); static void standard_qp_callback(PlannerInfo *root, void *extra); static double get_number_of_groups(PlannerInfo *root, @@ -407,7 +406,7 @@ index 60e7fda6a9..5732c7a685 100644 grouping_sets_data *gd, List *target_list); static RelOptInfo 
*create_grouping_paths(PlannerInfo *root, -@@ -3682,7 +3683,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) +@@ -3151,7 +3152,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) */ static double get_number_of_groups(PlannerInfo *root, @@ -417,36 +416,36 @@ index 60e7fda6a9..5732c7a685 100644 grouping_sets_data *gd, List *target_list) { -@@ -3719,7 +3721,7 @@ get_number_of_groups(PlannerInfo *root, +@@ -3188,7 +3190,7 @@ get_number_of_groups(PlannerInfo *root, GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); double numGroups = estimate_num_groups(root, groupExprs, - path_rows, + subpath->rows, - &gset); + &gset, + NULL); - gs->numGroups = numGroups; -@@ -3744,7 +3746,7 @@ get_number_of_groups(PlannerInfo *root, +@@ -3214,7 +3216,7 @@ get_number_of_groups(PlannerInfo *root, GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); double numGroups = estimate_num_groups(root, groupExprs, - path_rows, + subpath->rows, - &gset); + &gset, + NULL); - gs->numGroups = numGroups; -@@ -3760,8 +3762,8 @@ get_number_of_groups(PlannerInfo *root, +@@ -3231,8 +3233,8 @@ get_number_of_groups(PlannerInfo *root, groupExprs = get_sortgrouplist_exprs(parse->groupClause, target_list); - dNumGroups = estimate_num_groups(root, groupExprs, path_rows, -- NULL); +- NULL, NULL); + dNumGroups = estimate_num_groups_ext(root, groupExprs, subpath, + grouped_rel, NULL); } } else if (parse->groupingSets) -@@ -4147,7 +4149,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, +@@ -3619,7 +3621,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, * Estimate number of groups. 
*/ dNumGroups = get_number_of_groups(root, @@ -456,7 +455,7 @@ index 60e7fda6a9..5732c7a685 100644 gd, extra->targetList); -@@ -6931,13 +6934,15 @@ create_partial_grouping_paths(PlannerInfo *root, +@@ -6425,13 +6428,15 @@ create_partial_grouping_paths(PlannerInfo *root, if (cheapest_total_path != NULL) dNumPartialGroups = get_number_of_groups(root, @@ -475,13 +474,13 @@ index 60e7fda6a9..5732c7a685 100644 extra->targetList); diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c -index a203e6f1ff..d31bf5bae6 100644 +index e105a4d5f1d..c5bcc9d1d15 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -258,6 +258,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) + rel->all_partrels = NULL; rel->partexprs = NULL; rel->nullable_partexprs = NULL; - rel->partitioned_child_rels = NIL; + rel->ext_nodes = NULL; /* @@ -494,23 +493,23 @@ index a203e6f1ff..d31bf5bae6 100644 elog(ERROR, "no relation entry for relid %d", relid); return NULL; /* keep compiler quiet */ -@@ -673,6 +673,7 @@ build_join_rel(PlannerInfo *root, +@@ -672,6 +672,7 @@ build_join_rel(PlannerInfo *root, + joinrel->all_partrels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; - joinrel->partitioned_child_rels = NIL; + joinrel->ext_nodes = NULL; /* Compute information relevant to the foreign relations. 
*/ set_foreign_rel_properties(joinrel, outer_rel, inner_rel); -@@ -851,6 +852,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, +@@ -850,6 +851,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, + joinrel->all_partrels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; - joinrel->partitioned_child_rels = NIL; + joinrel->ext_nodes = NULL; joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids, inner_rel->top_parent_relids); -@@ -1264,6 +1266,7 @@ find_childrel_parents(PlannerInfo *root, RelOptInfo *rel) +@@ -1279,6 +1281,7 @@ find_childrel_parents(PlannerInfo *root, RelOptInfo *rel) } @@ -518,7 +517,7 @@ index a203e6f1ff..d31bf5bae6 100644 /* * get_baserel_parampathinfo * Get the ParamPathInfo for a parameterized path for a base relation, -@@ -1332,6 +1335,10 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, +@@ -1347,6 +1350,10 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, ppi->ppi_req_outer = required_outer; ppi->ppi_rows = rows; ppi->ppi_clauses = pclauses; @@ -529,7 +528,7 @@ index a203e6f1ff..d31bf5bae6 100644 baserel->ppilist = lappend(baserel->ppilist, ppi); return ppi; -@@ -1557,6 +1564,10 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, +@@ -1572,6 +1579,10 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, ppi->ppi_req_outer = required_outer; ppi->ppi_rows = rows; ppi->ppi_clauses = NIL; @@ -541,10 +540,16 @@ index a203e6f1ff..d31bf5bae6 100644 return ppi; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c -index 821844ada3..85b2482114 100644 +index 962dec6d504..899ee2bf4c5 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c -@@ -147,6 +147,7 @@ +@@ -1,4 +1,4 @@ +-/*------------------------------------------------------------------------- ++ /*------------------------------------------------------------------------- + * + * selfuncs.c + * 
Selectivity functions and index cost estimation functions for +@@ -143,6 +143,7 @@ /* Hooks for plugins to get control when we ask for stats */ get_relation_stats_hook_type get_relation_stats_hook = NULL; get_index_stats_hook_type get_index_stats_hook = NULL; @@ -552,7 +557,7 @@ index 821844ada3..85b2482114 100644 static double eqsel_internal(PG_FUNCTION_ARGS, bool negate); static double eqjoinsel_inner(Oid opfuncoid, Oid collation, -@@ -3295,6 +3296,19 @@ add_unique_group_var(PlannerInfo *root, List *varinfos, +@@ -3293,6 +3294,19 @@ add_unique_group_var(PlannerInfo *root, List *varinfos, return varinfos; } @@ -566,14 +571,14 @@ index 821844ada3..85b2482114 100644 + return (*estimate_num_groups_hook)(root, groupExprs, subpath, + grouped_rel, pgset); + -+ return estimate_num_groups(root, groupExprs, input_rows, pgset); ++ return estimate_num_groups(root, groupExprs, input_rows, pgset, NULL); +} + /* * estimate_num_groups - Estimate number of groups in a grouped query * diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h -index ba661d32a6..09d0abe58b 100644 +index e94d9e49cf6..49236ced77c 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h @@ -75,6 +75,18 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; @@ -596,10 +601,10 @@ index ba661d32a6..09d0abe58b 100644 extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, ParamListInfo params, DestReceiver *dest); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h -index 5ebf070979..5b2acd7de2 100644 +index f16466a0df1..8f0ed706817 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h -@@ -739,6 +739,10 @@ typedef struct RelOptInfo +@@ -756,6 +756,10 @@ typedef struct RelOptInfo Relids top_parent_relids; /* Relids of topmost parents (if "other" * rel) */ @@ -610,10 +615,10 @@ index 5ebf070979..5b2acd7de2 100644 /* used for partitioned relations: */ PartitionScheme part_scheme; /* Partitioning 
scheme */ int nparts; /* Number of partitions; -1 if not yet set; in -@@ -754,6 +758,12 @@ typedef struct RelOptInfo +@@ -770,6 +774,12 @@ typedef struct RelOptInfo + Relids all_partrels; /* Relids set of all partition relids */ List **partexprs; /* Non-nullable partition key expressions */ List **nullable_partexprs; /* Nullable partition key expressions */ - List *partitioned_child_rels; /* List of RT indexes */ + + /* + * At this list an extension can add additional nodes to pass an info along @@ -623,7 +628,7 @@ index 5ebf070979..5b2acd7de2 100644 } RelOptInfo; /* -@@ -1105,6 +1115,10 @@ typedef struct ParamPathInfo +@@ -1138,6 +1148,10 @@ typedef struct ParamPathInfo Relids ppi_req_outer; /* rels supplying parameters used by path */ double ppi_rows; /* estimated number of result tuples */ List *ppi_clauses; /* join clauses available from outer rels */ @@ -635,10 +640,10 @@ index 5ebf070979..5b2acd7de2 100644 diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 90f02ce6fd..f3e2138ee2 100644 +index 2308c80ddee..dc74cf85bd2 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h -@@ -159,6 +159,12 @@ typedef struct Plan +@@ -158,6 +158,12 @@ typedef struct Plan */ Bitmapset *extParam; Bitmapset *allParam; @@ -652,7 +657,7 @@ index 90f02ce6fd..f3e2138ee2 100644 /* ---------------- diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h -index 6141654e47..e6b28cbb05 100644 +index 2113bc82de0..bcc2520cec5 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -39,6 +39,37 @@ typedef enum @@ -693,7 +698,7 @@ index 6141654e47..e6b28cbb05 100644 /* * prototypes for costsize.c * routines to compute costs and sizes -@@ -175,10 +206,22 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, +@@ -180,10 +211,22 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, SpecialJoinInfo *sjinfo, List *restrictlist, SemiAntiJoinFactors *semifactors); @@ -716,7 +721,7 @@ 
index 6141654e47..e6b28cbb05 100644 extern double get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, Path *outer_path, -@@ -190,6 +233,11 @@ extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -195,6 +238,11 @@ extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *inner_rel, SpecialJoinInfo *sjinfo, List *restrictlist); @@ -728,7 +733,7 @@ index 6141654e47..e6b28cbb05 100644 extern void set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern void set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern void set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel); -@@ -202,5 +250,6 @@ extern void set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel); +@@ -207,5 +255,6 @@ extern void set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern PathTarget *set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target); extern double compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, int loop_count, Cost *cost, double *tuple); @@ -736,7 +741,7 @@ index 6141654e47..e6b28cbb05 100644 #endif /* COST_H */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h -index 3bd7072ae8..21bbaba11c 100644 +index 2922c0cdc14..c59dce6989e 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -18,6 +18,10 @@ @@ -751,7 +756,7 @@ index 3bd7072ae8..21bbaba11c 100644 * prototypes for pathnode.c */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h -index 8ce60e202e..75415102c2 100644 +index bf1adfc52ac..9c78e0f4e02 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -24,6 +24,12 @@ extern double cursor_tuple_fraction; @@ -768,10 +773,10 @@ index 8ce60e202e..75415102c2 100644 * prototypes for plan/planmain.c */ diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h -index 
7ac4a06391..def3522881 100644 +index 9dd444e1ff5..37133340d84 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h -@@ -127,6 +127,12 @@ typedef bool (*get_index_stats_hook_type) (PlannerInfo *root, +@@ -144,6 +144,12 @@ typedef bool (*get_index_stats_hook_type) (PlannerInfo *root, AttrNumber indexattnum, VariableStatData *vardata); extern PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook; @@ -784,13 +789,13 @@ index 7ac4a06391..def3522881 100644 /* Functions in selfuncs.c */ -@@ -195,6 +201,9 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause, - +@@ -213,6 +219,9 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause, extern double estimate_num_groups(PlannerInfo *root, List *groupExprs, - double input_rows, List **pgset); + double input_rows, List **pgset, + EstimationInfo *estinfo); +extern double estimate_num_groups_ext(PlannerInfo *root, List *groupExprs, + Path *subpath, RelOptInfo *grouped_rel, + List **pgset); - + extern void estimate_hash_bucket_stats(PlannerInfo *root, - Node *hashkey, double nbuckets, + Node *hashkey, double nbuckets, From 304b46f23ebe58f404423374560f09043912ec46 Mon Sep 17 00:00:00 2001 From: Alexandra Pervushina Date: Thu, 6 Oct 2022 12:12:45 +0000 Subject: [PATCH 051/134] Be more careful with locks of relations and syscaches in get_list_of_relids() routine Switch on feature 'search on neighbour feature spaces' by a GUC (disabled by default). 
--- aqo.c | 13 ++++++++ aqo.h | 1 + aqo_shared.c | 18 ++++++++--- cardinality_estimation.c | 5 ++- conf.add | 1 + path_utils.c | 27 ++++++++++------ postprocessing.c | 2 -- storage.c | 66 +++++++++++++++++++++++++++++++--------- 8 files changed, 102 insertions(+), 31 deletions(-) diff --git a/aqo.c b/aqo.c index dcd130da..e0c4588f 100644 --- a/aqo.c +++ b/aqo.c @@ -213,6 +213,19 @@ _PG_init(void) NULL ); + DefineCustomBoolVariable( + "aqo.wide_search", + "Search ML data in neighbour feature spaces.", + NULL, + &use_wide_search, + false, + PGC_USERSET, + 0, + NULL, + NULL, + NULL + ); + DefineCustomIntVariable("aqo.join_threshold", "Sets the threshold of number of JOINs in query beyond which AQO is used.", NULL, diff --git a/aqo.h b/aqo.h index 64092b94..8cad51c2 100644 --- a/aqo.h +++ b/aqo.h @@ -173,6 +173,7 @@ extern bool force_collect_stat; extern bool aqo_show_hash; extern bool aqo_show_details; extern int aqo_join_threshold; +extern bool use_wide_search; /* Parameters for current query */ typedef struct QueryContextData diff --git a/aqo_shared.c b/aqo_shared.c index 2ec063e7..ac5c5aea 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -191,16 +191,18 @@ aqo_init_shmem(void) { /* First time through ... 
*/ - LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId()); aqo_state->dsm_handler = DSM_HANDLE_INVALID; - aqo_state->qtexts_dsa_handler = DSM_HANDLE_INVALID; + aqo_state->data_dsa_handler = DSM_HANDLE_INVALID; + aqo_state->qtext_trancheid = LWLockNewTrancheId(); + aqo_state->qtexts_changed = false; - aqo_state->data_dsa_handler = DSM_HANDLE_INVALID; + aqo_state->stat_changed = false; aqo_state->data_changed = false; aqo_state->queries_changed = false; + LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId()); LWLockInitialize(&aqo_state->stat_lock, LWLockNewTrancheId()); LWLockInitialize(&aqo_state->qtexts_lock, LWLockNewTrancheId()); LWLockInitialize(&aqo_state->data_lock, LWLockNewTrancheId()); @@ -245,7 +247,7 @@ aqo_init_shmem(void) LWLockRegisterTranche(aqo_state->data_lock.tranche, "AQO Data Lock Tranche"); LWLockRegisterTranche(aqo_state->queries_lock.tranche, "AQO Queries Lock Tranche"); - if (!IsUnderPostmaster) + if (!IsUnderPostmaster && !found) { before_shmem_exit(on_shmem_shutdown, (Datum) 0); @@ -261,8 +263,16 @@ aqo_init_shmem(void) static void on_shmem_shutdown(int code, Datum arg) { + Assert(!IsUnderPostmaster); + + /* + * Save ML data to a permanent storage. Do it on postmaster shutdown only + * to save time. We can't do so for query_texts and aqo_data because of DSM + * limits. 
+ */ aqo_stat_flush(); aqo_queries_flush(); + return; } Size diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 4baba286..96cd2c70 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -24,6 +24,9 @@ #include "machine_learning.h" #include "storage.h" + +bool use_wide_search = false; + #ifdef AQO_DEBUG_PRINT static void predict_debug_output(List *clauses, List *selectivities, @@ -90,7 +93,7 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, */ /* Try to search in surrounding feature spaces for the same node */ - if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, true)) + if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, use_wide_search)) result = -1; else { diff --git a/conf.add b/conf.add index ed455870..9e9d2336 100644 --- a/conf.add +++ b/conf.add @@ -1,3 +1,4 @@ autovacuum = off shared_preload_libraries = 'postgres_fdw, aqo' max_parallel_workers_per_gather = 1 # switch off parallel workers because of unsteadiness +aqo.wide_search = 'on' \ No newline at end of file diff --git a/path_utils.c b/path_utils.c index 7f30a7e2..c9c7e72f 100644 --- a/path_utils.c +++ b/path_utils.c @@ -155,6 +155,8 @@ hashTempTupleDesc(TupleDesc desc) return s; } +#include "storage/lmgr.h" + /* * Get list of relation indexes and prepare list of permanent table reloids, * list of temporary table reloids (can be changed between query launches) and @@ -177,6 +179,8 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) HeapTuple htup; Form_pg_class classForm; char *relname = NULL; + Oid relrewrite; + char relpersistence; entry = planner_rt_fetch(index, root); @@ -191,15 +195,23 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) if (!HeapTupleIsValid(htup)) elog(PANIC, "cache lookup failed for reloid %u", entry->relid); + /* Copy the fields from syscache and release the slot as quickly as possible. 
*/ classForm = (Form_pg_class) GETSTRUCT(htup); + relpersistence = classForm->relpersistence; + relrewrite = classForm->relrewrite; + relname = pstrdup(NameStr(classForm->relname)); + ReleaseSysCache(htup); - if (classForm->relpersistence == RELPERSISTENCE_TEMP) + if (relpersistence == RELPERSISTENCE_TEMP) { /* The case of temporary table */ - Relation trel = relation_open(entry->relid, NoLock); - TupleDesc tdesc = RelationGetDescr(trel); + Relation trel; + TupleDesc tdesc; + trel = relation_open(entry->relid, NoLock); + tdesc = RelationGetDescr(trel); + Assert(CheckRelationLockedByMe(trel, AccessShareLock, true)); hashes = lappend_uint64(hashes, hashTempTupleDesc(tdesc)); relation_close(trel, NoLock); } @@ -207,18 +219,15 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) { /* The case of regular table */ relname = quote_qualified_identifier( - get_namespace_name(get_rel_namespace(entry->relid)), - classForm->relrewrite ? - get_rel_name(classForm->relrewrite) : - NameStr(classForm->relname)); + get_namespace_name(get_rel_namespace(entry->relid)), + relrewrite ? 
get_rel_name(relrewrite) : relname); + hashes = lappend_uint64(hashes, DatumGetInt64(hash_any_extended( (unsigned char *) relname, strlen(relname), 0))); hrels = lappend_oid(hrels, entry->relid); } - - ReleaseSysCache(htup); } rels->hrels = list_concat(rels->hrels, hrels); diff --git a/postprocessing.c b/postprocessing.c index 0202239b..8c9a2db9 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -171,7 +171,6 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, { List *lst = NIL; ListCell *l; - int i = 0; bool parametrized_sel; int nargs; int *args_hash; @@ -220,7 +219,6 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, Assert(cur_sel > 0); lst = lappend(lst, cur_sel); - i++; } if (parametrized_sel) diff --git a/storage.c b/storage.c index b71d0e90..fc5123ae 100644 --- a/storage.c +++ b/storage.c @@ -302,7 +302,9 @@ aqo_stat_store(uint64 queryid, bool use_aqo, entry->exec_time[pos] = exec_time; entry->est_error[pos] = est_error; } + entry = memcpy(palloc(sizeof(StatEntry)), entry, sizeof(StatEntry)); + aqo_state->stat_changed = true; LWLockRelease(&aqo_state->stat_lock); return entry; } @@ -424,14 +426,24 @@ aqo_stat_flush(void) int ret; long entries; - LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + /* Use exclusive lock to prevent concurrent flushing in different backends. 
*/ + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + + if (!aqo_state->stat_changed) + /* Hash table wasn't changed, meaningless to store it in permanent storage */ + goto end; + entries = hash_get_num_entries(stat_htab); hash_seq_init(&hash_seq, stat_htab); ret = data_store(PGAQO_STAT_FILE, _form_stat_record_cb, entries, (void *) &hash_seq); if (ret != 0) hash_seq_term(&hash_seq); + else + /* Hash table and disk storage are now consistent */ + aqo_state->stat_changed = false; +end: LWLockRelease(&aqo_state->stat_lock); } @@ -468,7 +480,7 @@ aqo_qtexts_flush(void) long entries; dsa_init(); - LWLockAcquire(&aqo_state->qtexts_lock, LW_SHARED); + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); if (!aqo_state->qtexts_changed) /* XXX: mull over forced mode. */ @@ -480,7 +492,9 @@ aqo_qtexts_flush(void) (void *) &hash_seq); if (ret != 0) hash_seq_term(&hash_seq); - aqo_state->qtexts_changed = false; + else + /* Hash table and disk storage are now consistent */ + aqo_state->qtexts_changed = false; end: LWLockRelease(&aqo_state->qtexts_lock); @@ -530,7 +544,7 @@ aqo_data_flush(void) long entries; dsa_init(); - LWLockAcquire(&aqo_state->data_lock, LW_SHARED); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); if (!aqo_state->data_changed) /* XXX: mull over forced mode. 
*/ @@ -547,6 +561,7 @@ aqo_data_flush(void) */ hash_seq_term(&hash_seq); else + /* Hash table and disk storage are now consistent */ aqo_state->data_changed = false; end: LWLockRelease(&aqo_state->data_lock); @@ -573,14 +588,22 @@ aqo_queries_flush(void) { int ret; long entries; - LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + + if (!aqo_state->queries_changed) + goto end; + entries = hash_get_num_entries(queries_htab); hash_seq_init(&hash_seq, queries_htab); ret = data_store(PGAQO_QUERIES_FILE, _form_queries_record_cb, entries, (void *) &hash_seq); if (ret != 0) hash_seq_term(&hash_seq); + else + /* Hash table and disk storage are now consistent */ + aqo_state->queries_changed = false; +end: LWLockRelease(&aqo_state->queries_lock); } @@ -620,7 +643,8 @@ data_store(const char *filename, form_record_t callback, goto error; } - (void) durable_rename(tmpfile, filename, LOG); + /* Parallel (re)writing into a file shouldn't happen. */ + (void) durable_rename(tmpfile, filename, PANIC); elog(LOG, "[AQO] %d records stored in file %s.", counter, filename); return 0; @@ -838,7 +862,7 @@ aqo_queries_load(void) LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); - /* Load on postmaster sturtup. So no any concurrent actions possible here. */ + /* Load on postmaster startup. So no concurrent actions are possible here. */ Assert(hash_get_num_entries(queries_htab) == 0); data_load(PGAQO_QUERIES_FILE, _deform_queries_record_cb, NULL); @@ -925,6 +949,9 @@ data_load(const char *filename, deform_record_t callback, void *ctx) static void on_shmem_shutdown(int code, Datum arg) { + /* + * XXX: It can be expensive to rewrite a file on each shutdown of a backend. 
+ */ aqo_qtexts_flush(); aqo_data_flush(); } @@ -1200,6 +1227,7 @@ _aqo_data_remove(data_key *key) if (hash_search(data_htab, key, HASH_REMOVE, NULL) == NULL) elog(PANIC, "[AQO] Inconsistent data hash table"); + aqo_state->data_changed = true; } @@ -1269,8 +1297,9 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) char *ptr; ListCell *lc; size_t size; - bool tblOverflow; - HASHACTION action; + bool tblOverflow; + HASHACTION action; + bool result; Assert(!LWLockHeldByMe(&aqo_state->data_lock)); @@ -1321,7 +1350,6 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) } Assert(DsaPointerIsValid(entry->data_dp)); - Assert(entry->rows <= data->rows); /* Reserved for the future features */ if (entry->cols != data->cols || entry->nrels != list_length(reloids)) { @@ -1387,8 +1415,9 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) aqo_state->data_changed = true; end: + result = aqo_state->data_changed; LWLockRelease(&aqo_state->data_lock); - return aqo_state->data_changed; + return result; } static void @@ -1496,7 +1525,7 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, dsa_init(); - LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + LWLockAcquire(&aqo_state->data_lock, LW_SHARED); if (!wideSearch) { @@ -1631,7 +1660,8 @@ aqo_data(PG_FUNCTION_ARGS) ptr += sizeof(data_key); if (entry->cols > 0) - values[AD_FEATURES] = PointerGetDatum(form_matrix((double *)ptr, entry->rows, entry->cols)); + values[AD_FEATURES] = PointerGetDatum(form_matrix((double *) ptr, + entry->rows, entry->cols)); else nulls[AD_FEATURES] = true; @@ -1719,7 +1749,9 @@ aqo_data_reset(void) elog(ERROR, "[AQO] hash table corrupted"); num_remove++; } - aqo_state->data_changed = true; + + if (num_remove > 0) + aqo_state->data_changed = true; LWLockRelease(&aqo_state->data_lock); if (num_remove != num_entries) elog(ERROR, "[AQO] Query ML memory storage is corrupted or parallel access without a lock has detected."); @@ -1831,6 +1863,7 
@@ aqo_queries_store(uint64 queryid, entry->use_aqo = use_aqo; entry->auto_tuning = auto_tuning; + aqo_state->queries_changed = true; LWLockRelease(&aqo_state->queries_lock); return true; } @@ -1856,7 +1889,10 @@ aqo_queries_reset(void) elog(ERROR, "[AQO] hash table corrupted"); num_remove++; } - aqo_state->queries_changed = true; + + if (num_remove > 0) + aqo_state->queries_changed = true; + LWLockRelease(&aqo_state->queries_lock); if (num_remove != num_entries - 1) From 7c443affd81c59a69ee60e7a4aa302dd85a0e426 Mon Sep 17 00:00:00 2001 From: Alexandra Pervushina Date: Thu, 6 Oct 2022 12:15:19 +0000 Subject: [PATCH 052/134] Refactor machine dependent tests. --- expected/unsupported.out | 66 ++++++++++++++++++++-------------------- sql/unsupported.sql | 2 +- 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/expected/unsupported.out b/expected/unsupported.out index 8e29b597..dbdc1f7b 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -553,42 +553,42 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? -- Live with this variant of the test for some time. 
-SELECT to_char(error, '9.99EEEE')::text AS error, query_text +SELECT round(error::numeric, 3) AS error, query_text FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.queryid = cef.id ORDER BY (md5(query_text),error) DESC; - error | query_text ------------+------------------------------------------------------------------------------------------------ - 7.68e-01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; - 7.04e-02 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; - 1.42e+00 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; - 0.00e+00 | SELECT * FROM + - | (SELECT * FROM t WHERE x < 0) AS t0 + - | JOIN + - | (SELECT * FROM t WHERE x > 20) AS t1 + - | USING(x); - 0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); - 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + - | SELECT count(*) FROM t WHERE + - | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + - | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); - 4.54e-01 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; - 0.00e+00 | SELECT count(*) FROM ( + - | SELECT count(*) AS x FROM ( + - | SELECT count(*) FROM t1 GROUP BY (x,y) + - | ) AS q1 + - | ) AS q2 + - | WHERE q2.x > 1; - 7.68e-01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; - 0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); - 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + - | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + - | (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); - 0.00e+00 | SELECT count(*) FROM + - | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + - | JOIN + - | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + - | ON q1.x = q2.x+1; + error | query_text 
+-------+------------------------------------------------------------------------------------------------ + 0.768 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + 0.070 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + 1.416 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + 0.000 | SELECT * FROM + + | (SELECT * FROM t WHERE x < 0) AS t0 + + | JOIN + + | (SELECT * FROM t WHERE x > 20) AS t1 + + | USING(x); + 0.000 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 0.000 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT count(*) FROM t WHERE + + | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + + | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + 0.454 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + 0.000 | SELECT count(*) FROM ( + + | SELECT count(*) AS x FROM ( + + | SELECT count(*) FROM t1 GROUP BY (x,y) + + | ) AS q1 + + | ) AS q2 + + | WHERE q2.x > 1; + 0.768 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + 0.000 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); + 0.000 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + + | (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 0.000 | SELECT count(*) FROM + + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + + | JOIN + + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + + | ON q1.x = q2.x+1; (12 rows) DROP TABLE t,t1 CASCADE; -- delete all tables used in the test diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 6446b741..9f26b9a6 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -170,7 +170,7 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? 
-- Live with this variant of the test for some time. -SELECT to_char(error, '9.99EEEE')::text AS error, query_text +SELECT round(error::numeric, 3) AS error, query_text FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.queryid = cef.id ORDER BY (md5(query_text),error) DESC; From fd700fe54b80c00038f66ecc226c9753bfcf4cfe Mon Sep 17 00:00:00 2001 From: Alexandra Pervushina Date: Thu, 6 Oct 2022 12:20:50 +0000 Subject: [PATCH 053/134] Rename conf.add to aqo.conf. Change max_parallel_workers_per_gather to max_parallel_maintenance_workers. --- Makefile | 2 +- aqo.conf | 4 ++++ conf.add | 4 ---- 3 files changed, 5 insertions(+), 5 deletions(-) create mode 100644 aqo.conf delete mode 100644 conf.add diff --git a/Makefile b/Makefile index 1ef23b54..0616b1b0 100755 --- a/Makefile +++ b/Makefile @@ -34,7 +34,7 @@ REGRESS = aqo_disabled \ fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw stat_srcdir = $(top_srcdir)/contrib/pg_stat_statements PG_CPPFLAGS += -I$(libpq_srcdir) -I$(fdw_srcdir) -I$(stat_srcdir) -EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/conf.add +EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/aqo.conf EXTRA_INSTALL = contrib/postgres_fdw contrib/pg_stat_statements DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql \ diff --git a/aqo.conf b/aqo.conf new file mode 100644 index 00000000..b53b5a5d --- /dev/null +++ b/aqo.conf @@ -0,0 +1,4 @@ +autovacuum = off +shared_preload_libraries = 'postgres_fdw, aqo' +max_parallel_maintenance_workers = 1 # switch off parallel workers because of unsteadiness +aqo.wide_search = 'on' \ No newline at end of file diff --git a/conf.add b/conf.add deleted file mode 100644 index 9e9d2336..00000000 --- a/conf.add +++ /dev/null @@ -1,4 +0,0 @@ -autovacuum = off -shared_preload_libraries = 'postgres_fdw, aqo' -max_parallel_workers_per_gather = 1 # switch off parallel workers because of unsteadiness -aqo.wide_search = 'on' \ No newline at end of file From 
ef5bee13e9ae5564a8dcda771b48a6a63e240a7b Mon Sep 17 00:00:00 2001 From: Alexandra Pervushina Date: Thu, 6 Oct 2022 12:36:52 +0000 Subject: [PATCH 054/134] Remove macros for PGPRO_STD from path_utils.c --- path_utils.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/path_utils.c b/path_utils.c index c9c7e72f..886a81eb 100644 --- a/path_utils.c +++ b/path_utils.c @@ -23,11 +23,6 @@ #include "aqo.h" #include "hash.h" -#ifdef PGPRO_STD -# define expression_tree_mutator(node, mutator, context) \ - expression_tree_mutator(node, mutator, context, 0) -#endif - /* * Hook on creation of a plan node. We need to store AQO-specific data to * support learning stage. From 2498f3df8b332b1f6a564034c99769ef304d8a02 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 5 Oct 2022 13:40:58 +0500 Subject: [PATCH 055/134] Return into the code the feature "QueryId based on jumbling machinery". --- aqo.c | 6 +++++ expected/aqo_fdw.out | 44 +++++++++++++++++++++---------- expected/gucs.out | 24 ++++++++++++----- expected/unsupported.out | 56 +++++++++++++++++++++++++++------------- hash.c | 18 ------------- hash.h | 1 - preprocessing.c | 8 +++++- sql/aqo_fdw.sql | 25 ++++++++++++------ sql/gucs.sql | 23 +++++++++++++---- sql/unsupported.sql | 34 +++++++++++++----------- 10 files changed, 153 insertions(+), 86 deletions(-) diff --git a/aqo.c b/aqo.c index e0c4588f..fee35ba5 100644 --- a/aqo.c +++ b/aqo.c @@ -148,6 +148,12 @@ _PG_init(void) errmsg("AQO module could be loaded only on startup."), errdetail("Add 'aqo' into the shared_preload_libraries list."))); + /* + * Inform the postmaster that we want to enable query_id calculation if + * compute_query_id is set to auto. + */ + EnableQueryId(); + DefineCustomEnumVariable("aqo.mode", "Mode of aqo usage.", NULL, diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index 36af3bd6..74849914 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -54,14 +54,11 @@ SELECT x FROM frgn; (5 rows) -- Push down base filters. 
Use verbose mode to see filters. -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE)) -SELECT x FROM frgn WHERE x < 10; -ERROR: syntax error at or near ")" -LINE 1: ...LAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE)) - ^ -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) -SELECT x FROM frgn WHERE x < 10; - QUERY PLAN +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT x FROM frgn WHERE x < 10; +') AS str WHERE str NOT LIKE '%Query Identifier%'; + str ----------------------------------------------------------- Foreign Scan on public.frgn (actual rows=1 loops=1) AQO not used @@ -72,6 +69,21 @@ SELECT x FROM frgn WHERE x < 10; JOINS: 0 (7 rows) +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT x FROM frgn WHERE x < 10; +') AS str WHERE str NOT LIKE '%Query Identifier%'; + str +----------------------------------------------------------- + Foreign Scan on public.frgn (actual rows=1 loops=1) + AQO: rows=1, error=0% + Output: x + Remote SQL: SELECT x FROM public.local WHERE ((x < 10)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(7 rows) + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants QUERY PLAN @@ -99,9 +111,11 @@ SELECT str FROM expln(' (6 rows) -- TODO: Should learn on postgres_fdw nodes -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) - SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; - QUERY PLAN +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; +') AS str WHERE str NOT LIKE '%Query Identifier%'; + str -------------------------------------------------------------------------------------------------------- Foreign Scan (actual rows=1 loops=1) AQO not used @@ -126,9 +140,11 @@ SELECT * FROM frgn AS a, frgn AS b WHERE a.x 3; +') AS str WHERE str NOT LIKE '%Memory 
Usage%'; + str +----------------------------------------------- + HashAggregate (actual rows=17 loops=1) + AQO not used + Group Key: x + -> Seq Scan on t (actual rows=801 loops=1) + AQO: rows=801, error=0% + Filter: (x > 3) + Rows Removed by Filter: 199 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(10 rows) + -- -- Doesn't estimates GROUP BY clause -- @@ -467,17 +493,6 @@ SELECT * FROM -- any prediction on number of fetched tuples. -- So, if selectivity was wrong we could make bad choice of Scan operation. -- For example, we could choose suboptimal index. --- --- Returns string-by-string explain of a query. Made for removing some strings --- from the explain output. --- -CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ -BEGIN - RETURN QUERY - EXECUTE format('EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); - RETURN; -END; -$$ LANGUAGE PLPGSQL; -- Turn off statistics gathering for simple demonstration of filtering problem. ALTER TABLE t SET (autovacuum_enabled = 'false'); CREATE INDEX ind1 ON t(x); @@ -513,10 +528,11 @@ SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; 50 (1 row) -SELECT str AS result -FROM expln('SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str -WHERE str NOT LIKE '%Heap Blocks%'; - result +SELECT str FROM expln(' + EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str +WHERE str NOT LIKE '%Heap Blocks%' AND str NOT LIKE '%Query Identifier%'; + str ----------------------------------------------------------------- Aggregate (actual rows=1 loops=1) AQO not used @@ -572,6 +588,10 @@ ORDER BY (md5(query_text),error) DESC; | SELECT count(*) FROM t WHERE + | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + 0.106 | + + | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT * FROM t GROUP BY (x) HAVING x > 3; + + | 0.454 | 
SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; 0.000 | SELECT count(*) FROM ( + | SELECT count(*) AS x FROM ( + @@ -589,19 +609,19 @@ ORDER BY (md5(query_text),error) DESC; | JOIN + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + | ON q1.x = q2.x+1; -(12 rows) +(13 rows) DROP TABLE t,t1 CASCADE; -- delete all tables used in the test SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May some false positives really bother us here? count ------- - 42 + 44 (1 row) SELECT * FROM aqo_cleanup(); nfs | nfss -----+------ - 12 | 42 + 13 | 44 (1 row) SELECT count(*) FROM aqo_data; -- No one row should be returned diff --git a/hash.c b/hash.c index d4866448..a07add4f 100644 --- a/hash.c +++ b/hash.c @@ -56,24 +56,6 @@ static bool has_consts(List *lst); static List **get_clause_args_ptr(Expr *clause); static bool clause_is_eq_clause(Expr *clause); -/* - * Computes hash for given query.Query Identifier: = - * Hash is supposed to be constant-insensitive. - * XXX: Hashing depend on Oids of database objects. It is restrict usability of - * the AQO knowledge base by current database at current Postgres instance. - */ -uint64 -get_query_hash(Query *parse, const char *query_text) -{ - char *str_repr; - uint64 hash; - - /* XXX: remove_locations and remove_consts are heavy routines. 
*/ - str_repr = remove_locations(remove_consts(nodeToString(parse))); - hash = DatumGetUInt64(hash_any_extended((void *) str_repr, strlen(str_repr),0)); - - return hash; -} /********************************************************************************* * diff --git a/hash.h b/hash.h index 01c90bed..eb4b2b97 100644 --- a/hash.h +++ b/hash.h @@ -3,7 +3,6 @@ #include "nodes/pg_list.h" -extern uint64 get_query_hash(Query *parse, const char *query_text); extern bool list_member_uint64(const List *list, uint64 datum); extern List *list_copy_uint64(List *list); extern List *lappend_uint64(List *list, uint64 datum); diff --git a/preprocessing.c b/preprocessing.c index 55000e79..b5a6927d 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -159,7 +159,13 @@ aqo_planner(Query *parse, MemoryContextSwitchTo(oldctx); oldctx = MemoryContextSwitchTo(AQOUtilityMemCtx); - query_context.query_hash = get_query_hash(parse, query_string); + /* Check unlucky case (get a hash of zero) */ + if (parse->queryId == UINT64CONST(0)) + JumbleQuery(parse, query_string); + + Assert(parse->utilityStmt == NULL); + Assert(parse->queryId != UINT64CONST(0)); + query_context.query_hash = parse->queryId; MemoryContextSwitchTo(oldctx); MemoryContextReset(AQOUtilityMemCtx); diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index 2d71a20d..da1639d9 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -6,6 +6,7 @@ CREATE EXTENSION aqo; CREATE EXTENSION postgres_fdw; + SET aqo.mode = 'learn'; SET aqo.show_details = 'true'; -- show AQO info for each node and entire query. SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. @@ -43,10 +44,14 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn; -- Push down base filters. Use verbose mode to see filters. 
-EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE)) -SELECT x FROM frgn WHERE x < 10; -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) -SELECT x FROM frgn WHERE x < 10; +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT x FROM frgn WHERE x < 10; +') AS str WHERE str NOT LIKE '%Query Identifier%'; +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT x FROM frgn WHERE x < 10; +') AS str WHERE str NOT LIKE '%Query Identifier%'; EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants @@ -57,14 +62,18 @@ SELECT str FROM expln(' ') AS str WHERE str NOT LIKE '%Sort Method%'; -- TODO: Should learn on postgres_fdw nodes -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) - SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; +') AS str WHERE str NOT LIKE '%Query Identifier%'; -- TODO: Non-mergejoinable join condition. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM frgn AS a, frgn AS b WHERE a.x 3) AS q1; EXPLAIN (COSTS OFF) SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t GROUP BY (x) HAVING x > 3; +') AS str WHERE str NOT LIKE '%Memory Usage%'; + -- -- Doesn't estimates GROUP BY clause -- @@ -135,18 +150,6 @@ SELECT * FROM -- So, if selectivity was wrong we could make bad choice of Scan operation. -- For example, we could choose suboptimal index. --- --- Returns string-by-string explain of a query. Made for removing some strings --- from the explain output. 
--- -CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ -BEGIN - RETURN QUERY - EXECUTE format('EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); - RETURN; -END; -$$ LANGUAGE PLPGSQL; - -- Turn off statistics gathering for simple demonstration of filtering problem. ALTER TABLE t SET (autovacuum_enabled = 'false'); CREATE INDEX ind1 ON t(x); @@ -159,9 +162,10 @@ EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) -- Here we filter more tuples than with the ind1 index. CREATE INDEX ind2 ON t(mod(x,3)); SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; -SELECT str AS result -FROM expln('SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str -WHERE str NOT LIKE '%Heap Blocks%'; +SELECT str FROM expln(' + EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str +WHERE str NOT LIKE '%Heap Blocks%' AND str NOT LIKE '%Query Identifier%'; -- Best choice is ... 
ANALYZE t; From f097d8b3c428d909a1f7da7977a5bef8dfaa2f7b Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 6 Oct 2022 08:48:12 +0500 Subject: [PATCH 056/134] Change names of interface functions for better usage --- aqo--1.4--1.5.sql | 9 +++++---- expected/aqo_CVE-2020-14350.out | 24 ++++++++++++------------ expected/relocatable.out | 12 ++++++------ sql/aqo_CVE-2020-14350.sql | 16 ++++++++-------- sql/relocatable.sql | 4 ++-- storage.c | 8 ++++---- 6 files changed, 37 insertions(+), 36 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 86f9cc98..569f2c53 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -19,6 +19,7 @@ DROP TABLE public.aqo_data CASCADE; DROP TABLE public.aqo_queries CASCADE; DROP TABLE public.aqo_query_texts CASCADE; DROP TABLE public.aqo_query_stat CASCADE; +DROP FUNCTION invalidate_deactivated_queries_cache; /* @@ -76,14 +77,14 @@ CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); /* UI functions */ -CREATE FUNCTION aqo_enable_query(queryid bigint) +CREATE FUNCTION aqo_enable_class(queryid bigint) RETURNS void -AS 'MODULE_PATHNAME', 'aqo_enable_query' +AS 'MODULE_PATHNAME', 'aqo_enable_class' LANGUAGE C STRICT VOLATILE; -CREATE FUNCTION aqo_disable_query(queryid bigint) +CREATE FUNCTION aqo_disable_class(queryid bigint) RETURNS void -AS 'MODULE_PATHNAME', 'aqo_enable_query' +AS 'MODULE_PATHNAME', 'aqo_disable_class' LANGUAGE C STRICT VOLATILE; CREATE FUNCTION aqo_queries_update( diff --git a/expected/aqo_CVE-2020-14350.out b/expected/aqo_CVE-2020-14350.out index ccdc4694..8685b935 100644 --- a/expected/aqo_CVE-2020-14350.out +++ b/expected/aqo_CVE-2020-14350.out @@ -116,7 +116,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_enable_query(hash bigint) +CREATE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -125,9 +125,9 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_enable_query" already exists with same argument 
types +ERROR: function "aqo_enable_class" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -135,8 +135,8 @@ BEGIN END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_enable_query(42); - aqo_enable_query +SELECT aqo_enable_class(42); + aqo_enable_class ------------------ (1 row) @@ -149,7 +149,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_enable_query(bigint); +DROP FUNCTION aqo_enable_class(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 4 @@ -162,7 +162,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_disable_query(hash bigint) +CREATE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -171,9 +171,9 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_disable_query" already exists with same argument types +ERROR: function "aqo_disable_class" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -181,8 +181,8 @@ BEGIN END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_disable_query(42); - aqo_disable_query +SELECT aqo_disable_class(42); + aqo_disable_class ------------------- (1 row) @@ -195,7 +195,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_disable_query(bigint); +DROP FUNCTION aqo_disable_class(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 5 diff --git a/expected/relocatable.out b/expected/relocatable.out index 5fcf06e6..949896f6 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -80,9 +80,9 @@ ORDER BY (md5(query_text)) /* * Below, we should check each UI function */ -SELECT aqo_disable_query(id) FROM ( +SELECT 
aqo_disable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; - aqo_disable_query + aqo_disable_class ------------------- @@ -93,13 +93,13 @@ ORDER BY (learn_aqo, use_aqo, auto_tuning); learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f - t | t | f - t | t | f + f | f | f + f | f | f (3 rows) -SELECT aqo_enable_query(id) FROM ( +SELECT aqo_enable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; - aqo_enable_query + aqo_enable_class ------------------ diff --git a/sql/aqo_CVE-2020-14350.sql b/sql/aqo_CVE-2020-14350.sql index 1b36b50b..75833223 100644 --- a/sql/aqo_CVE-2020-14350.sql +++ b/sql/aqo_CVE-2020-14350.sql @@ -103,7 +103,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_enable_query(hash bigint) +CREATE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -115,7 +115,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -124,13 +124,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_enable_query(42); +SELECT aqo_enable_class(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_enable_query(bigint); +DROP FUNCTION aqo_enable_class(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 4 @@ -140,7 +140,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_disable_query(hash bigint) +CREATE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -152,7 +152,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -161,13 +161,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_disable_query(42); +SELECT 
aqo_disable_class(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_disable_query(bigint); +DROP FUNCTION aqo_disable_class(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 5 diff --git a/sql/relocatable.sql b/sql/relocatable.sql index e8cc57c3..780c385e 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -39,11 +39,11 @@ ORDER BY (md5(query_text)) /* * Below, we should check each UI function */ -SELECT aqo_disable_query(id) FROM ( +SELECT aqo_disable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries ORDER BY (learn_aqo, use_aqo, auto_tuning); -SELECT aqo_enable_query(id) FROM ( +SELECT aqo_enable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries ORDER BY (learn_aqo, use_aqo, auto_tuning); diff --git a/storage.c b/storage.c index fc5123ae..868a12ec 100644 --- a/storage.c +++ b/storage.c @@ -96,8 +96,8 @@ PG_FUNCTION_INFO_V1(aqo_query_stat); PG_FUNCTION_INFO_V1(aqo_query_texts); PG_FUNCTION_INFO_V1(aqo_data); PG_FUNCTION_INFO_V1(aqo_queries); -PG_FUNCTION_INFO_V1(aqo_enable_query); -PG_FUNCTION_INFO_V1(aqo_disable_query); +PG_FUNCTION_INFO_V1(aqo_enable_class); +PG_FUNCTION_INFO_V1(aqo_disable_class); PG_FUNCTION_INFO_V1(aqo_queries_update); PG_FUNCTION_INFO_V1(aqo_reset); PG_FUNCTION_INFO_V1(aqo_cleanup); @@ -1904,7 +1904,7 @@ aqo_queries_reset(void) } Datum -aqo_enable_query(PG_FUNCTION_ARGS) +aqo_enable_class(PG_FUNCTION_ARGS) { uint64 queryid = (uint64) PG_GETARG_INT64(0); QueriesEntry *entry; @@ -1935,7 +1935,7 @@ aqo_enable_query(PG_FUNCTION_ARGS) } Datum -aqo_disable_query(PG_FUNCTION_ARGS) +aqo_disable_class(PG_FUNCTION_ARGS) { uint64 queryid = (uint64) PG_GETARG_INT64(0); QueriesEntry *entry; From a901651ecd1ffbe97acdb4bf78e866f0647fa7e3 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 6 Oct 2022 08:54:18 +0500 Subject: [PATCH 
057/134] Update c-cpp.yml --- .github/workflows/c-cpp.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 8a83eb53..604d1607 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -21,7 +21,9 @@ jobs: cd pg git checkout REL_14_STABLE ./configure --prefix=`pwd`/tmp_install - git subtree add --prefix=contrib/aqo https://github.com/postgrespro/aqo.git $GITHUB_REF + + git clone https://github.com/postgrespro/aqo.git contrib/aqo + git -C contrib/aqo checkout $GITHUB_REF patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg14.patch make -j4 > /dev/null && make -j4 -C contrib > /dev/null env CLIENTS=50 THREADS=50 make -C contrib/aqo check From 6834a18cc562d87cf7a2355ab9a58f089d643763 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 6 Oct 2022 10:12:38 +0500 Subject: [PATCH 058/134] Minor change in memory contexts usage --- hash.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/hash.c b/hash.c index a07add4f..48e15e6d 100644 --- a/hash.c +++ b/hash.c @@ -202,12 +202,12 @@ get_fss_for_object(List *relsigns, List *clauselist, Assert(n == list_length(selectivities) || (nfeatures == NULL && features == NULL)); - get_eclasses(clauselist, &nargs, &args_hash, &eclass_hash); if (nfeatures != NULL) *features = palloc0(sizeof(**features) * n); old_ctx_m = MemoryContextSwitchTo(AQOUtilityMemCtx); + get_eclasses(clauselist, &nargs, &args_hash, &eclass_hash); clause_hashes = palloc(sizeof(*clause_hashes) * n); clause_has_consts = palloc(sizeof(*clause_has_consts) * n); sorted_clauses = palloc(sizeof(*sorted_clauses) * n); @@ -664,19 +664,14 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) int i, v; int *e_hashes; - MemoryContext old_ctx_m; get_clauselist_args(clauselist, nargs, args_hash); *eclass_hash = palloc((*nargs) * sizeof(**eclass_hash)); - old_ctx_m = 
MemoryContextSwitchTo(AQOUtilityMemCtx); - p = perform_eclasses_join(clauselist, *nargs, *args_hash); lsts = palloc((*nargs) * sizeof(*lsts)); e_hashes = palloc((*nargs) * sizeof(*e_hashes)); - MemoryContextSwitchTo(old_ctx_m); - for (i = 0; i < *nargs; ++i) lsts[i] = NIL; @@ -690,8 +685,6 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) for (i = 0; i < *nargs; ++i) (*eclass_hash)[i] = e_hashes[disjoint_set_get_parent(p, i)]; - - MemoryContextReset(AQOUtilityMemCtx); } /* From 3231ac91aec148968ff61c6b74c213da68950303 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 7 Oct 2022 15:53:05 +0500 Subject: [PATCH 059/134] Bugfix. AQO plan node must have reasonable set of serialization routines: it is used during plan transfer to parallel workers. Another options/extensions can require correct serialization too. --- aqo.h | 1 - cardinality_estimation.c | 1 + hash.c | 18 ++++++------ path_utils.c | 63 ++++++++++++++++++++++------------------ utils.c | 12 -------- 5 files changed, 45 insertions(+), 50 deletions(-) diff --git a/aqo.h b/aqo.h index 8cad51c2..2968f7fc 100644 --- a/aqo.h +++ b/aqo.h @@ -284,7 +284,6 @@ void aqo_ExecutorEnd(QueryDesc *queryDesc); extern void automatical_query_tuning(uint64 query_hash, struct StatEntry *stat); /* Utilities */ -extern int int64_compare(const void *a, const void *b); extern int int_cmp(const void *a, const void *b); extern int double_cmp(const void *a, const void *b); extern int *argsort(void *a, int n, size_t es, diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 96cd2c70..9db202a1 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -103,6 +103,7 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, result = OkNNr_predict(data, features); } } + #ifdef AQO_DEBUG_PRINT predict_debug_output(clauses, selectivities, relsigns, *fss, result); #endif diff --git a/hash.c b/hash.c index 48e15e6d..cb409587 100644 --- a/hash.c +++ b/hash.c 
@@ -33,7 +33,7 @@ static int get_node_hash(Node *node); static int get_unsorted_unsafe_int_array_hash(int *arr, int len); static int get_unordered_int_list_hash(List *lst); -static int64 get_relations_hash(List *relsigns); +static int get_relations_hash(List *relsigns); static int get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash); @@ -278,7 +278,7 @@ get_fss_for_object(List *relsigns, List *clauselist, clauses_hash = get_int_array_hash(sorted_clauses, n - sh); eclasses_hash = get_int_array_hash(eclass_hash, nargs); - relations_hash = (int) get_relations_hash(relsigns); + relations_hash = get_relations_hash(relsigns); fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relations_hash); MemoryContextSwitchTo(old_ctx_m); @@ -449,26 +449,26 @@ get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash) * Hash is supposed to be relations-order-insensitive. * Each element of a list must have a String type, */ -static int64 +static int get_relations_hash(List *relsigns) { int nhashes = 0; - int64 *hashes = palloc(list_length(relsigns) * sizeof(uint64)); + uint32 *hashes = palloc(list_length(relsigns) * sizeof(uint32)); ListCell *lc; - int64 result; + int result; foreach(lc, relsigns) { - hashes[nhashes++] = *(int64 *) lfirst(lc); + hashes[nhashes++] = (uint32) lfirst_int(lc); } /* Sort the array to make query insensitive to input order of relations. 
*/ - qsort(hashes, nhashes, sizeof(int64), int64_compare); + qsort(hashes, nhashes, sizeof(uint32), int_cmp); /* Make a final hash value */ - result = DatumGetInt64(hash_any_extended((const unsigned char *) hashes, - nhashes * sizeof(int64), 0)); + result = DatumGetInt32(hash_any((const unsigned char *) hashes, + nhashes * sizeof(uint32))); return result; } diff --git a/path_utils.c b/path_utils.c index 886a81eb..cd458c67 100644 --- a/path_utils.c +++ b/path_utils.c @@ -130,10 +130,10 @@ get_selectivities(PlannerInfo *root, /* * Based on the hashTupleDesc() routine */ -static uint64 +static uint32 hashTempTupleDesc(TupleDesc desc) { - uint64 s; + uint32 s; int i; s = hash_combine(0, hash_uint32(desc->natts)); @@ -141,11 +141,11 @@ hashTempTupleDesc(TupleDesc desc) for (i = 0; i < desc->natts; ++i) { const char *attname = NameStr(TupleDescAttr(desc, i)->attname); - uint64 s1; + uint32 s1; - s = hash_combine64(s, hash_uint32(TupleDescAttr(desc, i)->atttypid)); - s1 = hash_bytes_extended((const unsigned char *) attname, strlen(attname), 0); - s = hash_combine64(s, s1); + s = hash_combine(s, hash_uint32(TupleDescAttr(desc, i)->atttypid)); + s1 = hash_bytes((const unsigned char *) attname, strlen(attname)); + s = hash_combine(s, s1); } return s; } @@ -181,8 +181,8 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) if (!OidIsValid(entry->relid)) { - /* Invalid oid */ - hashes = lappend_uint64(hashes, (UINT64_MAX / 7)); + /* TODO: Explain this logic. 
*/ + hashes = lappend_int(hashes, INT32_MAX / 3); continue; } @@ -207,7 +207,7 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) trel = relation_open(entry->relid, NoLock); tdesc = RelationGetDescr(trel); Assert(CheckRelationLockedByMe(trel, AccessShareLock, true)); - hashes = lappend_uint64(hashes, hashTempTupleDesc(tdesc)); + hashes = lappend_int(hashes, hashTempTupleDesc(tdesc)); relation_close(trel, NoLock); } else @@ -217,9 +217,9 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) get_namespace_name(get_rel_namespace(entry->relid)), relrewrite ? get_rel_name(relrewrite) : relname); - hashes = lappend_uint64(hashes, DatumGetInt64(hash_any_extended( + hashes = lappend_int(hashes, DatumGetInt32(hash_any( (unsigned char *) relname, - strlen(relname), 0))); + strlen(relname)))); hrels = lappend_oid(hrels, entry->relid); } @@ -569,7 +569,7 @@ AQOnodeCopy(struct ExtensibleNode *enew, const struct ExtensibleNode *eold) /* These lists couldn't contain AQO nodes. Use basic machinery */ new->rels = palloc(sizeof(RelSortOut)); new->rels->hrels = list_copy(old->rels->hrels); - new->rels->signatures = list_copy_uint64(old->rels->signatures); + new->rels->signatures = list_copy(old->rels->signatures); new->clauses = copyObject(old->clauses); new->grouping_exprs = copyObject(old->grouping_exprs); @@ -604,21 +604,24 @@ AQOnodeEqual(const struct ExtensibleNode *a, const struct ExtensibleNode *b) #define WRITE_FLOAT_FIELD(fldname,format) \ appendStringInfo(str, " :" CppAsString(fldname) " " format, node->fldname) +/* + * Serialize AQO plan node to a string. + * + * Right now we can't correctly serialize all fields of the node. Taking into + * account that this action needed when a plan moves into parallel workers or + * just during debugging, we serialize it only partially, just for debug + * purposes. + * Some extensions may manipulate by parts of serialized plan too. 
+ */ static void AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) { AQOPlanNode *node = (AQOPlanNode *) enode; - Assert(0); - WRITE_BOOL_FIELD(had_path); - WRITE_NODE_FIELD(rels); - WRITE_NODE_FIELD(clauses); - WRITE_NODE_FIELD(selectivities); - WRITE_NODE_FIELD(grouping_exprs); - - WRITE_ENUM_FIELD(jointype, JoinType); - WRITE_FLOAT_FIELD(parallel_divisor, "%.5f"); - WRITE_BOOL_FIELD(was_parametrized); + node->had_path = false; + node->jointype = 0; + node->parallel_divisor = 1.0; + node->was_parametrized = false; /* For Adaptive optimization DEBUG purposes */ WRITE_INT_FIELD(fss); @@ -655,6 +658,11 @@ AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) (void) token; /* in case not used elsewhere */ \ local_node->fldname = nodeRead(NULL, 0) +/* + * Deserialize AQO plan node from a string to internal representation. + * + * Should work in coherence with AQOnodeOut(). + */ static void AQOnodeRead(struct ExtensibleNode *enode) { @@ -662,17 +670,16 @@ AQOnodeRead(struct ExtensibleNode *enode) const char *token; int length; - Assert(0); READ_BOOL_FIELD(had_path); - READ_NODE_FIELD(rels); - READ_NODE_FIELD(clauses); - READ_NODE_FIELD(selectivities); - READ_NODE_FIELD(grouping_exprs); - READ_ENUM_FIELD(jointype, JoinType); READ_FLOAT_FIELD(parallel_divisor); READ_BOOL_FIELD(was_parametrized); + local_node->rels = palloc0(sizeof(RelSortOut)); + local_node->clauses = NIL; + local_node->selectivities = NIL; + local_node->grouping_exprs = NIL; + /* For Adaptive optimization DEBUG purposes */ READ_INT_FIELD(fss); READ_FLOAT_FIELD(prediction); diff --git a/utils.c b/utils.c index 029af9ab..c44b3a64 100644 --- a/utils.c +++ b/utils.c @@ -28,18 +28,6 @@ static int argsort_cmp(const void *a, const void *b); * qsort comparator functions */ -/* int64 comparator for pg_qsort. 
*/ -int -int64_compare(const void *va, const void *vb) -{ - int64 a = *((const int64 *) va); - int64 b = *((const int64 *) vb); - - if (a == b) - return 0; - return (a > b) ? 1 : -1; -} - /* * Function for qsorting an integer arrays */ From f96fc0df4095519d03735a1b9b9268af757061c7 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 7 Oct 2022 16:00:44 +0500 Subject: [PATCH 060/134] CI: add test on full debug options enabled case --- .github/workflows/c-cpp.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 604d1607..a66f06ba 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -20,10 +20,16 @@ jobs: git clone https://github.com/postgres/postgres.git pg cd pg git checkout REL_14_STABLE - ./configure --prefix=`pwd`/tmp_install - + ./configure --prefix=`pwd`/tmp_install CFLAGS="-O3" git clone https://github.com/postgrespro/aqo.git contrib/aqo git -C contrib/aqo checkout $GITHUB_REF patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg14.patch make -j4 > /dev/null && make -j4 -C contrib > /dev/null env CLIENTS=50 THREADS=50 make -C contrib/aqo check + + echo "Use AQO with debug code included" + git clean -fdx + git -C contrib/aqo clean -fdx + ./configure --prefix=`pwd`/tmp_install CFLAGS="-DAQO_DEBUG_PRINT -O0" + make -j4 > /dev/null && make -j4 -C contrib > /dev/null + env CLIENTS=50 THREADS=50 make -C contrib/aqo check From 03f09f1c186916d805429bbe432e74e58acdfdac Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 12 Oct 2022 15:08:03 +0500 Subject: [PATCH 061/134] Bugfix. AQOUtilityMemCtx is reset although some allocated data still in use. Remove the AQOUtilityMemCtx memory context at all. It is used for too small operations. I don't buy that such operations can allocate so much memory that backend must free memory right after the end of operation to avoid OOM. 
I guess, prediction, planning and execution memory context set is good enough. --- aqo.c | 13 +------------ aqo.h | 1 - hash.c | 10 ++++------ postprocessing.c | 8 -------- preprocessing.c | 13 +++---------- storage.c | 4 ---- 6 files changed, 8 insertions(+), 41 deletions(-) diff --git a/aqo.c b/aqo.c index fee35ba5..3b11bebc 100644 --- a/aqo.c +++ b/aqo.c @@ -87,9 +87,6 @@ MemoryContext AQOTopMemCtx = NULL; /* Is released at the end of transaction */ MemoryContext AQOCacheMemCtx = NULL; -/* Should be released in-place, just after a huge calculation */ -MemoryContext AQOUtilityMemCtx = NULL; - /* Is released at the end of planning */ MemoryContext AQOPredictMemCtx = NULL; @@ -348,15 +345,7 @@ _PG_init(void) AQOCacheMemCtx = AllocSetContextCreate(AQOTopMemCtx, "AQOCacheMemCtx", ALLOCSET_DEFAULT_SIZES); - /* - * AQOUtilityMemoryContext containe short-lived information which - * is appeared from having got clause, selectivity arrays and relid lists - * while calculating hashes. It clean up inside calculated - * function or immediately after her having completed. - */ - AQOUtilityMemCtx = AllocSetContextCreate(AQOTopMemCtx, - "AQOUtilityMemoryContext", - ALLOCSET_DEFAULT_SIZES); + /* * AQOPredictMemoryContext save necessary information for making predict of plan nodes * and clean up in the execution stage of query. 
diff --git a/aqo.h b/aqo.h index 2968f7fc..4471d2b8 100644 --- a/aqo.h +++ b/aqo.h @@ -225,7 +225,6 @@ extern int njoins; /* AQO Memory contexts */ extern MemoryContext AQOTopMemCtx; extern MemoryContext AQOCacheMemCtx; -extern MemoryContext AQOUtilityMemCtx; extern MemoryContext AQOPredictMemCtx; extern MemoryContext AQOLearnMemCtx; diff --git a/hash.c b/hash.c index cb409587..fe7da8ee 100644 --- a/hash.c +++ b/hash.c @@ -194,7 +194,6 @@ get_fss_for_object(List *relsigns, List *clauselist, int sh = 0, old_sh; int fss_hash; - MemoryContext old_ctx_m; n = list_length(clauselist); @@ -202,11 +201,13 @@ get_fss_for_object(List *relsigns, List *clauselist, Assert(n == list_length(selectivities) || (nfeatures == NULL && features == NULL)); + /* + * It should be allocated in a caller memory context, because it will be + * returned. + */ if (nfeatures != NULL) *features = palloc0(sizeof(**features) * n); - old_ctx_m = MemoryContextSwitchTo(AQOUtilityMemCtx); - get_eclasses(clauselist, &nargs, &args_hash, &eclass_hash); clause_hashes = palloc(sizeof(*clause_hashes) * n); clause_has_consts = palloc(sizeof(*clause_has_consts) * n); @@ -281,9 +282,6 @@ get_fss_for_object(List *relsigns, List *clauselist, relations_hash = get_relations_hash(relsigns); fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relations_hash); - MemoryContextSwitchTo(old_ctx_m); - MemoryContextReset(AQOUtilityMemCtx); - if (nfeatures != NULL) { *nfeatures = n - sh; diff --git a/postprocessing.c b/postprocessing.c index 8c9a2db9..619d1c40 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -178,16 +178,13 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, double *cur_sel; int cur_hash; int cur_relid; - MemoryContext old_ctx_m; parametrized_sel = was_parametrized && (list_length(relidslist) == 1); if (parametrized_sel) { cur_relid = linitial_int(relidslist); - old_ctx_m = MemoryContextSwitchTo(AQOUtilityMemCtx); get_eclasses(clauselist, &nargs, &args_hash, 
&eclass_hash); - MemoryContextSwitchTo(old_ctx_m); } foreach(l, clauselist) @@ -221,11 +218,6 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, lst = lappend(lst, cur_sel); } - if (parametrized_sel) - { - MemoryContextReset(AQOUtilityMemCtx); - } - return lst; } diff --git a/preprocessing.c b/preprocessing.c index b5a6927d..91689b91 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -127,7 +127,8 @@ aqo_planner(Query *parse, ParamListInfo boundParams) { bool query_is_stored = false; - MemoryContext oldctx; + MemoryContext oldctx; + oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); /* @@ -156,9 +157,7 @@ aqo_planner(Query *parse, } selectivity_cache_clear(); - MemoryContextSwitchTo(oldctx); - oldctx = MemoryContextSwitchTo(AQOUtilityMemCtx); /* Check unlucky case (get a hash of zero) */ if (parse->queryId == UINT64CONST(0)) JumbleQuery(parse, query_string); @@ -166,11 +165,6 @@ aqo_planner(Query *parse, Assert(parse->utilityStmt == NULL); Assert(parse->queryId != UINT64CONST(0)); query_context.query_hash = parse->queryId; - MemoryContextSwitchTo(oldctx); - - MemoryContextReset(AQOUtilityMemCtx); - - oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); /* By default, they should be equal */ query_context.fspace_hash = query_context.query_hash; @@ -191,15 +185,14 @@ aqo_planner(Query *parse, cursorOptions, boundParams); } - MemoryContextSwitchTo(oldctx); elog(DEBUG1, "AQO will be used for query '%s', class "UINT64_FORMAT, query_string ? 
query_string : "null string", query_context.query_hash); + MemoryContextSwitchTo(oldctx); oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); cur_classes = lappend_uint64(cur_classes, query_context.query_hash); MemoryContextSwitchTo(oldctx); - oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); if (aqo_mode == AQO_MODE_DISABLED) diff --git a/storage.c b/storage.c index 868a12ec..29d4ef58 100644 --- a/storage.c +++ b/storage.c @@ -2096,7 +2096,6 @@ cleanup_aqo_database(bool gentle, int *fs_num, int *fss_num) for(i = 0; i < dentry->nrels; i++) { Oid reloid = ObjectIdGetDatum(*(Oid *)ptr); - MemoryContext oldctx = MemoryContextSwitchTo(AQOUtilityMemCtx); if (!SearchSysCacheExists1(RELOID, reloid)) /* Remember this value */ @@ -2105,7 +2104,6 @@ cleanup_aqo_database(bool gentle, int *fs_num, int *fss_num) else actual_fss = list_append_unique_int(actual_fss, dentry->key.fss); - MemoryContextSwitchTo(oldctx); ptr += sizeof(Oid); } @@ -2155,8 +2153,6 @@ cleanup_aqo_database(bool gentle, int *fs_num, int *fss_num) /* Query class preferences */ (*fs_num) += (int) _aqo_queries_remove(entry->queryid); } - - MemoryContextReset(AQOUtilityMemCtx); } /* From 6d364a854f5418d524ab34117c99f353c0f2b0f6 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 16 Jun 2022 13:48:57 +0300 Subject: [PATCH 062/134] Add more TAP tests on joint usage of query_id machinery by AQO and PGSS extensions. Some minor inconsistencies were detected (see issue #71). 
Authors: A.Kazarinov, A.Lepikhov --- t/002_pg_stat_statements_aqo.pl | 225 ++++++++++++++++++++++++++------ 1 file changed, 186 insertions(+), 39 deletions(-) diff --git a/t/002_pg_stat_statements_aqo.pl b/t/002_pg_stat_statements_aqo.pl index dfa84b3a..4d8b04d7 100644 --- a/t/002_pg_stat_statements_aqo.pl +++ b/t/002_pg_stat_statements_aqo.pl @@ -3,59 +3,206 @@ use PostgresNode; use TestLib; -use Test::More tests => 2; +use Test::More tests => 12; -my $node = get_new_node('profiling'); +my $node = get_new_node('test'); $node->init; -print "create conf"; $node->append_conf('postgresql.conf', qq{ aqo.mode = 'disabled' - aqo.profile_classes = -1 - aqo.profile_enable = 'true' aqo.force_collect_stat = 'false' log_statement = 'ddl' # reduce size of logs. aqo.join_threshold = 0 + pg_stat_statements.track = 'none' }); -# Test constants. -my $TRANSACTIONS = 100; -my $CLIENTS = 10; -my $THREADS = 10; my $query_id; - -# General purpose variables. -my $res; +my ($res, $aqo_res); my $total_classes; $node->start(); - # ERROR: AQO allow to load library only on startup -print "Create extension aqo"; -$node->psql('postgres', "CREATE EXTENSION aqo"); -$node->psql('postgres', "CREATE EXTENSION pg_stat_statements"); -print "create preload libraries"; -$node->append_conf('postgresql.conf', qq{shared_preload_libraries = 'aqo, pg_stat_statements'}); + +$node->psql('postgres', "CREATE EXTENSION aqo"); # Error +$node->append_conf('postgresql.conf', qq{ + shared_preload_libraries = 'aqo, pg_stat_statements' + aqo.mode = 'disabled' # disable AQO on schema creation +}); $node->restart(); -$node->psql('postgres', "CREATE EXTENSION aqo"); -$node->psql('postgres', "CREATE EXTENSION pg_stat_statements"); +$node->safe_psql('postgres', " + CREATE EXTENSION aqo; + CREATE EXTENSION pg_stat_statements; +"); + +# Execute test DDL +$node->psql('postgres', " + CREATE TABLE aqo_test0(a int, b int, c int, d int); + WITH RECURSIVE t(a, b, c, d) AS ( + VALUES (0, 0, 0, 0) + UNION ALL + SELECT t.a + 1, t.b + 
1, t.c + 1, t.d + 1 FROM t WHERE t.a < 2000 + ) INSERT INTO aqo_test0 (SELECT * FROM t); + CREATE INDEX aqo_test0_idx_a ON aqo_test0 (a); + ANALYZE aqo_test0; +"); $node->psql('postgres', " - ALTER SYSTEM SET aqo.profile_enable = 'true'; + CREATE TABLE trig( + x double precision, + sinx double precision, + cosx double precision); + WITH RECURSIVE t(a, b, c) AS ( + VALUES (0.0::double precision, 0.0::double precision, 1.0::double precision) + UNION ALL + SELECT t.a + pi() / 50, sin(t.a + pi() / 50), cos(t.a + pi() / 50) + FROM t WHERE t.a < 2 * pi() + ) INSERT INTO trig (SELECT * FROM t); + CREATE INDEX trig_idx_x ON trig (x); + ANALYZE trig; +"); +$node->psql('postgres', " + CREATE TABLE department( + DepartmentID INT PRIMARY KEY NOT NULL, + DepartmentName VARCHAR(20) + ); + CREATE TABLE employee ( + LastName VARCHAR(20), + DepartmentID INT REFERENCES department(DepartmentID) + ); + INSERT INTO department + VALUES (31, 'Sales'), (33, 'Engineering'), (34, 'Clerical'), + (35, 'Marketing'); + INSERT INTO employee + VALUES ('Rafferty', 31), ('Jones', 33), ('Heisenberg', 33), + ('Robinson', 34), ('Smith', 34), ('Williams', NULL); +"); +$node->psql('postgres', " + ALTER SYSTEM SET aqo.mode = 'learn'; + ALTER SYSTEM SET pg_stat_statements.track = 'all'; SELECT pg_reload_conf(); "); -$node->psql('postgres', "CREATE TABLE aqo_test0(a int, b int, c int, d int); -WITH RECURSIVE t(a, b, c, d) -AS ( - VALUES (0, 0, 0, 0) - UNION ALL - SELECT t.a + 1, t.b + 1, t.c + 1, t.d + 1 FROM t WHERE t.a < 2000 -) INSERT INTO aqo_test0 (SELECT * FROM t); -CREATE INDEX aqo_test0_idx_a ON aqo_test0 (a); -ANALYZE aqo_test0;"); -$node->psql('postgres', " - ALTER SYSTEM SET aqo.mode = 'controlled'; -"); -$res = $node->safe_psql('postgres', "SELECT * FROM aqo_test0"); -$res = $node->safe_psql('postgres', "SELECT count(*) FROM pg_stat_statements where query = 'SELECT * FROM aqo_test0'"); -is($res, 1); # The same query add in pg_stat_statements -$res = $node->safe_psql('postgres', "SELECT count(*) 
from aqo_query_texts where query_text = 'SELECT * FROM aqo_test0'"); -is($res, 0); # The same query isn't added into aqo_query_texts -$node->stop(); \ No newline at end of file +# Trivial query without any clauses/parameters +$node->safe_psql('postgres', "SELECT * FROM aqo_test0"); +$res = $node->safe_psql('postgres', " + SELECT query FROM pg_stat_statements + JOIN aqo_queries USING(queryid) +"); # Both extensions have the same QueryID for the query above +is($res, "SELECT * FROM aqo_test0"); + +# Check number of queries which logged in both extensions. +$aqo_res = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_query_texts +"); # 2 - Common fs and trivial select. +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM pg_stat_statements +"); # 3 - trivial select and two utility queries above. +is($res - $aqo_res, 1); + +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM pg_stat_statements + WHERE queryid NOT IN (SELECT queryid FROM aqo_query_texts) +"); # Trivial select and utility query to pg_stat_statements +is($res, 2); + +$node->safe_psql('postgres', " + SELECT * FROM trig WHERE sinx < 0.5 and cosx > -0.5 +"); # Log query with two constants +$node->safe_psql('postgres', " + SELECT count(*) FROM pg_stat_statements + WHERE query = 'SELECT * FROM trig WHERE sinx < 0.5 and cosx > -0.5' +"); # The pg_stat_statements utility queries are logged too +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_query_texts aqt, pg_stat_statements pgss + WHERE aqt.queryid = pgss.queryid +"); +is($res, 4); + +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM pg_stat_statements + WHERE queryid NOT IN (SELECT queryid FROM aqo_query_texts) +"); # pgss logs queries to AQO tables these AQO are skip +is($res, 4); +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_queries + WHERE queryid NOT IN (SELECT queryid FROM pg_stat_statements) +"); # PGSS have logged all queries that AQO logged, expect common fs. 
+is($res, 1); + +# ############################################################################ # +# +# Complex queries with meaningful tables +# +# ############################################################################ # + +$node->safe_psql('postgres', " + SELECT employee.LastName, employee.DepartmentID, department.DepartmentName + FROM employee + INNER JOIN department ON employee.DepartmentID = department.DepartmentID; +"); # Log query with a JOIN and a join clause +$node->safe_psql('postgres', " + EXPLAIN ANALYZE + SELECT ee.LastName, ee.DepartmentID, dpt.DepartmentName + FROM employee ee + INNER JOIN department dpt ON (ee.DepartmentID = dpt.DepartmentID) + WHERE ee.LastName NOT LIKE 'Wi%'; +"); # Use a table aliases, EXPLAIN ANALYZE mode and WHERE clause. +$node->safe_psql('postgres', " + SELECT ee.LastName, ee.DepartmentID, dpt.DepartmentName + FROM employee ee + INNER JOIN department dpt ON (ee.DepartmentID = dpt.DepartmentID) + WHERE ee.LastName NOT LIKE 'Wi%'; +"); # Without EXPLAIN ANALYZE option +$node->safe_psql('postgres', " + WITH smth AS ( + SELECT a FROM aqo_test0 + ) SELECT * FROM employee ee, department dpt, smth + WHERE (ee.DepartmentID = dpt.DepartmentID) + AND (ee.LastName NOT LIKE 'Wi%') + AND (ee.DepartmentID < smth.a); +"); # Use CTE +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_query_texts aqt, pg_stat_statements pgss + WHERE aqt.queryid = pgss.queryid +"); # Check, both extensions added the query with the same query ID. +is($res, 8); + +# Check query texts identity. +# TODO: Maybe AQO should use parameterized query text too? +$res = $node->safe_psql('postgres', " + SELECT count(*) + FROM aqo_query_texts aqt, pg_stat_statements pgss + WHERE aqt.queryid = pgss.queryid AND aqt.query_text != pgss.query +"); # PGSS processes a query and generalizes it. 
So, some queries is diferent +is($res, 6); +$res = $node->safe_psql('postgres', " + SELECT count(*) + FROM aqo_query_texts aqt, pg_stat_statements pgss + WHERE aqt.queryid = pgss.queryid AND aqt.query_text = pgss.query +"); # Non-parameterized queries (without constants in a body of query) will have the same query text. +is($res, 2); + +# Check queries hasn't logged by another extension + +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM pg_stat_statements + WHERE queryid NOT IN (SELECT queryid FROM aqo_queries) + AND query NOT LIKE '%aqo_quer%' +"); # PGSS logs all the same except queries with AQO-related objects. +is($res, 1); # allow to find shifts in PGSS logic + +# TODO: why queries in EXPLAIN ANALYZE mode have different query ID in AQO +# and PGSS extensions? + +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_queries + WHERE queryid NOT IN (SELECT queryid FROM pg_stat_statements) +"); +is($res, 1); + +# only first entry in aqo_query_texts has zero hash +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_query_texts + WHERE queryid = 0 +"); +is($res, 1); + +# TODO: check queries with queries in stored procedures + +$node->stop(); From 9595a940041ee2c3821e929dff3ef2ba8eae6b6a Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 7 Oct 2022 19:07:23 +0500 Subject: [PATCH 063/134] Fix the bug with serialization machinery. Change the AQO plan node behaviour: now we add such node also in the case when forced stat gathering option is enabled even in disabled mode. Also: 1. Do not add AQO info into an explain AQO sentence if AQO node isn't existed for the node. 2. Exclude node which aren't predicted by AQO from a cardinality error computation. Introduce 'parallel_workers' regression test. 
--- Makefile | 1 + expected/feature_subspace.out | 7 +- expected/look_a_like.out | 7 +- expected/parallel_workers.out | 125 ++++++++++++++++++++++++++++++++++ expected/unsupported.out | 8 +-- path_utils.c | 36 +++++----- postprocessing.c | 34 +++++---- sql/parallel_workers.sql | 61 +++++++++++++++++ 8 files changed, 235 insertions(+), 44 deletions(-) create mode 100644 expected/parallel_workers.out create mode 100644 sql/parallel_workers.sql diff --git a/Makefile b/Makefile index 0616b1b0..3cdf520d 100755 --- a/Makefile +++ b/Makefile @@ -23,6 +23,7 @@ REGRESS = aqo_disabled \ forced_stat_collection \ unsupported \ clean_aqo_data \ + parallel_workers \ plancache \ statement_timeout \ temp_tables \ diff --git a/expected/feature_subspace.out b/expected/feature_subspace.out index 185bede0..a49be254 100644 --- a/expected/feature_subspace.out +++ b/expected/feature_subspace.out @@ -29,19 +29,17 @@ WHERE str NOT LIKE '%Memory%'; AQO not used Merge Cond: (a.x = b.x) -> Sort (actual rows=10 loops=1) - AQO not used Sort Key: a.x -> Seq Scan on a (actual rows=10 loops=1) AQO not used -> Sort (actual rows=11 loops=1) - AQO not used Sort Key: b.x -> Seq Scan on b (actual rows=100 loops=1) AQO not used Using aqo: true AQO mode: LEARN JOINS: 0 -(16 rows) +(14 rows) -- TODO: Using method of other classes neighbours we get a bad estimation. SELECT str AS result @@ -56,13 +54,12 @@ WHERE str NOT LIKE '%Memory%'; -> Seq Scan on b (actual rows=100 loops=1) AQO: rows=100, error=0% -> Hash (actual rows=10 loops=1) - AQO not used -> Seq Scan on a (actual rows=10 loops=1) AQO: rows=10, error=0% Using aqo: true AQO mode: LEARN JOINS: 0 -(12 rows) +(11 rows) -- Look into the reason: two JOINs from different classes have the same FSS. 
SELECT to_char(d1.targets[1], 'FM999.00') AS target FROM aqo_data d1 diff --git a/expected/look_a_like.out b/expected/look_a_like.out index 9cba2c48..f3918dbf 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -148,7 +148,6 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; Output: a.x, b.y Merge Cond: (a.x = b.y) -> Sort (actual rows=1000 loops=1) - AQO not used Output: a.x Sort Key: a.x -> Seq Scan on public.a (actual rows=1000 loops=1) @@ -156,7 +155,6 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; Output: a.x Filter: (a.x < 10) -> Sort (actual rows=99901 loops=1) - AQO not used Output: b.y Sort Key: b.y -> Seq Scan on public.b (actual rows=1000 loops=1) @@ -165,7 +163,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; Using aqo: true AQO mode: LEARN JOINS: 0 -(22 rows) +(20 rows) -- cardinality 100 in Seq Scan on a and Seq Scan on b SELECT str AS result @@ -215,7 +213,6 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' Output: a.x Filter: (a.x < 10) -> Hash (actual rows=0 loops=1) - AQO not used Output: b.y -> Seq Scan on public.b (actual rows=0 loops=1) AQO: rows=1, error=100% @@ -225,7 +222,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' Using aqo: true AQO mode: LEARN JOINS: 0 -(19 rows) +(18 rows) RESET enable_material; DROP TABLE a,b CASCADE; diff --git a/expected/parallel_workers.out b/expected/parallel_workers.out new file mode 100644 index 00000000..14e086c8 --- /dev/null +++ b/expected/parallel_workers.out @@ -0,0 +1,125 @@ +-- Specifically test AQO machinery for queries uses partial paths and executed +-- with parallel workers. +CREATE EXTENSION aqo; +-- Utility tool. Allow to filter system-dependent strings from explain output. 
+CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +SET aqo.join_threshold = 0; +SET aqo.mode = 'learn'; +SET aqo.show_details = true; +-- Be generous with a number parallel workers to test the machinery +SET max_parallel_workers = 64; +SET max_parallel_workers_per_gather = 64; +-- Enforce usage of parallel workers +SET parallel_setup_cost = 0.1; +SET parallel_tuple_cost = 0.0001; +CREATE TABLE t AS ( + SELECT x AS id, repeat('a', 512) AS payload FROM generate_series(1, 1E5) AS x +); +ANALYZE t; +-- Simple test. Check serialization machinery mostly. +SELECT count(*) FROM t WHERE id % 100 = 0; -- Learning stage + count +------- + 1000 +(1 row) + +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT count(*) FROM t WHERE id % 100 = 0;') AS str +WHERE str NOT LIKE '%Worker%'; + str +-------------------------------------------------------------------- + Finalize Aggregate (actual rows=1 loops=1) + AQO not used + -> Gather (actual rows=3 loops=1) + AQO not used + -> Partial Aggregate (actual rows=1 loops=3) + AQO not used + -> Parallel Seq Scan on t (actual rows=333 loops=3) + AQO: rows=1000, error=0% + Filter: ((id % '100'::numeric) = '0'::numeric) + Rows Removed by Filter: 33000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(13 rows) + +-- More complex query just to provoke errors +SELECT count(*) FROM + (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, + (SELECT max(id) AS id, payload FROM t + WHERE id % 101 = 0 GROUP BY (payload)) AS q2 +WHERE q1.id = q2.id; -- Learning stage + count +------- + 0 +(1 row) + +-- XXX: Why grouping prediction isn't working here? 
+SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) FROM + (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, + (SELECT max(id) AS id, payload FROM t + WHERE id % 101 = 0 GROUP BY (payload)) AS q2 +WHERE q1.id = q2.id;') AS str +WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%'; + str +-------------------------------------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Merge Join (actual rows=0 loops=1) + AQO not used + Merge Cond: (q2.id = t_1.id) + -> Sort (actual rows=1 loops=1) + Sort Key: q2.id + -> Subquery Scan on q2 (actual rows=1 loops=1) + AQO not used + -> Finalize GroupAggregate (actual rows=1 loops=1) + AQO not used + Group Key: t.payload + -> Gather Merge (actual rows=3 loops=1) + AQO not used + -> Partial GroupAggregate (actual rows=1 loops=3) + AQO not used + Group Key: t.payload + -> Sort (actual rows=330 loops=3) + AQO not used + Sort Key: t.payload + -> Parallel Seq Scan on t (actual rows=330 loops=3) + AQO: rows=991, error=0% + Filter: ((id % '101'::numeric) = '0'::numeric) + Rows Removed by Filter: 33003 + -> Group (actual rows=1000 loops=1) + AQO not used + Group Key: t_1.id + -> Gather Merge (actual rows=1000 loops=1) + AQO not used + -> Group (actual rows=333 loops=3) + AQO not used + Group Key: t_1.id + -> Sort (actual rows=333 loops=3) + AQO not used + Sort Key: t_1.id + -> Parallel Seq Scan on t t_1 (actual rows=333 loops=3) + AQO: rows=991, error=-1% + Filter: ((id % '100'::numeric) = '0'::numeric) + Rows Removed by Filter: 33000 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(42 rows) + +RESET parallel_tuple_cost; +RESET parallel_setup_cost; +RESET max_parallel_workers; +RESET max_parallel_workers_per_gather; +DROP TABLE t; +DROP FUNCTION expln; +DROP EXTENSION aqo; diff --git a/expected/unsupported.out b/expected/unsupported.out index 8d51a497..efbbc36b 100644 --- a/expected/unsupported.out +++ 
b/expected/unsupported.out @@ -375,7 +375,6 @@ SELECT count(*) FROM Filter: (x <> t_1.x) Rows Removed by Filter: 50 -> Hash (actual rows=851 loops=1) - AQO not used -> Seq Scan on t (actual rows=851 loops=1) AQO: rows=851, error=0% Filter: (((x % 3))::numeric < (SubPlan 1)) @@ -390,7 +389,7 @@ SELECT count(*) FROM Using aqo: true AQO mode: LEARN JOINS: 1 -(31 rows) +(30 rows) -- Two identical subplans in a clause EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) @@ -543,12 +542,11 @@ WHERE str NOT LIKE '%Heap Blocks%' AND str NOT LIKE '%Query Identifier%'; Filter: (t.x < 3) Rows Removed by Filter: 300 -> Bitmap Index Scan on ind2 (actual rows=350 loops=1) - AQO not used Index Cond: (mod(t.x, 3) = 1) Using aqo: true AQO mode: LEARN JOINS: 0 -(14 rows) +(13 rows) -- Best choice is ... ANALYZE t; @@ -577,7 +575,7 @@ ORDER BY (md5(query_text),error) DESC; -------+------------------------------------------------------------------------------------------------ 0.768 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; 0.070 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; - 1.416 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + 0.000 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; 0.000 | SELECT * FROM + | (SELECT * FROM t WHERE x < 0) AS t0 + | JOIN + diff --git a/path_utils.c b/path_utils.c index cd458c67..765dd189 100644 --- a/path_utils.c +++ b/path_utils.c @@ -62,9 +62,7 @@ create_aqo_plan_node() /* * Extract an AQO node from the plan private field. - * If no one node was found, return pointer to the default value or allocate new - * node (with default value) according to 'create' field. - * Can't return NULL value at all. + * If no one node was found, return pointer to the default value or return NULL. 
*/ AQOPlanNode * get_aqo_plan_node(Plan *plan, bool create) @@ -89,7 +87,7 @@ get_aqo_plan_node(Plan *plan, bool create) if (node == NULL) { if (!create) - return &DefaultAQOPlanNode; + return NULL; node = create_aqo_plan_node(); plan->ext_nodes = lappend(plan->ext_nodes, node); @@ -475,9 +473,14 @@ is_appropriate_path(Path *path) } /* - * Converts path info into plan node for collecting it after query execution. + * Add AQO data into the plan node, if necessary. + * + * The necesssary case is when AQO is learning on this query, used for a + * prediction (and we will need the data to show prediction error at the end) or + * just to gather a plan statistics. * Don't switch here to any AQO-specific memory contexts, because we should - * store AQO prediction in the same context, as the plan. + * store AQO prediction in the same context, as the plan. So, explicitly free + * all unneeded data. */ void aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) @@ -489,7 +492,8 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) if (prev_create_plan_hook) prev_create_plan_hook(root, src, dest); - if (!query_context.use_aqo && !query_context.learn_aqo) + if (!query_context.use_aqo && !query_context.learn_aqo && + !query_context.collect_stat) return; is_join_path = (src->type == T_NestPath || src->type == T_MergePath || @@ -546,6 +550,11 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) } else { + /* + * In the case of forced stat gathering AQO must store fss as well as + * parallel divisor. Negative predicted cardinality field will be a sign + * that it is not a prediction, just statistics. 
+ */ node->prediction = src->parent->predicted_cardinality; node->fss = src->parent->fss_hash; } @@ -618,11 +627,6 @@ AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) { AQOPlanNode *node = (AQOPlanNode *) enode; - node->had_path = false; - node->jointype = 0; - node->parallel_divisor = 1.0; - node->was_parametrized = false; - /* For Adaptive optimization DEBUG purposes */ WRITE_INT_FIELD(fss); WRITE_FLOAT_FIELD(prediction, "%.0f"); @@ -670,10 +674,10 @@ AQOnodeRead(struct ExtensibleNode *enode) const char *token; int length; - READ_BOOL_FIELD(had_path); - READ_ENUM_FIELD(jointype, JoinType); - READ_FLOAT_FIELD(parallel_divisor); - READ_BOOL_FIELD(was_parametrized); + local_node->had_path = false; + local_node->jointype = 0; + local_node->parallel_divisor = 1.0; + local_node->was_parametrized = false; local_node->rels = palloc0(sizeof(RelSortOut)); local_node->clauses = NIL; diff --git a/postprocessing.c b/postprocessing.c index 619d1c40..abbdcffd 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -109,13 +109,14 @@ learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, * Learn 'not executed' nodes only once, if no one another knowledge exists * for current feature subspace. */ - if (notExecuted && aqo_node->prediction > 0.) + if (notExecuted && aqo_node && aqo_node->prediction > 0.) return; target = log(learned); child_fss = get_fss_for_object(rels->signatures, ctx->clauselist, NIL, NULL,NULL); - fss = get_grouped_exprs_hash(child_fss, aqo_node->grouping_exprs); + fss = get_grouped_exprs_hash(child_fss, + aqo_node ? aqo_node->grouping_exprs : NIL); /* Critical section */ atomic_fss_learn_step(fs, fss, data, NULL, @@ -144,13 +145,13 @@ learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, ctx->selectivities, &ncols, &features); /* Only Agg nodes can have non-empty a grouping expressions list. 
*/ - Assert(!IsA(plan, Agg) || aqo_node->grouping_exprs != NIL); + Assert(!IsA(plan, Agg) || !aqo_node || aqo_node->grouping_exprs != NIL); /* * Learn 'not executed' nodes only once, if no one another knowledge exists * for current feature subspace. */ - if (notExecuted && aqo_node->prediction > 0) + if (notExecuted && aqo_node && aqo_node->prediction > 0) return; data = OkNNr_allocate(ncols); @@ -301,18 +302,18 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) static bool should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, - double predicted, double *nrows, double *rfactor) + double predicted, double nrows, double *rfactor) { if (ctx->isTimedOut) { - if (ctx->learn && *nrows > predicted * 1.2) + if (ctx->learn && nrows > predicted * 1.2) { /* This node s*/ if (aqo_show_details) elog(NOTICE, "[AQO] Learn on a plan node ("UINT64_FORMAT", %d), " "predicted rows: %.0lf, updated prediction: %.0lf", - query_context.query_hash, node->fss, predicted, *nrows); + query_context.query_hash, node->fss, predicted, nrows); *rfactor = RELIABILITY_MIN; return true; @@ -324,11 +325,11 @@ should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, { /* This is much more reliable data. So we can correct our prediction. */ if (ctx->learn && aqo_show_details && - fabs(*nrows - predicted) / predicted > 0.2) + fabs(nrows - predicted) / predicted > 0.2) elog(NOTICE, "[AQO] Learn on a finished plan node ("UINT64_FORMAT", %d), " "predicted rows: %.0lf, updated prediction: %.0lf", - query_context.query_hash, node->fss, predicted, *nrows); + query_context.query_hash, node->fss, predicted, nrows); *rfactor = 0.9 * (RELIABILITY_MAX - RELIABILITY_MIN); return true; @@ -369,7 +370,12 @@ learnOnPlanState(PlanState *p, void *context) /* If something goes wrong, return quickly. */ return true; - aqo_node = get_aqo_plan_node(p->plan, false); + if ((aqo_node = get_aqo_plan_node(p->plan, false)) == NULL) + /* + * Skip the node even for error calculation. 
It can be incorrect in the + * case of parallel workers (parallel_divisor not known). + */ + goto end; /* * Compute real value of rows, passed through this node. Summarize rows @@ -475,7 +481,7 @@ learnOnPlanState(PlanState *p, void *context) /* * Some nodes inserts after planning step (See T_Hash node type). - * In this case we have'nt AQO prediction and fss record. + * In this case we haven't AQO prediction and fss record. */ if (aqo_node->had_path) { @@ -505,7 +511,7 @@ learnOnPlanState(PlanState *p, void *context) Assert(predicted >= 1. && learn_rows >= 1.); - if (should_learn(p, aqo_node, ctx, predicted, &learn_rows, &rfactor)) + if (should_learn(p, aqo_node, ctx, predicted, learn_rows, &rfactor)) { if (IsA(p, AggState)) learn_agg_sample(&SubplanCtx, @@ -521,6 +527,7 @@ learnOnPlanState(PlanState *p, void *context) } } +end: ctx->clauselist = list_concat(ctx->clauselist, SubplanCtx.clauselist); ctx->selectivities = list_concat(ctx->selectivities, SubplanCtx.selectivities); @@ -931,7 +938,8 @@ print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) if (IsQueryDisabled() || !plan || es->format != EXPLAIN_FORMAT_TEXT) return; - aqo_node = get_aqo_plan_node(plan, false); + if ((aqo_node = get_aqo_plan_node(plan, false)) == NULL) + return; if (!aqo_show_details || !ps) goto explain_end; diff --git a/sql/parallel_workers.sql b/sql/parallel_workers.sql new file mode 100644 index 00000000..3fbccb48 --- /dev/null +++ b/sql/parallel_workers.sql @@ -0,0 +1,61 @@ +-- Specifically test AQO machinery for queries uses partial paths and executed +-- with parallel workers. + +CREATE EXTENSION aqo; + +-- Utility tool. Allow to filter system-dependent strings from explain output. 
+CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +SET aqo.join_threshold = 0; +SET aqo.mode = 'learn'; +SET aqo.show_details = true; + +-- Be generous with a number parallel workers to test the machinery +SET max_parallel_workers = 64; +SET max_parallel_workers_per_gather = 64; +-- Enforce usage of parallel workers +SET parallel_setup_cost = 0.1; +SET parallel_tuple_cost = 0.0001; + +CREATE TABLE t AS ( + SELECT x AS id, repeat('a', 512) AS payload FROM generate_series(1, 1E5) AS x +); +ANALYZE t; + +-- Simple test. Check serialization machinery mostly. +SELECT count(*) FROM t WHERE id % 100 = 0; -- Learning stage +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT count(*) FROM t WHERE id % 100 = 0;') AS str +WHERE str NOT LIKE '%Worker%'; + +-- More complex query just to provoke errors +SELECT count(*) FROM + (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, + (SELECT max(id) AS id, payload FROM t + WHERE id % 101 = 0 GROUP BY (payload)) AS q2 +WHERE q1.id = q2.id; -- Learning stage +-- XXX: Why grouping prediction isn't working here? +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) FROM + (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, + (SELECT max(id) AS id, payload FROM t + WHERE id % 101 = 0 GROUP BY (payload)) AS q2 +WHERE q1.id = q2.id;') AS str +WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%'; + + +RESET parallel_tuple_cost; +RESET parallel_setup_cost; +RESET max_parallel_workers; +RESET max_parallel_workers_per_gather; +DROP TABLE t; +DROP FUNCTION expln; +DROP EXTENSION aqo; From 56dcbb7e12609fbaed9eaa23db7a22f320d34f04 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Mon, 10 Oct 2022 16:28:53 +0500 Subject: [PATCH 064/134] Github actions CI: change code of an AQO branch name designation. 
Now it should work on push trigger as well as on a pull request [1]. [1] https://stackoverflow.com/questions/58033366/how-to-get-the-current-branch-within-github-actions --- .github/workflows/c-cpp.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index a66f06ba..ba4001f6 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -6,6 +6,9 @@ on: pull_request: branches: [ stable14 ] +env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + jobs: build: @@ -14,7 +17,7 @@ jobs: steps: - name: pg run: | - echo "Deploying to production server on branch $GITHUB_REF" + echo "Deploying to production server on branch" $BRANCH_NAME git config --global user.email "ci@postgrespro.ru" git config --global user.name "CI PgPro admin" git clone https://github.com/postgres/postgres.git pg @@ -22,7 +25,7 @@ jobs: git checkout REL_14_STABLE ./configure --prefix=`pwd`/tmp_install CFLAGS="-O3" git clone https://github.com/postgrespro/aqo.git contrib/aqo - git -C contrib/aqo checkout $GITHUB_REF + git -C contrib/aqo checkout $BRANCH_NAME patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg14.patch make -j4 > /dev/null && make -j4 -C contrib > /dev/null env CLIENTS=50 THREADS=50 make -C contrib/aqo check From e49f2fd29d075f8742d1103d49b0f94ef8ad55b8 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Mon, 10 Oct 2022 16:41:39 +0500 Subject: [PATCH 065/134] Bugfix. Incorrect pointer shift during reading from learn_cache. --- learn_cache.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/learn_cache.c b/learn_cache.c index 74b72249..67590e5d 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -127,6 +127,12 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids) } } + /* + * Kludge code. But we should rewrite this code because now all knowledge + * base lives in non-transactional shared memory. 
+ */ + ptr = (char *) hdr + sizeof(dsm_block_hdr) + (sizeof(double) * data->cols * aqo_K); + /* copy targets into DSM storage */ memcpy(ptr, data->targets, sizeof(double) * hdr->rows); ptr += sizeof(double) * aqo_K; From de8cb2a210432a4e894661dc583562523f37052c Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 12 Oct 2022 14:40:25 +0500 Subject: [PATCH 066/134] Unify elog messages for the case of corrupted hash table. --- learn_cache.c | 4 ++-- storage.c | 22 +++++++++++----------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/learn_cache.c b/learn_cache.c index 67590e5d..3249fe3a 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -293,7 +293,7 @@ lc_flush_data(void) aqo_data_store(hdr->key.fs, hdr->key.fss, &data, reloids); if (!hash_search(fss_htab, (void *) &hdr->key, HASH_REMOVE, NULL)) - elog(ERROR, "[AQO] Flush: local ML cache is corrupted."); + elog(PANIC, "[AQO] Flush: local ML cache is corrupted."); } reset_dsm_cache(); @@ -323,7 +323,7 @@ lc_assign_hook(bool newval, void *extra) while ((entry = (htab_entry *) hash_seq_search(&status)) != NULL) { if (!hash_search(fss_htab, (void *) &entry->key, HASH_REMOVE, NULL)) - elog(ERROR, "[AQO] The local ML cache is corrupted."); + elog(PANIC, "[AQO] The local ML cache is corrupted."); } LWLockRelease(&aqo_state->lock); } diff --git a/storage.c b/storage.c index 29d4ef58..28375f65 100644 --- a/storage.c +++ b/storage.c @@ -388,8 +388,8 @@ aqo_stat_reset(void) hash_seq_init(&hash_seq, stat_htab); while ((entry = hash_seq_search(&hash_seq)) != NULL) { - if (hash_search(stat_htab, &entry->queryid, HASH_REMOVE, NULL) == NULL) - elog(ERROR, "[AQO] hash table corrupted"); + if (!hash_search(stat_htab, &entry->queryid, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); num_remove++; } aqo_state->stat_changed = true; @@ -1225,7 +1225,7 @@ _aqo_data_remove(data_key *key) dsa_free(data_dsa, entry->data_dp); entry->data_dp = InvalidDsaPointer; - if (hash_search(data_htab, key, 
HASH_REMOVE, NULL) == NULL) + if (!hash_search(data_htab, key, HASH_REMOVE, NULL)) elog(PANIC, "[AQO] Inconsistent data hash table"); aqo_state->data_changed = true; @@ -1256,8 +1256,8 @@ aqo_qtexts_reset(void) Assert(DsaPointerIsValid(entry->qtext_dp)); dsa_free(qtext_dsa, entry->qtext_dp); - if (hash_search(qtexts_htab, &entry->queryid, HASH_REMOVE, NULL) == NULL) - elog(ERROR, "[AQO] hash table corrupted"); + if (!hash_search(qtexts_htab, &entry->queryid, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); num_remove++; } aqo_state->qtexts_changed = true; @@ -1718,8 +1718,8 @@ _aqo_data_clean(uint64 fs) Assert(DsaPointerIsValid(entry->data_dp)); dsa_free(data_dsa, entry->data_dp); entry->data_dp = InvalidDsaPointer; - if (hash_search(data_htab, &entry->key, HASH_REMOVE, NULL) == NULL) - elog(ERROR, "[AQO] hash table corrupted"); + if (!hash_search(data_htab, &entry->key, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); removed++; } @@ -1745,8 +1745,8 @@ aqo_data_reset(void) { Assert(DsaPointerIsValid(entry->data_dp)); dsa_free(data_dsa, entry->data_dp); - if (hash_search(data_htab, &entry->key, HASH_REMOVE, NULL) == NULL) - elog(ERROR, "[AQO] hash table corrupted"); + if (!hash_search(data_htab, &entry->key, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); num_remove++; } @@ -1885,8 +1885,8 @@ aqo_queries_reset(void) /* Don't remove default feature space */ continue; - if (hash_search(queries_htab, &entry->queryid, HASH_REMOVE, NULL) == NULL) - elog(ERROR, "[AQO] hash table corrupted"); + if (!hash_search(queries_htab, &entry->queryid, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); num_remove++; } From fc800224c583308e0aba4c17319a396d04071167 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 14 Oct 2022 09:43:07 +0500 Subject: [PATCH 067/134] A couple of bugfixes here: don't enable statement timeout in parallel worker and minor DSM cache fix. 
--- learn_cache.c | 10 ++++++++-- postprocessing.c | 7 +++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/learn_cache.c b/learn_cache.c index 3249fe3a..2fc6644a 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -227,7 +227,7 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **reloids) Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); Assert(hdr->magic == AQO_SHARED_MAGIC); - Assert(hdr && ptr); + Assert(hdr && ptr && hdr->rows > 0); data->rows = hdr->rows; data->cols = hdr->cols; @@ -245,6 +245,12 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **reloids) } } + /* + * Kludge code. But we should rewrite this code because now all knowledge + * base lives in non-transactional shared memory. + */ + ptr = (char *) hdr + sizeof(dsm_block_hdr) + (sizeof(double) * data->cols * aqo_K); + memcpy(data->targets, ptr, sizeof(double) * hdr->rows); ptr += sizeof(double) * aqo_K; memcpy(data->rfactors, ptr, sizeof(double) * hdr->rows); @@ -261,7 +267,7 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **reloids) return calculate_size(hdr->cols, *reloids); } - /* It is just read operation. No any interest in size calculation. */ + /* It is just a read operation. No any interest in size calculation. */ return 0; } diff --git a/postprocessing.c b/postprocessing.c index abbdcffd..0e8ccd50 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -638,6 +638,13 @@ set_timeout_if_need(QueryDesc *queryDesc) { TimestampTz fin_time; + if (IsParallelWorker()) + /* + * AQO timeout should stop only main worker. Other workers would be + * terminated by a regular ERROR machinery. 
+ */ + return false; + if (!get_timeout_active(STATEMENT_TIMEOUT) || !aqo_learn_statement_timeout) return false; From 58ea474bd00265602c03b2b0051d3d2893fd675f Mon Sep 17 00:00:00 2001 From: Alexander Pyhalov Date: Fri, 7 Oct 2022 07:58:59 +0300 Subject: [PATCH 068/134] Extract info from a Foreign Join plan node. --- expected/aqo_fdw.out | 45 +++++++++++++++++++++++++++++++--- path_utils.c | 58 +++++++++++++++++++++++++++++++++++++++++--- sql/aqo_fdw.sql | 21 +++++++++++++++- 3 files changed, 116 insertions(+), 8 deletions(-) diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index 74849914..bd13be82 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -110,7 +110,7 @@ SELECT str FROM expln(' JOINS: 0 (6 rows) --- TODO: Should learn on postgres_fdw nodes +-- Should learn on postgres_fdw nodes SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; @@ -118,7 +118,7 @@ SELECT str FROM expln(' str -------------------------------------------------------------------------------------------------------- Foreign Scan (actual rows=1 loops=1) - AQO not used + AQO: rows=1, error=0% Output: a.x, b.x Relations: (public.frgn a) INNER JOIN (public.frgn b) Remote SQL: SELECT r1.x, r2.x FROM (public.local r1 INNER JOIN public.local r2 ON (((r1.x = r2.x)))) @@ -127,6 +127,39 @@ SELECT str FROM expln(' JOINS: 0 (8 rows) +CREATE TABLE local_a(aid int primary key, aval text); +CREATE TABLE local_b(bid int primary key, aid int references local_a(aid), bval text); +INSERT INTO local_a SELECT i, 'val_' || i FROM generate_series(1,100) i; +INSERT INTO local_b SELECT i, mod((i+random()*10)::numeric, 10) + 1, 'val_' || i FROM generate_series(1,1000) i; +ANALYZE local_a, local_b; +CREATE FOREIGN TABLE frgn_a(aid int, aval text) SERVER loopback OPTIONS (table_name 'local_a'); +CREATE FOREIGN TABLE frgn_b(bid int, aid int, bval text) SERVER loopback OPTIONS (table_name 'local_b'); +EXPLAIN 
(ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from frgn_a AS a, frgn_b AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + QUERY PLAN +----------------------------------------------- + Foreign Scan (actual rows=1000 loops=1) + AQO not used + Relations: (frgn_a a) INNER JOIN (frgn_b b) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(6 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from frgn_a AS a, frgn_b AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + QUERY PLAN +----------------------------------------------- + Foreign Scan (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Relations: (frgn_a a) INNER JOIN (frgn_b b) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(6 rows) + -- TODO: Non-mergejoinable join condition. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM frgn AS a, frgn AS b WHERE a.xfdwid, FDW_MISSING_OK); + if (!fdw || !fdw->fdwname) + return false; + + if (strcmp(fdw->fdwname, "postgres_fdw") != 0) + return false; + + return true; +} + /* * Extract an AQO node from the plan private field. * If no one node was found, return pointer to the default value or return NULL. 
@@ -497,7 +524,8 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) return; is_join_path = (src->type == T_NestPath || src->type == T_MergePath || - src->type == T_HashPath); + src->type == T_HashPath || + (src->type == T_ForeignPath && IS_JOIN_REL(src->parent))); node = get_aqo_plan_node(plan, true); @@ -513,8 +541,32 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) if (is_join_path) { - node->clauses = aqo_get_clauses(root, ((JoinPath *) src)->joinrestrictinfo); - node->jointype = ((JoinPath *) src)->jointype; + if (IsA(src, ForeignPath)) + { + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) src->parent->fdw_private; + List *restrictclauses = NIL; + + if (!fpinfo) + return; + + /* We have to ensure that this is postgres_fdw ForeignPath */ + if (!is_postgres_fdw_server(src->parent->serverid)) + return; + + restrictclauses = list_concat(restrictclauses, fpinfo->joinclauses); + restrictclauses = list_concat(restrictclauses, fpinfo->remote_conds); + restrictclauses = list_concat(restrictclauses, fpinfo->local_conds); + + node->clauses = aqo_get_clauses(root, restrictclauses); + node->jointype = fpinfo->jointype; + + list_free(restrictclauses); + } + else + { + node->clauses = aqo_get_clauses(root, ((JoinPath *) src)->joinrestrictinfo); + node->jointype = ((JoinPath *) src)->jointype; + } } else if (IsA(src, AggPath)) /* Aggregation node must store grouping clauses. 
*/ diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index da1639d9..f225a107 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -61,12 +61,29 @@ SELECT str FROM expln(' SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; ') AS str WHERE str NOT LIKE '%Sort Method%'; --- TODO: Should learn on postgres_fdw nodes +-- Should learn on postgres_fdw nodes SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; ') AS str WHERE str NOT LIKE '%Query Identifier%'; +CREATE TABLE local_a(aid int primary key, aval text); +CREATE TABLE local_b(bid int primary key, aid int references local_a(aid), bval text); +INSERT INTO local_a SELECT i, 'val_' || i FROM generate_series(1,100) i; +INSERT INTO local_b SELECT i, mod((i+random()*10)::numeric, 10) + 1, 'val_' || i FROM generate_series(1,1000) i; +ANALYZE local_a, local_b; + +CREATE FOREIGN TABLE frgn_a(aid int, aval text) SERVER loopback OPTIONS (table_name 'local_a'); +CREATE FOREIGN TABLE frgn_b(bid int, aid int, bval text) SERVER loopback OPTIONS (table_name 'local_b'); + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from frgn_a AS a, frgn_b AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from frgn_a AS a, frgn_b AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + -- TODO: Non-mergejoinable join condition. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM frgn AS a, frgn AS b WHERE a.x Date: Thu, 13 Oct 2022 16:25:01 +0300 Subject: [PATCH 069/134] Add tests on partitioned tables with foreign partitions. 
--- expected/aqo_fdw.out | 83 ++++++++++++++++++++++++++++++++++++++++++++ sql/aqo_fdw.sql | 44 ++++++++++++++++++++++- 2 files changed, 126 insertions(+), 1 deletion(-) diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index bd13be82..a52ba851 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -160,6 +160,89 @@ WHERE a.aid = b.aid AND b.bval like 'val%'; JOINS: 0 (6 rows) +-- Partitioned join over foreign tables +set enable_partitionwise_join = on; +ALTER SERVER loopback OPTIONS (ADD fdw_tuple_cost '1.0'); +CREATE TABLE local_main_p0(aid int, aval text); +CREATE TABLE local_main_p1(aid int, aval text); +CREATE TABLE main (aid int, aval text) PARTITION BY HASH(aid); +CREATE FOREIGN TABLE main_p0 PARTITION OF main FOR VALUES WITH (MODULUS 3, REMAINDER 0) + SERVER loopback OPTIONS (table_name 'local_main_p0'); +CREATE FOREIGN TABLE main_p1 PARTITION OF main FOR VALUES WITH (MODULUS 3, REMAINDER 1) + SERVER loopback OPTIONS (table_name 'local_main_p1'); +CREATE TABLE main_p2 PARTITION OF main FOR VALUES WITH (MODULUS 3, REMAINDER 2); +CREATE TABLE local_ref_p0(bid int, aid int, bval text); +CREATE TABLE local_ref_p1(bid int, aid int, bval text); +CREATE TABLE ref (bid int, aid int, bval text) PARTITION BY HASH(aid); +CREATE FOREIGN TABLE ref_p0 PARTITION OF ref FOR VALUES WITH (MODULUS 3, REMAINDER 0) + SERVER loopback OPTIONS (table_name 'local_ref_p0'); +CREATE FOREIGN TABLE ref_p1 PARTITION OF ref FOR VALUES WITH (MODULUS 3, REMAINDER 1) + SERVER loopback OPTIONS (table_name 'local_ref_p1'); +CREATE TABLE ref_p2 PARTITION OF ref FOR VALUES WITH (MODULUS 3, REMAINDER 2); +INSERT INTO main SELECT i, 'val_' || i FROM generate_series(1,100) i; +INSERT INTO ref SELECT i, mod(i, 10) + 1, 'val_' || i FROM generate_series(1,1000) i; +ANALYZE local_main_p0, local_main_p1, main_p2; +ANALYZE local_ref_p0, local_ref_p1, ref_p2; +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from main AS a, ref AS b +WHERE a.aid = b.aid AND b.bval like 
'val%'; + QUERY PLAN +-------------------------------------------------------------------- + Append (actual rows=1000 loops=1) + AQO not used + -> Foreign Scan (actual rows=400 loops=1) + AQO not used + Relations: (main_p0 a_1) INNER JOIN (ref_p0 b_1) + -> Foreign Scan (actual rows=300 loops=1) + AQO not used + Relations: (main_p1 a_2) INNER JOIN (ref_p1 b_2) + -> Hash Join (actual rows=300 loops=1) + AQO not used + Hash Cond: (b_3.aid = a_3.aid) + -> Seq Scan on ref_p2 b_3 (actual rows=300 loops=1) + AQO not used + Filter: (bval ~~ 'val%'::text) + -> Hash (actual rows=38 loops=1) + Buckets: 1024 Batches: 1 Memory Usage: 10kB + -> Seq Scan on main_p2 a_3 (actual rows=38 loops=1) + AQO not used + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(21 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from main AS a, ref AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + QUERY PLAN +-------------------------------------------------------------------- + Append (actual rows=1000 loops=1) + AQO not used + -> Foreign Scan (actual rows=400 loops=1) + AQO: rows=400, error=0% + Relations: (main_p0 a_1) INNER JOIN (ref_p0 b_1) + -> Foreign Scan (actual rows=300 loops=1) + AQO: rows=300, error=0% + Relations: (main_p1 a_2) INNER JOIN (ref_p1 b_2) + -> Hash Join (actual rows=300 loops=1) + AQO: rows=300, error=0% + Hash Cond: (b_3.aid = a_3.aid) + -> Seq Scan on ref_p2 b_3 (actual rows=300 loops=1) + AQO: rows=300, error=0% + Filter: (bval ~~ 'val%'::text) + -> Hash (actual rows=38 loops=1) + Buckets: 1024 Batches: 1 Memory Usage: 10kB + -> Seq Scan on main_p2 a_3 (actual rows=38 loops=1) + AQO: rows=38, error=0% + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(21 rows) + +DROP TABLE main, local_main_p0, local_main_p1; +DROP TABLE ref, local_ref_p0, local_ref_p1; +ALTER SERVER loopback OPTIONS (DROP fdw_tuple_cost); +reset enable_partitionwise_join; -- TODO: Non-mergejoinable join condition. 
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM frgn AS a, frgn AS b WHERE a.x Date: Fri, 14 Oct 2022 14:32:22 +0500 Subject: [PATCH 070/134] restore_selectivities: avoid links to restrictinfo selectivity field: it can be freed or changed externally --- postprocessing.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/postprocessing.c b/postprocessing.c index 0e8ccd50..75a61707 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -176,7 +176,6 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, int nargs; int *args_hash; int *eclass_hash; - double *cur_sel; int cur_hash; int cur_relid; @@ -191,30 +190,29 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, foreach(l, clauselist) { RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + Selectivity *cur_sel = NULL; - cur_sel = NULL; if (parametrized_sel) { cur_hash = get_clause_hash(rinfo->clause, nargs, args_hash, eclass_hash); cur_sel = selectivity_cache_find_global_relid(cur_hash, cur_relid); - if (cur_sel == NULL) - { - if (join_type == JOIN_INNER) - cur_sel = &rinfo->norm_selec; - else - cur_sel = &rinfo->outer_selec; - } } - else if (join_type == JOIN_INNER) - cur_sel = &rinfo->norm_selec; - else - cur_sel = &rinfo->outer_selec; - if (*cur_sel < 0) - *cur_sel = 0; + if (cur_sel == NULL) + { + cur_sel = palloc(sizeof(double)); + + if (join_type == JOIN_INNER) + *cur_sel = rinfo->norm_selec; + else + *cur_sel = rinfo->outer_selec; + + if (*cur_sel < 0) + *cur_sel = 0; + } - Assert(cur_sel > 0); + Assert(*cur_sel >= 0); lst = lappend(lst, cur_sel); } From d49363fedd6d45b217046e7212b097d9c9f9ba6a Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Mon, 17 Oct 2022 13:52:23 +0500 Subject: [PATCH 071/134] Bugfix. Do not delete AQO ML data file after loading into memory. 
--- storage.c | 1 - 1 file changed, 1 deletion(-) diff --git a/storage.c b/storage.c index 28375f65..4825b54f 100644 --- a/storage.c +++ b/storage.c @@ -926,7 +926,6 @@ data_load(const char *filename, deform_record_t callback, void *ctx) } FreeFile(file); - unlink(filename); elog(LOG, "[AQO] %ld records loaded from file %s.", num, filename); return; From e5946accf3fdbe976d2c9b9cc0032671e34b69b1 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 20 Oct 2022 09:36:21 +0500 Subject: [PATCH 072/134] Add schedule for regression tests instead of list of tests in the REGRESS variable. The real reason is to successfully pass the statement_timeout test in very slow environments. We must inialize REGRESS. So, add an empty dummy test just to define the variable. regress_schedule contains the full list of real tests. So all changes for real tests will be made in a general way in regress_schedule. Authors: a.lepikhov, m.polyakova. --- Makefile | 24 ++++-------------------- expected/aqo_dummy_test.out | 0 regress_schedule | 22 ++++++++++++++++++++++ sql/aqo_dummy_test.sql | 0 4 files changed, 26 insertions(+), 20 deletions(-) create mode 100644 expected/aqo_dummy_test.out create mode 100644 regress_schedule create mode 100644 sql/aqo_dummy_test.sql diff --git a/Makefile b/Makefile index 3cdf520d..b07d7f86 100755 --- a/Makefile +++ b/Makefile @@ -11,26 +11,10 @@ OBJS = $(WIN32RES) \ TAP_TESTS = 1 -REGRESS = aqo_disabled \ - aqo_controlled \ - aqo_intelligent \ - aqo_forced \ - aqo_learn \ - schema \ - aqo_fdw \ - aqo_CVE-2020-14350 \ - gucs \ - forced_stat_collection \ - unsupported \ - clean_aqo_data \ - parallel_workers \ - plancache \ - statement_timeout \ - temp_tables \ - top_queries \ - relocatable\ - look_a_like \ - feature_subspace +# Use an empty dummy test to define the variable REGRESS and therefore run all +# regression tests. regress_schedule contains the full list of real tests. 
+REGRESS = aqo_dummy_test +REGRESS_OPTS = --schedule=$(srcdir)/regress_schedule fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw stat_srcdir = $(top_srcdir)/contrib/pg_stat_statements diff --git a/expected/aqo_dummy_test.out b/expected/aqo_dummy_test.out new file mode 100644 index 00000000..e69de29b diff --git a/regress_schedule b/regress_schedule new file mode 100644 index 00000000..418e14ec --- /dev/null +++ b/regress_schedule @@ -0,0 +1,22 @@ +test: aqo_disabled +test: aqo_controlled +test: aqo_intelligent +test: aqo_forced +test: aqo_learn +test: schema +test: aqo_fdw +test: aqo_CVE-2020-14350 +test: gucs +test: forced_stat_collection +test: unsupported +test: clean_aqo_data +test: parallel_workers +test: plancache +# Performance-dependent test. Can be ignored if executes in containers or on slow machines +ignore: statement_timeout +test: statement_timeout +test: temp_tables +test: top_queries +test: relocatable +test: look_a_like +test: feature_subspace diff --git a/sql/aqo_dummy_test.sql b/sql/aqo_dummy_test.sql new file mode 100644 index 00000000..e69de29b From afdc59ec9a491be26651cce9ed433c482e0bc7e5 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 20 Oct 2022 11:35:21 +0500 Subject: [PATCH 073/134] Update github actions file (c-cpp.yml): 1. Enable TAP-tests 2. Add some useful options for configure and build stages. 3. Parameterize github CI, just to reduce code duplication. Authors: m.polyakova (mostly), and a.lepikhov. 
--- .github/workflows/c-cpp.yml | 9 +++++++-- t/001_pgbench.pl | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index ba4001f6..7bab079d 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -17,13 +17,18 @@ jobs: steps: - name: pg run: | + sudo apt install libipc-run-perl + echo "Deploying to production server on branch" $BRANCH_NAME git config --global user.email "ci@postgrespro.ru" git config --global user.name "CI PgPro admin" + export COPT=-Werror + export CONFIGURE_OPTS="--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" git clone https://github.com/postgres/postgres.git pg cd pg + git checkout REL_14_STABLE - ./configure --prefix=`pwd`/tmp_install CFLAGS="-O3" + ./configure $CONFIGURE_OPTS CFLAGS="-O3" git clone https://github.com/postgrespro/aqo.git contrib/aqo git -C contrib/aqo checkout $BRANCH_NAME patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg14.patch @@ -33,6 +38,6 @@ jobs: echo "Use AQO with debug code included" git clean -fdx git -C contrib/aqo clean -fdx - ./configure --prefix=`pwd`/tmp_install CFLAGS="-DAQO_DEBUG_PRINT -O0" + ./configure $CONFIGURE_OPTS CFLAGS="-DAQO_DEBUG_PRINT -O0" make -j4 > /dev/null && make -j4 -C contrib > /dev/null env CLIENTS=50 THREADS=50 make -C contrib/aqo check diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index eae0c829..3aa3b7b5 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -335,7 +335,7 @@ "SELECT sum(executions_with_aqo) FROM test.aqo_query_stat"); # 25 trans * 10 clients * 4 query classes = 1000 + unique SELECT to pgbench_branches -is($res, 1001, 'Each query should be logged in LEARN mode'); +is($res, $CLIENTS*100+1, 'Each query should be logged in LEARN mode'); $res = $node->safe_psql('postgres', "SELECT sum(executions_without_aqo) FROM test.aqo_query_stat"); is($res, 0, 'AQO has learned on the queries - 2'); From 
c7fc6794c3e7f87a0a9ea2df43cea26af961da0e Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 8 Nov 2022 10:25:54 +0300 Subject: [PATCH 074/134] Fix aqo.fs_max_items, add.fss_max_items. Set GucContext as PGC_POSTMASTER to allow values to be changed only before the instance is started. --- aqo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aqo.c b/aqo.c index 3b11bebc..6d616e6c 100644 --- a/aqo.c +++ b/aqo.c @@ -248,7 +248,7 @@ _PG_init(void) &fs_max_items, 10000, 1, INT_MAX, - PGC_SUSET, + PGC_POSTMASTER, 0, NULL, NULL, @@ -261,7 +261,7 @@ _PG_init(void) &fss_max_items, 100000, 0, INT_MAX, - PGC_SUSET, + PGC_POSTMASTER, 0, NULL, NULL, From 6ee752a58591005383f96f23b83aaaa89eac30ff Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 18 Oct 2022 11:45:26 +0300 Subject: [PATCH 075/134] Add compute_query_id parameter in aqo configure with value as regress. It is necessary for avoiding output Query Identifier while vanille's test are running. (Look at more in explain.c:612. We can get fail condition if only query identifier is not null) Where clause 'NOT LIKE '%Query Identifier%'' is throwed away due to being necessary any more. This addition parameter is appeared if we set compute_query_id parameter with value as 'auto'. Appearance of the parameter is checked in only gucs test. 
--- aqo.conf | 3 ++- expected/aqo_fdw.out | 8 ++++---- expected/gucs.out | 17 +++++++++++------ expected/look_a_like.out | 16 ++++++---------- expected/unsupported.out | 2 +- sql/aqo_fdw.sql | 8 ++++---- sql/gucs.sql | 11 +++++++---- sql/look_a_like.sql | 16 ++++++---------- sql/unsupported.sql | 2 +- 9 files changed, 42 insertions(+), 41 deletions(-) diff --git a/aqo.conf b/aqo.conf index b53b5a5d..586f5147 100644 --- a/aqo.conf +++ b/aqo.conf @@ -1,4 +1,5 @@ autovacuum = off shared_preload_libraries = 'postgres_fdw, aqo' max_parallel_maintenance_workers = 1 # switch off parallel workers because of unsteadiness -aqo.wide_search = 'on' \ No newline at end of file +aqo.wide_search = 'on' +compute_query_id = 'regress' \ No newline at end of file diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index a52ba851..b05be251 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -57,7 +57,7 @@ SELECT x FROM frgn; SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT x FROM frgn WHERE x < 10; -') AS str WHERE str NOT LIKE '%Query Identifier%'; +') AS str; str ----------------------------------------------------------- Foreign Scan on public.frgn (actual rows=1 loops=1) @@ -72,7 +72,7 @@ SELECT str FROM expln(' SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT x FROM frgn WHERE x < 10; -') AS str WHERE str NOT LIKE '%Query Identifier%'; +') AS str; str ----------------------------------------------------------- Foreign Scan on public.frgn (actual rows=1 loops=1) @@ -114,7 +114,7 @@ SELECT str FROM expln(' SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; -') AS str WHERE str NOT LIKE '%Query Identifier%'; +') AS str; str -------------------------------------------------------------------------------------------------------- Foreign Scan (actual rows=1 loops=1) @@ -259,7 +259,7 @@ SELECT * 
FROM frgn AS a, frgn AS b WHERE a.x 10 group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE '%Memory%'; result ---------------------------------------------------------- HashAggregate (actual rows=0 loops=1) @@ -200,7 +196,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' +WHERE str NOT LIKE '%Memory%' ; result ---------------------------------------------------------- diff --git a/expected/unsupported.out b/expected/unsupported.out index efbbc36b..f8b15cc5 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -530,7 +530,7 @@ SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; SELECT str FROM expln(' EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str -WHERE str NOT LIKE '%Heap Blocks%' AND str NOT LIKE '%Query Identifier%'; +WHERE str NOT LIKE '%Heap Blocks%'; str ----------------------------------------------------------------- Aggregate (actual rows=1 loops=1) diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index fcfc535b..186ba9e0 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -47,11 +47,11 @@ SELECT x FROM frgn; SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT x FROM frgn WHERE x < 10; -') AS str WHERE str NOT LIKE '%Query Identifier%'; +') AS str; SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT x FROM frgn WHERE x < 10; -') AS str WHERE str NOT LIKE '%Query Identifier%'; +') AS str; EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants @@ -65,7 +65,7 @@ SELECT str FROM expln(' SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT 
* FROM frgn AS a, frgn AS b WHERE a.x=b.x; -') AS str WHERE str NOT LIKE '%Query Identifier%'; +') AS str; CREATE TABLE local_a(aid int primary key, aval text); CREATE TABLE local_b(bid int primary key, aid int references local_a(aid), bval text); @@ -133,7 +133,7 @@ SELECT * FROM frgn AS a, frgn AS b WHERE a.x 10 group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE '%Memory%'; -- -- TODO: @@ -75,7 +71,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' +WHERE str NOT LIKE '%Memory%' ; RESET enable_material; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index fcf0896f..db673788 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -165,7 +165,7 @@ SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; SELECT str FROM expln(' EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str -WHERE str NOT LIKE '%Heap Blocks%' AND str NOT LIKE '%Query Identifier%'; +WHERE str NOT LIKE '%Heap Blocks%'; -- Best choice is ... ANALYZE t; From 961bdcf44d4e3d3394f4915bae73a61bcf3bfbe1 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 25 Oct 2022 22:08:03 +0300 Subject: [PATCH 076/134] Fix aqo_fdw output test. Delete platform dependent lines containing Memory and add order by command in feature_subspace test for statical result. 
--- expected/aqo_fdw.out | 20 ++++++++++++-------- sql/aqo_fdw.sql | 10 ++++++++-- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index b05be251..e568e993 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -183,10 +183,13 @@ INSERT INTO main SELECT i, 'val_' || i FROM generate_series(1,100) i; INSERT INTO ref SELECT i, mod(i, 10) + 1, 'val_' || i FROM generate_series(1,1000) i; ANALYZE local_main_p0, local_main_p1, main_p2; ANALYZE local_ref_p0, local_ref_p1, ref_p2; +SELECT str AS result +FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * from main AS a, ref AS b -WHERE a.aid = b.aid AND b.bval like 'val%'; - QUERY PLAN +WHERE a.aid = b.aid AND b.bval like ''val%''') AS str +WHERE str NOT LIKE '%Memory%'; + result -------------------------------------------------------------------- Append (actual rows=1000 loops=1) AQO not used @@ -203,18 +206,20 @@ WHERE a.aid = b.aid AND b.bval like 'val%'; AQO not used Filter: (bval ~~ 'val%'::text) -> Hash (actual rows=38 loops=1) - Buckets: 1024 Batches: 1 Memory Usage: 10kB -> Seq Scan on main_p2 a_3 (actual rows=38 loops=1) AQO not used Using aqo: true AQO mode: LEARN JOINS: 1 -(21 rows) +(20 rows) +SELECT str AS result +FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * from main AS a, ref AS b -WHERE a.aid = b.aid AND b.bval like 'val%'; - QUERY PLAN +WHERE a.aid = b.aid AND b.bval like ''val%''') AS str +WHERE str NOT LIKE '%Memory%'; + result -------------------------------------------------------------------- Append (actual rows=1000 loops=1) AQO not used @@ -231,13 +236,12 @@ WHERE a.aid = b.aid AND b.bval like 'val%'; AQO: rows=300, error=0% Filter: (bval ~~ 'val%'::text) -> Hash (actual rows=38 loops=1) - Buckets: 1024 Batches: 1 Memory Usage: 10kB -> Seq Scan on main_p2 a_3 (actual rows=38 loops=1) AQO: rows=38, error=0% Using aqo: true AQO mode: LEARN JOINS: 1 -(21 rows) +(20 rows) DROP TABLE 
main, local_main_p0, local_main_p1; DROP TABLE ref, local_ref_p0, local_ref_p1; diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index 186ba9e0..bd211326 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -114,13 +114,19 @@ INSERT INTO ref SELECT i, mod(i, 10) + 1, 'val_' || i FROM generate_series(1,100 ANALYZE local_main_p0, local_main_p1, main_p2; ANALYZE local_ref_p0, local_ref_p1, ref_p2; +SELECT str AS result +FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * from main AS a, ref AS b -WHERE a.aid = b.aid AND b.bval like 'val%'; +WHERE a.aid = b.aid AND b.bval like ''val%''') AS str +WHERE str NOT LIKE '%Memory%'; +SELECT str AS result +FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * from main AS a, ref AS b -WHERE a.aid = b.aid AND b.bval like 'val%'; +WHERE a.aid = b.aid AND b.bval like ''val%''') AS str +WHERE str NOT LIKE '%Memory%'; DROP TABLE main, local_main_p0, local_main_p1; DROP TABLE ref, local_ref_p0, local_ref_p1; From b036dd9f1197c626681ef47d7de94a640eed1e86 Mon Sep 17 00:00:00 2001 From: Marina Polyakova Date: Wed, 16 Nov 2022 17:26:26 +0300 Subject: [PATCH 077/134] Partial revert "Change names of interface functions for better usage" This reverts commit f097d8b3c428d909a1f7da7977a5bef8dfaa2f7b except for changes to the function invalidate_deactivated_queries_cache. 
--- aqo--1.4--1.5.sql | 8 ++++---- expected/aqo_CVE-2020-14350.out | 24 ++++++++++++------------ expected/relocatable.out | 12 ++++++------ sql/aqo_CVE-2020-14350.sql | 16 ++++++++-------- sql/relocatable.sql | 4 ++-- storage.c | 8 ++++---- 6 files changed, 36 insertions(+), 36 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 569f2c53..622bb7fa 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -77,14 +77,14 @@ CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); /* UI functions */ -CREATE FUNCTION aqo_enable_class(queryid bigint) +CREATE FUNCTION aqo_enable_query(queryid bigint) RETURNS void -AS 'MODULE_PATHNAME', 'aqo_enable_class' +AS 'MODULE_PATHNAME', 'aqo_enable_query' LANGUAGE C STRICT VOLATILE; -CREATE FUNCTION aqo_disable_class(queryid bigint) +CREATE FUNCTION aqo_disable_query(queryid bigint) RETURNS void -AS 'MODULE_PATHNAME', 'aqo_disable_class' +AS 'MODULE_PATHNAME', 'aqo_enable_query' LANGUAGE C STRICT VOLATILE; CREATE FUNCTION aqo_queries_update( diff --git a/expected/aqo_CVE-2020-14350.out b/expected/aqo_CVE-2020-14350.out index 8685b935..ccdc4694 100644 --- a/expected/aqo_CVE-2020-14350.out +++ b/expected/aqo_CVE-2020-14350.out @@ -116,7 +116,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_enable_class(hash bigint) +CREATE FUNCTION aqo_enable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -125,9 +125,9 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_enable_class" already exists with same argument types +ERROR: function "aqo_enable_query" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_class(hash bigint) +CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -135,8 +135,8 @@ BEGIN END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_enable_class(42); - aqo_enable_class +SELECT aqo_enable_query(42); + aqo_enable_query ------------------ (1 row) @@ -149,7 +149,7 @@ 
SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_enable_class(bigint); +DROP FUNCTION aqo_enable_query(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 4 @@ -162,7 +162,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_disable_class(hash bigint) +CREATE FUNCTION aqo_disable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -171,9 +171,9 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_disable_class" already exists with same argument types +ERROR: function "aqo_disable_query" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_class(hash bigint) +CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -181,8 +181,8 @@ BEGIN END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_disable_class(42); - aqo_disable_class +SELECT aqo_disable_query(42); + aqo_disable_query ------------------- (1 row) @@ -195,7 +195,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_disable_class(bigint); +DROP FUNCTION aqo_disable_query(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 5 diff --git a/expected/relocatable.out b/expected/relocatable.out index 949896f6..5fcf06e6 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -80,9 +80,9 @@ ORDER BY (md5(query_text)) /* * Below, we should check each UI function */ -SELECT aqo_disable_class(id) FROM ( +SELECT aqo_disable_query(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; - aqo_disable_class + aqo_disable_query ------------------- @@ -93,13 +93,13 @@ ORDER BY (learn_aqo, use_aqo, auto_tuning); learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f - f | f | f - f | f | f + t | t | f + t | t | f (3 rows) -SELECT aqo_enable_class(id) FROM ( +SELECT aqo_enable_query(id) FROM ( SELECT queryid AS id FROM aqo_queries 
WHERE queryid <> 0) AS q1; - aqo_enable_class + aqo_enable_query ------------------ diff --git a/sql/aqo_CVE-2020-14350.sql b/sql/aqo_CVE-2020-14350.sql index 75833223..1b36b50b 100644 --- a/sql/aqo_CVE-2020-14350.sql +++ b/sql/aqo_CVE-2020-14350.sql @@ -103,7 +103,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_enable_class(hash bigint) +CREATE FUNCTION aqo_enable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -115,7 +115,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_class(hash bigint) +CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -124,13 +124,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_enable_class(42); +SELECT aqo_enable_query(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_enable_class(bigint); +DROP FUNCTION aqo_enable_query(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 4 @@ -140,7 +140,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_disable_class(hash bigint) +CREATE FUNCTION aqo_disable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -152,7 +152,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_class(hash bigint) +CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -161,13 +161,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_disable_class(42); +SELECT aqo_disable_query(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_disable_class(bigint); +DROP FUNCTION aqo_disable_query(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 5 diff --git a/sql/relocatable.sql b/sql/relocatable.sql index 780c385e..e8cc57c3 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -39,11 +39,11 @@ ORDER BY (md5(query_text)) /* * Below, we should check each UI function */ -SELECT 
aqo_disable_class(id) FROM ( +SELECT aqo_disable_query(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries ORDER BY (learn_aqo, use_aqo, auto_tuning); -SELECT aqo_enable_class(id) FROM ( +SELECT aqo_enable_query(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries ORDER BY (learn_aqo, use_aqo, auto_tuning); diff --git a/storage.c b/storage.c index 4825b54f..a6d580ed 100644 --- a/storage.c +++ b/storage.c @@ -96,8 +96,8 @@ PG_FUNCTION_INFO_V1(aqo_query_stat); PG_FUNCTION_INFO_V1(aqo_query_texts); PG_FUNCTION_INFO_V1(aqo_data); PG_FUNCTION_INFO_V1(aqo_queries); -PG_FUNCTION_INFO_V1(aqo_enable_class); -PG_FUNCTION_INFO_V1(aqo_disable_class); +PG_FUNCTION_INFO_V1(aqo_enable_query); +PG_FUNCTION_INFO_V1(aqo_disable_query); PG_FUNCTION_INFO_V1(aqo_queries_update); PG_FUNCTION_INFO_V1(aqo_reset); PG_FUNCTION_INFO_V1(aqo_cleanup); @@ -1903,7 +1903,7 @@ aqo_queries_reset(void) } Datum -aqo_enable_class(PG_FUNCTION_ARGS) +aqo_enable_query(PG_FUNCTION_ARGS) { uint64 queryid = (uint64) PG_GETARG_INT64(0); QueriesEntry *entry; @@ -1934,7 +1934,7 @@ aqo_enable_class(PG_FUNCTION_ARGS) } Datum -aqo_disable_class(PG_FUNCTION_ARGS) +aqo_disable_query(PG_FUNCTION_ARGS) { uint64 queryid = (uint64) PG_GETARG_INT64(0); QueriesEntry *entry; From 763b45b3d3ecfb78977947eb53a57485b6046eaa Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 17 Nov 2022 18:05:22 +1000 Subject: [PATCH 078/134] Suppress a line of EXPLAIN in parallel_workers test which contains substring 'Gather Merge'. It might be better to inquiry why OS/hardware platform changes decision on a number of parallel workers, but it requires direct access to problematic environment... So, postpone for the better future. 
--- expected/parallel_workers.out | 7 +++---- sql/parallel_workers.sql | 3 ++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/expected/parallel_workers.out b/expected/parallel_workers.out index 14e086c8..fca67006 100644 --- a/expected/parallel_workers.out +++ b/expected/parallel_workers.out @@ -69,7 +69,8 @@ SELECT count(*) FROM (SELECT max(id) AS id, payload FROM t WHERE id % 101 = 0 GROUP BY (payload)) AS q2 WHERE q1.id = q2.id;') AS str -WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%'; +WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%' + AND str NOT LIKE '%Gather Merge%'; str -------------------------------------------------------------------------------------------------- Aggregate (actual rows=1 loops=1) @@ -84,7 +85,6 @@ WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%'; -> Finalize GroupAggregate (actual rows=1 loops=1) AQO not used Group Key: t.payload - -> Gather Merge (actual rows=3 loops=1) AQO not used -> Partial GroupAggregate (actual rows=1 loops=3) AQO not used @@ -99,7 +99,6 @@ WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%'; -> Group (actual rows=1000 loops=1) AQO not used Group Key: t_1.id - -> Gather Merge (actual rows=1000 loops=1) AQO not used -> Group (actual rows=333 loops=3) AQO not used @@ -114,7 +113,7 @@ WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%'; Using aqo: true AQO mode: LEARN JOINS: 1 -(42 rows) +(40 rows) RESET parallel_tuple_cost; RESET parallel_setup_cost; diff --git a/sql/parallel_workers.sql b/sql/parallel_workers.sql index 3fbccb48..b544cf19 100644 --- a/sql/parallel_workers.sql +++ b/sql/parallel_workers.sql @@ -49,7 +49,8 @@ SELECT count(*) FROM (SELECT max(id) AS id, payload FROM t WHERE id % 101 = 0 GROUP BY (payload)) AS q2 WHERE q1.id = q2.id;') AS str -WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%'; +WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%' + AND str NOT LIKE '%Gather Merge%'; RESET 
parallel_tuple_cost; From d2cddcd862caa9ae34a7097f6636582dc8f16ec4 Mon Sep 17 00:00:00 2001 From: Andrey Kazarinov Date: Fri, 9 Dec 2022 19:17:28 +0300 Subject: [PATCH 079/134] [PGPRO-7183] arrange stable branches 13, 14, 15 Minor changes --- .github/workflows/c-cpp.yml | 4 +- README.md | 5 ++- aqo.conf | 2 +- aqo_pg14.patch | 75 +++++++++++++++------------------- cardinality_hooks.c | 14 ++++--- cardinality_hooks.h | 3 +- expected/statement_timeout.out | 2 +- expected/unsupported.out | 14 +++---- path_utils.c | 5 +++ preprocessing.c | 1 - sql/statement_timeout.sql | 2 +- sql/unsupported.sql | 6 +-- storage.c | 1 + 13 files changed, 70 insertions(+), 64 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 7bab079d..96a2d3d9 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -7,7 +7,7 @@ on: branches: [ stable14 ] env: - BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} jobs: build: @@ -28,10 +28,10 @@ jobs: cd pg git checkout REL_14_STABLE - ./configure $CONFIGURE_OPTS CFLAGS="-O3" git clone https://github.com/postgrespro/aqo.git contrib/aqo git -C contrib/aqo checkout $BRANCH_NAME patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg14.patch + ./configure $CONFIGURE_OPTS CFLAGS="-O2" make -j4 > /dev/null && make -j4 -C contrib > /dev/null env CLIENTS=50 THREADS=50 make -C contrib/aqo check diff --git a/README.md b/README.md index e28ac89c..252c74ad 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,10 @@ To avoid compatibility issues, the following branches in the git-repository are * `stable9_6`. * `stable11` - for PG v10 and v11. * `stable12` - for PG v12. -* the `master` branch of the AQO repository correctly works with PGv13 and the PostgreSQL `master` branch. +* `stable13` - for PG v13. +* `stable14` - for PG v14. +* `stable15` - for PG v15. 
+* the `master` branch of the AQO repository correctly works with PGv15 and the PostgreSQL `master` branch. The module contains a patch and an extension. Patch has to be applied to the sources of PostgresSQL. Patch affects header files, that is why PostgreSQL diff --git a/aqo.conf b/aqo.conf index 586f5147..03de79ee 100644 --- a/aqo.conf +++ b/aqo.conf @@ -2,4 +2,4 @@ autovacuum = off shared_preload_libraries = 'postgres_fdw, aqo' max_parallel_maintenance_workers = 1 # switch off parallel workers because of unsteadiness aqo.wide_search = 'on' -compute_query_id = 'regress' \ No newline at end of file +compute_query_id = 'regress' diff --git a/aqo_pg14.patch b/aqo_pg14.patch index b211df01..7ee75eec 100644 --- a/aqo_pg14.patch +++ b/aqo_pg14.patch @@ -1,5 +1,5 @@ diff --git a/contrib/Makefile b/contrib/Makefile -index f27e458482e..0c621919045 100644 +index f27e458482..0c62191904 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -7,6 +7,7 @@ include $(top_builddir)/src/Makefile.global @@ -11,17 +11,17 @@ index f27e458482e..0c621919045 100644 auto_explain \ bloom \ diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index 70551522dac..d9cca82fe84 100644 +index 70551522da..958529fbab 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c -@@ -25,6 +25,7 @@ +@@ -24,6 +24,7 @@ + #include "nodes/extensible.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" - #include "parser/analyze.h" +#include "optimizer/cost.h" + #include "parser/analyze.h" #include "parser/parsetree.h" #include "rewrite/rewriteHandler.h" - #include "storage/bufmgr.h" @@ -47,6 +48,12 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; /* Hook for plugins to get control in explain_get_index_name() */ explain_get_index_name_hook_type explain_get_index_name_hook = NULL; @@ -57,7 +57,7 @@ index 70551522dac..d9cca82fe84 100644 if (es->format == EXPLAIN_FORMAT_TEXT) appendStringInfoChar(es->str, '\n'); diff --git 
a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index 4d9746d54a0..6fa85d1c71f 100644 +index 4d9746d54a..6fa85d1c71 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -132,6 +132,7 @@ CopyPlanFields(const Plan *from, Plan *newnode) @@ -69,7 +69,7 @@ index 4d9746d54a0..6fa85d1c71f 100644 /* diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c -index 58c2590698c..1e06738a137 100644 +index 58c2590698..1e06738a13 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -342,6 +342,7 @@ _outPlanInfo(StringInfo str, const Plan *node) @@ -81,7 +81,7 @@ index 58c2590698c..1e06738a137 100644 /* diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c -index eaa51c5c062..6ad8b78c7d5 100644 +index eaa51c5c06..6ad8b78c7d 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1628,6 +1628,11 @@ ReadCommonPlan(Plan *local_node) @@ -97,7 +97,7 @@ index eaa51c5c062..6ad8b78c7d5 100644 /* diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index 006f91f0a87..ef9c8ec5817 100644 +index 006f91f0a8..ef9c8ec581 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -98,6 +98,11 @@ @@ -362,7 +362,7 @@ index 006f91f0a87..ef9c8ec5817 100644 { double parallel_divisor = path->parallel_workers; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 0ed858f305a..9d4a6c59030 100644 +index 0ed858f305..9d4a6c5903 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -71,6 +71,7 @@ @@ -393,7 +393,7 @@ index 0ed858f305a..9d4a6c59030 100644 /* diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c -index 70899e5430e..dac6132af54 100644 +index 70899e5430..34075cc87b 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c 
@@ -143,7 +143,8 @@ static List *extract_rollup_sets(List *groupingSets); @@ -441,7 +441,7 @@ index 70899e5430e..dac6132af54 100644 - dNumGroups = estimate_num_groups(root, groupExprs, path_rows, - NULL, NULL); + dNumGroups = estimate_num_groups_ext(root, groupExprs, subpath, -+ grouped_rel, NULL); ++ grouped_rel, NULL, NULL); } } else if (parse->groupingSets) @@ -474,7 +474,7 @@ index 70899e5430e..dac6132af54 100644 extra->targetList); diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c -index e105a4d5f1d..c5bcc9d1d15 100644 +index e105a4d5f1..c5bcc9d1d1 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -258,6 +258,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) @@ -540,15 +540,9 @@ index e105a4d5f1d..c5bcc9d1d15 100644 return ppi; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c -index 962dec6d504..899ee2bf4c5 100644 +index 87879c9ddc..1aad8c43d9 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c -@@ -1,4 +1,4 @@ --/*------------------------------------------------------------------------- -+ /*------------------------------------------------------------------------- - * - * selfuncs.c - * Selectivity functions and index cost estimation functions for @@ -143,6 +143,7 @@ /* Hooks for plugins to get control when we ask for stats */ get_relation_stats_hook_type get_relation_stats_hook = NULL; @@ -557,28 +551,29 @@ index 962dec6d504..899ee2bf4c5 100644 static double eqsel_internal(PG_FUNCTION_ARGS, bool negate); static double eqjoinsel_inner(Oid opfuncoid, Oid collation, -@@ -3293,6 +3294,19 @@ add_unique_group_var(PlannerInfo *root, List *varinfos, +@@ -3293,6 +3294,20 @@ add_unique_group_var(PlannerInfo *root, List *varinfos, return varinfos; } +double +estimate_num_groups_ext(PlannerInfo *root, List *groupExprs, Path *subpath, -+ RelOptInfo *grouped_rel, List **pgset) ++ RelOptInfo 
*grouped_rel, List **pgset, ++ EstimationInfo *estinfo) +{ + double input_rows = subpath->rows; + + if (estimate_num_groups_hook != NULL) -+ return (*estimate_num_groups_hook)(root, groupExprs, subpath, -+ grouped_rel, pgset); ++ return (*estimate_num_groups_hook)(root, groupExprs, subpath, grouped_rel, ++ pgset, estinfo); + -+ return estimate_num_groups(root, groupExprs, input_rows, pgset, NULL); ++ return estimate_num_groups(root, groupExprs, input_rows, pgset, estinfo); +} + /* * estimate_num_groups - Estimate number of groups in a grouped query * diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h -index e94d9e49cf6..49236ced77c 100644 +index e94d9e49cf..49236ced77 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h @@ -75,6 +75,18 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; @@ -601,7 +596,7 @@ index e94d9e49cf6..49236ced77c 100644 extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, ParamListInfo params, DestReceiver *dest); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h -index f16466a0df1..8f0ed706817 100644 +index e370a01141..9f2f1628f5 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -756,6 +756,10 @@ typedef struct RelOptInfo @@ -640,24 +635,21 @@ index f16466a0df1..8f0ed706817 100644 diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 2308c80ddee..dc74cf85bd2 100644 +index 2308c80dde..a933afa483 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h -@@ -158,6 +158,12 @@ typedef struct Plan +@@ -158,6 +158,9 @@ typedef struct Plan */ Bitmapset *extParam; Bitmapset *allParam; + -+ /* -+ * Additional fields for an extension purposes. -+ * TODO: allow to serialize/deserialize this list. -+ */ ++ /* Additional field for an extension purposes. 
*/ + List *ext_nodes; } Plan; /* ---------------- diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h -index 2113bc82de0..bcc2520cec5 100644 +index 2113bc82de..bcc2520cec 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -39,6 +39,37 @@ typedef enum @@ -741,7 +733,7 @@ index 2113bc82de0..bcc2520cec5 100644 #endif /* COST_H */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h -index 2922c0cdc14..c59dce6989e 100644 +index 2922c0cdc1..c59dce6989 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -18,6 +18,10 @@ @@ -756,7 +748,7 @@ index 2922c0cdc14..c59dce6989e 100644 * prototypes for pathnode.c */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h -index bf1adfc52ac..9c78e0f4e02 100644 +index bf1adfc52a..9c78e0f4e0 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -24,6 +24,12 @@ extern double cursor_tuple_fraction; @@ -773,10 +765,10 @@ index bf1adfc52ac..9c78e0f4e02 100644 * prototypes for plan/planmain.c */ diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h -index 9dd444e1ff5..37133340d84 100644 +index 9dd444e1ff..cfaae98aa2 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h -@@ -144,6 +144,12 @@ typedef bool (*get_index_stats_hook_type) (PlannerInfo *root, +@@ -144,6 +144,13 @@ typedef bool (*get_index_stats_hook_type) (PlannerInfo *root, AttrNumber indexattnum, VariableStatData *vardata); extern PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook; @@ -784,18 +776,19 @@ index 9dd444e1ff5..37133340d84 100644 + List *groupExprs, + Path *subpath, + RelOptInfo *grouped_rel, -+ List **pgset); ++ List **pgset, ++ EstimationInfo *estinfo); +extern PGDLLIMPORT estimate_num_groups_hook_type estimate_num_groups_hook; /* Functions in selfuncs.c */ -@@ -213,6 +219,9 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause, +@@ 
-213,6 +220,9 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause, extern double estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, List **pgset, EstimationInfo *estinfo); +extern double estimate_num_groups_ext(PlannerInfo *root, List *groupExprs, + Path *subpath, RelOptInfo *grouped_rel, -+ List **pgset); ++ List **pgset, EstimationInfo *estinfo); extern void estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets, diff --git a/cardinality_hooks.c b/cardinality_hooks.c index fe55b992..5380a560 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -117,7 +117,7 @@ default_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, static double default_estimate_num_groups(PlannerInfo *root, List *groupExprs, Path *subpath, RelOptInfo *grouped_rel, - List **pgset) + List **pgset, EstimationInfo *estinfo) { double input_rows = subpath->rows; @@ -125,9 +125,9 @@ default_estimate_num_groups(PlannerInfo *root, List *groupExprs, return (*prev_estimate_num_groups_hook)(root, groupExprs, subpath, grouped_rel, - pgset); + pgset, estinfo); else - return estimate_num_groups(root, groupExprs, input_rows, pgset, NULL); + return estimate_num_groups(root, groupExprs, input_rows, pgset, estinfo); } /* @@ -463,7 +463,7 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, double aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, Path *subpath, RelOptInfo *grouped_rel, - List **pgset) + List **pgset, EstimationInfo *estinfo) { int fss; double predicted; @@ -479,6 +479,10 @@ aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, if (prev_estimate_num_groups_hook != NULL) elog(WARNING, "AQO replaced another estimator of a groups number"); + /* Zero the estinfo output parameter, if non-NULL */ + if (estinfo != NULL) + memset(estinfo, 0, sizeof(EstimationInfo)); + if (groupExprs == NIL) return 1.0; @@ -504,5 +508,5 @@ aqo_estimate_num_groups_hook(PlannerInfo *root, List 
*groupExprs, default_estimator: return default_estimate_num_groups(root, groupExprs, subpath, grouped_rel, - pgset); + pgset, estinfo); } diff --git a/cardinality_hooks.h b/cardinality_hooks.h index 0e8c65c0..c34f9315 100644 --- a/cardinality_hooks.h +++ b/cardinality_hooks.h @@ -26,6 +26,7 @@ extern double aqo_get_parameterized_joinrel_size(PlannerInfo *root, extern double aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, Path *subpath, RelOptInfo *grouped_rel, - List **pgset); + List **pgset, + EstimationInfo *estinfo); #endif /* CARDINALITY_HOOKS_H */ diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index 302b9b43..0b26b430 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -111,12 +111,12 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); 5 (1 row) -DROP TABLE t; SELECT 1 FROM aqo_reset(); ?column? ---------- 1 (1 row) +DROP TABLE t; DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; diff --git a/expected/unsupported.out b/expected/unsupported.out index f8b15cc5..c42a3be5 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -17,7 +17,7 @@ CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y FROM generate_series(1,1000) AS gs; ANALYZE t, t1; -- --- Do not support HAVING clause for now. +-- Do not support HAVING clauses for now. 
-- SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; count @@ -43,8 +43,8 @@ EXPLAIN (COSTS OFF) (11 rows) SELECT str FROM expln(' - EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) - SELECT * FROM t GROUP BY (x) HAVING x > 3; +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t GROUP BY (x) HAVING x > 3; ') AS str WHERE str NOT LIKE '%Memory Usage%'; str ----------------------------------------------- @@ -586,10 +586,6 @@ ORDER BY (md5(query_text),error) DESC; | SELECT count(*) FROM t WHERE + | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); - 0.106 | + - | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + - | SELECT * FROM t GROUP BY (x) HAVING x > 3; + - | 0.454 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; 0.000 | SELECT count(*) FROM ( + | SELECT count(*) AS x FROM ( + @@ -602,6 +598,10 @@ ORDER BY (md5(query_text),error) DESC; 0.000 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + | (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 0.106 | + + | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT * FROM t GROUP BY (x) HAVING x > 3; + + | 0.000 | SELECT count(*) FROM + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + | JOIN + diff --git a/path_utils.c b/path_utils.c index 0e0e137d..0d24a048 100644 --- a/path_utils.c +++ b/path_utils.c @@ -366,6 +366,10 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) return get_path_clauses(((MaterialPath *) path)->subpath, root, selectivities); break; + case T_MemoizePath: + return get_path_clauses(((MemoizePath *) path)->subpath, root, + selectivities); + break; case T_ProjectionPath: return get_path_clauses(((ProjectionPath *) path)->subpath, root, selectivities); @@ -488,6 +492,7 @@ 
is_appropriate_path(Path *path) { case T_SortPath: case T_IncrementalSortPath: + case T_MemoizePath: case T_GatherPath: case T_GatherMergePath: appropriate = false; diff --git a/preprocessing.c b/preprocessing.c index 91689b91..7b909bdf 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -67,7 +67,6 @@ #include "preprocessing.h" #include "storage.h" - /* List of feature spaces, that are processing in this backend. */ List *cur_classes = NIL; diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql index 9666c1de..36afc370 100644 --- a/sql/statement_timeout.sql +++ b/sql/statement_timeout.sql @@ -60,7 +60,7 @@ SET statement_timeout = 5500; SELECT *, pg_sleep(1) FROM t; -- Get reliable data SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -DROP TABLE t; SELECT 1 FROM aqo_reset(); +DROP TABLE t; DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index db673788..808a19e1 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -22,15 +22,15 @@ CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y ANALYZE t, t1; -- --- Do not support HAVING clause for now. +-- Do not support HAVING clauses for now. 
-- SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; EXPLAIN (COSTS OFF) SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; SELECT str FROM expln(' - EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) - SELECT * FROM t GROUP BY (x) HAVING x > 3; +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t GROUP BY (x) HAVING x > 3; ') AS str WHERE str NOT LIKE '%Memory Usage%'; -- diff --git a/storage.c b/storage.c index a6d580ed..bcbcfac4 100644 --- a/storage.c +++ b/storage.c @@ -105,6 +105,7 @@ PG_FUNCTION_INFO_V1(aqo_drop_class); PG_FUNCTION_INFO_V1(aqo_cardinality_error); PG_FUNCTION_INFO_V1(aqo_execution_time); + bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe) { From 09c591d31388d5aced3b8f539b2ed43d5efc40b3 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 14 Oct 2022 09:43:07 +0500 Subject: [PATCH 080/134] Several bugfixes here: 1. don't enable statement timeout in parallel worker and 2. minor DSM cache fix. 3. don't clear learn_cache in a parallel worker. --- aqo_shared.c | 4 +--- learn_cache.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/aqo_shared.c b/aqo_shared.c index ac5c5aea..5715a76e 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -77,12 +77,10 @@ reset_dsm_cache(void) Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE)); - if (aqo_state->dsm_handler == DSM_HANDLE_INVALID) + if (aqo_state->dsm_handler == DSM_HANDLE_INVALID || !seg) /* Fast path. No any cached data exists. 
*/ return; - Assert(seg); - hdr = (dsm_seg_hdr *) dsm_segment_address(seg); start = (char *) hdr + sizeof(dsm_seg_hdr); diff --git a/learn_cache.c b/learn_cache.c index 2fc6644a..c7f6ef87 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -13,6 +13,7 @@ */ #include "postgres.h" +#include "access/parallel.h" /* Just for IsParallelWorker() */ #include "miscadmin.h" #include "aqo.h" @@ -316,14 +317,15 @@ lc_assign_hook(bool newval, void *extra) HASH_SEQ_STATUS status; htab_entry *entry; - if (!fss_htab || !IsUnderPostmaster) + if (!fss_htab || !IsUnderPostmaster || IsParallelWorker()) + /* Clean this shared cache only in main backend process. */ return; /* Remove all entries, reset memory context. */ elog(DEBUG5, "[AQO] Cleanup local cache of ML data."); - /* Remove all frozen plans from a plancache. */ + /* Remove all entries in the shared hash table. */ LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); hash_seq_init(&status, fss_htab); while ((entry = (htab_entry *) hash_seq_search(&status)) != NULL) @@ -331,5 +333,9 @@ lc_assign_hook(bool newval, void *extra) if (!hash_search(fss_htab, (void *) &entry->key, HASH_REMOVE, NULL)) elog(PANIC, "[AQO] The local ML cache is corrupted."); } + + /* Now, clean additional DSM block */ + reset_dsm_cache(); + LWLockRelease(&aqo_state->lock); } From 4429d9d5a325acf94859c33a86486cf30b519ea9 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 22 Dec 2022 12:03:44 +0500 Subject: [PATCH 081/134] Second stage of branches arrangement. Now: from master to the stable --- aqo.c | 2 +- aqo_shared.c | 4 ++-- aqo_shared.h | 3 +-- auto_tuning.c | 6 +++--- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/aqo.c b/aqo.c index 6d616e6c..0e3afec3 100644 --- a/aqo.c +++ b/aqo.c @@ -33,7 +33,7 @@ void _PG_init(void); #define AQO_MODULE_MAGIC (1234) /* Strategy of determining feature space for new queries. 
*/ -int aqo_mode; +int aqo_mode = AQO_MODE_CONTROLLED; bool force_collect_stat; /* diff --git a/aqo_shared.c b/aqo_shared.c index 5715a76e..86908880 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -26,8 +26,8 @@ shmem_startup_hook_type prev_shmem_startup_hook = NULL; AQOSharedState *aqo_state = NULL; HTAB *fss_htab = NULL; static int aqo_htab_max_items = 1000; -int fs_max_items = 1; /* Max number of different feature spaces in ML model */ -int fss_max_items = 1; /* Max number of different feature subspaces in ML model */ +int fs_max_items = 10000; /* Max number of different feature spaces in ML model */ +int fss_max_items = 100000; /* Max number of different feature subspaces in ML model */ static uint32 temp_storage_size = 1024 * 1024 * 10; /* Storage size, in bytes */ static dsm_segment *seg = NULL; diff --git a/aqo_shared.h b/aqo_shared.h index 61c0d3d0..926a2723 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -1,12 +1,11 @@ #ifndef AQO_SHARED_H #define AQO_SHARED_H - +#include "lib/dshash.h" #include "storage/dsm.h" #include "storage/ipc.h" #include "storage/lwlock.h" #include "utils/dsa.h" -#include "lib/dshash.h" #define AQO_SHARED_MAGIC 0x053163 diff --git a/auto_tuning.c b/auto_tuning.c index 7a15e516..fad245ed 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -42,7 +42,7 @@ get_mean(double *elems, int nelems) double sum = 0; int i; - AssertArg(nelems > 0); + Assert(nelems > 0); for (i = 0; i < nelems; ++i) sum += elems[i]; @@ -58,7 +58,7 @@ get_estimation(double *elems, int nelems) { int start; - AssertArg(nelems > 0); + Assert(nelems > 0); if (nelems > auto_tuning_window_size) start = nelems - auto_tuning_window_size; @@ -77,7 +77,7 @@ is_stable(double *elems, int nelems) double est, last; - AssertArg(nelems > 1); + Assert(nelems > 1); est = get_mean(elems, nelems - 1); last = elems[nelems - 1]; From 60ed51ac44be7beb1fa4cde10fd9bb6d2c474dbb Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 22 Dec 2022 13:41:43 +0500 Subject: [PATCH 082/134] Raise 
AQO version to v1.6. Rename a couple of UI functions: 1. aqo_enable_query -> aqo_enable_class 2. aqo_disable_query -> aqo_disable_class Fix the bug of 1.5 with execution of "enable" routine from "disable" UI function. Correct aqo_cleanup() return type: It returns single set of values. So, we don't really needed all of the materialization machinery. Just to form and return a tuple. --- Makefile | 5 +++-- aqo--1.5--1.6.sql | 32 +++++++++++++++++++++++++++++ aqo.control | 2 +- expected/aqo_CVE-2020-14350.out | 24 +++++++++++----------- expected/gucs.out | 2 +- expected/relocatable.out | 12 +++++------ sql/aqo_CVE-2020-14350.sql | 16 +++++++-------- sql/relocatable.sql | 4 ++-- storage.c | 36 +++++++-------------------------- 9 files changed, 72 insertions(+), 61 deletions(-) create mode 100644 aqo--1.5--1.6.sql diff --git a/Makefile b/Makefile index b07d7f86..7370647f 100755 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # contrib/aqo/Makefile EXTENSION = aqo -EXTVERSION = 1.5 +EXTVERSION = 1.6 PGFILEDESC = "AQO - Adaptive Query Optimization" MODULE_big = aqo OBJS = $(WIN32RES) \ @@ -23,7 +23,8 @@ EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/aqo.conf EXTRA_INSTALL = contrib/postgres_fdw contrib/pg_stat_statements DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql \ - aqo--1.2--1.3.sql aqo--1.3--1.4.sql aqo--1.4--1.5.sql + aqo--1.2--1.3.sql aqo--1.3--1.4.sql aqo--1.4--1.5.sql \ + aqo--1.5--1.6.sql ifdef USE_PGXS PG_CONFIG ?= pg_config diff --git a/aqo--1.5--1.6.sql b/aqo--1.5--1.6.sql new file mode 100644 index 00000000..4101d33d --- /dev/null +++ b/aqo--1.5--1.6.sql @@ -0,0 +1,32 @@ +/* contrib/aqo/aqo--1.5--1.6.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION aqo UPDATE TO '1.6'" to load this file. 
\quit + +DROP FUNCTION aqo_enable_query; +DROP FUNCTION aqo_disable_query; +DROP FUNCTION aqo_cleanup; + +CREATE FUNCTION aqo_enable_class(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_enable_query' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION aqo_disable_class(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_disable_query' +LANGUAGE C STRICT VOLATILE; + +-- +-- Remove unneeded rows from the AQO ML storage. +-- For common feature space, remove rows from aqo_data only. +-- For custom feature space - remove all rows related to the space from all AQO +-- tables even if only one oid for one feature subspace of the space is illegal. +-- Returns number of deleted rows from aqo_queries and aqo_data tables. +-- +CREATE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) +RETURNS record +AS 'MODULE_PATHNAME', 'aqo_cleanup' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cleanup() IS +'Remove unneeded rows from the AQO ML storage'; diff --git a/aqo.control b/aqo.control index 5507effb..4ca0ecb6 100644 --- a/aqo.control +++ b/aqo.control @@ -1,5 +1,5 @@ # AQO extension comment = 'machine learning for cardinality estimation in optimizer' -default_version = '1.5' +default_version = '1.6' module_pathname = '$libdir/aqo' relocatable = true diff --git a/expected/aqo_CVE-2020-14350.out b/expected/aqo_CVE-2020-14350.out index ccdc4694..8685b935 100644 --- a/expected/aqo_CVE-2020-14350.out +++ b/expected/aqo_CVE-2020-14350.out @@ -116,7 +116,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_enable_query(hash bigint) +CREATE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -125,9 +125,9 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_enable_query" already exists with same argument types +ERROR: function "aqo_enable_class" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) +CREATE OR REPLACE 
FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -135,8 +135,8 @@ BEGIN END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_enable_query(42); - aqo_enable_query +SELECT aqo_enable_class(42); + aqo_enable_class ------------------ (1 row) @@ -149,7 +149,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_enable_query(bigint); +DROP FUNCTION aqo_enable_class(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 4 @@ -162,7 +162,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_disable_query(hash bigint) +CREATE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -171,9 +171,9 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_disable_query" already exists with same argument types +ERROR: function "aqo_disable_class" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -181,8 +181,8 @@ BEGIN END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_disable_query(42); - aqo_disable_query +SELECT aqo_disable_class(42); + aqo_disable_class ------------------- (1 row) @@ -195,7 +195,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_disable_query(bigint); +DROP FUNCTION aqo_disable_class(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 5 diff --git a/expected/gucs.out b/expected/gucs.out index 2bccb447..d7ef6eeb 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -110,7 +110,7 @@ SELECT obj_description('aqo_reset'::regproc::oid); List of functions Schema | Name | Result data type | Argument data types | Type --------+-------------+------------------+-----------------------------------+------ - public | aqo_cleanup | SETOF record | OUT nfs integer, OUT nfss integer | func + public | aqo_cleanup | record | OUT nfs 
integer, OUT nfss integer | func (1 row) \df aqo_reset diff --git a/expected/relocatable.out b/expected/relocatable.out index 5fcf06e6..949896f6 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -80,9 +80,9 @@ ORDER BY (md5(query_text)) /* * Below, we should check each UI function */ -SELECT aqo_disable_query(id) FROM ( +SELECT aqo_disable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; - aqo_disable_query + aqo_disable_class ------------------- @@ -93,13 +93,13 @@ ORDER BY (learn_aqo, use_aqo, auto_tuning); learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f - t | t | f - t | t | f + f | f | f + f | f | f (3 rows) -SELECT aqo_enable_query(id) FROM ( +SELECT aqo_enable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; - aqo_enable_query + aqo_enable_class ------------------ diff --git a/sql/aqo_CVE-2020-14350.sql b/sql/aqo_CVE-2020-14350.sql index 1b36b50b..75833223 100644 --- a/sql/aqo_CVE-2020-14350.sql +++ b/sql/aqo_CVE-2020-14350.sql @@ -103,7 +103,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_enable_query(hash bigint) +CREATE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -115,7 +115,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -124,13 +124,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_enable_query(42); +SELECT aqo_enable_class(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_enable_query(bigint); +DROP FUNCTION aqo_enable_class(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 4 @@ -140,7 +140,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_disable_query(hash bigint) +CREATE FUNCTION aqo_disable_class(hash 
bigint) RETURNS VOID AS $$ BEGIN @@ -152,7 +152,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -161,13 +161,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_disable_query(42); +SELECT aqo_disable_class(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_disable_query(bigint); +DROP FUNCTION aqo_disable_class(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 5 diff --git a/sql/relocatable.sql b/sql/relocatable.sql index e8cc57c3..780c385e 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -39,11 +39,11 @@ ORDER BY (md5(query_text)) /* * Below, we should check each UI function */ -SELECT aqo_disable_query(id) FROM ( +SELECT aqo_disable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries ORDER BY (learn_aqo, use_aqo, auto_tuning); -SELECT aqo_enable_query(id) FROM ( +SELECT aqo_enable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries ORDER BY (learn_aqo, use_aqo, auto_tuning); diff --git a/storage.c b/storage.c index bcbcfac4..8a21892c 100644 --- a/storage.c +++ b/storage.c @@ -2170,39 +2170,16 @@ aqo_cleanup(PG_FUNCTION_ARGS) { int fs_num; int fss_num; - ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; TupleDesc tupDesc; - MemoryContext per_query_ctx; - MemoryContext oldcontext; - Tuplestorestate *tupstore; + HeapTuple tuple; + Datum result; Datum values[2]; bool nulls[2] = {0, 0}; - /* check to see if caller supports us returning a tuplestore */ - if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("set-valued function called in context that cannot accept a set"))); - if (!(rsinfo->allowedModes & 
SFRM_Materialize)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("materialize mode required, but it is not allowed in this context"))); - - /* Switch into long-lived context to construct returned data structures */ - per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; - oldcontext = MemoryContextSwitchTo(per_query_ctx); - - /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == 2); - tupstore = tuplestore_begin_heap(true, false, work_mem); - rsinfo->returnMode = SFRM_Materialize; - rsinfo->setResult = tupstore; - rsinfo->setDesc = tupDesc; - - MemoryContextSwitchTo(oldcontext); + Assert(tupDesc->natts == 2); /* * Make forced cleanup: if at least one fss isn't actual, remove parent FS @@ -2216,9 +2193,10 @@ aqo_cleanup(PG_FUNCTION_ARGS) values[0] = Int32GetDatum(fs_num); values[1] = Int32GetDatum(fss_num); - tuplestore_putvalues(tupstore, tupDesc, values, nulls); - tuplestore_donestoring(tupstore); - PG_RETURN_VOID(); + tuple = heap_form_tuple(tupDesc, values, nulls); + result = HeapTupleGetDatum(tuple); + + PG_RETURN_DATUM(result); } /* From 32556265061d521ee478fdb9fbe73ad5c1b460de Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Tue, 10 Jan 2023 10:32:39 +0700 Subject: [PATCH 083/134] Removed the learn_cache routine. Now it is not needed, because non-transactional storage is used. 
--- Makefile | 2 +- aqo.c | 3 +- aqo.h | 7 +- aqo_shared.c | 158 --------------- aqo_shared.h | 19 -- cardinality_estimation.c | 2 +- cardinality_hooks.c | 2 +- expected/statement_timeout.out | 23 +++ learn_cache.c | 341 --------------------------------- learn_cache.h | 17 -- postprocessing.c | 24 +-- sql/statement_timeout.sql | 7 + storage.c | 19 +- 13 files changed, 50 insertions(+), 574 deletions(-) delete mode 100644 learn_cache.c delete mode 100644 learn_cache.h diff --git a/Makefile b/Makefile index 7370647f..d3aec440 100755 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ MODULE_big = aqo OBJS = $(WIN32RES) \ aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ - selectivity_cache.o storage.o utils.o learn_cache.o aqo_shared.o + selectivity_cache.o storage.o utils.o aqo_shared.o TAP_TESTS = 1 diff --git a/aqo.c b/aqo.c index 0e3afec3..a80d0a0f 100644 --- a/aqo.c +++ b/aqo.c @@ -22,7 +22,6 @@ #include "cardinality_hooks.h" #include "path_utils.h" #include "preprocessing.h" -#include "learn_cache.h" #include "storage.h" @@ -212,7 +211,7 @@ _PG_init(void) PGC_USERSET, 0, NULL, - lc_assign_hook, + NULL, NULL ); diff --git a/aqo.h b/aqo.h index 4471d2b8..0a373147 100644 --- a/aqo.h +++ b/aqo.h @@ -174,6 +174,7 @@ extern bool aqo_show_hash; extern bool aqo_show_details; extern int aqo_join_threshold; extern bool use_wide_search; +extern bool aqo_learn_statement_timeout; /* Parameters for current query */ typedef struct QueryContextData @@ -256,10 +257,8 @@ int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); /* Storage interaction */ -extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, - bool isSafe); -extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, - List *reloids, bool isTimedOut); +extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids); +extern bool update_fss_ext(uint64 fs, int fss, 
OkNNrdata *data, List *reloids); /* Query preprocessing hooks */ extern void print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, diff --git a/aqo_shared.c b/aqo_shared.c index 86908880..0a6a8db6 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -12,161 +12,13 @@ #include "storage.h" -typedef struct -{ - int magic; - uint32 total_size; - uint32 delta; -} dsm_seg_hdr; - -#define free_space(hdr) (uint32) (temp_storage_size - sizeof(dsm_seg_hdr) - hdr->delta) -#define addr(delta) ((char *) dsm_segment_address(seg) + sizeof(dsm_seg_hdr) + delta) - shmem_startup_hook_type prev_shmem_startup_hook = NULL; AQOSharedState *aqo_state = NULL; -HTAB *fss_htab = NULL; -static int aqo_htab_max_items = 1000; int fs_max_items = 10000; /* Max number of different feature spaces in ML model */ int fss_max_items = 100000; /* Max number of different feature subspaces in ML model */ -static uint32 temp_storage_size = 1024 * 1024 * 10; /* Storage size, in bytes */ -static dsm_segment *seg = NULL; - -static void aqo_detach_shmem(int code, Datum arg); static void on_shmem_shutdown(int code, Datum arg); - -void * -get_dsm_all(uint32 *size) -{ - dsm_seg_hdr *hdr; - - Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE)); - - if (aqo_state->dsm_handler == DSM_HANDLE_INVALID) - { - /* Fast path. No any cached data exists. */ - *size = 0; - return NULL; - } - - if (!seg) - { - /* if segment exists we should connect to */ - seg = dsm_attach(aqo_state->dsm_handler); - Assert(seg); - dsm_pin_mapping(seg); - before_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); - } - - hdr = (dsm_seg_hdr *) dsm_segment_address(seg); - *size = hdr->delta; - return (char *) hdr + sizeof(dsm_seg_hdr); -} - -/* - * Cleanup of DSM cache: set header into default state and zero the memory block. - * This operation can be coupled with the cache dump, so we do it under an external - * hold of the lock. 
- */ -void -reset_dsm_cache(void) -{ - dsm_seg_hdr *hdr; - char *start; - - Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE)); - - if (aqo_state->dsm_handler == DSM_HANDLE_INVALID || !seg) - /* Fast path. No any cached data exists. */ - return; - - hdr = (dsm_seg_hdr *) dsm_segment_address(seg); - start = (char *) hdr + sizeof(dsm_seg_hdr); - - /* Reset the cache */ - memset(start, 0, hdr->delta); - - hdr->delta = 0; - hdr->total_size = temp_storage_size - sizeof(dsm_seg_hdr); -} - -char * -get_cache_address(void) -{ - dsm_seg_hdr *hdr; - - Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || - LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); - - if (aqo_state->dsm_handler != DSM_HANDLE_INVALID) - { - if (!seg) - { - /* Another process created the segment yet. Just attach to. */ - seg = dsm_attach(aqo_state->dsm_handler); - dsm_pin_mapping(seg); - before_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); - } - - hdr = (dsm_seg_hdr *) dsm_segment_address(seg); - } - else - { - /* - * First request for DSM cache in this instance. - * Create the DSM segment. Pin it to live up to instance shutdown. - * Don't forget to detach DSM segment before an exit. 
- */ - seg = dsm_create(temp_storage_size, 0); - dsm_pin_mapping(seg); - dsm_pin_segment(seg); - aqo_state->dsm_handler = dsm_segment_handle(seg); - before_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); - - hdr = (dsm_seg_hdr *) dsm_segment_address(seg); - hdr->magic = AQO_SHARED_MAGIC; - hdr->delta = 0; - hdr->total_size = temp_storage_size - sizeof(dsm_seg_hdr); - } - - Assert(seg); - Assert(hdr->magic == AQO_SHARED_MAGIC && hdr->total_size > 0); - - return (char *) hdr + sizeof(dsm_seg_hdr); -} - -uint32 -get_dsm_cache_pos(uint32 size) -{ - dsm_seg_hdr *hdr; - uint32 pos; - - Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || - LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); - - (void) get_cache_address(); - hdr = (dsm_seg_hdr *) dsm_segment_address(seg); - - if (free_space(hdr) < size || size == 0) - elog(ERROR, - "DSM cache can't allcoate a mem block. Required: %u, free: %u", - size, free_space(hdr)); - - pos = hdr->delta; - hdr->delta += size; - Assert(free_space(hdr) >= 0); - return pos; -} - -static void -aqo_detach_shmem(int code, Datum arg) -{ - if (seg != NULL) - dsm_detach(seg); - seg = NULL; -} - void aqo_init_shmem(void) { @@ -177,7 +29,6 @@ aqo_init_shmem(void) prev_shmem_startup_hook(); aqo_state = NULL; - fss_htab = NULL; stat_htab = NULL; qtexts_htab = NULL; data_htab = NULL; @@ -189,7 +40,6 @@ aqo_init_shmem(void) { /* First time through ... 
*/ - aqo_state->dsm_handler = DSM_HANDLE_INVALID; aqo_state->qtexts_dsa_handler = DSM_HANDLE_INVALID; aqo_state->data_dsa_handler = DSM_HANDLE_INVALID; @@ -207,13 +57,6 @@ aqo_init_shmem(void) LWLockInitialize(&aqo_state->queries_lock, LWLockNewTrancheId()); } - info.keysize = sizeof(htab_key); - info.entrysize = sizeof(htab_entry); - fss_htab = ShmemInitHash("AQO hash", - aqo_htab_max_items, aqo_htab_max_items, - &info, - HASH_ELEM | HASH_BLOBS); - info.keysize = sizeof(((StatEntry *) 0)->queryid); info.entrysize = sizeof(StatEntry); stat_htab = ShmemInitHash("AQO Stat HTAB", fs_max_items, fs_max_items, @@ -279,7 +122,6 @@ aqo_memsize(void) Size size; size = MAXALIGN(sizeof(AQOSharedState)); - size = add_size(size, hash_estimate_size(aqo_htab_max_items, sizeof(htab_entry))); size = add_size(size, hash_estimate_size(fs_max_items, sizeof(AQOSharedState))); size = add_size(size, hash_estimate_size(fs_max_items, sizeof(StatEntry))); size = add_size(size, hash_estimate_size(fs_max_items, sizeof(QueryTextEntry))); diff --git a/aqo_shared.h b/aqo_shared.h index 926a2723..e922fb1c 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -9,23 +9,9 @@ #define AQO_SHARED_MAGIC 0x053163 -typedef struct -{ - /* XXX we assume this struct contains no padding bytes */ - uint64 fs; - int64 fss; -} htab_key; - -typedef struct -{ - htab_key key; - uint32 hdr_off; /* offset of data in DSM cache */ -} htab_entry; - typedef struct AQOSharedState { LWLock lock; /* mutual exclusion */ - dsm_handle dsm_handler; /* Storage fields */ LWLock stat_lock; /* lock for access to stat storage */ @@ -47,16 +33,11 @@ typedef struct AQOSharedState extern shmem_startup_hook_type prev_shmem_startup_hook; extern AQOSharedState *aqo_state; -extern HTAB *fss_htab; extern int fs_max_items; /* Max number of feature spaces that AQO can operate */ extern int fss_max_items; extern Size aqo_memsize(void); -extern void reset_dsm_cache(void); -extern void *get_dsm_all(uint32 *size); -extern char *get_cache_address(void); 
-extern uint32 get_dsm_cache_pos(uint32 size); extern void aqo_init_shmem(void); #endif /* AQO_SHARED_H */ diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 9db202a1..aca17f1e 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -81,7 +81,7 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, &ncols, &features); data = OkNNr_allocate(ncols); - if (load_fss_ext(query_context.fspace_hash, *fss, data, NULL, true)) + if (load_fss_ext(query_context.fspace_hash, *fss, data, NULL)) result = OkNNr_predict(data, features); else { diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 5380a560..c26fcccb 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -452,7 +452,7 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, *fss = get_grouped_exprs_hash(child_fss, group_exprs); memset(&data, 0, sizeof(OkNNrdata)); - if (!load_fss_ext(query_context.fspace_hash, *fss, &data, NULL, true)) + if (!load_fss_ext(query_context.fspace_hash, *fss, &data, NULL)) return -1; Assert(data.rows == 1); diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index 0b26b430..77a9a641 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -111,6 +111,29 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); 5 (1 row) +-- Interrupted query should immediately appear in aqo_data +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + +SET statement_timeout = 500; +SELECT count(*) FROM aqo_data; -- Must be zero + count +------- + 0 +(1 row) + +SELECT x, pg_sleep(0.1) FROM t WHERE x > 0; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. +ERROR: canceling statement due to statement timeout +SELECT count(*) FROM aqo_data; -- Must be one + count +------- + 1 +(1 row) + SELECT 1 FROM aqo_reset(); ?column? 
---------- diff --git a/learn_cache.c b/learn_cache.c deleted file mode 100644 index c7f6ef87..00000000 --- a/learn_cache.c +++ /dev/null @@ -1,341 +0,0 @@ -/* - ******************************************************************************* - * - * - * - ******************************************************************************* - * - * Copyright (c) 2016-2022, Postgres Professional - * - * IDENTIFICATION - * aqo/learn_cache.c - * - */ - -#include "postgres.h" -#include "access/parallel.h" /* Just for IsParallelWorker() */ -#include "miscadmin.h" - -#include "aqo.h" -#include "aqo_shared.h" -#include "learn_cache.h" -#include "storage.h" - - -typedef struct -{ - int magic; - htab_key key; - int rows; - int cols; - int nrelids; - - /* - * Links to variable data: - * double *matrix[aqo_K]; - * double *targets; - * double *rfactors; - * int *relids; - */ -} dsm_block_hdr; - - -bool aqo_learn_statement_timeout = false; - -static uint32 init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relids); - - -/* Calculate, how many data we need to store an ML record. 
*/ -static uint32 -calculate_size(int cols, List *reloids) -{ - uint32 size = sizeof(dsm_block_hdr); /* header's size */ - - size += sizeof(double) * cols * aqo_K; /* matrix */ - size += 2 * sizeof(double) * aqo_K; /* targets, rfactors */ - - /* Calculate memory size needed to store relation names */ - size += list_length(reloids) * sizeof(Oid); - return size; -} - -bool -lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids) -{ - htab_key key = {fs, fss}; - htab_entry *entry; - dsm_block_hdr *hdr; - char *ptr; - bool found; - int i; - ListCell *lc; - uint32 size; - - Assert(fss_htab && aqo_learn_statement_timeout); - - size = calculate_size(data->cols, reloids); - LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); - - entry = (htab_entry *) hash_search(fss_htab, &key, HASH_ENTER, &found); - if (found) - { - hdr = (dsm_block_hdr *) (get_cache_address() + entry->hdr_off); - - Assert(hdr->magic == AQO_SHARED_MAGIC); - Assert(hdr->key.fs == fs && hdr->key.fss == fss); - - if (data->cols != hdr->cols || list_length(reloids) != hdr->nrelids) - { - /* - * Collision found: the same {fs,fss}, but something different. - * For simplicity - just don't update. - */ - elog(DEBUG5, "[AQO]: A collision found in the temporary storage."); - LWLockRelease(&aqo_state->lock); - return false; - } - } - else - { - /* Get new block of DSM */ - entry->hdr_off = get_dsm_cache_pos(size); - hdr = (dsm_block_hdr *) (get_cache_address() + entry->hdr_off); - - /* These fields shouldn't change */ - hdr->magic = AQO_SHARED_MAGIC; - hdr->key.fs = fs; - hdr->key.fss = fss; - hdr->cols = data->cols; - hdr->nrelids = list_length(reloids); - } - - hdr->rows = data->rows; - ptr = (char *) hdr + sizeof(dsm_block_hdr); /* start point of variable data */ - - /* copy the matrix into DSM storage */ - - if (hdr->cols > 0) - { - for (i = 0; i < aqo_K; ++i) - { - if (i >= hdr->rows) - break; - - if (!ptr || !data->matrix[i]) - elog(PANIC, "Something disruptive have happened! 
%d, %d (%d %d)", i, hdr->rows, found, hdr->cols); - memcpy(ptr, data->matrix[i], sizeof(double) * hdr->cols); - ptr += sizeof(double) * data->cols; - } - } - - /* - * Kludge code. But we should rewrite this code because now all knowledge - * base lives in non-transactional shared memory. - */ - ptr = (char *) hdr + sizeof(dsm_block_hdr) + (sizeof(double) * data->cols * aqo_K); - - /* copy targets into DSM storage */ - memcpy(ptr, data->targets, sizeof(double) * hdr->rows); - ptr += sizeof(double) * aqo_K; - - /* copy rfactors into DSM storage */ - memcpy(ptr, data->rfactors, sizeof(double) * hdr->rows); - ptr += sizeof(double) * aqo_K; - - /* store list of relations */ - foreach(lc, reloids) - { - Oid reloid = lfirst_oid(lc); - - memcpy(ptr, &reloid, sizeof(Oid)); - ptr += sizeof(Oid); - } - - /* Check the invariant */ - Assert((uint32)(ptr - (char *) hdr) == size); - - elog(DEBUG5, "DSM entry: %s, targets: %d.", - found ? "Reused" : "New entry", hdr->rows); - LWLockRelease(&aqo_state->lock); - return true; -} - -bool -lc_has_fss(uint64 fs, int fss) -{ - htab_key key = {fs, fss}; - bool found; - - if (!aqo_learn_statement_timeout) - return false; - - Assert(fss_htab); - - LWLockAcquire(&aqo_state->lock, LW_SHARED); - (void) hash_search(fss_htab, &key, HASH_FIND, &found); - LWLockRelease(&aqo_state->lock); - - return found; -} - -/* - * Load ML data from a memory cache, not from a table. 
- */ -bool -lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids) -{ - htab_key key = {fs, fss}; - htab_entry *entry; - bool found; - dsm_block_hdr *hdr; - - Assert(fss_htab && aqo_learn_statement_timeout); - - if (aqo_show_details) - elog(NOTICE, "[AQO] Load ML data for fs "UINT64_FORMAT", fss %d from the cache", - fs, fss); - - LWLockAcquire(&aqo_state->lock, LW_SHARED); - entry = (htab_entry *) hash_search(fss_htab, &key, HASH_FIND, &found); - if (!found) - { - LWLockRelease(&aqo_state->lock); - return false; - } - - hdr = (dsm_block_hdr *) (get_cache_address() + entry->hdr_off); - Assert(hdr->magic == AQO_SHARED_MAGIC); - Assert(hdr->key.fs == fs && hdr->key.fss == fss); - - /* XXX */ - if (hdr->cols != data->cols) - { - LWLockRelease(&aqo_state->lock); - return false; - } - - init_with_dsm(data, hdr, reloids); - LWLockRelease(&aqo_state->lock); - return true; -} - -static uint32 -init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **reloids) -{ - int i; - char *ptr = (char *) hdr + sizeof(dsm_block_hdr); - - Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || - LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); - Assert(hdr->magic == AQO_SHARED_MAGIC); - Assert(hdr && ptr && hdr->rows > 0); - - data->rows = hdr->rows; - data->cols = hdr->cols; - - if (data->cols > 0) - { - for (i = 0; i < aqo_K; ++i) - { - if (i < data->rows) - { - data->matrix[i] = palloc(sizeof(double) * data->cols); - memcpy(data->matrix[i], ptr, sizeof(double) * data->cols); - } - ptr += sizeof(double) * data->cols; - } - } - - /* - * Kludge code. But we should rewrite this code because now all knowledge - * base lives in non-transactional shared memory. 
- */ - ptr = (char *) hdr + sizeof(dsm_block_hdr) + (sizeof(double) * data->cols * aqo_K); - - memcpy(data->targets, ptr, sizeof(double) * hdr->rows); - ptr += sizeof(double) * aqo_K; - memcpy(data->rfactors, ptr, sizeof(double) * hdr->rows); - ptr += sizeof(double) * aqo_K; - - if (reloids) - { - *reloids = NIL; - for (i = 0; i < hdr->nrelids; i++) - { - *reloids = lappend_oid(*reloids, *(Oid *)(ptr)); - ptr += sizeof(Oid); - } - return calculate_size(hdr->cols, *reloids); - } - - /* It is just a read operation. No any interest in size calculation. */ - return 0; -} - -void -lc_flush_data(void) -{ - char *ptr; - uint32 size; - - if (aqo_state->dsm_handler == DSM_HANDLE_INVALID) - /* Fast path. No any cached data exists. */ - return; - - LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); - ptr = get_dsm_all(&size); - - /* Iterate through records and store them into the aqo_data table */ - while (size > 0) - { - dsm_block_hdr *hdr = (dsm_block_hdr *) ptr; - OkNNrdata data; - List *reloids = NIL; - uint32 delta = 0; - - delta = init_with_dsm(&data, hdr, &reloids); - Assert(delta > 0); - ptr += delta; - size -= delta; - aqo_data_store(hdr->key.fs, hdr->key.fss, &data, reloids); - - if (!hash_search(fss_htab, (void *) &hdr->key, HASH_REMOVE, NULL)) - elog(PANIC, "[AQO] Flush: local ML cache is corrupted."); - } - - reset_dsm_cache(); - LWLockRelease(&aqo_state->lock); -} - -/* - * Main purpose of this hook is to cleanup a backend cache in some way to prevent - * memory leaks - in large queries we could have many unused fss nodes. - */ -void -lc_assign_hook(bool newval, void *extra) -{ - HASH_SEQ_STATUS status; - htab_entry *entry; - - if (!fss_htab || !IsUnderPostmaster || IsParallelWorker()) - /* Clean this shared cache only in main backend process. */ - return; - - /* Remove all entries, reset memory context. */ - - elog(DEBUG5, "[AQO] Cleanup local cache of ML data."); - - /* Remove all entries in the shared hash table. 
*/ - LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); - hash_seq_init(&status, fss_htab); - while ((entry = (htab_entry *) hash_seq_search(&status)) != NULL) - { - if (!hash_search(fss_htab, (void *) &entry->key, HASH_REMOVE, NULL)) - elog(PANIC, "[AQO] The local ML cache is corrupted."); - } - - /* Now, clean additional DSM block */ - reset_dsm_cache(); - - LWLockRelease(&aqo_state->lock); -} diff --git a/learn_cache.h b/learn_cache.h deleted file mode 100644 index df61700e..00000000 --- a/learn_cache.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef LEARN_CACHE_H -#define LEARN_CACHE_H - -#include "nodes/pg_list.h" - -#include "machine_learning.h" - -extern bool aqo_learn_statement_timeout; - -extern bool lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids); -extern bool lc_has_fss(uint64 fs, int fss); -extern bool lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids); -extern void lc_remove_fss(uint64 fs, int fss); -extern void lc_flush_data(void); -extern void lc_assign_hook(bool newval, void *extra); - -#endif /* LEARN_CACHE_H */ diff --git a/postprocessing.c b/postprocessing.c index 75a61707..165391dd 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -28,10 +28,11 @@ #include "path_utils.h" #include "machine_learning.h" #include "preprocessing.h" -#include "learn_cache.h" #include "storage.h" +bool aqo_learn_statement_timeout = false; + typedef struct { List *clauselist; @@ -58,9 +59,8 @@ static char *PlanStateInfo = "PlanStateInfo"; /* Query execution statistics collecting utilities */ static void atomic_fss_learn_step(uint64 fhash, int fss, OkNNrdata *data, - double *features, - double target, double rfactor, - List *reloids, bool isTimedOut); + double *features, double target, + double rfactor, List *reloids); static bool learnOnPlanState(PlanState *p, void *context); static void learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, double learned, double rfactor, Plan *plan, @@ -85,13 +85,13 @@ static bool ExtractFromQueryEnv(QueryDesc 
*queryDesc); static void atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, double *features, double target, double rfactor, - List *reloids, bool isTimedOut) + List *reloids) { - if (!load_fss_ext(fs, fss, data, NULL, !isTimedOut)) + if (!load_fss_ext(fs, fss, data, NULL)) data->rows = 0; data->rows = OkNNr_learn(data, features, target, rfactor); - update_fss_ext(fs, fss, data, reloids, isTimedOut); + update_fss_ext(fs, fss, data, reloids); } static void @@ -120,7 +120,7 @@ learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, /* Critical section */ atomic_fss_learn_step(fs, fss, data, NULL, - target, rfactor, rels->hrels, ctx->isTimedOut); + target, rfactor, rels->hrels); /* End of critical section */ } @@ -157,8 +157,7 @@ learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, data = OkNNr_allocate(ncols); /* Critical section */ - atomic_fss_learn_step(fs, fss, data, features, target, rfactor, - rels->hrels, ctx->isTimedOut); + atomic_fss_learn_step(fs, fss, data, features, target, rfactor, rels->hrels); /* End of critical section */ } @@ -750,11 +749,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) { aqo_obj_stat ctx = {NIL, NIL, NIL, query_context.learn_aqo, false}; - /* - * Before learn phase, flush all cached data down to ML base. - */ - lc_flush_data(); - /* * Analyze plan if AQO need to learn or need to collect statistics only. 
*/ diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql index 36afc370..60ae7a14 100644 --- a/sql/statement_timeout.sql +++ b/sql/statement_timeout.sql @@ -60,6 +60,13 @@ SET statement_timeout = 5500; SELECT *, pg_sleep(1) FROM t; -- Get reliable data SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +-- Interrupted query should immediately appear in aqo_data +SELECT 1 FROM aqo_reset(); +SET statement_timeout = 500; +SELECT count(*) FROM aqo_data; -- Must be zero +SELECT x, pg_sleep(0.1) FROM t WHERE x > 0; +SELECT count(*) FROM aqo_data; -- Must be one + SELECT 1 FROM aqo_reset(); DROP TABLE t; DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index 8a21892c..fcbe5569 100644 --- a/storage.c +++ b/storage.c @@ -27,7 +27,6 @@ #include "aqo_shared.h" #include "machine_learning.h" #include "preprocessing.h" -#include "learn_cache.h" #include "storage.h" @@ -107,25 +106,15 @@ PG_FUNCTION_INFO_V1(aqo_execution_time); bool -load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe) +load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids) { - if (isSafe && (!aqo_learn_statement_timeout || !lc_has_fss(fs, fss))) - return load_aqo_data(fs, fss, data, reloids, false); - else - { - Assert(aqo_learn_statement_timeout); - return lc_load_fss(fs, fss, data, reloids); - } + return load_aqo_data(fs, fss, data, reloids, false); } bool -update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids, - bool isTimedOut) +update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids) { - if (!isTimedOut) - return aqo_data_store(fs, fss, data, reloids); - else - return lc_update_fss(fs, fss, data, reloids); + return aqo_data_store(fs, fss, data, reloids); } /* From 54c7615981d16db586966ee6a5b28c486bae0d17 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 28 Jun 2022 12:28:23 +0300 Subject: [PATCH 084/134] Load neighbours with the fss hash except dublicated neighours. Rewrite test for look-a-like functional. 
Current tests contain correlated columns, and queries have more nodes and descriptive features. Add aqo_k as a custom parameter to define the number of features used for prediction. Its default value is 3. Queries can contain a larger number of features than 3, especially generic queries. Also add the predict_with_few_neighbors parameter to allow prediction with fewer than 3 neighbors. This is done so as not to change the previous logic of the code --- aqo.c | 26 +- aqo.h | 1 + cardinality_estimation.c | 2 +- expected/look_a_like.out | 517 ++++++++++++++++++++++++++++----------- machine_learning.c | 5 +- sql/look_a_like.sql | 110 ++++++--- storage.c | 78 ++++-- storage.h | 2 +- 8 files changed, 543 insertions(+), 198 deletions(-) diff --git a/aqo.c b/aqo.c index a80d0a0f..63e46a86 100644 --- a/aqo.c +++ b/aqo.c @@ -34,6 +34,7 @@ void _PG_init(void); /* Strategy of determining feature space for new queries. */ int aqo_mode = AQO_MODE_CONTROLLED; bool force_collect_stat; +bool aqo_predict_with_few_neighbors; /* * Show special info in EXPLAIN mode. 
@@ -71,7 +72,7 @@ int auto_tuning_infinite_loop = 8; /* Machine learning parameters */ /* The number of nearest neighbors which will be chosen for ML-operations */ -int aqo_k = 3; +int aqo_k; double log_selectivity_lower_bound = -30; /* @@ -293,6 +294,29 @@ _PG_init(void) NULL ); + DefineCustomIntVariable("aqo.k_neighbors_threshold", + "Set the threshold of number of neighbors for predicting.", + NULL, + &aqo_k, + 3, + 1, INT_MAX / 1000, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomBoolVariable("aqo.predict_with_few_neighbors", + "Make prediction with less neighbors than we should have.", + NULL, + &aqo_predict_with_few_neighbors, + true, + PGC_USERSET, + 0, + NULL, + lc_assign_hook, + NULL); + prev_shmem_startup_hook = shmem_startup_hook; shmem_startup_hook = aqo_init_shmem; prev_planner_hook = planner_hook; diff --git a/aqo.h b/aqo.h index 0a373147..9418646c 100644 --- a/aqo.h +++ b/aqo.h @@ -217,6 +217,7 @@ extern double auto_tuning_convergence_error; /* Machine learning parameters */ extern int aqo_k; +extern bool aqo_predict_with_few_neighbors; extern double log_selectivity_lower_bound; /* Parameters for current query */ diff --git a/cardinality_estimation.c b/cardinality_estimation.c index aca17f1e..f93e0905 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -93,7 +93,7 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, */ /* Try to search in surrounding feature spaces for the same node */ - if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, use_wide_search)) + if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, use_wide_search, features)) result = -1; else { diff --git a/expected/look_a_like.out b/expected/look_a_like.out index ecd73fb4..b0d3047c 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -2,14 +2,17 @@ CREATE EXTENSION aqo; SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; +set aqo.show_hash = 'off'; +SET 
aqo.k_neighbors_threshold_for_predict = 1; SET enable_material = 'off'; DROP TABLE IF EXISTS a,b CASCADE; NOTICE: table "a" does not exist, skipping NOTICE: table "b" does not exist, skipping -CREATE TABLE a (x int); -INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival; -CREATE TABLE b (y int); -INSERT INTO b (y) SELECT mod(ival + 1,10) FROM generate_series(1,1000) As ival; +-- Create tables with correlated datas in columns +CREATE TABLE a (x1 int, x2 int, x3 int); +INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; +CREATE TABLE b (y1 int, y2 int, y3 int); +INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; -- -- Returns string-by-string explain of a query. Made for removing some strings -- from the explain output. @@ -25,207 +28,425 @@ $$ LANGUAGE PLPGSQL; -- in the next queries with the same fss_hash SELECT str AS result FROM expln(' -SELECT x FROM A where x = 5;') AS str; - result ------------------------------------------------- - Seq Scan on public.a (actual rows=100 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +-------------------------------------------------------- + Nested Loop (actual rows=10000 loops=1) AQO not used - Output: x - Filter: (a.x = 5) - Rows Removed by Filter: 900 + Output: a.x1, b.y1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 = 5) AND (a.x2 = 5)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=100 loops=100) + AQO not used + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 = 5) + Rows Removed by Filter: 900 Using aqo: true AQO mode: LEARN JOINS: 0 -(8 rows) +(16 rows) SELECT str AS result FROM expln(' -SELECT x FROM A,B WHERE x = 5 AND A.x = B.y;') AS str -; -- Find cardinality for SCAN 
A(x=5) from a neighbour class, created by the - result --------------------------------------------------------- - Nested Loop (actual rows=10000 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------ + Hash Join (actual rows=50000 loops=1) AQO not used - Output: a.x - -> Seq Scan on public.b (actual rows=100 loops=1) + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) AQO not used - Output: b.y - Filter: (b.y = 5) - Rows Removed by Filter: 900 - -> Seq Scan on public.a (actual rows=100 loops=100) - AQO: rows=100, error=0% - Output: a.x - Filter: (a.x = 5) - Rows Removed by Filter: 900 + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=500 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=500 loops=1) + AQO not used + Output: a.x1 + Filter: ((a.x1 < 5) AND (a.x2 < 5)) + Rows Removed by Filter: 500 Using aqo: true AQO mode: LEARN JOINS: 0 -(16 rows) +(17 rows) --- query, executed above. SELECT str AS result FROM expln(' -SELECT x, sum(x) FROM A,B WHERE y = 5 AND A.x = B.y group by(x);') AS str -; -- Find the JOIN cardinality from a neighbour class. 
- result --------------------------------------------------------------- - GroupAggregate (actual rows=1 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 = 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +---------------------------------------------------------- + Hash Join (actual rows=0 loops=1) AQO not used - Output: a.x, sum(a.x) - Group Key: a.x - -> Nested Loop (actual rows=10000 loops=1) - AQO: rows=10000, error=0% - Output: a.x - -> Seq Scan on public.a (actual rows=100 loops=1) - AQO: rows=100, error=0% - Output: a.x - Filter: (a.x = 5) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=100 loops=100) - AQO: rows=100, error=0% - Output: b.y - Filter: (b.y = 5) - Rows Removed by Filter: 900 + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1 loops=1) + AQO: rows=1000, error=100% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=0 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=0 loops=1) + AQO not used + Output: a.x1 + Filter: ((a.x1 < 5) AND (a.x2 = 5)) + Rows Removed by Filter: 1000 Using aqo: true AQO mode: LEARN - JOINS: 1 -(20 rows) + JOINS: 0 +(17 rows) --- cardinality 100 in the first Seq Scan on a +--query contains nodes that have already been predicted SELECT str AS result FROM expln(' -SELECT x, sum(x) FROM A WHERE x = 5 group by(x);') AS str; - result ------------------------------------------------------- - GroupAggregate (actual rows=1 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------ + Hash Join (actual rows=50000 loops=1) + AQO: rows=50000, error=0% + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=500 loops=1) + 
Output: a.x1 + -> Seq Scan on public.a (actual rows=500 loops=1) + AQO: rows=500, error=0% + Output: a.x1 + Filter: ((a.x1 < 10) AND (a.x2 < 5)) + Rows Removed by Filter: 500 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(17 rows) + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------ + Hash Join (actual rows=70000 loops=1) AQO not used - Output: x, sum(x) - Group Key: a.x - -> Seq Scan on public.a (actual rows=100 loops=1) - AQO: rows=100, error=0% - Output: x - Filter: (a.x = 5) - Rows Removed by Filter: 900 + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=700 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=700 loops=1) + AQO not used + Output: a.x1 + Filter: ((a.x1 > 2) AND (a.x2 > 2)) + Rows Removed by Filter: 300 Using aqo: true AQO mode: LEARN JOINS: 0 -(12 rows) +(17 rows) --- no one predicted rows. 
we use knowledge cardinalities of the query --- in the next queries with the same fss_hash SELECT str AS result FROM expln(' -SELECT x FROM A where x < 10 group by(x);') AS str -WHERE str NOT LIKE '%Memory%'; - result -------------------------------------------------------- - HashAggregate (actual rows=10 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------- + Hash Join (actual rows=40000 loops=1) AQO not used - Output: x - Group Key: a.x - -> Seq Scan on public.a (actual rows=1000 loops=1) - AQO not used - Output: x - Filter: (a.x < 10) + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=400 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=400 loops=1) + AQO not used + Output: a.x1 + Filter: ((a.x1 > 5) AND (a.x2 > 5) AND (a.x3 < 10)) + Rows Removed by Filter: 600 Using aqo: true AQO mode: LEARN JOINS: 0 -(11 rows) +(17 rows) --- cardinality 1000 in Seq Scan on a SELECT str AS result FROM expln(' -SELECT x,y FROM A,B WHERE x < 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE '%Memory%'; - result -------------------------------------------------------------- - Merge Join (actual rows=100000 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------- + Hash Join (actual rows=50000 loops=1) AQO not used - Output: a.x, b.y - Merge Cond: (a.x = b.y) - -> Sort (actual rows=1000 loops=1) - Output: a.x - Sort Key: a.x - -> Seq Scan on public.a (actual rows=1000 loops=1) - AQO: rows=1000, error=0% - Output: a.x - Filter: (a.x < 10) - -> Sort (actual rows=99901 loops=1) - 
Output: b.y - Sort Key: b.y - -> Seq Scan on public.b (actual rows=1000 loops=1) + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=500 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=500 loops=1) AQO not used - Output: b.y + Output: a.x1 + Filter: ((a.x1 < 5) AND (a.x2 < 5) AND (a.x3 < 10)) + Rows Removed by Filter: 500 Using aqo: true AQO mode: LEARN JOINS: 0 -(20 rows) +(17 rows) --- cardinality 100 in Seq Scan on a and Seq Scan on b +--query contains nodes that have already been predicted SELECT str AS result FROM expln(' -SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str -WHERE str NOT LIKE '%Memory%'; - result ----------------------------------------------------------- - HashAggregate (actual rows=0 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=40000 loops=1) + AQO: rows=50000, error=20% + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=400 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=400 loops=1) + AQO: rows=500, error=20% + Output: a.x1 + Filter: ((a.x1 < 5) AND (a.x2 < 4) AND (a.x3 < 5)) + Rows Removed by Filter: 600 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(17 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +--------------------------------------------------------------- + HashAggregate (actual rows=2 loops=1) AQO not used - Output: a.x - Group Key: a.x - -> Nested Loop (actual rows=0 loops=1) + 
Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) AQO not used - Output: a.x - -> Seq Scan on public.b (actual rows=0 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) AQO not used - Output: b.y - Filter: (b.y > 10) - Rows Removed by Filter: 1000 - -> Seq Scan on public.a (never executed) - AQO: rows=1000 - Output: a.x - Filter: (a.x < 10) + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(19 rows) +(18 rows) --- --- TODO: --- Not executed case. What could we do better here? --- +--query contains nodes that have already been predicted SELECT str AS result FROM expln(' -SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE '%Memory%' -; - result ----------------------------------------------------------- - Hash Join (actual rows=0 loops=1) +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +--------------------------------------------------------------- + HashAggregate (actual rows=2 loops=1) + AQO: rows=2, error=0% + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) + AQO: rows=200000, error=0% + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO: rows=200, error=0% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(18 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result 
+--------------------------------------------------------------- + HashAggregate (actual rows=1 loops=1) + AQO: rows=2, error=50% + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=100000 loops=1) + AQO: rows=200000, error=50% + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=200, error=50% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 2)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=1000 loops=100) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(18 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 5 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + HashAggregate (actual rows=1 loops=1) AQO not used - Output: a.x, b.y - Hash Cond: (a.x = b.y) - -> Seq Scan on public.a (actual rows=1 loops=1) - AQO: rows=1000, error=100% - Output: a.x - Filter: (a.x < 10) - -> Hash (actual rows=0 loops=1) - Output: b.y - -> Seq Scan on public.b (actual rows=0 loops=1) - AQO: rows=1, error=100% - Output: b.y - Filter: (b.y > 10) - Rows Removed by Filter: 1000 + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=100000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 3) AND (a.x2 < 5) AND (a.x3 > 1)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=1000 loops=100) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 +(18 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + HashAggregate 
(actual rows=2 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(18 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 < 5 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + HashAggregate (actual rows=2 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 < 5)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 (18 rows) -RESET enable_material; -DROP TABLE a,b CASCADE; -SELECT true FROM aqo_reset(); - bool ------- - t +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x2 < 5 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + HashAggregate (actual rows=2 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=140000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO: rows=100, error=-100% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x2 < 5) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan 
on public.b (actual rows=700 loops=200) + AQO not used + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 > 2) + Rows Removed by Filter: 300 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(20 rows) + +--query contains nodes that have already been predicted +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 4 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + HashAggregate (actual rows=1 loops=1) + AQO: rows=2, error=50% + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=70000 loops=1) + AQO: rows=140000, error=50% + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=200, error=50% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 3) AND (a.x2 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=700 loops=100) + AQO: rows=700, error=0% + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 > 2) + Rows Removed by Filter: 300 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(20 rows) + +SELECT 1 FROM aqo_reset(); + ?column? 
+---------- + 1 (1 row) +DROP TABLE a; +DROP TABLE b; DROP EXTENSION aqo CASCADE; diff --git a/machine_learning.c b/machine_learning.c index 7138db38..d4f5cbee 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -74,7 +74,7 @@ fs_distance(double *a, double *b, int len) res += (a[i] - b[i]) * (a[i] - b[i]); } if (len != 0) - res = sqrt(res / len); + res = sqrt(res); return res; } @@ -148,6 +148,9 @@ OkNNr_predict(OkNNrdata *data, double *features) Assert(data != NULL); + if (!aqo_predict_with_few_neighbors && data->rows < aqo_k) + return -1.; + for (i = 0; i < data->rows; ++i) distances[i] = fs_distance(data->matrix[i], features, data->cols); diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index be71feff..5a348cd5 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -2,15 +2,20 @@ CREATE EXTENSION aqo; SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; +set aqo.show_hash = 'off'; +SET aqo.k_neighbors_threshold_for_predict = 1; SET enable_material = 'off'; DROP TABLE IF EXISTS a,b CASCADE; -CREATE TABLE a (x int); -INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival; -CREATE TABLE b (y int); -INSERT INTO b (y) SELECT mod(ival + 1,10) FROM generate_series(1,1000) As ival; +-- Create tables with correlated datas in columns +CREATE TABLE a (x1 int, x2 int, x3 int); +INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; + +CREATE TABLE b (y1 int, y2 int, y3 int); +INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; + -- -- Returns string-by-string explain of a query. Made for removing some strings @@ -26,55 +31,96 @@ $$ LANGUAGE PLPGSQL; -- no one predicted rows. 
we use knowledge cardinalities of the query -- in the next queries with the same fss_hash + SELECT str AS result FROM expln(' -SELECT x FROM A where x = 5;') AS str; +SELECT x1,y1 FROM A,B WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' -SELECT x FROM A,B WHERE x = 5 AND A.x = B.y;') AS str -; -- Find cardinality for SCAN A(x=5) from a neighbour class, created by the --- query, executed above. +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' -SELECT x, sum(x) FROM A,B WHERE y = 5 AND A.x = B.y group by(x);') AS str -; -- Find the JOIN cardinality from a neighbour class. +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 = 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +--query contains nodes that have already been predicted --- cardinality 100 in the first Seq Scan on a SELECT str AS result FROM expln(' -SELECT x, sum(x) FROM A WHERE x = 5 group by(x);') AS str; +SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; --- no one predicted rows. 
we use knowledge cardinalities of the query --- in the next queries with the same fss_hash SELECT str AS result FROM expln(' -SELECT x FROM A where x < 10 group by(x);') AS str -WHERE str NOT LIKE '%Memory%'; --- cardinality 1000 in Seq Scan on a +SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + SELECT str AS result FROM expln(' -SELECT x,y FROM A,B WHERE x < 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE '%Memory%'; +SELECT x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; --- cardinality 100 in Seq Scan on a and Seq Scan on b SELECT str AS result FROM expln(' -SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str -WHERE str NOT LIKE '%Memory%'; +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +--query contains nodes that have already been predicted + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +--query contains nodes that have already been predicted + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 5 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and 
str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 < 5 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x2 < 5 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +--query contains nodes that have already been predicted --- --- TODO: --- Not executed case. What could we do better here? --- SELECT str AS result FROM expln(' -SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE '%Memory%' -; +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 4 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; -RESET enable_material; -DROP TABLE a,b CASCADE; -SELECT true FROM aqo_reset(); +SELECT 1 FROM aqo_reset(); +DROP TABLE a; +DROP TABLE b; DROP EXTENSION aqo CASCADE; diff --git a/storage.c b/storage.c index fcbe5569..9b92088e 100644 --- a/storage.c +++ b/storage.c @@ -90,6 +90,8 @@ static bool _aqo_stat_remove(uint64 queryid); static bool _aqo_queries_remove(uint64 queryid); static bool _aqo_qtexts_remove(uint64 queryid); static bool _aqo_data_remove(data_key *key); +static bool neirest_neighbor(double **matrix, int old_rows, double *neighbor, int cols); +static double fs_distance(double *a, double *b, int len); PG_FUNCTION_INFO_V1(aqo_query_stat); PG_FUNCTION_INFO_V1(aqo_query_texts); @@ -1409,25 +1411,73 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) return result; } +static double +fs_distance(double *a, double *b, int len) +{ + double res = 0; + int i; + + for (i = 0; i < len; ++i) + res += (a[i] - b[i]) * (a[i] - b[i]); + if (len != 0) + res 
= sqrt(res); + return res; +} + +bool +neirest_neighbor(double **matrix, int old_rows, double *neibour, int cols) +{ + int i; + for (i=0; icols == temp_data->cols); Assert(data->matrix); - if (data->rows > 0) - /* trivial strategy - use first suitable record and ignore others */ - return; - - memcpy(data, temp_data, sizeof(OkNNrdata)); - if (data->cols > 0) + if (features != NULL) { - int i; + int old_rows = data->rows; + int k = old_rows; - for (i = 0; i < data->rows; i++) + if (data->cols > 0) { - Assert(data->matrix[i]); - memcpy(data->matrix[i], temp_data->matrix[i], data->cols * sizeof(double)); + int i; + + for (i = 0; i < data->rows; i++) + { + if (k < aqo_K && !neirest_neighbor(data->matrix, old_rows, data->matrix[i], data->cols)) + { + memcpy(data->matrix[k], temp_data->matrix[i], data->cols * sizeof(double)); + data->rfactors[k] = temp_data->rfactors[i]; + data->targets[k] = temp_data->targets[i]; + k++; + } + } + } + } + else + { + if (data->rows > 0) + /* trivial strategy - use first suitable record and ignore others */ + return; + memcpy(data, temp_data, sizeof(OkNNrdata)); + if (data->cols > 0) + { + int i; + + for (i = 0; i < data->rows; i++) + { + Assert(data->matrix[i]); + memcpy(data->matrix[i], temp_data->matrix[i], data->cols * sizeof(double)); + } } } } @@ -1503,7 +1553,7 @@ _fill_knn_data(const DataEntry *entry, List **reloids) */ bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, - bool wideSearch) + bool wideSearch, double *features) { DataEntry *entry; bool found; @@ -1538,7 +1588,7 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, } temp_data = _fill_knn_data(entry, reloids); - build_knn_matrix(data, temp_data); + build_knn_matrix(data, temp_data, features); } else /* Iterate across all elements of the table. XXX: Maybe slow. 
*/ @@ -1576,7 +1626,7 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, else list_free(tmp_oids); - build_knn_matrix(data, temp_data); + build_knn_matrix(data, temp_data, NULL); found = true; } } diff --git a/storage.h b/storage.h index 94891c5d..0e7745e1 100644 --- a/storage.h +++ b/storage.h @@ -101,7 +101,7 @@ extern void aqo_qtexts_load(void); extern bool aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids); extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, - bool wideSearch); + bool wideSearch, double *features); extern void aqo_data_flush(void); extern void aqo_data_load(void); From 4d7a416d43187dd938730adb877e78ab80862b00 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 20 Jan 2023 17:01:58 +0300 Subject: [PATCH 085/134] Add disabled nestloop and mergejoin parameters to stabilize look-a-like test, besides add two additional cases where look-a-like should not be applied. --- aqo.c | 2 +- expected/look_a_like.out | 400 ++++++++++++++++++++++++--------------- sql/look_a_like.sql | 56 ++++-- storage.c | 2 +- 4 files changed, 290 insertions(+), 170 deletions(-) diff --git a/aqo.c b/aqo.c index 63e46a86..b6a9a9ca 100644 --- a/aqo.c +++ b/aqo.c @@ -314,7 +314,7 @@ _PG_init(void) PGC_USERSET, 0, NULL, - lc_assign_hook, + NULL, NULL); prev_shmem_startup_hook = shmem_startup_hook; diff --git a/expected/look_a_like.out b/expected/look_a_like.out index b0d3047c..5910c8ac 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -3,7 +3,9 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; set aqo.show_hash = 'off'; -SET aqo.k_neighbors_threshold_for_predict = 1; +SET aqo.k_neighbors_threshold = 1; +SET enable_nestloop = 'off'; +SET enable_mergejoin = 'off'; SET enable_material = 'off'; DROP TABLE IF EXISTS a,b CASCADE; NOTICE: table "a" does not exist, skipping @@ -29,7 +31,7 @@ $$ LANGUAGE PLPGSQL; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B 
WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result -------------------------------------------------------- Nested Loop (actual rows=10000 loops=1) @@ -52,49 +54,51 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' -SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +SELECT x1,y1 FROM A LEFT JOIN b ON A.x1 = B.y1 WHERE x1 = 5 AND x2 = 5;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------ - Hash Join (actual rows=50000 loops=1) + Hash Left Join (actual rows=10000 loops=1) AQO not used Output: a.x1, b.y1 - Hash Cond: (b.y1 = a.x1) - -> Seq Scan on public.b (actual rows=1000 loops=1) - AQO not used - Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=500 loops=1) - Output: a.x1 - -> Seq Scan on public.a (actual rows=500 loops=1) - AQO not used - Output: a.x1 - Filter: ((a.x1 < 5) AND (a.x2 < 5)) - Rows Removed by Filter: 500 + Hash Cond: (a.x1 = b.y1) + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 = 5) AND (a.x2 = 5)) + Rows Removed by Filter: 900 + -> Hash (actual rows=100 loops=1) + Output: b.y1 + -> Seq Scan on public.b (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: b.y1 + Filter: (b.y1 = 5) + Rows Removed by Filter: 900 Using aqo: true AQO mode: LEARN JOINS: 0 -(17 rows) +(19 rows) SELECT str AS result FROM expln(' -SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 = 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ----------------------------------------------------------- - Hash Join 
(actual rows=0 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------ + Hash Join (actual rows=50000 loops=1) AQO not used Output: a.x1, b.y1 Hash Cond: (b.y1 = a.x1) - -> Seq Scan on public.b (actual rows=1 loops=1) - AQO: rows=1000, error=100% + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO not used Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=0 loops=1) + -> Hash (actual rows=500 loops=1) Output: a.x1 - -> Seq Scan on public.a (actual rows=0 loops=1) + -> Seq Scan on public.a (actual rows=500 loops=1) AQO not used Output: a.x1 - Filter: ((a.x1 < 5) AND (a.x2 = 5)) - Rows Removed by Filter: 1000 + Filter: ((a.x1 < 5) AND (a.x2 < 5)) + Rows Removed by Filter: 500 Using aqo: true AQO mode: LEARN JOINS: 0 @@ -104,7 +108,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------ Hash Join (actual rows=50000 loops=1) @@ -129,7 +133,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------ Hash Join (actual rows=70000 loops=1) @@ -154,7 +158,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT 
x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------------- Hash Join (actual rows=40000 loops=1) @@ -179,7 +183,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------------- Hash Join (actual rows=50000 loops=1) @@ -205,7 +209,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------------ Hash Join (actual rows=40000 loops=1) @@ -230,216 +234,315 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ---------------------------------------------------------------- - HashAggregate (actual rows=2 loops=1) +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +--------------------------------------------------------------------- + Group (actual rows=2 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual 
rows=200000 loops=1) + -> Sort (actual rows=200000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) + Sort Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) AQO not used - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ---------------------------------------------------------------- - HashAggregate (actual rows=2 loops=1) - AQO: rows=2, error=0% +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +--------------------------------------------------------------------- + Group (actual rows=2 loops=1) + AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) - AQO: rows=200000, error=0% + -> Sort (actual rows=200000 loops=1) + AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) - AQO: rows=200, error=0% - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Sort Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) + AQO: rows=200000, error=0% + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO: rows=200, error=0% + 
Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ---------------------------------------------------------------- - HashAggregate (actual rows=1 loops=1) - AQO: rows=2, error=50% +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +--------------------------------------------------------------------- + Group (actual rows=1 loops=1) + AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=100000 loops=1) - AQO: rows=200000, error=50% + -> Sort (actual rows=100000 loops=1) + AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=100 loops=1) - AQO: rows=200, error=50% - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 4) AND (a.x3 > 2)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=1000 loops=100) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Sort Key: a.x1 + -> Nested Loop (actual rows=100000 loops=1) + AQO: rows=200000, error=50% + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=200, error=50% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 2)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=1000 loops=100) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 5 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ------------------------------------------------------------------- - HashAggregate (actual rows=1 
loops=1) +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=1 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=100000 loops=1) + -> Sort (actual rows=100000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=100 loops=1) + Sort Key: a.x1 + -> Nested Loop (actual rows=100000 loops=1) AQO not used - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 3) AND (a.x2 < 5) AND (a.x3 > 1)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=1000 loops=100) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 3) AND (a.x2 < 5) AND (a.x3 > 1)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=1000 loops=100) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ------------------------------------------------------------------- - HashAggregate (actual rows=2 loops=1) +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=2 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) + -> Sort (actual rows=200000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) + Sort Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) AQO not used - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> 
Seq Scan on public.b (actual rows=1000 loops=200) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 < 5 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ------------------------------------------------------------------- - HashAggregate (actual rows=2 loops=1) +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=2 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) + -> Sort (actual rows=200000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) + Sort Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) AQO not used - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 < 5)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 < 5)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x2 < 5 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query 
Identifier%' and str NOT LIKE '%Memory%'; - result ------------------------------------------------------------------- - HashAggregate (actual rows=2 loops=1) +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=2 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=140000 loops=1) + -> Sort (actual rows=140000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) - AQO: rows=100, error=-100% - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 4) AND (a.x2 < 5) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=700 loops=200) + Sort Key: a.x1 + -> Nested Loop (actual rows=140000 loops=1) AQO not used - Output: b.y1, b.y2, b.y3 - Filter: (b.y1 > 2) - Rows Removed by Filter: 300 + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO: rows=100, error=-100% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x2 < 5) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=700 loops=200) + AQO not used + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 > 2) + Rows Removed by Filter: 300 Using aqo: true AQO mode: LEARN JOINS: 1 -(20 rows) +(24 rows) --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 4 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ------------------------------------------------------------------- - HashAggregate (actual rows=1 loops=1) - AQO: rows=2, error=50% +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=1 loops=1) + AQO not used Output: a.x1 Group Key: a.x1 - -> 
Nested Loop (actual rows=70000 loops=1) - AQO: rows=140000, error=50% + -> Sort (actual rows=70000 loops=1) + AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=100 loops=1) - AQO: rows=200, error=50% + Sort Key: a.x1 + -> Nested Loop (actual rows=70000 loops=1) + AQO: rows=140000, error=50% + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=200, error=50% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 3) AND (a.x2 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=700 loops=100) + AQO: rows=700, error=0% + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 > 2) + Rows Removed by Filter: 300 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(24 rows) + +CREATE TABLE c (z1 int, z2 int, z3 int); +INSERT INTO c (z1, z2, z3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; +SELECT str AS result +FROM expln(' +SELECT * FROM (a LEFT JOIN b ON a.x1 = b.y1) sc WHERE +not exists (SELECT z1 FROM c WHERE sc.x1=c.z1 );') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------- + Hash Left Join (actual rows=0 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3 + Hash Cond: (a.x1 = b.y1) + -> Hash Anti Join (actual rows=0 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Hash Cond: (a.x1 = c.z1) + -> Seq Scan on public.a (actual rows=1000 loops=1) + AQO not used Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 3) AND (a.x2 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=700 loops=100) - AQO: rows=700, error=0% + -> Hash (actual rows=1000 loops=1) + Output: c.z1 + -> Seq Scan on public.c (actual rows=1000 loops=1) + AQO not used + Output: c.z1 + -> Hash (never executed) + Output: b.y1, b.y2, b.y3 + -> Seq Scan on public.b (never executed) + AQO: rows=1000 Output: b.y1, b.y2, b.y3 - 
Filter: (b.y1 > 2) - Rows Removed by Filter: 300 Using aqo: true AQO mode: LEARN JOINS: 1 -(20 rows) +(24 rows) + +SELECT str AS result +FROM expln(' +SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------- + Hash Right Join (actual rows=10000000 loops=1) + AQO: rows=1, error=-999999900% + Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3, c.z1, c.z2, c.z3 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=100000 loops=1) + Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 + -> Hash Left Join (actual rows=100000 loops=1) + AQO: rows=1, error=-9999900% + Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 + Hash Cond: (a.x1 = c.z1) + -> Seq Scan on public.a (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: a.x1, a.x2, a.x3 + -> Hash (actual rows=1000 loops=1) + Output: c.z1, c.z2, c.z3 + -> Seq Scan on public.c (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: c.z1, c.z2, c.z3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(24 rows) SELECT 1 FROM aqo_reset(); ?column? 
@@ -449,4 +552,5 @@ SELECT 1 FROM aqo_reset(); DROP TABLE a; DROP TABLE b; +DROP FUNCTION expln; DROP EXTENSION aqo CASCADE; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index 5a348cd5..5dc85b7b 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -3,8 +3,9 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; set aqo.show_hash = 'off'; -SET aqo.k_neighbors_threshold_for_predict = 1; - +SET aqo.k_neighbors_threshold = 1; +SET enable_nestloop = 'off'; +SET enable_mergejoin = 'off'; SET enable_material = 'off'; DROP TABLE IF EXISTS a,b CASCADE; @@ -35,92 +36,107 @@ $$ LANGUAGE PLPGSQL; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' -SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +SELECT x1,y1 FROM A LEFT JOIN b ON A.x1 = B.y1 WHERE x1 = 5 AND x2 = 5;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' -SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 = 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' 
SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' 
and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 5 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 < 5 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x2 < 5 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 4 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +CREATE TABLE c (z1 int, z2 int, z3 int); +INSERT INTO c (z1, z2, z3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; + +SELECT str AS result +FROM expln(' +SELECT * FROM (a LEFT JOIN b ON a.x1 = b.y1) sc WHERE +not exists (SELECT z1 FROM c WHERE sc.x1=c.z1 );') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT * FROM 
(A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT 1 FROM aqo_reset(); DROP TABLE a; DROP TABLE b; +DROP FUNCTION expln; DROP EXTENSION aqo CASCADE; diff --git a/storage.c b/storage.c index 9b92088e..32446d6c 100644 --- a/storage.c +++ b/storage.c @@ -110,7 +110,7 @@ PG_FUNCTION_INFO_V1(aqo_execution_time); bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids) { - return load_aqo_data(fs, fss, data, reloids, false); + return load_aqo_data(fs, fss, data, reloids, false, NULL); } bool From 124e9aef7d3c2ff90f7b05c306614c2b01097047 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 20 Jan 2023 18:24:04 +0300 Subject: [PATCH 086/134] Add delete table c after finished look-a-like test. --- expected/look_a_like.out | 1 + sql/look_a_like.sql | 1 + 2 files changed, 2 insertions(+) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index 5910c8ac..8b2e315c 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -552,5 +552,6 @@ SELECT 1 FROM aqo_reset(); DROP TABLE a; DROP TABLE b; +DROP TABLE c; DROP FUNCTION expln; DROP EXTENSION aqo CASCADE; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index 5dc85b7b..5a41c24a 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -138,5 +138,6 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L SELECT 1 FROM aqo_reset(); DROP TABLE a; DROP TABLE b; +DROP TABLE c; DROP FUNCTION expln; DROP EXTENSION aqo CASCADE; From 1b155f2b1fa3cca0ac964780b1a3e7a3f1ea74fc Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 24 Jan 2023 20:39:04 +0300 Subject: [PATCH 087/134] Rename guc, which connected with setting minimum number of neighbours for predicting and add more understandable explanations of guc. 
--- aqo.c | 6 +++--- expected/look_a_like.out | 2 +- sql/look_a_like.sql | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aqo.c b/aqo.c index b6a9a9ca..6d125666 100644 --- a/aqo.c +++ b/aqo.c @@ -294,8 +294,8 @@ _PG_init(void) NULL ); - DefineCustomIntVariable("aqo.k_neighbors_threshold", - "Set the threshold of number of neighbors for predicting.", + DefineCustomIntVariable("aqo.min_neighbors_for_predicting", + "Set how many neighbors the cardinality prediction will be calculated", NULL, &aqo_k, 3, @@ -307,7 +307,7 @@ _PG_init(void) NULL); DefineCustomBoolVariable("aqo.predict_with_few_neighbors", - "Make prediction with less neighbors than we should have.", + "Establish the ability to make predictions with fewer neighbors than were found.", NULL, &aqo_predict_with_few_neighbors, true, diff --git a/expected/look_a_like.out b/expected/look_a_like.out index 8b2e315c..faa9b0fd 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -3,7 +3,7 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; set aqo.show_hash = 'off'; -SET aqo.k_neighbors_threshold = 1; +SET aqo.min_neighbors_for_predicting = 1; SET enable_nestloop = 'off'; SET enable_mergejoin = 'off'; SET enable_material = 'off'; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index 5a41c24a..9705bf1a 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -3,7 +3,7 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; set aqo.show_hash = 'off'; -SET aqo.k_neighbors_threshold = 1; +SET aqo.min_neighbors_for_predicting = 1; SET enable_nestloop = 'off'; SET enable_mergejoin = 'off'; SET enable_material = 'off'; From 7b1683c83a2bf865de103275653d5c6ee3f7caae Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 20 Dec 2022 12:10:14 +0300 Subject: [PATCH 088/134] Rewriting the statement_timeout test to spend less time on its execution. 
unfortunately, this does not completely solve the problem of the imbalance between the cost of resources expended (namely, the duration of the test) and its usefulness, since its results are ignored. We cannot completely exclude the test from the test, since it is necessary to know about cases of test failure during the further development of the extension. --- expected/statement_timeout.out | 32 ++++++++++++++++---------------- sql/statement_timeout.sql | 26 +++++++++++++------------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index 77a9a641..14b2f0dc 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -17,7 +17,7 @@ BEGIN END IF; END LOOP; END; $$; -CREATE TABLE t AS SELECT * FROM generate_series(1,100) AS x; +CREATE TABLE t AS SELECT * FROM generate_series(1,50) AS x; ANALYZE t; DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. CREATE EXTENSION IF NOT EXISTS aqo; @@ -25,30 +25,30 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'off'; SET aqo.learn_statement_timeout = 'on'; -SET statement_timeout = 800; -- [0.8s] -SELECT *, pg_sleep(1) FROM t; +SET statement_timeout = 100; -- [0.1s] +SELECT *, pg_sleep(0.1) FROM t; NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- haven't any partial data check_estimated_rows ---------------------- - 100 + 50 (1 row) -- Don't learn because running node has smaller cardinality than an optimizer prediction -SET statement_timeout = 3500; -SELECT *, pg_sleep(1) FROM t; +SET statement_timeout = 400; +SELECT *, pg_sleep(0.1) FROM t; NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. 
ERROR: canceling statement due to statement timeout SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); check_estimated_rows ---------------------- - 100 + 50 (1 row) -- We have a real learning data. -SET statement_timeout = 10000; -SELECT *, pg_sleep(1) FROM t; +SET statement_timeout = 8000; +SELECT *, pg_sleep(0.1) FROM t; x | pg_sleep ---+---------- 1 | @@ -74,8 +74,8 @@ SELECT 1 FROM aqo_reset(); 1 (1 row) -SET statement_timeout = 800; -SELECT *, pg_sleep(1) FROM t; -- Not learned +SET statement_timeout = 100; +SELECT *, pg_sleep(0.1) FROM t; -- Not learned NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); @@ -84,18 +84,18 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); 2 (1 row) -SET statement_timeout = 3500; -SELECT *, pg_sleep(1) FROM t; -- Learn! +SET statement_timeout = 500; +SELECT *, pg_sleep(0.1) FROM t; -- Learn! NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); check_estimated_rows ---------------------- - 3 + 2 (1 row) -SET statement_timeout = 5500; -SELECT *, pg_sleep(1) FROM t; -- Get reliable data +SET statement_timeout = 800; +SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data x | pg_sleep ---+---------- 1 | diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql index 60ae7a14..b0ebb6ba 100644 --- a/sql/statement_timeout.sql +++ b/sql/statement_timeout.sql @@ -18,7 +18,7 @@ BEGIN END LOOP; END; $$; -CREATE TABLE t AS SELECT * FROM generate_series(1,100) AS x; +CREATE TABLE t AS SELECT * FROM generate_series(1,50) AS x; ANALYZE t; DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. 
@@ -28,18 +28,18 @@ SET aqo.mode = 'learn'; SET aqo.show_details = 'off'; SET aqo.learn_statement_timeout = 'on'; -SET statement_timeout = 800; -- [0.8s] -SELECT *, pg_sleep(1) FROM t; +SET statement_timeout = 100; -- [0.1s] +SELECT *, pg_sleep(0.1) FROM t; SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- haven't any partial data -- Don't learn because running node has smaller cardinality than an optimizer prediction -SET statement_timeout = 3500; -SELECT *, pg_sleep(1) FROM t; +SET statement_timeout = 400; +SELECT *, pg_sleep(0.1) FROM t; SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- We have a real learning data. -SET statement_timeout = 10000; -SELECT *, pg_sleep(1) FROM t; +SET statement_timeout = 8000; +SELECT *, pg_sleep(0.1) FROM t; SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- Force to make an underestimated prediction @@ -48,16 +48,16 @@ ANALYZE t; INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); SELECT 1 FROM aqo_reset(); -SET statement_timeout = 800; -SELECT *, pg_sleep(1) FROM t; -- Not learned +SET statement_timeout = 100; +SELECT *, pg_sleep(0.1) FROM t; -- Not learned SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -SET statement_timeout = 3500; -SELECT *, pg_sleep(1) FROM t; -- Learn! +SET statement_timeout = 500; +SELECT *, pg_sleep(0.1) FROM t; -- Learn! SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -SET statement_timeout = 5500; -SELECT *, pg_sleep(1) FROM t; -- Get reliable data +SET statement_timeout = 800; +SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- Interrupted query should immediately appear in aqo_data From cc2b98b64f4d81ac8b78c6fdddd40f100c5b0c03 Mon Sep 17 00:00:00 2001 From: Sergei Glukhov Date: Thu, 24 Nov 2022 10:32:07 +0400 Subject: [PATCH 089/134] Added functions: aqo_query_texts_update(), aqo_query_stat_update(), aqo_data_update(). 
Changed function to be able to insert a record: aqo_queries_update(). --- aqo--1.5--1.6.sql | 47 ++++ auto_tuning.c | 6 +- expected/plancache.out | 6 + expected/update_functions.out | 476 ++++++++++++++++++++++++++++++++++ machine_learning.h | 16 ++ postprocessing.c | 14 +- preprocessing.c | 2 +- regress_schedule | 1 + sql/plancache.sql | 3 +- sql/update_functions.sql | 205 +++++++++++++++ storage.c | 358 +++++++++++++++++++++---- storage.h | 48 +++- 12 files changed, 1127 insertions(+), 55 deletions(-) create mode 100644 expected/update_functions.out create mode 100644 sql/update_functions.sql diff --git a/aqo--1.5--1.6.sql b/aqo--1.5--1.6.sql index 4101d33d..077f11b1 100644 --- a/aqo--1.5--1.6.sql +++ b/aqo--1.5--1.6.sql @@ -30,3 +30,50 @@ AS 'MODULE_PATHNAME', 'aqo_cleanup' LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_cleanup() IS 'Remove unneeded rows from the AQO ML storage'; + +-- +-- Update or insert an aqo_query_texts +-- table record for given 'queryid'. +-- + +CREATE FUNCTION aqo_query_texts_update( + queryid bigint, query_text text) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_query_texts_update' +LANGUAGE C VOLATILE; + +-- +-- Update or insert an aqo_query_stat +-- table record for given 'queryid'. +-- + +CREATE FUNCTION aqo_query_stat_update( + queryid bigint, + execution_time_with_aqo double precision[], + execution_time_without_aqo double precision[], + planning_time_with_aqo double precision[], + planning_time_without_aqo double precision[], + cardinality_error_with_aqo double precision[], + cardinality_error_without_aqo double precision[], + executions_with_aqo bigint, + executions_without_aqo bigint) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_query_stat_update' +LANGUAGE C VOLATILE; + +-- +-- Update or insert an aqo_data +-- table record for given 'fs' & 'fss'. 
+-- + +CREATE FUNCTION aqo_data_update( + fs bigint, + fss integer, + nfeatures integer, + features double precision[][], + targets double precision[], + reliability double precision[], + oids Oid[]) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_data_update' +LANGUAGE C VOLATILE; diff --git a/auto_tuning.c b/auto_tuning.c index fad245ed..cf96a2cf 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -202,8 +202,10 @@ automatical_query_tuning(uint64 queryid, StatEntry *stat) if (num_iterations <= auto_tuning_max_iterations || p_use > 0.5) aqo_queries_store(queryid, query_context.fspace_hash, - query_context.learn_aqo, query_context.use_aqo, true); + query_context.learn_aqo, query_context.use_aqo, true, + &aqo_queries_nulls); else aqo_queries_store(queryid, - query_context.fspace_hash, false, false, false); + query_context.fspace_hash, false, false, false, + &aqo_queries_nulls); } diff --git a/expected/plancache.out b/expected/plancache.out index edcf30e7..6874468a 100644 --- a/expected/plancache.out +++ b/expected/plancache.out @@ -44,4 +44,10 @@ SELECT * FROM f1(); DROP FUNCTION f1; DROP TABLE test CASCADE; +SELECT true FROM aqo_reset(); + bool +------ + t +(1 row) + DROP EXTENSION aqo; diff --git a/expected/update_functions.out b/expected/update_functions.out new file mode 100644 index 00000000..6a6198e5 --- /dev/null +++ b/expected/update_functions.out @@ -0,0 +1,476 @@ +CREATE TABLE aqo_test1(a int, b int); +WITH RECURSIVE t(a, b) +AS ( + VALUES (1, 2) + UNION ALL + SELECT t.a + 1, t.b + 1 FROM t WHERE t.a < 20 +) INSERT INTO aqo_test1 (SELECT * FROM t); +CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); +ANALYZE aqo_test1; +CREATE TABLE aqo_test2(a int); +WITH RECURSIVE t(a) +AS ( + VALUES (0) + UNION ALL + SELECT t.a + 1 FROM t WHERE t.a < 100000 +) INSERT INTO aqo_test2 (SELECT * FROM t); +CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); +ANALYZE aqo_test2; +CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; +SET aqo.mode='intelligent'; +SELECT count(*) FROM aqo_test1 a, 
aqo_test2 b WHERE a.a=b.a; + count +------- + 20 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b, aqo_test2 c WHERE a.a = b.a AND b.a = c.a; + count +------- + 20 +(1 row) + +SET aqo.mode='learn'; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + count +------- + 20 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + count +------- + 20 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + count +------- + 20 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 10; + count +------- + 10 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 10 and b.a > 200; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 11 and b.a > 200; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 12 and b.a > 200; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 14 and b.a > 200; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); + count +------- + 0 +(1 row) + +SELECT 
count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); + count +------- + 0 +(1 row) + +SET aqo.mode='controlled'; +CREATE TABLE aqo_query_texts_dump AS SELECT * FROM aqo_query_texts; +CREATE TABLE aqo_queries_dump AS SELECT * FROM aqo_queries; +CREATE TABLE aqo_query_stat_dump AS SELECT * FROM aqo_query_stat; +CREATE TABLE aqo_data_dump AS SELECT * FROM aqo_data; +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + +-- +-- aqo_query_texts_update() testing. +-- +-- Populate aqo_query_texts with dump data. +SELECT aqo_query_texts_update(queryid, query_text) AS res +FROM aqo_query_texts_dump +ORDER BY res; + res +----- + f + t + t + t + t + t + t +(7 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_texts EXCEPT TABLE aqo_query_texts_dump) +UNION ALL +(TABLE aqo_query_texts_dump EXCEPT TABLE aqo_query_texts); + queryid | query_text +---------+------------ +(0 rows) + +-- Update aqo_query_texts with dump data. +SELECT aqo_query_texts_update(queryid, query_text) AS res +FROM aqo_query_texts_dump +ORDER BY res; + res +----- + f + t + t + t + t + t + t +(7 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_texts EXCEPT TABLE aqo_query_texts_dump) +UNION ALL +(TABLE aqo_query_texts_dump EXCEPT TABLE aqo_query_texts); + queryid | query_text +---------+------------ +(0 rows) + +-- +-- aqo_queries_update testing. +-- +-- Populate aqo_queries with dump data. +SELECT aqo_queries_update(queryid, fs, learn_aqo, use_aqo, auto_tuning) AS res +FROM aqo_queries_dump +ORDER BY res; + res +----- + f + t + t + t + t + t + t +(7 rows) + +-- Check if data is the same as in source, no result rows expected. 
+(TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) +UNION ALL +(TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); + queryid | fs | learn_aqo | use_aqo | auto_tuning +---------+----+-----------+---------+------------- +(0 rows) + +-- Update aqo_queries with dump data. +SELECT aqo_queries_update(queryid, fs, learn_aqo, use_aqo, auto_tuning) AS res +FROM aqo_queries_dump +ORDER BY res; + res +----- + f + t + t + t + t + t + t +(7 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) +UNION ALL +(TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); + queryid | fs | learn_aqo | use_aqo | auto_tuning +---------+----+-----------+---------+------------- +(0 rows) + +-- +-- aqo_query_stat_update() testing. +-- +-- Populate aqo_query_stat with dump data. +SELECT aqo_query_stat_update(queryid, execution_time_with_aqo, +execution_time_without_aqo, planning_time_with_aqo, planning_time_without_aqo, +cardinality_error_with_aqo, cardinality_error_without_aqo, executions_with_aqo, +executions_without_aqo) AS res +FROM aqo_query_stat_dump +ORDER BY res; + res +----- + t + t + t + t + t + t +(6 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_stat_dump EXCEPT TABLE aqo_query_stat) +UNION ALL +(TABLE aqo_query_stat EXCEPT TABLE aqo_query_stat_dump); + queryid | execution_time_with_aqo | execution_time_without_aqo | planning_time_with_aqo | planning_time_without_aqo | cardinality_error_with_aqo | cardinality_error_without_aqo | executions_with_aqo | executions_without_aqo +---------+-------------------------+----------------------------+------------------------+---------------------------+----------------------------+-------------------------------+---------------------+------------------------ +(0 rows) + +-- Update aqo_query_stat with dump data. 
+SELECT aqo_query_stat_update(queryid, execution_time_with_aqo, +execution_time_without_aqo, planning_time_with_aqo, planning_time_without_aqo, +cardinality_error_with_aqo, cardinality_error_without_aqo, executions_with_aqo, +executions_without_aqo) AS res +FROM aqo_query_stat_dump +ORDER BY res; + res +----- + t + t + t + t + t + t +(6 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_stat_dump EXCEPT TABLE aqo_query_stat) +UNION ALL +(TABLE aqo_query_stat EXCEPT TABLE aqo_query_stat_dump); + queryid | execution_time_with_aqo | execution_time_without_aqo | planning_time_with_aqo | planning_time_without_aqo | cardinality_error_with_aqo | cardinality_error_without_aqo | executions_with_aqo | executions_without_aqo +---------+-------------------------+----------------------------+------------------------+---------------------------+----------------------------+-------------------------------+---------------------+------------------------ +(0 rows) + +-- +-- aqo_data_update() testing. +-- +-- Populate aqo_data with dump data. +SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res +FROM aqo_data_dump +ORDER BY res; + res +----- + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t +(27 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_data_dump EXCEPT TABLE aqo_data) +UNION ALL +(TABLE aqo_data EXCEPT TABLE aqo_data_dump); + fs | fss | nfeatures | features | targets | reliability | oids +----+-----+-----------+----------+---------+-------------+------ +(0 rows) + +-- Update aqo_data with dump data. 
+SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res +FROM aqo_data_dump +ORDER BY res; + res +----- + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t +(27 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_data_dump EXCEPT TABLE aqo_data) +UNION ALL +(TABLE aqo_data EXCEPT TABLE aqo_data_dump); + fs | fss | nfeatures | features | targets | reliability | oids +----+-----+-----------+----------+---------+-------------+------ +(0 rows) + +-- Reject aqo_query_stat_update if there is NULL elements in array arg. +SELECT aqo_query_stat_update(1, '{NULL, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, 1); + aqo_query_stat_update +----------------------- + f +(1 row) + +-- Reject aqo_query_stat_update if arrays don't have the same size. +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, 1); + aqo_query_stat_update +----------------------- + f +(1 row) + +-- Reject aqo_query_stat_update if there are negative executions. +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', -1, 1); + aqo_query_stat_update +----------------------- + f +(1 row) + +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, -1); + aqo_query_stat_update +----------------------- + f +(1 row) + +-- Reject aqo_query_data_update if number of matrix columns and nfeatures +-- are different. +SELECT aqo_data_update(1, 1, 0, '{{1}}', '{1, 1}', '{1, 1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +-- Reject aqo_query_data_update if there is NULL elements in array arg. 
+SELECT aqo_data_update(1, 1, 1, '{{NULL}}', '{1}', '{1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{NULL}', '{1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{NULL}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +-- Reject aqo_query_data_update if Oids is NULL. +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1}', NULL); + aqo_data_update +----------------- + f +(1 row) + +-- Reject aqo_query_data_update if arrays don't have the same number of rows. +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1, 1}', '{1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1, 1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SELECT aqo_data_update(1, 1, 1, '{{1}, {2}}', '{1}', '{1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SET aqo.mode='disabled'; +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + +DROP EXTENSION aqo; +DROP TABLE aqo_test1, aqo_test2; +DROP TABLE aqo_query_texts_dump, aqo_queries_dump, aqo_query_stat_dump, aqo_data_dump; diff --git a/machine_learning.h b/machine_learning.h index b114cade..1d6d8303 100644 --- a/machine_learning.h +++ b/machine_learning.h @@ -21,6 +21,22 @@ typedef struct OkNNrdata double rfactors[aqo_K]; } OkNNrdata; +/* + * Auxiliary struct, used for passing arguments + * to aqo_data_store() function. 
+ */ +typedef struct AqoDataArgs +{ + int rows; /* Number of filled rows in the matrix */ + int cols; /* Number of columns in the matrix */ + int nrels; /* Number of oids */ + + double **matrix; /* Pointer ot matrix array */ + double *targets; /* Pointer to array of 'targets' */ + double *rfactors; /* Pointer to array of 'rfactors' */ + Oid *oids; /* Array of relation OIDs */ +} AqoDataArgs; + extern OkNNrdata* OkNNr_allocate(int ncols); extern void OkNNr_free(OkNNrdata *data); diff --git a/postprocessing.c b/postprocessing.c index 165391dd..70688b1a 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -767,11 +767,21 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) if (query_context.collect_stat) { + /* + * aqo_stat_store() is used in 'append' mode. + * 'AqoStatArgs' fields execs_with_aqo, execs_without_aqo, + * cur_stat_slot, cur_stat_slot_aqo are not used in this + * mode and dummy values(0) are set in this case. + */ + AqoStatArgs stat_arg = { 0, 0, 0, + &execution_time, &query_context.planning_time, &cardinality_error, + 0, + &execution_time, &query_context.planning_time, &cardinality_error}; + /* Write AQO statistics to the aqo_query_stat table */ stat = aqo_stat_store(query_context.query_hash, query_context.use_aqo, - query_context.planning_time, execution_time, - cardinality_error); + &stat_arg, true); if (stat != NULL) { diff --git a/preprocessing.c b/preprocessing.c index 7b909bdf..93b61e82 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -309,7 +309,7 @@ aqo_planner(Query *parse, */ if (aqo_queries_store(query_context.query_hash, query_context.fspace_hash, query_context.learn_aqo, query_context.use_aqo, - query_context.auto_tuning)) + query_context.auto_tuning, &aqo_queries_nulls)) { /* * Add query text into the ML-knowledge base. 
Just for further diff --git a/regress_schedule b/regress_schedule index 418e14ec..76a2e00e 100644 --- a/regress_schedule +++ b/regress_schedule @@ -12,6 +12,7 @@ test: unsupported test: clean_aqo_data test: parallel_workers test: plancache +test: update_functions # Performance-dependent test. Can be ignored if executes in containers or on slow machines ignore: statement_timeout test: statement_timeout diff --git a/sql/plancache.sql b/sql/plancache.sql index 3b074b90..c9aabae7 100644 --- a/sql/plancache.sql +++ b/sql/plancache.sql @@ -44,4 +44,5 @@ SELECT * FROM f1(); DROP FUNCTION f1; DROP TABLE test CASCADE; -DROP EXTENSION aqo; \ No newline at end of file +SELECT true FROM aqo_reset(); +DROP EXTENSION aqo; diff --git a/sql/update_functions.sql b/sql/update_functions.sql new file mode 100644 index 00000000..85b711e6 --- /dev/null +++ b/sql/update_functions.sql @@ -0,0 +1,205 @@ +CREATE TABLE aqo_test1(a int, b int); +WITH RECURSIVE t(a, b) +AS ( + VALUES (1, 2) + UNION ALL + SELECT t.a + 1, t.b + 1 FROM t WHERE t.a < 20 +) INSERT INTO aqo_test1 (SELECT * FROM t); +CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); +ANALYZE aqo_test1; + +CREATE TABLE aqo_test2(a int); +WITH RECURSIVE t(a) +AS ( + VALUES (0) + UNION ALL + SELECT t.a + 1 FROM t WHERE t.a < 100000 +) INSERT INTO aqo_test2 (SELECT * FROM t); +CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); +ANALYZE aqo_test2; + +CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; + +SET aqo.mode='intelligent'; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b, aqo_test2 c WHERE a.a = b.a AND b.a = c.a; + +SET aqo.mode='learn'; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 10; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 10 and 
b.a > 200; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 11 and b.a > 200; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 12 and b.a > 200; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 14 and b.a > 200; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); +SET aqo.mode='controlled'; + +CREATE TABLE aqo_query_texts_dump AS SELECT * FROM aqo_query_texts; +CREATE TABLE aqo_queries_dump AS SELECT * FROM aqo_queries; +CREATE TABLE aqo_query_stat_dump AS SELECT * FROM aqo_query_stat; +CREATE TABLE aqo_data_dump AS SELECT * FROM aqo_data; + +SELECT 1 FROM aqo_reset(); + +-- +-- aqo_query_texts_update() testing. +-- + +-- Populate aqo_query_texts with dump data. +SELECT aqo_query_texts_update(queryid, query_text) AS res +FROM aqo_query_texts_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_texts EXCEPT TABLE aqo_query_texts_dump) +UNION ALL +(TABLE aqo_query_texts_dump EXCEPT TABLE aqo_query_texts); + +-- Update aqo_query_texts with dump data. 
+SELECT aqo_query_texts_update(queryid, query_text) AS res +FROM aqo_query_texts_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_texts EXCEPT TABLE aqo_query_texts_dump) +UNION ALL +(TABLE aqo_query_texts_dump EXCEPT TABLE aqo_query_texts); + +-- +-- aqo_queries_update testing. +-- + +-- Populate aqo_queries with dump data. +SELECT aqo_queries_update(queryid, fs, learn_aqo, use_aqo, auto_tuning) AS res +FROM aqo_queries_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) +UNION ALL +(TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); + +-- Update aqo_queries with dump data. +SELECT aqo_queries_update(queryid, fs, learn_aqo, use_aqo, auto_tuning) AS res +FROM aqo_queries_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) +UNION ALL +(TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); + +-- +-- aqo_query_stat_update() testing. +-- + +-- Populate aqo_query_stat with dump data. +SELECT aqo_query_stat_update(queryid, execution_time_with_aqo, +execution_time_without_aqo, planning_time_with_aqo, planning_time_without_aqo, +cardinality_error_with_aqo, cardinality_error_without_aqo, executions_with_aqo, +executions_without_aqo) AS res +FROM aqo_query_stat_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_stat_dump EXCEPT TABLE aqo_query_stat) +UNION ALL +(TABLE aqo_query_stat EXCEPT TABLE aqo_query_stat_dump); + +-- Update aqo_query_stat with dump data. 
+SELECT aqo_query_stat_update(queryid, execution_time_with_aqo, +execution_time_without_aqo, planning_time_with_aqo, planning_time_without_aqo, +cardinality_error_with_aqo, cardinality_error_without_aqo, executions_with_aqo, +executions_without_aqo) AS res +FROM aqo_query_stat_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_stat_dump EXCEPT TABLE aqo_query_stat) +UNION ALL +(TABLE aqo_query_stat EXCEPT TABLE aqo_query_stat_dump); + +-- +-- aqo_data_update() testing. +-- + +-- Populate aqo_data with dump data. +SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res +FROM aqo_data_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_data_dump EXCEPT TABLE aqo_data) +UNION ALL +(TABLE aqo_data EXCEPT TABLE aqo_data_dump); + +-- Update aqo_data with dump data. +SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res +FROM aqo_data_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_data_dump EXCEPT TABLE aqo_data) +UNION ALL +(TABLE aqo_data EXCEPT TABLE aqo_data_dump); + + +-- Reject aqo_query_stat_update if there is NULL elements in array arg. +SELECT aqo_query_stat_update(1, '{NULL, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, 1); + +-- Reject aqo_query_stat_update if arrays don't have the same size. +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, 1); + +-- Reject aqo_query_stat_update if there are negative executions. +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', -1, 1); +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, -1); + +-- Reject aqo_query_data_update if number of matrix columns and nfeatures +-- are different. 
+SELECT aqo_data_update(1, 1, 0, '{{1}}', '{1, 1}', '{1, 1}', '{1, 2, 3}'); + +-- Reject aqo_query_data_update if there is NULL elements in array arg. +SELECT aqo_data_update(1, 1, 1, '{{NULL}}', '{1}', '{1}', '{1, 2, 3}'); +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{NULL}', '{1}', '{1, 2, 3}'); +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{NULL}', '{1, 2, 3}'); + +-- Reject aqo_query_data_update if Oids is NULL. +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1}', NULL); + +-- Reject aqo_query_data_update if arrays don't have the same number of rows. +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1, 1}', '{1}', '{1, 2, 3}'); +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1, 1}', '{1, 2, 3}'); +SELECT aqo_data_update(1, 1, 1, '{{1}, {2}}', '{1}', '{1}', '{1, 2, 3}'); + +SET aqo.mode='disabled'; +SELECT 1 FROM aqo_reset(); +DROP EXTENSION aqo; + +DROP TABLE aqo_test1, aqo_test2; +DROP TABLE aqo_query_texts_dump, aqo_queries_dump, aqo_query_stat_dump, aqo_data_dump; diff --git a/storage.c b/storage.c index 32446d6c..02c8e0ca 100644 --- a/storage.c +++ b/storage.c @@ -78,6 +78,12 @@ HTAB *deactivated_queries = NULL; static const uint32 PGAQO_FILE_HEADER = 123467589; static const uint32 PGAQO_PG_MAJOR_VERSION = PG_VERSION_NUM / 100; +/* + * Used for internal aqo_queries_store() calls. + * No NULL arguments expected in this case. 
+ */ +AqoQueriesNullArgs aqo_queries_nulls = { false, false, false, false }; + static ArrayType *form_matrix(double *matrix, int nrows, int ncols); static void dsa_init(void); @@ -105,6 +111,9 @@ PG_FUNCTION_INFO_V1(aqo_cleanup); PG_FUNCTION_INFO_V1(aqo_drop_class); PG_FUNCTION_INFO_V1(aqo_cardinality_error); PG_FUNCTION_INFO_V1(aqo_execution_time); +PG_FUNCTION_INFO_V1(aqo_query_texts_update); +PG_FUNCTION_INFO_V1(aqo_query_stat_update); +PG_FUNCTION_INFO_V1(aqo_data_update); bool @@ -116,7 +125,15 @@ load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids) bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids) { - return aqo_data_store(fs, fss, data, reloids); + /* + * 'reloids' explictly passed to aqo_data_store(). + * So AqoDataArgs fields 'nrels' & 'oids' are + * set to 0 and NULL repectively. + */ + AqoDataArgs data_arg = + {data->rows, data->cols, 0, data->matrix, + data->targets, data->rfactors, NULL}; + return aqo_data_store(fs, fss, &data_arg, reloids); } /* @@ -210,8 +227,8 @@ add_deactivated_query(uint64 queryid) * If stat hash table is full, return NULL and log this fact. 
*/ StatEntry * -aqo_stat_store(uint64 queryid, bool use_aqo, - double plan_time, double exec_time, double est_error) +aqo_stat_store(uint64 queryid, bool use_aqo, AqoStatArgs *stat_arg, + bool append_mode) { StatEntry *entry; bool found; @@ -250,6 +267,34 @@ aqo_stat_store(uint64 queryid, bool use_aqo, entry->queryid = qid; } + if (!append_mode) + { + size_t sz; + if (found) + { + memset(entry, 0, sizeof(StatEntry)); + entry->queryid = queryid; + } + + sz = stat_arg->cur_stat_slot_aqo * sizeof(entry->est_error_aqo[0]); + memcpy(entry->plan_time_aqo, stat_arg->plan_time_aqo, sz); + memcpy(entry->exec_time_aqo, stat_arg->exec_time_aqo, sz); + memcpy(entry->est_error_aqo, stat_arg->est_error_aqo, sz); + entry->execs_with_aqo = stat_arg->execs_with_aqo; + entry->cur_stat_slot_aqo = stat_arg->cur_stat_slot_aqo; + + sz = stat_arg->cur_stat_slot * sizeof(entry->est_error[0]); + memcpy(entry->plan_time, stat_arg->plan_time, sz); + memcpy(entry->exec_time, stat_arg->exec_time, sz); + memcpy(entry->est_error, stat_arg->est_error, sz); + entry->execs_without_aqo = stat_arg->execs_without_aqo; + entry->cur_stat_slot = stat_arg->cur_stat_slot; + + aqo_state->stat_changed = true; + LWLockRelease(&aqo_state->stat_lock); + return entry; + } + /* Update the entry data */ if (use_aqo) @@ -269,9 +314,9 @@ aqo_stat_store(uint64 queryid, bool use_aqo, } entry->execs_with_aqo++; - entry->plan_time_aqo[pos] = plan_time; - entry->exec_time_aqo[pos] = exec_time; - entry->est_error_aqo[pos] = est_error; + entry->plan_time_aqo[pos] = *stat_arg->plan_time_aqo; + entry->exec_time_aqo[pos] = *stat_arg->exec_time_aqo; + entry->est_error_aqo[pos] = *stat_arg->est_error_aqo; } else { @@ -290,9 +335,9 @@ aqo_stat_store(uint64 queryid, bool use_aqo, } entry->execs_without_aqo++; - entry->plan_time[pos] = plan_time; - entry->exec_time[pos] = exec_time; - entry->est_error[pos] = est_error; + entry->plan_time[pos] = *stat_arg->plan_time; + entry->exec_time[pos] = *stat_arg->exec_time; + 
entry->est_error[pos] = *stat_arg->est_error; } entry = memcpy(palloc(sizeof(StatEntry)), entry, sizeof(StatEntry)); @@ -865,7 +910,7 @@ aqo_queries_load(void) LWLockRelease(&aqo_state->queries_lock); if (!found) { - if (!aqo_queries_store(0, 0, 0, 0, 0)) + if (!aqo_queries_store(0, 0, 0, 0, 0, &aqo_queries_nulls)) elog(PANIC, "[AQO] aqo_queries initialization was unsuccessful"); } } @@ -1279,7 +1324,7 @@ _compute_data_dsa(const DataEntry *entry) * Return true if data was changed. */ bool -aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) +aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, List *reloids) { DataEntry *entry; bool found; @@ -1291,6 +1336,13 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) bool tblOverflow; HASHACTION action; bool result; + /* + * We should distinguish incoming data between internally + * passed structured data(reloids) and externaly + * passed data(plain arrays) from aqo_data_update() function. + */ + bool is_raw_data = (reloids == NULL); + int nrels = is_raw_data ? data->nrels : list_length(reloids); Assert(!LWLockHeldByMe(&aqo_state->data_lock)); @@ -1323,7 +1375,7 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) entry->cols = data->cols; entry->rows = data->rows; - entry->nrels = list_length(reloids); + entry->nrels = nrels; size = _compute_data_dsa(entry); entry->data_dp = dsa_allocate0(data_dsa, size); @@ -1342,7 +1394,7 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) Assert(DsaPointerIsValid(entry->data_dp)); - if (entry->cols != data->cols || entry->nrels != list_length(reloids)) + if (entry->cols != data->cols || entry->nrels != nrels) { /* Collision happened? */ elog(LOG, "[AQO] Does a collision happened? 
Check it if possible (fs: " @@ -1396,14 +1448,21 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) memcpy(ptr, data->rfactors, sizeof(double) * entry->rows); ptr += sizeof(double) * entry->rows; /* store list of relations. XXX: optimize ? */ - foreach(lc, reloids) + if (is_raw_data) { - Oid reloid = lfirst_oid(lc); - - memcpy(ptr, &reloid, sizeof(Oid)); - ptr += sizeof(Oid); + memcpy(ptr, data->oids, nrels * sizeof(Oid)); + ptr += nrels * sizeof(Oid); } + else + { + foreach(lc, reloids) + { + Oid reloid = lfirst_oid(lc); + memcpy(ptr, &reloid, sizeof(Oid)); + ptr += sizeof(Oid); + } + } aqo_state->data_changed = true; end: result = aqo_state->data_changed; @@ -1860,13 +1919,19 @@ aqo_queries(PG_FUNCTION_ARGS) bool aqo_queries_store(uint64 queryid, - uint64 fs, bool learn_aqo, bool use_aqo, bool auto_tuning) + uint64 fs, bool learn_aqo, bool use_aqo, bool auto_tuning, + AqoQueriesNullArgs *null_args) { QueriesEntry *entry; bool found; bool tblOverflow; HASHACTION action; + /* Insert is allowed if no args are NULL. */ + bool safe_insert = + (!null_args->fs_is_null && !null_args->learn_aqo_is_null && + !null_args->use_aqo_is_null && !null_args->auto_tuning_is_null); + Assert(queries_htab); /* Guard for default feature space */ @@ -1877,7 +1942,7 @@ aqo_queries_store(uint64 queryid, /* Check hash table overflow */ tblOverflow = hash_get_num_entries(queries_htab) < fs_max_items ? false : true; - action = tblOverflow ? HASH_FIND : HASH_ENTER; + action = (tblOverflow || !safe_insert) ? 
HASH_FIND : HASH_ENTER; entry = (QueriesEntry *) hash_search(queries_htab, &queryid, action, &found); @@ -1897,11 +1962,20 @@ aqo_queries_store(uint64 queryid, return false; } - entry->fs = fs; - entry->learn_aqo = learn_aqo; - entry->use_aqo = use_aqo; - entry->auto_tuning = auto_tuning; + if (!null_args->fs_is_null) + entry->fs = fs; + if (!null_args->learn_aqo_is_null) + entry->learn_aqo = learn_aqo; + if (!null_args->use_aqo_is_null) + entry->use_aqo = use_aqo; + if (!null_args->auto_tuning_is_null) + entry->auto_tuning = auto_tuning; + if (entry->learn_aqo || entry->use_aqo || entry->auto_tuning) + /* Remove the class from cache of deactivated queries */ + hash_search(deactivated_queries, &queryid, HASH_REMOVE, NULL); + + aqo_state->queries_changed = true; aqo_state->queries_changed = true; LWLockRelease(&aqo_state->queries_lock); return true; @@ -2030,32 +2104,37 @@ aqo_queries_find(uint64 queryid, QueryContextData *ctx) Datum aqo_queries_update(PG_FUNCTION_ARGS) { - QueriesEntry *entry; - uint64 queryid = PG_GETARG_INT64(AQ_QUERYID); - bool found; + uint64 queryid; + uint64 fs = 0; + bool learn_aqo = false; + bool use_aqo = false; + bool auto_tuning = false; - if (queryid == 0) - /* Do nothing for default feature space */ - PG_RETURN_BOOL(false); + AqoQueriesNullArgs null_args = + { PG_ARGISNULL(AQ_FS), PG_ARGISNULL(AQ_LEARN_AQO), + PG_ARGISNULL(AQ_USE_AQO), PG_ARGISNULL(AQ_AUTO_TUNING) }; - LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); - entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, - &found); - if (!PG_ARGISNULL(AQ_FS)) - entry->fs = PG_GETARG_INT64(AQ_FS); - if (!PG_ARGISNULL(AQ_LEARN_AQO)) - entry->learn_aqo = PG_GETARG_BOOL(AQ_LEARN_AQO); - if (!PG_ARGISNULL(AQ_USE_AQO)) - entry->use_aqo = PG_GETARG_BOOL(AQ_USE_AQO); - if (!PG_ARGISNULL(AQ_AUTO_TUNING)) - entry->auto_tuning = PG_GETARG_BOOL(AQ_AUTO_TUNING); + if (PG_ARGISNULL(AQ_QUERYID)) + PG_RETURN_BOOL(false); - /* Remove the class from cache of deactivated queries 
*/ - hash_search(deactivated_queries, &queryid, HASH_REMOVE, NULL); + queryid = PG_GETARG_INT64(AQ_QUERYID); + if (queryid == 0) + /* Do nothing for default feature space */ + PG_RETURN_BOOL(false); - LWLockRelease(&aqo_state->queries_lock); - PG_RETURN_BOOL(true); + if (!null_args.fs_is_null) + fs = PG_GETARG_INT64(AQ_FS); + if (!null_args.learn_aqo_is_null) + learn_aqo = PG_GETARG_BOOL(AQ_LEARN_AQO); + if (!null_args.use_aqo_is_null) + use_aqo = PG_GETARG_BOOL(AQ_USE_AQO); + if (!null_args.auto_tuning_is_null) + auto_tuning = PG_GETARG_BOOL(AQ_AUTO_TUNING); + + PG_RETURN_BOOL(aqo_queries_store(queryid, + fs, learn_aqo, use_aqo, auto_tuning, + &null_args)); } Datum @@ -2483,3 +2562,192 @@ aqo_execution_time(PG_FUNCTION_ARGS) tuplestore_donestoring(tupstore); return (Datum) 0; } + +/* + * Update AQO query text for a given queryid value. + * Return true if operation have done some changes, + * false otherwize. + */ +Datum +aqo_query_texts_update(PG_FUNCTION_ARGS) +{ + uint64 queryid; + int str_len; + text *str; + char *str_buff; + bool res = false; + + /* Do nothing if any arguments are NULLs */ + if ((PG_ARGISNULL(QT_QUERYID) || PG_ARGISNULL(QT_QUERY_STRING))) + PG_RETURN_BOOL(false); + + if (!(queryid = PG_GETARG_INT64(QT_QUERYID))) + /* Do nothing for default feature space */ + PG_RETURN_BOOL(false); + + str = PG_GETARG_TEXT_PP(QT_QUERY_STRING); + str_len = VARSIZE_ANY_EXHDR(str) + 1; + if (str_len > querytext_max_size) + str_len = querytext_max_size; + + str_buff = (char*) palloc(str_len); + text_to_cstring_buffer(str, str_buff, str_len); + res = aqo_qtext_store(queryid, str_buff); + pfree(str_buff); + + PG_RETURN_BOOL(res); +} + +/* + * Check if incoming array is one dimensional array + * and array elements are not null. Init array field + * and return number of elements if check passed, + * otherwize return -1. 
+ */ +static int init_dbl_array(double **dest, ArrayType *arr) +{ + if (ARR_NDIM(arr) > 1 || ARR_HASNULL(arr)) + return -1; + *dest = (double *) ARR_DATA_PTR(arr); + return ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); +} + +/* + * Update AQO query stat table for a given queryid value. + * Return true if operation have done some changes, + * false otherwize. + */ +Datum +aqo_query_stat_update(PG_FUNCTION_ARGS) +{ + uint64 queryid; + AqoStatArgs stat_arg; + + /* + * Arguments cannot be NULL. + */ + if (PG_ARGISNULL(QUERYID) || PG_ARGISNULL(NEXECS_AQO) || + PG_ARGISNULL(NEXECS) || PG_ARGISNULL(EXEC_TIME_AQO) || + PG_ARGISNULL(PLAN_TIME_AQO) || PG_ARGISNULL(EST_ERROR_AQO) || + PG_ARGISNULL(EXEC_TIME) || PG_ARGISNULL(PLAN_TIME) || + PG_ARGISNULL(EST_ERROR)) + PG_RETURN_BOOL(false); + + queryid = PG_GETARG_INT64(AQ_QUERYID); + stat_arg.execs_with_aqo = PG_GETARG_INT64(NEXECS_AQO); + stat_arg.execs_without_aqo = PG_GETARG_INT64(NEXECS); + if (queryid == 0 || stat_arg.execs_with_aqo < 0 || + stat_arg.execs_without_aqo < 0) + PG_RETURN_BOOL(false); + + /* + * Init 'with aqo' array fields for further update procedure and + * check that arrays have the same size. + */ + stat_arg.cur_stat_slot_aqo = + init_dbl_array(&stat_arg.exec_time_aqo, + PG_GETARG_ARRAYTYPE_P(EXEC_TIME_AQO)); + if (stat_arg.cur_stat_slot_aqo == -1 || + stat_arg.cur_stat_slot_aqo > STAT_SAMPLE_SIZE || + stat_arg.cur_stat_slot_aqo != + init_dbl_array(&stat_arg.plan_time_aqo, + PG_GETARG_ARRAYTYPE_P(PLAN_TIME_AQO)) || + stat_arg.cur_stat_slot_aqo != + init_dbl_array(&stat_arg.est_error_aqo, + PG_GETARG_ARRAYTYPE_P(EST_ERROR_AQO))) + PG_RETURN_BOOL(false); + + /* + * Init 'without aqo' array fields for further update procedure and + * check that arrays have the same size. 
+ */ + stat_arg.cur_stat_slot = init_dbl_array(&stat_arg.exec_time, + PG_GETARG_ARRAYTYPE_P(EXEC_TIME)); + if (stat_arg.cur_stat_slot == -1 || + stat_arg.cur_stat_slot > STAT_SAMPLE_SIZE || + stat_arg.cur_stat_slot != + init_dbl_array(&stat_arg.plan_time, + PG_GETARG_ARRAYTYPE_P(PLAN_TIME)) || + stat_arg.cur_stat_slot != + init_dbl_array(&stat_arg.est_error, + PG_GETARG_ARRAYTYPE_P(EST_ERROR))) + PG_RETURN_BOOL(false); + + PG_RETURN_BOOL(aqo_stat_store(queryid, false, + &stat_arg, false) != NULL); +} + +/* + * Update AQO data for a given {fs, fss} values. + * Return true if operation have done some changes, + * false otherwize. + */ +Datum +aqo_data_update(PG_FUNCTION_ARGS) +{ + uint64 fs; + int fss; + double *features_arr[aqo_K]; + AqoDataArgs data_arg; + + ArrayType *arr; + + if (PG_ARGISNULL(AD_FS) || PG_ARGISNULL(AD_FSS) || + PG_ARGISNULL(AD_NFEATURES) || PG_ARGISNULL(AD_TARGETS) || + PG_ARGISNULL(AD_RELIABILITY) || PG_ARGISNULL(AD_OIDS)) + PG_RETURN_BOOL(false); + + fs = PG_GETARG_INT64(AD_FS); + fss = PG_GETARG_INT32(AD_FSS); + data_arg.cols = PG_GETARG_INT32(AD_NFEATURES); + + /* Init traget & reliability arrays. */ + data_arg.rows = + init_dbl_array(&data_arg.targets, + PG_GETARG_ARRAYTYPE_P(AD_TARGETS)); + if (data_arg.rows == -1 || data_arg.rows > aqo_K || + data_arg.rows != init_dbl_array(&data_arg.rfactors, + PG_GETARG_ARRAYTYPE_P(AD_RELIABILITY))) + PG_RETURN_BOOL(false); + + /* Init matrix array. */ + if (data_arg.cols == 0 && !PG_ARGISNULL(AD_FEATURES)) + PG_RETURN_BOOL(false); + if (PG_ARGISNULL(AD_FEATURES)) + { + if (data_arg.cols != 0) + PG_RETURN_BOOL(false); + data_arg.matrix = NULL; + } + else + { + int i; + + arr = PG_GETARG_ARRAYTYPE_P(AD_FEATURES); + /* + * Features is two dimensional array. + * Number of rows should be the same as for + * traget & reliability arrays. 
+ */ + if (ARR_HASNULL(arr) || ARR_NDIM(arr) != 2 || + data_arg.rows != ARR_DIMS(arr)[0] || + data_arg.cols != ARR_DIMS(arr)[1]) + PG_RETURN_BOOL(false); + + for (i = 0; i < ARR_DIMS(arr)[0]; i++) + { + features_arr[i] = (double *) ARR_DATA_PTR(arr) + + i * ARR_DIMS(arr)[1]; + } + data_arg.matrix = features_arr; + } + + /* Init oids array. */ + arr = PG_GETARG_ARRAYTYPE_P(AD_OIDS); + if (ARR_HASNULL(arr)) + PG_RETURN_BOOL(false); + data_arg.oids = (Oid *) ARR_DATA_PTR(arr); + data_arg.nrels = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); + + PG_RETURN_BOOL(aqo_data_store(fs, fss, &data_arg, NULL)); +} diff --git a/storage.h b/storage.h index 0e7745e1..dcc1eec8 100644 --- a/storage.h +++ b/storage.h @@ -36,6 +36,26 @@ typedef struct StatEntry double est_error_aqo[STAT_SAMPLE_SIZE]; } StatEntry; +/* + * Auxiliary struct, used for passing arguments + * to aqo_stat_store() function. + */ +typedef struct AqoStatArgs +{ + int64 execs_with_aqo; + int64 execs_without_aqo; + + int cur_stat_slot; + double *exec_time; + double *plan_time; + double *est_error; + + int cur_stat_slot_aqo; + double *exec_time_aqo; + double *plan_time_aqo; + double *est_error_aqo; +} AqoStatArgs; + /* * Storage entry for query texts. * Query strings may have very different sizes. So, in hash table we store only @@ -82,6 +102,24 @@ typedef struct QueriesEntry bool auto_tuning; } QueriesEntry; +/* + * Auxiliary struct, used for passing arg NULL signs + * to aqo_queries_store() function. + */ +typedef struct AqoQueriesNullArgs +{ + bool fs_is_null; + bool learn_aqo_is_null; + bool use_aqo_is_null; + bool auto_tuning_is_null; +} AqoQueriesNullArgs; + +/* + * Used for internal aqo_queries_store() calls. + * No NULL arguments expected in this case. 
+ */ +extern AqoQueriesNullArgs aqo_queries_nulls; + extern int querytext_max_size; extern int dsm_size_max; @@ -90,8 +128,8 @@ extern HTAB *qtexts_htab; extern HTAB *queries_htab; /* TODO */ extern HTAB *data_htab; /* TODO */ -extern StatEntry *aqo_stat_store(uint64 queryid, bool use_aqo, double plan_time, - double exec_time, double est_error); +extern StatEntry *aqo_stat_store(uint64 queryid, bool use_aqo, + AqoStatArgs *stat_arg, bool append_mode); extern void aqo_stat_flush(void); extern void aqo_stat_load(void); @@ -99,7 +137,8 @@ extern bool aqo_qtext_store(uint64 queryid, const char *query_string); extern void aqo_qtexts_flush(void); extern void aqo_qtexts_load(void); -extern bool aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids); +extern bool aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, + List *reloids); extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool wideSearch, double *features); extern void aqo_data_flush(void); @@ -107,7 +146,8 @@ extern void aqo_data_load(void); extern bool aqo_queries_find(uint64 queryid, QueryContextData *ctx); extern bool aqo_queries_store(uint64 queryid, uint64 fs, bool learn_aqo, - bool use_aqo, bool auto_tuning); + bool use_aqo, bool auto_tuning, + AqoQueriesNullArgs *null_args); extern void aqo_queries_flush(void); extern void aqo_queries_load(void); From dab95f1457b2cb5e13fcf50d7c8cdba20795b14c Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Mon, 30 Jan 2023 09:25:10 +0500 Subject: [PATCH 090/134] Add assertion on incorrect number of rows in storing AQO data record. --- storage.c | 1 + 1 file changed, 1 insertion(+) diff --git a/storage.c b/storage.c index 02c8e0ca..8bb6f28e 100644 --- a/storage.c +++ b/storage.c @@ -1345,6 +1345,7 @@ aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, List *reloids) int nrels = is_raw_data ? 
data->nrels : list_length(reloids); Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + Assert(data->rows > 0); dsa_init(); From 6b4f856207990d1faa88db3e7454d22b72d889d1 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Mon, 11 Jul 2022 11:54:01 +0300 Subject: [PATCH 091/134] Add smart statement timeout for learning aqo in special quesries within through manual retraining. AQO evaluates whether enough to execute the query through comparison integral error value with its fixed value (0.1), also if integral error didn't change compared to previous iterations, smart statemet timeout value will be increased. Besides, smart statemet timeout value won't be increased, if there is reached limit value, namely statement timeout. The initial smart_statement_timeout value is aqo statement timeout value or 0. Smart statement timeout value and number of its using are saved in aqo_queries. --- aqo--1.5--1.6.sql | 21 +++++++ aqo.c | 13 ++++ aqo.h | 11 ++++ auto_tuning.c | 4 +- expected/smart_statement_timeout.out | 94 ++++++++++++++++++++++++++++ expected/statement_timeout.out | 2 +- expected/update_functions.out | 8 +-- postprocessing.c | 51 +++++++++++++-- preprocessing.c | 2 + regress_schedule | 2 + sql/smart_statement_timeout.sql | 45 +++++++++++++ storage.c | 54 +++++++++++++++- storage.h | 5 ++ 13 files changed, 299 insertions(+), 13 deletions(-) create mode 100644 expected/smart_statement_timeout.out create mode 100644 sql/smart_statement_timeout.sql diff --git a/aqo--1.5--1.6.sql b/aqo--1.5--1.6.sql index 077f11b1..fa1b8bb7 100644 --- a/aqo--1.5--1.6.sql +++ b/aqo--1.5--1.6.sql @@ -3,9 +3,12 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "ALTER EXTENSION aqo UPDATE TO '1.6'" to load this file. 
\quit +DROP VIEW aqo_queries; + DROP FUNCTION aqo_enable_query; DROP FUNCTION aqo_disable_query; DROP FUNCTION aqo_cleanup; +DROP FUNCTION aqo_queries; CREATE FUNCTION aqo_enable_class(queryid bigint) RETURNS void @@ -77,3 +80,21 @@ CREATE FUNCTION aqo_data_update( RETURNS bool AS 'MODULE_PATHNAME', 'aqo_data_update' LANGUAGE C VOLATILE; + +/* + * VIEWs to discover AQO data. + */ +CREATE FUNCTION aqo_queries ( + OUT queryid bigint, + OUT fs bigint, + OUT learn_aqo boolean, + OUT use_aqo boolean, + OUT auto_tuning boolean, + OUT smart_timeout bigint, + OUT count_increase_timeout bigint +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_queries' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); diff --git a/aqo.c b/aqo.c index 6d125666..4b776433 100644 --- a/aqo.c +++ b/aqo.c @@ -35,6 +35,7 @@ void _PG_init(void); int aqo_mode = AQO_MODE_CONTROLLED; bool force_collect_stat; bool aqo_predict_with_few_neighbors; +int aqo_statement_timeout; /* * Show special info in EXPLAIN mode. 
@@ -48,6 +49,7 @@ bool aqo_predict_with_few_neighbors; */ bool aqo_show_hash; bool aqo_show_details; +bool change_flex_timeout; /* GUC variables */ static const struct config_enum_entry format_options[] = { @@ -293,6 +295,17 @@ _PG_init(void) NULL, NULL ); + DefineCustomIntVariable("aqo.statement_timeout", + "Time limit on learning.", + NULL, + &aqo_statement_timeout, + 0, + 0, INT_MAX, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); DefineCustomIntVariable("aqo.min_neighbors_for_predicting", "Set how many neighbors the cardinality prediction will be calculated", diff --git a/aqo.h b/aqo.h index 9418646c..9600b136 100644 --- a/aqo.h +++ b/aqo.h @@ -199,8 +199,15 @@ typedef struct QueryContextData instr_time start_execution_time; double planning_time; + int64 smart_timeout; + int64 count_increase_timeout; } QueryContextData; +/* + * Indicator for using smart statement timeout for query + */ +extern bool change_flex_timeout; + struct StatEntry; extern double predicted_ppi_rows; @@ -250,6 +257,7 @@ extern ExplainOnePlan_hook_type prev_ExplainOnePlan_hook; extern ExplainOneNode_hook_type prev_ExplainOneNode_hook; extern void ppi_hook(ParamPathInfo *ppi); +extern int aqo_statement_timeout; /* Hash functions */ void get_eclasses(List *clauselist, int *nargs, int **args_hash, @@ -298,5 +306,8 @@ extern void selectivity_cache_clear(void); extern bool IsQueryDisabled(void); +extern bool update_query_timeout(uint64 queryid, int64 smart_timeout); +extern double get_mean(double *elems, int nelems); + extern List *cur_classes; #endif diff --git a/auto_tuning.c b/auto_tuning.c index cf96a2cf..b035a093 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -26,17 +26,15 @@ */ double auto_tuning_convergence_error = 0.01; -static double get_mean(double *elems, int nelems); static double get_estimation(double *elems, int nelems); static bool is_stable(double *elems, int nelems); static bool converged_cq(double *elems, int nelems); static bool is_in_infinite_loop_cq(double *elems, int 
nelems); - /* * Returns mean value of the array of doubles. */ -static double +double get_mean(double *elems, int nelems) { double sum = 0; diff --git a/expected/smart_statement_timeout.out b/expected/smart_statement_timeout.out new file mode 100644 index 00000000..7aacd184 --- /dev/null +++ b/expected/smart_statement_timeout.out @@ -0,0 +1,94 @@ +DROP TABLE IF EXISTS a,b CASCADE; +NOTICE: table "a" does not exist, skipping +NOTICE: table "b" does not exist, skipping +CREATE TABLE a (x1 int, x2 int, x3 int); +INSERT INTO a (x1, x2, x3) SELECT mod(ival,4), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; +CREATE TABLE b (y1 int, y2 int, y3 int); +INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,4), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,100) As ival; +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.join_threshold = 0; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'off'; +SET aqo.learn_statement_timeout = 'on'; +SET statement_timeout = 1500; -- [1.5s] +SET aqo.statement_timeout = 500; -- [0.5s] +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 0 +NOTICE: [AQO] Time limit for execution of the statement was increased. 
Current timeout is 1 + count | count +-------+------- + 62500 | 62500 +(1 row) + +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; + smart_timeout | count_increase_timeout +---------------+------------------------ + 1 | 1 +(1 row) + +SET aqo.learn_statement_timeout = 'off'; +SET aqo.statement_timeout = 1000; -- [1s] +INSERT INTO a (x1, x2, x3) SELECT mod(ival,20), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; +SET aqo.learn_statement_timeout = 'on'; +SET aqo.statement_timeout = 500; -- [0.5s] +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 1 +NOTICE: [AQO] Time limit for execution of the statement was increased. Current timeout is 6 + count | count +--------+-------- + 563300 | 562500 +(1 row) + +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; + smart_timeout | count_increase_timeout +---------------+------------------------ + 6 | 2 +(1 row) + +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 6 +NOTICE: [AQO] Time limit for execution of the statement was increased. 
Current timeout is 63 + count | count +--------+-------- + 563300 | 562500 +(1 row) + +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; + smart_timeout | count_increase_timeout +---------------+------------------------ + 63 | 3 +(1 row) + +SET statement_timeout = 100; -- [0.1s] +SET aqo.statement_timeout = 150; +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 63 +NOTICE: [AQO] Time limit for execution of the statement was increased. Current timeout is 1728 + count | count +--------+-------- + 563300 | 562500 +(1 row) + +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; + smart_timeout | count_increase_timeout +---------------+------------------------ + 1728 | 4 +(1 row) + +SELECT 1 FROM aqo_reset(); + ?column? 
+---------- + 1 +(1 row) + +DROP TABLE a; +DROP TABLE b; +DROP EXTENSION aqo; diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index 14b2f0dc..a12fe9dd 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -91,7 +91,7 @@ ERROR: canceling statement due to statement timeout SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); check_estimated_rows ---------------------- - 2 + 4 (1 row) SET statement_timeout = 800; diff --git a/expected/update_functions.out b/expected/update_functions.out index 6a6198e5..03a97fe7 100644 --- a/expected/update_functions.out +++ b/expected/update_functions.out @@ -211,8 +211,8 @@ ORDER BY res; (TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) UNION ALL (TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); - queryid | fs | learn_aqo | use_aqo | auto_tuning ----------+----+-----------+---------+------------- + queryid | fs | learn_aqo | use_aqo | auto_tuning | smart_timeout | count_increase_timeout +---------+----+-----------+---------+-------------+---------------+------------------------ (0 rows) -- Update aqo_queries with dump data. @@ -234,8 +234,8 @@ ORDER BY res; (TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) UNION ALL (TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); - queryid | fs | learn_aqo | use_aqo | auto_tuning ----------+----+-----------+---------+------------- + queryid | fs | learn_aqo | use_aqo | auto_tuning | smart_timeout | count_increase_timeout +---------+----+-----------+---------+-------------+---------------+------------------------ (0 rows) -- diff --git a/postprocessing.c b/postprocessing.c index 70688b1a..8a55a6cd 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -44,6 +44,8 @@ typedef struct static double cardinality_sum_errors; static int cardinality_num_objects; +static int64 max_timeout_value; +static int64 growth_rate = 3; /* * Store an AQO-related query data into the Query Environment structure. 
@@ -625,15 +627,46 @@ aqo_timeout_handler(void) ctx.learn = query_context.learn_aqo; ctx.isTimedOut = true; - elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data."); + if (aqo_statement_timeout == 0) + elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data."); + else + elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is %ld", max_timeout_value); + learnOnPlanState(timeoutCtl.queryDesc->planstate, (void *) &ctx); MemoryContextSwitchTo(oldctx); } +/* + * Function for updating smart statement timeout + */ +static int64 +increase_smart_timeout() +{ + int64 smart_timeout_fin_time = (query_context.smart_timeout + 1) * pow(growth_rate, query_context.count_increase_timeout); + + if (query_context.smart_timeout == max_timeout_value && !update_query_timeout(query_context.query_hash, smart_timeout_fin_time)) + elog(NOTICE, "[AQO] Timeout is not updated!"); + + return smart_timeout_fin_time; +} + static bool set_timeout_if_need(QueryDesc *queryDesc) { - TimestampTz fin_time; + int64 fintime = (int64) get_timeout_finish_time(STATEMENT_TIMEOUT)-1; + + if (aqo_learn_statement_timeout && aqo_statement_timeout > 0) + { + max_timeout_value = Min(query_context.smart_timeout, (int64) aqo_statement_timeout); + if (max_timeout_value > fintime) + { + max_timeout_value = fintime; + } + } + else + { + max_timeout_value = fintime; + } if (IsParallelWorker()) /* @@ -663,8 +696,7 @@ set_timeout_if_need(QueryDesc *queryDesc) else Assert(!get_timeout_active(timeoutCtl.id)); - fin_time = get_timeout_finish_time(STATEMENT_TIMEOUT); - enable_timeout_at(timeoutCtl.id, fin_time - 1); + enable_timeout_at(timeoutCtl.id, (TimestampTz) max_timeout_value); /* Save pointer to queryDesc to use at learning after a timeout interruption. 
*/ timeoutCtl.queryDesc = queryDesc; @@ -720,6 +752,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) instr_time endtime; EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); + double error = .0; cardinality_sum_errors = 0.; cardinality_num_objects = 0; @@ -788,6 +821,16 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) /* Store all learn data into the AQO service relations. */ if (!query_context.adding_query && query_context.auto_tuning) automatical_query_tuning(query_context.query_hash, stat); + + error = stat->est_error_aqo[stat->cur_stat_slot_aqo-1] - cardinality_sum_errors/(1 + cardinality_num_objects); + + if ( aqo_learn_statement_timeout && aqo_statement_timeout > 0 && error >= 0.1) + { + int64 fintime = increase_smart_timeout(); + elog(NOTICE, "[AQO] Time limit for execution of the statement was increased. Current timeout is %ld", fintime); + } + + pfree(stat); } } diff --git a/preprocessing.c b/preprocessing.c index 93b61e82..ca71156d 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -249,6 +249,8 @@ aqo_planner(Query *parse, elog(ERROR, "unrecognized mode in AQO: %d", aqo_mode); break; } + query_context.count_increase_timeout = 0; + query_context.smart_timeout = 0; } else /* Query class exists in a ML knowledge base. */ { diff --git a/regress_schedule b/regress_schedule index 76a2e00e..6c558e9a 100644 --- a/regress_schedule +++ b/regress_schedule @@ -15,9 +15,11 @@ test: plancache test: update_functions # Performance-dependent test. 
Can be ignored if executes in containers or on slow machines ignore: statement_timeout +ignore: smart_statement_timeout test: statement_timeout test: temp_tables test: top_queries test: relocatable test: look_a_like test: feature_subspace +test: smart_statement_timeout diff --git a/sql/smart_statement_timeout.sql b/sql/smart_statement_timeout.sql new file mode 100644 index 00000000..a0573dee --- /dev/null +++ b/sql/smart_statement_timeout.sql @@ -0,0 +1,45 @@ +DROP TABLE IF EXISTS a,b CASCADE; +CREATE TABLE a (x1 int, x2 int, x3 int); +INSERT INTO a (x1, x2, x3) SELECT mod(ival,4), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; + +CREATE TABLE b (y1 int, y2 int, y3 int); +INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,4), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,100) As ival; + +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.join_threshold = 0; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'off'; +SET aqo.learn_statement_timeout = 'on'; +SET statement_timeout = 1500; -- [1.5s] +SET aqo.statement_timeout = 500; -- [0.5s] + +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; + +SET aqo.learn_statement_timeout = 'off'; +SET aqo.statement_timeout = 1000; -- [1s] +INSERT INTO a (x1, x2, x3) SELECT mod(ival,20), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; +SET aqo.learn_statement_timeout = 'on'; +SET aqo.statement_timeout = 500; -- [0.5s] +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT 
JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; + +SET statement_timeout = 100; -- [0.1s] +SET aqo.statement_timeout = 150; +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; + +SELECT 1 FROM aqo_reset(); +DROP TABLE a; +DROP TABLE b; +DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index 8bb6f28e..5cb1ef76 100644 --- a/storage.c +++ b/storage.c @@ -55,7 +55,7 @@ typedef enum { } aqo_data_cols; typedef enum { - AQ_QUERYID = 0, AQ_FS, AQ_LEARN_AQO, AQ_USE_AQO, AQ_AUTO_TUNING, + AQ_QUERYID = 0, AQ_FS, AQ_LEARN_AQO, AQ_USE_AQO, AQ_AUTO_TUNING, AQ_SMART_TIMEOUT, AQ_COUNT_INCREASE_TIMEOUT, AQ_TOTAL_NCOLS } aqo_queries_cols; @@ -1910,6 +1910,8 @@ aqo_queries(PG_FUNCTION_ARGS) values[AQ_LEARN_AQO] = BoolGetDatum(entry->learn_aqo); values[AQ_USE_AQO] = BoolGetDatum(entry->use_aqo); values[AQ_AUTO_TUNING] = BoolGetDatum(entry->auto_tuning); + values[AQ_SMART_TIMEOUT] = Int64GetDatum(entry->smart_timeout); + values[AQ_COUNT_INCREASE_TIMEOUT] = Int64GetDatum(entry->count_increase_timeout); tuplestore_putvalues(tupstore, tupDesc, values, nulls); } @@ -1971,6 +1973,10 @@ aqo_queries_store(uint64 queryid, entry->use_aqo = use_aqo; if (!null_args->auto_tuning_is_null) entry->auto_tuning = auto_tuning; + if (!null_args->smart_timeout) + entry->smart_timeout = 0; + if 
(!null_args->count_increase_timeout) + entry->count_increase_timeout = 0; if (entry->learn_aqo || entry->use_aqo || entry->auto_tuning) /* Remove the class from cache of deactivated queries */ @@ -2091,11 +2097,57 @@ aqo_queries_find(uint64 queryid, QueryContextData *ctx) ctx->learn_aqo = entry->learn_aqo; ctx->use_aqo = entry->use_aqo; ctx->auto_tuning = entry->auto_tuning; + ctx->smart_timeout = entry->smart_timeout; + ctx->count_increase_timeout = entry->count_increase_timeout; } LWLockRelease(&aqo_state->queries_lock); return found; } +/* + * Function for update and save value of smart statement timeout + * for query in aqu_queries table + */ +bool +update_query_timeout(uint64 queryid, int64 smart_timeout) +{ + QueriesEntry *entry; + bool found; + bool tblOverflow; + HASHACTION action; + + Assert(queries_htab); + + /* Guard for default feature space */ + Assert(queryid != 0); + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + + /* Check hash table overflow */ + tblOverflow = hash_get_num_entries(queries_htab) < fs_max_items ? false : true; + action = tblOverflow ? HASH_FIND : HASH_ENTER; + + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, action, + &found); + + /* Initialize entry on first usage */ + if (!found && action == HASH_FIND) + { + /* + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit + */ + LWLockRelease(&aqo_state->queries_lock); + return false; + } + + entry->smart_timeout = smart_timeout; + entry->count_increase_timeout = entry->count_increase_timeout + 1; + + LWLockRelease(&aqo_state->queries_lock); + return true; +} + /* * Update AQO preferences for a given queryid value. * if incoming param is null - leave it unchanged. 
diff --git a/storage.h b/storage.h index dcc1eec8..35d94336 100644 --- a/storage.h +++ b/storage.h @@ -100,6 +100,9 @@ typedef struct QueriesEntry bool learn_aqo; bool use_aqo; bool auto_tuning; + + int64 smart_timeout; + int64 count_increase_timeout; } QueriesEntry; /* @@ -112,6 +115,8 @@ typedef struct AqoQueriesNullArgs bool learn_aqo_is_null; bool use_aqo_is_null; bool auto_tuning_is_null; + int64 smart_timeout; + int64 count_increase_timeout; } AqoQueriesNullArgs; /* From 662e6d08d760e370b9e3cf9c789b8c1bd5071872 Mon Sep 17 00:00:00 2001 From: Andrey Kazarinov Date: Fri, 11 Nov 2022 17:52:52 +0300 Subject: [PATCH 092/134] [PGPRO-7366] add function which shows memory usage function memctx_htab_sizes outputs allocated sizes and used sizes of aqo's memory contexts and hash tables --- aqo--1.5--1.6.sql | 16 ++++++++++++++++ t/001_pgbench.pl | 9 +++++++++ 2 files changed, 25 insertions(+) diff --git a/aqo--1.5--1.6.sql b/aqo--1.5--1.6.sql index fa1b8bb7..5489ade3 100644 --- a/aqo--1.5--1.6.sql +++ b/aqo--1.5--1.6.sql @@ -98,3 +98,19 @@ AS 'MODULE_PATHNAME', 'aqo_queries' LANGUAGE C STRICT VOLATILE PARALLEL SAFE; CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); + +CREATE FUNCTION aqo_memory_usage( + OUT name text, + OUT allocated_size int, + OUT used_size int +) +RETURNS SETOF record +AS $$ + SELECT name, total_bytes, used_bytes FROM pg_backend_memory_contexts + WHERE name LIKE 'AQO%' + UNION + SELECT name, allocated_size, size FROM pg_shmem_allocations + WHERE name LIKE 'AQO%'; +$$ LANGUAGE SQL; +COMMENT ON FUNCTION aqo_memory_usage() IS +'Show allocated sizes and used sizes of aqo`s memory contexts and hash tables'; diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 3aa3b7b5..2374d83d 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -159,6 +159,9 @@ WHERE v.exec_time > 0."); is($res, 3); +$res = $node->safe_psql('postgres', "SELECT * FROM aqo_memory_usage() AS t1"); +note("MEMORY:\n$res\n"); + # 
############################################################################## # # pgbench on a database with AQO in 'learn' mode. @@ -183,6 +186,9 @@ "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], 'pgbench in frozen mode'); +$res = $node->safe_psql('postgres', "SELECT * FROM aqo_memory_usage() AS t1"); +note("MEMORY:\n$res\n"); + # ############################################################################## # # Check procedure of ML-knowledge data cleaning. @@ -298,6 +304,9 @@ is($new_stat_count == $stat_count - $pgb_stat_count, 1, 'Total number of samples in aqo_query_stat'); +$res = $node->safe_psql('postgres', "SELECT * FROM aqo_memory_usage() AS t1"); +note("MEMORY:\n$res\n"); + # ############################################################################## # # AQO works after moving to another schema From 8676259ded05c23d2f92542b81b0bafc84d98d80 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 31 Jan 2023 11:19:10 +0500 Subject: [PATCH 093/134] Collect some artifacts of CI tests - initial commit --- .github/workflows/c-cpp.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 96a2d3d9..a38a8734 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -41,3 +41,19 @@ jobs: ./configure $CONFIGURE_OPTS CFLAGS="-DAQO_DEBUG_PRINT -O0" make -j4 > /dev/null && make -j4 -C contrib > /dev/null env CLIENTS=50 THREADS=50 make -C contrib/aqo check + - name: Archive regression.diffs + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: regression_diffs + path: /home/runner/work/aqo/aqo/pg/contrib/aqo/regression.diffs + retention-days: 1 + - name: Archive TAP tests log files + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: tap_logs + path: | + log + retention-days: 1 + From 1724f2c59f49b7e528487aaf2d20d65c932e336e Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 31 Jan 2023 11:21:23 +0500 Subject: 
[PATCH 094/134] Remove regression tests on smart statement timeout. Should rethink test principles of time-dependendent features to make it more stable. --- expected/smart_statement_timeout.out | 94 ---------------------------- postprocessing.c | 4 +- sql/smart_statement_timeout.sql | 45 ------------- 3 files changed, 2 insertions(+), 141 deletions(-) delete mode 100644 expected/smart_statement_timeout.out delete mode 100644 sql/smart_statement_timeout.sql diff --git a/expected/smart_statement_timeout.out b/expected/smart_statement_timeout.out deleted file mode 100644 index 7aacd184..00000000 --- a/expected/smart_statement_timeout.out +++ /dev/null @@ -1,94 +0,0 @@ -DROP TABLE IF EXISTS a,b CASCADE; -NOTICE: table "a" does not exist, skipping -NOTICE: table "b" does not exist, skipping -CREATE TABLE a (x1 int, x2 int, x3 int); -INSERT INTO a (x1, x2, x3) SELECT mod(ival,4), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; -CREATE TABLE b (y1 int, y2 int, y3 int); -INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,4), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,100) As ival; -CREATE EXTENSION IF NOT EXISTS aqo; -SET aqo.join_threshold = 0; -SET aqo.mode = 'learn'; -SET aqo.show_details = 'off'; -SET aqo.learn_statement_timeout = 'on'; -SET statement_timeout = 1500; -- [1.5s] -SET aqo.statement_timeout = 500; -- [0.5s] -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 0 -NOTICE: [AQO] Time limit for execution of the statement was increased. 
Current timeout is 1 - count | count --------+------- - 62500 | 62500 -(1 row) - -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; - smart_timeout | count_increase_timeout ----------------+------------------------ - 1 | 1 -(1 row) - -SET aqo.learn_statement_timeout = 'off'; -SET aqo.statement_timeout = 1000; -- [1s] -INSERT INTO a (x1, x2, x3) SELECT mod(ival,20), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; -SET aqo.learn_statement_timeout = 'on'; -SET aqo.statement_timeout = 500; -- [0.5s] -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 1 -NOTICE: [AQO] Time limit for execution of the statement was increased. Current timeout is 6 - count | count ---------+-------- - 563300 | 562500 -(1 row) - -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; - smart_timeout | count_increase_timeout ----------------+------------------------ - 6 | 2 -(1 row) - -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 6 -NOTICE: [AQO] Time limit for execution of the statement was increased. 
Current timeout is 63 - count | count ---------+-------- - 563300 | 562500 -(1 row) - -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; - smart_timeout | count_increase_timeout ----------------+------------------------ - 63 | 3 -(1 row) - -SET statement_timeout = 100; -- [0.1s] -SET aqo.statement_timeout = 150; -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 63 -NOTICE: [AQO] Time limit for execution of the statement was increased. Current timeout is 1728 - count | count ---------+-------- - 563300 | 562500 -(1 row) - -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; - smart_timeout | count_increase_timeout ----------------+------------------------ - 1728 | 4 -(1 row) - -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - -DROP TABLE a; -DROP TABLE b; -DROP EXTENSION aqo; diff --git a/postprocessing.c b/postprocessing.c index 8a55a6cd..f6af5f48 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -630,7 +630,7 @@ aqo_timeout_handler(void) if (aqo_statement_timeout == 0) elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data."); else - elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is %ld", max_timeout_value); + elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. 
Timeout is "INT64_FORMAT, max_timeout_value); learnOnPlanState(timeoutCtl.queryDesc->planstate, (void *) &ctx); MemoryContextSwitchTo(oldctx); @@ -827,7 +827,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) if ( aqo_learn_statement_timeout && aqo_statement_timeout > 0 && error >= 0.1) { int64 fintime = increase_smart_timeout(); - elog(NOTICE, "[AQO] Time limit for execution of the statement was increased. Current timeout is %ld", fintime); + elog(NOTICE, "[AQO] Time limit for execution of the statement was increased. Current timeout is "UINT64_FORMAT, fintime); } pfree(stat); diff --git a/sql/smart_statement_timeout.sql b/sql/smart_statement_timeout.sql deleted file mode 100644 index a0573dee..00000000 --- a/sql/smart_statement_timeout.sql +++ /dev/null @@ -1,45 +0,0 @@ -DROP TABLE IF EXISTS a,b CASCADE; -CREATE TABLE a (x1 int, x2 int, x3 int); -INSERT INTO a (x1, x2, x3) SELECT mod(ival,4), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; - -CREATE TABLE b (y1 int, y2 int, y3 int); -INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,4), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,100) As ival; - -CREATE EXTENSION IF NOT EXISTS aqo; -SET aqo.join_threshold = 0; -SET aqo.mode = 'learn'; -SET aqo.show_details = 'off'; -SET aqo.learn_statement_timeout = 'on'; -SET statement_timeout = 1500; -- [1.5s] -SET aqo.statement_timeout = 500; -- [0.5s] - -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; - -SET aqo.learn_statement_timeout = 'off'; -SET aqo.statement_timeout = 1000; -- [1s] -INSERT INTO a (x1, x2, x3) SELECT mod(ival,20), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; -SET aqo.learn_statement_timeout = 'on'; -SET 
aqo.statement_timeout = 500; -- [0.5s] -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; - -SET statement_timeout = 100; -- [0.1s] -SET aqo.statement_timeout = 150; -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; - -SELECT 1 FROM aqo_reset(); -DROP TABLE a; -DROP TABLE b; -DROP EXTENSION aqo; From 351e135465ffe725f03f806cf364ff4b51b113c3 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 31 Jan 2023 11:24:55 +0500 Subject: [PATCH 095/134] Increase stability of the look_a_like test: clear learning data before the test. --- expected/look_a_like.out | 69 +++++++++++++++++++++------------------- sql/look_a_like.sql | 1 + 2 files changed, 37 insertions(+), 33 deletions(-) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index faa9b0fd..70480334 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -1,4 +1,10 @@ CREATE EXTENSION aqo; +SELECT true FROM aqo_reset(); + ?column? 
+---------- + t +(1 row) + SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; @@ -56,28 +62,25 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A LEFT JOIN b ON A.x1 = B.y1 WHERE x1 = 5 AND x2 = 5;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result ------------------------------------------------------------- - Hash Left Join (actual rows=10000 loops=1) + result +-------------------------------------------------------- + Nested Loop Left Join (actual rows=10000 loops=1) AQO not used Output: a.x1, b.y1 - Hash Cond: (a.x1 = b.y1) -> Seq Scan on public.a (actual rows=100 loops=1) AQO: rows=100, error=0% Output: a.x1, a.x2, a.x3 Filter: ((a.x1 = 5) AND (a.x2 = 5)) Rows Removed by Filter: 900 - -> Hash (actual rows=100 loops=1) - Output: b.y1 - -> Seq Scan on public.b (actual rows=100 loops=1) - AQO: rows=100, error=0% - Output: b.y1 - Filter: (b.y1 = 5) - Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=100 loops=100) + AQO: rows=100, error=0% + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 = 5) + Rows Removed by Filter: 900 Using aqo: true AQO mode: LEARN JOINS: 0 -(19 rows) +(16 rows) SELECT str AS result FROM expln(' @@ -516,29 +519,29 @@ SELECT str AS result FROM expln(' SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result -------------------------------------------------------------------------- - Hash Right Join (actual rows=10000000 loops=1) - AQO: rows=1, error=-999999900% + result +------------------------------------------------------------------- + Hash Left Join (actual rows=10000000 loops=1) + AQO not used Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3, c.z1, c.z2, c.z3 - Hash Cond: (b.y1 = a.x1) - -> Seq Scan on public.b (actual rows=1000 loops=1) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 - -> 
Hash (actual rows=100000 loops=1) - Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 - -> Hash Left Join (actual rows=100000 loops=1) - AQO: rows=1, error=-9999900% - Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 - Hash Cond: (a.x1 = c.z1) - -> Seq Scan on public.a (actual rows=1000 loops=1) + Hash Cond: (a.x1 = c.z1) + -> Hash Left Join (actual rows=100000 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3 + Hash Cond: (a.x1 = b.y1) + -> Seq Scan on public.a (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: a.x1, a.x2, a.x3 + -> Hash (actual rows=1000 loops=1) + Output: b.y1, b.y2, b.y3 + -> Seq Scan on public.b (actual rows=1000 loops=1) AQO: rows=1000, error=0% - Output: a.x1, a.x2, a.x3 - -> Hash (actual rows=1000 loops=1) - Output: c.z1, c.z2, c.z3 - -> Seq Scan on public.c (actual rows=1000 loops=1) - AQO: rows=1000, error=0% - Output: c.z1, c.z2, c.z3 + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=1000 loops=1) + Output: c.z1, c.z2, c.z3 + -> Seq Scan on public.c (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: c.z1, c.z2, c.z3 Using aqo: true AQO mode: LEARN JOINS: 1 diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index 9705bf1a..b5e1f671 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -1,4 +1,5 @@ CREATE EXTENSION aqo; +SELECT true FROM aqo_reset(); SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; From 501e8c622feca2ff0f199c5ab3638aad5f390f15 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 31 Jan 2023 13:47:45 +0500 Subject: [PATCH 096/134] Bugfix. Initialization of kNN data structure was omitted in one newly added case. 
--- storage.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/storage.c b/storage.c index 5cb1ef76..439f3118 100644 --- a/storage.c +++ b/storage.c @@ -1465,6 +1465,7 @@ aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, List *reloids) } } aqo_state->data_changed = true; + Assert(entry->rows > 0); end: result = aqo_state->data_changed; LWLockRelease(&aqo_state->data_lock); @@ -1505,15 +1506,19 @@ build_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data, double *features) if (features != NULL) { int old_rows = data->rows; - int k = old_rows; + int k = (old_rows < 0) ? 0 : old_rows; if (data->cols > 0) { int i; - for (i = 0; i < data->rows; i++) + Assert(data->cols == temp_data->cols); + + for (i = 0; i < temp_data->rows; i++) { - if (k < aqo_K && !neirest_neighbor(data->matrix, old_rows, data->matrix[i], data->cols)) + if (k < aqo_K && !neirest_neighbor(data->matrix, old_rows, + temp_data->matrix[i], + data->cols)) { memcpy(data->matrix[k], temp_data->matrix[i], data->cols * sizeof(double)); data->rfactors[k] = temp_data->rfactors[i]; @@ -1521,6 +1526,7 @@ build_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data, double *features) k++; } } + data->rows = k; } } else @@ -1605,11 +1611,13 @@ _fill_knn_data(const DataEntry *entry, List **reloids) } /* - * Return on feature subspace, unique defined by its class (fs) and hash value - * (fss). - * If reloids is NULL, skip loading of this list. + * By given feature space and subspace, build kNN data structure. + * * If wideSearch is true - make seqscan on the hash table to see for relevant * data across neighbours. + * If reloids is NULL - don't fill this list. + * + * Return false if the operation was unsuccessful. 
*/ bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, @@ -1634,7 +1642,7 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, goto end; /* One entry with all correctly filled fields is found */ - Assert(entry); + Assert(entry && entry->rows > 0); Assert(DsaPointerIsValid(entry->data_dp)); if (entry->cols != data->cols) @@ -1643,12 +1651,14 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, elog(LOG, "[AQO] Does a collision happened? Check it if possible " "(fs: "UINT64_FORMAT", fss: %d).", fs, fss); - found = false; + found = false; /* Sign of unsuccessful operation */ goto end; } temp_data = _fill_knn_data(entry, reloids); + Assert(temp_data->rows > 0); build_knn_matrix(data, temp_data, features); + Assert(data->rows > 0); } else /* Iterate across all elements of the table. XXX: Maybe slow. */ @@ -1662,6 +1672,8 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, { List *tmp_oids = NIL; + Assert(entry->rows > 0); + if (entry->key.fss != fss || entry->cols != data->cols) continue; From 367b3df36fc3c69a41368fc60fad27032fd0c19e Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 31 Jan 2023 15:33:09 +0500 Subject: [PATCH 097/134] Rewrite update_functions.sql to avoid dependency on internal logic of the optimizer which can vary on version of PG core. --- expected/update_functions.out | 78 ++++++----------------------------- sql/update_functions.sql | 16 ++++--- 2 files changed, 22 insertions(+), 72 deletions(-) diff --git a/expected/update_functions.out b/expected/update_functions.out index 03a97fe7..cf9cee8e 100644 --- a/expected/update_functions.out +++ b/expected/update_functions.out @@ -295,40 +295,10 @@ UNION ALL -- aqo_data_update() testing. -- -- Populate aqo_data with dump data. 
-SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res -FROM aqo_data_dump -ORDER BY res; - res ------ - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t -(27 rows) - +SELECT count(*) AS res1 FROM + aqo_data_dump, + LATERAL aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS ret +WHERE ret \gset -- Check if data is the same as in source, no result rows expected. (TABLE aqo_data_dump EXCEPT TABLE aqo_data) UNION ALL @@ -338,39 +308,15 @@ UNION ALL (0 rows) -- Update aqo_data with dump data. -SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res -FROM aqo_data_dump -ORDER BY res; - res ------ - t - t - t - t - t - t - t - t - t - t - t - t +SELECT count(*) AS res2 FROM + aqo_data_dump, + LATERAL aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS ret +WHERE ret \gset +SELECT :res1 = :res2 AS ml_sizes_are_equal; + ml_sizes_are_equal +-------------------- t - t - t - t - t - t - t - t - t - t - t - t - t - t - t -(27 rows) +(1 row) -- Check if data is the same as in source, no result rows expected. (TABLE aqo_data_dump EXCEPT TABLE aqo_data) diff --git a/sql/update_functions.sql b/sql/update_functions.sql index 85b711e6..84add94a 100644 --- a/sql/update_functions.sql +++ b/sql/update_functions.sql @@ -146,9 +146,10 @@ UNION ALL -- -- Populate aqo_data with dump data. -SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res -FROM aqo_data_dump -ORDER BY res; +SELECT count(*) AS res1 FROM + aqo_data_dump, + LATERAL aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS ret +WHERE ret \gset -- Check if data is the same as in source, no result rows expected. (TABLE aqo_data_dump EXCEPT TABLE aqo_data) @@ -156,9 +157,12 @@ UNION ALL (TABLE aqo_data EXCEPT TABLE aqo_data_dump); -- Update aqo_data with dump data. 
-SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res -FROM aqo_data_dump -ORDER BY res; +SELECT count(*) AS res2 FROM + aqo_data_dump, + LATERAL aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS ret +WHERE ret \gset + +SELECT :res1 = :res2 AS ml_sizes_are_equal; -- Check if data is the same as in source, no result rows expected. (TABLE aqo_data_dump EXCEPT TABLE aqo_data) From cc53e238aa47f3481b3d99880804eab3a7c68e8e Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 31 Jan 2023 16:09:54 +0500 Subject: [PATCH 098/134] Arrange extension with subtle changes in the optimizer --- expected/look_a_like.out | 67 +++++++++++++++++++++------------------- regress_schedule | 2 -- 2 files changed, 35 insertions(+), 34 deletions(-) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index 70480334..899ef271 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -1,7 +1,7 @@ CREATE EXTENSION aqo; SELECT true FROM aqo_reset(); - ?column? 
----------- + bool +------ t (1 row) @@ -62,25 +62,28 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A LEFT JOIN b ON A.x1 = B.y1 WHERE x1 = 5 AND x2 = 5;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result --------------------------------------------------------- - Nested Loop Left Join (actual rows=10000 loops=1) + result +------------------------------------------------------------ + Hash Left Join (actual rows=10000 loops=1) AQO not used Output: a.x1, b.y1 + Hash Cond: (a.x1 = b.y1) -> Seq Scan on public.a (actual rows=100 loops=1) AQO: rows=100, error=0% Output: a.x1, a.x2, a.x3 Filter: ((a.x1 = 5) AND (a.x2 = 5)) Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=100 loops=100) - AQO: rows=100, error=0% - Output: b.y1, b.y2, b.y3 - Filter: (b.y1 = 5) - Rows Removed by Filter: 900 + -> Hash (actual rows=100 loops=1) + Output: b.y1 + -> Seq Scan on public.b (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: b.y1 + Filter: (b.y1 = 5) + Rows Removed by Filter: 900 Using aqo: true AQO mode: LEARN JOINS: 0 -(16 rows) +(19 rows) SELECT str AS result FROM expln(' @@ -519,29 +522,29 @@ SELECT str AS result FROM expln(' SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result -------------------------------------------------------------------- - Hash Left Join (actual rows=10000000 loops=1) - AQO not used + result +------------------------------------------------------------------------- + Hash Right Join (actual rows=10000000 loops=1) + AQO: rows=1, error=-999999900% Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3, c.z1, c.z2, c.z3 - Hash Cond: (a.x1 = c.z1) - -> Hash Left Join (actual rows=100000 loops=1) - AQO not used - Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3 - Hash Cond: (a.x1 = b.y1) - -> Seq Scan on public.a (actual rows=1000 
loops=1) - AQO: rows=1000, error=0% - Output: a.x1, a.x2, a.x3 - -> Hash (actual rows=1000 loops=1) - Output: b.y1, b.y2, b.y3 - -> Seq Scan on public.b (actual rows=1000 loops=1) + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=100000 loops=1) + Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 + -> Hash Left Join (actual rows=100000 loops=1) + AQO: rows=1, error=-9999900% + Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 + Hash Cond: (a.x1 = c.z1) + -> Seq Scan on public.a (actual rows=1000 loops=1) AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=1000 loops=1) - Output: c.z1, c.z2, c.z3 - -> Seq Scan on public.c (actual rows=1000 loops=1) - AQO: rows=1000, error=0% - Output: c.z1, c.z2, c.z3 + Output: a.x1, a.x2, a.x3 + -> Hash (actual rows=1000 loops=1) + Output: c.z1, c.z2, c.z3 + -> Seq Scan on public.c (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: c.z1, c.z2, c.z3 Using aqo: true AQO mode: LEARN JOINS: 1 diff --git a/regress_schedule b/regress_schedule index 6c558e9a..76a2e00e 100644 --- a/regress_schedule +++ b/regress_schedule @@ -15,11 +15,9 @@ test: plancache test: update_functions # Performance-dependent test. Can be ignored if executes in containers or on slow machines ignore: statement_timeout -ignore: smart_statement_timeout test: statement_timeout test: temp_tables test: top_queries test: relocatable test: look_a_like test: feature_subspace -test: smart_statement_timeout From 927d651b6c192dbcbf60e72aa34067bedebbc912 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 1 Feb 2023 14:36:46 +0500 Subject: [PATCH 099/134] Bugfix. Assertion on disabled query at the ExecutorEnd hook. In an extravagant situation: (mode=disabled, forced stat gathering = 'on') we can get into a situation when AQO is disabled for a query, but previously cached plan contains some AQO preferences. 
Even so, we should ignore the query at the end of execution. --- postprocessing.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/postprocessing.c b/postprocessing.c index f6af5f48..aa82a534 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -757,7 +757,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) cardinality_sum_errors = 0.; cardinality_num_objects = 0; - if (!ExtractFromQueryEnv(queryDesc)) + if (IsQueryDisabled() || !ExtractFromQueryEnv(queryDesc)) /* AQO keep all query-related preferences at the query context. * It is needed to prevent from possible recursive changes, at * preprocessing stage of subqueries. @@ -768,7 +768,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) njoins = (enr != NULL) ? *(int *) enr->reldata : -1; - Assert(!IsQueryDisabled()); Assert(!IsParallelWorker()); if (query_context.explain_only) From 048a6cc625f633f4741057f77f69e5607941facd Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Sun, 5 Feb 2023 14:05:11 +0500 Subject: [PATCH 100/134] Improvement. Clean a list of deactivated queries during the call of the aqo_reset() routine: we want to clean all the AQO internal state on reset. 
--- storage.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/storage.c b/storage.c index 439f3118..be14f3e9 100644 --- a/storage.c +++ b/storage.c @@ -195,7 +195,7 @@ init_deactivated_queries_storage(void) MemSet(&hash_ctl, 0, sizeof(hash_ctl)); hash_ctl.keysize = sizeof(uint64); hash_ctl.entrysize = sizeof(uint64); - deactivated_queries = hash_create("aqo_deactivated_queries", + deactivated_queries = hash_create("AQO deactivated queries", 128, /* start small and extend */ &hash_ctl, HASH_ELEM | HASH_BLOBS); @@ -207,7 +207,7 @@ query_is_deactivated(uint64 queryid) { bool found; - hash_search(deactivated_queries, &queryid, HASH_FIND, &found); + (void) hash_search(deactivated_queries, &queryid, HASH_FIND, &found); return found; } @@ -215,7 +215,21 @@ query_is_deactivated(uint64 queryid) void add_deactivated_query(uint64 queryid) { - hash_search(deactivated_queries, &queryid, HASH_ENTER, NULL); + (void) hash_search(deactivated_queries, &queryid, HASH_ENTER, NULL); +} + +static void +reset_deactivated_queries(void) +{ + HASH_SEQ_STATUS hash_seq; + uint64 *queryid; + + hash_seq_init(&hash_seq, deactivated_queries); + while ((queryid = hash_seq_search(&hash_seq)) != NULL) + { + if (!hash_search(deactivated_queries, queryid, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); + } } /* @@ -2179,7 +2193,6 @@ aqo_queries_update(PG_FUNCTION_ARGS) { PG_ARGISNULL(AQ_FS), PG_ARGISNULL(AQ_LEARN_AQO), PG_ARGISNULL(AQ_USE_AQO), PG_ARGISNULL(AQ_AUTO_TUNING) }; - if (PG_ARGISNULL(AQ_QUERYID)) PG_RETURN_BOOL(false); @@ -2211,6 +2224,10 @@ aqo_reset(PG_FUNCTION_ARGS) counter += aqo_qtexts_reset(); counter += aqo_data_reset(); counter += aqo_queries_reset(); + + /* Cleanup cache of deactivated queries */ + reset_deactivated_queries(); + PG_RETURN_INT64(counter); } From 53e07da54ed276d4cee2706f12b47c5cd6883bee Mon Sep 17 00:00:00 2001 From: "Andrey V. 
Lepikhov" Date: Sun, 19 Feb 2023 16:37:38 +0600 Subject: [PATCH 101/134] Generalize basic CI script reviewed-by: a.rybakina --- .github/workflows/c-cpp.yml | 82 +++++++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 30 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index a38a8734..0123a181 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -1,10 +1,7 @@ -name: 'C/C++ CI for the stable14' +name: 'AQO basic CI' on: - push: - branches: [ stable14 ] pull_request: - branches: [ stable14 ] env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} @@ -15,45 +12,70 @@ jobs: runs-on: ubuntu-latest steps: - - name: pg + - uses: actions/checkout@v3 + - name: "Define PostreSQL major version" run: | - sudo apt install libipc-run-perl + echo "$(ls -la)" + patch_name=$(ls aqo_*.patch|tail -1) + echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV + + # we can get number, otherwise set up master + vers_number=$(echo "$patch_name"|tr -d -c 0-9) + echo "PG_MAJOR_VERSION=$vers_number" >> $GITHUB_ENV + + branch_name="REL_${vers_number}_STABLE" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV + + echo "COPT=-Werror" >> $GITHUB_ENV + echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV + + - name: "Set master branch name, if needed" + if: env.PG_MAJOR_VERSION == '' + run: | + branch_name="master" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV - echo "Deploying to production server on branch" $BRANCH_NAME + - name: "Environment (debug output)" + if: ${{ always() }} + run: | + echo "Use PostgreSQL branch $PG_BRANCH (patch: $CORE_PATCH_NAME)" + echo "COPT: $COPT" + echo "CONFIGURE_OPTS: $CONFIGURE_OPTS" + echo "Deploying to production server on branch" $BRANCH_NAME "(PG $PG_BRANCH)" git config --global user.email "ci@postgrespro.ru" git config --global user.name "CI PgPro admin" - export COPT=-Werror - export CONFIGURE_OPTS="--prefix=`pwd`/tmp_install 
--enable-tap-tests --enable-cassert" - git clone https://github.com/postgres/postgres.git pg - cd pg - - git checkout REL_14_STABLE - git clone https://github.com/postgrespro/aqo.git contrib/aqo - git -C contrib/aqo checkout $BRANCH_NAME - patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg14.patch - ./configure $CONFIGURE_OPTS CFLAGS="-O2" + + - name: "Prepare PG directory" + run: | + git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg + cd $GITHUB_WORKSPACE/../pg + ls -la + cp -r ../aqo contrib/aqo + patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME + + - name: "make check" + run: | + sudo apt install libipc-run-perl + + cd $GITHUB_WORKSPACE/../pg + ./configure $CONFIGURE_OPTS CFLAGS="-O2" > /dev/null make -j4 > /dev/null && make -j4 -C contrib > /dev/null env CLIENTS=50 THREADS=50 make -C contrib/aqo check echo "Use AQO with debug code included" git clean -fdx git -C contrib/aqo clean -fdx - ./configure $CONFIGURE_OPTS CFLAGS="-DAQO_DEBUG_PRINT -O0" + ./configure $CONFIGURE_OPTS CFLAGS="-DAQO_DEBUG_PRINT -O0" > /dev/null make -j4 > /dev/null && make -j4 -C contrib > /dev/null env CLIENTS=50 THREADS=50 make -C contrib/aqo check - - name: Archive regression.diffs - if: ${{ always() }} - uses: actions/upload-artifact@v3 - with: - name: regression_diffs - path: /home/runner/work/aqo/aqo/pg/contrib/aqo/regression.diffs - retention-days: 1 - - name: Archive TAP tests log files + + - name: Archive artifacts if: ${{ always() }} uses: actions/upload-artifact@v3 with: - name: tap_logs + name: make_check_logs path: | - log - retention-days: 1 - + /home/runner/work/aqo/pg/contrib/aqo/regression.diffs + /home/runner/work/aqo/pg/contrib/aqo/log + /home/runner/work/aqo/pg/contrib/aqo/tmp_check/log + retention-days: 7 From 27bff632b300aa2f54bdd37327b4e74eb987a231 Mon Sep 17 00:00:00 2001 From: "Andrey V. 
Lepikhov" Date: Thu, 16 Feb 2023 10:02:14 +0600 Subject: [PATCH 102/134] Bugfix. Remove dangerous usage of short-lived AQO memory contexts. Using such a context we should remember about the risks: * Recursion in AQO hooks can induce accidential memory context reset. * System routines which we call from the extension, could require more long- lived memory contexts on the outside than our. --- preprocessing.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/preprocessing.c b/preprocessing.c index ca71156d..aadc959e 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -128,8 +128,6 @@ aqo_planner(Query *parse, bool query_is_stored = false; MemoryContext oldctx; - oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); - /* * We do not work inside an parallel worker now by reason of insert into * the heap during planning. Transactions are synchronized between parallel @@ -146,7 +144,6 @@ aqo_planner(Query *parse, * We should disable AQO for this query to remember this decision along * all execution stages. */ - MemoryContextSwitchTo(oldctx); disable_aqo_for_query(); return call_default_planner(parse, @@ -176,7 +173,6 @@ aqo_planner(Query *parse, * feature space, that is processing yet (disallow invalidation * recursion, as an example). */ - MemoryContextSwitchTo(oldctx); disable_aqo_for_query(); return call_default_planner(parse, @@ -188,11 +184,9 @@ aqo_planner(Query *parse, elog(DEBUG1, "AQO will be used for query '%s', class "UINT64_FORMAT, query_string ? 
query_string : "null string", query_context.query_hash); - MemoryContextSwitchTo(oldctx); oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); cur_classes = lappend_uint64(cur_classes, query_context.query_hash); MemoryContextSwitchTo(oldctx); - oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); if (aqo_mode == AQO_MODE_DISABLED) { @@ -351,7 +345,7 @@ aqo_planner(Query *parse, INSTR_TIME_SET_CURRENT(query_context.start_planning_time); { PlannedStmt *stmt; - MemoryContextSwitchTo(oldctx); + stmt = call_default_planner(parse, query_string, cursorOptions, boundParams); @@ -458,7 +452,6 @@ jointree_walker(Node *jtnode, void *context) static bool isQueryUsingSystemRelation_walker(Node *node, void *context) { - MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); AQOPreWalkerCtx *ctx = (AQOPreWalkerCtx *) context; if (node == NULL) @@ -500,7 +493,6 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) } jointree_walker((Node *) query->jointree, context); - MemoryContextSwitchTo(oldctx); /* Recursively plunge into subqueries and CTEs */ return query_tree_walker(query, From 03d14f79b833fedb914b14e5c3e28e3027e3bbb2 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 1 Feb 2023 09:32:08 +0500 Subject: [PATCH 103/134] One more step towards improving the AQO regression tests stability. Move GUCs, which can be changed in runtime, from global regression tests conf to first executed test 'aqo_disabled.sql'. There we set these values by ALTER SYSTEM/pg_reload_conf() and use them during the test. Also, we call aqo_reset() at the start of each test. And a bit more: 1. Avoid to show a number of records in AQO ML storage - it can depend on optimizer settings and quite unstable (in progress). 2. Use aliases query in output to avoid unstability of naming of anonymous columns. 
--- Makefile | 6 +++ aqo.conf | 2 - expected/aqo_controlled.out | 21 +++++----- expected/aqo_disabled.out | 25 ++++++------ expected/aqo_fdw.out | 29 ++++++++++---- expected/aqo_forced.out | 17 ++++---- expected/aqo_intelligent.out | 16 ++++---- expected/aqo_learn.out | 26 ++++++------- expected/clean_aqo_data.out | 35 +++++++++-------- expected/feature_subspace.out | 38 +++++++++--------- expected/forced_stat_collection.out | 15 ++++---- expected/gucs.out | 22 +++++++---- expected/look_a_like.out | 20 ++++------ expected/parallel_workers.out | 9 ++++- expected/plancache.out | 15 ++++---- expected/relocatable.out | 9 ++++- expected/schema.out | 9 +++-- expected/statement_timeout.out | 60 ++++++++++++++++------------- expected/temp_tables.out | 45 ++++++++++++---------- expected/top_queries.out | 16 ++++---- expected/unsupported.out | 27 +++++++------ expected/update_functions.out | 26 ++++++------- sql/aqo_controlled.sql | 14 +++---- sql/aqo_disabled.sql | 18 ++++----- sql/aqo_fdw.sql | 6 +-- sql/aqo_forced.sql | 11 ++---- sql/aqo_intelligent.sql | 9 ++--- sql/aqo_learn.sql | 11 ++---- sql/clean_aqo_data.sql | 15 ++++---- sql/feature_subspace.sql | 6 +-- sql/forced_stat_collection.sql | 7 ++-- sql/gucs.sql | 9 +++-- sql/look_a_like.sql | 14 ++++--- sql/parallel_workers.sql | 5 +-- sql/plancache.sql | 7 ++-- sql/relocatable.sql | 5 ++- sql/schema.sql | 3 +- sql/statement_timeout.sql | 36 +++++++++-------- sql/temp_tables.sql | 19 +++++---- sql/top_queries.sql | 7 ++-- sql/unsupported.sql | 7 ++-- sql/update_functions.sql | 13 ++++--- t/001_pgbench.pl | 3 ++ t/002_pg_stat_statements_aqo.pl | 8 +++- 44 files changed, 386 insertions(+), 335 deletions(-) diff --git a/Makefile b/Makefile index d3aec440..ce9d00ba 100755 --- a/Makefile +++ b/Makefile @@ -16,6 +16,12 @@ TAP_TESTS = 1 REGRESS = aqo_dummy_test REGRESS_OPTS = --schedule=$(srcdir)/regress_schedule +# Set default values of some gucs to be stable on custom settings during +# a kind of installcheck +PGOPTIONS = 
--aqo.force_collect_stat=off --max_parallel_maintenance_workers=1 \ + --aqo.join_threshold=0 --max_parallel_workers_per_gather=1 +export PGOPTIONS + fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw stat_srcdir = $(top_srcdir)/contrib/pg_stat_statements PG_CPPFLAGS += -I$(libpq_srcdir) -I$(fdw_srcdir) -I$(stat_srcdir) diff --git a/aqo.conf b/aqo.conf index 03de79ee..069c7dd7 100644 --- a/aqo.conf +++ b/aqo.conf @@ -1,5 +1,3 @@ autovacuum = off shared_preload_libraries = 'postgres_fdw, aqo' -max_parallel_maintenance_workers = 1 # switch off parallel workers because of unsteadiness -aqo.wide_search = 'on' compute_query_id = 'regress' diff --git a/expected/aqo_controlled.out b/expected/aqo_controlled.out index cf88bf42..43d27d74 100644 --- a/expected/aqo_controlled.out +++ b/expected/aqo_controlled.out @@ -1,3 +1,10 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -25,8 +32,6 @@ AS ( ) INSERT INTO aqo_test2 (SELECT * FROM t); CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); ANALYZE aqo_test2; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 @@ -199,11 +204,12 @@ WHERE t1.a = t2.b AND t2.a = t3.b; SELECT count(*) FROM (SELECT queryid AS id FROM aqo_queries) AS q1, - LATERAL aqo_queries_update(q1.id, NULL, NULL, true, NULL) + LATERAL aqo_queries_update(q1.id, NULL, NULL, true, NULL) AS ret +WHERE NOT ret ; -- set use = true count ------- - 12 + 1 (1 row) EXPLAIN (COSTS FALSE) @@ -311,11 +317,4 @@ DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; DROP INDEX aqo_test2_idx_a; DROP TABLE aqo_test2; --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - ?column? 
----------- - 1 -(1 row) - DROP EXTENSION aqo; diff --git a/expected/aqo_disabled.out b/expected/aqo_disabled.out index 606d258e..cf12e2fb 100644 --- a/expected/aqo_disabled.out +++ b/expected/aqo_disabled.out @@ -1,3 +1,12 @@ +-- Create the extension. Drop all lumps which could survive from +-- previous pass (repeated installcheck as an example). +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -16,8 +25,6 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -151,11 +158,12 @@ SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero SET aqo.mode = 'controlled'; SELECT count(*) FROM (SELECT queryid AS id FROM aqo_queries) AS q1, - LATERAL aqo_queries_update(q1.id, NULL, true, true, false) + LATERAL aqo_queries_update(q1.id, NULL, true, true, false) AS ret +WHERE NOT ret ; -- Enable all disabled query classes count ------- - 5 + 1 (1 row) EXPLAIN SELECT * FROM aqo_test0 @@ -223,15 +231,8 @@ SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero 0 (1 row) --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - -DROP EXTENSION aqo; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; +DROP EXTENSION aqo; diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index e568e993..69c1b132 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -3,12 +3,17 @@ -- JOIN push-down (check push of baserestrictinfo and joininfo) -- Aggregate push-down -- Push-down of groupings with HAVING clause. 
-CREATE EXTENSION aqo; -CREATE EXTENSION postgres_fdw; +CREATE EXTENSION IF NOT EXISTS aqo; +CREATE EXTENSION IF NOT EXISTS postgres_fdw; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'learn'; SET aqo.show_details = 'true'; -- show AQO info for each node and entire query. SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. -SET aqo.join_threshold = 0; DO $d$ BEGIN EXECUTE $$CREATE SERVER loopback FOREIGN DATA WRAPPER postgres_fdw @@ -100,15 +105,23 @@ SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; ') AS str WHERE str NOT LIKE '%Sort Method%'; - str -------------------------------------------- - Foreign Scan (actual rows=1 loops=1) + str +------------------------------------------------------------ + Merge Join (actual rows=1 loops=1) AQO not used - Relations: (frgn a) INNER JOIN (frgn b) + Merge Cond: (a.x = b.x) + -> Sort (actual rows=1 loops=1) + Sort Key: a.x + -> Foreign Scan on frgn a (actual rows=1 loops=1) + AQO not used + -> Sort (actual rows=1 loops=1) + Sort Key: b.x + -> Foreign Scan on frgn b (actual rows=1 loops=1) + AQO not used Using aqo: true AQO mode: LEARN JOINS: 0 -(6 rows) +(14 rows) -- Should learn on postgres_fdw nodes SELECT str FROM expln(' diff --git a/expected/aqo_forced.out b/expected/aqo_forced.out index 091ead32..6d5d14a9 100644 --- a/expected/aqo_forced.out +++ b/expected/aqo_forced.out @@ -1,3 +1,11 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -16,8 +24,6 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 @@ -82,11 
+88,4 @@ DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - DROP EXTENSION aqo; diff --git a/expected/aqo_intelligent.out b/expected/aqo_intelligent.out index 7ec943f5..1d407ea7 100644 --- a/expected/aqo_intelligent.out +++ b/expected/aqo_intelligent.out @@ -1,3 +1,10 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -16,8 +23,6 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -519,11 +524,4 @@ DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - ?column? 
----------- - 1 -(1 row) - DROP EXTENSION aqo; diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index db117a0c..9a5ca8dd 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -1,3 +1,10 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + -- The function just copied from stats_ext.sql create function check_estimated_rows(text) returns table (estimated int, actual int) language plpgsql as @@ -36,8 +43,6 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -236,10 +241,10 @@ SELECT count(*) FROM tmp1; (1 row) -- Remove data on some unneeded instances of tmp1 table. -SELECT * FROM aqo_cleanup(); - nfs | nfss ------+------ - 9 | 18 +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t (1 row) -- Result of the query below should be empty @@ -563,7 +568,7 @@ SELECT * FROM check_estimated_rows( 'SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); estimated | actual -----------+-------- - 19 | 19 + 20 | 19 (1 row) SELECT count(*) FROM @@ -716,11 +721,4 @@ DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - ?column? 
----------- - 1 -(1 row) - DROP EXTENSION aqo; diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index e66f274b..49b64832 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -1,5 +1,10 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'learn'; DROP TABLE IF EXISTS a; NOTICE: table "a" does not exist, skipping @@ -11,9 +16,9 @@ SELECT * FROM a; (0 rows) SELECT 'a'::regclass::oid AS a_oid \gset -SELECT true FROM aqo_cleanup(); - bool ------- +SELECT true AS success FROM aqo_cleanup(); + success +--------- t (1 row) @@ -54,9 +59,9 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) DROP TABLE a; -SELECT true FROM aqo_cleanup(); - bool ------- +SELECT true AS success FROM aqo_cleanup(); + success +--------- t (1 row) @@ -119,7 +124,7 @@ SELECT 'b'::regclass::oid AS b_oid \gset SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); count ------- - 2 + 3 (1 row) SELECT count(*) FROM aqo_queries WHERE @@ -175,9 +180,9 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) DROP TABLE a; -SELECT true FROM aqo_cleanup(); - bool ------- +SELECT true AS success FROM aqo_cleanup(); + success +--------- t (1 row) @@ -253,9 +258,9 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) DROP TABLE b; -SELECT true FROM aqo_cleanup(); - bool ------- +SELECT true AS success FROM aqo_cleanup(); + success +--------- t (1 row) diff --git a/expected/feature_subspace.out b/expected/feature_subspace.out index a49be254..a53b57e7 100644 --- a/expected/feature_subspace.out +++ b/expected/feature_subspace.out @@ -1,7 +1,12 @@ -- This test related to some issues on feature subspace calculation -CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'learn'; -SET aqo.join_threshold = 0; SET aqo.show_details = 'on'; CREATE 
TABLE a AS (SELECT gs AS x FROM generate_series(1,10) AS gs); CREATE TABLE b AS (SELECT gs AS x FROM generate_series(1,100) AS gs); @@ -46,20 +51,23 @@ SELECT str AS result FROM expln(' SELECT * FROM b LEFT JOIN a USING (x);') AS str WHERE str NOT LIKE '%Memory%'; - result ----------------------------------------------------- - Hash Left Join (actual rows=100 loops=1) - AQO: rows=10, error=-900% - Hash Cond: (b.x = a.x) - -> Seq Scan on b (actual rows=100 loops=1) - AQO: rows=100, error=0% - -> Hash (actual rows=10 loops=1) + result +----------------------------------------------------- + Merge Left Join (actual rows=100 loops=1) + AQO not used + Merge Cond: (b.x = a.x) + -> Sort (actual rows=100 loops=1) + Sort Key: b.x + -> Seq Scan on b (actual rows=100 loops=1) + AQO not used + -> Sort (actual rows=10 loops=1) + Sort Key: a.x -> Seq Scan on a (actual rows=10 loops=1) - AQO: rows=10, error=0% + AQO not used Using aqo: true AQO mode: LEARN JOINS: 0 -(11 rows) +(14 rows) -- Look into the reason: two JOINs from different classes have the same FSS. 
SELECT to_char(d1.targets[1], 'FM999.00') AS target FROM aqo_data d1 @@ -72,10 +80,4 @@ WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids) order by t (2 rows) DROP TABLE a,b CASCADE; -SELECT true FROM aqo_reset(); - bool ------- - t -(1 row) - DROP EXTENSION aqo; diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index f635fbcc..c5a6ac0e 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -1,5 +1,11 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + \set citizens 1000 -SET aqo.join_threshold = 0; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'off'; CREATE TABLE person ( @@ -19,7 +25,6 @@ INSERT INTO person (id,age,gender,passport) END FROM (SELECT *, 14+(id % 60) AS age FROM generate_series(1, :citizens) id) AS q1 ); -CREATE EXTENSION aqo; SET aqo.force_collect_stat = 'on'; SELECT count(*) FROM person WHERE age<18; count @@ -64,10 +69,4 @@ SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); (3 rows) DROP TABLE person; -SELECT 1 FROM aqo_reset(); -- Full remove of ML data before the end - ?column? ----------- - 1 -(1 row) - DROP EXTENSION aqo; diff --git a/expected/gucs.out b/expected/gucs.out index d7ef6eeb..f33aa6b2 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -1,4 +1,11 @@ -CREATE EXTENSION aqo; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + -- Utility tool. Allow to filter system-dependent strings from an explain output. 
CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ BEGIN @@ -7,16 +14,15 @@ BEGIN RETURN; END; $$ LANGUAGE PLPGSQL; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = true; SET compute_query_id = 'auto'; CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; -SELECT true FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. - bool ------- +SELECT true AS success FROM aqo_reset(); + success +--------- t (1 row) @@ -127,9 +133,9 @@ SELECT count(*) FROM aqo_query_stat; 1 (1 row) -SELECT true FROM aqo_reset(); -- Remove one record from all tables - bool ------- +SELECT true AS success FROM aqo_reset(); + success +--------- t (1 row) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index 899ef271..fb76fdd6 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -1,11 +1,12 @@ -CREATE EXTENSION aqo; -SELECT true FROM aqo_reset(); - bool ------- +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- t (1 row) -SET aqo.join_threshold = 0; +SET aqo.wide_search = 'on'; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; set aqo.show_hash = 'off'; @@ -550,14 +551,9 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L JOINS: 1 (24 rows) -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - +RESET aqo.wide_search; +DROP EXTENSION aqo CASCADE; DROP TABLE a; DROP TABLE b; DROP TABLE c; DROP FUNCTION expln; -DROP EXTENSION aqo CASCADE; diff --git a/expected/parallel_workers.out b/expected/parallel_workers.out index fca67006..3e408f49 100644 --- a/expected/parallel_workers.out +++ b/expected/parallel_workers.out @@ -1,6 +1,12 @@ -- Specifically test AQO machinery for queries uses partial paths and executed -- with parallel workers. 
-CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + -- Utility tool. Allow to filter system-dependent strings from explain output. CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ BEGIN @@ -9,7 +15,6 @@ BEGIN RETURN; END; $$ LANGUAGE PLPGSQL; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = true; -- Be generous with a number parallel workers to test the machinery diff --git a/expected/plancache.out b/expected/plancache.out index 6874468a..88698463 100644 --- a/expected/plancache.out +++ b/expected/plancache.out @@ -1,6 +1,11 @@ -- Tests on interaction of AQO with cached plans. -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'intelligent'; SET aqo.show_details = 'on'; SET aqo.show_hash = 'off'; @@ -44,10 +49,4 @@ SELECT * FROM f1(); DROP FUNCTION f1; DROP TABLE test CASCADE; -SELECT true FROM aqo_reset(); - bool ------- - t -(1 row) - DROP EXTENSION aqo; diff --git a/expected/relocatable.out b/expected/relocatable.out index 949896f6..3d7f386f 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -1,5 +1,10 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'learn'; -- use this mode for unconditional learning CREATE TABLE test AS (SELECT id, 'payload' || id FROM generate_series(1,100) id); ANALYZE test; diff --git a/expected/schema.out b/expected/schema.out index 0b5a5c07..e712f407 100644 --- a/expected/schema.out +++ b/expected/schema.out @@ -1,5 +1,3 @@ -DROP EXTENSION IF EXISTS aqo CASCADE; -NOTICE: extension "aqo" does not exist, skipping DROP SCHEMA IF EXISTS test CASCADE; NOTICE: schema "test" does not exist, skipping -- Check Zero-schema 
path behaviour @@ -12,7 +10,12 @@ ERROR: no schema has been selected to create in CREATE SCHEMA IF NOT EXISTS test1; SET search_path TO test1, public; CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'intelligent'; CREATE TABLE test (id SERIAL, data TEXT); INSERT INTO test (data) VALUES ('string'); diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index a12fe9dd..39796549 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -17,37 +17,43 @@ BEGIN END IF; END LOOP; END; $$; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + CREATE TABLE t AS SELECT * FROM generate_series(1,50) AS x; ANALYZE t; DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. -CREATE EXTENSION IF NOT EXISTS aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'off'; SET aqo.learn_statement_timeout = 'on'; -SET statement_timeout = 100; -- [0.1s] +SET statement_timeout = 80; -- [0.1s] SELECT *, pg_sleep(0.1) FROM t; NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- haven't any partial data +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- haven't any partial data check_estimated_rows ---------------------- 50 (1 row) -- Don't learn because running node has smaller cardinality than an optimizer prediction -SET statement_timeout = 400; +SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. 
ERROR: canceling statement due to statement timeout -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- 50 (1 row) -- We have a real learning data. -SET statement_timeout = 8000; +SET statement_timeout = 800; SELECT *, pg_sleep(0.1) FROM t; x | pg_sleep ---+---------- @@ -58,7 +64,7 @@ SELECT *, pg_sleep(0.1) FROM t; 5 | (5 rows) -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- 5 @@ -68,33 +74,33 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); DELETE FROM t WHERE x > 2; ANALYZE t; INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 +SELECT true AS success FROM aqo_reset(); + success +--------- + t (1 row) -SET statement_timeout = 100; +SET statement_timeout = 80; SELECT *, pg_sleep(0.1) FROM t; -- Not learned NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- 2 (1 row) -SET statement_timeout = 500; +SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; -- Learn! NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. 
ERROR: canceling statement due to statement timeout -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- - 4 + 3 (1 row) -SET statement_timeout = 800; +SET statement_timeout = 550; SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data x | pg_sleep ---+---------- @@ -105,17 +111,17 @@ SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data 5 | (5 rows) -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- 5 (1 row) -- Interrupted query should immediately appear in aqo_data -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 +SELECT true AS success FROM aqo_reset(); + success +--------- + t (1 row) SET statement_timeout = 500; @@ -134,10 +140,10 @@ SELECT count(*) FROM aqo_data; -- Must be one 1 (1 row) -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 +SELECT true AS success FROM aqo_reset(); + success +--------- + t (1 row) DROP TABLE t; diff --git a/expected/temp_tables.out b/expected/temp_tables.out index cb1da23f..9fa20e7c 100644 --- a/expected/temp_tables.out +++ b/expected/temp_tables.out @@ -1,5 +1,12 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET aqo.wide_search = 'on'; SET aqo.mode = 'learn'; CREATE TEMP TABLE tt(); CREATE TABLE pt(); @@ -48,10 +55,10 @@ SELECT count(*) FROM aqo_data; -- Don't bother about false negatives because of (1 row) DROP TABLE tt; -SELECT * FROM aqo_cleanup(); - nfs | nfss ------+------ - 0 | 0 +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t (1 row) SELECT count(*) FROM aqo_data; -- Should return the same as previous call above @@ -61,10 +68,10 @@ SELECT count(*) FROM aqo_data; -- Should return the same as previous call above (1 
row) DROP TABLE pt; -SELECT * FROM aqo_cleanup(); - nfs | nfss ------+------ - 3 | 10 +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t (1 row) SELECT count(*) FROM aqo_data; -- Should be 0 @@ -133,10 +140,10 @@ SELECT * FROM check_estimated_rows(' SET aqo.mode = 'forced'; -- Now we use all fss records for each query DROP TABLE pt; -SELECT * FROM aqo_cleanup(); - nfs | nfss ------+------ - 2 | 5 +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t (1 row) CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; @@ -184,12 +191,8 @@ SELECT * FROM check_estimated_rows(' 100 | 0 (1 row) +-- Clear common parts of AQO state +RESET aqo.wide_search; +DROP EXTENSION aqo CASCADE; DROP TABLE pt CASCADE; -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - -DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; diff --git a/expected/top_queries.out b/expected/top_queries.out index ba72d7c8..62186efc 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -1,5 +1,11 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; -- @@ -95,10 +101,4 @@ ORDER BY (md5(query_text)); SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 (3 rows) -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - DROP EXTENSION aqo; diff --git a/expected/unsupported.out b/expected/unsupported.out index c42a3be5..a1a6f4ae 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -1,4 +1,10 @@ -CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + -- Utility tool. Allow to filter system-dependent strings from an explain output. 
CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ BEGIN @@ -7,7 +13,6 @@ BEGIN RETURN; END; $$ LANGUAGE PLPGSQL; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; DROP TABLE IF EXISTS t; @@ -52,7 +57,7 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) AQO not used Group Key: x -> Seq Scan on t (actual rows=801 loops=1) - AQO: rows=801, error=0% + AQO not used Filter: (x > 3) Rows Removed by Filter: 199 Using aqo: true @@ -406,7 +411,7 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) -> Aggregate (actual rows=1 loops=1000) AQO not used -> Seq Scan on t t0 (actual rows=50 loops=1000) - AQO: rows=50, error=0% + AQO not used Filter: (x = t.x) Rows Removed by Filter: 950 SubPlan 2 @@ -616,10 +621,10 @@ SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May 44 (1 row) -SELECT * FROM aqo_cleanup(); - nfs | nfss ------+------ - 13 | 44 +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t (1 row) SELECT count(*) FROM aqo_data; -- No one row should be returned @@ -637,10 +642,4 @@ ORDER BY (md5(query_text),error) DESC; -------+------------ (0 rows) -SELECT 1 FROM aqo_reset(); - ?column? 
----------- - 1 -(1 row) - DROP EXTENSION aqo; diff --git a/expected/update_functions.out b/expected/update_functions.out index cf9cee8e..74428a35 100644 --- a/expected/update_functions.out +++ b/expected/update_functions.out @@ -1,3 +1,11 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + CREATE TABLE aqo_test1(a int, b int); WITH RECURSIVE t(a, b) AS ( @@ -16,8 +24,6 @@ AS ( ) INSERT INTO aqo_test2 (SELECT * FROM t); CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); ANALYZE aqo_test2; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; SET aqo.mode='intelligent'; SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; count @@ -134,10 +140,10 @@ CREATE TABLE aqo_query_texts_dump AS SELECT * FROM aqo_query_texts; CREATE TABLE aqo_queries_dump AS SELECT * FROM aqo_queries; CREATE TABLE aqo_query_stat_dump AS SELECT * FROM aqo_query_stat; CREATE TABLE aqo_data_dump AS SELECT * FROM aqo_data; -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 +SELECT true AS success FROM aqo_reset(); + success +--------- + t (1 row) -- @@ -411,12 +417,6 @@ SELECT aqo_data_update(1, 1, 1, '{{1}, {2}}', '{1}', '{1}', '{1, 2, 3}'); (1 row) SET aqo.mode='disabled'; -SELECT 1 FROM aqo_reset(); - ?column? 
----------- - 1 -(1 row) - -DROP EXTENSION aqo; +DROP EXTENSION aqo CASCADE; DROP TABLE aqo_test1, aqo_test2; DROP TABLE aqo_query_texts_dump, aqo_queries_dump, aqo_query_stat_dump, aqo_data_dump; diff --git a/sql/aqo_controlled.sql b/sql/aqo_controlled.sql index 0ba88e56..8c8e5fb8 100644 --- a/sql/aqo_controlled.sql +++ b/sql/aqo_controlled.sql @@ -1,3 +1,6 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -28,9 +31,6 @@ AS ( CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); ANALYZE aqo_test2; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; - SET aqo.mode = 'controlled'; EXPLAIN (COSTS FALSE) @@ -111,7 +111,8 @@ WHERE t1.a = t2.b AND t2.a = t3.b; SELECT count(*) FROM (SELECT queryid AS id FROM aqo_queries) AS q1, - LATERAL aqo_queries_update(q1.id, NULL, NULL, true, NULL) + LATERAL aqo_queries_update(q1.id, NULL, NULL, true, NULL) AS ret +WHERE NOT ret ; -- set use = true EXPLAIN (COSTS FALSE) @@ -147,14 +148,9 @@ WHERE t1.a = t2.b AND t2.a = t3.b; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; - DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; - DROP INDEX aqo_test2_idx_a; DROP TABLE aqo_test2; --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - DROP EXTENSION aqo; diff --git a/sql/aqo_disabled.sql b/sql/aqo_disabled.sql index fd709cf3..8397f847 100644 --- a/sql/aqo_disabled.sql +++ b/sql/aqo_disabled.sql @@ -1,3 +1,8 @@ +-- Create the extension. Drop all lumps which could survive from +-- previous pass (repeated installcheck as an example). 
+CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -17,8 +22,6 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; @@ -77,7 +80,8 @@ SET aqo.mode = 'controlled'; SELECT count(*) FROM (SELECT queryid AS id FROM aqo_queries) AS q1, - LATERAL aqo_queries_update(q1.id, NULL, true, true, false) + LATERAL aqo_queries_update(q1.id, NULL, true, true, false) AS ret +WHERE NOT ret ; -- Enable all disabled query classes EXPLAIN SELECT * FROM aqo_test0 @@ -98,13 +102,9 @@ FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - -DROP EXTENSION aqo; - DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; - DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; + +DROP EXTENSION aqo; diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index bd211326..5425dcf4 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -4,13 +4,13 @@ -- Aggregate push-down -- Push-down of groupings with HAVING clause. -CREATE EXTENSION aqo; -CREATE EXTENSION postgres_fdw; +CREATE EXTENSION IF NOT EXISTS aqo; +CREATE EXTENSION IF NOT EXISTS postgres_fdw; +SELECT true AS success FROM aqo_reset(); SET aqo.mode = 'learn'; SET aqo.show_details = 'true'; -- show AQO info for each node and entire query. SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. 
-SET aqo.join_threshold = 0; DO $d$ BEGIN diff --git a/sql/aqo_forced.sql b/sql/aqo_forced.sql index 92a26564..34f97359 100644 --- a/sql/aqo_forced.sql +++ b/sql/aqo_forced.sql @@ -1,3 +1,7 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -18,9 +22,6 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; - SET aqo.mode = 'controlled'; EXPLAIN (COSTS FALSE) @@ -53,11 +54,7 @@ WHERE a < 5 AND b < 5 AND c < 5 AND d < 5; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; - DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - DROP EXTENSION aqo; diff --git a/sql/aqo_intelligent.sql b/sql/aqo_intelligent.sql index 545325c1..45ecaecc 100644 --- a/sql/aqo_intelligent.sql +++ b/sql/aqo_intelligent.sql @@ -1,3 +1,6 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -18,9 +21,6 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; - SET aqo.mode = 'intelligent'; EXPLAIN SELECT * FROM aqo_test0 @@ -215,7 +215,4 @@ DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; --- XXX: extension dropping doesn't clear file storage. Do it manually. 
-SELECT 1 FROM aqo_reset(); - DROP EXTENSION aqo; diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index 8b57972e..8acd2db7 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -1,3 +1,6 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + -- The function just copied from stats_ext.sql create function check_estimated_rows(text) returns table (estimated int, actual int) language plpgsql as @@ -39,9 +42,6 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; - SET aqo.mode = 'intelligent'; EXPLAIN SELECT * FROM aqo_test0 @@ -124,7 +124,7 @@ WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; SELECT count(*) FROM tmp1; -- Remove data on some unneeded instances of tmp1 table. -SELECT * FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); -- Result of the query below should be empty SELECT * FROM aqo_query_texts aqt1, aqo_query_texts aqt2 @@ -314,7 +314,4 @@ DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; --- XXX: extension dropping doesn't clear file storage. Do it manually. 
-SELECT 1 FROM aqo_reset(); - DROP EXTENSION aqo; diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index d2abeb93..3c504bdb 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -1,5 +1,6 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + SET aqo.mode = 'learn'; DROP TABLE IF EXISTS a; @@ -7,7 +8,7 @@ DROP TABLE IF EXISTS b; CREATE TABLE a(); SELECT * FROM a; SELECT 'a'::regclass::oid AS a_oid \gset -SELECT true FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); /* * lines with a_oid in aqo_data, @@ -27,7 +28,7 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); DROP TABLE a; -SELECT true FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); /* * lines with a_oid in aqo_data, @@ -79,7 +80,7 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); DROP TABLE a; -SELECT true FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); /* * lines corresponding to a_oid and both a_oid's fs deleted in aqo_data, @@ -115,7 +116,7 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fs = aqo_queries.queryid); DROP TABLE b; -SELECT true FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); -- lines corresponding to b_oid in theese tables deleted SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); @@ -131,4 +132,4 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fs = aqo_queries.queryid); -DROP EXTENSION aqo; \ No newline at end of file +DROP EXTENSION aqo; diff --git a/sql/feature_subspace.sql b/sql/feature_subspace.sql index 0176a700..c9463d55 100644 --- a/sql/feature_subspace.sql +++ b/sql/feature_subspace.sql @@ -1,9 +1,9 @@ -- This test related to some issues on feature subspace calculation 
-CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); SET aqo.mode = 'learn'; -SET aqo.join_threshold = 0; SET aqo.show_details = 'on'; CREATE TABLE a AS (SELECT gs AS x FROM generate_series(1,10) AS gs); @@ -41,5 +41,5 @@ JOIN aqo_data d2 ON (d1.fs <> d2.fs AND d1.fss = d2.fss) WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids) order by target; DROP TABLE a,b CASCADE; -SELECT true FROM aqo_reset(); + DROP EXTENSION aqo; diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index d9fac51a..cf3990fc 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -1,6 +1,8 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + \set citizens 1000 -SET aqo.join_threshold = 0; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'off'; @@ -23,7 +25,6 @@ INSERT INTO person (id,age,gender,passport) FROM (SELECT *, 14+(id % 60) AS age FROM generate_series(1, :citizens) id) AS q1 ); -CREATE EXTENSION aqo; SET aqo.force_collect_stat = 'on'; SELECT count(*) FROM person WHERE age<18; @@ -46,5 +47,5 @@ ORDER BY (cardinality_error_without_aqo); SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); DROP TABLE person; -SELECT 1 FROM aqo_reset(); -- Full remove of ML data before the end + DROP EXTENSION aqo; diff --git a/sql/gucs.sql b/sql/gucs.sql index 9b1bf9b8..0e948cf1 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -1,4 +1,6 @@ -CREATE EXTENSION aqo; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); -- Utility tool. Allow to filter system-dependent strings from an explain output. 
CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ @@ -9,7 +11,6 @@ BEGIN END; $$ LANGUAGE PLPGSQL; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = true; SET compute_query_id = 'auto'; @@ -18,7 +19,7 @@ CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; -SELECT true FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. +SELECT true AS success FROM aqo_reset(); -- Check AQO addons to explain (the only stable data) SELECT regexp_replace( str,'Query Identifier: -?\m\d+\M','Query Identifier: N','g') as str FROM expln(' @@ -47,7 +48,7 @@ SELECT obj_description('aqo_reset'::regproc::oid); -- Check stat reset SELECT count(*) FROM aqo_query_stat; -SELECT true FROM aqo_reset(); -- Remove one record from all tables +SELECT true AS success FROM aqo_reset(); SELECT count(*) FROM aqo_query_stat; DROP EXTENSION aqo; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index b5e1f671..c9e59249 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -1,6 +1,9 @@ -CREATE EXTENSION aqo; -SELECT true FROM aqo_reset(); -SET aqo.join_threshold = 0; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +SET aqo.wide_search = 'on'; + SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; set aqo.show_hash = 'off'; @@ -136,9 +139,10 @@ FROM expln(' SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; -SELECT 1 FROM aqo_reset(); +RESET aqo.wide_search; +DROP EXTENSION aqo CASCADE; + DROP TABLE a; DROP TABLE b; DROP TABLE c; DROP FUNCTION expln; -DROP EXTENSION aqo CASCADE; diff --git a/sql/parallel_workers.sql b/sql/parallel_workers.sql index b544cf19..2cd04bc2 100644 --- a/sql/parallel_workers.sql +++ b/sql/parallel_workers.sql @@ -1,7 +1,8 @@ -- Specifically test AQO machinery 
for queries uses partial paths and executed -- with parallel workers. -CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); -- Utility tool. Allow to filter system-dependent strings from explain output. CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ @@ -12,7 +13,6 @@ BEGIN END; $$ LANGUAGE PLPGSQL; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = true; @@ -52,7 +52,6 @@ WHERE q1.id = q2.id;') AS str WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%' AND str NOT LIKE '%Gather Merge%'; - RESET parallel_tuple_cost; RESET parallel_setup_cost; RESET max_parallel_workers; diff --git a/sql/plancache.sql b/sql/plancache.sql index c9aabae7..b2d1c6d6 100644 --- a/sql/plancache.sql +++ b/sql/plancache.sql @@ -1,7 +1,8 @@ -- Tests on interaction of AQO with cached plans. -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + SET aqo.mode = 'intelligent'; SET aqo.show_details = 'on'; SET aqo.show_hash = 'off'; @@ -44,5 +45,5 @@ SELECT * FROM f1(); DROP FUNCTION f1; DROP TABLE test CASCADE; -SELECT true FROM aqo_reset(); + DROP EXTENSION aqo; diff --git a/sql/relocatable.sql b/sql/relocatable.sql index 780c385e..adf20983 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -1,5 +1,6 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + SET aqo.mode = 'learn'; -- use this mode for unconditional learning CREATE TABLE test AS (SELECT id, 'payload' || id FROM generate_series(1,100) id); diff --git a/sql/schema.sql b/sql/schema.sql index 6f5f4454..28185710 100644 --- a/sql/schema.sql +++ b/sql/schema.sql @@ -1,4 +1,3 @@ -DROP EXTENSION IF EXISTS aqo CASCADE; DROP SCHEMA IF EXISTS test CASCADE; -- Check Zero-schema path behaviour @@ -11,7 +10,7 @@ CREATE EXTENSION aqo; -- fail CREATE SCHEMA IF NOT 
EXISTS test1; SET search_path TO test1, public; CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +SELECT true AS success FROM aqo_reset(); SET aqo.mode = 'intelligent'; CREATE TABLE test (id SERIAL, data TEXT); diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql index b0ebb6ba..43dab39e 100644 --- a/sql/statement_timeout.sql +++ b/sql/statement_timeout.sql @@ -18,56 +18,58 @@ BEGIN END LOOP; END; $$; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + CREATE TABLE t AS SELECT * FROM generate_series(1,50) AS x; ANALYZE t; DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. -CREATE EXTENSION IF NOT EXISTS aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'off'; SET aqo.learn_statement_timeout = 'on'; -SET statement_timeout = 100; -- [0.1s] +SET statement_timeout = 80; -- [0.1s] SELECT *, pg_sleep(0.1) FROM t; -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- haven't any partial data +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- haven't any partial data -- Don't learn because running node has smaller cardinality than an optimizer prediction -SET statement_timeout = 400; +SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- We have a real learning data. 
-SET statement_timeout = 8000; +SET statement_timeout = 800; SELECT *, pg_sleep(0.1) FROM t; -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- Force to make an underestimated prediction DELETE FROM t WHERE x > 2; ANALYZE t; INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); -SELECT 1 FROM aqo_reset(); +SELECT true AS success FROM aqo_reset(); -SET statement_timeout = 100; +SET statement_timeout = 80; SELECT *, pg_sleep(0.1) FROM t; -- Not learned -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -SET statement_timeout = 500; +SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; -- Learn! -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -SET statement_timeout = 800; +SET statement_timeout = 550; SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- Interrupted query should immediately appear in aqo_data -SELECT 1 FROM aqo_reset(); +SELECT true AS success FROM aqo_reset(); SET statement_timeout = 500; SELECT count(*) FROM aqo_data; -- Must be zero SELECT x, pg_sleep(0.1) FROM t WHERE x > 0; SELECT count(*) FROM aqo_data; -- Must be one -SELECT 1 FROM aqo_reset(); +SELECT true AS success FROM aqo_reset(); DROP TABLE t; DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; diff --git a/sql/temp_tables.sql b/sql/temp_tables.sql index aba78aba..e7bc8fe5 100644 --- a/sql/temp_tables.sql +++ b/sql/temp_tables.sql @@ -1,5 +1,8 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +SET aqo.wide_search = 'on'; SET aqo.mode = 'learn'; CREATE TEMP TABLE tt(); @@ -17,10 +20,10 @@ SELECT count(*) FROM 
pt AS pt1, tt AS tt1, tt AS tt2, pt AS pt2; SELECT count(*) FROM aqo_data; -- Don't bother about false negatives because of trivial query plans DROP TABLE tt; -SELECT * FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); SELECT count(*) FROM aqo_data; -- Should return the same as previous call above DROP TABLE pt; -SELECT * FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); SELECT count(*) FROM aqo_data; -- Should be 0 SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt ON aq.queryid = aqt.queryid @@ -67,7 +70,7 @@ SELECT * FROM check_estimated_rows(' SET aqo.mode = 'forced'; -- Now we use all fss records for each query DROP TABLE pt; -SELECT * FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; CREATE TEMP TABLE ttd1 AS SELECT -(x*3) AS x, (x % 9) AS y1 FROM generate_series(1,100) AS x; @@ -91,7 +94,9 @@ SELECT * FROM check_estimated_rows(' SELECT pt.x, avg(pt.y) FROM pt,ttd1 WHERE pt.x = ttd1.x GROUP BY (pt.x); '); -- Don't use AQO for temp table because of different attname +-- Clear common parts of AQO state +RESET aqo.wide_search; +DROP EXTENSION aqo CASCADE; + DROP TABLE pt CASCADE; -SELECT 1 FROM aqo_reset(); -DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; diff --git a/sql/top_queries.sql b/sql/top_queries.sql index da3817a0..76000ac4 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -1,5 +1,7 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; @@ -51,5 +53,4 @@ FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt WHERE ce.id = aqt.queryid ORDER BY (md5(query_text)); -SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 808a19e1..8b36d721 100644 --- a/sql/unsupported.sql +++ 
b/sql/unsupported.sql @@ -1,4 +1,5 @@ -CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); -- Utility tool. Allow to filter system-dependent strings from an explain output. CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ @@ -9,7 +10,6 @@ BEGIN END; $$ LANGUAGE PLPGSQL; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; @@ -182,7 +182,7 @@ ORDER BY (md5(query_text),error) DESC; DROP TABLE t,t1 CASCADE; -- delete all tables used in the test SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May some false positives really bother us here? -SELECT * FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); SELECT count(*) FROM aqo_data; -- No one row should be returned -- Look for any remaining queries in the ML storage. @@ -191,5 +191,4 @@ FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.queryid = cef.id ORDER BY (md5(query_text),error) DESC; -SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; diff --git a/sql/update_functions.sql b/sql/update_functions.sql index 84add94a..e2773978 100644 --- a/sql/update_functions.sql +++ b/sql/update_functions.sql @@ -1,3 +1,7 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + CREATE TABLE aqo_test1(a int, b int); WITH RECURSIVE t(a, b) AS ( @@ -18,9 +22,6 @@ AS ( CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); ANALYZE aqo_test2; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; - SET aqo.mode='intelligent'; SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; @@ -61,7 +62,7 @@ CREATE TABLE aqo_queries_dump AS SELECT * FROM aqo_queries; CREATE TABLE aqo_query_stat_dump AS SELECT * FROM aqo_query_stat; CREATE TABLE aqo_data_dump AS SELECT * FROM aqo_data; -SELECT 1 FROM aqo_reset(); +SELECT true AS success FROM aqo_reset(); -- -- aqo_query_texts_update() testing. 
@@ -202,8 +203,8 @@ SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1, 1}', '{1, 2, 3}'); SELECT aqo_data_update(1, 1, 1, '{{1}, {2}}', '{1}', '{1}', '{1, 2, 3}'); SET aqo.mode='disabled'; -SELECT 1 FROM aqo_reset(); -DROP EXTENSION aqo; + +DROP EXTENSION aqo CASCADE; DROP TABLE aqo_test1, aqo_test2; DROP TABLE aqo_query_texts_dump, aqo_queries_dump, aqo_query_stat_dump, aqo_data_dump; diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 2374d83d..cb6b76de 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -20,6 +20,9 @@ my $CLIENTS = 10; my $THREADS = 10; +# Disable connection default settings, forced by PGOPTIONS in AQO Makefile +$ENV{PGOPTIONS}=""; + # Change pgbench parameters according to the environment variable. if (defined $ENV{TRANSACTIONS}) { diff --git a/t/002_pg_stat_statements_aqo.pl b/t/002_pg_stat_statements_aqo.pl index 4d8b04d7..edd20a4a 100644 --- a/t/002_pg_stat_statements_aqo.pl +++ b/t/002_pg_stat_statements_aqo.pl @@ -16,7 +16,13 @@ pg_stat_statements.track = 'none' }); my $query_id; -my ($res, $aqo_res); + +# Disable connection default settings, forced by PGOPTIONS in AQO Makefile +$ENV{PGOPTIONS}=""; + +# General purpose variables. +my $res; +my $aqo_res; my $total_classes; $node->start(); From fd02c7ef0d72a1cd7e634b2c9baa8398e2ff8812 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 1 Feb 2023 09:32:08 +0500 Subject: [PATCH 104/134] Add couple of github actions flows on each push event: - run make installcheck over an instance in different modes. - run JOB benchmark [1] on a self hosted runner. Utility scripts stores in the .github folder. Branch name is a key to define the name of suitable PostgreSQL core branch: use "stable[XX]" phrase in the name of git branch to trigger compiling and launch of this commit with REL_[XX]_STABLE branch of the core. If the branch name doesn't contain such a phrase, use master branch. TODO: ===== 1. Add 'long' JOB test (parallel strategy disabled). 2. 
Add JOB test which would be executed up to full convergency of learning on each query. 3. Add installchecks with reusage of existed database and the AQO extension installed (sanity checks will be definitely broken but still). 4. Additional queries [2] can be a marker for successful learning. [1] https://github.com/danolivo/jo-bench [2] https://github.com/RyanMarcus/imdb_pg_dataset --- .github/scripts/job/aqo_instance_launch.sh | 47 ++++++ .github/scripts/job/check_result.sh | 15 ++ .github/scripts/job/dump_knowledge.sh | 17 ++ .github/scripts/job/job_pass.sh | 58 +++++++ .github/scripts/job/load_imdb.sh | 5 + .github/scripts/job/set_test_conditions_1.sh | 41 +++++ .github/scripts/job/set_test_conditions_2.sh | 42 +++++ .github/scripts/job/set_test_conditions_3.sh | 42 +++++ .github/workflows/installchecks.yml | 153 ++++++++++++++++++ .github/workflows/job.yml | 157 +++++++++++++++++++ 10 files changed, 577 insertions(+) create mode 100755 .github/scripts/job/aqo_instance_launch.sh create mode 100755 .github/scripts/job/check_result.sh create mode 100755 .github/scripts/job/dump_knowledge.sh create mode 100755 .github/scripts/job/job_pass.sh create mode 100755 .github/scripts/job/load_imdb.sh create mode 100755 .github/scripts/job/set_test_conditions_1.sh create mode 100755 .github/scripts/job/set_test_conditions_2.sh create mode 100755 .github/scripts/job/set_test_conditions_3.sh create mode 100644 .github/workflows/installchecks.yml create mode 100644 .github/workflows/job.yml diff --git a/.github/scripts/job/aqo_instance_launch.sh b/.github/scripts/job/aqo_instance_launch.sh new file mode 100755 index 00000000..f43d6b8e --- /dev/null +++ b/.github/scripts/job/aqo_instance_launch.sh @@ -0,0 +1,47 @@ +#!/bin/bash +ulimit -c unlimited + +# Kill all orphan processes +pkill -U `whoami` -9 -e postgres +pkill -U `whoami` -9 -e pgbench +pkill -U `whoami` -9 -e psql + +sleep 1 + +M=`pwd`/PGDATA +U=`whoami` + +rm -rf $M || true 
+mkdir $M +rm -rf logfile.log || true + +export LC_ALL=C +export LANGUAGE="en_US:en" +initdb -D $M --locale=C + +# PG Version-specific settings +ver=$(pg_ctl -V | egrep -o "[0-9]." | head -1) +echo "PostgreSQL version: $ver" +if [ $ver -gt 13 ] +then + echo "compute_query_id = 'regress'" >> $M/postgresql.conf +fi + +# Speed up the 'Join Order Benchmark' test +echo "shared_buffers = 1GB" >> $M/postgresql.conf +echo "work_mem = 128MB" >> $M/postgresql.conf +echo "fsync = off" >> $M/postgresql.conf +echo "autovacuum = 'off'" >> $M/postgresql.conf + +# AQO preferences +echo "shared_preload_libraries = 'aqo, pg_stat_statements'" >> $M/postgresql.conf +echo "aqo.mode = 'disabled'" >> $M/postgresql.conf +echo "aqo.join_threshold = 0" >> $M/postgresql.conf +echo "aqo.force_collect_stat = 'off'" >> $M/postgresql.conf +echo "aqo.fs_max_items = 10000" >> $M/postgresql.conf +echo "aqo.fss_max_items = 20000" >> $M/postgresql.conf + +pg_ctl -w -D $M -l logfile.log start +createdb $U +psql -c "CREATE EXTENSION aqo;" +psql -c "CREATE EXTENSION pg_stat_statements" diff --git a/.github/scripts/job/check_result.sh b/.github/scripts/job/check_result.sh new file mode 100755 index 00000000..ab194cfc --- /dev/null +++ b/.github/scripts/job/check_result.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# ############################################################################## +# +# +# ############################################################################## + +# Show error delta (Negative result is a signal of possible issue) +result=$(psql -t -c "SELECT count(*) FROM aqo_cardinality_error(true) c JOIN aqo_cardinality_error(false) o USING (id) WHERE (o.error - c.error) < 0") + +if [ $result -gt 0 ]; then + exit 1; +fi + +exit 0; diff --git a/.github/scripts/job/dump_knowledge.sh b/.github/scripts/job/dump_knowledge.sh new file mode 100755 index 00000000..c5cb9736 --- /dev/null +++ b/.github/scripts/job/dump_knowledge.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +# 
############################################################################## +# +# Make dump of a knowledge base +# +# ############################################################################## + +psql -c "CREATE TABLE aqo_data_dump AS SELECT * FROM aqo_data;" +psql -c "CREATE TABLE aqo_queries_dump AS SELECT * FROM aqo_queries;" +psql -c "CREATE TABLE aqo_query_texts_dump AS SELECT * FROM aqo_query_texts;" +psql -c "CREATE TABLE aqo_query_stat_dump AS SELECT * FROM aqo_query_stat;" + +pg_dump --table='aqo*' -f knowledge_base.dump $PGDATABASE + +psql -c "DROP TABLE aqo_data_dump, aqo_queries_dump, aqo_query_texts_dump, aqo_query_stat_dump" + diff --git a/.github/scripts/job/job_pass.sh b/.github/scripts/job/job_pass.sh new file mode 100755 index 00000000..1ad62fbd --- /dev/null +++ b/.github/scripts/job/job_pass.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# ############################################################################## +# +# Pass each JOB query over the DBMS instance. Use $1 to specify a number of +# iterations, if needed. 
+# +# Results: +# - explains.txt - explain of each query +# - job_onepass_aqo_stat.dat - short report on execution time +# - knowledge_base.dump - dump of the AQO knowledge base +# +# ############################################################################## + +echo "The Join Order Benchmark 1Pass" +echo -e "Query Number\tITER\tQuery Name\tExecution Time, ms" > report.txt +echo -e "Clear a file with explains" > explains.txt + +if [ $# -eq 0 ] +then + ITERS=1 +else + ITERS=$1 +fi + +echo "Execute JOB with the $ITERS iterations" + +filenum=1 +for file in $JOB_DIR/queries/*.sql +do + # Get filename + short_file=$(basename "$file") + + echo -n "EXPLAIN (ANALYZE, VERBOSE, FORMAT JSON) " > test.sql + cat $file >> test.sql + + for (( i=1; i<=$ITERS; i++ )) + do + result=$(psql -f test.sql) + echo -e $result >> explains.txt + exec_time=$(echo $result | sed -n 's/.*"Execution Time": \([0-9]*\.[0-9]*\).*/\1/p') + echo -e "$filenum\t$short_file\t$i\t$exec_time" >> report.txt + echo -e "$filenum\t$i\t$short_file\t$exec_time" + done +filenum=$((filenum+1)) +done + +# Show total optimizer error in the test +psql -c "SELECT sum(error) AS total_error FROM aqo_cardinality_error(false)" +psql -c "SELECT sum(error) AS total_error_aqo FROM aqo_cardinality_error(true)" + +# Show error delta (Negative result is a signal of possible issue) +psql -c " +SELECT id, (o.error - c.error) AS errdelta + FROM aqo_cardinality_error(true) c JOIN aqo_cardinality_error(false) o + USING (id) +" + diff --git a/.github/scripts/job/load_imdb.sh b/.github/scripts/job/load_imdb.sh new file mode 100755 index 00000000..3cb44fb2 --- /dev/null +++ b/.github/scripts/job/load_imdb.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +psql -f $JOB_DIR/schema.sql +psql -vdatadir="'$JOB_DIR'" -f $JOB_DIR/copy.sql + diff --git a/.github/scripts/job/set_test_conditions_1.sh b/.github/scripts/job/set_test_conditions_1.sh new file mode 100755 index 00000000..2140893d --- /dev/null +++ b/.github/scripts/job/set_test_conditions_1.sh @@ 
-0,0 +1,41 @@ +#!/bin/bash + +# ############################################################################## +# +# Test conditions No.1: Quick pass in 'disabled' mode with statistics and +# forced usage of a bunch of parallel workers. +# +# - Disabled mode with a stat gathering and AQO details in explain +# - Force usage of parallel workers aggressively +# - Enable pg_stat_statements statistics +# +# ############################################################################## + +# AQO specific settings +psql -c "ALTER SYSTEM SET aqo.mode = 'disabled'" +psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" +psql -c "ALTER SYSTEM SET aqo.show_details = 'on'" +psql -c "ALTER SYSTEM SET aqo.show_hash = 'on'" + +# Core settings: force parallel workers +psql -c "ALTER SYSTEM SET max_parallel_workers_per_gather = 16" +psql -c "ALTER SYSTEM SET force_parallel_mode = 'on'" +psql -c "ALTER SYSTEM SET from_collapse_limit = 20" +psql -c "ALTER SYSTEM SET join_collapse_limit = 20" +psql -c "ALTER SYSTEM SET parallel_setup_cost = 1.0" +psql -c "ALTER SYSTEM SET parallel_tuple_cost = 0.00001" +psql -c "ALTER SYSTEM SET min_parallel_table_scan_size = 0" +psql -c "ALTER SYSTEM SET min_parallel_index_scan_size = 0" + +# pg_stat_statements +psql -c "ALTER SYSTEM SET pg_stat_statements.track = 'all'" +psql -c "ALTER SYSTEM SET pg_stat_statements.track_planning = 'on'" + +psql -c "SELECT pg_reload_conf();" + +# Enable all previously executed queries which could be disabled +psql -c " + SELECT count(*) FROM aqo_queries, LATERAL aqo_disable_class(queryid) + WHERE queryid <> 0 +" + diff --git a/.github/scripts/job/set_test_conditions_2.sh b/.github/scripts/job/set_test_conditions_2.sh new file mode 100755 index 00000000..609b9624 --- /dev/null +++ b/.github/scripts/job/set_test_conditions_2.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# ############################################################################## +# +# Test conditions No.2: Learn mode with forced parallel workers +# +# - 
Disabled mode with a stat gathering and AQO details in explain +# - Force usage of parallel workers aggressively +# - Enable pg_stat_statements statistics +# +# ############################################################################## + +# AQO specific settings +psql -c "ALTER SYSTEM SET aqo.mode = 'learn'" +psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'off'" +psql -c "ALTER SYSTEM SET aqo.show_details = 'on'" +psql -c "ALTER SYSTEM SET aqo.show_hash = 'on'" +psql -c "ALTER SYSTEM SET aqo.join_threshold = 0" +psql -c "ALTER SYSTEM SET aqo.wide_search = 'off'" + +# Core settings: force parallel workers +psql -c "ALTER SYSTEM SET max_parallel_workers_per_gather = 16" +psql -c "ALTER SYSTEM SET force_parallel_mode = 'on'" +psql -c "ALTER SYSTEM SET from_collapse_limit = 20" +psql -c "ALTER SYSTEM SET join_collapse_limit = 20" +psql -c "ALTER SYSTEM SET parallel_setup_cost = 1.0" +psql -c "ALTER SYSTEM SET parallel_tuple_cost = 0.00001" +psql -c "ALTER SYSTEM SET min_parallel_table_scan_size = 0" +psql -c "ALTER SYSTEM SET min_parallel_index_scan_size = 0" + +# pg_stat_statements +psql -c "ALTER SYSTEM SET pg_stat_statements.track = 'all'" +psql -c "ALTER SYSTEM SET pg_stat_statements.track_planning = 'on'" + +psql -c "SELECT pg_reload_conf();" + +# Enable all previously executed queries which could be disabled +psql -c " + SELECT count(*) FROM aqo_queries, LATERAL aqo_enable_class(queryid) + WHERE queryid <> 0 +" + diff --git a/.github/scripts/job/set_test_conditions_3.sh b/.github/scripts/job/set_test_conditions_3.sh new file mode 100755 index 00000000..00f4dbf3 --- /dev/null +++ b/.github/scripts/job/set_test_conditions_3.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# ############################################################################## +# +# Test conditions No.3: Freeze ML base and forced parallel workers +# +# - Disabled mode with a stat gathering and AQO details in explain +# - Force usage of parallel workers aggressively +# - Enable pg_stat_statements 
statistics +# +# ############################################################################## + +# AQO specific settings +psql -c "ALTER SYSTEM SET aqo.mode = 'frozen'" +psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'off'" +psql -c "ALTER SYSTEM SET aqo.show_details = 'on'" +psql -c "ALTER SYSTEM SET aqo.show_hash = 'on'" +psql -c "ALTER SYSTEM SET aqo.join_threshold = 0" +psql -c "ALTER SYSTEM SET aqo.wide_search = 'off'" + +# Core settings: force parallel workers +psql -c "ALTER SYSTEM SET max_parallel_workers_per_gather = 16" +psql -c "ALTER SYSTEM SET force_parallel_mode = 'on'" +psql -c "ALTER SYSTEM SET from_collapse_limit = 20" +psql -c "ALTER SYSTEM SET join_collapse_limit = 20" +psql -c "ALTER SYSTEM SET parallel_setup_cost = 1.0" +psql -c "ALTER SYSTEM SET parallel_tuple_cost = 0.00001" +psql -c "ALTER SYSTEM SET min_parallel_table_scan_size = 0" +psql -c "ALTER SYSTEM SET min_parallel_index_scan_size = 0" + +# pg_stat_statements +psql -c "ALTER SYSTEM SET pg_stat_statements.track = 'all'" +psql -c "ALTER SYSTEM SET pg_stat_statements.track_planning = 'on'" + +psql -c "SELECT pg_reload_conf();" + +# Enable all previously executed queries which could be disabled +psql -c " + SELECT count(*) FROM aqo_queries, LATERAL aqo_enable_class(queryid) + WHERE queryid <> 0 +" + diff --git a/.github/workflows/installchecks.yml b/.github/workflows/installchecks.yml new file mode 100644 index 00000000..aeb976e4 --- /dev/null +++ b/.github/workflows/installchecks.yml @@ -0,0 +1,153 @@ +name: "InstallChecks" + +on: + push: + +env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + + # Set major PostgreSQL version for all underlying steps + - name: "Extract Postgres major version number" + run: | + PG_MAJOR_VERSION=$(echo "$BRANCH_NAME" | grep --only-matching 'stable[0-9].' 
| grep --only-matching '[0-9].') + + # Declare PG_MAJOR_VERSION as a environment variable + echo "PG_MAJOR_VERSION=$PG_MAJOR_VERSION" >> $GITHUB_ENV + echo "CORE_BRANCH_NAME=REL_${PG_MAJOR_VERSION}_STABLE" >> $GITHUB_ENV + echo "AQO_PATCH_NAME=aqo_pg$PG_MAJOR_VERSION.patch" >> $GITHUB_ENV + - name: "Set proper names for the master case" + if: env.PG_MAJOR_VERSION == '' + run: | + echo "PG_MAJOR_VERSION=master" >> $GITHUB_ENV + echo "CORE_BRANCH_NAME=master" >> $GITHUB_ENV + echo "AQO_PATCH_NAME=aqo_master.patch" >> $GITHUB_ENV + + - name: "Preparations" + run: | + sudo apt install libipc-run-perl libxml2-utils libxml2-dev xsltproc libxslt1-dev + + echo "Deploying to production server on branch" $BRANCH_NAME + git config --global user.email "ci@postgrespro.ru" + git config --global user.name "CI PgPro admin" + git clone https://github.com/postgres/postgres.git pg + cd pg + git checkout $CORE_BRANCH_NAME + git clone https://github.com/postgrespro/aqo.git contrib/aqo + git -C contrib/aqo checkout $BRANCH_NAME + patch -p1 --no-backup-if-mismatch < contrib/aqo/$AQO_PATCH_NAME + COPT="-Werror" + CONFIGURE_OPTS="--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" + echo "CONFIGURE_OPTS=$CONFIGURE_OPTS" >> $GITHUB_ENV + echo "COPT=$COPT" >> $GITHUB_ENV + + - name: "Paths" + run: | + echo "$GITHUB_WORKSPACE/pg/contrib/aqo/.github/scripts/job" >> $GITHUB_PATH + ls -la pg/contrib/aqo/.github/scripts/job + echo "$GITHUB_WORKSPACE/pg/tmp_install/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=$GITHUB_WORKSPACE/pg/tmp_install/lib" >> $GITHUB_ENV + echo "PGDATABASE=`whoami`" >> $GITHUB_ENV + echo "PGHOST=localhost" >> $GITHUB_ENV + echo "PGDATA=PGDATA" >> $GITHUB_ENV + echo "PGUSER=`whoami`" >> $GITHUB_ENV + echo "PGPORT=5432" >> $GITHUB_ENV + + - name: "Debug" + run: | + echo "paths: $PATH" + echo "PG_MAJOR_VERSION: $PG_MAJOR_VERSION, CORE_BRANCH_NAME: $CORE_BRANCH_NAME, AQO_PATCH_NAME: $AQO_PATCH_NAME, CONFIGURE_OPTS: 
$CONFIGURE_OPTS" + + - name: "Compilation" + run: | + cd pg + ./configure $CONFIGURE_OPTS CFLAGS="-O2" + make -j4 > /dev/null && make -j4 -C contrib > /dev/null + make install >> make.log && make -C contrib install > /dev/null + + - name: "Launch AQO instance" + run: | + cd pg + + # Launch an instance with AQO extension + aqo_instance_launch.sh + AQO_VERSION=$(psql -t -c "SELECT extversion FROM pg_extension WHERE extname='aqo'") + echo "AQO_VERSION=$AQO_VERSION" >> $GITHUB_ENV + echo "Use AQO v.$AQO_VERSION" + + # Pass installcheck in disabled mode + - name: installcheck_disabled + run: | + cd pg + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'off'" + psql -c "SELECT pg_reload_conf()" + make installcheck-world + + - name: installcheck_disabled_forced_stat + run: | + cd pg + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make installcheck-world + + - name: installcheck_frozen + run: | + cd pg + psql -c "ALTER SYSTEM SET aqo.mode = 'frozen'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make installcheck-world + + - name: installcheck_controlled + run: | + cd pg + psql -c "ALTER SYSTEM SET aqo.mode = 'controlled'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make installcheck-world + + - name: installcheck_learn + continue-on-error: true + run: | + cd pg + psql -c "ALTER SYSTEM SET aqo.mode = 'learn'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + learn_result=$(make -k installcheck-world) + + - name: installcheck_intelligent + continue-on-error: true + run: | + cd pg + psql -c "ALTER SYSTEM SET aqo.mode = 'intelligent'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make -k installcheck-world + + - name: installcheck_forced + continue-on-error: true + run: | + cd pg + psql -c "ALTER SYSTEM SET aqo.mode = 'forced'" 
+ psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make -k installcheck-world + + # Save Artifacts + - name: Archive artifacts + if: ${{ failure() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.AQO_VERSION }}-${{ env.CORE_BRANCH_NAME }}-${{ env.BRANCH_NAME }}-artifacts + path: | + pg/src/test/regress/regression.diffs + pg/logfile.log + pg/contrib/aqo/tmp_check/log + retention-days: 2 + diff --git a/.github/workflows/job.yml b/.github/workflows/job.yml new file mode 100644 index 00000000..682f4b42 --- /dev/null +++ b/.github/workflows/job.yml @@ -0,0 +1,157 @@ +name: 'Join Order Benchmark' + +env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + +# Trigger the workflow on each push +on: push + +jobs: + AQO_Tests: + + runs-on: self-hosted + + steps: + - name: "Set common paths" + run: | + echo "$HOME/aqo/.github/scripts/job" >> $GITHUB_PATH + echo "JOB_DIR=$HOME/jo-bench" >> $GITHUB_ENV + + # PostgreSQL-related environment variables + echo "$GITHUB_WORKSPACE/pg/tmp_install/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=$GITHUB_WORKSPACE/pg/tmp_install/lib" >> $GITHUB_ENV + echo "PGDATABASE=`whoami`" >> $GITHUB_ENV + echo "PGHOST=localhost" >> $GITHUB_ENV + echo "PGDATA=PGDATA" >> $GITHUB_ENV + echo "PGUSER=`whoami`" >> $GITHUB_ENV + echo "PGPORT=5432" >> $GITHUB_ENV + + # Set major PostgreSQL version for all underlying steps + - name: "Extract Postgres major version number" + run: | + PG_MAJOR_VERSION=$(echo "$BRANCH_NAME" | grep --only-matching 'stable[0-9].' 
| grep --only-matching '[0-9].') + + # Declare PG_MAJOR_VERSION as a environment variable + echo "PG_MAJOR_VERSION=$PG_MAJOR_VERSION" >> $GITHUB_ENV + echo "CORE_BRANCH_NAME=REL_${PG_MAJOR_VERSION}_STABLE" >> $GITHUB_ENV + echo "AQO_PATCH_NAME=aqo_pg$PG_MAJOR_VERSION.patch" >> $GITHUB_ENV + - name: "Set proper names for the master case" + if: env.PG_MAJOR_VERSION == '' + run: | + echo "PG_MAJOR_VERSION=master" >> $GITHUB_ENV + echo "CORE_BRANCH_NAME=master" >> $GITHUB_ENV + echo "AQO_PATCH_NAME=aqo_master.patch" >> $GITHUB_ENV + + # Just for debug + - name: "Print environment variables" + run: | + echo "Test data: $PG_MAJOR_VERSION; Core branch: $CORE_BRANCH_NAME, AQO patch: $AQO_PATCH_NAME" + echo "Paths: $PATH, JOB path: $JOB_DIR" + echo "PG Libs: $LD_LIBRARY_PATH" + echo "PG Environment: dbname: $PGDATABASE, host: $PGHOST, pgdata: $PGDATA, pguser: $PGUSER, pgport: $PGPORT" + + # Runner contains clone of postgres and AQO repositories. We must refresh them + - name: "Code pre-cleanup" + run: | + rm -rf pg + git -C ~/pg clean -fdx + git -C ~/pg pull + git -C ~/pg checkout $CORE_BRANCH_NAME + git -C ~/pg pull + + git -C ~/aqo clean -fdx + git -C ~/aqo pull + git -C ~/aqo checkout $BRANCH_NAME + git -C ~/aqo pull + + # Copy the codes into test folder, arrange code versions and do the patching + - name: "Prepare code directory" + run: | + cp -r ~/pg pg + cd pg + cp -r ~/aqo contrib/aqo + patch -p1 --no-backup-if-mismatch < contrib/aqo/$AQO_PATCH_NAME + + - name: "Compilation" + run: | + cd pg + export COPT=-Werror + export CONFIGURE_OPTS="--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" + ./configure $CONFIGURE_OPTS CFLAGS="-O0" + make clean > /dev/null + make -C contrib clean > /dev/null + make -j2 > /dev/null && make -j2 -C contrib > /dev/null + make install >> make.log + make -C contrib install >> make.log + make -C doc install > /dev/null + + - name: "Launch AQO instance" + run: | + cd pg + make -j2 > /dev/null && make -j2 -C contrib > /dev/null + 
make install > /dev/null && make -C contrib install > /dev/null + + # Launch an instance with AQO extension + aqo_instance_launch.sh + AQO_VERSION=$(psql -t -c "SELECT extversion FROM pg_extension WHERE extname='aqo'") + echo "AQO_VERSION=$AQO_VERSION" >> $GITHUB_ENV + + - name: "Load a dump of the test database" + run: | + cd pg + echo "AQO_VERSION: $AQO_VERSION" + load_imdb.sh + + # Quick pass in parallel mode with statistics + - name: "Test No.1: Gather statistics in disabled mode" + run: | + cd pg + set_test_conditions_1.sh + job_pass.sh + dump_knowledge.sh + + - name: "Archive JOB test results" + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.AQO_VERSION }}-${{ env.CORE_BRANCH_NAME }}-${{ env.BRANCH_NAME }}-result_base_stat + path: | + pg/explains.txt + pg/report.txt + pg/knowledge_base.dump + pg/logfile.log + retention-days: 1 + + # Test No.2: Learn on all incoming queries + - name: "Test No.2: Learning stage" + run: | + cd pg + set_test_conditions_2.sh + job_pass.sh 10 + check_result.sh + + # One pass on frozen AQO data, dump knowledge base, check total error + - name: "Test No.3: Frozen execution" + run: | + cd pg + set_test_conditions_3.sh + job_pass.sh + dump_knowledge.sh + + - name: "Archive JOB test results - frozen" + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.AQO_VERSION }}-${{ env.CORE_BRANCH_NAME }}-${{ env.BRANCH_NAME }}-result_frozen + path: | + pg/explains.txt + pg/report.txt + pg/knowledge_base.dump + pg/logfile.log + retention-days: 7 + + - name: "Cleanup" + run: | + cd pg + pg_ctl -D PGDATA stop + From c794823d6cb6875ea419480ff416e7e7914a692d Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 1 Mar 2023 08:45:57 +0500 Subject: [PATCH 105/134] Improvement of time-dependent test statement_timeout. Remember, each query can be executed longer than the timeout on an ancient machines of buildfarm. So, RESET this GUC each time when it isn't really needed for a test query. 
--- expected/statement_timeout.out | 11 +++++++++-- sql/statement_timeout.sql | 19 +++++++++++++++++-- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index 39796549..1d957df7 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -35,6 +35,7 @@ SET statement_timeout = 80; -- [0.1s] SELECT *, pg_sleep(0.1) FROM t; NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- haven't any partial data check_estimated_rows ---------------------- @@ -46,6 +47,7 @@ SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- @@ -64,6 +66,7 @@ SELECT *, pg_sleep(0.1) FROM t; 5 | (5 rows) +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- @@ -84,6 +87,7 @@ SET statement_timeout = 80; SELECT *, pg_sleep(0.1) FROM t; -- Not learned NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- @@ -94,6 +98,7 @@ SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; -- Learn! NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. 
ERROR: canceling statement due to statement timeout +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- @@ -111,6 +116,7 @@ SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data 5 | (5 rows) +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- @@ -134,18 +140,19 @@ SELECT count(*) FROM aqo_data; -- Must be zero SELECT x, pg_sleep(0.1) FROM t WHERE x > 0; NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout +RESET statement_timeout; SELECT count(*) FROM aqo_data; -- Must be one count ------- 1 (1 row) +DROP TABLE t; +DROP FUNCTION check_estimated_rows; SELECT true AS success FROM aqo_reset(); success --------- t (1 row) -DROP TABLE t; DROP EXTENSION aqo; -DROP FUNCTION check_estimated_rows; diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql index 43dab39e..4ca9171f 100644 --- a/sql/statement_timeout.sql +++ b/sql/statement_timeout.sql @@ -32,16 +32,22 @@ SET aqo.learn_statement_timeout = 'on'; SET statement_timeout = 80; -- [0.1s] SELECT *, pg_sleep(0.1) FROM t; + +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- haven't any partial data -- Don't learn because running node has smaller cardinality than an optimizer prediction SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; + +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- We have a real learning data. 
SET statement_timeout = 800; SELECT *, pg_sleep(0.1) FROM t; + +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- Force to make an underestimated prediction @@ -52,14 +58,20 @@ SELECT true AS success FROM aqo_reset(); SET statement_timeout = 80; SELECT *, pg_sleep(0.1) FROM t; -- Not learned + +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; -- Learn! + +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); SET statement_timeout = 550; SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data + +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- Interrupted query should immediately appear in aqo_data @@ -67,9 +79,12 @@ SELECT true AS success FROM aqo_reset(); SET statement_timeout = 500; SELECT count(*) FROM aqo_data; -- Must be zero SELECT x, pg_sleep(0.1) FROM t WHERE x > 0; + +RESET statement_timeout; SELECT count(*) FROM aqo_data; -- Must be one -SELECT true AS success FROM aqo_reset(); DROP TABLE t; -DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; + +SELECT true AS success FROM aqo_reset(); +DROP EXTENSION aqo; From eb210d2ae8896e6068e71af62a7f64b14c5baf0e Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 1 Mar 2023 09:02:55 +0500 Subject: [PATCH 106/134] Improve basic CI and installcheck CI code. 
--- .github/workflows/c-cpp.yml | 4 +- .github/workflows/installchecks.yml | 90 +++++++++++++++-------------- 2 files changed, 47 insertions(+), 47 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 0123a181..27f911cb 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -15,7 +15,6 @@ jobs: - uses: actions/checkout@v3 - name: "Define PostreSQL major version" run: | - echo "$(ls -la)" patch_name=$(ls aqo_*.patch|tail -1) echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV @@ -49,7 +48,6 @@ jobs: run: | git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg cd $GITHUB_WORKSPACE/../pg - ls -la cp -r ../aqo contrib/aqo patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME @@ -70,7 +68,7 @@ jobs: env CLIENTS=50 THREADS=50 make -C contrib/aqo check - name: Archive artifacts - if: ${{ always() }} + if: ${{ failure() }} uses: actions/upload-artifact@v3 with: name: make_check_logs diff --git a/.github/workflows/installchecks.yml b/.github/workflows/installchecks.yml index aeb976e4..94e38d6c 100644 --- a/.github/workflows/installchecks.yml +++ b/.github/workflows/installchecks.yml @@ -14,44 +14,48 @@ jobs: steps: # Set major PostgreSQL version for all underlying steps - - name: "Extract Postgres major version number" + - uses: actions/checkout@v3 + - name: "Define PostreSQL major version" run: | - PG_MAJOR_VERSION=$(echo "$BRANCH_NAME" | grep --only-matching 'stable[0-9].' 
| grep --only-matching '[0-9].') + patch_name=$(ls aqo_*.patch|tail -1) + echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV - # Declare PG_MAJOR_VERSION as a environment variable - echo "PG_MAJOR_VERSION=$PG_MAJOR_VERSION" >> $GITHUB_ENV - echo "CORE_BRANCH_NAME=REL_${PG_MAJOR_VERSION}_STABLE" >> $GITHUB_ENV - echo "AQO_PATCH_NAME=aqo_pg$PG_MAJOR_VERSION.patch" >> $GITHUB_ENV - - name: "Set proper names for the master case" + # we can get number, otherwise set up master + vers_number=$(echo "$patch_name"|tr -d -c 0-9) + echo "PG_MAJOR_VERSION=$vers_number" >> $GITHUB_ENV + + branch_name="REL_${vers_number}_STABLE" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV + + - name: "Set master branch name, if needed" if: env.PG_MAJOR_VERSION == '' run: | - echo "PG_MAJOR_VERSION=master" >> $GITHUB_ENV - echo "CORE_BRANCH_NAME=master" >> $GITHUB_ENV - echo "AQO_PATCH_NAME=aqo_master.patch" >> $GITHUB_ENV + branch_name="master" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV - - name: "Preparations" + - name: "Environment (debug output)" + if: ${{ always() }} run: | - sudo apt install libipc-run-perl libxml2-utils libxml2-dev xsltproc libxslt1-dev - - echo "Deploying to production server on branch" $BRANCH_NAME + echo "Use PostgreSQL branch $PG_BRANCH (patch: $CORE_PATCH_NAME)" + echo "Deploying to production server on branch" $BRANCH_NAME "(PG $PG_BRANCH)" git config --global user.email "ci@postgrespro.ru" git config --global user.name "CI PgPro admin" - git clone https://github.com/postgres/postgres.git pg - cd pg - git checkout $CORE_BRANCH_NAME - git clone https://github.com/postgrespro/aqo.git contrib/aqo - git -C contrib/aqo checkout $BRANCH_NAME - patch -p1 --no-backup-if-mismatch < contrib/aqo/$AQO_PATCH_NAME - COPT="-Werror" - CONFIGURE_OPTS="--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" - echo "CONFIGURE_OPTS=$CONFIGURE_OPTS" >> $GITHUB_ENV - echo "COPT=$COPT" >> $GITHUB_ENV + + - name: "Prepare PG 
directory" + run: | + sudo apt install libipc-run-perl libxml2-utils libxml2-dev xsltproc libxslt1-dev + git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg + cd $GITHUB_WORKSPACE/../pg + cp -r ../aqo contrib/aqo + patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME - name: "Paths" run: | - echo "$GITHUB_WORKSPACE/pg/contrib/aqo/.github/scripts/job" >> $GITHUB_PATH - ls -la pg/contrib/aqo/.github/scripts/job - echo "$GITHUB_WORKSPACE/pg/tmp_install/bin" >> $GITHUB_PATH + cd $GITHUB_WORKSPACE/../pg + echo "COPT=-Werror" >> $GITHUB_ENV + echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV + echo "$GITHUB_WORKSPACE/../pg/tmp_install/bin" >> $GITHUB_PATH + echo "$GITHUB_WORKSPACE/../pg/contrib/aqo/.github/scripts/job" >> $GITHUB_PATH echo "LD_LIBRARY_PATH=$GITHUB_WORKSPACE/pg/tmp_install/lib" >> $GITHUB_ENV echo "PGDATABASE=`whoami`" >> $GITHUB_ENV echo "PGHOST=localhost" >> $GITHUB_ENV @@ -59,21 +63,19 @@ jobs: echo "PGUSER=`whoami`" >> $GITHUB_ENV echo "PGPORT=5432" >> $GITHUB_ENV - - name: "Debug" - run: | - echo "paths: $PATH" - echo "PG_MAJOR_VERSION: $PG_MAJOR_VERSION, CORE_BRANCH_NAME: $CORE_BRANCH_NAME, AQO_PATCH_NAME: $AQO_PATCH_NAME, CONFIGURE_OPTS: $CONFIGURE_OPTS" - - name: "Compilation" run: | - cd pg - ./configure $CONFIGURE_OPTS CFLAGS="-O2" + cd $GITHUB_WORKSPACE/../pg + echo "paths: $PATH" + echo "COPT: $COPT" + echo "CONFIGURE_OPTS: $CONFIGURE_OPTS" + ./configure $CONFIGURE_OPTS CFLAGS="-O2" > /dev/null make -j4 > /dev/null && make -j4 -C contrib > /dev/null make install >> make.log && make -C contrib install > /dev/null - name: "Launch AQO instance" run: | - cd pg + cd $GITHUB_WORKSPACE/../pg # Launch an instance with AQO extension aqo_instance_launch.sh @@ -84,21 +86,21 @@ jobs: # Pass installcheck in disabled mode - name: installcheck_disabled run: | - cd pg + cd $GITHUB_WORKSPACE/../pg psql -c "ALTER 
SYSTEM SET aqo.force_collect_stat = 'off'" psql -c "SELECT pg_reload_conf()" make installcheck-world - name: installcheck_disabled_forced_stat run: | - cd pg + cd $GITHUB_WORKSPACE/../pg psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" make installcheck-world - name: installcheck_frozen run: | - cd pg + cd $GITHUB_WORKSPACE/../pg psql -c "ALTER SYSTEM SET aqo.mode = 'frozen'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -106,7 +108,7 @@ jobs: - name: installcheck_controlled run: | - cd pg + cd $GITHUB_WORKSPACE/../pg psql -c "ALTER SYSTEM SET aqo.mode = 'controlled'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -115,7 +117,7 @@ jobs: - name: installcheck_learn continue-on-error: true run: | - cd pg + cd $GITHUB_WORKSPACE/../pg psql -c "ALTER SYSTEM SET aqo.mode = 'learn'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -124,7 +126,7 @@ jobs: - name: installcheck_intelligent continue-on-error: true run: | - cd pg + cd $GITHUB_WORKSPACE/../pg psql -c "ALTER SYSTEM SET aqo.mode = 'intelligent'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -133,7 +135,7 @@ jobs: - name: installcheck_forced continue-on-error: true run: | - cd pg + cd $GITHUB_WORKSPACE/../pg psql -c "ALTER SYSTEM SET aqo.mode = 'forced'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -141,10 +143,10 @@ jobs: # Save Artifacts - name: Archive artifacts - if: ${{ failure() }} + if: ${{ always() }} uses: actions/upload-artifact@v3 with: - name: ${{ env.AQO_VERSION }}-${{ env.CORE_BRANCH_NAME }}-${{ env.BRANCH_NAME }}-artifacts + name: ${{ env.AQO_VERSION }}-${{ env.PG_BRANCH }}-${{ env.CORE_PATCH_NAME }}-artifacts path: | pg/src/test/regress/regression.diffs pg/logfile.log From 667f644ea5b4fa347cd2eefad75946c402c7beee Mon Sep 17 
00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 9 Mar 2023 13:20:02 +0500 Subject: [PATCH 107/134] CI Refactoring: Unify code of all three CI workflows --- .github/workflows/c-cpp.yml | 60 ++++++----- .github/workflows/installchecks.yml | 93 +++++++++-------- .github/workflows/job.yml | 150 +++++++++++++++------------- 3 files changed, 170 insertions(+), 133 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 27f911cb..74e90277 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -1,11 +1,15 @@ name: 'AQO basic CI' -on: - pull_request: - env: + # Use it just for a report BRANCH_NAME: ${{ github.head_ref || github.ref_name }} +# Trigger it each timeon push or pull request. Honestly, it will be redundant +# most of the time, but external pull-request checks don't be missed out. +on: + push: + pull_request: + jobs: build: @@ -15,6 +19,11 @@ jobs: - uses: actions/checkout@v3 - name: "Define PostreSQL major version" run: | + echo "The action workflow is triggered by the $BRANCH_NAME" + sudo apt install libipc-run-perl + git config --global user.email "ci@postgrespro.ru" + git config --global user.name "CI PgPro admin" + patch_name=$(ls aqo_*.patch|tail -1) echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV @@ -24,38 +33,43 @@ jobs: branch_name="REL_${vers_number}_STABLE" echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV - - echo "COPT=-Werror" >> $GITHUB_ENV - echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV - - name: "Set master branch name, if needed" if: env.PG_MAJOR_VERSION == '' run: | branch_name="master" echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV - - name: "Environment (debug output)" - if: ${{ always() }} + # Create workspace directory and environment variable. 
+ # It is the second step because on the first we define versions and branches + - name: "Initial dir" run: | - echo "Use PostgreSQL branch $PG_BRANCH (patch: $CORE_PATCH_NAME)" - echo "COPT: $COPT" - echo "CONFIGURE_OPTS: $CONFIGURE_OPTS" - echo "Deploying to production server on branch" $BRANCH_NAME "(PG $PG_BRANCH)" - git config --global user.email "ci@postgrespro.ru" - git config --global user.name "CI PgPro admin" + git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg + + # Invent variable with full path to PG directory just because github + # actions don't like relative paths ... + cd $GITHUB_WORKSPACE/../pg + echo PG_DIR=`pwd` >> $GITHUB_ENV - name: "Prepare PG directory" run: | - git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR cp -r ../aqo contrib/aqo patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME + echo "COPT=-Werror" >> $GITHUB_ENV + echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV - - name: "make check" + # Just for debug + - name: "Environment (debug output)" + if: ${{ always() }} run: | - sudo apt install libipc-run-perl + echo "PG_MAJOR_VERSION: $PG_MAJOR_VERSION" + echo "PG_DIR: $PG_DIR" + echo "PG_BRANCH: $PG_BRANCH" + echo "CORE_PATCH_NAME: $CORE_PATCH_NAME" - cd $GITHUB_WORKSPACE/../pg + - name: "make check" + run: | + cd $PG_DIR ./configure $CONFIGURE_OPTS CFLAGS="-O2" > /dev/null make -j4 > /dev/null && make -j4 -C contrib > /dev/null env CLIENTS=50 THREADS=50 make -C contrib/aqo check @@ -73,7 +87,7 @@ jobs: with: name: make_check_logs path: | - /home/runner/work/aqo/pg/contrib/aqo/regression.diffs - /home/runner/work/aqo/pg/contrib/aqo/log - /home/runner/work/aqo/pg/contrib/aqo/tmp_check/log + ${{ env.PG_DIR }}/contrib/aqo/regression.diffs + ${{ env.PG_DIR 
}}/contrib/aqo/log + ${{ env.PG_DIR }}/contrib/aqo/tmp_check/log retention-days: 7 diff --git a/.github/workflows/installchecks.yml b/.github/workflows/installchecks.yml index 94e38d6c..075034a0 100644 --- a/.github/workflows/installchecks.yml +++ b/.github/workflows/installchecks.yml @@ -1,22 +1,29 @@ name: "InstallChecks" -on: - push: - env: + # Use it just for a report BRANCH_NAME: ${{ github.head_ref || github.ref_name }} +# Trigger it each timeon push or pull request. Honestly, it will be redundant +# most of the time, but external pull-request checks don't be missed out. +on: + push: + pull_request: + jobs: build: runs-on: ubuntu-latest steps: - - # Set major PostgreSQL version for all underlying steps - uses: actions/checkout@v3 - - name: "Define PostreSQL major version" + - name: "Define PostreSQL major version and set basic environment" run: | + echo "The action workflow is triggered by the $BRANCH_NAME" + sudo apt install libipc-run-perl + git config --global user.email "ci@postgrespro.ru" + git config --global user.name "CI PgPro admin" + patch_name=$(ls aqo_*.patch|tail -1) echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV @@ -26,47 +33,51 @@ jobs: branch_name="REL_${vers_number}_STABLE" echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV - - name: "Set master branch name, if needed" if: env.PG_MAJOR_VERSION == '' run: | branch_name="master" echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV - - name: "Environment (debug output)" - if: ${{ always() }} + # Create workspace directory and environment variable. 
+ # It is the second step because on the first we define versions and branches + - name: "Initial dir" run: | - echo "Use PostgreSQL branch $PG_BRANCH (patch: $CORE_PATCH_NAME)" - echo "Deploying to production server on branch" $BRANCH_NAME "(PG $PG_BRANCH)" - git config --global user.email "ci@postgrespro.ru" - git config --global user.name "CI PgPro admin" + git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg + + # Invent variable with full path to PG directory just because github + # actions don't like relative paths ... + cd $GITHUB_WORKSPACE/../pg + echo PG_DIR=`pwd` >> $GITHUB_ENV - name: "Prepare PG directory" run: | - sudo apt install libipc-run-perl libxml2-utils libxml2-dev xsltproc libxslt1-dev - git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR cp -r ../aqo contrib/aqo patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME - - - name: "Paths" - run: | - cd $GITHUB_WORKSPACE/../pg echo "COPT=-Werror" >> $GITHUB_ENV echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV - echo "$GITHUB_WORKSPACE/../pg/tmp_install/bin" >> $GITHUB_PATH - echo "$GITHUB_WORKSPACE/../pg/contrib/aqo/.github/scripts/job" >> $GITHUB_PATH - echo "LD_LIBRARY_PATH=$GITHUB_WORKSPACE/pg/tmp_install/lib" >> $GITHUB_ENV - echo "PGDATABASE=`whoami`" >> $GITHUB_ENV - echo "PGHOST=localhost" >> $GITHUB_ENV - echo "PGDATA=PGDATA" >> $GITHUB_ENV - echo "PGUSER=`whoami`" >> $GITHUB_ENV - echo "PGPORT=5432" >> $GITHUB_ENV + + # Instance-related environment + echo "$PG_DIR/tmp_install/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=$PG_DIR/tmp_install/lib" >> $GITHUB_ENV + echo `pwd`/contrib/aqo/.github/scripts/job >> $GITHUB_PATH + + # Just for debug + - name: "Environment (debug output)" + if: ${{ always() }} + run: | + echo 
"PG_MAJOR_VERSION: $PG_MAJOR_VERSION" + echo "PG_DIR: $PG_DIR" + echo "PG_BRANCH: $PG_BRANCH" + echo "CORE_PATCH_NAME: $CORE_PATCH_NAME" + # See these paths to understand correctness of the instance initialization + echo "PATHs: $PATH" + echo "PG Libs: $LD_LIBRARY_PATH" - name: "Compilation" run: | - cd $GITHUB_WORKSPACE/../pg - echo "paths: $PATH" + cd $PG_DIR echo "COPT: $COPT" echo "CONFIGURE_OPTS: $CONFIGURE_OPTS" ./configure $CONFIGURE_OPTS CFLAGS="-O2" > /dev/null @@ -75,7 +86,7 @@ jobs: - name: "Launch AQO instance" run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR # Launch an instance with AQO extension aqo_instance_launch.sh @@ -86,21 +97,21 @@ jobs: # Pass installcheck in disabled mode - name: installcheck_disabled run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'off'" psql -c "SELECT pg_reload_conf()" make installcheck-world - name: installcheck_disabled_forced_stat run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" make installcheck-world - name: installcheck_frozen run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR psql -c "ALTER SYSTEM SET aqo.mode = 'frozen'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -108,7 +119,7 @@ jobs: - name: installcheck_controlled run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR psql -c "ALTER SYSTEM SET aqo.mode = 'controlled'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -117,7 +128,7 @@ jobs: - name: installcheck_learn continue-on-error: true run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR psql -c "ALTER SYSTEM SET aqo.mode = 'learn'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -126,7 +137,7 @@ jobs: - name: installcheck_intelligent continue-on-error: true run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR psql -c "ALTER SYSTEM SET aqo.mode 
= 'intelligent'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -135,7 +146,7 @@ jobs: - name: installcheck_forced continue-on-error: true run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR psql -c "ALTER SYSTEM SET aqo.mode = 'forced'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -148,8 +159,8 @@ jobs: with: name: ${{ env.AQO_VERSION }}-${{ env.PG_BRANCH }}-${{ env.CORE_PATCH_NAME }}-artifacts path: | - pg/src/test/regress/regression.diffs - pg/logfile.log - pg/contrib/aqo/tmp_check/log + ${{ env.PG_DIR }}/src/test/regress/regression.diffs + ${{ env.PG_DIR }}/logfile.log + ${{ env.PG_DIR }}/contrib/aqo/tmp_check/log retention-days: 2 diff --git a/.github/workflows/job.yml b/.github/workflows/job.yml index 682f4b42..817f0047 100644 --- a/.github/workflows/job.yml +++ b/.github/workflows/job.yml @@ -1,82 +1,94 @@ name: 'Join Order Benchmark' env: + # Use it just for a report BRANCH_NAME: ${{ github.head_ref || github.ref_name }} -# Trigger the workflow on each push -on: push +# Trigger the workflow on each release or on a manual action +on: + workflow_dispatch: + release: jobs: - AQO_Tests: + AQO_JOB_Benchmark: runs-on: self-hosted steps: - - name: "Set common paths" + - uses: actions/checkout@v3 + - name: "Define PostreSQL major version and set basic environment" run: | - echo "$HOME/aqo/.github/scripts/job" >> $GITHUB_PATH - echo "JOB_DIR=$HOME/jo-bench" >> $GITHUB_ENV + echo "The action workflow is triggered by the $BRANCH_NAME" + + # Cleanup, because of self-hosted runner + rm -rf $GITHUB_WORKSPACE/../pg + + patch_name=$(ls aqo_*.patch|tail -1) + echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV + + # we can get number, otherwise set up master + vers_number=$(echo "$patch_name"|tr -d -c 0-9) + echo "PG_MAJOR_VERSION=$vers_number" >> $GITHUB_ENV + + branch_name="REL_${vers_number}_STABLE" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV + - name: "Set master branch name, 
if needed" + if: env.PG_MAJOR_VERSION == '' + run: | + branch_name="master" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV + + # Create workspace directory and environment variable. + # It is the second step because on the first we define versions and branches + - name: "Initial dir" + run: | + git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg - # PostgreSQL-related environment variables - echo "$GITHUB_WORKSPACE/pg/tmp_install/bin" >> $GITHUB_PATH - echo "LD_LIBRARY_PATH=$GITHUB_WORKSPACE/pg/tmp_install/lib" >> $GITHUB_ENV + # Invent variable with full path to PG directory just because github + # actions don't like relative paths ... + cd $GITHUB_WORKSPACE/../pg + echo PG_DIR=`pwd` >> $GITHUB_ENV + + - name: "Prepare PG directory" + run: | + cd $PG_DIR + cp -r ../aqo contrib/aqo + patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME + echo "COPT=-Werror" >> $GITHUB_ENV + echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV + + # Instance-related environment + echo "$PG_DIR/tmp_install/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=$PG_DIR/tmp_install/lib" >> $GITHUB_ENV + echo `pwd`/contrib/aqo/.github/scripts/job >> $GITHUB_PATH + + # JOB-specific environment + echo "JOB_DIR=$HOME/jo-bench" >> $GITHUB_ENV echo "PGDATABASE=`whoami`" >> $GITHUB_ENV echo "PGHOST=localhost" >> $GITHUB_ENV echo "PGDATA=PGDATA" >> $GITHUB_ENV echo "PGUSER=`whoami`" >> $GITHUB_ENV echo "PGPORT=5432" >> $GITHUB_ENV - # Set major PostgreSQL version for all underlying steps - - name: "Extract Postgres major version number" - run: | - PG_MAJOR_VERSION=$(echo "$BRANCH_NAME" | grep --only-matching 'stable[0-9].' 
| grep --only-matching '[0-9].') - - # Declare PG_MAJOR_VERSION as a environment variable - echo "PG_MAJOR_VERSION=$PG_MAJOR_VERSION" >> $GITHUB_ENV - echo "CORE_BRANCH_NAME=REL_${PG_MAJOR_VERSION}_STABLE" >> $GITHUB_ENV - echo "AQO_PATCH_NAME=aqo_pg$PG_MAJOR_VERSION.patch" >> $GITHUB_ENV - - name: "Set proper names for the master case" - if: env.PG_MAJOR_VERSION == '' - run: | - echo "PG_MAJOR_VERSION=master" >> $GITHUB_ENV - echo "CORE_BRANCH_NAME=master" >> $GITHUB_ENV - echo "AQO_PATCH_NAME=aqo_master.patch" >> $GITHUB_ENV - # Just for debug - - name: "Print environment variables" + - name: "Environment (debug output)" + if: ${{ always() }} run: | - echo "Test data: $PG_MAJOR_VERSION; Core branch: $CORE_BRANCH_NAME, AQO patch: $AQO_PATCH_NAME" - echo "Paths: $PATH, JOB path: $JOB_DIR" + echo "PG_MAJOR_VERSION: $PG_MAJOR_VERSION" + echo "PG_DIR: $PG_DIR" + echo "PG_BRANCH: $PG_BRANCH" + echo "CORE_PATCH_NAME: $CORE_PATCH_NAME" + # See these paths to understand correctness of the instance initialization + echo "PATHs: $PATH" echo "PG Libs: $LD_LIBRARY_PATH" - echo "PG Environment: dbname: $PGDATABASE, host: $PGHOST, pgdata: $PGDATA, pguser: $PGUSER, pgport: $PGPORT" - # Runner contains clone of postgres and AQO repositories. 
We must refresh them - - name: "Code pre-cleanup" - run: | - rm -rf pg - git -C ~/pg clean -fdx - git -C ~/pg pull - git -C ~/pg checkout $CORE_BRANCH_NAME - git -C ~/pg pull - - git -C ~/aqo clean -fdx - git -C ~/aqo pull - git -C ~/aqo checkout $BRANCH_NAME - git -C ~/aqo pull - - # Copy the codes into test folder, arrange code versions and do the patching - - name: "Prepare code directory" - run: | - cp -r ~/pg pg - cd pg - cp -r ~/aqo contrib/aqo - patch -p1 --no-backup-if-mismatch < contrib/aqo/$AQO_PATCH_NAME + # JOB-specific environment variable + echo "JOB path: $JOB_DIR" + echo "PG Environment: dbname: $PGDATABASE, host: $PGHOST, pgdata: $PGDATA, pguser: $PGUSER, pgport: $PGPORT" - name: "Compilation" run: | - cd pg - export COPT=-Werror - export CONFIGURE_OPTS="--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" + cd $PG_DIR ./configure $CONFIGURE_OPTS CFLAGS="-O0" make clean > /dev/null make -C contrib clean > /dev/null @@ -87,9 +99,7 @@ jobs: - name: "Launch AQO instance" run: | - cd pg - make -j2 > /dev/null && make -j2 -C contrib > /dev/null - make install > /dev/null && make -C contrib install > /dev/null + cd $PG_DIR # Launch an instance with AQO extension aqo_instance_launch.sh @@ -98,14 +108,14 @@ jobs: - name: "Load a dump of the test database" run: | - cd pg + cd $PG_DIR echo "AQO_VERSION: $AQO_VERSION" load_imdb.sh # Quick pass in parallel mode with statistics - name: "Test No.1: Gather statistics in disabled mode" run: | - cd pg + cd $PG_DIR set_test_conditions_1.sh job_pass.sh dump_knowledge.sh @@ -116,16 +126,17 @@ jobs: with: name: ${{ env.AQO_VERSION }}-${{ env.CORE_BRANCH_NAME }}-${{ env.BRANCH_NAME }}-result_base_stat path: | - pg/explains.txt - pg/report.txt - pg/knowledge_base.dump - pg/logfile.log + # Relative paths not allowed ... 
+ ${{ env.PG_DIR }}/explains.txt + ${{ env.PG_DIR }}/report.txt + ${{ env.PG_DIR }}/knowledge_base.dump + ${{ env.PG_DIR }}/logfile.log retention-days: 1 # Test No.2: Learn on all incoming queries - name: "Test No.2: Learning stage" run: | - cd pg + cd $PG_DIR set_test_conditions_2.sh job_pass.sh 10 check_result.sh @@ -133,7 +144,7 @@ jobs: # One pass on frozen AQO data, dump knowledge base, check total error - name: "Test No.3: Frozen execution" run: | - cd pg + cd $PG_DIR set_test_conditions_3.sh job_pass.sh dump_knowledge.sh @@ -144,14 +155,15 @@ jobs: with: name: ${{ env.AQO_VERSION }}-${{ env.CORE_BRANCH_NAME }}-${{ env.BRANCH_NAME }}-result_frozen path: | - pg/explains.txt - pg/report.txt - pg/knowledge_base.dump - pg/logfile.log + # Relative paths not allowed ... + ${{ env.PG_DIR }}/explains.txt + ${{ env.PG_DIR }}/report.txt + ${{ env.PG_DIR }}/knowledge_base.dump + ${{ env.PG_DIR }}/logfile.log retention-days: 7 - name: "Cleanup" run: | - cd pg + cd $PG_DIR pg_ctl -D PGDATA stop From ec03b4a925403907dfecc4187ffb5e04703208ae Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 30 Mar 2023 08:43:12 +0500 Subject: [PATCH 108/134] Add specific initial script for AQO 1.6. It mostly caused by desire of reducing number of failures 001_pgbench.pl test on WINDOWS OSes (it is related to speed of file descriptor allocations in the test, where we CREATE/DROP extensions competitively by several threads. Also, the aqo_CVE-2020-14350 test is corrected. 
--- Makefile | 2 +- aqo--1.6.sql | 210 ++++++++++++++++++++++++++++++++ expected/aqo_CVE-2020-14350.out | 138 +++++++-------------- sql/aqo_CVE-2020-14350.sql | 104 +++++----------- 4 files changed, 282 insertions(+), 172 deletions(-) create mode 100644 aqo--1.6.sql diff --git a/Makefile b/Makefile index ce9d00ba..1da2994c 100755 --- a/Makefile +++ b/Makefile @@ -30,7 +30,7 @@ EXTRA_INSTALL = contrib/postgres_fdw contrib/pg_stat_statements DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql \ aqo--1.2--1.3.sql aqo--1.3--1.4.sql aqo--1.4--1.5.sql \ - aqo--1.5--1.6.sql + aqo--1.5--1.6.sql aqo--1.6.sql ifdef USE_PGXS PG_CONFIG ?= pg_config diff --git a/aqo--1.6.sql b/aqo--1.6.sql new file mode 100644 index 00000000..bb44cf22 --- /dev/null +++ b/aqo--1.6.sql @@ -0,0 +1,210 @@ +/* contrib/aqo/aqo--1.6.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION aqo" to load this file. \quit + +-- +-- Get cardinality error of queries the last time they were executed. +-- IN: +-- controlled - show queries executed under a control of AQO (true); +-- executed without an AQO control, but AQO has a stat on the query (false). +-- +-- OUT: +-- num - sequental number. Smaller number corresponds to higher error. +-- id - ID of a query. +-- fshash - feature space. Usually equal to zero or ID. +-- error - AQO error that calculated on plan nodes of the query. +-- nexecs - number of executions of queries associated with this ID. +-- +CREATE FUNCTION aqo_cardinality_error(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, error double precision, nexecs bigint) +AS 'MODULE_PATHNAME', 'aqo_cardinality_error' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS +'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; + +-- +-- Remove unneeded rows from the AQO ML storage. 
+-- For common feature space, remove rows from aqo_data only. +-- For custom feature space - remove all rows related to the space from all AQO +-- tables even if only one oid for one feature subspace of the space is illegal. +-- Returns number of deleted rows from aqo_queries and aqo_data tables. +-- +CREATE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) +RETURNS record +AS 'MODULE_PATHNAME', 'aqo_cleanup' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cleanup() IS +'Remove unneeded rows from the AQO ML storage'; + +CREATE FUNCTION aqo_disable_class(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_disable_query' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_disable_class(bigint) IS +'Set learn_aqo, use_aqo and auto_tuning into false for a class of queries with specific queryid.'; + +-- +-- Remove query class settings, text, statistics and ML data from AQO storage. +-- Return number of FSS records, removed from the storage. +-- +CREATE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer +AS 'MODULE_PATHNAME', 'aqo_drop_class' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_drop_class(bigint) IS +'Remove info about an query class from AQO ML knowledge base.'; + +CREATE FUNCTION aqo_enable_class(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_enable_query' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_enable_class(bigint) IS +'Set learn_aqo, use_aqo and auto_tuning (in intelligent mode) into true for a class of queries with specific queryid.'; + +-- +-- Show execution time of queries, for which AQO has statistics. +-- controlled - show stat on executions where AQO was used for cardinality +-- estimations, or not used (controlled = false). +-- Last case is possible in disabled mode with aqo.force_collect_stat = 'on'. 
+-- +CREATE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) +AS 'MODULE_PATHNAME', 'aqo_execution_time' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_execution_time(boolean) IS +'Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions.'; + +CREATE FUNCTION aqo_memory_usage( + OUT name text, + OUT allocated_size int, + OUT used_size int +) +RETURNS SETOF record +AS $$ + SELECT name, total_bytes, used_bytes FROM pg_backend_memory_contexts + WHERE name LIKE 'AQO%' + UNION + SELECT name, allocated_size, size FROM pg_shmem_allocations + WHERE name LIKE 'AQO%'; +$$ LANGUAGE SQL; +COMMENT ON FUNCTION aqo_memory_usage() IS +'Show allocated sizes and used sizes of aqo`s memory contexts and hash tables'; + +-- +-- Update or insert an aqo_data +-- table record for given 'fs' & 'fss'. +-- + +CREATE FUNCTION aqo_data_update( + fs bigint, + fss integer, + nfeatures integer, + features double precision[][], + targets double precision[], + reliability double precision[], + oids Oid[]) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_data_update' +LANGUAGE C VOLATILE; + +CREATE FUNCTION aqo_queries_update( + queryid bigint, fs bigint, learn_aqo bool, use_aqo bool, auto_tuning bool) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_queries_update' +LANGUAGE C VOLATILE; + +-- +-- Update or insert an aqo_query_stat +-- table record for given 'queryid'. 
+-- +CREATE FUNCTION aqo_query_stat_update( + queryid bigint, + execution_time_with_aqo double precision[], + execution_time_without_aqo double precision[], + planning_time_with_aqo double precision[], + planning_time_without_aqo double precision[], + cardinality_error_with_aqo double precision[], + cardinality_error_without_aqo double precision[], + executions_with_aqo bigint, + executions_without_aqo bigint) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_query_stat_update' +LANGUAGE C VOLATILE; + +-- +-- Update or insert an aqo_query_texts +-- table record for given 'queryid'. +-- +CREATE FUNCTION aqo_query_texts_update( + queryid bigint, query_text text) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_query_texts_update' +LANGUAGE C VOLATILE; + +-- +-- Remove all records in the AQO storage. +-- Return number of rows removed. +-- +CREATE FUNCTION aqo_reset() RETURNS bigint +AS 'MODULE_PATHNAME', 'aqo_reset' +LANGUAGE C PARALLEL SAFE; +COMMENT ON FUNCTION aqo_reset() IS +'Reset all data gathered by AQO'; + +-- ----------------------------------------------------------------------------- +-- +-- VIEWs +-- +-- ----------------------------------------------------------------------------- + +CREATE FUNCTION aqo_data ( + OUT fs bigint, + OUT fss integer, + OUT nfeatures integer, + OUT features double precision[][], + OUT targets double precision[], + OUT reliability double precision[], + OUT oids Oid[] +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_data' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_queries ( + OUT queryid bigint, + OUT fs bigint, + OUT learn_aqo boolean, + OUT use_aqo boolean, + OUT auto_tuning boolean, + OUT smart_timeout bigint, + OUT count_increase_timeout bigint +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_queries' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_query_stat ( + OUT queryid bigint, + OUT execution_time_with_aqo double precision[], + OUT execution_time_without_aqo double precision[], + OUT 
planning_time_with_aqo double precision[], + OUT planning_time_without_aqo double precision[], + OUT cardinality_error_with_aqo double precision[], + OUT cardinality_error_without_aqo double precision[], + OUT executions_with_aqo bigint, + OUT executions_without_aqo bigint +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_query_stat' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_query_texts(OUT queryid bigint, OUT query_text text) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_query_texts' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE VIEW aqo_data AS SELECT * FROM aqo_data(); +CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); +CREATE VIEW aqo_query_stat AS SELECT * FROM aqo_query_stat(); +CREATE VIEW aqo_query_texts AS SELECT * FROM aqo_query_texts(); diff --git a/expected/aqo_CVE-2020-14350.out b/expected/aqo_CVE-2020-14350.out index 8685b935..5deb45ae 100644 --- a/expected/aqo_CVE-2020-14350.out +++ b/expected/aqo_CVE-2020-14350.out @@ -49,51 +49,32 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_status(hash bigint) -RETURNS TABLE ( - "learn" BOOL, - "use aqo" BOOL, - "auto tune" BOOL, - "fspace hash" bigINT, - "t_naqo" TEXT, - "err_naqo" TEXT, - "iters" BIGINT, - "t_aqo" TEXT, - "err_aqo" TEXT, - "iters_aqo" BIGINT -) +CREATE FUNCTION aqo_reset() +RETURNS bigint AS $$ BEGIN + RETURN 1; END $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_status" already exists with same argument types +ERROR: function "aqo_reset" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_status(hash bigint) -RETURNS TABLE ( - "learn" BOOL, - "use aqo" BOOL, - "auto tune" BOOL, - "fspace hash" bigINT, - "t_naqo" TEXT, - "err_naqo" TEXT, - "iters" BIGINT, - "t_aqo" TEXT, - "err_aqo" TEXT, - "iters_aqo" BIGINT -) +CREATE OR REPLACE FUNCTION aqo_reset() +RETURNS bigint AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; + RETURN 2; END $$ 
LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_status(42); - aqo_status ------------- -(0 rows) +SELECT aqo_reset(); + aqo_reset +----------- + 2 +(1 row) SET ROLE regress_hacker; SHOW is_superuser; @@ -103,7 +84,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_status(bigint); +DROP FUNCTION aqo_reset(); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 3 @@ -208,29 +189,31 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_clear_hist(hash bigint) -RETURNS VOID +CREATE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer AS $$ BEGIN + RETURN 1; END $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_clear_hist" already exists with same argument types +ERROR: function "aqo_drop_class" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_clear_hist(hash bigint) -RETURNS VOID +CREATE OR REPLACE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; + RETURN 2; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_clear_hist(42); - aqo_clear_hist +SELECT aqo_drop_class(42); + aqo_drop_class ---------------- - + 2 (1 row) SET ROLE regress_hacker; @@ -241,7 +224,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_clear_hist(bigint); +DROP FUNCTION aqo_drop_class(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 6 @@ -254,8 +237,8 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_drop(hash bigint) -RETURNS VOID +CREATE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) AS $$ BEGIN END @@ -263,21 +246,20 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_drop" already exists with same argument types +ERROR: function "aqo_execution_time" already exists with same 
argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_drop(hash bigint) -RETURNS VOID +CREATE OR REPLACE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_drop(42); - aqo_drop ----------- - -(1 row) +SELECT aqo_execution_time(true); + aqo_execution_time +-------------------- +(0 rows) SET ROLE regress_hacker; SHOW is_superuser; @@ -287,7 +269,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_drop(bigint); +DROP FUNCTION aqo_execution_time(boolean); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 7 @@ -300,8 +282,8 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_ne_queries() -RETURNS SETOF int +CREATE FUNCTION aqo_memory_usage(OUT name text, OUT allocated_size int, OUT used_size int) +RETURNS SETOF record AS $$ BEGIN END @@ -309,19 +291,19 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_ne_queries" already exists with same argument types +ERROR: function "aqo_memory_usage" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_ne_queries() -RETURNS SETOF int +CREATE OR REPLACE FUNCTION aqo_memory_usage(OUT name text, OUT allocated_size int, OUT used_size int) +RETURNS SETOF record AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_ne_queries(); - aqo_ne_queries ----------------- +SELECT aqo_memory_usage(); + aqo_memory_usage +------------------ (0 rows) SET ROLE regress_hacker; @@ -332,43 +314,9 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_ne_queries(); +DROP FUNCTION aqo_memory_usage(); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping --- Test 8 -RESET ROLE; -ALTER ROLE regress_hacker NOSUPERUSER; -SET 
ROLE regress_hacker; -SHOW is_superuser; - is_superuser --------------- - off -(1 row) - -CREATE FUNCTION aqo_migrate_to_1_1_get_pk(rel text) -RETURNS regclass -AS $$ -DECLARE - ret regclass; -BEGIN - ALTER ROLE regress_hacker SUPERUSER; - SELECT * FROM aqo_migrate_to_1_1_get_pk(rel::regclass) INTO ret; - RETURN ret; -END -$$ LANGUAGE plpgsql; -RESET ROLE; -CREATE EXTENSION aqo; --- Test result (must be 'off') -SET ROLE regress_hacker; -SHOW is_superuser; - is_superuser --------------- - off -(1 row) - -RESET ROLE; -DROP FUNCTION aqo_migrate_to_1_1_get_pk(text); -DROP EXTENSION IF EXISTS aqo; -- Cleanup RESET ROLE; DROP OWNED BY regress_hacker CASCADE; diff --git a/sql/aqo_CVE-2020-14350.sql b/sql/aqo_CVE-2020-14350.sql index 75833223..c4979344 100644 --- a/sql/aqo_CVE-2020-14350.sql +++ b/sql/aqo_CVE-2020-14350.sql @@ -44,21 +44,11 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_status(hash bigint) -RETURNS TABLE ( - "learn" BOOL, - "use aqo" BOOL, - "auto tune" BOOL, - "fspace hash" bigINT, - "t_naqo" TEXT, - "err_naqo" TEXT, - "iters" BIGINT, - "t_aqo" TEXT, - "err_aqo" TEXT, - "iters_aqo" BIGINT -) +CREATE FUNCTION aqo_reset() +RETURNS bigint AS $$ BEGIN + RETURN 1; END $$ LANGUAGE plpgsql; @@ -67,33 +57,23 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_status(hash bigint) -RETURNS TABLE ( - "learn" BOOL, - "use aqo" BOOL, - "auto tune" BOOL, - "fspace hash" bigINT, - "t_naqo" TEXT, - "err_naqo" TEXT, - "iters" BIGINT, - "t_aqo" TEXT, - "err_aqo" TEXT, - "iters_aqo" BIGINT -) +CREATE OR REPLACE FUNCTION aqo_reset() +RETURNS bigint AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; + RETURN 2; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_status(42); +SELECT aqo_reset(); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_status(bigint); +DROP FUNCTION aqo_reset(); DROP EXTENSION IF EXISTS aqo; -- Test 3 @@ -177,10 +157,11 @@ ALTER 
ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_clear_hist(hash bigint) -RETURNS VOID +CREATE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer AS $$ BEGIN + RETURN 1; END $$ LANGUAGE plpgsql; @@ -189,22 +170,23 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_clear_hist(hash bigint) -RETURNS VOID +CREATE OR REPLACE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; + RETURN 2; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_clear_hist(42); +SELECT aqo_drop_class(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_clear_hist(bigint); +DROP FUNCTION aqo_drop_class(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 6 @@ -214,8 +196,8 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_drop(hash bigint) -RETURNS VOID +CREATE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) AS $$ BEGIN END @@ -226,8 +208,8 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_drop(hash bigint) -RETURNS VOID +CREATE OR REPLACE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; @@ -235,13 +217,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_drop(42); +SELECT aqo_execution_time(true); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_drop(bigint); +DROP FUNCTION aqo_execution_time(boolean); DROP EXTENSION IF EXISTS aqo; -- Test 7 @@ -251,8 +233,8 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_ne_queries() -RETURNS SETOF int +CREATE FUNCTION aqo_memory_usage(OUT name text, OUT allocated_size 
int, OUT used_size int) +RETURNS SETOF record AS $$ BEGIN END @@ -263,52 +245,22 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_ne_queries() -RETURNS SETOF int -AS $$ -BEGIN - ALTER ROLE regress_hacker SUPERUSER; -END -$$ LANGUAGE plpgsql; - -RESET ROLE; -SELECT aqo_ne_queries(); - -SET ROLE regress_hacker; -SHOW is_superuser; - -RESET ROLE; -DROP FUNCTION aqo_ne_queries(); -DROP EXTENSION IF EXISTS aqo; - --- Test 8 -RESET ROLE; -ALTER ROLE regress_hacker NOSUPERUSER; - -SET ROLE regress_hacker; -SHOW is_superuser; - -CREATE FUNCTION aqo_migrate_to_1_1_get_pk(rel text) -RETURNS regclass +CREATE OR REPLACE FUNCTION aqo_memory_usage(OUT name text, OUT allocated_size int, OUT used_size int) +RETURNS SETOF record AS $$ -DECLARE - ret regclass; BEGIN ALTER ROLE regress_hacker SUPERUSER; - SELECT * FROM aqo_migrate_to_1_1_get_pk(rel::regclass) INTO ret; - RETURN ret; END $$ LANGUAGE plpgsql; RESET ROLE; -CREATE EXTENSION aqo; +SELECT aqo_memory_usage(); --- Test result (must be 'off') SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_migrate_to_1_1_get_pk(text); +DROP FUNCTION aqo_memory_usage(); DROP EXTENSION IF EXISTS aqo; -- Cleanup From 25e225d2f4ff14b1b94fed6c47052ab9b83d32bf Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 24 Mar 2023 08:28:48 +0500 Subject: [PATCH 109/134] Bugfix. Do away with possible conflict of hooks, declared as 'extern' in different libraries. To avoid such a problem in future, refactor AQO interfaces: declare all hooks as static, reduce number of exporting functions and introduce concept of *_init() function for a module that needs some actions in the PG_init() routine. 
Reviewed by: @Anisimov-ds --- aqo.c | 23 ++---- aqo.h | 56 ++------------ aqo_shared.c | 35 +++++++-- aqo_shared.h | 7 +- cardinality_hooks.c | 183 +++++++++++++++++--------------------------- cardinality_hooks.h | 32 -------- hash.h | 5 ++ path_utils.c | 55 ++++++++----- path_utils.h | 12 +-- postprocessing.c | 159 +++++++++++++++++++++----------------- preprocessing.c | 53 ++++--------- preprocessing.h | 12 --- storage.c | 2 +- storage.h | 6 ++ 14 files changed, 263 insertions(+), 377 deletions(-) delete mode 100644 cardinality_hooks.h delete mode 100644 preprocessing.h diff --git a/aqo.c b/aqo.c index 4b776433..86a37ccf 100644 --- a/aqo.c +++ b/aqo.c @@ -19,9 +19,7 @@ #include "aqo.h" #include "aqo_shared.h" -#include "cardinality_hooks.h" #include "path_utils.h" -#include "preprocessing.h" #include "storage.h" @@ -98,19 +96,6 @@ MemoryContext AQOLearnMemCtx = NULL; /* Additional plan info */ int njoins; -/* Saved hook values */ -post_parse_analyze_hook_type prev_post_parse_analyze_hook; -planner_hook_type prev_planner_hook; -ExecutorStart_hook_type prev_ExecutorStart_hook; -ExecutorRun_hook_type prev_ExecutorRun; -ExecutorEnd_hook_type prev_ExecutorEnd_hook; -set_baserel_rows_estimate_hook_type prev_set_foreign_rows_estimate_hook; -set_baserel_rows_estimate_hook_type prev_set_baserel_rows_estimate_hook; -get_parameterized_baserel_size_hook_type prev_get_parameterized_baserel_size_hook; -set_joinrel_size_estimates_hook_type prev_set_joinrel_size_estimates_hook; -get_parameterized_joinrel_size_hook_type prev_get_parameterized_joinrel_size_hook; -ExplainOnePlan_hook_type prev_ExplainOnePlan_hook; -ExplainOneNode_hook_type prev_ExplainOneNode_hook; /***************************************************************************** * @@ -330,6 +315,7 @@ _PG_init(void) NULL, NULL); +<<<<<<< HEAD prev_shmem_startup_hook = shmem_startup_hook; shmem_startup_hook = aqo_init_shmem; prev_planner_hook = planner_hook; @@ -366,6 +352,13 @@ _PG_init(void) 
prev_create_upper_paths_hook = create_upper_paths_hook; create_upper_paths_hook = aqo_store_upper_signature_hook; +======= + aqo_shmem_init(); + aqo_preprocessing_init(); + aqo_postprocessing_init(); + aqo_cardinality_hooks_init(); + aqo_path_utils_init(); +>>>>>>> daf05a0 (Bugfix. Do away with possible conflict of hooks, declared as 'extern' in) init_deactivated_queries_storage(); diff --git a/aqo.h b/aqo.h index 9600b136..6f57a4d1 100644 --- a/aqo.h +++ b/aqo.h @@ -132,7 +132,6 @@ #include "nodes/nodeFuncs.h" #include "optimizer/pathnode.h" #include "optimizer/planner.h" -#include "optimizer/cost.h" #include "parser/analyze.h" #include "parser/parsetree.h" #include "utils/builtins.h" @@ -140,11 +139,9 @@ #include "utils/hsearch.h" #include "utils/memutils.h" #include "utils/rel.h" -#include "utils/fmgroids.h" #include "utils/snapmgr.h" #include "machine_learning.h" -//#include "storage.h" /* Check PostgreSQL version (9.6.0 contains important changes in planner) */ #if PG_VERSION_NUM < 90600 @@ -237,58 +234,15 @@ extern MemoryContext AQOCacheMemCtx; extern MemoryContext AQOPredictMemCtx; extern MemoryContext AQOLearnMemCtx; -/* Saved hook values in case of unload */ -extern post_parse_analyze_hook_type prev_post_parse_analyze_hook; -extern planner_hook_type prev_planner_hook; -extern ExecutorStart_hook_type prev_ExecutorStart_hook; -extern ExecutorRun_hook_type prev_ExecutorRun; -extern ExecutorEnd_hook_type prev_ExecutorEnd_hook; -extern set_baserel_rows_estimate_hook_type - prev_set_foreign_rows_estimate_hook; -extern set_baserel_rows_estimate_hook_type - prev_set_baserel_rows_estimate_hook; -extern get_parameterized_baserel_size_hook_type - prev_get_parameterized_baserel_size_hook; -extern set_joinrel_size_estimates_hook_type - prev_set_joinrel_size_estimates_hook; -extern get_parameterized_joinrel_size_hook_type - prev_get_parameterized_joinrel_size_hook; -extern ExplainOnePlan_hook_type prev_ExplainOnePlan_hook; -extern ExplainOneNode_hook_type 
prev_ExplainOneNode_hook; - -extern void ppi_hook(ParamPathInfo *ppi); extern int aqo_statement_timeout; -/* Hash functions */ -void get_eclasses(List *clauselist, int *nargs, int **args_hash, - int **eclass_hash); -int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); - - -/* Storage interaction */ -extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids); -extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids); - -/* Query preprocessing hooks */ -extern void print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, - ExplainState *es, const char *queryString, - ParamListInfo params, - const instr_time *planduration, - QueryEnvironment *queryEnv); -extern void print_node_explain(ExplainState *es, PlanState *ps, Plan *plan); - /* Cardinality estimation */ extern double predict_for_relation(List *restrict_clauses, List *selectivities, List *relsigns, int *fss); -/* Query execution statistics collecting hooks */ -void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags); -void aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, - uint64 count, bool execute_once); -void aqo_ExecutorEnd(QueryDesc *queryDesc); - /* Automatic query tuning */ extern void automatical_query_tuning(uint64 query_hash, struct StatEntry *stat); +extern double get_mean(double *elems, int nelems); /* Utilities */ extern int int_cmp(const void *a, const void *b); @@ -306,8 +260,10 @@ extern void selectivity_cache_clear(void); extern bool IsQueryDisabled(void); -extern bool update_query_timeout(uint64 queryid, int64 smart_timeout); -extern double get_mean(double *elems, int nelems); - extern List *cur_classes; + +extern void aqo_cardinality_hooks_init(void); +extern void aqo_preprocessing_init(void); +extern void aqo_postprocessing_init(void); + #endif diff --git a/aqo_shared.c b/aqo_shared.c index 0a6a8db6..d704cf76 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -6,27 +6,30 @@ #include "lib/dshash.h" #include 
"miscadmin.h" +#include "storage/ipc.h" #include "storage/shmem.h" #include "aqo_shared.h" #include "storage.h" -shmem_startup_hook_type prev_shmem_startup_hook = NULL; AQOSharedState *aqo_state = NULL; int fs_max_items = 10000; /* Max number of different feature spaces in ML model */ int fss_max_items = 100000; /* Max number of different feature subspaces in ML model */ +static shmem_startup_hook_type aqo_shmem_startup_next = NULL; +static shmem_request_hook_type aqo_shmem_request_next = NULL; + static void on_shmem_shutdown(int code, Datum arg); -void +static void aqo_init_shmem(void) { bool found; HASHCTL info; - if (prev_shmem_startup_hook) - prev_shmem_startup_hook(); + if (aqo_shmem_startup_next) + aqo_shmem_startup_next(); aqo_state = NULL; stat_htab = NULL; @@ -116,10 +119,17 @@ on_shmem_shutdown(int code, Datum arg) return; } -Size -aqo_memsize(void) + +/* + * Requests any additional shared memory required for aqo. + */ +static void +aqo_shmem_request(void) { - Size size; + Size size; + + if (aqo_shmem_request_next) + aqo_shmem_request_next(); size = MAXALIGN(sizeof(AQOSharedState)); size = add_size(size, hash_estimate_size(fs_max_items, sizeof(AQOSharedState))); @@ -128,5 +138,14 @@ aqo_memsize(void) size = add_size(size, hash_estimate_size(fss_max_items, sizeof(DataEntry))); size = add_size(size, hash_estimate_size(fs_max_items, sizeof(QueriesEntry))); - return size; + RequestAddinShmemSpace(size); +} + +void +aqo_shmem_init(void) +{ + aqo_shmem_startup_next = shmem_startup_hook; + shmem_startup_hook = aqo_init_shmem; + aqo_shmem_request_next = shmem_request_hook; + shmem_request_hook = aqo_shmem_request; } diff --git a/aqo_shared.h b/aqo_shared.h index e922fb1c..ee9e3087 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -1,9 +1,6 @@ #ifndef AQO_SHARED_H #define AQO_SHARED_H -#include "lib/dshash.h" -#include "storage/dsm.h" -#include "storage/ipc.h" #include "storage/lwlock.h" #include "utils/dsa.h" @@ -31,13 +28,11 @@ typedef struct AQOSharedState } 
AQOSharedState; -extern shmem_startup_hook_type prev_shmem_startup_hook; extern AQOSharedState *aqo_state; extern int fs_max_items; /* Max number of feature spaces that AQO can operate */ extern int fss_max_items; -extern Size aqo_memsize(void); -extern void aqo_init_shmem(void); +extern void aqo_shmem_init(void); #endif /* AQO_SHARED_H */ diff --git a/cardinality_hooks.c b/cardinality_hooks.c index c26fcccb..a86d5fa2 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -27,115 +27,32 @@ #include "postgres.h" +#include "optimizer/cost.h" +#include "utils/selfuncs.h" + #include "aqo.h" -#include "cardinality_hooks.h" #include "hash.h" #include "machine_learning.h" #include "path_utils.h" - -estimate_num_groups_hook_type prev_estimate_num_groups_hook = NULL; +#include "storage.h" double predicted_ppi_rows; double fss_ppi_hash; -/* - * Calls standard set_baserel_rows_estimate or its previous hook. - */ -static void -default_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) -{ - if (prev_set_baserel_rows_estimate_hook) - prev_set_baserel_rows_estimate_hook(root, rel); - else - set_baserel_rows_estimate_standard(root, rel); -} - -/* - * Calls standard get_parameterized_baserel_size or its previous hook. - */ -static double -default_get_parameterized_baserel_size(PlannerInfo *root, - RelOptInfo *rel, - List *param_clauses) -{ - if (prev_get_parameterized_baserel_size_hook) - return prev_get_parameterized_baserel_size_hook(root, rel, param_clauses); - else - return get_parameterized_baserel_size_standard(root, rel, param_clauses); -} - -/* - * Calls standard get_parameterized_joinrel_size or its previous hook. 
- */ -static double -default_get_parameterized_joinrel_size(PlannerInfo *root, - RelOptInfo *rel, - Path *outer_path, - Path *inner_path, - SpecialJoinInfo *sjinfo, - List *restrict_clauses) -{ - if (prev_get_parameterized_joinrel_size_hook) - return prev_get_parameterized_joinrel_size_hook(root, rel, - outer_path, - inner_path, - sjinfo, - restrict_clauses); - else - return get_parameterized_joinrel_size_standard(root, rel, - outer_path, - inner_path, - sjinfo, - restrict_clauses); -} - -/* - * Calls standard set_joinrel_size_estimates or its previous hook. - */ -static void -default_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - SpecialJoinInfo *sjinfo, - List *restrictlist) -{ - if (prev_set_joinrel_size_estimates_hook) - prev_set_joinrel_size_estimates_hook(root, rel, - outer_rel, - inner_rel, - sjinfo, - restrictlist); - else - set_joinrel_size_estimates_standard(root, rel, - outer_rel, - inner_rel, - sjinfo, - restrictlist); -} - -static double -default_estimate_num_groups(PlannerInfo *root, List *groupExprs, - Path *subpath, RelOptInfo *grouped_rel, - List **pgset, EstimationInfo *estinfo) -{ - double input_rows = subpath->rows; - - if (prev_estimate_num_groups_hook != NULL) - return (*prev_estimate_num_groups_hook)(root, groupExprs, - subpath, - grouped_rel, - pgset, estinfo); - else - return estimate_num_groups(root, groupExprs, input_rows, pgset, estinfo); -} +static set_baserel_rows_estimate_hook_type aqo_set_baserel_rows_estimate_next = NULL; +static get_parameterized_baserel_size_hook_type aqo_get_parameterized_baserel_size_next = NULL; +static set_joinrel_size_estimates_hook_type aqo_set_joinrel_size_estimates_next = NULL; +static get_parameterized_joinrel_size_hook_type aqo_get_parameterized_joinrel_size_next = NULL; +static set_parampathinfo_postinit_hook_type aqo_set_parampathinfo_postinit_next = NULL; +static estimate_num_groups_hook_type aqo_estimate_num_groups_next = NULL; /* * 
Our hook for setting baserel rows estimate. * Extracts clauses, their selectivities and list of relation relids and * passes them to predict_for_relation. */ -void +static void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) { double predicted; @@ -187,13 +104,15 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) default_estimator: rel->predicted_cardinality = -1.; - default_set_baserel_rows_estimate(root, rel); + aqo_set_baserel_rows_estimate_next(root, rel); } - -void -ppi_hook(ParamPathInfo *ppi) +static void +aqo_parampathinfo_postinit(ParamPathInfo *ppi) { + if (aqo_set_parampathinfo_postinit_next) + (*aqo_set_parampathinfo_postinit_next)(ppi); + if (IsQueryDisabled()) return; @@ -206,7 +125,7 @@ ppi_hook(ParamPathInfo *ppi) * Extracts clauses (including parametrization ones), their selectivities * and list of relation relids and passes them to predict_for_relation. */ -double +static double aqo_get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, List *param_clauses) @@ -284,7 +203,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, return predicted; default_estimator: - return default_get_parameterized_baserel_size(root, rel, param_clauses); + return aqo_get_parameterized_baserel_size_next(root, rel, param_clauses); } /* @@ -292,7 +211,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, * Extracts clauses, their selectivities and list of relation relids and * passes them to predict_for_relation. 
*/ -void +static void aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *outer_rel, RelOptInfo *inner_rel, @@ -354,9 +273,8 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, default_estimator: rel->predicted_cardinality = -1; - default_set_joinrel_size_estimates(root, rel, - outer_rel, inner_rel, - sjinfo, restrictlist); + aqo_set_joinrel_size_estimates_next(root, rel, outer_rel, inner_rel, + sjinfo, restrictlist); } /* @@ -364,7 +282,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * Extracts clauses (including parametrization ones), their selectivities * and list of relation relids and passes them to predict_for_relation. */ -double +static double aqo_get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, Path *outer_path, @@ -421,7 +339,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, return predicted; default_estimator: - return default_get_parameterized_joinrel_size(root, rel, + return aqo_get_parameterized_joinrel_size_next(root, rel, outer_path, inner_path, sjinfo, clauses); } @@ -460,10 +378,10 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, return (prediction <= 0) ? 
-1 : prediction; } -double -aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, - Path *subpath, RelOptInfo *grouped_rel, - List **pgset, EstimationInfo *estinfo) +static double +aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, + Path *subpath, RelOptInfo *grouped_rel, + List **pgset, EstimationInfo *estinfo) { int fss; double predicted; @@ -476,7 +394,7 @@ aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, /* XXX: Don't support some GROUPING options */ goto default_estimator; - if (prev_estimate_num_groups_hook != NULL) + if (aqo_estimate_num_groups_next != NULL) elog(WARNING, "AQO replaced another estimator of a groups number"); /* Zero the estinfo output parameter, if non-NULL */ @@ -507,6 +425,45 @@ aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, MemoryContextSwitchTo(old_ctx_m); default_estimator: - return default_estimate_num_groups(root, groupExprs, subpath, grouped_rel, - pgset, estinfo); + if (aqo_estimate_num_groups_next) + return aqo_estimate_num_groups_next(root, groupExprs, subpath, + grouped_rel, pgset, estinfo); + else + return estimate_num_groups(root, groupExprs, subpath->rows, + pgset, estinfo); +} + +void +aqo_cardinality_hooks_init(void) +{ + + /* Cardinality prediction hooks. */ + aqo_set_baserel_rows_estimate_next = set_baserel_rows_estimate_hook ? + set_baserel_rows_estimate_hook : + set_baserel_rows_estimate_standard; + set_baserel_rows_estimate_hook = aqo_set_baserel_rows_estimate; + + /* XXX: we have a problem here. Should be redesigned later */ + set_foreign_rows_estimate_hook = aqo_set_baserel_rows_estimate; + + aqo_get_parameterized_baserel_size_next = get_parameterized_baserel_size_hook ? + get_parameterized_baserel_size_hook : + get_parameterized_baserel_size_standard; + get_parameterized_baserel_size_hook = aqo_get_parameterized_baserel_size; + + aqo_set_joinrel_size_estimates_next = set_joinrel_size_estimates_hook ? 
+ set_joinrel_size_estimates_hook : + set_joinrel_size_estimates_standard; + set_joinrel_size_estimates_hook = aqo_set_joinrel_size_estimates; + + aqo_get_parameterized_joinrel_size_next = get_parameterized_joinrel_size_hook ? + get_parameterized_joinrel_size_hook : + get_parameterized_joinrel_size_standard; + get_parameterized_joinrel_size_hook = aqo_get_parameterized_joinrel_size; + + aqo_set_parampathinfo_postinit_next = parampathinfo_postinit_hook; + parampathinfo_postinit_hook = aqo_parampathinfo_postinit; + + aqo_estimate_num_groups_next = estimate_num_groups_hook; + estimate_num_groups_hook = aqo_estimate_num_groups; } diff --git a/cardinality_hooks.h b/cardinality_hooks.h deleted file mode 100644 index c34f9315..00000000 --- a/cardinality_hooks.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef CARDINALITY_HOOKS_H -#define CARDINALITY_HOOKS_H - -#include "optimizer/planner.h" -#include "utils/selfuncs.h" - -extern estimate_num_groups_hook_type prev_estimate_num_groups_hook; - - -/* Cardinality estimation hooks */ -extern void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel); -extern double aqo_get_parameterized_baserel_size(PlannerInfo *root, - RelOptInfo *rel, - List *param_clauses); -extern void aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - SpecialJoinInfo *sjinfo, - List *restrictlist); -extern double aqo_get_parameterized_joinrel_size(PlannerInfo *root, - RelOptInfo *rel, - Path *outer_path, - Path *inner_path, - SpecialJoinInfo *sjinfo, - List *restrict_clauses); -extern double aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, - Path *subpath, - RelOptInfo *grouped_rel, - List **pgset, - EstimationInfo *estinfo); - -#endif /* CARDINALITY_HOOKS_H */ diff --git a/hash.h b/hash.h index eb4b2b97..a1738ac4 100644 --- a/hash.h +++ b/hash.h @@ -13,4 +13,9 @@ extern int get_fss_for_object(List *relsigns, List *clauselist, extern int get_int_array_hash(int *arr, int 
len); extern int get_grouped_exprs_hash(int fss, List *group_exprs); +/* Hash functions */ +void get_eclasses(List *clauselist, int *nargs, int **args_hash, + int **eclass_hash); +int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); + #endif /* AQO_HASH_H */ \ No newline at end of file diff --git a/path_utils.c b/path_utils.c index 0d24a048..8f49f3ae 100644 --- a/path_utils.c +++ b/path_utils.c @@ -15,8 +15,11 @@ #include "access/relation.h" #include "nodes/readfuncs.h" +#include "optimizer/cost.h" #include "optimizer/optimizer.h" +#include "optimizer/planmain.h" #include "path_utils.h" +#include "storage/lmgr.h" #include "utils/syscache.h" #include "utils/lsyscache.h" @@ -25,13 +28,6 @@ #include "postgres_fdw.h" -/* - * Hook on creation of a plan node. We need to store AQO-specific data to - * support learning stage. - */ -create_plan_hook_type prev_create_plan_hook = NULL; - -create_upper_paths_hook_type prev_create_upper_paths_hook = NULL; static AQOPlanNode DefaultAQOPlanNode = { @@ -49,6 +45,15 @@ static AQOPlanNode DefaultAQOPlanNode = .prediction = -1 }; +/* + * Hook on creation of a plan node. We need to store AQO-specific data to + * support learning stage. + */ +static create_plan_hook_type aqo_create_plan_next = NULL; + +static create_upper_paths_hook_type aqo_create_upper_paths_next = NULL; + + static AQOPlanNode * create_aqo_plan_node() { @@ -175,8 +180,6 @@ hashTempTupleDesc(TupleDesc desc) return s; } -#include "storage/lmgr.h" - /* * Get list of relation indexes and prepare list of permanent table reloids, * list of temporary table reloids (can be changed between query launches) and @@ -514,15 +517,15 @@ is_appropriate_path(Path *path) * store AQO prediction in the same context, as the plan. So, explicitly free * all unneeded data. 
*/ -void -aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) +static void +aqo_create_plan(PlannerInfo *root, Path *src, Plan **dest) { bool is_join_path; Plan *plan = *dest; AQOPlanNode *node; - if (prev_create_plan_hook) - prev_create_plan_hook(root, src, dest); + if (aqo_create_plan_next) + aqo_create_plan_next(root, src, dest); if (!query_context.use_aqo && !query_context.learn_aqo && !query_context.collect_stat) @@ -767,20 +770,20 @@ RegisterAQOPlanNodeMethods(void) * * Assume, that we are last in the chain of path creators. */ -void -aqo_store_upper_signature_hook(PlannerInfo *root, - UpperRelationKind stage, - RelOptInfo *input_rel, - RelOptInfo *output_rel, - void *extra) +static void +aqo_store_upper_signature(PlannerInfo *root, + UpperRelationKind stage, + RelOptInfo *input_rel, + RelOptInfo *output_rel, + void *extra) { A_Const *fss_node = makeNode(A_Const); RelSortOut rels = {NIL, NIL}; List *clauses; List *selectivities; - if (prev_create_upper_paths_hook) - (*prev_create_upper_paths_hook)(root, stage, input_rel, output_rel, extra); + if (aqo_create_upper_paths_next) + (*aqo_create_upper_paths_next)(root, stage, input_rel, output_rel, extra); if (!query_context.use_aqo && !query_context.learn_aqo && !force_collect_stat) /* Includes 'disabled query' state. 
*/ @@ -799,3 +802,13 @@ aqo_store_upper_signature_hook(PlannerInfo *root, NULL, NULL); output_rel->ext_nodes = lappend(output_rel->ext_nodes, (void *) fss_node); } + +void +aqo_path_utils_init(void) +{ + aqo_create_plan_next = create_plan_hook; + create_plan_hook = aqo_create_plan; + + aqo_create_upper_paths_next = create_upper_paths_hook; + create_upper_paths_hook = aqo_store_upper_signature; +} diff --git a/path_utils.h b/path_utils.h index 1803e08d..cbe83da0 100644 --- a/path_utils.h +++ b/path_utils.h @@ -3,7 +3,6 @@ #include "nodes/extensible.h" #include "nodes/pathnodes.h" -#include "optimizer/planmain.h" #include "optimizer/planner.h" #define AQO_PLAN_NODE "AQOPlanNode" @@ -52,8 +51,6 @@ typedef struct AQOPlanNode #define booltostr(x) ((x) ? "true" : "false") -extern create_plan_hook_type prev_create_plan_hook; - /* Extracting path information utilities */ extern List *get_selectivities(PlannerInfo *root, List *clauses, @@ -67,16 +64,11 @@ extern List *get_path_clauses(Path *path, PlannerInfo *root, List **selectivities); -extern void aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest); extern AQOPlanNode *get_aqo_plan_node(Plan *plan, bool create); extern void RegisterAQOPlanNodeMethods(void); -extern create_upper_paths_hook_type prev_create_upper_paths_hook; -extern void aqo_store_upper_signature_hook(PlannerInfo *root, - UpperRelationKind stage, - RelOptInfo *input_rel, - RelOptInfo *output_rel, - void *extra); extern List *aqo_get_clauses(PlannerInfo *root, List *restrictlist); +void aqo_path_utils_init(void); + #endif /* PATH_UTILS_H */ diff --git a/postprocessing.c b/postprocessing.c index aa82a534..d4763955 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -27,7 +27,6 @@ #include "hash.h" #include "path_utils.h" #include "machine_learning.h" -#include "preprocessing.h" #include "storage.h" @@ -58,6 +57,13 @@ static int64 growth_rate = 3; static char *AQOPrivateData = "AQOPrivateData"; static char *PlanStateInfo = "PlanStateInfo"; +/* 
Saved hooks */ +static ExecutorStart_hook_type aqo_ExecutorStart_next = NULL; +static ExecutorRun_hook_type aqo_ExecutorRun_next = NULL; +static ExecutorEnd_hook_type aqo_ExecutorEnd_next = NULL; +static ExplainOnePlan_hook_type aqo_ExplainOnePlan_next = NULL; +static ExplainOneNode_hook_type aqo_ExplainOneNode_next = NULL; + /* Query execution statistics collecting utilities */ static void atomic_fss_learn_step(uint64 fhash, int fss, OkNNrdata *data, @@ -542,7 +548,7 @@ learnOnPlanState(PlanState *p, void *context) /* * Set up flags to store cardinality statistics. */ -void +static void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags) { instr_time now; @@ -594,10 +600,7 @@ aqo_ExecutorStart(QueryDesc *queryDesc, int eflags) StoreToQueryEnv(queryDesc); } - if (prev_ExecutorStart_hook) - prev_ExecutorStart_hook(queryDesc, eflags); - else - standard_ExecutorStart(queryDesc, eflags); + aqo_ExecutorStart_next(queryDesc, eflags); if (use_aqo) StorePlanInternals(queryDesc); @@ -706,7 +709,7 @@ set_timeout_if_need(QueryDesc *queryDesc) /* * ExecutorRun hook. */ -void +static void aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once) { @@ -722,10 +725,7 @@ aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, PG_TRY(); { - if (prev_ExecutorRun) - prev_ExecutorRun(queryDesc, direction, count, execute_once); - else - standard_ExecutorRun(queryDesc, direction, count, execute_once); + aqo_ExecutorRun_next(queryDesc, direction, count, execute_once); } PG_FINALLY(); { @@ -743,7 +743,7 @@ aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, * cardinality statistics. * Also it updates query execution statistics in aqo_query_stat. 
*/ -void +static void aqo_ExecutorEnd(QueryDesc *queryDesc) { double execution_time; @@ -841,10 +841,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) MemoryContextSwitchTo(oldctx); MemoryContextReset(AQOLearnMemCtx); - if (prev_ExecutorEnd_hook) - prev_ExecutorEnd_hook(queryDesc); - else - standard_ExecutorEnd(queryDesc); + aqo_ExecutorEnd_next(queryDesc); /* * standard_ExecutorEnd clears the queryDesc->planstate. After this point no @@ -975,7 +972,64 @@ ExtractFromQueryEnv(QueryDesc *queryDesc) return true; } -void +/* + * Prints if the plan was constructed with AQO. + */ +static void +print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, + ExplainState *es, const char *queryString, + ParamListInfo params, const instr_time *planduration, + QueryEnvironment *queryEnv) +{ + if (aqo_ExplainOnePlan_next) + aqo_ExplainOnePlan_next(plannedstmt, into, es, queryString, + params, planduration, queryEnv); + + if (IsQueryDisabled() || !aqo_show_details) + return; + + /* Report to user about aqo state only in verbose mode */ + ExplainPropertyBool("Using aqo", query_context.use_aqo, es); + + switch (aqo_mode) + { + case AQO_MODE_INTELLIGENT: + ExplainPropertyText("AQO mode", "INTELLIGENT", es); + break; + case AQO_MODE_FORCED: + ExplainPropertyText("AQO mode", "FORCED", es); + break; + case AQO_MODE_CONTROLLED: + ExplainPropertyText("AQO mode", "CONTROLLED", es); + break; + case AQO_MODE_LEARN: + ExplainPropertyText("AQO mode", "LEARN", es); + break; + case AQO_MODE_FROZEN: + ExplainPropertyText("AQO mode", "FROZEN", es); + break; + case AQO_MODE_DISABLED: + ExplainPropertyText("AQO mode", "DISABLED", es); + break; + default: + elog(ERROR, "Bad AQO state"); + break; + } + + /* + * Query class provides an user the conveniently use of the AQO + * auxiliary functions. 
+ */ + if (aqo_mode != AQO_MODE_DISABLED || force_collect_stat) + { + if (aqo_show_hash) + ExplainPropertyInteger("Query hash", NULL, + query_context.query_hash, es); + ExplainPropertyInteger("JOINS", NULL, njoins, es); + } +} + +static void print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) { int wrkrs = 1; @@ -983,8 +1037,8 @@ print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) AQOPlanNode *aqo_node; /* Extension, which took a hook early can be executed early too. */ - if (prev_ExplainOneNode_hook) - prev_ExplainOneNode_hook(es, ps, plan); + if (aqo_ExplainOneNode_next) + aqo_ExplainOneNode_next(es, ps, plan); if (IsQueryDisabled() || !plan || es->format != EXPLAIN_FORMAT_TEXT) return; @@ -1042,59 +1096,20 @@ print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) appendStringInfo(es->str, ", fss=%d", aqo_node->fss); } -/* - * Prints if the plan was constructed with AQO. - */ void -print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, - ExplainState *es, const char *queryString, - ParamListInfo params, const instr_time *planduration, - QueryEnvironment *queryEnv) +aqo_postprocessing_init(void) { - if (prev_ExplainOnePlan_hook) - prev_ExplainOnePlan_hook(plannedstmt, into, es, queryString, - params, planduration, queryEnv); - - if (IsQueryDisabled() || !aqo_show_details) - return; - - /* Report to user about aqo state only in verbose mode */ - ExplainPropertyBool("Using aqo", query_context.use_aqo, es); - - switch (aqo_mode) - { - case AQO_MODE_INTELLIGENT: - ExplainPropertyText("AQO mode", "INTELLIGENT", es); - break; - case AQO_MODE_FORCED: - ExplainPropertyText("AQO mode", "FORCED", es); - break; - case AQO_MODE_CONTROLLED: - ExplainPropertyText("AQO mode", "CONTROLLED", es); - break; - case AQO_MODE_LEARN: - ExplainPropertyText("AQO mode", "LEARN", es); - break; - case AQO_MODE_FROZEN: - ExplainPropertyText("AQO mode", "FROZEN", es); - break; - case AQO_MODE_DISABLED: - ExplainPropertyText("AQO mode", "DISABLED", es); 
- break; - default: - elog(ERROR, "Bad AQO state"); - break; - } - - /* - * Query class provides an user the conveniently use of the AQO - * auxiliary functions. - */ - if (aqo_mode != AQO_MODE_DISABLED || force_collect_stat) - { - if (aqo_show_hash) - ExplainPropertyInteger("Query hash", NULL, - query_context.query_hash, es); - ExplainPropertyInteger("JOINS", NULL, njoins, es); - } + /* Executor hooks */ + aqo_ExecutorStart_next = ExecutorStart_hook ? ExecutorStart_hook : standard_ExecutorStart; + ExecutorStart_hook = aqo_ExecutorStart; + aqo_ExecutorRun_next = ExecutorRun_hook ? ExecutorRun_hook : standard_ExecutorRun; + ExecutorRun_hook = aqo_ExecutorRun; + aqo_ExecutorEnd_next = ExecutorEnd_hook ? ExecutorEnd_hook : standard_ExecutorEnd; + ExecutorEnd_hook = aqo_ExecutorEnd; + + /* Service hooks. */ + aqo_ExplainOnePlan_next = ExplainOnePlan_hook; + ExplainOnePlan_hook = print_into_explain; + aqo_ExplainOneNode_next = ExplainOneNode_hook; + ExplainOneNode_hook = print_node_explain; } diff --git a/preprocessing.c b/preprocessing.c index aadc959e..36c23ba2 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -64,7 +64,6 @@ #include "parser/scansup.h" #include "aqo.h" #include "hash.h" -#include "preprocessing.h" #include "storage.h" /* List of feature spaces, that are processing in this backend. */ @@ -72,30 +71,12 @@ List *cur_classes = NIL; int aqo_join_threshold = 0; +static planner_hook_type aqo_planner_next = NULL; + +static void disable_aqo_for_query(void); static bool isQueryUsingSystemRelation(Query *query); static bool isQueryUsingSystemRelation_walker(Node *node, void *context); -/* - * Calls standard query planner or its previous hook. 
- */ -static PlannedStmt * -call_default_planner(Query *parse, - const char *query_string, - int cursorOptions, - ParamListInfo boundParams) -{ - if (prev_planner_hook) - return prev_planner_hook(parse, - query_string, - cursorOptions, - boundParams); - else - return standard_planner(parse, - query_string, - cursorOptions, - boundParams); -} - /* * Can AQO be used for the query? */ @@ -119,10 +100,8 @@ aqoIsEnabled(Query *parse) * Creates an entry in aqo_queries for new type of query if it is * necessary, i. e. AQO mode is "intelligent". */ -PlannedStmt * -aqo_planner(Query *parse, - const char *query_string, - int cursorOptions, +static PlannedStmt * +aqo_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams) { bool query_is_stored = false; @@ -146,10 +125,7 @@ aqo_planner(Query *parse, */ disable_aqo_for_query(); - return call_default_planner(parse, - query_string, - cursorOptions, - boundParams); + return aqo_planner_next(parse, query_string, cursorOptions, boundParams); } selectivity_cache_clear(); @@ -175,10 +151,7 @@ aqo_planner(Query *parse, */ disable_aqo_for_query(); - return call_default_planner(parse, - query_string, - cursorOptions, - boundParams); + return aqo_planner_next(parse, query_string, cursorOptions, boundParams); } elog(DEBUG1, "AQO will be used for query '%s', class "UINT64_FORMAT, @@ -346,8 +319,7 @@ aqo_planner(Query *parse, { PlannedStmt *stmt; - stmt = call_default_planner(parse, query_string, - cursorOptions, boundParams); + stmt = aqo_planner_next(parse, query_string, cursorOptions, boundParams); /* Release the memory, allocated for AQO predictions */ MemoryContextReset(AQOPredictMemCtx); @@ -358,7 +330,7 @@ aqo_planner(Query *parse, /* * Turn off all AQO functionality for the current query. 
*/ -void +static void disable_aqo_for_query(void) { query_context.learn_aqo = false; @@ -505,3 +477,10 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) isQueryUsingSystemRelation_walker, context); } + +void +aqo_preprocessing_init(void) +{ + aqo_planner_next = planner_hook ? planner_hook : standard_planner; + planner_hook = aqo_planner; +} \ No newline at end of file diff --git a/preprocessing.h b/preprocessing.h deleted file mode 100644 index f27deb91..00000000 --- a/preprocessing.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef __PREPROCESSING_H__ -#define __PREPROCESSING_H__ - -#include "nodes/pathnodes.h" -#include "nodes/plannodes.h" -extern PlannedStmt *aqo_planner(Query *parse, - const char *query_string, - int cursorOptions, - ParamListInfo boundParams); -extern void disable_aqo_for_query(void); - -#endif /* __PREPROCESSING_H__ */ diff --git a/storage.c b/storage.c index be14f3e9..0bdee72d 100644 --- a/storage.c +++ b/storage.c @@ -22,11 +22,11 @@ #include "funcapi.h" #include "miscadmin.h" #include "pgstat.h" +#include "storage/ipc.h" #include "aqo.h" #include "aqo_shared.h" #include "machine_learning.h" -#include "preprocessing.h" #include "storage.h" diff --git a/storage.h b/storage.h index 35d94336..2b4e4cdd 100644 --- a/storage.h +++ b/storage.h @@ -164,4 +164,10 @@ extern void init_deactivated_queries_storage(void); extern bool query_is_deactivated(uint64 query_hash); extern void add_deactivated_query(uint64 query_hash); +/* Storage interaction */ +extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids); +extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids); + +extern bool update_query_timeout(uint64 queryid, int64 smart_timeout); + #endif /* STORAGE_H */ From 0bc82f26639f24ad1b577bdfb761a1a77f6d895e Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Sat, 25 Mar 2023 20:09:25 +0500 Subject: [PATCH 110/134] Bugfix. Switch off quickly all AQO features if queryId is disabled. 
One installcheck test was added into the github actions workflow. Reviewed by: @Anisimov-ds --- .github/workflows/installchecks.yml | 14 +++++++++++++- preprocessing.c | 2 +- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/workflows/installchecks.yml b/.github/workflows/installchecks.yml index 075034a0..4a4d478b 100644 --- a/.github/workflows/installchecks.yml +++ b/.github/workflows/installchecks.yml @@ -132,12 +132,24 @@ jobs: psql -c "ALTER SYSTEM SET aqo.mode = 'learn'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" - learn_result=$(make -k installcheck-world) + make -k installcheck-world + + # Should work like a total off for all the AQO features + - name: installcheck_learn_queryid_off + continue-on-error: true + run: | + cd $PG_DIR + aqo_instance_launch.sh + psql -c "ALTER SYSTEM SET compute_query_id = 'off'" + psql -c "SELECT pg_reload_conf()" + # The AQO tests itself wouldn't pass + make -k installcheck-world - name: installcheck_intelligent continue-on-error: true run: | cd $PG_DIR + psql -c "ALTER SYSTEM SET compute_query_id = 'regress'" psql -c "ALTER SYSTEM SET aqo.mode = 'intelligent'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" diff --git a/preprocessing.c b/preprocessing.c index 36c23ba2..6e618ae9 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -83,7 +83,7 @@ static bool isQueryUsingSystemRelation_walker(Node *node, void *context); static bool aqoIsEnabled(Query *parse) { - if (creating_extension || + if (creating_extension || !IsQueryIdEnabled() || (aqo_mode == AQO_MODE_DISABLED && !force_collect_stat) || (parse->commandType != CMD_SELECT && parse->commandType != CMD_INSERT && parse->commandType != CMD_UPDATE && parse->commandType != CMD_DELETE)) From 8e346c590d7c48e9a73834a93245191c5831a7f8 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Sat, 25 Mar 2023 22:13:15 +0500 Subject: [PATCH 111/134] Enhancement. 
Report if someone external inserted a hook into the chain of AQO prediction hooks. It isn't a strict rule, but we should know about that. --- cardinality_hooks.c | 102 ++++++++++++++++++++++++++++++-------------- 1 file changed, 69 insertions(+), 33 deletions(-) diff --git a/cardinality_hooks.c b/cardinality_hooks.c index a86d5fa2..bd7a0b2b 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -40,6 +40,12 @@ double predicted_ppi_rows; double fss_ppi_hash; +/* + * Cardinality prediction hooks. + * It isn't clear what to do if someone else tries to live in this chain. + * Of course, someone may want to just report some stat or something like that. + * So, it can be legal, sometimees. So far, we only report this fact. + */ static set_baserel_rows_estimate_hook_type aqo_set_baserel_rows_estimate_next = NULL; static get_parameterized_baserel_size_hook_type aqo_get_parameterized_baserel_size_next = NULL; static set_joinrel_size_estimates_hook_type aqo_set_joinrel_size_estimates_next = NULL; @@ -95,12 +101,17 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) /* Return to the caller's memory context. */ MemoryContextSwitchTo(old_ctx_m); - if (predicted >= 0) - { - rel->rows = predicted; - rel->predicted_cardinality = predicted; - return; - } + if (predicted < 0) + goto default_estimator; + + if ((aqo_set_baserel_rows_estimate_next != set_baserel_rows_estimate_standard || + set_baserel_rows_estimate_hook != aqo_set_baserel_rows_estimate)) + /* It is unclear that to do in situation of such kind. 
Just report it */ + elog(WARNING, "AQO is in the middle of the set_baserel_rows_estimate_hook chain"); + + rel->rows = predicted; + rel->predicted_cardinality = predicted; + return; default_estimator: rel->predicted_cardinality = -1.; @@ -116,6 +127,11 @@ aqo_parampathinfo_postinit(ParamPathInfo *ppi) if (IsQueryDisabled()) return; + if ((aqo_set_parampathinfo_postinit_next != NULL || + parampathinfo_postinit_hook != aqo_parampathinfo_postinit)) + /* It is unclear that to do in situation of such kind. Just report it */ + elog(WARNING, "AQO is in the middle of the parampathinfo_postinit_hook chain"); + ppi->predicted_ppi_rows = predicted_ppi_rows; ppi->fss_ppi_hash = fss_ppi_hash; } @@ -199,8 +215,15 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, predicted_ppi_rows = predicted; fss_ppi_hash = fss; - if (predicted >= 0) - return predicted; + if (predicted < 0) + goto default_estimator; + + if ((aqo_get_parameterized_baserel_size_next != get_parameterized_baserel_size_standard || + get_parameterized_baserel_size_hook != aqo_get_parameterized_baserel_size)) + /* It is unclear that to do in situation of such kind. Just report it */ + elog(WARNING, "AQO is in the middle of the aqo_get_parameterized_baserel_size_next chain"); + + return predicted; default_estimator: return aqo_get_parameterized_baserel_size_next(root, rel, param_clauses); @@ -264,12 +287,17 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, rel->fss_hash = fss; - if (predicted >= 0) - { - rel->predicted_cardinality = predicted; - rel->rows = predicted; - return; - } + if (predicted < 0) + goto default_estimator; + + if ((aqo_set_joinrel_size_estimates_next != set_joinrel_size_estimates_standard || + set_joinrel_size_estimates_hook != aqo_set_joinrel_size_estimates)) + /* It is unclear that to do in situation of such kind. 
Just report it */ + elog(WARNING, "AQO is in the middle of the set_joinrel_size_estimates_hook chain"); + + rel->predicted_cardinality = predicted; + rel->rows = predicted; + return; default_estimator: rel->predicted_cardinality = -1; @@ -335,8 +363,15 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, predicted_ppi_rows = predicted; fss_ppi_hash = fss; - if (predicted >= 0) - return predicted; + if (predicted < 0) + goto default_estimator; + + if ((aqo_get_parameterized_joinrel_size_next != get_parameterized_joinrel_size_standard || + get_parameterized_joinrel_size_hook != aqo_get_parameterized_joinrel_size)) + /* It is unclear that to do in situation of such kind. Just report it */ + elog(WARNING, "AQO is in the middle of the get_parameterized_joinrel_size_hook chain"); + + return predicted; default_estimator: return aqo_get_parameterized_joinrel_size_next(root, rel, @@ -394,13 +429,15 @@ aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, /* XXX: Don't support some GROUPING options */ goto default_estimator; - if (aqo_estimate_num_groups_next != NULL) - elog(WARNING, "AQO replaced another estimator of a groups number"); - /* Zero the estinfo output parameter, if non-NULL */ if (estinfo != NULL) memset(estinfo, 0, sizeof(EstimationInfo)); + if (aqo_estimate_num_groups_next != NULL || + estimate_num_groups_hook != aqo_estimate_num_groups) + /* It is unclear that to do in situation of such kind. Just report it */ + elog(WARNING, "AQO is in the middle of the estimate_num_groups_hook chain"); + if (groupExprs == NIL) return 1.0; @@ -436,29 +473,28 @@ aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, void aqo_cardinality_hooks_init(void) { - - /* Cardinality prediction hooks. */ - aqo_set_baserel_rows_estimate_next = set_baserel_rows_estimate_hook ? 
- set_baserel_rows_estimate_hook : - set_baserel_rows_estimate_standard; + if (set_baserel_rows_estimate_hook || + set_foreign_rows_estimate_hook || + get_parameterized_baserel_size_hook || + set_joinrel_size_estimates_hook || + get_parameterized_joinrel_size_hook || + parampathinfo_postinit_hook || + estimate_num_groups_hook) + elog(ERROR, "AQO estimation hooks shouldn't be intercepted"); + + aqo_set_baserel_rows_estimate_next = set_baserel_rows_estimate_standard; set_baserel_rows_estimate_hook = aqo_set_baserel_rows_estimate; /* XXX: we have a problem here. Should be redesigned later */ set_foreign_rows_estimate_hook = aqo_set_baserel_rows_estimate; - aqo_get_parameterized_baserel_size_next = get_parameterized_baserel_size_hook ? - get_parameterized_baserel_size_hook : - get_parameterized_baserel_size_standard; + aqo_get_parameterized_baserel_size_next = get_parameterized_baserel_size_standard; get_parameterized_baserel_size_hook = aqo_get_parameterized_baserel_size; - aqo_set_joinrel_size_estimates_next = set_joinrel_size_estimates_hook ? - set_joinrel_size_estimates_hook : - set_joinrel_size_estimates_standard; + aqo_set_joinrel_size_estimates_next = set_joinrel_size_estimates_standard; set_joinrel_size_estimates_hook = aqo_set_joinrel_size_estimates; - aqo_get_parameterized_joinrel_size_next = get_parameterized_joinrel_size_hook ? - get_parameterized_joinrel_size_hook : - get_parameterized_joinrel_size_standard; + aqo_get_parameterized_joinrel_size_next = get_parameterized_joinrel_size_standard; get_parameterized_joinrel_size_hook = aqo_get_parameterized_joinrel_size; aqo_set_parampathinfo_postinit_next = parampathinfo_postinit_hook; From 8bf4c445e7c570fdff5b67876ef856b3e0499c04 Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Tue, 28 Mar 2023 12:23:02 +0700 Subject: [PATCH 112/134] Fix. Conventionally use of hooks. 
Also, some arrangement for stable14 added by a.lepikhov --- aqo.c | 40 ---------------------------------------- aqo_shared.c | 10 +++------- cardinality_hooks.c | 10 +++++----- path_utils.c | 2 +- postprocessing.c | 10 +++++----- preprocessing.c | 6 +++--- 6 files changed, 17 insertions(+), 61 deletions(-) diff --git a/aqo.c b/aqo.c index 86a37ccf..0abb3c2f 100644 --- a/aqo.c +++ b/aqo.c @@ -315,50 +315,11 @@ _PG_init(void) NULL, NULL); -<<<<<<< HEAD - prev_shmem_startup_hook = shmem_startup_hook; - shmem_startup_hook = aqo_init_shmem; - prev_planner_hook = planner_hook; - planner_hook = aqo_planner; - prev_ExecutorStart_hook = ExecutorStart_hook; - ExecutorStart_hook = aqo_ExecutorStart; - prev_ExecutorRun = ExecutorRun_hook; - ExecutorRun_hook = aqo_ExecutorRun; - prev_ExecutorEnd_hook = ExecutorEnd_hook; - ExecutorEnd_hook = aqo_ExecutorEnd; - - /* Cardinality prediction hooks. */ - prev_set_baserel_rows_estimate_hook = set_baserel_rows_estimate_hook; - set_foreign_rows_estimate_hook = aqo_set_baserel_rows_estimate; - set_baserel_rows_estimate_hook = aqo_set_baserel_rows_estimate; - prev_get_parameterized_baserel_size_hook = get_parameterized_baserel_size_hook; - get_parameterized_baserel_size_hook = aqo_get_parameterized_baserel_size; - prev_set_joinrel_size_estimates_hook = set_joinrel_size_estimates_hook; - set_joinrel_size_estimates_hook = aqo_set_joinrel_size_estimates; - prev_get_parameterized_joinrel_size_hook = get_parameterized_joinrel_size_hook; - get_parameterized_joinrel_size_hook = aqo_get_parameterized_joinrel_size; - prev_estimate_num_groups_hook = estimate_num_groups_hook; - estimate_num_groups_hook = aqo_estimate_num_groups_hook; - parampathinfo_postinit_hook = ppi_hook; - - prev_create_plan_hook = create_plan_hook; - create_plan_hook = aqo_create_plan_hook; - - /* Service hooks. 
*/ - prev_ExplainOnePlan_hook = ExplainOnePlan_hook; - ExplainOnePlan_hook = print_into_explain; - prev_ExplainOneNode_hook = ExplainOneNode_hook; - ExplainOneNode_hook = print_node_explain; - - prev_create_upper_paths_hook = create_upper_paths_hook; - create_upper_paths_hook = aqo_store_upper_signature_hook; -======= aqo_shmem_init(); aqo_preprocessing_init(); aqo_postprocessing_init(); aqo_cardinality_hooks_init(); aqo_path_utils_init(); ->>>>>>> daf05a0 (Bugfix. Do away with possible conflict of hooks, declared as 'extern' in) init_deactivated_queries_storage(); @@ -393,7 +354,6 @@ _PG_init(void) RegisterAQOPlanNodeMethods(); EmitWarningsOnPlaceholders("aqo"); - RequestAddinShmemSpace(aqo_memsize()); } /* diff --git a/aqo_shared.c b/aqo_shared.c index d704cf76..b7cfced8 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -18,7 +18,6 @@ int fs_max_items = 10000; /* Max number of different feature spaces in ML model int fss_max_items = 100000; /* Max number of different feature subspaces in ML model */ static shmem_startup_hook_type aqo_shmem_startup_next = NULL; -static shmem_request_hook_type aqo_shmem_request_next = NULL; static void on_shmem_shutdown(int code, Datum arg); @@ -29,7 +28,7 @@ aqo_init_shmem(void) HASHCTL info; if (aqo_shmem_startup_next) - aqo_shmem_startup_next(); + (*aqo_shmem_startup_next)(); aqo_state = NULL; stat_htab = NULL; @@ -128,9 +127,6 @@ aqo_shmem_request(void) { Size size; - if (aqo_shmem_request_next) - aqo_shmem_request_next(); - size = MAXALIGN(sizeof(AQOSharedState)); size = add_size(size, hash_estimate_size(fs_max_items, sizeof(AQOSharedState))); size = add_size(size, hash_estimate_size(fs_max_items, sizeof(StatEntry))); @@ -146,6 +142,6 @@ aqo_shmem_init(void) { aqo_shmem_startup_next = shmem_startup_hook; shmem_startup_hook = aqo_init_shmem; - aqo_shmem_request_next = shmem_request_hook; - shmem_request_hook = aqo_shmem_request; + + aqo_shmem_request(); } diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 
bd7a0b2b..f0d745bb 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -115,7 +115,7 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) default_estimator: rel->predicted_cardinality = -1.; - aqo_set_baserel_rows_estimate_next(root, rel); + (*aqo_set_baserel_rows_estimate_next)(root, rel); } static void @@ -226,7 +226,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, return predicted; default_estimator: - return aqo_get_parameterized_baserel_size_next(root, rel, param_clauses); + return (*aqo_get_parameterized_baserel_size_next)(root, rel, param_clauses); } /* @@ -301,7 +301,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, default_estimator: rel->predicted_cardinality = -1; - aqo_set_joinrel_size_estimates_next(root, rel, outer_rel, inner_rel, + (*aqo_set_joinrel_size_estimates_next)(root, rel, outer_rel, inner_rel, sjinfo, restrictlist); } @@ -374,7 +374,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, return predicted; default_estimator: - return aqo_get_parameterized_joinrel_size_next(root, rel, + return (*aqo_get_parameterized_joinrel_size_next)(root, rel, outer_path, inner_path, sjinfo, clauses); } @@ -463,7 +463,7 @@ aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, default_estimator: if (aqo_estimate_num_groups_next) - return aqo_estimate_num_groups_next(root, groupExprs, subpath, + return (*aqo_estimate_num_groups_next)(root, groupExprs, subpath, grouped_rel, pgset, estinfo); else return estimate_num_groups(root, groupExprs, subpath->rows, diff --git a/path_utils.c b/path_utils.c index 8f49f3ae..351aa66a 100644 --- a/path_utils.c +++ b/path_utils.c @@ -525,7 +525,7 @@ aqo_create_plan(PlannerInfo *root, Path *src, Plan **dest) AQOPlanNode *node; if (aqo_create_plan_next) - aqo_create_plan_next(root, src, dest); + (*aqo_create_plan_next)(root, src, dest); if (!query_context.use_aqo && !query_context.learn_aqo && !query_context.collect_stat) diff --git a/postprocessing.c 
b/postprocessing.c index d4763955..ba2e19e0 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -600,7 +600,7 @@ aqo_ExecutorStart(QueryDesc *queryDesc, int eflags) StoreToQueryEnv(queryDesc); } - aqo_ExecutorStart_next(queryDesc, eflags); + (*aqo_ExecutorStart_next)(queryDesc, eflags); if (use_aqo) StorePlanInternals(queryDesc); @@ -725,7 +725,7 @@ aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, PG_TRY(); { - aqo_ExecutorRun_next(queryDesc, direction, count, execute_once); + (*aqo_ExecutorRun_next)(queryDesc, direction, count, execute_once); } PG_FINALLY(); { @@ -841,7 +841,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) MemoryContextSwitchTo(oldctx); MemoryContextReset(AQOLearnMemCtx); - aqo_ExecutorEnd_next(queryDesc); + (*aqo_ExecutorEnd_next)(queryDesc); /* * standard_ExecutorEnd clears the queryDesc->planstate. After this point no @@ -982,7 +982,7 @@ print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, QueryEnvironment *queryEnv) { if (aqo_ExplainOnePlan_next) - aqo_ExplainOnePlan_next(plannedstmt, into, es, queryString, + (*aqo_ExplainOnePlan_next)(plannedstmt, into, es, queryString, params, planduration, queryEnv); if (IsQueryDisabled() || !aqo_show_details) @@ -1038,7 +1038,7 @@ print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) /* Extension, which took a hook early can be executed early too. 
*/ if (aqo_ExplainOneNode_next) - aqo_ExplainOneNode_next(es, ps, plan); + (*aqo_ExplainOneNode_next)(es, ps, plan); if (IsQueryDisabled() || !plan || es->format != EXPLAIN_FORMAT_TEXT) return; diff --git a/preprocessing.c b/preprocessing.c index 6e618ae9..03c3432a 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -125,7 +125,7 @@ aqo_planner(Query *parse, const char *query_string, int cursorOptions, */ disable_aqo_for_query(); - return aqo_planner_next(parse, query_string, cursorOptions, boundParams); + return (*aqo_planner_next)(parse, query_string, cursorOptions, boundParams); } selectivity_cache_clear(); @@ -151,7 +151,7 @@ aqo_planner(Query *parse, const char *query_string, int cursorOptions, */ disable_aqo_for_query(); - return aqo_planner_next(parse, query_string, cursorOptions, boundParams); + return (*aqo_planner_next)(parse, query_string, cursorOptions, boundParams); } elog(DEBUG1, "AQO will be used for query '%s', class "UINT64_FORMAT, @@ -319,7 +319,7 @@ aqo_planner(Query *parse, const char *query_string, int cursorOptions, { PlannedStmt *stmt; - stmt = aqo_planner_next(parse, query_string, cursorOptions, boundParams); + stmt = (*aqo_planner_next)(parse, query_string, cursorOptions, boundParams); /* Release the memory, allocated for AQO predictions */ MemoryContextReset(AQOPredictMemCtx); From bc72825b2977576c6464168d9640c3a4dc4c55aa Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 13 Apr 2023 15:31:17 +0500 Subject: [PATCH 113/134] Skip 'DROP EXTENSION' test in 001_pgbench.pl because of instability on Windows --- t/001_pgbench.pl | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index cb6b76de..def7786e 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -403,8 +403,16 @@ "); $node->restart(); -$node->command_ok([ 'pgbench', '-T', +# Some specifics of the core PostgreSQL pgbench code don't allow this +# test to pass stably on Windows OS.
+# See https://www.postgresql.org/message-id/flat/8225e78650dd69f69c8cff37ecce9a09%40postgrespro.ru +SKIP: +{ + skip "Socket allocation issues. ", 1 + if ($windows_os); + $node->command_ok([ 'pgbench', '-T', "50", '-c', "$CLIENTS", '-j', "$THREADS" , '-f', "$bank"], 'Conflicts with an AQO dropping command.'); +} $node->stop(); From 89206150c3182e7771e9229d7da3eea2d2cba98b Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 20 Apr 2023 13:43:48 +0500 Subject: [PATCH 114/134] Enhancement. The buildfarm has detected curious instability in the parallel_workers test: EXPLAIN of Partial Aggregate sometimes showed 0 rows instead of 1. It is a race: parallel workers ran when the main process had read all underlying tuples. Use explain without analyze to avoid such a problem. As I see, we don't lose anything important. --- expected/parallel_workers.out | 37 +++++++++++++++++------------------ sql/parallel_workers.sql | 5 ++--- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/expected/parallel_workers.out b/expected/parallel_workers.out index 3e408f49..c64aed61 100644 --- a/expected/parallel_workers.out +++ b/expected/parallel_workers.out @@ -68,53 +68,52 @@ WHERE q1.id = q2.id; -- Learning stage -- XXX: Why grouping prediction isn't working here?
SELECT str FROM expln(' -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) FROM (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, (SELECT max(id) AS id, payload FROM t WHERE id % 101 = 0 GROUP BY (payload)) AS q2 WHERE q1.id = q2.id;') AS str -WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%' - AND str NOT LIKE '%Gather Merge%'; +WHERE str NOT LIKE '%Workers%'; str -------------------------------------------------------------------------------------------------- - Aggregate (actual rows=1 loops=1) + Aggregate AQO not used - -> Merge Join (actual rows=0 loops=1) + -> Merge Join AQO not used Merge Cond: (q2.id = t_1.id) - -> Sort (actual rows=1 loops=1) + -> Sort Sort Key: q2.id - -> Subquery Scan on q2 (actual rows=1 loops=1) + -> Subquery Scan on q2 AQO not used - -> Finalize GroupAggregate (actual rows=1 loops=1) + -> Finalize GroupAggregate AQO not used Group Key: t.payload + -> Gather Merge AQO not used - -> Partial GroupAggregate (actual rows=1 loops=3) + -> Partial GroupAggregate AQO not used Group Key: t.payload - -> Sort (actual rows=330 loops=3) + -> Sort AQO not used Sort Key: t.payload - -> Parallel Seq Scan on t (actual rows=330 loops=3) - AQO: rows=991, error=0% + -> Parallel Seq Scan on t + AQO: rows=991 Filter: ((id % '101'::numeric) = '0'::numeric) - Rows Removed by Filter: 33003 - -> Group (actual rows=1000 loops=1) + -> Group AQO not used Group Key: t_1.id + -> Gather Merge AQO not used - -> Group (actual rows=333 loops=3) + -> Group AQO not used Group Key: t_1.id - -> Sort (actual rows=333 loops=3) + -> Sort AQO not used Sort Key: t_1.id - -> Parallel Seq Scan on t t_1 (actual rows=333 loops=3) - AQO: rows=991, error=-1% + -> Parallel Seq Scan on t t_1 + AQO: rows=991 Filter: ((id % '100'::numeric) = '0'::numeric) - Rows Removed by Filter: 33000 Using aqo: true AQO mode: LEARN JOINS: 1 diff --git a/sql/parallel_workers.sql b/sql/parallel_workers.sql index 
2cd04bc2..419f23e6 100644 --- a/sql/parallel_workers.sql +++ b/sql/parallel_workers.sql @@ -43,14 +43,13 @@ SELECT count(*) FROM WHERE q1.id = q2.id; -- Learning stage -- XXX: Why grouping prediction isn't working here? SELECT str FROM expln(' -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) FROM (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, (SELECT max(id) AS id, payload FROM t WHERE id % 101 = 0 GROUP BY (payload)) AS q2 WHERE q1.id = q2.id;') AS str -WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%' - AND str NOT LIKE '%Gather Merge%'; +WHERE str NOT LIKE '%Workers%'; RESET parallel_tuple_cost; RESET parallel_setup_cost; From dcc5ec29849e226023ca5a9d78cb16c7b224e80b Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Tue, 11 Apr 2023 01:16:24 +0700 Subject: [PATCH 115/134] Bugfix. Correctly use of a routine for joins counting. --- expected/aqo_fdw.out | 2 +- expected/feature_subspace.out | 4 ++-- expected/look_a_like.out | 20 ++++++++++---------- expected/unsupported.out | 2 +- postprocessing.c | 2 +- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index 69c1b132..ca69fab4 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -120,7 +120,7 @@ SELECT str FROM expln(' AQO not used Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (14 rows) -- Should learn on postgres_fdw nodes diff --git a/expected/feature_subspace.out b/expected/feature_subspace.out index a53b57e7..eceb0eb1 100644 --- a/expected/feature_subspace.out +++ b/expected/feature_subspace.out @@ -43,7 +43,7 @@ WHERE str NOT LIKE '%Memory%'; AQO not used Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (14 rows) -- TODO: Using method of other classes neighbours we get a bad estimation. 
@@ -66,7 +66,7 @@ WHERE str NOT LIKE '%Memory%'; AQO not used Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (14 rows) -- Look into the reason: two JOINs from different classes have the same FSS. diff --git a/expected/look_a_like.out b/expected/look_a_like.out index fb76fdd6..9e3dc286 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -56,7 +56,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Rows Removed by Filter: 900 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (16 rows) SELECT str AS result @@ -83,7 +83,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Rows Removed by Filter: 900 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (19 rows) SELECT str AS result @@ -108,7 +108,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Rows Removed by Filter: 500 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (17 rows) --query contains nodes that have already been predicted @@ -134,7 +134,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Rows Removed by Filter: 500 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (17 rows) SELECT str AS result @@ -159,7 +159,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Rows Removed by Filter: 300 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (17 rows) SELECT str AS result @@ -184,7 +184,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Rows Removed by Filter: 600 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (17 rows) SELECT str AS result @@ -209,7 +209,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Rows Removed by Filter: 500 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (17 rows) --query contains nodes that have already been predicted @@ -235,7 +235,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT 
LIKE '%Memory%' and str NOT L Rows Removed by Filter: 600 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (17 rows) SELECT str AS result @@ -516,7 +516,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN - JOINS: 1 + JOINS: 2 (24 rows) SELECT str AS result @@ -548,7 +548,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Output: c.z1, c.z2, c.z3 Using aqo: true AQO mode: LEARN - JOINS: 1 + JOINS: 2 (24 rows) RESET aqo.wide_search; diff --git a/expected/unsupported.out b/expected/unsupported.out index a1a6f4ae..6e45dcd8 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -487,7 +487,7 @@ SELECT * FROM Filter: (x > 20) Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (13 rows) -- AQO needs to predict total fetched tuples in a table. diff --git a/postprocessing.c b/postprocessing.c index ba2e19e0..6850cde4 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -916,7 +916,7 @@ StorePlanInternals(QueryDesc *queryDesc) MemoryContext oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); njoins = 0; - planstate_tree_walker(queryDesc->planstate, calculateJoinNum, &njoins); + calculateJoinNum(queryDesc->planstate, &njoins); if (queryDesc->queryEnv == NULL) queryDesc->queryEnv = create_queryEnv(); From 1228c999bbd4b043447dc901934a25ac87804f63 Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Thu, 20 Apr 2023 13:49:32 +0700 Subject: [PATCH 116/134] Add the routine for safe update. 
Reviewed by: @Alena0704 --- storage.c | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/storage.c b/storage.c index 0bdee72d..af368aa1 100644 --- a/storage.c +++ b/storage.c @@ -74,8 +74,12 @@ HTAB *data_htab = NULL; dsa_area *data_dsa = NULL; HTAB *deactivated_queries = NULL; -/* Used to check data file consistency */ -static const uint32 PGAQO_FILE_HEADER = 123467589; +/* + * Used to check data file consistency + * When changing data structures, PGAQO_FILE_HEADER should also be changed. + * In this case, all AQO file storages will be reset. + */ +static const uint32 PGAQO_FILE_HEADER = 0x20230330; static const uint32 PGAQO_PG_MAJOR_VERSION = PG_VERSION_NUM / 100; /* @@ -374,7 +378,7 @@ aqo_query_stat(PG_FUNCTION_ARGS) Datum values[TOTAL_NCOLS + 1]; bool nulls[TOTAL_NCOLS + 1]; HASH_SEQ_STATUS hash_seq; - StatEntry *entry; + StatEntry *entry; /* check to see if caller supports us returning a tuplestore */ if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) @@ -393,7 +397,9 @@ aqo_query_stat(PG_FUNCTION_ARGS) /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == TOTAL_NCOLS); + + if (tupDesc->natts != TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; @@ -1160,7 +1166,9 @@ aqo_query_texts(PG_FUNCTION_ARGS) /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == QT_TOTAL_NCOLS); + + if (tupDesc->natts != QT_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; @@ -1756,7 +1764,9 @@ 
aqo_data(PG_FUNCTION_ARGS) /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == AD_TOTAL_NCOLS); + + if (tupDesc->natts != AD_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; @@ -1916,7 +1926,9 @@ aqo_queries(PG_FUNCTION_ARGS) /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == AQ_TOTAL_NCOLS); + + if (tupDesc->natts != AQ_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; @@ -2379,7 +2391,8 @@ aqo_cleanup(PG_FUNCTION_ARGS) if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == 2); + if (tupDesc->natts != 2) + elog(ERROR, "[AQO] Incorrect number of output arguments"); /* * Make forced cleanup: if at least one fss isn't actual, remove parent FS @@ -2490,7 +2503,9 @@ aqo_cardinality_error(PG_FUNCTION_ARGS) /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == AQE_TOTAL_NCOLS); + + if (tupDesc->natts != AQE_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; @@ -2558,8 +2573,8 @@ aqo_execution_time(PG_FUNCTION_ARGS) MemoryContext per_query_ctx; MemoryContext oldcontext; Tuplestorestate *tupstore; - Datum values[AQE_TOTAL_NCOLS]; - bool nulls[AQE_TOTAL_NCOLS]; + Datum values[ET_TOTAL_NCOLS]; + bool 
nulls[ET_TOTAL_NCOLS]; HASH_SEQ_STATUS hash_seq; QueriesEntry *qentry; StatEntry *sentry; @@ -2582,7 +2597,9 @@ aqo_execution_time(PG_FUNCTION_ARGS) /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == ET_TOTAL_NCOLS); + + if (tupDesc->natts != ET_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; @@ -2715,7 +2732,7 @@ aqo_query_stat_update(PG_FUNCTION_ARGS) PG_ARGISNULL(EST_ERROR)) PG_RETURN_BOOL(false); - queryid = PG_GETARG_INT64(AQ_QUERYID); + queryid = PG_GETARG_INT64(QUERYID); stat_arg.execs_with_aqo = PG_GETARG_INT64(NEXECS_AQO); stat_arg.execs_without_aqo = PG_GETARG_INT64(NEXECS); if (queryid == 0 || stat_arg.execs_with_aqo < 0 || From 7d86f947016567e5831e342810727e368e02bde7 Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Thu, 20 Apr 2023 13:56:21 +0700 Subject: [PATCH 117/134] Add small bugfixes and refactoring. 
Reviewed by: @Alena0704 --- aqo.c | 1 - aqo.h | 1 - hash.c | 28 ++++++++++++++-------------- postprocessing.c | 4 ++-- preprocessing.c | 2 +- storage.c | 36 +++++++++++++++++------------------- 6 files changed, 34 insertions(+), 38 deletions(-) diff --git a/aqo.c b/aqo.c index 0abb3c2f..f09d360b 100644 --- a/aqo.c +++ b/aqo.c @@ -61,7 +61,6 @@ static const struct config_enum_entry format_options[] = { }; /* Parameters of autotuning */ -int aqo_stat_size = STAT_SAMPLE_SIZE; int auto_tuning_window_size = 5; double auto_tuning_exploration = 0.1; int auto_tuning_max_iterations = 50; diff --git a/aqo.h b/aqo.h index 6f57a4d1..85c3f3b2 100644 --- a/aqo.h +++ b/aqo.h @@ -211,7 +211,6 @@ extern double predicted_ppi_rows; extern double fss_ppi_hash; /* Parameters of autotuning */ -extern int aqo_stat_size; extern int auto_tuning_window_size; extern double auto_tuning_exploration; extern int auto_tuning_max_iterations; diff --git a/hash.c b/hash.c index fe7da8ee..e24d405c 100644 --- a/hash.c +++ b/hash.c @@ -326,7 +326,7 @@ get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash) /* * Computes hash for given string. */ -int +static int get_str_hash(const char *str) { return DatumGetInt32(hash_any((const unsigned char *) str, @@ -363,7 +363,7 @@ get_int_array_hash(int *arr, int len) * Sorts given array in-place to compute hash. * The hash is order-insensitive. */ -int +static int get_unsorted_unsafe_int_array_hash(int *arr, int len) { qsort(arr, len, sizeof(*arr), int_cmp); @@ -378,7 +378,7 @@ get_unsorted_unsafe_int_array_hash(int *arr, int len) * using 'hash_any'. * Frees allocated memory before returning hash. */ -int +static int get_unordered_int_list_hash(List *lst) { int i = 0; @@ -430,7 +430,7 @@ replace_patterns(const char *str, const char *start_pattern, * Computes hash for given feature subspace. * Hash is supposed to be clause-order-insensitive. 
*/ -int +static int get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash) { int hashes[3]; @@ -499,7 +499,7 @@ remove_locations(const char *str) * Returns index of given value in given sorted integer array * or -1 if not found. */ -int +static int get_id_in_sorted_int_array(int val, int n, int *arr) { int *i; @@ -518,7 +518,7 @@ get_id_in_sorted_int_array(int val, int n, int *arr) * Returns class of equivalence for given argument hash or 0 if such hash * does not belong to any equivalence class. */ -int +static int get_arg_eclass(int arg_hash, int nargs, int *args_hash, int *eclass_hash) { int di = get_id_in_sorted_int_array(arg_hash, nargs, args_hash); @@ -533,7 +533,7 @@ get_arg_eclass(int arg_hash, int nargs, int *args_hash, int *eclass_hash) * Builds list of non-constant arguments of equivalence clauses * of given clauselist. */ -void +static void get_clauselist_args(List *clauselist, int *nargs, int **args_hash) { RestrictInfo *rinfo; @@ -579,7 +579,7 @@ get_clauselist_args(List *clauselist, int *nargs, int **args_hash) /* * Returns class of an object in disjoint set. */ -int +static int disjoint_set_get_parent(int *p, int v) { if (p[v] == -1) @@ -591,7 +591,7 @@ disjoint_set_get_parent(int *p, int v) /* * Merges two equivalence classes in disjoint set. */ -void +static void disjoint_set_merge_eclasses(int *p, int v1, int v2) { int p1, @@ -611,7 +611,7 @@ disjoint_set_merge_eclasses(int *p, int v1, int v2) /* * Constructs disjoint set on arguments. */ -int * +static int * perform_eclasses_join(List *clauselist, int nargs, int *args_hash) { RestrictInfo *rinfo; @@ -688,7 +688,7 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) /* * Checks whether the given char is brace, i. e. '{' or '}'. */ -bool +static bool is_brace(char ch) { return ch == '{' || ch == '}'; @@ -697,7 +697,7 @@ is_brace(char ch) /* * Returns whether arguments list contain constants. 
*/ -bool +static bool has_consts(List *lst) { ListCell *l; @@ -711,7 +711,7 @@ has_consts(List *lst) /* * Returns pointer on the args list in clause or NULL. */ -List ** +static List ** get_clause_args_ptr(Expr *clause) { switch (clause->type) @@ -737,7 +737,7 @@ get_clause_args_ptr(Expr *clause) /* * Returns whether the clause is an equivalence clause. */ -bool +static bool clause_is_eq_clause(Expr *clause) { /* TODO: fix this horrible mess */ diff --git a/postprocessing.c b/postprocessing.c index 6850cde4..66aca901 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -173,7 +173,7 @@ learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, * For given node specified by clauselist, relidslist and join_type restores * the same selectivities of clauses as were used at query optimization stage. */ -List * +static List * restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, bool was_parametrized) { @@ -336,7 +336,7 @@ should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, "predicted rows: %.0lf, updated prediction: %.0lf", query_context.query_hash, node->fss, predicted, nrows); - *rfactor = 0.9 * (RELIABILITY_MAX - RELIABILITY_MIN); + *rfactor = RELIABILITY_MIN + 0.9 * (RELIABILITY_MAX - RELIABILITY_MIN); return true; } } diff --git a/preprocessing.c b/preprocessing.c index 03c3432a..ef41ab0e 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -69,7 +69,7 @@ /* List of feature spaces, that are processing in this backend. 
*/ List *cur_classes = NIL; -int aqo_join_threshold = 0; +int aqo_join_threshold = 3; static planner_hook_type aqo_planner_next = NULL; diff --git a/storage.c b/storage.c index af368aa1..17f97555 100644 --- a/storage.c +++ b/storage.c @@ -100,7 +100,7 @@ static bool _aqo_stat_remove(uint64 queryid); static bool _aqo_queries_remove(uint64 queryid); static bool _aqo_qtexts_remove(uint64 queryid); static bool _aqo_data_remove(data_key *key); -static bool neirest_neighbor(double **matrix, int old_rows, double *neighbor, int cols); +static bool nearest_neighbor(double **matrix, int old_rows, double *neighbor, int cols); static double fs_distance(double *a, double *b, int len); PG_FUNCTION_INFO_V1(aqo_query_stat); @@ -143,7 +143,7 @@ update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids) /* * Forms ArrayType object for storage from simple C-array matrix. */ -ArrayType * +static ArrayType * form_matrix(double *matrix, int nrows, int ncols) { Datum *elems; @@ -375,8 +375,8 @@ aqo_query_stat(PG_FUNCTION_ARGS) MemoryContext per_query_ctx; MemoryContext oldcontext; Tuplestorestate *tupstore; - Datum values[TOTAL_NCOLS + 1]; - bool nulls[TOTAL_NCOLS + 1]; + Datum values[TOTAL_NCOLS]; + bool nulls[TOTAL_NCOLS]; HASH_SEQ_STATUS hash_seq; StatEntry *entry; @@ -408,13 +408,11 @@ aqo_query_stat(PG_FUNCTION_ARGS) MemoryContextSwitchTo(oldcontext); - memset(nulls, 0, TOTAL_NCOLS + 1); + memset(nulls, 0, TOTAL_NCOLS); LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); hash_seq_init(&hash_seq, stat_htab); while ((entry = hash_seq_search(&hash_seq)) != NULL) { - memset(nulls, 0, TOTAL_NCOLS + 1); - values[QUERYID] = Int64GetDatum(entry->queryid); values[NEXECS] = Int64GetDatum(entry->execs_without_aqo); values[NEXECS_AQO] = Int64GetDatum(entry->execs_with_aqo); @@ -1507,8 +1505,8 @@ fs_distance(double *a, double *b, int len) return res; } -bool -neirest_neighbor(double **matrix, int old_rows, double *neibour, int cols) +static bool +nearest_neighbor(double **matrix, int 
old_rows, double *neibour, int cols) { int i; for (i=0; irows; i++) { - if (k < aqo_K && !neirest_neighbor(data->matrix, old_rows, + if (k < aqo_K && !nearest_neighbor(data->matrix, old_rows, temp_data->matrix[i], data->cols)) { @@ -1904,8 +1902,8 @@ aqo_queries(PG_FUNCTION_ARGS) MemoryContext per_query_ctx; MemoryContext oldcontext; Tuplestorestate *tupstore; - Datum values[AQ_TOTAL_NCOLS + 1]; - bool nulls[AQ_TOTAL_NCOLS + 1]; + Datum values[AQ_TOTAL_NCOLS]; + bool nulls[AQ_TOTAL_NCOLS]; HASH_SEQ_STATUS hash_seq; QueriesEntry *entry; @@ -1937,12 +1935,12 @@ aqo_queries(PG_FUNCTION_ARGS) MemoryContextSwitchTo(oldcontext); + memset(nulls, 0, AQ_TOTAL_NCOLS); + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); hash_seq_init(&hash_seq, queries_htab); while ((entry = hash_seq_search(&hash_seq)) != NULL) { - memset(nulls, 0, AQ_TOTAL_NCOLS + 1); - values[AQ_QUERYID] = Int64GetDatum(entry->queryid); values[AQ_FS] = Int64GetDatum(entry->fs); values[AQ_LEARN_AQO] = BoolGetDatum(entry->learn_aqo); @@ -2144,7 +2142,7 @@ aqo_queries_find(uint64 queryid, QueryContextData *ctx) /* * Function for update and save value of smart statement timeout - * for query in aqu_queries table + * for query in aqo_queries table */ bool update_query_timeout(uint64 queryid, int64 smart_timeout) @@ -2517,6 +2515,8 @@ aqo_cardinality_error(PG_FUNCTION_ARGS) LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + memset(nulls, 0, AQE_TOTAL_NCOLS * sizeof(nulls[0])); + hash_seq_init(&hash_seq, queries_htab); while ((qentry = hash_seq_search(&hash_seq)) != NULL) { @@ -2525,8 +2525,6 @@ aqo_cardinality_error(PG_FUNCTION_ARGS) int64 nexecs; int nvals; - memset(nulls, 0, AQE_TOTAL_NCOLS * sizeof(nulls[0])); - sentry = (StatEntry *) hash_search(stat_htab, &qentry->queryid, HASH_FIND, &found); if (!found) @@ -2611,6 +2609,8 @@ aqo_execution_time(PG_FUNCTION_ARGS) LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); LWLockAcquire(&aqo_state->stat_lock, 
LW_SHARED); + memset(nulls, 0, ET_TOTAL_NCOLS * sizeof(nulls[0])); + hash_seq_init(&hash_seq, queries_htab); while ((qentry = hash_seq_search(&hash_seq)) != NULL) { @@ -2620,8 +2620,6 @@ aqo_execution_time(PG_FUNCTION_ARGS) int nvals; double tm = 0; - memset(nulls, 0, ET_TOTAL_NCOLS * sizeof(nulls[0])); - sentry = (StatEntry *) hash_search(stat_htab, &qentry->queryid, HASH_FIND, &found); if (!found) From 5655f51d1c4a941fb3875c8f6934126074ff84a5 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Wed, 26 Apr 2023 18:37:59 +0300 Subject: [PATCH 118/134] Set the size of one table to 100 to ensure that the plan with only a right-side hash join is chosen. --- expected/look_a_like.out | 246 +++++++++++++++++++-------------------- sql/look_a_like.sql | 2 +- 2 files changed, 124 insertions(+), 124 deletions(-) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index 9e3dc286..dc339ffa 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -19,7 +19,7 @@ NOTICE: table "a" does not exist, skipping NOTICE: table "b" does not exist, skipping -- Create tables with correlated datas in columns CREATE TABLE a (x1 int, x2 int, x3 int); -INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; +INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; CREATE TABLE b (y1 int, y2 int, y3 int); INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; -- @@ -39,17 +39,17 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result --------------------------------------------------------- - Nested Loop (actual rows=10000
loops=1) AQO not used Output: a.x1, b.y1 - -> Seq Scan on public.a (actual rows=100 loops=1) + -> Seq Scan on public.a (actual rows=10 loops=1) AQO not used Output: a.x1, a.x2, a.x3 Filter: ((a.x1 = 5) AND (a.x2 = 5)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=100 loops=100) + Rows Removed by Filter: 90 + -> Seq Scan on public.b (actual rows=100 loops=10) AQO not used Output: b.y1, b.y2, b.y3 Filter: (b.y1 = 5) @@ -63,24 +63,24 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A LEFT JOIN b ON A.x1 = B.y1 WHERE x1 = 5 AND x2 = 5;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result ------------------------------------------------------------- - Hash Left Join (actual rows=10000 loops=1) + result +----------------------------------------------------------- + Hash Right Join (actual rows=1000 loops=1) AQO not used Output: a.x1, b.y1 - Hash Cond: (a.x1 = b.y1) - -> Seq Scan on public.a (actual rows=100 loops=1) + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=100 loops=1) AQO: rows=100, error=0% - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 = 5) AND (a.x2 = 5)) + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 = 5) Rows Removed by Filter: 900 - -> Hash (actual rows=100 loops=1) - Output: b.y1 - -> Seq Scan on public.b (actual rows=100 loops=1) - AQO: rows=100, error=0% - Output: b.y1 - Filter: (b.y1 = 5) - Rows Removed by Filter: 900 + -> Hash (actual rows=10 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=10 loops=1) + AQO: rows=10, error=0% + Output: a.x1 + Filter: ((a.x1 = 5) AND (a.x2 = 5)) + Rows Removed by Filter: 90 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -90,22 +90,22 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result 
------------------------------------------------------------- - Hash Join (actual rows=50000 loops=1) + result +----------------------------------------------------------- + Hash Join (actual rows=5000 loops=1) AQO not used Output: a.x1, b.y1 Hash Cond: (b.y1 = a.x1) -> Seq Scan on public.b (actual rows=1000 loops=1) AQO not used Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=500 loops=1) + -> Hash (actual rows=50 loops=1) Output: a.x1 - -> Seq Scan on public.a (actual rows=500 loops=1) + -> Seq Scan on public.a (actual rows=50 loops=1) AQO not used Output: a.x1 Filter: ((a.x1 < 5) AND (a.x2 < 5)) - Rows Removed by Filter: 500 + Rows Removed by Filter: 50 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -116,22 +116,22 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result ------------------------------------------------------------- - Hash Join (actual rows=50000 loops=1) - AQO: rows=50000, error=0% + result +----------------------------------------------------------- + Hash Join (actual rows=5000 loops=1) + AQO: rows=5000, error=0% Output: a.x1, b.y1 Hash Cond: (b.y1 = a.x1) -> Seq Scan on public.b (actual rows=1000 loops=1) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=500 loops=1) + -> Hash (actual rows=50 loops=1) Output: a.x1 - -> Seq Scan on public.a (actual rows=500 loops=1) - AQO: rows=500, error=0% + -> Seq Scan on public.a (actual rows=50 loops=1) + AQO: rows=50, error=0% Output: a.x1 Filter: ((a.x1 < 10) AND (a.x2 < 5)) - Rows Removed by Filter: 500 + Rows Removed by Filter: 50 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -141,22 +141,22 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 AND A.x1 = B.y1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result 
------------------------------------------------------------- - Hash Join (actual rows=70000 loops=1) + result +----------------------------------------------------------- + Hash Join (actual rows=7000 loops=1) AQO not used Output: a.x1, b.y1 Hash Cond: (b.y1 = a.x1) -> Seq Scan on public.b (actual rows=1000 loops=1) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=700 loops=1) + -> Hash (actual rows=70 loops=1) Output: a.x1 - -> Seq Scan on public.a (actual rows=700 loops=1) + -> Seq Scan on public.a (actual rows=70 loops=1) AQO not used Output: a.x1 Filter: ((a.x1 > 2) AND (a.x2 > 2)) - Rows Removed by Filter: 300 + Rows Removed by Filter: 30 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -168,20 +168,20 @@ SELECT x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------------- - Hash Join (actual rows=40000 loops=1) + Hash Join (actual rows=4000 loops=1) AQO not used Output: a.x1, b.y1 Hash Cond: (b.y1 = a.x1) -> Seq Scan on public.b (actual rows=1000 loops=1) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=400 loops=1) + -> Hash (actual rows=40 loops=1) Output: a.x1 - -> Seq Scan on public.a (actual rows=400 loops=1) + -> Seq Scan on public.a (actual rows=40 loops=1) AQO not used Output: a.x1 Filter: ((a.x1 > 5) AND (a.x2 > 5) AND (a.x3 < 10)) - Rows Removed by Filter: 600 + Rows Removed by Filter: 60 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -193,20 +193,20 @@ SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------------- - Hash Join (actual rows=50000 loops=1) + Hash Join (actual rows=5000 loops=1) AQO not used Output: a.x1, b.y1 Hash Cond: 
(b.y1 = a.x1) -> Seq Scan on public.b (actual rows=1000 loops=1) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=500 loops=1) + -> Hash (actual rows=50 loops=1) Output: a.x1 - -> Seq Scan on public.a (actual rows=500 loops=1) + -> Seq Scan on public.a (actual rows=50 loops=1) AQO not used Output: a.x1 Filter: ((a.x1 < 5) AND (a.x2 < 5) AND (a.x3 < 10)) - Rows Removed by Filter: 500 + Rows Removed by Filter: 50 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -219,20 +219,20 @@ SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS s WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------------ - Hash Join (actual rows=40000 loops=1) - AQO: rows=50000, error=20% + Hash Join (actual rows=4000 loops=1) + AQO: rows=5000, error=20% Output: a.x1, b.y1 Hash Cond: (b.y1 = a.x1) -> Seq Scan on public.b (actual rows=1000 loops=1) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=400 loops=1) + -> Hash (actual rows=40 loops=1) Output: a.x1 - -> Seq Scan on public.a (actual rows=400 loops=1) - AQO: rows=500, error=20% + -> Seq Scan on public.a (actual rows=40 loops=1) + AQO: rows=50, error=20% Output: a.x1 Filter: ((a.x1 < 5) AND (a.x2 < 4) AND (a.x3 < 5)) - Rows Removed by Filter: 600 + Rows Removed by Filter: 60 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -242,25 +242,25 @@ SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result ---------------------------------------------------------------------- + result +-------------------------------------------------------------------- Group (actual rows=2 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=200000 loops=1) + -> Sort (actual rows=20000 loops=1) AQO not used Output: a.x1 
Sort Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) + -> Nested Loop (actual rows=20000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) + -> Seq Scan on public.a (actual rows=20 loops=1) AQO not used Output: a.x1, a.x2, a.x3 Filter: ((a.x1 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=1000 loops=20) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 Using aqo: true @@ -273,25 +273,25 @@ SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result ---------------------------------------------------------------------- + result +-------------------------------------------------------------------- Group (actual rows=2 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=200000 loops=1) + -> Sort (actual rows=20000 loops=1) AQO not used Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) - AQO: rows=200000, error=0% + -> Nested Loop (actual rows=20000 loops=1) + AQO: rows=20000, error=0% Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) - AQO: rows=200, error=0% + -> Seq Scan on public.a (actual rows=20 loops=1) + AQO: rows=20, error=0% Output: a.x1, a.x2, a.x3 Filter: ((a.x1 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=1000 loops=20) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 Using aqo: true @@ -303,25 +303,25 @@ SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result 
---------------------------------------------------------------------- + result +-------------------------------------------------------------------- Group (actual rows=1 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=100000 loops=1) + -> Sort (actual rows=10000 loops=1) AQO not used Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=100000 loops=1) - AQO: rows=200000, error=50% + -> Nested Loop (actual rows=10000 loops=1) + AQO: rows=20000, error=50% Output: a.x1 - -> Seq Scan on public.a (actual rows=100 loops=1) - AQO: rows=200, error=50% + -> Seq Scan on public.a (actual rows=10 loops=1) + AQO: rows=20, error=50% Output: a.x1, a.x2, a.x3 Filter: ((a.x1 < 4) AND (a.x3 > 2)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=1000 loops=100) + Rows Removed by Filter: 90 + -> Seq Scan on public.b (actual rows=1000 loops=10) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 Using aqo: true @@ -339,19 +339,19 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=100000 loops=1) + -> Sort (actual rows=10000 loops=1) AQO not used Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=100000 loops=1) + -> Nested Loop (actual rows=10000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=100 loops=1) + -> Seq Scan on public.a (actual rows=10 loops=1) AQO not used Output: a.x1, a.x2, a.x3 Filter: ((a.x1 < 3) AND (a.x2 < 5) AND (a.x3 > 1)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=1000 loops=100) + Rows Removed by Filter: 90 + -> Seq Scan on public.b (actual rows=1000 loops=10) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 Using aqo: true @@ -369,19 +369,19 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=200000 loops=1) + -> Sort (actual rows=20000 loops=1) AQO not used 
Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) + -> Nested Loop (actual rows=20000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) + -> Seq Scan on public.a (actual rows=20 loops=1) AQO not used Output: a.x1, a.x2, a.x3 Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=1000 loops=20) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 Using aqo: true @@ -399,19 +399,19 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=200000 loops=1) + -> Sort (actual rows=20000 loops=1) AQO not used Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) + -> Nested Loop (actual rows=20000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) + -> Seq Scan on public.a (actual rows=20 loops=1) AQO not used Output: a.x1, a.x2, a.x3 Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 < 5)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=1000 loops=20) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 Using aqo: true @@ -429,19 +429,19 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=140000 loops=1) + -> Sort (actual rows=14000 loops=1) AQO not used Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=140000 loops=1) + -> Nested Loop (actual rows=14000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) - AQO: rows=100, error=-100% + -> Seq Scan on public.a (actual rows=20 loops=1) + AQO: rows=10, error=-100% Output: a.x1, a.x2, a.x3 Filter: ((a.x1 < 4) AND (a.x2 < 5) AND (a.x3 > 1)) - Rows 
Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=700 loops=200) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=700 loops=20) AQO not used Output: b.y1, b.y2, b.y3 Filter: (b.y1 > 2) @@ -462,19 +462,19 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=70000 loops=1) + -> Sort (actual rows=7000 loops=1) AQO not used Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=70000 loops=1) - AQO: rows=140000, error=50% + -> Nested Loop (actual rows=7000 loops=1) + AQO: rows=14000, error=50% Output: a.x1 - -> Seq Scan on public.a (actual rows=100 loops=1) - AQO: rows=200, error=50% + -> Seq Scan on public.a (actual rows=10 loops=1) + AQO: rows=20, error=50% Output: a.x1, a.x2, a.x3 Filter: ((a.x1 < 3) AND (a.x2 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=700 loops=100) + Rows Removed by Filter: 90 + -> Seq Scan on public.b (actual rows=700 loops=10) AQO: rows=700, error=0% Output: b.y1, b.y2, b.y3 Filter: (b.y1 > 2) @@ -501,7 +501,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L AQO not used Output: a.x1, a.x2, a.x3 Hash Cond: (a.x1 = c.z1) - -> Seq Scan on public.a (actual rows=1000 loops=1) + -> Seq Scan on public.a (actual rows=100 loops=1) AQO not used Output: a.x1, a.x2, a.x3 -> Hash (actual rows=1000 loops=1) @@ -523,29 +523,29 @@ SELECT str AS result FROM expln(' SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result -------------------------------------------------------------------------- - Hash Right Join (actual rows=10000000 loops=1) - AQO: rows=1, error=-999999900% + result +------------------------------------------------------------------------ + Hash Right Join (actual rows=1000000 loops=1) + AQO: rows=1, 
error=-99999900% Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3, c.z1, c.z2, c.z3 Hash Cond: (b.y1 = a.x1) -> Seq Scan on public.b (actual rows=1000 loops=1) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=100000 loops=1) + -> Hash (actual rows=10000 loops=1) Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 - -> Hash Left Join (actual rows=100000 loops=1) - AQO: rows=1, error=-9999900% + -> Hash Right Join (actual rows=10000 loops=1) + AQO: rows=1, error=-999900% Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 - Hash Cond: (a.x1 = c.z1) - -> Seq Scan on public.a (actual rows=1000 loops=1) + Hash Cond: (c.z1 = a.x1) + -> Seq Scan on public.c (actual rows=1000 loops=1) AQO: rows=1000, error=0% - Output: a.x1, a.x2, a.x3 - -> Hash (actual rows=1000 loops=1) Output: c.z1, c.z2, c.z3 - -> Seq Scan on public.c (actual rows=1000 loops=1) - AQO: rows=1000, error=0% - Output: c.z1, c.z2, c.z3 + -> Hash (actual rows=100 loops=1) + Output: a.x1, a.x2, a.x3 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: a.x1, a.x2, a.x3 Using aqo: true AQO mode: LEARN JOINS: 2 diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index c9e59249..5edef7bb 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -16,7 +16,7 @@ DROP TABLE IF EXISTS a,b CASCADE; -- Create tables with correlated datas in columns CREATE TABLE a (x1 int, x2 int, x3 int); -INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; +INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; CREATE TABLE b (y1 int, y2 int, y3 int); INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; From 95d49b27e4bb50dec483e4edf95e954912095a49 Mon Sep 17 00:00:00 2001 From: Alexandra Date: Fri, 28 Apr 2023 15:12:08 +0300 Subject: [PATCH 119/134] Fix dsa_allocate for aqo_qtext_store to avoid segfault 
when out of memory (#165) --- storage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage.c b/storage.c index 17f97555..bf004199 100644 --- a/storage.c +++ b/storage.c @@ -1111,7 +1111,7 @@ aqo_qtext_store(uint64 queryid, const char *query_string) entry->queryid = queryid; size = size > querytext_max_size ? querytext_max_size : size; - entry->qtext_dp = dsa_allocate(qtext_dsa, size); + entry->qtext_dp = dsa_allocate0(qtext_dsa, size); if (!_check_dsa_validity(entry->qtext_dp)) { From 79f85485c1e1923cd4d81c1d12f3fa904ea50696 Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Thu, 25 May 2023 16:17:11 +0700 Subject: [PATCH 120/134] Try reducing the memory overhead. Free some allocated memory right after use. Reset AQOPredictMemCtx as soon as posible. Remove learning attempts on SubPlan nodes. Bugfix. Free allocated memory on save/load data. Add memory context for storage. Change copyright to 2016-2023. --- aqo.c | 11 +++++++- aqo.h | 3 +- auto_tuning.c | 2 +- cardinality_estimation.c | 2 +- cardinality_hooks.c | 16 +++++++++-- expected/unsupported.out | 61 ++++++++++++++++++++++++++++++++++++++-- hash.c | 30 ++++++++++++++++---- machine_learning.c | 2 +- path_utils.c | 37 +++++++++++------------- postprocessing.c | 10 +++++-- preprocessing.c | 2 +- sql/unsupported.sql | 10 +++++++ storage.c | 46 ++++++++++++++++++++++-------- 13 files changed, 183 insertions(+), 49 deletions(-) diff --git a/aqo.c b/aqo.c index f09d360b..42e2d345 100644 --- a/aqo.c +++ b/aqo.c @@ -2,7 +2,7 @@ * aqo.c * Adaptive query optimization extension * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/aqo.c @@ -92,6 +92,9 @@ MemoryContext AQOPredictMemCtx = NULL; /* Is released at the end of learning */ MemoryContext AQOLearnMemCtx = NULL; +/* Is released at the end of load/store routines */ +MemoryContext AQOStorageMemCtx = NULL; + /* Additional plan info */ int njoins; @@ -349,6 +352,12 @@ 
_PG_init(void) AQOLearnMemCtx = AllocSetContextCreate(AQOTopMemCtx, "AQOLearnMemoryContext", ALLOCSET_DEFAULT_SIZES); + /* + * AQOStorageMemoryContext containe data for load/store routines. + */ + AQOStorageMemCtx = AllocSetContextCreate(AQOTopMemCtx, + "AQOStorageMemoryContext", + ALLOCSET_DEFAULT_SIZES); RegisterResourceReleaseCallback(aqo_free_callback, NULL); RegisterAQOPlanNodeMethods(); diff --git a/aqo.h b/aqo.h index 85c3f3b2..f3275003 100644 --- a/aqo.h +++ b/aqo.h @@ -105,7 +105,7 @@ * Module storage.c is responsible for storage query settings and models * (i. e. all information which is used in extension). * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/aqo.h @@ -232,6 +232,7 @@ extern MemoryContext AQOTopMemCtx; extern MemoryContext AQOCacheMemCtx; extern MemoryContext AQOPredictMemCtx; extern MemoryContext AQOLearnMemCtx; +extern MemoryContext AQOStorageMemCtx; extern int aqo_statement_timeout; diff --git a/auto_tuning.c b/auto_tuning.c index b035a093..36dfe2ef 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/auto_tuning.c diff --git a/cardinality_estimation.c b/cardinality_estimation.c index f93e0905..8ab98f3c 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/cardinality_estimation.c diff --git a/cardinality_hooks.c b/cardinality_hooks.c index f0d745bb..fd2f970c 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -18,7 +18,7 @@ * 
******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/cardinality_hooks.c @@ -82,6 +82,7 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) if (!query_context.use_aqo) { MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); goto default_estimator; } @@ -100,6 +101,7 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) /* Return to the caller's memory context. */ MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); if (predicted < 0) goto default_estimator; @@ -191,12 +193,15 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, cache_selectivity(current_hash, rel->relid, rte->relid, *((double *) lfirst(l2))); } + + pfree(args_hash); + pfree(eclass_hash); } if (!query_context.use_aqo) { MemoryContextSwitchTo(oldctx); - + MemoryContextReset(AQOPredictMemCtx); goto default_estimator; } @@ -211,6 +216,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, /* Return to the caller's memory context */ MemoryContextSwitchTo(oldctx); + MemoryContextReset(AQOPredictMemCtx); predicted_ppi_rows = predicted; fss_ppi_hash = fss; @@ -265,6 +271,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, if (!query_context.use_aqo) { MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); goto default_estimator; } @@ -284,6 +291,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, /* Return to the caller's memory context */ MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); rel->fss_hash = fss; @@ -343,6 +351,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, if (!query_context.use_aqo) { MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); goto default_estimator; } @@ -359,6 +368,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, &fss); /* 
Return to the caller's memory context */ MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); predicted_ppi_rows = predicted; fss_ppi_hash = fss; @@ -450,6 +460,7 @@ aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, grouped_rel->rows = predicted; grouped_rel->fss_hash = fss; MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); return predicted; } else @@ -460,6 +471,7 @@ aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, grouped_rel->predicted_cardinality = -1; MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); default_estimator: if (aqo_estimate_num_groups_next) diff --git a/expected/unsupported.out b/expected/unsupported.out index 6e45dcd8..9db07618 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -311,6 +311,59 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) JOINS: 0 (23 rows) +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); + QUERY PLAN +----------------------------------------------------------- + Seq Scan on t (actual rows=1000 loops=1) + AQO not used + Filter: ((x = (SubPlan 1)) AND (SubPlan 2)) + SubPlan 1 + -> Limit (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=1 loops=1000) + AQO not used + Filter: (x = t.x) + Rows Removed by Filter: 475 + SubPlan 2 + -> Seq Scan on t t0_1 (actual rows=1 loops=1000) + AQO not used + Filter: (x = t.x) + Rows Removed by Filter: 475 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(18 rows) + +-- No prediction for top SeqScan, because it fss is changed +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); + QUERY PLAN +----------------------------------------------------------- + Seq Scan on t (actual rows=1000 loops=1) + AQO 
not used + Filter: ((SubPlan 2) AND (x = (SubPlan 1))) + SubPlan 2 + -> Seq Scan on t t0_1 (actual rows=1 loops=1000) + AQO: rows=1, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 475 + SubPlan 1 + -> Limit (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=1 loops=1000) + AQO: rows=1, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 475 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(18 rows) + -- It's OK to use the knowledge for a query with different constants. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT count(*) FROM t WHERE @@ -580,6 +633,10 @@ ORDER BY (md5(query_text),error) DESC; -------+------------------------------------------------------------------------------------------------ 0.768 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; 0.070 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + 1.554 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT * FROM t WHERE + + | x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + + | x IN (SELECT x FROM t t0 WHERE t0.x = t.x); 0.000 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; 0.000 | SELECT * FROM + | (SELECT * FROM t WHERE x < 0) AS t0 + @@ -612,13 +669,13 @@ ORDER BY (md5(query_text),error) DESC; | JOIN + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + | ON q1.x = q2.x+1; -(13 rows) +(14 rows) DROP TABLE t,t1 CASCADE; -- delete all tables used in the test SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May some false positives really bother us here? 
count ------- - 44 + 48 (1 row) SELECT true AS success FROM aqo_cleanup(); diff --git a/hash.c b/hash.c index e24d405c..dfb4a55c 100644 --- a/hash.c +++ b/hash.c @@ -12,7 +12,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/hash.c @@ -157,6 +157,8 @@ get_grouped_exprs_hash(int child_fss, List *group_exprs) final_hashes[0] = child_fss; final_hashes[1] = get_int_array_hash(hashes, i); + pfree(hashes); + return get_int_array_hash(final_hashes, 2); } @@ -224,6 +226,7 @@ get_fss_for_object(List *relsigns, List *clauselist, clause_has_consts[i] = (args != NULL && has_consts(*args)); i++; } + pfree(args_hash); idx = argsort(clause_hashes, n, sizeof(*clause_hashes), int_cmp); inverse_idx = inverse_permutation(idx, n); @@ -234,6 +237,7 @@ get_fss_for_object(List *relsigns, List *clauselist, sorted_clauses[inverse_idx[i]] = clause_hashes[i]; i++; } + pfree(clause_hashes); i = 0; foreach(lc, selectivities) @@ -249,6 +253,7 @@ get_fss_for_object(List *relsigns, List *clauselist, } i++; } + pfree(inverse_idx); for (i = 0; i < n;) { @@ -272,6 +277,8 @@ get_fss_for_object(List *relsigns, List *clauselist, sizeof(**features), double_cmp); i = j; } + pfree(idx); + pfree(clause_has_consts); /* * Generate feature subspace hash. 
@@ -281,6 +288,8 @@ get_fss_for_object(List *relsigns, List *clauselist, eclasses_hash = get_int_array_hash(eclass_hash, nargs); relations_hash = get_relations_hash(relsigns); fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relations_hash); + pfree(sorted_clauses); + pfree(eclass_hash); if (nfeatures != NULL) { @@ -340,11 +349,17 @@ static int get_node_hash(Node *node) { char *str; + char *no_consts; + char *no_locations; int hash; - str = remove_locations(remove_consts(nodeToString(node))); - hash = get_str_hash(str); + str = nodeToString(node); + no_consts = remove_consts(str); pfree(str); + no_locations = remove_locations(no_consts); + pfree(no_consts); + hash = get_str_hash(no_locations); + pfree(no_locations); return hash; } @@ -467,6 +482,7 @@ get_relations_hash(List *relsigns) result = DatumGetInt32(hash_any((const unsigned char *) hashes, nhashes * sizeof(uint32))); + pfree(hashes); return result; } @@ -479,9 +495,11 @@ static char * remove_consts(const char *str) { char *res; + char *tmp; - res = replace_patterns(str, "{CONST", is_brace); - res = replace_patterns(res, ":stmt_len", is_brace); + tmp = replace_patterns(str, "{CONST", is_brace); + res = replace_patterns(tmp, ":stmt_len", is_brace); + pfree(tmp); return res; } @@ -683,6 +701,8 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) for (i = 0; i < *nargs; ++i) (*eclass_hash)[i] = e_hashes[disjoint_set_get_parent(p, i)]; + + pfree(e_hashes); } /* diff --git a/machine_learning.c b/machine_learning.c index d4f5cbee..bfdf0aaa 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -12,7 +12,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/machine_learning.c diff --git a/path_utils.c b/path_utils.c index 351aa66a..2e7ad4ca 100644 --- a/path_utils.c +++ b/path_utils.c @@ -5,7 +5,7 @@ * 
******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/path_utils.c @@ -51,7 +51,7 @@ static AQOPlanNode DefaultAQOPlanNode = */ static create_plan_hook_type aqo_create_plan_next = NULL; -static create_upper_paths_hook_type aqo_create_upper_paths_next = NULL; +/*static create_upper_paths_hook_type aqo_create_upper_paths_next = NULL;*/ static AQOPlanNode * @@ -260,7 +260,7 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) /* * Search for any subplans or initplans. - * if subplan is found, replace it by the feature space value of this subplan. + * if subplan is found, replace it by zero Const. */ static Node * subplan_hunter(Node *node, void *context) @@ -271,21 +271,13 @@ subplan_hunter(Node *node, void *context) if (IsA(node, SubPlan)) { - SubPlan *splan = (SubPlan *) node; - PlannerInfo *root = (PlannerInfo *) context; - PlannerInfo *subroot; - RelOptInfo *upper_rel; - A_Const *fss; + A_Const *fss = makeNode(A_Const); - subroot = (PlannerInfo *) list_nth(root->glob->subroots, - splan->plan_id - 1); - upper_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL); + fss->val.type = T_Integer; + fss->location = -1; + fss->val.val.ival = 0; + return (Node *) fss; - Assert(list_length(upper_rel->ext_nodes) == 1); - Assert(IsA((Node *) linitial(upper_rel->ext_nodes), A_Const)); - - fss = (A_Const *) linitial(upper_rel->ext_nodes); - return (Node *) copyObject(fss); } return expression_tree_mutator(node, subplan_hunter, context); } @@ -766,11 +758,14 @@ RegisterAQOPlanNodeMethods(void) } /* + * Warning! This function does not word properly. + * Because value of Const nodes removed by hash routine. + * * Hook for create_upper_paths_hook * * Assume, that we are last in the chain of path creators. 
*/ -static void +/*static void aqo_store_upper_signature(PlannerInfo *root, UpperRelationKind stage, RelOptInfo *input_rel, @@ -786,7 +781,7 @@ aqo_store_upper_signature(PlannerInfo *root, (*aqo_create_upper_paths_next)(root, stage, input_rel, output_rel, extra); if (!query_context.use_aqo && !query_context.learn_aqo && !force_collect_stat) - /* Includes 'disabled query' state. */ + / * Includes 'disabled query' state. * / return; if (stage != UPPERREL_FINAL) @@ -801,7 +796,7 @@ aqo_store_upper_signature(PlannerInfo *root, fss_node->val.val.ival = get_fss_for_object(rels.signatures, clauses, NIL, NULL, NULL); output_rel->ext_nodes = lappend(output_rel->ext_nodes, (void *) fss_node); -} +}*/ void aqo_path_utils_init(void) @@ -809,6 +804,6 @@ aqo_path_utils_init(void) aqo_create_plan_next = create_plan_hook; create_plan_hook = aqo_create_plan; - aqo_create_upper_paths_next = create_upper_paths_hook; - create_upper_paths_hook = aqo_store_upper_signature; + /*aqo_create_upper_paths_next = create_upper_paths_hook; + create_upper_paths_hook = aqo_store_upper_signature;*/ } diff --git a/postprocessing.c b/postprocessing.c index 66aca901..a6b6d030 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -9,7 +9,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/postprocessing.c @@ -224,6 +224,12 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, lst = lappend(lst, cur_sel); } + if (parametrized_sel) + { + pfree(args_hash); + pfree(eclass_hash); + } + return lst; } @@ -833,11 +839,11 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) } } - selectivity_cache_clear(); cur_classes = ldelete_uint64(cur_classes, query_context.query_hash); end: /* Release all AQO-specific memory, allocated during learning procedure */ + selectivity_cache_clear(); MemoryContextSwitchTo(oldctx); 
MemoryContextReset(AQOLearnMemCtx); diff --git a/preprocessing.c b/preprocessing.c index ef41ab0e..feb28d39 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -49,7 +49,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/preprocessing.c diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 8b36d721..e5853306 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -98,6 +98,16 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); +-- No prediction for top SeqScan, because it fss is changed +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); + -- It's OK to use the knowledge for a query with different constants. 
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT count(*) FROM t WHERE diff --git a/storage.c b/storage.c index bf004199..f71f5207 100644 --- a/storage.c +++ b/storage.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/storage.c @@ -666,11 +666,12 @@ static int data_store(const char *filename, form_record_t callback, long nrecs, void *ctx) { - FILE *file; - size_t size; - uint32 counter = 0; - void *data; - char *tmpfile; + FILE *file; + size_t size; + uint32 counter = 0; + void *data; + char *tmpfile; + MemoryContext old_context = MemoryContextSwitchTo(AQOStorageMemCtx); tmpfile = psprintf("%s.tmp", filename); file = AllocateFile(tmpfile, PG_BINARY_W); @@ -687,7 +688,11 @@ data_store(const char *filename, form_record_t callback, /* TODO: Add CRC code ? */ if (fwrite(&size, sizeof(size), 1, file) != 1 || fwrite(data, size, 1, file) != 1) + { + pfree(data); goto error; + } + pfree(data); counter++; } @@ -701,6 +706,9 @@ data_store(const char *filename, form_record_t callback, /* Parallel (re)writing into a file haven't happen. 
*/ (void) durable_rename(tmpfile, filename, PANIC); elog(LOG, "[AQO] %d records stored in file %s.", counter, filename); + + MemoryContextSwitchTo(old_context); + MemoryContextReset(AQOStorageMemCtx); return 0; error: @@ -712,6 +720,9 @@ data_store(const char *filename, form_record_t callback, FreeFile(file); unlink(tmpfile); pfree(tmpfile); + + MemoryContextSwitchTo(old_context); + MemoryContextReset(AQOStorageMemCtx); return -1; } @@ -936,17 +947,20 @@ aqo_queries_load(void) static void data_load(const char *filename, deform_record_t callback, void *ctx) { - FILE *file; - long i; - uint32 header; - int32 pgver; - long num; + FILE *file; + long i; + uint32 header; + int32 pgver; + long num; + MemoryContext old_context = MemoryContextSwitchTo(AQOStorageMemCtx); file = AllocateFile(filename, PG_BINARY_R); if (file == NULL) { if (errno != ENOENT) goto read_error; + + MemoryContextSwitchTo(old_context); return; } @@ -968,8 +982,12 @@ data_load(const char *filename, deform_record_t callback, void *ctx) goto read_error; data = palloc(size); if (fread(data, size, 1, file) != 1) + { + pfree(data); goto read_error; + } res = callback(data, size); + pfree(data); if (!res) { @@ -983,6 +1001,9 @@ data_load(const char *filename, deform_record_t callback, void *ctx) FreeFile(file); elog(LOG, "[AQO] %ld records loaded from file %s.", num, filename); + + MemoryContextSwitchTo(old_context); + MemoryContextReset(AQOStorageMemCtx); return; read_error: @@ -998,6 +1019,9 @@ data_load(const char *filename, deform_record_t callback, void *ctx) if (file) FreeFile(file); unlink(filename); + + MemoryContextSwitchTo(old_context); + MemoryContextReset(AQOStorageMemCtx); } static void From 9d2b27af12a98793cb1bc99384f8be37d59bb56e Mon Sep 17 00:00:00 2001 From: Andrey Kazarinov Date: Wed, 21 Jun 2023 21:51:33 +0300 Subject: [PATCH 121/134] cancel aqo timeout action in the critical section --- aqo.h | 1 + postprocessing.c | 13 ++++++--- preprocessing.c | 22 ++++++++++++++- 
t/003_assertion_error.pl | 59 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 90 insertions(+), 5 deletions(-) create mode 100644 t/003_assertion_error.pl diff --git a/aqo.h b/aqo.h index f3275003..04d9b8b3 100644 --- a/aqo.h +++ b/aqo.h @@ -172,6 +172,7 @@ extern bool aqo_show_details; extern int aqo_join_threshold; extern bool use_wide_search; extern bool aqo_learn_statement_timeout; +extern bool aqo_learn_statement_timeout_enable; /* Parameters for current query */ typedef struct QueryContextData diff --git a/postprocessing.c b/postprocessing.c index a6b6d030..7df0a253 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -22,6 +22,7 @@ #include "optimizer/optimizer.h" #include "postgres_fdw.h" #include "utils/queryenvironment.h" +#include "miscadmin.h" #include "aqo.h" #include "hash.h" @@ -628,8 +629,12 @@ aqo_timeout_handler(void) MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); aqo_obj_stat ctx = {NIL, NIL, NIL, false, false}; - if (!timeoutCtl.queryDesc || !ExtractFromQueryEnv(timeoutCtl.queryDesc)) + if (CritSectionCount > 0 || !timeoutCtl.queryDesc || + !ExtractFromQueryEnv(timeoutCtl.queryDesc)) + { + MemoryContextSwitchTo(oldctx); return; + } /* Now we can analyze execution state of the query. 
*/ @@ -664,7 +669,7 @@ set_timeout_if_need(QueryDesc *queryDesc) { int64 fintime = (int64) get_timeout_finish_time(STATEMENT_TIMEOUT)-1; - if (aqo_learn_statement_timeout && aqo_statement_timeout > 0) + if (aqo_learn_statement_timeout_enable && aqo_statement_timeout > 0) { max_timeout_value = Min(query_context.smart_timeout, (int64) aqo_statement_timeout); if (max_timeout_value > fintime) @@ -684,7 +689,7 @@ set_timeout_if_need(QueryDesc *queryDesc) */ return false; - if (!get_timeout_active(STATEMENT_TIMEOUT) || !aqo_learn_statement_timeout) + if (!get_timeout_active(STATEMENT_TIMEOUT) || !aqo_learn_statement_timeout_enable) return false; if (!ExtractFromQueryEnv(queryDesc)) @@ -829,7 +834,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) error = stat->est_error_aqo[stat->cur_stat_slot_aqo-1] - cardinality_sum_errors/(1 + cardinality_num_objects); - if ( aqo_learn_statement_timeout && aqo_statement_timeout > 0 && error >= 0.1) + if ( aqo_learn_statement_timeout_enable && aqo_statement_timeout > 0 && error >= 0.1) { int64 fintime = increase_smart_timeout(); elog(NOTICE, "[AQO] Time limit for execution of the statement was increased. Current timeout is "UINT64_FORMAT, fintime); diff --git a/preprocessing.c b/preprocessing.c index feb28d39..d5d6521e 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -71,7 +71,10 @@ List *cur_classes = NIL; int aqo_join_threshold = 3; +bool aqo_learn_statement_timeout_enable = false; + static planner_hook_type aqo_planner_next = NULL; +static post_parse_analyze_hook_type aqo_post_parse_analyze_hook = NULL; static void disable_aqo_for_query(void); static bool isQueryUsingSystemRelation(Query *query); @@ -478,9 +481,26 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) context); } +static void +aqo_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate) +{ + aqo_learn_statement_timeout_enable = false; + /* + * Enable learn_statement_timeout for + * the top level SELECT statement only. 
+ */ + if (query->commandType == CMD_SELECT) + aqo_learn_statement_timeout_enable = aqo_learn_statement_timeout; + + if (aqo_post_parse_analyze_hook) + aqo_post_parse_analyze_hook(pstate, query, jstate); +} + void aqo_preprocessing_init(void) { aqo_planner_next = planner_hook ? planner_hook : standard_planner; planner_hook = aqo_planner; -} \ No newline at end of file + aqo_post_parse_analyze_hook = post_parse_analyze_hook; + post_parse_analyze_hook = aqo_post_parse_analyze; +} diff --git a/t/003_assertion_error.pl b/t/003_assertion_error.pl new file mode 100644 index 00000000..e85206ff --- /dev/null +++ b/t/003_assertion_error.pl @@ -0,0 +1,59 @@ +use strict; +use warnings; + +use Config; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use Test::More tests => 1; + +my $node = PostgreSQL::Test::Cluster->new('aqotest'); +$node->init; +$node->append_conf('postgresql.conf', qq{ + shared_preload_libraries = 'aqo' + aqo.join_threshold = 0 + aqo.mode = 'learn' + aqo.show_details = 'off' + aqo.learn_statement_timeout = 'on' + }); + +# Test constants. Default values. +my $TRANSACTIONS = 100; + +# Disable connection default settings, forced by PGOPTIONS in AQO Makefile +# $ENV{PGOPTIONS}=""; + +# Change pgbench parameters according to the environment variable. 
+if (defined $ENV{TRANSACTIONS}) +{ + $TRANSACTIONS = $ENV{TRANSACTIONS}; +} + +my $query_string = ' +CREATE TABLE IF NOT EXISTS aqo_test1(a int, b int); +WITH RECURSIVE t(a, b) +AS ( + VALUES (1, 2) + UNION ALL + SELECT t.a + 1, t.b + 1 FROM t WHERE t.a < 10 +) INSERT INTO aqo_test1 (SELECT * FROM t); + +SET statement_timeout = 10; + +CREATE TABLE tmp1 AS SELECT t1.a AS a, t2.a AS b, t3.a AS c +FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 +WHERE t1.a = t2.b AND t2.a = t3.b; +DROP TABLE tmp1; +'; + +$node->start(); + +$node->safe_psql('postgres', 'CREATE EXTENSION IF NOT EXISTS aqo;'); + +for (1..$TRANSACTIONS) { + $node->psql('postgres', $query_string); +} + +ok(1, "There are no segfaults"); + +$node->stop(); From cfc3bac120265c4fae34b6ba01595f5acc60c2bd Mon Sep 17 00:00:00 2001 From: Alexandra Date: Tue, 29 Aug 2023 16:17:15 +0300 Subject: [PATCH 122/134] Fix aqo.dsm_max_size segfault (#177) Fix aqo.dsm_max_size segfault Add test for dsm_max_size --------- Co-authored-by: Alexandra Pervushina --- aqo.c | 6 ++-- aqo_shared.c | 2 ++ preprocessing.c | 15 +++++++-- storage.c | 65 +++++++++++++++++++++++++++++++----- storage.h | 3 +- t/004_dsm_size_max.pl | 76 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 152 insertions(+), 15 deletions(-) create mode 100644 t/004_dsm_size_max.pl diff --git a/aqo.c b/aqo.c index 42e2d345..df622c12 100644 --- a/aqo.c +++ b/aqo.c @@ -276,8 +276,8 @@ _PG_init(void) &dsm_size_max, 100, 0, INT_MAX, - PGC_SUSET, - 0, + PGC_POSTMASTER, + GUC_UNIT_MB, NULL, NULL, NULL @@ -389,5 +389,5 @@ PG_FUNCTION_INFO_V1(invalidate_deactivated_queries_cache); Datum invalidate_deactivated_queries_cache(PG_FUNCTION_ARGS) { - PG_RETURN_POINTER(NULL); + PG_RETURN_POINTER(NULL); } diff --git a/aqo_shared.c b/aqo_shared.c index b7cfced8..9b478552 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -97,6 +97,8 @@ aqo_init_shmem(void) /* Doesn't use DSA, so can be loaded in postmaster */ aqo_stat_load(); aqo_queries_load(); + + 
check_dsa_file_size(); } } diff --git a/preprocessing.c b/preprocessing.c index d5d6521e..bc014121 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -283,14 +283,23 @@ aqo_planner(Query *parse, const char *query_string, int cursorOptions, query_context.learn_aqo, query_context.use_aqo, query_context.auto_tuning, &aqo_queries_nulls)) { + bool dsa_valid = true; /* * Add query text into the ML-knowledge base. Just for further * analysis. In the case of cached plans we may have NULL query text. */ - if (!aqo_qtext_store(query_context.query_hash, query_string)) + if (!aqo_qtext_store(query_context.query_hash, query_string, &dsa_valid)) { - Assert(0); /* panic only on debug installation */ - elog(ERROR, "[AQO] Impossible situation was detected. Maybe not enough of shared memory?"); + if (!dsa_valid) + { + disable_aqo_for_query(); + elog(WARNING, "[AQO] Not enough DSA. AQO was disabled for this query"); + } + else + { + Assert(0); /* panic only on debug installation */ + elog(ERROR, "[AQO] Impossible situation was detected. 
Maybe not enough of shared memory?"); + } } } else diff --git a/storage.c b/storage.c index f71f5207..a11f16f4 100644 --- a/storage.c +++ b/storage.c @@ -507,7 +507,7 @@ _form_qtext_record_cb(void *ctx, size_t *size) { HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; QueryTextEntry *entry; - void *data; + void *data; char *query_string; char *ptr; @@ -784,7 +784,7 @@ _deform_qtexts_record_cb(void *data, size_t size) HASH_ENTER, &found); Assert(!found); - entry->qtext_dp = dsa_allocate(qtext_dsa, len); + entry->qtext_dp = dsa_allocate_extended(qtext_dsa, len, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); if (!_check_dsa_validity(entry->qtext_dp)) { /* @@ -829,7 +829,7 @@ aqo_qtexts_load(void) if (!found) { - if (!aqo_qtext_store(0, "COMMON feature space (do not delete!)")) + if (!aqo_qtext_store(0, "COMMON feature space (do not delete!)", NULL)) elog(PANIC, "[AQO] DSA Initialization was unsuccessful"); } } @@ -944,6 +944,49 @@ aqo_queries_load(void) } } +static long +aqo_get_file_size(const char *filename) +{ + FILE *file; + long size = 0; + + file = AllocateFile(filename, PG_BINARY_R); + if (file == NULL) + { + if (errno != ENOENT) + goto read_error; + return size; + } + + fseek(file, 0L, SEEK_END); + size = ftell(file); + + FreeFile(file); + return size; + +read_error: + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", filename))); + if (file) + FreeFile(file); + unlink(filename); + return -1; +} + +void +check_dsa_file_size(void) +{ + long qtext_size = aqo_get_file_size(PGAQO_TEXT_FILE); + long data_size = aqo_get_file_size(PGAQO_DATA_FILE); + + if (qtext_size == -1 || data_size == -1 || + qtext_size + data_size >= dsm_size_max * 1024 * 1024) + { + elog(ERROR, "aqo.dsm_size_max is too small"); + } +} + static void data_load(const char *filename, deform_record_t callback, void *ctx) { @@ -1090,13 +1133,16 @@ dsa_init() * XXX: Maybe merge with aqo_queries ? 
*/ bool -aqo_qtext_store(uint64 queryid, const char *query_string) +aqo_qtext_store(uint64 queryid, const char *query_string, bool *dsa_valid) { QueryTextEntry *entry; bool found; bool tblOverflow; HASHACTION action; + if (dsa_valid) + *dsa_valid = true; + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); if (query_string == NULL || querytext_max_size == 0) @@ -1135,7 +1181,7 @@ aqo_qtext_store(uint64 queryid, const char *query_string) entry->queryid = queryid; size = size > querytext_max_size ? querytext_max_size : size; - entry->qtext_dp = dsa_allocate0(qtext_dsa, size); + entry->qtext_dp = dsa_allocate_extended(qtext_dsa, size, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); if (!_check_dsa_validity(entry->qtext_dp)) { @@ -1144,7 +1190,10 @@ aqo_qtext_store(uint64 queryid, const char *query_string) * that caller recognize it and don't try to call us more. */ (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, NULL); + _aqo_queries_remove(queryid); LWLockRelease(&aqo_state->qtexts_lock); + if (dsa_valid) + *dsa_valid = false; return false; } @@ -1423,7 +1472,7 @@ aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, List *reloids) entry->nrels = nrels; size = _compute_data_dsa(entry); - entry->data_dp = dsa_allocate0(data_dsa, size); + entry->data_dp = dsa_allocate_extended(data_dsa, size, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); if (!_check_dsa_validity(entry->data_dp)) { @@ -1455,7 +1504,7 @@ aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, List *reloids) /* Need to re-allocate DSA chunk */ dsa_free(data_dsa, entry->data_dp); - entry->data_dp = dsa_allocate0(data_dsa, size); + entry->data_dp = dsa_allocate_extended(data_dsa, size, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); if (!_check_dsa_validity(entry->data_dp)) { @@ -2713,7 +2762,7 @@ aqo_query_texts_update(PG_FUNCTION_ARGS) str_buff = (char*) palloc(str_len); text_to_cstring_buffer(str, str_buff, str_len); - res = aqo_qtext_store(queryid, str_buff); + res = aqo_qtext_store(queryid, str_buff, NULL); pfree(str_buff); 
PG_RETURN_BOOL(res); diff --git a/storage.h b/storage.h index 2b4e4cdd..9491e33e 100644 --- a/storage.h +++ b/storage.h @@ -138,7 +138,7 @@ extern StatEntry *aqo_stat_store(uint64 queryid, bool use_aqo, extern void aqo_stat_flush(void); extern void aqo_stat_load(void); -extern bool aqo_qtext_store(uint64 queryid, const char *query_string); +extern bool aqo_qtext_store(uint64 queryid, const char *query_string, bool *dsa_valid); extern void aqo_qtexts_flush(void); extern void aqo_qtexts_load(void); @@ -156,6 +156,7 @@ extern bool aqo_queries_store(uint64 queryid, uint64 fs, bool learn_aqo, extern void aqo_queries_flush(void); extern void aqo_queries_load(void); +extern void check_dsa_file_size(void); /* * Machinery for deactivated queries cache. * TODO: Should live in a custom memory context diff --git a/t/004_dsm_size_max.pl b/t/004_dsm_size_max.pl new file mode 100644 index 00000000..26898b79 --- /dev/null +++ b/t/004_dsm_size_max.pl @@ -0,0 +1,76 @@ +use strict; +use warnings; + +use Config; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use Test::More tests => 5; + +my $node = PostgreSQL::Test::Cluster->new('aqotest'); +$node->init; +$node->append_conf('postgresql.conf', qq{ +shared_preload_libraries = 'aqo' +aqo.mode = 'learn' +log_statement = 'ddl' +aqo.join_threshold = 0 +aqo.dsm_size_max = 4 +aqo.fs_max_items = 30000 +aqo.querytext_max_size = 1000000 +}); + +# Disable connection default settings, forced by PGOPTIONS in AQO Makefile +$ENV{PGOPTIONS}=""; + +# General purpose variables. +my $long_string = 'a' x 1000000; + +$node->start(); +$node->psql('postgres', 'CREATE EXTENSION aqo;'); + +for my $i (1 .. 3) { + $node->psql('postgres', "select aqo_query_texts_update(" . $i . ", \'" . $long_string . 
"\');"); +} +$node->stop(); + +$node->append_conf('postgresql.conf', 'aqo.dsm_size_max = 1'); +is($node->start(fail_ok => 1), + 0, "node fails to start"); + +$node->append_conf('postgresql.conf', 'aqo.dsm_size_max = 4'); +is($node->start(), + 1, "node starts"); +$node->psql('postgres', 'select * from aqo_reset();'); + +$long_string = '1, ' x 10000; +for my $i (1 .. 30) { + $node->psql('postgres', "select aqo_data_update(" . $i . ", 1, 1, '{{1}}', '{1}', '{1}', '{" . $long_string . " 1}');"); +} +$node->stop(); + +$node->append_conf('postgresql.conf', 'aqo.dsm_size_max = 1'); +is($node->start(fail_ok => 1), + 0, "node fails to start"); + +$node->append_conf('postgresql.conf', 'aqo.dsm_size_max = 4'); +is($node->start(), + 1, "node starts"); +$node->psql('postgres', 'select * from aqo_reset();'); +$node->stop(); + +my $regex; +$long_string = 'a' x 100000; +$regex = qr/.*WARNING: \[AQO\] Not enough DSA\. AQO was disabled for this query/; +$node->append_conf('postgresql.conf', 'aqo.dsm_size_max = 1'); +$node->start(); +my ($stdout, $stderr); +for my $i (1 .. 20) { + $node->psql('postgres', "create table a as select s, md5(random()::text) from generate_Series(1,100) s;"); + $node->psql('postgres', + "SELECT a.s FROM a CROSS JOIN ( SELECT '" . $long_string . "' as long_string) AS extra_rows;", + stdout => \$stdout, stderr => \$stderr); + $node->psql('postgres', "drop table a"); +} +like($stderr, $regex, 'warning for exceeding the dsa limit'); +$node->stop; +done_testing(); From efbdef4562fa650a07d96d1a265f3281804dd2e3 Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Tue, 24 Oct 2023 00:54:37 +0700 Subject: [PATCH 123/134] Change aqo.querytext_max_size lower limit to 1. 
--- aqo.c | 2 +- expected/update_functions.out | 27 +++++++++++++++++++++++++++ sql/update_functions.sql | 8 ++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/aqo.c b/aqo.c index df622c12..b82591c4 100644 --- a/aqo.c +++ b/aqo.c @@ -262,7 +262,7 @@ _PG_init(void) NULL, &querytext_max_size, 1000, - 0, INT_MAX, + 1, INT_MAX, PGC_SUSET, 0, NULL, diff --git a/expected/update_functions.out b/expected/update_functions.out index 74428a35..d2e7c84c 100644 --- a/expected/update_functions.out +++ b/expected/update_functions.out @@ -417,6 +417,33 @@ SELECT aqo_data_update(1, 1, 1, '{{1}, {2}}', '{1}', '{1}', '{1, 2, 3}'); (1 row) SET aqo.mode='disabled'; +-- Testing the minimum querytext size +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET aqo.querytext_max_size = 0; +ERROR: 0 is outside the valid range for parameter "aqo.querytext_max_size" (1 .. 2147483647) +SET aqo.querytext_max_size = 1; +SELECT queryid, query_text FROM aqo_query_texts WHERE queryid > 0; + queryid | query_text +---------+------------ +(0 rows) + +SELECT aqo_query_texts_update(1, 'test'); + aqo_query_texts_update +------------------------ + t +(1 row) + +SELECT queryid, query_text FROM aqo_query_texts WHERE queryid > 0; + queryid | query_text +---------+------------ + 1 | +(1 row) + DROP EXTENSION aqo CASCADE; DROP TABLE aqo_test1, aqo_test2; DROP TABLE aqo_query_texts_dump, aqo_queries_dump, aqo_query_stat_dump, aqo_data_dump; diff --git a/sql/update_functions.sql b/sql/update_functions.sql index e2773978..4c7fee53 100644 --- a/sql/update_functions.sql +++ b/sql/update_functions.sql @@ -204,6 +204,14 @@ SELECT aqo_data_update(1, 1, 1, '{{1}, {2}}', '{1}', '{1}', '{1, 2, 3}'); SET aqo.mode='disabled'; +-- Testing the minimum querytext size +SELECT true AS success FROM aqo_reset(); +SET aqo.querytext_max_size = 0; +SET aqo.querytext_max_size = 1; +SELECT queryid, query_text FROM aqo_query_texts WHERE queryid > 0; +SELECT aqo_query_texts_update(1, 
'test'); +SELECT queryid, query_text FROM aqo_query_texts WHERE queryid > 0; + DROP EXTENSION aqo CASCADE; DROP TABLE aqo_test1, aqo_test2; From 5d9c6aa3877572a844863b0329ea5b4ae68ff6de Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Wed, 11 Oct 2023 13:34:54 +0700 Subject: [PATCH 124/134] Bugfix of look_a_like test. Add ANALYZE after creating tables to stabilize results of the test. --- expected/look_a_like.out | 82 ++++++++++++++++++++-------------------- sql/look_a_like.sql | 3 ++ 2 files changed, 45 insertions(+), 40 deletions(-) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index dc339ffa..594f017e 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -22,6 +22,7 @@ CREATE TABLE a (x1 int, x2 int, x3 int); INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; CREATE TABLE b (y1 int, y2 int, y3 int); INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; +ANALYZE a, b; -- -- Returns string-by-string explain of a query. Made for removing some strings -- from the explain output. 
@@ -90,22 +91,22 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result ------------------------------------------------------------ + result +------------------------------------------------------------- Hash Join (actual rows=5000 loops=1) AQO not used Output: a.x1, b.y1 - Hash Cond: (b.y1 = a.x1) - -> Seq Scan on public.b (actual rows=1000 loops=1) + Hash Cond: (a.x1 = b.y1) + -> Seq Scan on public.a (actual rows=50 loops=1) AQO not used - Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=50 loops=1) - Output: a.x1 - -> Seq Scan on public.a (actual rows=50 loops=1) + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 5) AND (a.x2 < 5)) + Rows Removed by Filter: 50 + -> Hash (actual rows=1000 loops=1) + Output: b.y1 + -> Seq Scan on public.b (actual rows=1000 loops=1) AQO not used - Output: a.x1 - Filter: ((a.x1 < 5) AND (a.x2 < 5)) - Rows Removed by Filter: 50 + Output: b.y1 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -191,22 +192,22 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result -------------------------------------------------------------------- + result +------------------------------------------------------------- Hash Join (actual rows=5000 loops=1) AQO not used Output: a.x1, b.y1 - Hash Cond: (b.y1 = a.x1) - -> Seq Scan on public.b (actual rows=1000 loops=1) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=50 loops=1) - Output: a.x1 - -> Seq Scan on public.a (actual rows=50 loops=1) - AQO not used - Output: a.x1 - Filter: ((a.x1 < 5) AND (a.x2 < 5) AND (a.x3 < 10)) - Rows Removed by Filter: 50 + Hash Cond: (a.x1 = b.y1) + -> Seq Scan on public.a (actual rows=50 loops=1) + AQO not used + Output: 
a.x1, a.x2, a.x3 + Filter: ((a.x1 < 5) AND (a.x2 < 5) AND (a.x3 < 10)) + Rows Removed by Filter: 50 + -> Hash (actual rows=1000 loops=1) + Output: b.y1 + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -486,34 +487,35 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L CREATE TABLE c (z1 int, z2 int, z3 int); INSERT INTO c (z1, z2, z3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; +ANALYZE c; SELECT str AS result FROM expln(' SELECT * FROM (a LEFT JOIN b ON a.x1 = b.y1) sc WHERE not exists (SELECT z1 FROM c WHERE sc.x1=c.z1 );') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result -------------------------------------------------------------------- - Hash Left Join (actual rows=0 loops=1) + result +------------------------------------------------------------------------- + Hash Right Join (actual rows=0 loops=1) AQO not used Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3 - Hash Cond: (a.x1 = b.y1) - -> Hash Anti Join (actual rows=0 loops=1) - AQO not used + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (never executed) + AQO: rows=1000 + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=0 loops=1) Output: a.x1, a.x2, a.x3 - Hash Cond: (a.x1 = c.z1) - -> Seq Scan on public.a (actual rows=100 loops=1) + -> Hash Anti Join (actual rows=0 loops=1) AQO not used Output: a.x1, a.x2, a.x3 - -> Hash (actual rows=1000 loops=1) - Output: c.z1 - -> Seq Scan on public.c (actual rows=1000 loops=1) + Hash Cond: (a.x1 = c.z1) + -> Seq Scan on public.a (actual rows=100 loops=1) AQO not used + Output: a.x1, a.x2, a.x3 + -> Hash (actual rows=1000 loops=1) Output: c.z1 - -> Hash (never executed) - Output: b.y1, b.y2, b.y3 - -> Seq Scan on public.b (never executed) - AQO: rows=1000 - Output: b.y1, b.y2, b.y3 + -> Seq Scan on public.c (actual rows=1000 
loops=1) + AQO not used + Output: c.z1 Using aqo: true AQO mode: LEARN JOINS: 2 diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index 5edef7bb..f50e4e55 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -21,6 +21,7 @@ INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM CREATE TABLE b (y1 int, y2 int, y3 int); INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; +ANALYZE a, b; -- -- Returns string-by-string explain of a query. Made for removing some strings @@ -128,6 +129,8 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L CREATE TABLE c (z1 int, z2 int, z3 int); INSERT INTO c (z1, z2, z3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; +ANALYZE c; + SELECT str AS result FROM expln(' SELECT * FROM (a LEFT JOIN b ON a.x1 = b.y1) sc WHERE From 758257a9a27b711a74f1883ea7bfc001399c1598 Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Mon, 22 Jan 2024 08:53:47 +0300 Subject: [PATCH 125/134] Fix testing with WRITE_READ_PARSE_PLAN_TREES. Change RestrictInfo to AQOClause. Add AQOConstNode to use it instead of useless nodes. Serialize/deserialize all AQOPlanNode and AQOConstNode fields. 
--- aqo_pg14.patch | 70 ++++---- cardinality_hooks.c | 3 +- hash.c | 33 ++-- path_utils.c | 409 ++++++++++++++++++++++++++++++++++++-------- path_utils.h | 38 +++- postprocessing.c | 20 +-- 6 files changed, 431 insertions(+), 142 deletions(-) diff --git a/aqo_pg14.patch b/aqo_pg14.patch index 7ee75eec..14f2899f 100644 --- a/aqo_pg14.patch +++ b/aqo_pg14.patch @@ -1,5 +1,5 @@ diff --git a/contrib/Makefile b/contrib/Makefile -index f27e458482..0c62191904 100644 +index f27e458482e..0c621919045 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -7,6 +7,7 @@ include $(top_builddir)/src/Makefile.global @@ -11,7 +11,7 @@ index f27e458482..0c62191904 100644 auto_explain \ bloom \ diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index 70551522da..958529fbab 100644 +index 70551522dac..958529fbab4 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -24,6 +24,7 @@ @@ -57,7 +57,7 @@ index 70551522da..958529fbab 100644 if (es->format == EXPLAIN_FORMAT_TEXT) appendStringInfoChar(es->str, '\n'); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index 4d9746d54a..6fa85d1c71 100644 +index 4d9746d54a0..6fa85d1c71f 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -132,6 +132,7 @@ CopyPlanFields(const Plan *from, Plan *newnode) @@ -69,35 +69,31 @@ index 4d9746d54a..6fa85d1c71 100644 /* diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c -index 58c2590698..1e06738a13 100644 +index 58c2590698c..b9f39d36e03 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -342,6 +342,7 @@ _outPlanInfo(StringInfo str, const Plan *node) WRITE_NODE_FIELD(initPlan); WRITE_BITMAPSET_FIELD(extParam); WRITE_BITMAPSET_FIELD(allParam); -+ /*WRITE_NODE_FIELD(ext_nodes); */ ++ WRITE_NODE_FIELD(ext_nodes); } /* diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c -index eaa51c5c06..6ad8b78c7d 100644 +index 
eaa51c5c062..65741a86a05 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c -@@ -1628,6 +1628,11 @@ ReadCommonPlan(Plan *local_node) +@@ -1628,6 +1628,7 @@ ReadCommonPlan(Plan *local_node) READ_NODE_FIELD(initPlan); READ_BITMAPSET_FIELD(extParam); READ_BITMAPSET_FIELD(allParam); -+ local_node->ext_nodes = NIL; -+ /* READ_NODE_FIELD(ext_nodes); -+ * Don't serialize this field. It is required to serialize RestrictInfo and -+ * EqualenceClass. -+ */ ++ READ_NODE_FIELD(ext_nodes); } /* diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index 006f91f0a8..ef9c8ec581 100644 +index 006f91f0a87..ef9c8ec5817 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -98,6 +98,11 @@ @@ -362,7 +358,7 @@ index 006f91f0a8..ef9c8ec581 100644 { double parallel_divisor = path->parallel_workers; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 0ed858f305..9d4a6c5903 100644 +index 0ed858f305a..9d4a6c59030 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -71,6 +71,7 @@ @@ -393,7 +389,7 @@ index 0ed858f305..9d4a6c5903 100644 /* diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c -index 70899e5430..34075cc87b 100644 +index 5da863d85de..5b21ffd0667 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -143,7 +143,8 @@ static List *extract_rollup_sets(List *groupingSets); @@ -406,7 +402,7 @@ index 70899e5430..34075cc87b 100644 grouping_sets_data *gd, List *target_list); static RelOptInfo *create_grouping_paths(PlannerInfo *root, -@@ -3151,7 +3152,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) +@@ -3145,7 +3146,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) */ static double get_number_of_groups(PlannerInfo *root, @@ -416,7 +412,7 @@ index 70899e5430..34075cc87b 100644 
grouping_sets_data *gd, List *target_list) { -@@ -3188,7 +3190,7 @@ get_number_of_groups(PlannerInfo *root, +@@ -3182,7 +3184,7 @@ get_number_of_groups(PlannerInfo *root, GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); double numGroups = estimate_num_groups(root, groupExprs, @@ -425,7 +421,7 @@ index 70899e5430..34075cc87b 100644 &gset, NULL); -@@ -3214,7 +3216,7 @@ get_number_of_groups(PlannerInfo *root, +@@ -3208,7 +3210,7 @@ get_number_of_groups(PlannerInfo *root, GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); double numGroups = estimate_num_groups(root, groupExprs, @@ -434,7 +430,7 @@ index 70899e5430..34075cc87b 100644 &gset, NULL); -@@ -3231,8 +3233,8 @@ get_number_of_groups(PlannerInfo *root, +@@ -3225,8 +3227,8 @@ get_number_of_groups(PlannerInfo *root, groupExprs = get_sortgrouplist_exprs(parse->groupClause, target_list); @@ -445,7 +441,7 @@ index 70899e5430..34075cc87b 100644 } } else if (parse->groupingSets) -@@ -3619,7 +3621,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, +@@ -3613,7 +3615,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, * Estimate number of groups. 
*/ dNumGroups = get_number_of_groups(root, @@ -455,7 +451,7 @@ index 70899e5430..34075cc87b 100644 gd, extra->targetList); -@@ -6425,13 +6428,15 @@ create_partial_grouping_paths(PlannerInfo *root, +@@ -6419,13 +6422,15 @@ create_partial_grouping_paths(PlannerInfo *root, if (cheapest_total_path != NULL) dNumPartialGroups = get_number_of_groups(root, @@ -474,7 +470,7 @@ index 70899e5430..34075cc87b 100644 extra->targetList); diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c -index e105a4d5f1..c5bcc9d1d1 100644 +index e105a4d5f1d..c5bcc9d1d15 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -258,6 +258,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) @@ -540,7 +536,7 @@ index e105a4d5f1..c5bcc9d1d1 100644 return ppi; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c -index 87879c9ddc..1aad8c43d9 100644 +index 87879c9ddc8..1aad8c43d92 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -143,6 +143,7 @@ @@ -573,7 +569,7 @@ index 87879c9ddc..1aad8c43d9 100644 * estimate_num_groups - Estimate number of groups in a grouped query * diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h -index e94d9e49cf..49236ced77 100644 +index e94d9e49cf6..49236ced77c 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h @@ -75,6 +75,18 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; @@ -596,7 +592,7 @@ index e94d9e49cf..49236ced77 100644 extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, ParamListInfo params, DestReceiver *dest); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h -index e370a01141..9f2f1628f5 100644 +index 3c034fa3c5e..e441674970c 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -756,6 +756,10 @@ typedef struct RelOptInfo @@ -635,7 +631,7 @@ index 
e370a01141..9f2f1628f5 100644 diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 2308c80dde..a933afa483 100644 +index 1c9357f6a77..58c005c1a9b 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -158,6 +158,9 @@ typedef struct Plan @@ -649,7 +645,7 @@ index 2308c80dde..a933afa483 100644 /* ---------------- diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h -index 2113bc82de..bcc2520cec 100644 +index 2113bc82de0..bcc2520cec5 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -39,6 +39,37 @@ typedef enum @@ -733,7 +729,7 @@ index 2113bc82de..bcc2520cec 100644 #endif /* COST_H */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h -index 2922c0cdc1..c59dce6989 100644 +index 2922c0cdc14..c59dce6989e 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -18,6 +18,10 @@ @@ -748,7 +744,7 @@ index 2922c0cdc1..c59dce6989 100644 * prototypes for pathnode.c */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h -index bf1adfc52a..9c78e0f4e0 100644 +index bf1adfc52ac..9c78e0f4e02 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -24,6 +24,12 @@ extern double cursor_tuple_fraction; @@ -765,7 +761,7 @@ index bf1adfc52a..9c78e0f4e0 100644 * prototypes for plan/planmain.c */ diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h -index 9dd444e1ff..cfaae98aa2 100644 +index 9dd444e1ff5..b0b5a656185 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -144,6 +144,13 @@ typedef bool (*get_index_stats_hook_type) (PlannerInfo *root, @@ -782,13 +778,13 @@ index 9dd444e1ff..cfaae98aa2 100644 /* Functions in selfuncs.c */ -@@ -213,6 +220,9 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause, - extern double estimate_num_groups(PlannerInfo *root, List *groupExprs, - double input_rows, List 
**pgset, - EstimationInfo *estinfo); +@@ -210,6 +217,9 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause, + Selectivity *leftstart, Selectivity *leftend, + Selectivity *rightstart, Selectivity *rightend); + +extern double estimate_num_groups_ext(PlannerInfo *root, List *groupExprs, + Path *subpath, RelOptInfo *grouped_rel, + List **pgset, EstimationInfo *estinfo); - - extern void estimate_hash_bucket_stats(PlannerInfo *root, - Node *hashkey, double nbuckets, + extern double estimate_num_groups(PlannerInfo *root, List *groupExprs, + double input_rows, List **pgset, + EstimationInfo *estinfo); diff --git a/cardinality_hooks.c b/cardinality_hooks.c index fd2f970c..ceb9612a 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -187,8 +187,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, forboth(l, allclauses, l2, selectivities) { - current_hash = get_clause_hash( - ((RestrictInfo *) lfirst(l))->clause, + current_hash = get_clause_hash(((AQOClause *) lfirst(l))->clause, nargs, args_hash, eclass_hash); cache_selectivity(current_hash, rel->relid, rte->relid, *((double *) lfirst(l2))); diff --git a/hash.c b/hash.c index dfb4a55c..1f8d36bd 100644 --- a/hash.c +++ b/hash.c @@ -27,6 +27,7 @@ #include "aqo.h" #include "hash.h" +#include "path_utils.h" static int get_str_hash(const char *str); static int get_node_hash(Node *node); @@ -218,11 +219,11 @@ get_fss_for_object(List *relsigns, List *clauselist, i = 0; foreach(lc, clauselist) { - RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + AQOClause *clause = (AQOClause *) lfirst(lc); - clause_hashes[i] = get_clause_hash(rinfo->clause, + clause_hashes[i] = get_clause_hash(clause->clause, nargs, args_hash, eclass_hash); - args = get_clause_args_ptr(rinfo->clause); + args = get_clause_args_ptr(clause->clause); clause_has_consts[i] = (args != NULL && has_consts(*args)); i++; } @@ -317,14 +318,14 @@ get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash) cclause = 
copyObject(clause); args = get_clause_args_ptr(cclause); + /* XXX: Why does it work even if this loop is removed? */ foreach(l, *args) { arg_eclass = get_arg_eclass(get_node_hash(lfirst(l)), nargs, args_hash, eclass_hash); if (arg_eclass != 0) { - lfirst(l) = makeNode(Param); - ((Param *) lfirst(l))->paramid = arg_eclass; + lfirst(l) = create_aqo_const_node(AQO_NODE_EXPR, arg_eclass); } } if (!clause_is_eq_clause(clause) || has_consts(*args)) @@ -554,7 +555,7 @@ get_arg_eclass(int arg_hash, int nargs, int *args_hash, int *eclass_hash) static void get_clauselist_args(List *clauselist, int *nargs, int **args_hash) { - RestrictInfo *rinfo; + AQOClause *clause; List **args; ListCell *l; ListCell *l2; @@ -564,9 +565,9 @@ get_clauselist_args(List *clauselist, int *nargs, int **args_hash) foreach(l, clauselist) { - rinfo = (RestrictInfo *) lfirst(l); - args = get_clause_args_ptr(rinfo->clause); - if (args != NULL && clause_is_eq_clause(rinfo->clause)) + clause = (AQOClause *) lfirst(l); + args = get_clause_args_ptr(clause->clause); + if (args != NULL && clause_is_eq_clause(clause->clause)) foreach(l2, *args) if (!IsA(lfirst(l2), Const)) cnt++; @@ -575,9 +576,9 @@ get_clauselist_args(List *clauselist, int *nargs, int **args_hash) *args_hash = palloc(cnt * sizeof(**args_hash)); foreach(l, clauselist) { - rinfo = (RestrictInfo *) lfirst(l); - args = get_clause_args_ptr(rinfo->clause); - if (args != NULL && clause_is_eq_clause(rinfo->clause)) + clause = (AQOClause *) lfirst(l); + args = get_clause_args_ptr(clause->clause); + if (args != NULL && clause_is_eq_clause(clause->clause)) foreach(l2, *args) if (!IsA(lfirst(l2), Const)) (*args_hash)[i++] = get_node_hash(lfirst(l2)); @@ -632,7 +633,7 @@ disjoint_set_merge_eclasses(int *p, int v1, int v2) static int * perform_eclasses_join(List *clauselist, int nargs, int *args_hash) { - RestrictInfo *rinfo; + AQOClause *clause; int *p; ListCell *l, *l2; @@ -646,9 +647,9 @@ perform_eclasses_join(List *clauselist, int nargs, int 
*args_hash) foreach(l, clauselist) { - rinfo = (RestrictInfo *) lfirst(l); - args = get_clause_args_ptr(rinfo->clause); - if (args != NULL && clause_is_eq_clause(rinfo->clause)) + clause = (AQOClause *) lfirst(l); + args = get_clause_args_ptr(clause->clause); + if (args != NULL && clause_is_eq_clause(clause->clause)) { i3 = -1; foreach(l2, *args) diff --git a/path_utils.c b/path_utils.c index 2e7ad4ca..8feefbdf 100644 --- a/path_utils.c +++ b/path_utils.c @@ -22,6 +22,7 @@ #include "storage/lmgr.h" #include "utils/syscache.h" #include "utils/lsyscache.h" +#include "common/shortest_dec.h" #include "aqo.h" #include "hash.h" @@ -34,7 +35,8 @@ static AQOPlanNode DefaultAQOPlanNode = .node.type = T_ExtensibleNode, .node.extnodename = AQO_PLAN_NODE, .had_path = false, - .rels = NULL, + .rels.hrels = NIL, + .rels.signatures = NIL, .clauses = NIL, .selectivities = NIL, .grouping_exprs = NIL, @@ -42,18 +44,39 @@ static AQOPlanNode DefaultAQOPlanNode = .parallel_divisor = -1., .was_parametrized = false, .fss = INT_MAX, - .prediction = -1 + .prediction = -1. }; /* * Hook on creation of a plan node. We need to store AQO-specific data to * support learning stage. 
*/ -static create_plan_hook_type aqo_create_plan_next = NULL; +static create_plan_hook_type aqo_create_plan_next = NULL; -/*static create_upper_paths_hook_type aqo_create_upper_paths_next = NULL;*/ +/*static create_upper_paths_hook_type aqo_create_upper_paths_next = NULL;*/ +/* Return a copy of the given list of AQOClause structs */ +static List * +copy_aqo_clauses(List *src) +{ + List *result = NIL; + ListCell *lc; + + foreach(lc, src) + { + AQOClause *old = (AQOClause *) lfirst(lc); + AQOClause *new = palloc(sizeof(AQOClause)); + + memcpy(new, old, sizeof(AQOClause)); + new->clause = copyObject(old->clause); + + result = lappend(result, (void *) new); + } + + return result; +} + static AQOPlanNode * create_aqo_plan_node() { @@ -61,12 +84,20 @@ create_aqo_plan_node() T_ExtensibleNode); Assert(node != NULL); memcpy(node, &DefaultAQOPlanNode, sizeof(AQOPlanNode)); - node->rels = palloc(sizeof(RelSortOut)); - node->rels->hrels = NIL; - node->rels->signatures = NIL; return node; } +AQOConstNode * +create_aqo_const_node(AQOConstType type, int fss) +{ + AQOConstNode *node = (AQOConstNode *) newNode(sizeof(AQOConstNode), + T_ExtensibleNode); + Assert(node != NULL); + node->node.extnodename = AQO_CONST_NODE; + node->type = type; + node->fss = fss; + return node; +} /* Ensure that it's postgres_fdw's foreign server oid */ static bool @@ -271,13 +302,8 @@ subplan_hunter(Node *node, void *context) if (IsA(node, SubPlan)) { - A_Const *fss = makeNode(A_Const); - - fss->val.type = T_Integer; - fss->location = -1; - fss->val.val.ival = 0; - return (Node *) fss; - + /* TODO: use fss of SubPlan here */ + return (Node *) create_aqo_const_node(AQO_NODE_SUBPLAN, 0); } return expression_tree_mutator(node, subplan_hunter, context); } @@ -287,8 +313,8 @@ subplan_hunter(Node *node, void *context) * During this operation clauses could be changed and we couldn't walk across * this list next. 
*/ -List * -aqo_get_clauses(PlannerInfo *root, List *restrictlist) +static List * +aqo_get_raw_clauses(PlannerInfo *root, List *restrictlist) { List *clauses = NIL; ListCell *lc; @@ -306,14 +332,49 @@ aqo_get_clauses(PlannerInfo *root, List *restrictlist) return clauses; } +static List * +copy_aqo_clauses_from_rinfo(List *src) +{ + List *result = NIL; + ListCell *lc; + + foreach(lc, src) + { + RestrictInfo *old = (RestrictInfo *) lfirst(lc); + AQOClause *new = palloc(sizeof(AQOClause)); + + new->clause = copyObject(old->clause); + new->norm_selec = old->norm_selec; + new->outer_selec = old->outer_selec; + + result = lappend(result, (void *) new); + } + + return result; +} + /* - * For given path returns the list of all clauses used in it. - * Also returns selectivities for the clauses throw the selectivities variable. - * Both clauses and selectivities returned lists are copies and therefore - * may be modified without corruption of the input data. + * Return copy of clauses returned from the aqo_get_raw_clause() routine + * and convert it into AQOClause struct. */ List * -get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) +aqo_get_clauses(PlannerInfo *root, List *restrictlist) +{ + List *clauses = aqo_get_raw_clauses(root, restrictlist); + List *result = copy_aqo_clauses_from_rinfo(clauses); + + list_free_deep(clauses); + return result; +} + +/* + * Returns a list of all used clauses for the given path. + * Also returns selectivities for the clauses to 'selectivities' variable. + * The returned list of the selectivities is a copy and therefore + * may be modified without corruption of the input data. 
+ */ +static List * +get_path_clauses_recurse(Path *path, PlannerInfo *root, List **selectivities) { List *inner; List *inner_sel = NIL; @@ -333,98 +394,98 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) case T_NestPath: case T_MergePath: case T_HashPath: - cur = ((JoinPath *) path)->joinrestrictinfo; + cur = list_concat(cur, ((JoinPath *) path)->joinrestrictinfo); /* Not quite correct to avoid sjinfo, but we believe in caching */ cur_sel = get_selectivities(root, cur, 0, ((JoinPath *) path)->jointype, NULL); - outer = get_path_clauses(((JoinPath *) path)->outerjoinpath, root, + outer = get_path_clauses_recurse(((JoinPath *) path)->outerjoinpath, root, &outer_sel); - inner = get_path_clauses(((JoinPath *) path)->innerjoinpath, root, + inner = get_path_clauses_recurse(((JoinPath *) path)->innerjoinpath, root, &inner_sel); *selectivities = list_concat(cur_sel, list_concat(outer_sel, inner_sel)); - return list_concat(list_copy(cur), list_concat(outer, inner)); + return list_concat(cur, list_concat(outer, inner)); break; case T_UniquePath: - return get_path_clauses(((UniquePath *) path)->subpath, root, + return get_path_clauses_recurse(((UniquePath *) path)->subpath, root, selectivities); break; case T_GatherPath: case T_GatherMergePath: - return get_path_clauses(((GatherPath *) path)->subpath, root, + return get_path_clauses_recurse(((GatherPath *) path)->subpath, root, selectivities); break; case T_MaterialPath: - return get_path_clauses(((MaterialPath *) path)->subpath, root, + return get_path_clauses_recurse(((MaterialPath *) path)->subpath, root, selectivities); break; case T_MemoizePath: - return get_path_clauses(((MemoizePath *) path)->subpath, root, + return get_path_clauses_recurse(((MemoizePath *) path)->subpath, root, selectivities); break; case T_ProjectionPath: - return get_path_clauses(((ProjectionPath *) path)->subpath, root, + return get_path_clauses_recurse(((ProjectionPath *) path)->subpath, root, selectivities); break; case 
T_ProjectSetPath: - return get_path_clauses(((ProjectSetPath *) path)->subpath, root, + return get_path_clauses_recurse(((ProjectSetPath *) path)->subpath, root, selectivities); break; case T_SortPath: - return get_path_clauses(((SortPath *) path)->subpath, root, + return get_path_clauses_recurse(((SortPath *) path)->subpath, root, selectivities); break; case T_IncrementalSortPath: { IncrementalSortPath *p = (IncrementalSortPath *) path; - return get_path_clauses(p->spath.subpath, root, + return get_path_clauses_recurse(p->spath.subpath, root, selectivities); } break; case T_GroupPath: - return get_path_clauses(((GroupPath *) path)->subpath, root, + return get_path_clauses_recurse(((GroupPath *) path)->subpath, root, selectivities); break; case T_UpperUniquePath: - return get_path_clauses(((UpperUniquePath *) path)->subpath, root, + return get_path_clauses_recurse(((UpperUniquePath *) path)->subpath, root, selectivities); break; case T_AggPath: - return get_path_clauses(((AggPath *) path)->subpath, root, + return get_path_clauses_recurse(((AggPath *) path)->subpath, root, selectivities); break; case T_GroupingSetsPath: - return get_path_clauses(((GroupingSetsPath *) path)->subpath, root, + return get_path_clauses_recurse(((GroupingSetsPath *) path)->subpath, root, selectivities); break; case T_WindowAggPath: - return get_path_clauses(((WindowAggPath *) path)->subpath, root, + return get_path_clauses_recurse(((WindowAggPath *) path)->subpath, root, selectivities); break; case T_SetOpPath: - return get_path_clauses(((SetOpPath *) path)->subpath, root, + return get_path_clauses_recurse(((SetOpPath *) path)->subpath, root, selectivities); break; case T_LockRowsPath: - return get_path_clauses(((LockRowsPath *) path)->subpath, root, + return get_path_clauses_recurse(((LockRowsPath *) path)->subpath, root, selectivities); break; case T_LimitPath: - return get_path_clauses(((LimitPath *) path)->subpath, root, + return get_path_clauses_recurse(((LimitPath *) path)->subpath, 
root, selectivities); break; case T_SubqueryScanPath: /* Recursing into Subquery we must use subroot */ Assert(path->parent->subroot != NULL); - return get_path_clauses(((SubqueryScanPath *) path)->subpath, + return get_path_clauses_recurse(((SubqueryScanPath *) path)->subpath, path->parent->subroot, selectivities); break; case T_ModifyTablePath: - return get_path_clauses(((ModifyTablePath *) path)->subpath, root, + return get_path_clauses_recurse(((ModifyTablePath *) path)->subpath, root, selectivities); break; /* TODO: RecursiveUnionPath */ @@ -441,11 +502,11 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) { Path *subpath = lfirst(lc); - cur = list_concat(cur, list_copy( - get_path_clauses(subpath, root, selectivities))); + cur = list_concat(cur, + get_path_clauses_recurse(subpath, root, selectivities)); cur_sel = list_concat(cur_sel, *selectivities); } - cur = list_concat(cur, aqo_get_clauses(root, + cur = list_concat(cur, aqo_get_raw_clauses(root, path->parent->baserestrictinfo)); *selectivities = list_concat(cur_sel, get_selectivities(root, @@ -457,7 +518,7 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) case T_ForeignPath: /* The same as in the default case */ default: - cur = list_concat(list_copy(path->parent->baserestrictinfo), + cur = list_concat(list_concat(cur, path->parent->baserestrictinfo), path->param_info ? path->param_info->ppi_clauses : NIL); if (path->param_info) @@ -466,12 +527,26 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) else cur_sel = get_selectivities(root, cur, 0, JOIN_INNER, NULL); *selectivities = cur_sel; - cur = aqo_get_clauses(root, cur); + cur = aqo_get_raw_clauses(root, cur); return cur; break; } } +/* + * Returns a list of AQOClauses for the given path, which is a copy + * of the clauses returned from the get_path_clauses_recurse() routine. + * Also returns selectivities for the clauses to 'selectivities' variable. 
+ * Both returned lists are copies and therefore may be modified without + * corruption of the input data. + */ +List * +get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) +{ + return copy_aqo_clauses_from_rinfo( + get_path_clauses_recurse(path, root, selectivities)); +} + /* * Some of paths are kind of utility path. I mean, It isn't corresponding to * specific RelOptInfo node. So, it should be omitted in process of clauses @@ -578,7 +653,7 @@ aqo_create_plan(PlannerInfo *root, Path *src, Plan **dest) (*dest)->lefttree->targetlist); /* Copy bare expressions for further AQO learning case. */ node->grouping_exprs = copyObject(groupExprs); - get_list_of_relids(root, ap->subpath->parent->relids, node->rels); + get_list_of_relids(root, ap->subpath->parent->relids, &node->rels); node->jointype = JOIN_INNER; } else if (is_appropriate_path(src)) @@ -589,7 +664,7 @@ aqo_create_plan(PlannerInfo *root, Path *src, Plan **dest) node->jointype = JOIN_INNER; } - get_list_of_relids(root, src->parent->relids, node->rels); + get_list_of_relids(root, src->parent->relids, &node->rels); if (src->parallel_workers > 0) node->parallel_divisor = get_parallel_divisor(src); @@ -624,15 +699,19 @@ AQOnodeCopy(struct ExtensibleNode *enew, const struct ExtensibleNode *eold) Assert(strcmp(old->node.extnodename, AQO_PLAN_NODE) == 0); Assert(new && old); - /* Copy static fields in one command */ - memcpy(new, old, sizeof(AQOPlanNode)); + /* + * Copy static fields in one command. + * But do not copy fields of the old->node. + * Elsewise, we can use pointers that will be freed. + * For example, it is old->node.extnodename. + */ + memcpy(&new->had_path, &old->had_path, sizeof(AQOPlanNode) - offsetof(AQOPlanNode, had_path)); /* These lists couldn't contain AQO nodes. 
Use basic machinery */ - new->rels = palloc(sizeof(RelSortOut)); - new->rels->hrels = list_copy(old->rels->hrels); - new->rels->signatures = list_copy(old->rels->signatures); + new->rels.hrels = list_copy(old->rels.hrels); + new->rels.signatures = list_copy(old->rels.signatures); - new->clauses = copyObject(old->clauses); + new->clauses = copy_aqo_clauses(old->clauses); new->grouping_exprs = copyObject(old->grouping_exprs); new->selectivities = copyObject(old->selectivities); enew = (ExtensibleNode *) new; @@ -644,6 +723,39 @@ AQOnodeEqual(const struct ExtensibleNode *a, const struct ExtensibleNode *b) return false; } +static void +AQOconstCopy(struct ExtensibleNode *enew, const struct ExtensibleNode *eold) +{ + AQOConstNode *new = (AQOConstNode *) enew; + AQOConstNode *old = (AQOConstNode *) eold; + + Assert(IsA(old, ExtensibleNode)); + Assert(strcmp(old->node.extnodename, AQO_CONST_NODE) == 0); + Assert(new && old); + + new->type = old->type; + new->fss = old->fss; + enew = (ExtensibleNode *) new; +} + +static bool +AQOconstEqual(const struct ExtensibleNode *a, const struct ExtensibleNode *b) +{ + return false; +} + +/* + * Convert a double value, attempting to ensure the value is preserved exactly. 
+ */ +static void +outDouble(StringInfo str, double d) +{ + char buf[DOUBLE_SHORTEST_DECIMAL_LEN]; + + double_to_shortest_decimal_buf(d, buf); + appendStringInfoString(str, buf); +} + #define WRITE_INT_FIELD(fldname) \ appendStringInfo(str, " :" CppAsString(fldname) " %d", node->fldname) @@ -661,17 +773,57 @@ AQOnodeEqual(const struct ExtensibleNode *a, const struct ExtensibleNode *b) appendStringInfo(str, " :" CppAsString(fldname) " %d", \ (int) node->fldname) -/* Write a float field --- caller must give format to define precision */ -#define WRITE_FLOAT_FIELD(fldname,format) \ - appendStringInfo(str, " :" CppAsString(fldname) " " format, node->fldname) +/* Write a float field */ +#define WRITE_FLOAT_FIELD(fldname) \ + (appendStringInfo(str, " :" CppAsString(fldname) " "), \ + outDouble(str, node->fldname)) + +/* The start part of a custom list writer */ +#define WRITE_CUSTOM_LIST_START(fldname) \ + { \ + appendStringInfo(str, " :N_" CppAsString(fldname) " %d ", \ + list_length(node->fldname)); \ + /* Serialize this list like an array */ \ + if (list_length(node->fldname)) \ + { \ + ListCell *lc; \ + appendStringInfo(str, "("); \ + foreach (lc, node->fldname) + +/* The end part of a custom list writer */ +#define WRITE_CUSTOM_LIST_END() \ + appendStringInfo(str, " )"); \ + } \ + else \ + appendStringInfo(str, "<>"); \ + } + +/* Write a list of int values */ +#define WRITE_INT_LIST(fldname) \ + WRITE_CUSTOM_LIST_START(fldname) \ + { \ + int val = lfirst_int(lc); \ + appendStringInfo(str, " %d", val); \ + } \ + WRITE_CUSTOM_LIST_END() + +/* Write a list of AQOClause values */ +#define WRITE_AQOCLAUSE_LIST(fldname) \ + WRITE_CUSTOM_LIST_START(clauses) \ + { \ + AQOClause *node = lfirst(lc); \ + /* Serialize this struct like a node */ \ + appendStringInfo(str, " {"); \ + WRITE_NODE_FIELD(clause); \ + WRITE_FLOAT_FIELD(norm_selec); \ + WRITE_FLOAT_FIELD(outer_selec); \ + appendStringInfo(str, " }"); \ + } \ + WRITE_CUSTOM_LIST_END() /* * Serialize AQO plan node to a 
string. * - * Right now we can't correctly serialize all fields of the node. Taking into - * account that this action needed when a plan moves into parallel workers or - * just during debugging, we serialize it only partially, just for debug - * purposes. * Some extensions may manipulate by parts of serialized plan too. */ static void @@ -679,9 +831,36 @@ AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) { AQOPlanNode *node = (AQOPlanNode *) enode; - /* For Adaptive optimization DEBUG purposes */ + WRITE_BOOL_FIELD(had_path); + + WRITE_NODE_FIELD(rels.hrels); + WRITE_INT_LIST(rels.signatures); + + WRITE_AQOCLAUSE_LIST(clauses); + + WRITE_NODE_FIELD(selectivities); + WRITE_NODE_FIELD(grouping_exprs); + WRITE_ENUM_FIELD(jointype, JoinType); + + WRITE_FLOAT_FIELD(parallel_divisor); + WRITE_BOOL_FIELD(was_parametrized); + + WRITE_INT_FIELD(fss); + WRITE_FLOAT_FIELD(prediction); +} + +/* + * Serialize AQO const node to a string. + * + * Some extensions may manipulate by parts of serialized plan too. 
+ */ +static void +AQOconstOut(struct StringInfoData *str, const struct ExtensibleNode *enode) +{ + AQOConstNode *node = (AQOConstNode *) enode; + + WRITE_ENUM_FIELD(type, AQOConstType); WRITE_INT_FIELD(fss); - WRITE_FLOAT_FIELD(prediction, "%.0f"); } /* Read an integer field (anything written as ":fldname %d") */ @@ -714,6 +893,54 @@ AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) (void) token; /* in case not used elsewhere */ \ local_node->fldname = nodeRead(NULL, 0) +/* The start part of a custom list reader */ +#define READ_CUSTOM_LIST_START() \ + { \ + int counter; \ + token = pg_strtok(&length); /* skip the name */ \ + token = pg_strtok(&length); \ + counter = atoi(token); \ + token = pg_strtok(&length); /* left bracket "(" */ \ + if (length) \ + { \ + for (int i = 0; i < counter; i++) + +/* The end part of a custom list reader */ +#define READ_CUSTOM_LIST_END(fldname) \ + token = pg_strtok(&length); /* right bracket ")" */ \ + } \ + else \ + local_node->fldname = NIL; \ + } + +/* Read a list of int values */ +#define READ_INT_LIST(fldname) \ + READ_CUSTOM_LIST_START() \ + { \ + int val; \ + token = pg_strtok(&length); \ + val = atoi(token); \ + local_node->fldname = lappend_int( \ + local_node->fldname, val); \ + } \ + READ_CUSTOM_LIST_END(fldname) + +/* Read a list of AQOClause values */ +#define READ_AQOCLAUSE_LIST(fldname) \ + READ_CUSTOM_LIST_START() \ + { \ + /* copy to use in the inner blocks of code */ \ + AQOPlanNode *node_copy = local_node; \ + AQOClause *local_node = palloc(sizeof(AQOClause)); \ + token = pg_strtok(&length); /* left bracket "{" */ \ + READ_NODE_FIELD(clause); \ + READ_FLOAT_FIELD(norm_selec); \ + READ_FLOAT_FIELD(outer_selec); \ + token = pg_strtok(&length); /* right bracket "}" */ \ + node_copy->fldname = lappend(node_copy->fldname, local_node); \ + } \ + READ_CUSTOM_LIST_END(fldname) + /* * Deserialize AQO plan node from a string to internal representation. 
* @@ -726,22 +953,41 @@ AQOnodeRead(struct ExtensibleNode *enode) const char *token; int length; - local_node->had_path = false; - local_node->jointype = 0; - local_node->parallel_divisor = 1.0; - local_node->was_parametrized = false; + READ_BOOL_FIELD(had_path); + + READ_NODE_FIELD(rels.hrels); + READ_INT_LIST(rels.signatures); + + READ_AQOCLAUSE_LIST(clauses); + + READ_NODE_FIELD(selectivities); + READ_NODE_FIELD(grouping_exprs); + READ_ENUM_FIELD(jointype, JoinType); - local_node->rels = palloc0(sizeof(RelSortOut)); - local_node->clauses = NIL; - local_node->selectivities = NIL; - local_node->grouping_exprs = NIL; + READ_FLOAT_FIELD(parallel_divisor); + READ_BOOL_FIELD(was_parametrized); - /* For Adaptive optimization DEBUG purposes */ READ_INT_FIELD(fss); READ_FLOAT_FIELD(prediction); } -static const ExtensibleNodeMethods method = +/* + * Deserialize AQO const node from a string to internal representation. + * + * Should work in coherence with AQOconstOut(). + */ +static void +AQOconstRead(struct ExtensibleNode *enode) +{ + AQOConstNode *local_node = (AQOConstNode *) enode; + const char *token; + int length; + + READ_ENUM_FIELD(type, AQOConstType); + READ_INT_FIELD(fss); +} + +static const ExtensibleNodeMethods aqo_node_method = { .extnodename = AQO_PLAN_NODE, .node_size = sizeof(AQOPlanNode), @@ -751,10 +997,21 @@ static const ExtensibleNodeMethods method = .nodeRead = AQOnodeRead }; +static const ExtensibleNodeMethods aqo_const_method = +{ + .extnodename = AQO_CONST_NODE, + .node_size = sizeof(AQOConstNode), + .nodeCopy = AQOconstCopy, + .nodeEqual = AQOconstEqual, + .nodeOut = AQOconstOut, + .nodeRead = AQOconstRead +}; + void RegisterAQOPlanNodeMethods(void) { - RegisterExtensibleNodeMethods(&method); + RegisterExtensibleNodeMethods(&aqo_node_method); + RegisterExtensibleNodeMethods(&aqo_const_method); } /* diff --git a/path_utils.h b/path_utils.h index cbe83da0..0d5d68bd 100644 --- a/path_utils.h +++ b/path_utils.h @@ -6,6 +6,7 @@ #include 
"optimizer/planner.h" #define AQO_PLAN_NODE "AQOPlanNode" +#define AQO_CONST_NODE "AQOConstNode" /* * Find and sort out relations that used in the query: @@ -20,6 +21,20 @@ typedef struct * table or on a table structure for temp table */ } RelSortOut; +/* + * Fields of the RestrictInfo needed in the AQOPlanNode + */ +typedef struct AQOClause +{ + /* the represented clause of WHERE or JOIN */ + Expr *clause; + /* selectivity for "normal" (JOIN_INNER) semantics; -1 if not yet set */ + Selectivity norm_selec; + /* selectivity for outer join semantics; -1 if not yet set */ + Selectivity outer_selec; + +} AQOClause; + /* * information for adaptive query optimization */ @@ -27,7 +42,7 @@ typedef struct AQOPlanNode { ExtensibleNode node; bool had_path; - RelSortOut *rels; + RelSortOut rels; List *clauses; List *selectivities; @@ -43,6 +58,25 @@ typedef struct AQOPlanNode double prediction; } AQOPlanNode; +/* + * The type of a node that is replaced by AQOConstNode. + */ +typedef enum AQOConstType +{ + AQO_NODE_EXPR = 0, + AQO_NODE_SUBPLAN +} AQOConstType; + +/* + * A custom node that is used to calcucate a fss instead of regular node, + * such as SubPlan or Expr. + */ +typedef struct AQOConstNode +{ + ExtensibleNode node; + AQOConstType type; /* The type of the replaced node */ + int fss; /* The fss of the replaced node */ +} AQOConstNode; #define strtobool(x) ((*(x) == 't') ? 
true : false) @@ -64,6 +98,8 @@ extern List *get_path_clauses(Path *path, PlannerInfo *root, List **selectivities); +extern AQOConstNode *create_aqo_const_node(AQOConstType type, int fss); + extern AQOPlanNode *get_aqo_plan_node(Plan *plan, bool create); extern void RegisterAQOPlanNodeMethods(void); diff --git a/postprocessing.c b/postprocessing.c index 7df0a253..e166f84c 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -197,12 +197,12 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, foreach(l, clauselist) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - Selectivity *cur_sel = NULL; + AQOClause *clause = (AQOClause *) lfirst(l); + Selectivity *cur_sel = NULL; if (parametrized_sel) { - cur_hash = get_clause_hash(rinfo->clause, nargs, + cur_hash = get_clause_hash(clause->clause, nargs, args_hash, eclass_hash); cur_sel = selectivity_cache_find_global_relid(cur_hash, cur_relid); } @@ -212,9 +212,9 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, cur_sel = palloc(sizeof(double)); if (join_type == JOIN_INNER) - *cur_sel = rinfo->norm_selec; + *cur_sel = clause->norm_selec; else - *cur_sel = rinfo->outer_selec; + *cur_sel = clause->outer_selec; if (*cur_sel < 0) *cur_sel = 0; @@ -500,7 +500,7 @@ learnOnPlanState(PlanState *p, void *context) List *cur_selectivities; cur_selectivities = restore_selectivities(aqo_node->clauses, - aqo_node->rels->hrels, + aqo_node->rels.hrels, aqo_node->jointype, aqo_node->was_parametrized); SubplanCtx.selectivities = list_concat(SubplanCtx.selectivities, @@ -508,14 +508,14 @@ learnOnPlanState(PlanState *p, void *context) SubplanCtx.clauselist = list_concat(SubplanCtx.clauselist, list_copy(aqo_node->clauses)); - if (aqo_node->rels->hrels != NIL) + if (aqo_node->rels.hrels != NIL) { /* * This plan can be stored as a cached plan. In the case we will have * bogus path_relids field (changed by list_concat routine) at the * next usage (and aqo-learn) of this plan. 
*/ - ctx->relidslist = list_copy(aqo_node->rels->hrels); + ctx->relidslist = list_copy(aqo_node->rels.hrels); if (p->instrument) { @@ -527,12 +527,12 @@ learnOnPlanState(PlanState *p, void *context) { if (IsA(p, AggState)) learn_agg_sample(&SubplanCtx, - aqo_node->rels, learn_rows, rfactor, + &aqo_node->rels, learn_rows, rfactor, p->plan, notExecuted); else learn_sample(&SubplanCtx, - aqo_node->rels, learn_rows, rfactor, + &aqo_node->rels, learn_rows, rfactor, p->plan, notExecuted); } } From 5bda28306ee74e239750195f76abd4a615f07e63 Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Wed, 29 Nov 2023 14:34:23 +0700 Subject: [PATCH 126/134] Change the logic for equivalence classes. It now uses EquivalenceClass structures to indicate which clauses are equivalent. --- cardinality_hooks.c | 2 +- expected/eclasses.out | 1085 +++++++++++++++++++++++++++++++++ expected/eclasses_mchar.out | 6 + expected/eclasses_mchar_1.out | 181 ++++++ hash.c | 264 +++----- hash.h | 4 +- path_utils.c | 55 ++ path_utils.h | 9 + postprocessing.c | 4 +- regress_schedule | 2 + sql/eclasses.sql | 394 ++++++++++++ sql/eclasses_mchar.sql | 73 +++ 12 files changed, 1894 insertions(+), 185 deletions(-) create mode 100644 expected/eclasses.out create mode 100644 expected/eclasses_mchar.out create mode 100644 expected/eclasses_mchar_1.out create mode 100644 sql/eclasses.sql create mode 100644 sql/eclasses_mchar.sql diff --git a/cardinality_hooks.c b/cardinality_hooks.c index ceb9612a..cb3664e8 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -187,7 +187,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, forboth(l, allclauses, l2, selectivities) { - current_hash = get_clause_hash(((AQOClause *) lfirst(l))->clause, + current_hash = get_clause_hash((AQOClause *) lfirst(l), nargs, args_hash, eclass_hash); cache_selectivity(current_hash, rel->relid, rte->relid, *((double *) lfirst(l2))); diff --git a/expected/eclasses.out b/expected/eclasses.out new file mode 100644 index 
00000000..01650286 --- /dev/null +++ b/expected/eclasses.out @@ -0,0 +1,1085 @@ +-- Testing for working with equivalence classes +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +SET aqo.mode = 'forced'; +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +-- Integer fields +CREATE TABLE aqo_test_int(a int, b int, c int); +INSERT INTO aqo_test_int SELECT (x/10)::int, (x/100)::int, (x/1000)::int +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_int; +CREATE TABLE aqo_test_int1(a int, b int, c int); +INSERT INTO aqo_test_int1 SELECT (x/10)::int, (x/10)::int, (x/10)::int +FROM generate_series(0, 999) x; +ANALYZE aqo_test_int1; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on 
aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((b = c) AND (a = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND a = any('{0, 1, 2}'::int[]); + QUERY PLAN +---------------------------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((b = a) AND (a = c) AND (a = ANY ('{0,1,2}'::integer[]))) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND a = all('{0, 1, 2}'::int[]); + QUERY PLAN +---------------------------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=0 loops=1) + AQO not used + Filter: ((b = a) AND (a = c) AND (a = ALL ('{0,1,2}'::integer[]))) + Rows Removed by Filter: 10000 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 5 +SELECT count(*) FROM aqo_data; + count +------- + 5 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + 
AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = c AND b = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND b = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND c = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND b = 0 AND c = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND 0 = a AND 0 = b AND 0 = c; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((b = 0) AND (a = 0) AND (c = 0)) + 
Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 1 +SELECT count(*) FROM aqo_data; + count +------- + 1 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Tests with JOIN clauses. +-- Case 1. +-- 4 cols in 1 eclass, all of them is 0. +-- 3 nodes with unique FSS. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + result +----------------------------------------------------------- + Nested Loop (actual rows=100 loops=1) + AQO not used + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Seq Scan on aqo_test_int1 (actual rows=10 loops=10) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 990 + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(13 rows) + +-- Case 2. +-- 4 cols in 2 eclasses, 2 is 0 and 2 is 1. +-- The top node must be unique, but all of nodes like in a query of case 1. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON 1 = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + result +---------------------------------------------------------------- + Nested Loop (actual rows=100 loops=1) + AQO not used + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Materialize (actual rows=10 loops=10) + AQO: rows=10, error=0% + -> Seq Scan on aqo_test_int1 (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 1) AND (b = 1)) + Rows Removed by Filter: 990 + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(15 rows) + +-- Case 3. 
+-- 4 cols in 2 eclasses, 2 is 0 and 2 is equal but not a const. +-- 1 scan node with FSS like in case 2 and 2 nodes with unique FSS. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = 0') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Nested Loop (actual rows=10000 loops=1) + AQO not used + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Materialize (actual rows=1000 loops=10) + AQO not used + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO not used + Filter: (a = b) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +-- Case 4. +-- 4 cols in 1 eclass, all of them is 0. +-- 3 nodes with unique FSS. This is not case 1, because it is SEMI-JOIN. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +WHERE EXISTS ( + SELECT * FROM aqo_test_int1 + WHERE a = b AND t1.a = a)') AS str +WHERE str NOT LIKE '%Memory%'; + result +---------------------------------------------------------- + Nested Loop Semi Join (actual rows=10 loops=1) + AQO not used + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Seq Scan on aqo_test_int1 (actual rows=1 loops=10) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(12 rows) + +-- Case 5. +-- 4 cols in 1 eclass, all of them is 0. +-- The top node with unique FSS. Leaf nodes like in the case 4. 
+SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +WHERE NOT EXISTS ( + SELECT * FROM aqo_test_int1 + WHERE a = b AND t1.a = a)') AS str +WHERE str NOT LIKE '%Memory%'; + result +--------------------------------------------------------------- + Nested Loop Anti Join (actual rows=0 loops=1) + AQO not used + Join Filter: (aqo_test_int.a = aqo_test_int1.a) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Materialize (actual rows=1 loops=10) + AQO: rows=1, error=0% + -> Seq Scan on aqo_test_int1 (actual rows=1 loops=1) + AQO: rows=1, error=0% + Filter: ((a = 0) AND (b = 0)) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(15 rows) + +-- Must be 10 rows. +SELECT count(*) FROM aqo_data; + count +------- + 13 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Case 6. +-- 4 cols in 1 eclass. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE b = a) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE b = a) t2 +ON t1.a = t2.b') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=100 loops=1) + AQO not used + Hash Cond: (aqo_test_int.b = aqo_test_int1.b) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: (b = a) + Rows Removed by Filter: 9990 + -> Hash (actual rows=1000 loops=1) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO not used + Filter: (b = a) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE b = a) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE b = a) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join 
(actual rows=100 loops=1) + AQO: rows=100, error=0% + Hash Cond: (aqo_test_int.b = aqo_test_int1.b) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: (b = a) + Rows Removed by Filter: 9990 + -> Hash (actual rows=1000 loops=1) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (b = a) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=100 loops=1) + AQO: rows=100, error=0% + Hash Cond: (aqo_test_int.a = aqo_test_int1.a) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: (a = b) + Rows Removed by Filter: 9990 + -> Hash (actual rows=1000 loops=1) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (a = b) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.b = t2.b') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=100 loops=1) + AQO: rows=100, error=0% + Hash Cond: (aqo_test_int.a = aqo_test_int1.a) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: (a = b) + Rows Removed by Filter: 9990 + -> Hash (actual rows=1000 loops=1) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (a = b) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( 
+ SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.b::text = t2.b::text') AS str +WHERE str NOT LIKE '%Memory%'; + result +----------------------------------------------------------------- + Hash Join (actual rows=100 loops=1) + AQO not used + Hash Cond: ((aqo_test_int1.b)::text = (aqo_test_int.b)::text) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (a = b) + -> Hash (actual rows=10 loops=1) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: (a = b) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +-- Must be 4 rows. +SELECT count(*) FROM aqo_data; + count +------- + 4 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Text fields +CREATE TABLE aqo_test_text(a text, b text, c text); +INSERT INTO aqo_test_text +SELECT (x/10)::text, (x/100)::text, (x/1000)::text +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_text; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = '0'; + QUERY PLAN +---------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::text) AND (b = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = c AND a = '0'; + QUERY PLAN +---------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE b = c AND a = '0'; + QUERY 
PLAN +---------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO not used + Filter: ((b = c) AND (a = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 3 +SELECT count(*) FROM aqo_data; + count +------- + 3 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = c AND a = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = c AND b = c AND a = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND b = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on 
aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND c = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND b = '0' AND c = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 1 +SELECT count(*) FROM aqo_data; + count +------- + 1 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- JSONB fields +CREATE TABLE aqo_test_jsonb(a jsonb, b jsonb, c jsonb); +INSERT INTO aqo_test_jsonb SELECT +to_jsonb(x/10), to_jsonb(x/100), to_jsonb(x/1000) +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_jsonb; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = '0'::jsonb; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + 
JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = c AND a = '0'::jsonb; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE b = c AND a = '0'::jsonb; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO not used + Filter: ((b = c) AND (a = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 3 +SELECT count(*) FROM aqo_data; + count +------- + 3 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = c AND a = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = c AND b 
= c AND a = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND b = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND c = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND b = '0'::jsonb AND c = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 1 +SELECT count(*) FROM aqo_data; + count +------- + 1 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- BOX fields +CREATE TABLE aqo_test_box(a 
box, b box, c box); +INSERT INTO aqo_test_box SELECT +('((0,0), ('||(x/10)||', '||(x/10)||'))')::box, +('((0,0), ('||(x/100)||', '||(x/100)||'))')::box, +('((0,0), ('||(x/1000)||', '||(x/1000)||'))')::box +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_box; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND a = c AND a = '((0,0), (0,0))'::box; + QUERY PLAN +-------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (a = c) AND (a = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box; + QUERY PLAN +-------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (b = c) AND (a = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND a = c AND b = c AND a = '((0,0), (0,0))'::box; + QUERY PLAN +-------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (a = c) AND (b = c) AND (a = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND b = '((0,0), (0,0))'::box; + QUERY PLAN +------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box 
(actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (b = c) AND (a = '(0,0),(0,0)'::box) AND (b = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND c = '((0,0), (0,0))'::box; + QUERY PLAN +------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (b = c) AND (a = '(0,0),(0,0)'::box) AND (c = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND b = '((0,0), (0,0))'::box AND c = '((0,0), (0,0))'::box; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------ + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (b = c) AND (a = '(0,0),(0,0)'::box) AND (b = '(0,0),(0,0)'::box) AND (c = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 6 +SELECT count(*) FROM aqo_data; + count +------- + 6 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries too +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND a ~= c AND a ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (a ~= c) AND (a ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, 
COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND a ~= c AND b ~= c AND a ~= '((0,0), (0,0))'::box; + QUERY PLAN +------------------------------------------------------------------------------ + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (a ~= c) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND b ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box) AND (b ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND c ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box) AND (c ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, 
SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND b ~= '((0,0), (0,0))'::box AND c ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box) AND (b ~= '(0,0),(0,0)'::box) AND (c ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 6 +SELECT count(*) FROM aqo_data; + count +------- + 6 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +DROP TABLE aqo_test_int; +DROP TABLE aqo_test_text; +DROP TABLE aqo_test_jsonb; +DROP TABLE aqo_test_box; +DROP EXTENSION aqo; diff --git a/expected/eclasses_mchar.out b/expected/eclasses_mchar.out new file mode 100644 index 00000000..5593e045 --- /dev/null +++ b/expected/eclasses_mchar.out @@ -0,0 +1,6 @@ +-- Testing for working with equivalence classes for mchar type +-- Skip test if mchar extension does not exist +SELECT count(*) = 0 AS skip_test +FROM pg_available_extensions WHERE name = 'mchar' \gset +\if :skip_test +\quit diff --git a/expected/eclasses_mchar_1.out b/expected/eclasses_mchar_1.out new file mode 100644 index 00000000..a50422cb --- /dev/null +++ b/expected/eclasses_mchar_1.out @@ -0,0 +1,181 @@ +-- Testing for working with equivalence classes for mchar type +-- Skip test if mchar extension does not exist +SELECT count(*) = 0 AS skip_test +FROM pg_available_extensions WHERE name = 'mchar' \gset +\if :skip_test +\quit +\endif +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +SET aqo.mode = 'forced'; +-- MCHAR fields +CREATE EXTENSION MCHAR; +CREATE TABLE aqo_test_mchar(a mchar, b mchar, c mchar); +INSERT INTO aqo_test_mchar +SELECT (x/10)::text::mchar, 
(x/100)::text::mchar, (x/1000)::text::mchar +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_mchar; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = '0'; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::mchar) AND (b = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = c AND a = '0'; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE b = c AND a = '0'; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO not used + Filter: ((b = c) AND (a = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 3 +SELECT count(*) FROM aqo_data; + count +------- + 3 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = c AND a = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN 
(ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = c AND b = c AND a = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND b = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND c = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND b = '0' AND c = '0'; + QUERY PLAN 
+------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 1 +SELECT count(*) FROM aqo_data; + count +------- + 1 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +DROP TABLE aqo_test_mchar; +DROP EXTENSION mchar; +DROP EXTENSION aqo; diff --git a/hash.c b/hash.c index 1f8d36bd..93e43a20 100644 --- a/hash.c +++ b/hash.c @@ -47,15 +47,11 @@ static int get_id_in_sorted_int_array(int val, int n, int *arr); static int get_arg_eclass(int arg_hash, int nargs, int *args_hash, int *eclass_hash); -static void get_clauselist_args(List *clauselist, int *nargs, int **args_hash); -static int disjoint_set_get_parent(int *p, int v); -static void disjoint_set_merge_eclasses(int *p, int v1, int v2); -static int *perform_eclasses_join(List *clauselist, int nargs, int *args_hash); +static int *get_clauselist_args(List *clauselist, int *nargs, int **args_hash); static bool is_brace(char ch); static bool has_consts(List *lst); static List **get_clause_args_ptr(Expr *clause); -static bool clause_is_eq_clause(Expr *clause); /********************************************************************************* @@ -221,8 +217,8 @@ get_fss_for_object(List *relsigns, List *clauselist, { AQOClause *clause = (AQOClause *) lfirst(lc); - clause_hashes[i] = get_clause_hash(clause->clause, - nargs, args_hash, eclass_hash); + clause_hashes[i] = get_clause_hash(clause, nargs, args_hash, + eclass_hash); args = get_clause_args_ptr(clause->clause); clause_has_consts[i] = (args != NULL && has_consts(*args)); i++; @@ -306,19 +302,19 @@ get_fss_for_object(List *relsigns, List *clauselist, * Also args-order-insensitiveness for equal clause is required. 
*/ int -get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash) +get_clause_hash(AQOClause *clause, int nargs, int *args_hash, int *eclass_hash) { Expr *cclause; - List **args = get_clause_args_ptr(clause); + List **args = get_clause_args_ptr(clause->clause); int arg_eclass; ListCell *l; if (args == NULL) - return get_node_hash((Node *) clause); + return get_node_hash((Node *) clause->clause); - cclause = copyObject(clause); + cclause = copyObject(clause->clause); args = get_clause_args_ptr(cclause); - /* XXX: Why does it work even if this loop is removed? */ + foreach(l, *args) { arg_eclass = get_arg_eclass(get_node_hash(lfirst(l)), @@ -328,7 +324,7 @@ get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash) lfirst(l) = create_aqo_const_node(AQO_NODE_EXPR, arg_eclass); } } - if (!clause_is_eq_clause(clause) || has_consts(*args)) + if (!clause->is_eq_clause || has_consts(*args)) return get_node_hash((Node *) cclause); return get_node_hash((Node *) linitial(*args)); } @@ -552,121 +548,98 @@ get_arg_eclass(int arg_hash, int nargs, int *args_hash, int *eclass_hash) * Builds list of non-constant arguments of equivalence clauses * of given clauselist. 
*/ -static void +static int * get_clauselist_args(List *clauselist, int *nargs, int **args_hash) { AQOClause *clause; List **args; ListCell *l; - ListCell *l2; int i = 0; int sh = 0; int cnt = 0; + int *p; + int *p_sorted; + int *args_hash_sorted; + int *idx; + + /* Not more than 2 args in each clause from clauselist */ + *args_hash = palloc(2 * list_length(clauselist) * sizeof(**args_hash)); + p = palloc(2 * list_length(clauselist) * sizeof(*p)); foreach(l, clauselist) { + Expr *e; + clause = (AQOClause *) lfirst(l); args = get_clause_args_ptr(clause->clause); - if (args != NULL && clause_is_eq_clause(clause->clause)) - foreach(l2, *args) - if (!IsA(lfirst(l2), Const)) - cnt++; + if (args == NULL || !clause->is_eq_clause) + continue; + + /* Left argument */ + e = (args != NULL && list_length(*args) ? linitial(*args) : NULL); + if (e && !IsA(e, Const)) + { + (*args_hash)[cnt] = get_node_hash((Node *) e); + p[cnt++] = clause->left_ec; + } + + /* Right argument */ + e = (args != NULL && list_length(*args) >= 2 ? 
lsecond(*args) : NULL); + if (e && !IsA(e, Const)) + { + (*args_hash)[cnt] = get_node_hash((Node *) e); + p[cnt++] = clause->right_ec; + } } - *args_hash = palloc(cnt * sizeof(**args_hash)); - foreach(l, clauselist) + /* Use argsort for simultaniously sorting of args_hash and p arrays */ + idx = argsort(*args_hash, cnt, sizeof(**args_hash), int_cmp); + + args_hash_sorted = palloc(cnt * sizeof(*args_hash_sorted)); + p_sorted = palloc(cnt * sizeof(*p_sorted)); + + for (i = 0; i < cnt; ++i) { - clause = (AQOClause *) lfirst(l); - args = get_clause_args_ptr(clause->clause); - if (args != NULL && clause_is_eq_clause(clause->clause)) - foreach(l2, *args) - if (!IsA(lfirst(l2), Const)) - (*args_hash)[i++] = get_node_hash(lfirst(l2)); + args_hash_sorted[i] = (*args_hash)[idx[i]]; + p_sorted[i] = p[idx[i]]; } - qsort(*args_hash, cnt, sizeof(**args_hash), int_cmp); + pfree(idx); + pfree(p); + pfree(*args_hash); + *args_hash = args_hash_sorted; + + /* Remove duplicates of the hashes */ for (i = 1; i < cnt; ++i) if ((*args_hash)[i - 1] == (*args_hash)[i]) sh++; else + { (*args_hash)[i - sh] = (*args_hash)[i]; + p_sorted[i - sh] = p_sorted[i]; + } *nargs = cnt - sh; *args_hash = repalloc(*args_hash, (*nargs) * sizeof(**args_hash)); -} - -/* - * Returns class of an object in disjoint set. - */ -static int -disjoint_set_get_parent(int *p, int v) -{ - if (p[v] == -1) - return v; - else - return p[v] = disjoint_set_get_parent(p, p[v]); -} - -/* - * Merges two equivalence classes in disjoint set. - */ -static void -disjoint_set_merge_eclasses(int *p, int v1, int v2) -{ - int p1, - p2; - - p1 = disjoint_set_get_parent(p, v1); - p2 = disjoint_set_get_parent(p, v2); - if (p1 != p2) - { - if ((v1 + v2) % 2) - p[p1] = p2; - else - p[p2] = p1; - } -} - -/* - * Constructs disjoint set on arguments. 
- */ -static int * -perform_eclasses_join(List *clauselist, int nargs, int *args_hash) -{ - AQOClause *clause; - int *p; - ListCell *l, - *l2; - List **args; - int h2; - int i2, - i3; - - p = palloc(nargs * sizeof(*p)); - memset(p, -1, nargs * sizeof(*p)); + p_sorted = repalloc(p_sorted, (*nargs) * sizeof(*p_sorted)); - foreach(l, clauselist) + /* Compress the values of eclasses */ + if (*nargs > 0) { - clause = (AQOClause *) lfirst(l); - args = get_clause_args_ptr(clause->clause); - if (args != NULL && clause_is_eq_clause(clause->clause)) + int prev = p_sorted[0]; + p_sorted[0] = 0; + for (i = 1; i < *nargs; i++) { - i3 = -1; - foreach(l2, *args) - { - if (!IsA(lfirst(l2), Const)) - { - h2 = get_node_hash(lfirst(l2)); - i2 = get_id_in_sorted_int_array(h2, nargs, args_hash); - if (i3 != -1) - disjoint_set_merge_eclasses(p, i2, i3); - i3 = i2; - } - } + int cur = p_sorted[i]; + if (cur == prev) + p_sorted[i] = p_sorted[i-1]; + else + p_sorted[i] = p_sorted[i-1] + 1; + prev = cur; } } - return p; + return p_sorted; } /* @@ -678,30 +651,31 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) { int *p; List **lsts; - int i, - v; + int i; + /* + * An auxiliary array of equivalence clauses hashes + * used to improve performance. + */ int *e_hashes; - get_clauselist_args(clauselist, nargs, args_hash); + p = get_clauselist_args(clauselist, nargs, args_hash); *eclass_hash = palloc((*nargs) * sizeof(**eclass_hash)); - p = perform_eclasses_join(clauselist, *nargs, *args_hash); - lsts = palloc((*nargs) * sizeof(*lsts)); + lsts = palloc0((*nargs) * sizeof(*lsts)); e_hashes = palloc((*nargs) * sizeof(*e_hashes)); + /* Combine args hashes corresponding to the same eclass into one list. */ for (i = 0; i < *nargs; ++i) - lsts[i] = NIL; + lsts[p[i]] = lappend_int(lsts[p[i]], (*args_hash)[i]); + /* Precompute eclasses hashes only once per eclass. 
*/ for (i = 0; i < *nargs; ++i) - { - v = disjoint_set_get_parent(p, i); - lsts[v] = lappend_int(lsts[v], (*args_hash)[i]); - } - for (i = 0; i < *nargs; ++i) - e_hashes[i] = get_unordered_int_list_hash(lsts[i]); + if (lsts[i] != NIL) + e_hashes[i] = get_unordered_int_list_hash(lsts[i]); + /* Determine the hashes of each eclass. */ for (i = 0; i < *nargs; ++i) - (*eclass_hash)[i] = e_hashes[disjoint_set_get_parent(p, i)]; + (*eclass_hash)[i] = e_hashes[p[i]]; pfree(e_hashes); } @@ -754,75 +728,3 @@ get_clause_args_ptr(Expr *clause) break; } } - -/* - * Returns whether the clause is an equivalence clause. - */ -static bool -clause_is_eq_clause(Expr *clause) -{ - /* TODO: fix this horrible mess */ - return ( - clause->type == T_OpExpr || - clause->type == T_DistinctExpr || - clause->type == T_NullIfExpr || - clause->type == T_ScalarArrayOpExpr - ) && ( - ((OpExpr *) clause)->opno == Int4EqualOperator || - ((OpExpr *) clause)->opno == BooleanEqualOperator || - ((OpExpr *) clause)->opno == TextEqualOperator || - ((OpExpr *) clause)->opno == TIDEqualOperator || - ((OpExpr *) clause)->opno == ARRAY_EQ_OP || - ((OpExpr *) clause)->opno == RECORD_EQ_OP || - ((OpExpr *) clause)->opno == 15 || - ((OpExpr *) clause)->opno == 92 || - ((OpExpr *) clause)->opno == 93 || - ((OpExpr *) clause)->opno == 94 || - ((OpExpr *) clause)->opno == 352 || - ((OpExpr *) clause)->opno == 353 || - ((OpExpr *) clause)->opno == 385 || - ((OpExpr *) clause)->opno == 386 || - ((OpExpr *) clause)->opno == 410 || - ((OpExpr *) clause)->opno == 416 || - ((OpExpr *) clause)->opno == 503 || - ((OpExpr *) clause)->opno == 532 || - ((OpExpr *) clause)->opno == 533 || - ((OpExpr *) clause)->opno == 560 || - ((OpExpr *) clause)->opno == 566 || - ((OpExpr *) clause)->opno == 607 || - ((OpExpr *) clause)->opno == 649 || - ((OpExpr *) clause)->opno == 620 || - ((OpExpr *) clause)->opno == 670 || - ((OpExpr *) clause)->opno == 792 || - ((OpExpr *) clause)->opno == 811 || - ((OpExpr *) clause)->opno == 900 || - 
((OpExpr *) clause)->opno == 1093 || - ((OpExpr *) clause)->opno == 1108 || - ((OpExpr *) clause)->opno == 1550 || - ((OpExpr *) clause)->opno == 1120 || - ((OpExpr *) clause)->opno == 1130 || - ((OpExpr *) clause)->opno == 1320 || - ((OpExpr *) clause)->opno == 1330 || - ((OpExpr *) clause)->opno == 1500 || - ((OpExpr *) clause)->opno == 1535 || - ((OpExpr *) clause)->opno == 1616 || - ((OpExpr *) clause)->opno == 1220 || - ((OpExpr *) clause)->opno == 1201 || - ((OpExpr *) clause)->opno == 1752 || - ((OpExpr *) clause)->opno == 1784 || - ((OpExpr *) clause)->opno == 1804 || - ((OpExpr *) clause)->opno == 1862 || - ((OpExpr *) clause)->opno == 1868 || - ((OpExpr *) clause)->opno == 1955 || - ((OpExpr *) clause)->opno == 2060 || - ((OpExpr *) clause)->opno == 2542 || - ((OpExpr *) clause)->opno == 2972 || - ((OpExpr *) clause)->opno == 3222 || - ((OpExpr *) clause)->opno == 3516 || - ((OpExpr *) clause)->opno == 3629 || - ((OpExpr *) clause)->opno == 3676 || - ((OpExpr *) clause)->opno == 3882 || - ((OpExpr *) clause)->opno == 3240 || - ((OpExpr *) clause)->opno == 3240 - ); -} diff --git a/hash.h b/hash.h index a1738ac4..0e3ff50b 100644 --- a/hash.h +++ b/hash.h @@ -2,6 +2,7 @@ #define AQO_HASH_H #include "nodes/pg_list.h" +#include "path_utils.h" extern bool list_member_uint64(const List *list, uint64 datum); extern List *list_copy_uint64(List *list); @@ -16,6 +17,7 @@ extern int get_grouped_exprs_hash(int fss, List *group_exprs); /* Hash functions */ void get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash); -int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); +int get_clause_hash(AQOClause *clause, int nargs, int *args_hash, + int *eclass_hash); #endif /* AQO_HASH_H */ \ No newline at end of file diff --git a/path_utils.c b/path_utils.c index 8feefbdf..d050abae 100644 --- a/path_utils.c +++ b/path_utils.c @@ -47,6 +47,14 @@ static AQOPlanNode DefaultAQOPlanNode = .prediction = -1. 
}; + +/* + * Auxiliary list for relabel equivalence classes + * from pointers to the serial numbers - indexes of this list. + * Maybe it's need to use some smart data structure such a HTAB? + */ +List *eclass_collector = NIL; + /* * Hook on creation of a plan node. We need to store AQO-specific data to * support learning stage. @@ -332,6 +340,42 @@ aqo_get_raw_clauses(PlannerInfo *root, List *restrictlist) return clauses; } +void +eclass_collector_free(void) +{ + list_free(eclass_collector); + eclass_collector = NIL; +} + +static int +get_eclass_index(EquivalenceClass *ec) +{ + ListCell *lc; + int i = 0; + MemoryContext old_ctx; + + if (ec == NULL) + return -1; + + /* Get the top of merged eclasses */ + while(ec->ec_merged) + ec = ec->ec_merged; + + foreach (lc, eclass_collector) + { + if (lfirst(lc) == ec) + break; + i++; + } + + old_ctx = MemoryContextSwitchTo(AQOCacheMemCtx); + if (i == list_length(eclass_collector)) + eclass_collector = lappend(eclass_collector, ec); + MemoryContextSwitchTo(old_ctx); + + return i; +} + static List * copy_aqo_clauses_from_rinfo(List *src) { @@ -347,6 +391,11 @@ copy_aqo_clauses_from_rinfo(List *src) new->norm_selec = old->norm_selec; new->outer_selec = old->outer_selec; + new->left_ec = get_eclass_index(old->left_ec); + new->right_ec = get_eclass_index(old->right_ec); + + new->is_eq_clause = (old->left_ec != NULL || old->left_ec != NULL); + result = lappend(result, (void *) new); } @@ -817,6 +866,9 @@ outDouble(StringInfo str, double d) WRITE_NODE_FIELD(clause); \ WRITE_FLOAT_FIELD(norm_selec); \ WRITE_FLOAT_FIELD(outer_selec); \ + WRITE_INT_FIELD(left_ec); \ + WRITE_INT_FIELD(right_ec); \ + WRITE_BOOL_FIELD(is_eq_clause); \ appendStringInfo(str, " }"); \ } \ WRITE_CUSTOM_LIST_END() @@ -936,6 +988,9 @@ AQOconstOut(struct StringInfoData *str, const struct ExtensibleNode *enode) READ_NODE_FIELD(clause); \ READ_FLOAT_FIELD(norm_selec); \ READ_FLOAT_FIELD(outer_selec); \ + READ_INT_FIELD(left_ec); \ + READ_INT_FIELD(right_ec); \ + 
READ_BOOL_FIELD(is_eq_clause); \ token = pg_strtok(&length); /* right bracket "}" */ \ node_copy->fldname = lappend(node_copy->fldname, local_node); \ } \ diff --git a/path_utils.h b/path_utils.h index 0d5d68bd..a6c65bfc 100644 --- a/path_utils.h +++ b/path_utils.h @@ -33,6 +33,14 @@ typedef struct AQOClause /* selectivity for outer join semantics; -1 if not yet set */ Selectivity outer_selec; + /* Serial number of EquivalenceClass containing lefthand */ + int left_ec; + /* Serial number of EquivalenceClass containing righthand */ + int right_ec; + /* Quick check for equivalence class */ + bool is_eq_clause; + + EquivalenceClass *ec; } AQOClause; /* @@ -106,5 +114,6 @@ extern void RegisterAQOPlanNodeMethods(void); extern List *aqo_get_clauses(PlannerInfo *root, List *restrictlist); void aqo_path_utils_init(void); +void eclass_collector_free(void); #endif /* PATH_UTILS_H */ diff --git a/postprocessing.c b/postprocessing.c index e166f84c..b8a70faf 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -202,8 +202,7 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, if (parametrized_sel) { - cur_hash = get_clause_hash(clause->clause, nargs, - args_hash, eclass_hash); + cur_hash = get_clause_hash(clause, nargs, args_hash, eclass_hash); cur_sel = selectivity_cache_find_global_relid(cur_hash, cur_relid); } @@ -849,6 +848,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) end: /* Release all AQO-specific memory, allocated during learning procedure */ selectivity_cache_clear(); + eclass_collector_free(); MemoryContextSwitchTo(oldctx); MemoryContextReset(AQOLearnMemCtx); diff --git a/regress_schedule b/regress_schedule index 76a2e00e..96b2cb93 100644 --- a/regress_schedule +++ b/regress_schedule @@ -21,3 +21,5 @@ test: top_queries test: relocatable test: look_a_like test: feature_subspace +test: eclasses +test: eclasses_mchar diff --git a/sql/eclasses.sql b/sql/eclasses.sql new file mode 100644 index 00000000..a041d2cb --- /dev/null +++ 
b/sql/eclasses.sql @@ -0,0 +1,394 @@ +-- Testing for working with equivalence classes + +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +SET aqo.mode = 'forced'; + +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +-- Integer fields +CREATE TABLE aqo_test_int(a int, b int, c int); +INSERT INTO aqo_test_int SELECT (x/10)::int, (x/100)::int, (x/1000)::int +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_int; + +CREATE TABLE aqo_test_int1(a int, b int, c int); +INSERT INTO aqo_test_int1 SELECT (x/10)::int, (x/10)::int, (x/10)::int +FROM generate_series(0, 999) x; +ANALYZE aqo_test_int1; + +SELECT true AS success FROM aqo_reset(); + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND a = any('{0, 1, 2}'::int[]); + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND a = all('{0, 1, 2}'::int[]); +-- Must be 5 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS 
OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = c AND b = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND b = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND c = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND b = 0 AND c = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND 0 = a AND 0 = b AND 0 = c; + +-- Must be 1 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + + +-- Tests with JOIN clauses. + +-- Case 1. +-- 4 cols in 1 eclass, all of them is 0. +-- 3 nodes with unique FSS. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Case 2. +-- 4 cols in 2 eclasses, 2 is 0 and 2 is 1. +-- The top node must be unique, but all of nodes like in a query of case 1. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON 1 = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Case 3. +-- 4 cols in 2 eclasses, 2 is 0 and 2 is equal but not a const. +-- 1 scan node with FSS like in case 2 and 2 nodes with unique FSS. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = 0') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Case 4. +-- 4 cols in 1 eclass, all of them is 0. +-- 3 nodes with unique FSS. This is not case 1, because it is SEMI-JOIN. 
+SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +WHERE EXISTS ( + SELECT * FROM aqo_test_int1 + WHERE a = b AND t1.a = a)') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Case 5. +-- 4 cols in 1 eclass, all of them is 0. +-- The top node with unique FSS. Leaf nodes like in the case 4. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +WHERE NOT EXISTS ( + SELECT * FROM aqo_test_int1 + WHERE a = b AND t1.a = a)') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Must be 10 rows. +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Case 6. +-- 4 cols in 1 eclass. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE b = a) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE b = a) t2 +ON t1.a = t2.b') AS str +WHERE str NOT LIKE '%Memory%'; + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE b = a) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE b = a) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.b = t2.b') AS str +WHERE str NOT LIKE '%Memory%'; + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.b::text = t2.b::text') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Must be 4 rows. 
+SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + + +-- Text fields +CREATE TABLE aqo_test_text(a text, b text, c text); +INSERT INTO aqo_test_text +SELECT (x/10)::text, (x/100)::text, (x/1000)::text +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_text; + +SELECT true AS success FROM aqo_reset(); +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE b = c AND a = '0'; +-- Must be 3 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = c AND b = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND b = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND c = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND b = '0' AND c = '0'; +-- Must be 1 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + + +-- JSONB fields +CREATE TABLE aqo_test_jsonb(a jsonb, b jsonb, c jsonb); +INSERT INTO aqo_test_jsonb SELECT +to_jsonb(x/10), to_jsonb(x/100), to_jsonb(x/1000) +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_jsonb; + +SELECT true AS success FROM aqo_reset(); +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS 
OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = c AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE b = c AND a = '0'::jsonb; +-- Must be 3 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = c AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = c AND b = c AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND b = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND c = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND b = '0'::jsonb AND c = '0'::jsonb; +-- Must be 1 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + + +-- BOX fields +CREATE TABLE aqo_test_box(a box, b box, c box); +INSERT INTO aqo_test_box SELECT +('((0,0), ('||(x/10)||', '||(x/10)||'))')::box, +('((0,0), ('||(x/100)||', '||(x/100)||'))')::box, +('((0,0), ('||(x/1000)||', '||(x/1000)||'))')::box +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_box; + +SELECT true AS success FROM aqo_reset(); +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND a = c AND a = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM 
aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND a = c AND b = c AND a = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND b = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND c = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND b = '((0,0), (0,0))'::box AND c = '((0,0), (0,0))'::box; +-- Must be 6 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Not equivalent queries too +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND a ~= c AND a ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND a ~= c AND b ~= c AND a ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND b ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND c ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND b ~= '((0,0), (0,0))'::box AND c ~= '((0,0), (0,0))'::box; +-- Must be 6 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +DROP TABLE aqo_test_int; +DROP TABLE 
aqo_test_text; +DROP TABLE aqo_test_jsonb; +DROP TABLE aqo_test_box; + +DROP EXTENSION aqo; diff --git a/sql/eclasses_mchar.sql b/sql/eclasses_mchar.sql new file mode 100644 index 00000000..62e10802 --- /dev/null +++ b/sql/eclasses_mchar.sql @@ -0,0 +1,73 @@ +-- Testing for working with equivalence classes for mchar type + +-- Skip test if mchar extension does not exist +SELECT count(*) = 0 AS skip_test +FROM pg_available_extensions WHERE name = 'mchar' \gset + +\if :skip_test +\quit +\endif + +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +SET aqo.mode = 'forced'; + +-- MCHAR fields +CREATE EXTENSION MCHAR; +CREATE TABLE aqo_test_mchar(a mchar, b mchar, c mchar); +INSERT INTO aqo_test_mchar +SELECT (x/10)::text::mchar, (x/100)::text::mchar, (x/1000)::text::mchar +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_mchar; + +SELECT true AS success FROM aqo_reset(); +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE b = c AND a = '0'; + +-- Must be 3 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = c AND b = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND b = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM 
aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND c = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND b = '0' AND c = '0'; +-- Must be 1 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +DROP TABLE aqo_test_mchar; + +DROP EXTENSION mchar; +DROP EXTENSION aqo; From 7cbb5044e03974de5a90791d9ee095849c48a268 Mon Sep 17 00:00:00 2001 From: Andrey Kazarinov Date: Mon, 30 Oct 2023 16:44:11 +0300 Subject: [PATCH 127/134] assign fss without conditions in estimation of group number --- cardinality_hooks.c | 2 +- t/005_display_groupby_fss.pl | 79 ++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 t/005_display_groupby_fss.pl diff --git a/cardinality_hooks.c b/cardinality_hooks.c index cb3664e8..888fe717 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -453,11 +453,11 @@ aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); predicted = predict_num_groups(root, subpath, groupExprs, &fss); + grouped_rel->fss_hash = fss; if (predicted > 0.) 
{ grouped_rel->predicted_cardinality = predicted; grouped_rel->rows = predicted; - grouped_rel->fss_hash = fss; MemoryContextSwitchTo(old_ctx_m); MemoryContextReset(AQOPredictMemCtx); return predicted; diff --git a/t/005_display_groupby_fss.pl b/t/005_display_groupby_fss.pl new file mode 100644 index 00000000..6f663f0c --- /dev/null +++ b/t/005_display_groupby_fss.pl @@ -0,0 +1,79 @@ +use strict; +use warnings; + +use Config; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use Test::More tests => 2; + +my $node = PostgreSQL::Test::Cluster->new('aqotest'); +$node->init; +$node->append_conf('postgresql.conf', qq{ + shared_preload_libraries = 'aqo' + log_statement = 'ddl' + aqo.join_threshold = 0 + aqo.mode = 'learn' + aqo.show_details = 'on' + aqo.show_hash = 'on' + aqo.min_neighbors_for_predicting = 1 + enable_nestloop = 'off' + enable_mergejoin = 'off' + enable_material = 'off' + }); + +$node->start(); +$node->safe_psql('postgres', 'CREATE EXTENSION aqo'); + +# Create tables with correlated datas in columns + +$node->safe_psql('postgres', 'CREATE TABLE a (x1 int, x2 int, x3 int); +INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival'); + +$node->safe_psql('postgres', 'CREATE TABLE b (y1 int, y2 int, y3 int); +INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival'); + +my $result; + +my $plan = $node->safe_psql('postgres', 'EXPLAIN (analyze true, verbose true) +SELECT a.x1, b.y1, COUNT(*) FROM a, b WHERE a.x2 = b.y2 GROUP BY a.x1, b.y1;'); +my @fss = $plan =~ /fss=(-?\d+)/g; + +$result = $node->safe_psql('postgres', 'SELECT count(*) FROM aqo_data;'); +is($result, 4); + +$result = $node->safe_psql('postgres', 'SELECT fss FROM aqo_data;'); + +my @storage = split(/\n/, $result); + +# compare fss from plan and fss from storage +my $test2 = 1; +if (scalar @fss == scalar @storage) { + foreach my $numb1 (@fss) { + my $found = 0; 
+ + # check fss not zero + if ($numb1 == 0) { + $test2 = 0; + last; + } + + foreach my $numb2 (@storage) { + if ($numb2 == $numb1) { + $found = 1; + last; + } + } + + if (!$found) { + $test2 = 0; + last; + } + } +} else { + $test2 = 0; +} + +is($test2, 1); + +$node->stop(); \ No newline at end of file From 86ac7f52627c51b4356dad05f73cc394a14a7087 Mon Sep 17 00:00:00 2001 From: Alexandra Pervushina Date: Mon, 25 Sep 2023 02:46:24 +0300 Subject: [PATCH 128/134] Print aqo details regardless of IsQueryDisabled --- aqo.c | 2 +- postprocessing.c | 16 +++++++--------- preprocessing.c | 5 +++++ 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/aqo.c b/aqo.c index b82591c4..02865094 100644 --- a/aqo.c +++ b/aqo.c @@ -96,7 +96,7 @@ MemoryContext AQOLearnMemCtx = NULL; MemoryContext AQOStorageMemCtx = NULL; /* Additional plan info */ -int njoins; +int njoins = -1; /***************************************************************************** diff --git a/postprocessing.c b/postprocessing.c index b8a70faf..0f2f7f57 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -85,7 +85,6 @@ static void StoreToQueryEnv(QueryDesc *queryDesc); static void StorePlanInternals(QueryDesc *queryDesc); static bool ExtractFromQueryEnv(QueryDesc *queryDesc); - /* * This is the critical section: only one runner is allowed to be inside this * function for one feature subspace. @@ -766,6 +765,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) cardinality_sum_errors = 0.; cardinality_num_objects = 0; + njoins = -1; if (IsQueryDisabled() || !ExtractFromQueryEnv(queryDesc)) /* AQO keep all query-related preferences at the query context. 
@@ -996,7 +996,8 @@ print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, (*aqo_ExplainOnePlan_next)(plannedstmt, into, es, queryString, params, planduration, queryEnv); - if (IsQueryDisabled() || !aqo_show_details) + if (!(aqo_mode != AQO_MODE_DISABLED || force_collect_stat) || + !aqo_show_details) return; /* Report to user about aqo state only in verbose mode */ @@ -1031,13 +1032,10 @@ print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, * Query class provides an user the conveniently use of the AQO * auxiliary functions. */ - if (aqo_mode != AQO_MODE_DISABLED || force_collect_stat) - { - if (aqo_show_hash) - ExplainPropertyInteger("Query hash", NULL, - query_context.query_hash, es); - ExplainPropertyInteger("JOINS", NULL, njoins, es); - } + if (aqo_show_hash) + ExplainPropertyInteger("Query hash", NULL, + (int64) query_context.query_hash, es); + ExplainPropertyInteger("JOINS", NULL, njoins, es); } static void diff --git a/preprocessing.c b/preprocessing.c index bc014121..954120a2 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -127,6 +127,7 @@ aqo_planner(Query *parse, const char *query_string, int cursorOptions, * all execution stages. */ disable_aqo_for_query(); + query_context.query_hash = 0; return (*aqo_planner_next)(parse, query_string, cursorOptions, boundParams); } @@ -233,7 +234,11 @@ aqo_planner(Query *parse, const char *query_string, int cursorOptions, */ if (!query_context.learn_aqo && !query_context.use_aqo && !query_context.auto_tuning && !force_collect_stat) + { add_deactivated_query(query_context.query_hash); + disable_aqo_for_query(); + goto ignore_query_settings; + } /* * That we can do if query exists in database. From 549115e383da42f7ad90636ff8f2d9a12ed72896 Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Mon, 19 Feb 2024 16:50:44 +0700 Subject: [PATCH 129/134] Fix collecting eclasses routine. 
--- aqo.c | 1 + hash.c | 22 ++++++++++++++++------ path_utils.c | 19 ++++++------------- path_utils.h | 3 ++- postprocessing.c | 1 - 5 files changed, 25 insertions(+), 21 deletions(-) diff --git a/aqo.c b/aqo.c index 02865094..327f12d8 100644 --- a/aqo.c +++ b/aqo.c @@ -118,6 +118,7 @@ aqo_free_callback(ResourceReleasePhase phase, { MemoryContextReset(AQOCacheMemCtx); cur_classes = NIL; + aqo_eclass_collector = NIL; } } diff --git a/hash.c b/hash.c index 93e43a20..937aaebc 100644 --- a/hash.c +++ b/hash.c @@ -623,18 +623,28 @@ get_clauselist_args(List *clauselist, int *nargs, int **args_hash) *args_hash = repalloc(*args_hash, (*nargs) * sizeof(**args_hash)); p_sorted = repalloc(p_sorted, (*nargs) * sizeof(*p_sorted)); - /* Compress the values of eclasses */ + /* + * Compress the values of eclasses. + * It is only sorted in order of args_hash. + * Get the indexes in ascending order of the elements. + */ + idx = argsort(p_sorted, *nargs, sizeof(*p_sorted), int_cmp); + + /* + * Remove the holes from given array. + * Later we can use it as indexes of args_hash. + */ if (*nargs > 0) { - int prev = p_sorted[0]; - p_sorted[0] = 0; + int prev = p_sorted[idx[0]]; + p_sorted[idx[0]] = 0; for (i = 1; i < *nargs; i++) { - int cur = p_sorted[i]; + int cur = p_sorted[idx[i]]; if (cur == prev) - p_sorted[i] = p_sorted[i-1]; + p_sorted[idx[i]] = p_sorted[idx[i-1]]; else - p_sorted[i] = p_sorted[i-1] + 1; + p_sorted[idx[i]] = p_sorted[idx[i-1]] + 1; prev = cur; } } diff --git a/path_utils.c b/path_utils.c index d050abae..5e9cb6ba 100644 --- a/path_utils.c +++ b/path_utils.c @@ -47,13 +47,13 @@ static AQOPlanNode DefaultAQOPlanNode = .prediction = -1. }; - /* * Auxiliary list for relabel equivalence classes * from pointers to the serial numbers - indexes of this list. - * Maybe it's need to use some smart data structure such a HTAB? + * XXX: Maybe it's need to use some smart data structure such a HTAB? + * It must be allocated in AQOCacheMemCtx. 
*/ -List *eclass_collector = NIL; +List *aqo_eclass_collector = NIL; /* * Hook on creation of a plan node. We need to store AQO-specific data to @@ -340,13 +340,6 @@ aqo_get_raw_clauses(PlannerInfo *root, List *restrictlist) return clauses; } -void -eclass_collector_free(void) -{ - list_free(eclass_collector); - eclass_collector = NIL; -} - static int get_eclass_index(EquivalenceClass *ec) { @@ -361,7 +354,7 @@ get_eclass_index(EquivalenceClass *ec) while(ec->ec_merged) ec = ec->ec_merged; - foreach (lc, eclass_collector) + foreach (lc, aqo_eclass_collector) { if (lfirst(lc) == ec) break; @@ -369,8 +362,8 @@ get_eclass_index(EquivalenceClass *ec) } old_ctx = MemoryContextSwitchTo(AQOCacheMemCtx); - if (i == list_length(eclass_collector)) - eclass_collector = lappend(eclass_collector, ec); + if (i == list_length(aqo_eclass_collector)) + aqo_eclass_collector = lappend(aqo_eclass_collector, ec); MemoryContextSwitchTo(old_ctx); return i; diff --git a/path_utils.h b/path_utils.h index a6c65bfc..dec9eb1e 100644 --- a/path_utils.h +++ b/path_utils.h @@ -8,6 +8,8 @@ #define AQO_PLAN_NODE "AQOPlanNode" #define AQO_CONST_NODE "AQOConstNode" +extern List *aqo_eclass_collector; + /* * Find and sort out relations that used in the query: * Use oids of relations to store dependency of ML row on a set of tables. 
@@ -114,6 +116,5 @@ extern void RegisterAQOPlanNodeMethods(void); extern List *aqo_get_clauses(PlannerInfo *root, List *restrictlist); void aqo_path_utils_init(void); -void eclass_collector_free(void); #endif /* PATH_UTILS_H */ diff --git a/postprocessing.c b/postprocessing.c index 0f2f7f57..b902e064 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -848,7 +848,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) end: /* Release all AQO-specific memory, allocated during learning procedure */ selectivity_cache_clear(); - eclass_collector_free(); MemoryContextSwitchTo(oldctx); MemoryContextReset(AQOLearnMemCtx); From 70b78c227e25f8b9279eb356bd7174c5c596832c Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Thu, 22 Feb 2024 20:24:30 +0700 Subject: [PATCH 130/134] Refactor the comparator functions. --- utils.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/utils.c b/utils.c index c44b3a64..13908783 100644 --- a/utils.c +++ b/utils.c @@ -32,11 +32,14 @@ static int argsort_cmp(const void *a, const void *b); * Function for qsorting an integer arrays */ int -int_cmp(const void *a, const void *b) +int_cmp(const void *arg1, const void *arg2) { - if (*(int *) a < *(int *) b) + int v1 = *((const int *) arg1); + int v2 = *((const int *) arg2); + + if (v1 < v2) return -1; - else if (*(int *) a > *(int *) b) + else if (v1 > v2) return 1; else return 0; @@ -46,11 +49,14 @@ int_cmp(const void *a, const void *b) * Function for qsorting an double arrays */ int -double_cmp(const void *a, const void *b) +double_cmp(const void *arg1, const void *arg2) { - if (*(double *) a < *(double *) b) + double v1 = *((const double *) arg1); + double v2 = *((const double *) arg2); + + if (v1 < v2) return -1; - else if (*(double *) a > *(double *) b) + else if (v1 > v2) return 1; else return 0; @@ -60,12 +66,14 @@ double_cmp(const void *a, const void *b) * Compares elements for two given indexes */ int -argsort_cmp(const void *a, const void *b) 
+argsort_cmp(const void *arg1, const void *arg2) { - return (*argsort_value_cmp) ((char *) argsort_a + - *((int *) a) * argsort_es, - (char *) argsort_a + - *((int *) b) * argsort_es); + int idx1 = *((const int *) arg1); + int idx2 = *((const int *) arg2); + char *arr = (char *) argsort_a; + + return (*argsort_value_cmp) (&arr[idx1 * argsort_es], + &arr[idx2 * argsort_es]); } /* From bc900f7e9fb68c9c194828476818cf04bd476553 Mon Sep 17 00:00:00 2001 From: Timur Magomedov Date: Thu, 4 Apr 2024 20:01:24 +0300 Subject: [PATCH 131/134] Reset aqo mode to frozen in case of shmem overflow --- preprocessing.c | 7 ++++--- t/006_overflow.pl | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 3 deletions(-) create mode 100644 t/006_overflow.pl diff --git a/preprocessing.c b/preprocessing.c index 954120a2..9de7acfd 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -316,10 +316,11 @@ aqo_planner(Query *parse, const char *query_string, int cursorOptions, disable_aqo_for_query(); /* - * Switch AQO to controlled mode. In this mode we wouldn't add new - * query classes, just use and learn on existed set. + * Switch AQO to frozen mode. In this mode we wouldn't collect + * any new data, just read collected statistics for already + * known query classes. */ - aqo_mode = AQO_MODE_CONTROLLED; + aqo_mode = AQO_MODE_FROZEN; } } diff --git a/t/006_overflow.pl b/t/006_overflow.pl new file mode 100644 index 00000000..eb2d71b9 --- /dev/null +++ b/t/006_overflow.pl @@ -0,0 +1,47 @@ +use strict; +use warnings; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More tests => 4; + +my $node = PostgreSQL::Test::Cluster->new('aqotest'); +$node->init; +$node->append_conf('postgresql.conf', qq{ + shared_preload_libraries = 'aqo' + aqo.join_threshold = 0 + aqo.mode = 'frozen' + aqo.show_details = 'on' + aqo.dsm_size_max = 10 + aqo.force_collect_stat = 'on' + aqo.fs_max_items = 3 + aqo.fss_max_items = 10 +}); + +# General purpose variables. 
+my $res; +my $mode; + +# Disable default settings, forced by PGOPTIONS in AQO Makefile +$ENV{PGOPTIONS}=""; + +$node->start(); +$node->safe_psql('postgres', 'CREATE EXTENSION aqo'); + +$mode = $node->safe_psql('postgres',"show aqo.mode"); +like($mode, qr/frozen/); + +$node->safe_psql('postgres', 'CREATE TABLE a (x int); +INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival'); + +$res = $node->safe_psql('postgres', 'EXPLAIN ANALYZE SELECT x FROM a WHERE x < 5;'); +like($res, qr/AQO mode: FROZEN/); + +$res = $node->safe_psql('postgres', 'EXPLAIN ANALYZE SELECT count(x) FROM a WHERE x > 5;'); +like($res, qr/AQO mode: FROZEN/); + +$mode = $node->safe_psql('postgres',"show aqo.mode"); +like($mode, qr/frozen/); + +$node->stop(); +done_testing(); From 80606727e574f78af00832a7d6d78539ce73cf4d Mon Sep 17 00:00:00 2001 From: Artem Fadeev Date: Fri, 23 Aug 2024 14:47:13 +0300 Subject: [PATCH 132/134] Fix svace warnings Fixed arithmetics in check_dsa_file_size to avoid server startup failure when aqo.dsm_size_max in bytes overflows signed integer. Updated corresponding tap-test. Two unreachable paths were removed. (cherry-picked from master with a minor change in tap-test) --- cardinality_hooks.c | 3 --- storage.c | 4 +--- t/004_dsm_size_max.pl | 8 +++++++- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 888fe717..ae6dff5e 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -447,9 +447,6 @@ aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, /* It is unclear that to do in situation of such kind. 
Just report it */ elog(WARNING, "AQO is in the middle of the estimate_num_groups_hook chain"); - if (groupExprs == NIL) - return 1.0; - old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); predicted = predict_num_groups(root, subpath, groupExprs, &fss); diff --git a/storage.c b/storage.c index a11f16f4..10b7cfc6 100644 --- a/storage.c +++ b/storage.c @@ -968,8 +968,6 @@ aqo_get_file_size(const char *filename) ereport(LOG, (errcode_for_file_access(), errmsg("could not read file \"%s\": %m", filename))); - if (file) - FreeFile(file); unlink(filename); return -1; } @@ -981,7 +979,7 @@ check_dsa_file_size(void) long data_size = aqo_get_file_size(PGAQO_DATA_FILE); if (qtext_size == -1 || data_size == -1 || - qtext_size + data_size >= dsm_size_max * 1024 * 1024) + ((unsigned long) qtext_size + (unsigned long) data_size) >> 20 >= dsm_size_max) { elog(ERROR, "aqo.dsm_size_max is too small"); } diff --git a/t/004_dsm_size_max.pl b/t/004_dsm_size_max.pl index 26898b79..8b7f8e62 100644 --- a/t/004_dsm_size_max.pl +++ b/t/004_dsm_size_max.pl @@ -5,7 +5,7 @@ use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; -use Test::More tests => 5; +use Test::More tests => 6; my $node = PostgreSQL::Test::Cluster->new('aqotest'); $node->init; @@ -58,6 +58,12 @@ $node->psql('postgres', 'select * from aqo_reset();'); $node->stop(); +# 3000mb (more than 2*31 bytes) overflows 4-byte signed int +$node->append_conf('postgresql.conf', 'aqo.dsm_size_max = 3000'); +is($node->start(fail_ok => 1), 1, "Large aqo.dsm_size_max doesn't cause integer overflow"); +$node->stop(); + + my $regex; $long_string = 'a' x 100000; $regex = qr/.*WARNING: \[AQO\] Not enough DSA\. 
AQO was disabled for this query/; From 294f523a6511b1044b28fa8a321d9fc504f38d38 Mon Sep 17 00:00:00 2001 From: Artem Fadeev Date: Mon, 30 Sep 2024 11:16:14 +0300 Subject: [PATCH 133/134] Fix build_knn_matrix (now called update_knn_matrix) Previous version of build_knn_matrix had an unreachable branch (`if (features!=NULL)`), which lead to use_wide_search having no effect. There was also a memory bug of copying a memory area into itself. predict_for_relation was fixed with interoperation of use_wide_search and predict_with_few_neighbors features in mind. Additions to the look_a_like regression test reflect those changes. This commit also removes unused arguments from several functions and fixes a couple of typos. --- cardinality_estimation.c | 23 ++++--- cardinality_hooks.c | 2 +- expected/gucs.out | 1 + expected/look_a_like.out | 125 ++++++++++++++++++++++++++++++++++++++- expected/unsupported.out | 1 + machine_learning.c | 1 + postprocessing.c | 2 +- sql/gucs.sql | 1 + sql/look_a_like.sql | 66 ++++++++++++++++++++- storage.c | 99 +++++++++++++------------------ storage.h | 4 +- 11 files changed, 248 insertions(+), 77 deletions(-) diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 8ab98f3c..f0cca328 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -81,8 +81,17 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, &ncols, &features); data = OkNNr_allocate(ncols); - if (load_fss_ext(query_context.fspace_hash, *fss, data, NULL)) + if (load_aqo_data(query_context.fspace_hash, *fss, data, false) && + data->rows >= (aqo_predict_with_few_neighbors ? 
1 : aqo_k)) result = OkNNr_predict(data, features); + /* Try to search in surrounding feature spaces for the same node */ + else if (use_wide_search && load_aqo_data(query_context.fspace_hash, *fss, data, true)) + { + elog(DEBUG5, "[AQO] Make prediction for fss "INT64_FORMAT" by a neighbour " + "includes %d feature(s) and %d fact(s).", + (int64) *fss, data->cols, data->rows); + result = OkNNr_predict(data, features); + } else { /* @@ -91,17 +100,7 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, * small part of paths was used for AQO learning and stored into * the AQO knowledge base. */ - - /* Try to search in surrounding feature spaces for the same node */ - if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, use_wide_search, features)) - result = -1; - else - { - elog(DEBUG5, "[AQO] Make prediction for fss %d by a neighbour " - "includes %d feature(s) and %d fact(s).", - *fss, data->cols, data->rows); - result = OkNNr_predict(data, features); - } + result = -1; } #ifdef AQO_DEBUG_PRINT diff --git a/cardinality_hooks.c b/cardinality_hooks.c index ae6dff5e..93fb73b1 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -414,7 +414,7 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, *fss = get_grouped_exprs_hash(child_fss, group_exprs); memset(&data, 0, sizeof(OkNNrdata)); - if (!load_fss_ext(query_context.fspace_hash, *fss, &data, NULL)) + if (!load_aqo_data(query_context.fspace_hash, *fss, &data, false)) return -1; Assert(data.rows == 1); diff --git a/expected/gucs.out b/expected/gucs.out index f33aa6b2..d083f6e2 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -145,4 +145,5 @@ SELECT count(*) FROM aqo_query_stat; 0 (1 row) +DROP TABLE t; DROP EXTENSION aqo; diff --git a/expected/look_a_like.out b/expected/look_a_like.out index 594f017e..854bb852 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -9,8 +9,9 @@ SELECT true AS success FROM aqo_reset(); SET 
aqo.wide_search = 'on'; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; -set aqo.show_hash = 'off'; +SET aqo.show_hash = 'off'; SET aqo.min_neighbors_for_predicting = 1; +SET aqo.predict_with_few_neighbors = 'off'; SET enable_nestloop = 'off'; SET enable_mergejoin = 'off'; SET enable_material = 'off'; @@ -553,9 +554,131 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L JOINS: 2 (24 rows) +-- Next few test cases focus on fss corresponding to (x1 > ? AND x2 < ? AND x3 < ?). We will denote +-- it by fss0. At this moment there is exactly one fs with (fs, fss0, dbid) record in aqo_data. We'll +-- refer to it as fs0. +-- Let's create another fs for fss0. We'll call this fs fs1. Since aqo.wide_search='on', +-- aqo.min_neighbors_for_predicting=1, and there is (fs0, fss0, dbid) data record, AQO must be used here. +SELECT str AS result +FROM expln(' +SELECT * FROM A WHERE x1 > -100 AND x2 < 10 AND x3 < 10;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +---------------------------------------------------------------------- + Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=20, error=-400% + Output: x1, x2, x3 + Filter: ((a.x1 > '-100'::integer) AND (a.x2 < 10) AND (a.x3 < 10)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(7 rows) + +-- Now there are 2 data records for fss0: one for (fs0, fss0, dbid) and one for (fs1, fss0, dbid) +-- We repeat previous query, but set aqo.min_neighbors_for_predicting to 2. Since aqo.predict_with_few_neighbors +-- is 'off', AQO is obliged to use both data records for fss0. 
+SET aqo.min_neighbors_for_predicting = 2; +SELECT str AS result +FROM expln(' +SELECT * FROM A WHERE x1 > 1 AND x2 < 10 AND x3 < 10;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +-------------------------------------------------------- + Seq Scan on public.a (actual rows=80 loops=1) + AQO: rows=77, error=-4% + Output: x1, x2, x3 + Filter: ((a.x1 > 1) AND (a.x2 < 10) AND (a.x3 < 10)) + Rows Removed by Filter: 20 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(8 rows) + +-- Now there are 3 data records for fss0: 1 for (fs0, fss0, dbid) and 2 for (fs1, fss0, dbid) +-- Lastly, we run invoke query with previously unseen fs with fss0 feature subspace. AQO must use +-- three data records from two neighbors for this one. +SET aqo.min_neighbors_for_predicting = 3; +SELECT str AS result +FROM expln(' +SELECT x2 FROM A WHERE x1 > 3 AND x2 < 10 AND x3 < 10 GROUP BY(x2);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +-------------------------------------------------------------- + HashAggregate (actual rows=6 loops=1) + AQO not used + Output: x2 + Group Key: a.x2 + -> Seq Scan on public.a (actual rows=60 loops=1) + AQO: rows=71, error=15% + Output: x1, x2, x3 + Filter: ((a.x1 > 3) AND (a.x2 < 10) AND (a.x3 < 10)) + Rows Removed by Filter: 40 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(12 rows) + +----- +DROP TABLE IF EXISTS t; +NOTICE: table "t" does not exist, skipping +CREATE TABLE t AS SELECT x, x AS y, x AS z FROM generate_series(1, 10000) x; +ANALYZE t; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Test that when there are less records than aqo.min_neighbors_for_predicting for given (fs, fss, dbid) +-- and aqo.predict_with_few_neighbors is off, those records have higher precedence for cardinality estimation +-- than neighbors' records. 
+SELECT str AS result +FROM expln(' +select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------ + Seq Scan on public.t (actual rows=10000 loops=1) + AQO not used + Output: x, y, z + Filter: ((t.x <= 10000) AND (t.y <= 10000) AND (t.z <= 10000)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(7 rows) + +DO +$$ +BEGIN + for counter in 1..20 loop + EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= 1 and z <= %L;', 10 * counter); + EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= %L and z <= 1;', 10 * counter); + EXECUTE format('explain analyze select *, 1 from t where x <= %L and y <= 1 and z <= 1;', 10 * counter); + end loop; +END; +$$ LANGUAGE PLPGSQL; +-- AQO should predict ~1000 rows to indicate that the record from previous invocation was used. +SELECT str AS result +FROM expln(' +select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------ + Seq Scan on public.t (actual rows=10000 loops=1) + AQO: rows=9987, error=-0% + Output: x, y, z + Filter: ((t.x <= 10000) AND (t.y <= 10000) AND (t.z <= 10000)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(7 rows) + RESET aqo.wide_search; +RESET aqo.predict_with_few_neighbors; +RESET aqo.min_neighbors_for_predicting; DROP EXTENSION aqo CASCADE; DROP TABLE a; DROP TABLE b; DROP TABLE c; +DROP TABLE t; DROP FUNCTION expln; diff --git a/expected/unsupported.out b/expected/unsupported.out index 9db07618..a088a47c 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -16,6 +16,7 @@ $$ LANGUAGE PLPGSQL; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; DROP TABLE IF EXISTS t; 
+NOTICE: table "t" does not exist, skipping CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; ANALYZE t; CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y diff --git a/machine_learning.c b/machine_learning.c index bfdf0aaa..d7520a94 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -150,6 +150,7 @@ OkNNr_predict(OkNNrdata *data, double *features) if (!aqo_predict_with_few_neighbors && data->rows < aqo_k) return -1.; + Assert(data->rows > 0); for (i = 0; i < data->rows; ++i) distances[i] = fs_distance(data->matrix[i], features, data->cols); diff --git a/postprocessing.c b/postprocessing.c index b902e064..cae61e98 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -95,7 +95,7 @@ atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, double *features, double target, double rfactor, List *reloids) { - if (!load_fss_ext(fs, fss, data, NULL)) + if (!load_aqo_data(fs, fss, data, false)) data->rows = 0; data->rows = OkNNr_learn(data, features, target, rfactor); diff --git a/sql/gucs.sql b/sql/gucs.sql index 0e948cf1..81e245b7 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -51,4 +51,5 @@ SELECT count(*) FROM aqo_query_stat; SELECT true AS success FROM aqo_reset(); SELECT count(*) FROM aqo_query_stat; +DROP TABLE t; DROP EXTENSION aqo; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index f50e4e55..5eb47a65 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -6,8 +6,9 @@ SET aqo.wide_search = 'on'; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; -set aqo.show_hash = 'off'; +SET aqo.show_hash = 'off'; SET aqo.min_neighbors_for_predicting = 1; +SET aqo.predict_with_few_neighbors = 'off'; SET enable_nestloop = 'off'; SET enable_mergejoin = 'off'; SET enable_material = 'off'; @@ -142,10 +143,73 @@ FROM expln(' SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +-- 
Next few test cases focus on fss corresponding to (x1 > ? AND x2 < ? AND x3 < ?). We will denote +-- it by fss0. At this moment there is exactly one fs with (fs, fss0, dbid) record in aqo_data. We'll +-- refer to it as fs0. + +-- Let's create another fs for fss0. We'll call this fs fs1. Since aqo.wide_search='on', +-- aqo.min_neighbors_for_predicting=1, and there is (fs0, fss0, dbid) data record, AQO must be used here. +SELECT str AS result +FROM expln(' +SELECT * FROM A WHERE x1 > -100 AND x2 < 10 AND x3 < 10;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; +-- Now there are 2 data records for fss0: one for (fs0, fss0, dbid) and one for (fs1, fss0, dbid) + +-- We repeat previous query, but set aqo.min_neighbors_for_predicting to 2. Since aqo.predict_with_few_neighbors +-- is 'off', AQO is obliged to use both data records for fss0. +SET aqo.min_neighbors_for_predicting = 2; +SELECT str AS result +FROM expln(' +SELECT * FROM A WHERE x1 > 1 AND x2 < 10 AND x3 < 10;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; +-- Now there are 3 data records for fss0: 1 for (fs0, fss0, dbid) and 2 for (fs1, fss0, dbid) + +-- Lastly, we run invoke query with previously unseen fs with fss0 feature subspace. AQO must use +-- three data records from two neighbors for this one. 
+SET aqo.min_neighbors_for_predicting = 3; +SELECT str AS result +FROM expln(' +SELECT x2 FROM A WHERE x1 > 3 AND x2 < 10 AND x3 < 10 GROUP BY(x2);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +----- +DROP TABLE IF EXISTS t; +CREATE TABLE t AS SELECT x, x AS y, x AS z FROM generate_series(1, 10000) x; +ANALYZE t; +SELECT true AS success FROM aqo_reset(); + +-- Test that when there are less records than aqo.min_neighbors_for_predicting for given (fs, fss, dbid) +-- and aqo.predict_with_few_neighbors is off, those records have higher precedence for cardinality estimation +-- than neighbors' records. +SELECT str AS result +FROM expln(' +select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; +DO +$$ +BEGIN + for counter in 1..20 loop + EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= 1 and z <= %L;', 10 * counter); + EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= %L and z <= 1;', 10 * counter); + EXECUTE format('explain analyze select *, 1 from t where x <= %L and y <= 1 and z <= 1;', 10 * counter); + end loop; +END; +$$ LANGUAGE PLPGSQL; +-- AQO should predict ~1000 rows to indicate that the record from previous invocation was used. 
+SELECT str AS result +FROM expln(' +select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + + RESET aqo.wide_search; +RESET aqo.predict_with_few_neighbors; +RESET aqo.min_neighbors_for_predicting; DROP EXTENSION aqo CASCADE; DROP TABLE a; DROP TABLE b; DROP TABLE c; +DROP TABLE t; DROP FUNCTION expln; diff --git a/storage.c b/storage.c index 10b7cfc6..79b1b11d 100644 --- a/storage.c +++ b/storage.c @@ -120,12 +120,6 @@ PG_FUNCTION_INFO_V1(aqo_query_stat_update); PG_FUNCTION_INFO_V1(aqo_data_update); -bool -load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids) -{ - return load_aqo_data(fs, fss, data, reloids, false, NULL); -} - bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids) { @@ -1577,66 +1571,53 @@ fs_distance(double *a, double *b, int len) } static bool -nearest_neighbor(double **matrix, int old_rows, double *neibour, int cols) +nearest_neighbor(double **matrix, int old_rows, double *neighbor, int cols) { int i; for (i=0; irows is kept <= aqo_K. + */ static void -build_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data, double *features) +update_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data) { + int k = (data->rows < 0) ? 0 : data->rows; + int i; + Assert(data->cols == temp_data->cols); Assert(data->matrix); - if (features != NULL) + if (data->cols > 0) { - int old_rows = data->rows; - int k = (old_rows < 0) ? 
0 : old_rows; - - if (data->cols > 0) + for (i = 0; i < temp_data->rows && k < aqo_K; i++) { - int i; - - Assert(data->cols == temp_data->cols); - - for (i = 0; i < temp_data->rows; i++) + if (!nearest_neighbor(data->matrix, k, temp_data->matrix[i], data->cols)) { - if (k < aqo_K && !nearest_neighbor(data->matrix, old_rows, - temp_data->matrix[i], - data->cols)) - { - memcpy(data->matrix[k], temp_data->matrix[i], data->cols * sizeof(double)); - data->rfactors[k] = temp_data->rfactors[i]; - data->targets[k] = temp_data->targets[i]; - k++; - } + memcpy(data->matrix[k], temp_data->matrix[i], data->cols * sizeof(double)); + data->rfactors[k] = temp_data->rfactors[i]; + data->targets[k] = temp_data->targets[i]; + k++; } - data->rows = k; } } - else + /* Data has no columns. Only one record can be added */ + else if (k == 0 && temp_data->rows > 0) { - if (data->rows > 0) - /* trivial strategy - use first suitable record and ignore others */ - return; - memcpy(data, temp_data, sizeof(OkNNrdata)); - if (data->cols > 0) - { - int i; - - for (i = 0; i < data->rows; i++) - { - Assert(data->matrix[i]); - memcpy(data->matrix[i], temp_data->matrix[i], data->cols * sizeof(double)); - } - } + data->rfactors[0] = temp_data->rfactors[0]; + data->targets[0] = temp_data->targets[0]; + k = 1; } + data->rows = k; + + Assert(data->rows >= 0 && data->rows <= aqo_K); } static OkNNrdata * @@ -1706,13 +1687,11 @@ _fill_knn_data(const DataEntry *entry, List **reloids) * * If wideSearch is true - make seqscan on the hash table to see for relevant * data across neighbours. - * If reloids is NULL - don't fill this list. * * Return false if the operation was unsuccessful. 
*/ bool -load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, - bool wideSearch, double *features) +load_aqo_data(uint64 fs, int fss, OkNNrdata *data, bool wideSearch) { DataEntry *entry; bool found; @@ -1720,6 +1699,7 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, OkNNrdata *temp_data; Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + Assert(wideSearch || data->rows <= 0); dsa_init(); @@ -1739,16 +1719,16 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, if (entry->cols != data->cols) { /* Collision happened? */ - elog(LOG, "[AQO] Does a collision happened? Check it if possible " + elog(LOG, "[AQO] Did a collision happen? Check it if possible " "(fs: "UINT64_FORMAT", fss: %d).", fs, fss); found = false; /* Sign of unsuccessful operation */ goto end; } - temp_data = _fill_knn_data(entry, reloids); + temp_data = _fill_knn_data(entry, NULL); Assert(temp_data->rows > 0); - build_knn_matrix(data, temp_data, features); + update_knn_matrix(data, temp_data); Assert(data->rows > 0); } else @@ -1770,28 +1750,31 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, temp_data = _fill_knn_data(entry, &tmp_oids); - if (data->rows > 0 && list_length(tmp_oids) != noids) + if (noids >= 0 && list_length(tmp_oids) != noids) { /* Dubious case. 
So log it and skip these data */ elog(LOG, "[AQO] different number depended oids for the same fss %d: " "%d and %d correspondingly.", fss, list_length(tmp_oids), noids); - Assert(noids >= 0); list_free(tmp_oids); continue; } noids = list_length(tmp_oids); + list_free(tmp_oids); - if (reloids != NULL && *reloids == NIL) - *reloids = tmp_oids; - else - list_free(tmp_oids); - - build_knn_matrix(data, temp_data, NULL); + update_knn_matrix(data, temp_data); found = true; + + /* Abort if data is full */ + if (data->rows == aqo_K || (data->cols == 0 && data->rows == 1)) + { + hash_seq_term(&hash_seq); + break; + } } + } Assert(!found || (data->rows > 0 && data->rows <= aqo_K)); diff --git a/storage.h b/storage.h index 9491e33e..692014c3 100644 --- a/storage.h +++ b/storage.h @@ -144,8 +144,7 @@ extern void aqo_qtexts_load(void); extern bool aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, List *reloids); -extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, - bool wideSearch, double *features); +extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, bool wideSearch); extern void aqo_data_flush(void); extern void aqo_data_load(void); @@ -166,7 +165,6 @@ extern bool query_is_deactivated(uint64 query_hash); extern void add_deactivated_query(uint64 query_hash); /* Storage interaction */ -extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids); extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids); extern bool update_query_timeout(uint64 queryid, int64 smart_timeout); From b6bea365c3d38cb0e28c0b66be1420bc1242a0f3 Mon Sep 17 00:00:00 2001 From: Artem Fadeev Date: Mon, 30 Sep 2024 14:52:29 +0300 Subject: [PATCH 134/134] Fix smart statement timeout update logic and aqo_stat_store Note: due to a mix of absolute and relative time in set_timeout_if_need function, smart statement timeout feature doesn't currently work since its timeouts are set in the past. 
This commit changes checked precondition for smart statement timeout change to fix array indexing bug, but the feature itself remains broken. This commit also fixes arithmetic errors in aqo_stat_store in the case of fully filled arrays. --- expected/aqo_query_stat.out | 155 ++++++++++++++++++++++++++++++++++++ postprocessing.c | 21 +++-- regress_schedule | 1 + sql/aqo_query_stat.sql | 74 +++++++++++++++++ storage.c | 18 +++-- 5 files changed, 254 insertions(+), 15 deletions(-) create mode 100644 expected/aqo_query_stat.out create mode 100644 sql/aqo_query_stat.sql diff --git a/expected/aqo_query_stat.out b/expected/aqo_query_stat.out new file mode 100644 index 00000000..2478b4e5 --- /dev/null +++ b/expected/aqo_query_stat.out @@ -0,0 +1,155 @@ +-- Testing aqo_query_stat update logic +-- Note: this test assumes STAT_SAMPLE_SIZE to be 20. +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +DROP TABLE IF EXISTS A; +NOTICE: table "a" does not exist, skipping +CREATE TABLE A AS SELECT x FROM generate_series(1, 20) as x; +ANALYZE A; +DROP TABLE IF EXISTS B; +NOTICE: table "b" does not exist, skipping +CREATE TABLE B AS SELECT y FROM generate_series(1, 10) as y; +ANALYZE B; +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$ +SET aqo.mode = 'learn'; +SET aqo.force_collect_stat = 'on'; +SET aqo.min_neighbors_for_predicting = 1; +-- First test: adding real records +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; + count +------- + 20 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; + count +------- + 20 +(1 row) + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + aqo_enable_class +------------------ + +(1 row) + +SELECT count(*) FROM A JOIN 
B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; + count +------- + 18 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; + count +------- + 14 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 19 AND B.y < 9; + count +------- + 8 +(1 row) + +-- Ignore unstable time-related columns +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + error_aqo | error_no_aqo | executions_with_aqo | executions_without_aqo +--------------------+--------------+---------------------+------------------------ + {0.22,0.362,0.398} | {0.392,0.21} | 3 | 2 +(1 row) + +SELECT true AS success from aqo_reset(); + success +--------- + t +(1 row) + +-- Second test: fake data in aqo_query_stat +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 5 AND B.y < 100; + count +------- + 135 +(1 row) + +SELECT aqo_query_stat_update( + queryid, + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + 100, 50) +FROM aqo_query_stat; + aqo_query_stat_update +----------------------- + t +(1 row) + +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + error_aqo | error_no_aqo | executions_with_aqo | executions_without_aqo +------------------------------------------------------+------------------------------------------------------+---------------------+------------------------ + {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20} | 
{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20} | 100 | 50 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 10 AND B.y < 100; + count +------- + 100 +(1 row) + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + aqo_enable_class +------------------ + +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; + count +------- + 20 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; + count +------- + 20 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; + count +------- + 18 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; + count +------- + 14 +(1 row) + +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + error_aqo | error_no_aqo | executions_with_aqo | executions_without_aqo +---------------------------------------------------------------------+----------------------------------------------------------+---------------------+------------------------ + {5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,0.392,0.344,0.34,0.362} | {2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,0.218} | 104 | 51 +(1 row) + +SET aqo.mode TO DEFAULT; +SET aqo.force_collect_stat TO DEFAULT; +SET aqo.min_neighbors_for_predicting TO DEFAULT; +DROP FUNCTION round_array; +DROP TABLE A; +DROP TABLE B; +DROP EXTENSION aqo CASCADE; diff --git a/postprocessing.c b/postprocessing.c index cae61e98..50d27624 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -30,6 +30,8 @@ #include "machine_learning.h" #include "storage.h" +#define SMART_TIMEOUT_ERROR_THRESHOLD (0.1) + bool aqo_learn_statement_timeout = false; @@ -761,7 +763,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) instr_time endtime; EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); 
MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); - double error = .0; cardinality_sum_errors = 0.; cardinality_num_objects = 0; @@ -827,18 +828,22 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) if (stat != NULL) { - /* Store all learn data into the AQO service relations. */ - if (!query_context.adding_query && query_context.auto_tuning) - automatical_query_tuning(query_context.query_hash, stat); - - error = stat->est_error_aqo[stat->cur_stat_slot_aqo-1] - cardinality_sum_errors/(1 + cardinality_num_objects); - - if ( aqo_learn_statement_timeout_enable && aqo_statement_timeout > 0 && error >= 0.1) + Assert(!query_context.use_aqo || stat->cur_stat_slot_aqo > 0); + /* If query used aqo, increase smart timeout if needed */ + if (query_context.use_aqo && + aqo_learn_statement_timeout_enable && + aqo_statement_timeout > 0 && + stat->est_error_aqo[stat->cur_stat_slot_aqo-1] - + cardinality_sum_errors/(1 + cardinality_num_objects) >= SMART_TIMEOUT_ERROR_THRESHOLD) { int64 fintime = increase_smart_timeout(); elog(NOTICE, "[AQO] Time limit for execution of the statement was increased. Current timeout is "UINT64_FORMAT, fintime); } + /* Store all learn data into the AQO service relations. */ + if (!query_context.adding_query && query_context.auto_tuning) + automatical_query_tuning(query_context.query_hash, stat); + pfree(stat); } } diff --git a/regress_schedule b/regress_schedule index 96b2cb93..f3084fc8 100644 --- a/regress_schedule +++ b/regress_schedule @@ -23,3 +23,4 @@ test: look_a_like test: feature_subspace test: eclasses test: eclasses_mchar +test: aqo_query_stat diff --git a/sql/aqo_query_stat.sql b/sql/aqo_query_stat.sql new file mode 100644 index 00000000..a9228b5e --- /dev/null +++ b/sql/aqo_query_stat.sql @@ -0,0 +1,74 @@ +-- Testing aqo_query_stat update logic +-- Note: this test assumes STAT_SAMPLE_SIZE to be 20. 
+CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +DROP TABLE IF EXISTS A; +CREATE TABLE A AS SELECT x FROM generate_series(1, 20) as x; +ANALYZE A; + +DROP TABLE IF EXISTS B; +CREATE TABLE B AS SELECT y FROM generate_series(1, 10) as y; +ANALYZE B; + +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$ + +SET aqo.mode = 'learn'; +SET aqo.force_collect_stat = 'on'; +SET aqo.min_neighbors_for_predicting = 1; + +-- First test: adding real records +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 19 AND B.y < 9; +-- Ignore unstable time-related columns +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + +SELECT true AS success from aqo_reset(); + + +-- Second test: fake data in aqo_query_stat +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 5 AND B.y < 100; +SELECT aqo_query_stat_update( + queryid, + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + 100, 50) +FROM aqo_query_stat; +SELECT 
round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 10 AND B.y < 100; + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + + +SET aqo.mode TO DEFAULT; +SET aqo.force_collect_stat TO DEFAULT; +SET aqo.min_neighbors_for_predicting TO DEFAULT; + +DROP FUNCTION round_array; +DROP TABLE A; +DROP TABLE B; +DROP EXTENSION aqo CASCADE; diff --git a/storage.c b/storage.c index 79b1b11d..a65ce463 100644 --- a/storage.c +++ b/storage.c @@ -233,7 +233,9 @@ reset_deactivated_queries(void) /* * Update AQO statistics. * - * Add a record (or update an existed) to stat storage for the query class. + * In append mode, append one element to exec_time, plan_time, est_error arrays + * (or their *_aqo counterparts, if use_aqo is true). Without append mode, add a + * record (or overwrite an existing) to stat storage for the query class. * Returns a copy of stat entry, allocated in current memory context. Caller is * in charge to free this struct after usage. * If stat hash table is full, return NULL and log this fact. 
@@ -312,19 +314,20 @@ aqo_stat_store(uint64 queryid, bool use_aqo, AqoStatArgs *stat_arg, if (use_aqo) { Assert(entry->cur_stat_slot_aqo >= 0); - pos = entry->cur_stat_slot_aqo; - if (entry->cur_stat_slot_aqo < STAT_SAMPLE_SIZE - 1) + if (entry->cur_stat_slot_aqo < STAT_SAMPLE_SIZE) entry->cur_stat_slot_aqo++; else { size_t sz = (STAT_SAMPLE_SIZE - 1) * sizeof(entry->est_error_aqo[0]); - Assert(entry->cur_stat_slot_aqo = STAT_SAMPLE_SIZE - 1); + Assert(entry->cur_stat_slot_aqo == STAT_SAMPLE_SIZE); + memmove(entry->plan_time_aqo, &entry->plan_time_aqo[1], sz); memmove(entry->exec_time_aqo, &entry->exec_time_aqo[1], sz); memmove(entry->est_error_aqo, &entry->est_error_aqo[1], sz); } + pos = entry->cur_stat_slot_aqo - 1; entry->execs_with_aqo++; entry->plan_time_aqo[pos] = *stat_arg->plan_time_aqo; entry->exec_time_aqo[pos] = *stat_arg->exec_time_aqo; @@ -333,19 +336,20 @@ aqo_stat_store(uint64 queryid, bool use_aqo, AqoStatArgs *stat_arg, else { Assert(entry->cur_stat_slot >= 0); - pos = entry->cur_stat_slot; - if (entry->cur_stat_slot < STAT_SAMPLE_SIZE - 1) + if (entry->cur_stat_slot < STAT_SAMPLE_SIZE) entry->cur_stat_slot++; else { size_t sz = (STAT_SAMPLE_SIZE - 1) * sizeof(entry->est_error[0]); - Assert(entry->cur_stat_slot = STAT_SAMPLE_SIZE - 1); + Assert(entry->cur_stat_slot == STAT_SAMPLE_SIZE); + memmove(entry->plan_time, &entry->plan_time[1], sz); memmove(entry->exec_time, &entry->exec_time[1], sz); memmove(entry->est_error, &entry->est_error[1], sz); } + pos = entry->cur_stat_slot - 1; entry->execs_without_aqo++; entry->plan_time[pos] = *stat_arg->plan_time; entry->exec_time[pos] = *stat_arg->exec_time;