From 27041024be25a82314d61c4dd987ff8721c04634 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 3 Dec 2020 15:15:57 +0500 Subject: [PATCH 001/203] Prevent distributed deadlocks. Disable AQO for FDW queries. Tags: shardman. --- preprocessing.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/preprocessing.c b/preprocessing.c index 79097a92..bb81d31e 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -134,6 +134,8 @@ aqo_planner(Query *parse, */ if ((parse->commandType != CMD_SELECT && parse->commandType != CMD_INSERT && parse->commandType != CMD_UPDATE && parse->commandType != CMD_DELETE) || + strstr(application_name, "postgres_fdw") != NULL || /* Prevent distributed deadlocks */ + strstr(application_name, "pgfdw:") != NULL || /* caused by fdw */ get_extension_oid("aqo", true) == InvalidOid || creating_extension || IsParallelWorker() || From 5e33230d8006fd389c351c6583f5c9993022ffdc Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 4 Dec 2020 08:42:56 +0500 Subject: [PATCH 002/203] Allow learning (and predicting for) on a ForeignScan, an Append, a MergeAppend, a SubqueryScan nodes. Tags: shardman. 
--- path_utils.c | 28 +++++++++++++++++++++++++++- postprocessing.c | 4 +++- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/path_utils.c b/path_utils.c index 6e809818..3a8a2a12 100644 --- a/path_utils.c +++ b/path_utils.c @@ -79,7 +79,7 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) List *inner_sel = NIL; List *outer; List *outer_sel = NIL; - List *cur; + List *cur = NIL; List *cur_sel = NIL; Assert(selectivities != NULL); @@ -160,6 +160,32 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) return get_path_clauses(((LimitPath *) path)->subpath, root, selectivities); break; + case T_SubqueryScanPath: + return get_path_clauses(((SubqueryScanPath *) path)->subpath, root, + selectivities); + break; + case T_AppendPath: + { + ListCell *lc; + + foreach (lc, ((AppendPath *) path)->subpaths) + { + Path *subpath = lfirst(lc); + + cur = list_concat(cur, list_copy( + get_path_clauses(subpath, root, selectivities))); + cur_sel = list_concat(cur_sel, *selectivities); + } + cur = list_concat(cur, list_copy(path->parent->baserestrictinfo)); + *selectivities = list_concat(cur_sel, + get_selectivities(root, + path->parent->baserestrictinfo, + 0, JOIN_INNER, NULL)); + return cur; + } + break; + case T_ForeignPath: + /* The same as in the default case */ default: cur = list_concat(list_copy(path->parent->baserestrictinfo), path->param_info ? 
diff --git a/postprocessing.c b/postprocessing.c index db38b2d4..2ca4d6a3 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -252,7 +252,9 @@ learnOnPlanState(PlanState *p, void *context) ctx->relidslist = list_copy(p->plan->path_relids); if (p->instrument && (p->righttree != NULL || p->lefttree == NULL || - p->plan->path_clauses != NIL)) + p->plan->path_clauses != NIL || + IsA(p, ForeignScanState) || + IsA(p, AppendState) || IsA(p, MergeAppendState))) { double learn_rows = 0.; double predicted = 0.; From 949293614e01012bd56ecd86114bacbbdac4801b Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 4 Dec 2020 17:28:43 +0500 Subject: [PATCH 003/203] Add support of postgres_fdw push-down. Tags: shardman. --- Makefile | 2 ++ postprocessing.c | 20 +++++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 30d5967a..1b1575c0 100644 --- a/Makefile +++ b/Makefile @@ -16,6 +16,8 @@ REGRESS = aqo_disabled \ schema \ aqo_CVE-2020-14350 +fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw +PG_CPPFLAGS += -I$(libpq_srcdir) -I$(fdw_srcdir) EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/conf.add DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql diff --git a/postprocessing.c b/postprocessing.c index 2ca4d6a3..8b385a75 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -19,6 +19,7 @@ #include "aqo.h" #include "access/parallel.h" #include "optimizer/optimizer.h" +#include "postgres_fdw.h" #include "utils/queryenvironment.h" typedef struct @@ -544,7 +545,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) void aqo_copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src) { - bool is_join_path; + bool is_join_path; if (prev_copy_generic_path_info_hook) prev_copy_generic_path_info_hook(root, dest, src); @@ -569,6 +570,23 @@ aqo_copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src) dest->path_clauses = ((JoinPath *) src)->joinrestrictinfo; dest->path_jointype = ((JoinPath *) 
src)->jointype; } + else if (src->type == T_ForeignPath) + { + ForeignPath *fpath = (ForeignPath *) src; + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) fpath->path.parent->fdw_private; + + /* + * Pushed down foreign join keeps clauses in special fdw_private + * structure. + * I'm not sure what fpinfo structure keeps clauses for sufficient time. + * So, copy clauses. + */ + dest->path_clauses = list_concat(list_copy(fpinfo->joinclauses), + list_copy(fpinfo->remote_conds)); + dest->path_clauses = list_concat(dest->path_clauses, + list_copy(fpinfo->local_conds)); + dest->path_jointype = ((JoinPath *) src)->jointype; + } else { dest->path_clauses = list_concat( From f512e561089c137115e8840a4841d8fbb9201e4e Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 15 Dec 2020 10:37:12 +0500 Subject: [PATCH 004/203] Process GatherMergePath in get_path_clauses routine. Replace find_ok assert with elog panic message. --- path_utils.c | 1 + storage.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/path_utils.c b/path_utils.c index 3a8a2a12..59edc35c 100644 --- a/path_utils.c +++ b/path_utils.c @@ -113,6 +113,7 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) selectivities); break; case T_GatherPath: + case T_GatherMergePath: return get_path_clauses(((GatherPath *) path)->subpath, root, selectivities); break; diff --git a/storage.c b/storage.c index 936a37c2..cd733a97 100644 --- a/storage.c +++ b/storage.c @@ -234,7 +234,9 @@ update_query(int query_hash, bool learn_aqo, bool use_aqo, slot = MakeSingleTupleTableSlot(query_index_scan->heapRelation->rd_att, &TTSOpsBufferHeapTuple); find_ok = index_getnext_slot(query_index_scan, ForwardScanDirection, slot); - Assert(find_ok); + if (!find_ok) + elog(PANIC, "Query isn't found in AQO learning database."); + tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); Assert(shouldFree != true); From 5b4b48c1a407e040d75c67d106c6d5ae5df0b938 Mon Sep 17 00:00:00 2001 From: Andrey 
Lepikhov Date: Fri, 15 Jan 2021 11:35:16 +0500 Subject: [PATCH 005/203] Add a foreign relation estimation hook into the core patch and AQO. Improve the elog panic message introduced in previous commit. Fix ForeignScan estimation logic. --- aqo.c | 2 + aqo.h | 2 + aqo_pg13.patch | 176 +++++++++++++++++++++++++++-------------------- postprocessing.c | 27 +++++++- storage.c | 3 +- 5 files changed, 132 insertions(+), 78 deletions(-) diff --git a/aqo.c b/aqo.c index 4f0eac87..95ef4c12 100644 --- a/aqo.c +++ b/aqo.c @@ -76,6 +76,7 @@ post_parse_analyze_hook_type prev_post_parse_analyze_hook; planner_hook_type prev_planner_hook; ExecutorStart_hook_type prev_ExecutorStart_hook; ExecutorEnd_hook_type prev_ExecutorEnd_hook; +set_baserel_rows_estimate_hook_type prev_set_foreign_rows_estimate_hook; set_baserel_rows_estimate_hook_type prev_set_baserel_rows_estimate_hook; get_parameterized_baserel_size_hook_type prev_get_parameterized_baserel_size_hook; set_joinrel_size_estimates_hook_type prev_set_joinrel_size_estimates_hook; @@ -126,6 +127,7 @@ _PG_init(void) prev_ExecutorEnd_hook = ExecutorEnd_hook; ExecutorEnd_hook = aqo_ExecutorEnd; prev_set_baserel_rows_estimate_hook = set_baserel_rows_estimate_hook; + set_foreign_rows_estimate_hook = aqo_set_baserel_rows_estimate; set_baserel_rows_estimate_hook = aqo_set_baserel_rows_estimate; prev_get_parameterized_baserel_size_hook = get_parameterized_baserel_size_hook; get_parameterized_baserel_size_hook = aqo_get_parameterized_baserel_size; diff --git a/aqo.h b/aqo.h index 080d076b..b6e934f1 100644 --- a/aqo.h +++ b/aqo.h @@ -253,6 +253,8 @@ extern post_parse_analyze_hook_type prev_post_parse_analyze_hook; extern planner_hook_type prev_planner_hook; extern ExecutorStart_hook_type prev_ExecutorStart_hook; extern ExecutorEnd_hook_type prev_ExecutorEnd_hook; +extern set_baserel_rows_estimate_hook_type + prev_set_foreign_rows_estimate_hook; extern set_baserel_rows_estimate_hook_type prev_set_baserel_rows_estimate_hook; extern 
get_parameterized_baserel_size_hook_type diff --git a/aqo_pg13.patch b/aqo_pg13.patch index b933ca49..6925b773 100644 --- a/aqo_pg13.patch +++ b/aqo_pg13.patch @@ -11,7 +11,7 @@ index 7a4866e338..47a18b9698 100644 auto_explain \ bloom \ diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index 43f9b01e83..707211308c 100644 +index 5d7eb3574c..87402b6859 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -24,6 +24,7 @@ @@ -32,7 +32,7 @@ index 43f9b01e83..707211308c 100644 /* OR-able flags for ExplainXMLTag() */ #define X_OPENING 0 -@@ -638,6 +642,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, +@@ -654,6 +658,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, ExplainPropertyFloat("Execution Time", "ms", 1000.0 * totaltime, 3, es); @@ -43,7 +43,7 @@ index 43f9b01e83..707211308c 100644 ExplainCloseGroup("Query", NULL, true, es); } -@@ -1579,6 +1587,38 @@ ExplainNode(PlanState *planstate, List *ancestors, +@@ -1595,6 +1603,38 @@ ExplainNode(PlanState *planstate, List *ancestors, appendStringInfo(es->str, " (actual rows=%.0f loops=%.0f)", rows, nloops); @@ -83,7 +83,7 @@ index 43f9b01e83..707211308c 100644 else { diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index 530aac68a7..1d94feadb9 100644 +index ba3ccc712c..74a090e6f9 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -126,6 +126,12 @@ CopyPlanFields(const Plan *from, Plan *newnode) @@ -100,21 +100,22 @@ index 530aac68a7..1d94feadb9 100644 COPY_BITMAPSET_FIELD(allParam); } diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index f1dfdc1a4a..359cafa531 100644 +index 380336518f..ecf0c45629 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c -@@ -97,6 +97,10 @@ +@@ -97,6 +97,11 @@ #include "utils/spccache.h" #include "utils/tuplesort.h" 
+set_baserel_rows_estimate_hook_type set_baserel_rows_estimate_hook = NULL; ++set_foreign_rows_estimate_hook_type set_foreign_rows_estimate_hook = NULL; +get_parameterized_baserel_size_hook_type get_parameterized_baserel_size_hook = NULL; +get_parameterized_joinrel_size_hook_type get_parameterized_joinrel_size_hook = NULL; +set_joinrel_size_estimates_hook_type set_joinrel_size_estimates_hook = NULL; #define LOG2(x) (log(x) / 0.693147180559945) -@@ -185,7 +189,6 @@ static Cost append_nonpartial_cost(List *subpaths, int numpaths, +@@ -185,7 +190,6 @@ static Cost append_nonpartial_cost(List *subpaths, int numpaths, static void set_rel_width(PlannerInfo *root, RelOptInfo *rel); static double relation_byte_size(double tuples, int width); static double page_size(double tuples, int width); @@ -122,7 +123,7 @@ index f1dfdc1a4a..359cafa531 100644 /* -@@ -266,7 +269,7 @@ cost_seqscan(Path *path, PlannerInfo *root, +@@ -266,7 +270,7 @@ cost_seqscan(Path *path, PlannerInfo *root, /* Adjust costing for parallelism, if used. */ if (path->parallel_workers > 0) { @@ -131,7 +132,7 @@ index f1dfdc1a4a..359cafa531 100644 /* The CPU cost is divided among all the workers. */ cpu_run_cost /= parallel_divisor; -@@ -745,7 +748,7 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count, +@@ -745,7 +749,7 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count, /* Adjust costing for parallelism, if used. */ if (path->path.parallel_workers > 0) { @@ -140,7 +141,7 @@ index f1dfdc1a4a..359cafa531 100644 path->path.rows = clamp_row_est(path->path.rows / parallel_divisor); -@@ -1026,7 +1029,7 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, +@@ -1026,7 +1030,7 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, /* Adjust costing for parallelism, if used. */ if (path->parallel_workers > 0) { @@ -149,7 +150,7 @@ index f1dfdc1a4a..359cafa531 100644 /* The CPU cost is divided among all the workers. 
*/ cpu_run_cost /= parallel_divisor; -@@ -2129,7 +2132,7 @@ cost_append(AppendPath *apath) +@@ -2129,7 +2133,7 @@ cost_append(AppendPath *apath) else /* parallel-aware */ { int i = 0; @@ -158,7 +159,7 @@ index f1dfdc1a4a..359cafa531 100644 /* Parallel-aware Append never produces ordered output. */ Assert(apath->path.pathkeys == NIL); -@@ -2163,7 +2166,7 @@ cost_append(AppendPath *apath) +@@ -2163,7 +2167,7 @@ cost_append(AppendPath *apath) { double subpath_parallel_divisor; @@ -167,7 +168,7 @@ index f1dfdc1a4a..359cafa531 100644 apath->path.rows += subpath->rows * (subpath_parallel_divisor / parallel_divisor); apath->path.total_cost += subpath->total_cost; -@@ -2761,7 +2764,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, +@@ -2762,7 +2766,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, /* For partial paths, scale row estimate. */ if (path->path.parallel_workers > 0) { @@ -176,7 +177,7 @@ index f1dfdc1a4a..359cafa531 100644 path->path.rows = clamp_row_est(path->path.rows / parallel_divisor); -@@ -3207,7 +3210,7 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, +@@ -3208,7 +3212,7 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, /* For partial paths, scale row estimate. */ if (path->jpath.path.parallel_workers > 0) { @@ -185,7 +186,7 @@ index f1dfdc1a4a..359cafa531 100644 path->jpath.path.rows = clamp_row_est(path->jpath.path.rows / parallel_divisor); -@@ -3541,7 +3544,7 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, +@@ -3542,7 +3546,7 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, * number, so we need to undo the division. */ if (parallel_hash) @@ -194,7 +195,7 @@ index f1dfdc1a4a..359cafa531 100644 /* * Get hash table size that executor would use for inner relation. -@@ -3638,7 +3641,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, +@@ -3639,7 +3643,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, /* For partial paths, scale row estimate. 
*/ if (path->jpath.path.parallel_workers > 0) { @@ -203,10 +204,19 @@ index f1dfdc1a4a..359cafa531 100644 path->jpath.path.rows = clamp_row_est(path->jpath.path.rows / parallel_divisor); -@@ -4633,6 +4636,49 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4634,6 +4638,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) } ++void ++set_foreign_rows_estimate(PlannerInfo *root, RelOptInfo *rel) ++{ ++ if (set_foreign_rows_estimate_hook) ++ (*set_foreign_rows_estimate_hook) (root, rel); ++ else ++ rel->rows = 1000; /* entirely bogus default estimate */ ++} ++ +/* + * set_baserel_rows_estimate + * Set the rows estimate for the given base relation. @@ -253,7 +263,7 @@ index f1dfdc1a4a..359cafa531 100644 /* * set_baserel_size_estimates * Set the size estimates for the given base relation. -@@ -4649,19 +4695,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4650,19 +4706,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) { @@ -274,7 +284,7 @@ index f1dfdc1a4a..359cafa531 100644 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); -@@ -4672,13 +4709,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) +@@ -4673,13 +4720,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) * get_parameterized_baserel_size * Make a size estimate for a parameterized scan of a base relation. * @@ -310,7 +320,7 @@ index f1dfdc1a4a..359cafa531 100644 { List *allclauses; double nrows; -@@ -4707,6 +4764,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -4708,6 +4775,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates * Set the size estimates for the given join relation. 
* @@ -347,7 +357,7 @@ index f1dfdc1a4a..359cafa531 100644 * The rel's targetlist must have been constructed already, and a * restriction clause list that matches the given component rels must * be provided. -@@ -4726,11 +4813,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -4727,11 +4824,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * build_joinrel_tlist, and baserestrictcost is not used for join rels. */ void @@ -364,7 +374,7 @@ index f1dfdc1a4a..359cafa531 100644 { rel->rows = calc_joinrel_size_estimate(root, rel, -@@ -4746,6 +4833,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -4747,6 +4844,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * get_parameterized_joinrel_size * Make a size estimate for a parameterized scan of a join relation. * @@ -400,7 +410,7 @@ index f1dfdc1a4a..359cafa531 100644 * 'rel' is the joinrel under consideration. * 'outer_path', 'inner_path' are (probably also parameterized) Paths that * produce the relations being joined. -@@ -4758,11 +4874,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -4759,11 +4885,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates must have been applied already. 
*/ double @@ -417,7 +427,16 @@ index f1dfdc1a4a..359cafa531 100644 { double nrows; -@@ -5760,14 +5876,25 @@ page_size(double tuples, int width) +@@ -5479,7 +5605,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) + /* Should only be applied to base relations */ + Assert(rel->relid > 0); + +- rel->rows = 1000; /* entirely bogus default estimate */ ++ set_foreign_rows_estimate(root, rel); + + cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); + +@@ -5761,14 +5887,25 @@ page_size(double tuples, int width) return ceil(relation_byte_size(tuples, width) / BLCKSZ); } @@ -446,7 +465,7 @@ index f1dfdc1a4a..359cafa531 100644 /* * Early experience with parallel query suggests that when there is only -@@ -5784,7 +5911,7 @@ get_parallel_divisor(Path *path) +@@ -5785,7 +5922,7 @@ get_parallel_divisor(Path *path) { double leader_contribution; @@ -456,7 +475,7 @@ index f1dfdc1a4a..359cafa531 100644 parallel_divisor += leader_contribution; } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 40abe6f9f6..9edd6daeff 100644 +index 25d4750ca6..d0ea7bd2ff 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -70,6 +70,8 @@ @@ -558,7 +577,7 @@ index 40abe6f9f6..9edd6daeff 100644 /* Assign the rescan Param. */ gm_plan->rescan_param = assign_special_exec_param(root); -@@ -1901,7 +1903,7 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path, int flags) +@@ -1903,7 +1905,7 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path, int flags) /* We need a Result node */ plan = (Plan *) make_result(tlist, NULL, subplan); @@ -567,7 +586,7 @@ index 40abe6f9f6..9edd6daeff 100644 } return plan; -@@ -2002,7 +2004,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags) +@@ -2004,7 +2006,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags) IS_OTHER_REL(best_path->subpath->parent) ? 
best_path->path.parent->relids : NULL); @@ -576,7 +595,7 @@ index 40abe6f9f6..9edd6daeff 100644 return plan; } -@@ -2028,7 +2030,7 @@ create_incrementalsort_plan(PlannerInfo *root, IncrementalSortPath *best_path, +@@ -2030,7 +2032,7 @@ create_incrementalsort_plan(PlannerInfo *root, IncrementalSortPath *best_path, best_path->spath.path.parent->relids : NULL, best_path->nPresortedCols); @@ -585,7 +604,7 @@ index 40abe6f9f6..9edd6daeff 100644 return plan; } -@@ -2067,7 +2069,7 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path) +@@ -2069,7 +2071,7 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path) subplan->targetlist), subplan); @@ -594,7 +613,7 @@ index 40abe6f9f6..9edd6daeff 100644 return plan; } -@@ -2095,7 +2097,7 @@ create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flag +@@ -2097,7 +2099,7 @@ create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flag best_path->path.pathkeys, best_path->numkeys); @@ -603,7 +622,7 @@ index 40abe6f9f6..9edd6daeff 100644 return plan; } -@@ -2139,7 +2141,7 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) +@@ -2141,7 +2143,7 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) best_path->transitionSpace, subplan); @@ -612,7 +631,7 @@ index 40abe6f9f6..9edd6daeff 100644 return plan; } -@@ -2341,7 +2343,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) +@@ -2343,7 +2345,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) subplan); /* Copy cost data from Path to Plan */ @@ -621,7 +640,7 @@ index 40abe6f9f6..9edd6daeff 100644 } return (Plan *) plan; -@@ -2399,7 +2401,7 @@ create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path) +@@ -2401,7 +2403,7 @@ create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path) plan = make_result(tlist, (Node *) best_path->quals, NULL); @@ -630,7 +649,7 @@ index 40abe6f9f6..9edd6daeff 100644 /* * During setrefs.c, we'll need to replace references to the 
Agg nodes -@@ -2518,7 +2520,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) +@@ -2520,7 +2522,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) wc->inRangeNullsFirst, subplan); @@ -639,7 +658,7 @@ index 40abe6f9f6..9edd6daeff 100644 return plan; } -@@ -2554,7 +2556,7 @@ create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags) +@@ -2556,7 +2558,7 @@ create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags) best_path->firstFlag, numGroups); @@ -648,7 +667,7 @@ index 40abe6f9f6..9edd6daeff 100644 return plan; } -@@ -2590,7 +2592,7 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path) +@@ -2592,7 +2594,7 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path) best_path->distinctList, numGroups); @@ -657,7 +676,7 @@ index 40abe6f9f6..9edd6daeff 100644 return plan; } -@@ -2613,7 +2615,7 @@ create_lockrows_plan(PlannerInfo *root, LockRowsPath *best_path, +@@ -2615,7 +2617,7 @@ create_lockrows_plan(PlannerInfo *root, LockRowsPath *best_path, plan = make_lockrows(subplan, best_path->rowMarks, best_path->epqParam); @@ -666,7 +685,7 @@ index 40abe6f9f6..9edd6daeff 100644 return plan; } -@@ -2674,7 +2676,7 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) +@@ -2676,7 +2678,7 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) best_path->onconflict, best_path->epqParam); @@ -675,7 +694,7 @@ index 40abe6f9f6..9edd6daeff 100644 return plan; } -@@ -2728,7 +2730,7 @@ create_limit_plan(PlannerInfo *root, LimitPath *best_path, int flags) +@@ -2730,7 +2732,7 @@ create_limit_plan(PlannerInfo *root, LimitPath *best_path, int flags) best_path->limitOption, numUniqkeys, uniqColIdx, uniqOperators, uniqCollations); @@ -684,7 +703,7 @@ index 40abe6f9f6..9edd6daeff 100644 return plan; } -@@ -2774,7 +2776,7 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path, +@@ -2776,7 +2778,7 @@ create_seqscan_plan(PlannerInfo *root, 
Path *best_path, scan_clauses, scan_relid); @@ -693,7 +712,7 @@ index 40abe6f9f6..9edd6daeff 100644 return scan_plan; } -@@ -2820,7 +2822,7 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path, +@@ -2822,7 +2824,7 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path, scan_relid, tsc); @@ -702,7 +721,7 @@ index 40abe6f9f6..9edd6daeff 100644 return scan_plan; } -@@ -2998,7 +3000,7 @@ create_indexscan_plan(PlannerInfo *root, +@@ -3000,7 +3002,7 @@ create_indexscan_plan(PlannerInfo *root, indexorderbyops, best_path->indexscandir); @@ -711,7 +730,7 @@ index 40abe6f9f6..9edd6daeff 100644 return scan_plan; } -@@ -3113,7 +3115,7 @@ create_bitmap_scan_plan(PlannerInfo *root, +@@ -3115,7 +3117,7 @@ create_bitmap_scan_plan(PlannerInfo *root, bitmapqualorig, baserelid); @@ -720,7 +739,7 @@ index 40abe6f9f6..9edd6daeff 100644 return scan_plan; } -@@ -3433,7 +3435,7 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path, +@@ -3435,7 +3437,7 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path, scan_relid, tidquals); @@ -729,7 +748,7 @@ index 40abe6f9f6..9edd6daeff 100644 return scan_plan; } -@@ -3483,7 +3485,7 @@ create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path, +@@ -3485,7 +3487,7 @@ create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path, scan_relid, subplan); @@ -738,7 +757,7 @@ index 40abe6f9f6..9edd6daeff 100644 return scan_plan; } -@@ -3526,7 +3528,7 @@ create_functionscan_plan(PlannerInfo *root, Path *best_path, +@@ -3528,7 +3530,7 @@ create_functionscan_plan(PlannerInfo *root, Path *best_path, scan_plan = make_functionscan(tlist, scan_clauses, scan_relid, functions, rte->funcordinality); @@ -747,7 +766,7 @@ index 40abe6f9f6..9edd6daeff 100644 return scan_plan; } -@@ -3569,7 +3571,7 @@ create_tablefuncscan_plan(PlannerInfo *root, Path *best_path, +@@ -3571,7 +3573,7 @@ create_tablefuncscan_plan(PlannerInfo *root, Path *best_path, scan_plan = make_tablefuncscan(tlist, scan_clauses, scan_relid, tablefunc); 
@@ -756,7 +775,7 @@ index 40abe6f9f6..9edd6daeff 100644 return scan_plan; } -@@ -3613,7 +3615,7 @@ create_valuesscan_plan(PlannerInfo *root, Path *best_path, +@@ -3615,7 +3617,7 @@ create_valuesscan_plan(PlannerInfo *root, Path *best_path, scan_plan = make_valuesscan(tlist, scan_clauses, scan_relid, values_lists); @@ -765,7 +784,7 @@ index 40abe6f9f6..9edd6daeff 100644 return scan_plan; } -@@ -3706,7 +3708,7 @@ create_ctescan_plan(PlannerInfo *root, Path *best_path, +@@ -3708,7 +3710,7 @@ create_ctescan_plan(PlannerInfo *root, Path *best_path, scan_plan = make_ctescan(tlist, scan_clauses, scan_relid, plan_id, cte_param_id); @@ -774,7 +793,7 @@ index 40abe6f9f6..9edd6daeff 100644 return scan_plan; } -@@ -3745,7 +3747,7 @@ create_namedtuplestorescan_plan(PlannerInfo *root, Path *best_path, +@@ -3747,7 +3749,7 @@ create_namedtuplestorescan_plan(PlannerInfo *root, Path *best_path, scan_plan = make_namedtuplestorescan(tlist, scan_clauses, scan_relid, rte->enrname); @@ -783,7 +802,7 @@ index 40abe6f9f6..9edd6daeff 100644 return scan_plan; } -@@ -3783,7 +3785,7 @@ create_resultscan_plan(PlannerInfo *root, Path *best_path, +@@ -3785,7 +3787,7 @@ create_resultscan_plan(PlannerInfo *root, Path *best_path, scan_plan = make_result(tlist, (Node *) scan_clauses, NULL); @@ -792,7 +811,7 @@ index 40abe6f9f6..9edd6daeff 100644 return scan_plan; } -@@ -3843,7 +3845,7 @@ create_worktablescan_plan(PlannerInfo *root, Path *best_path, +@@ -3845,7 +3847,7 @@ create_worktablescan_plan(PlannerInfo *root, Path *best_path, scan_plan = make_worktablescan(tlist, scan_clauses, scan_relid, cteroot->wt_param_id); @@ -801,7 +820,7 @@ index 40abe6f9f6..9edd6daeff 100644 return scan_plan; } -@@ -3903,7 +3905,7 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, +@@ -3905,7 +3907,7 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, outer_plan); /* Copy cost data from Path to Plan; no need to make FDW do this */ @@ -810,7 +829,7 @@ index 40abe6f9f6..9edd6daeff 
100644 /* Copy foreign server OID; likewise, no need to make FDW do this */ scan_plan->fs_server = rel->serverid; -@@ -4037,7 +4039,7 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path, +@@ -4039,7 +4041,7 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path, * Copy cost data from Path to Plan; no need to make custom-plan providers * do this */ @@ -819,7 +838,7 @@ index 40abe6f9f6..9edd6daeff 100644 /* Likewise, copy the relids that are represented by this custom scan */ cplan->custom_relids = best_path->path.parent->relids; -@@ -4139,7 +4141,7 @@ create_nestloop_plan(PlannerInfo *root, +@@ -4141,7 +4143,7 @@ create_nestloop_plan(PlannerInfo *root, best_path->jointype, best_path->inner_unique); @@ -828,7 +847,7 @@ index 40abe6f9f6..9edd6daeff 100644 return join_plan; } -@@ -4446,7 +4448,7 @@ create_mergejoin_plan(PlannerInfo *root, +@@ -4448,7 +4450,7 @@ create_mergejoin_plan(PlannerInfo *root, best_path->skip_mark_restore); /* Costs of sort and material steps are included in path cost already */ @@ -837,7 +856,7 @@ index 40abe6f9f6..9edd6daeff 100644 return join_plan; } -@@ -4619,7 +4621,7 @@ create_hashjoin_plan(PlannerInfo *root, +@@ -4621,7 +4623,7 @@ create_hashjoin_plan(PlannerInfo *root, best_path->jpath.jointype, best_path->jpath.inner_unique); @@ -846,7 +865,7 @@ index 40abe6f9f6..9edd6daeff 100644 return join_plan; } -@@ -5119,7 +5121,7 @@ order_qual_clauses(PlannerInfo *root, List *clauses) +@@ -5121,7 +5123,7 @@ order_qual_clauses(PlannerInfo *root, List *clauses) * Also copy the parallel-related flags, which the executor *will* use. 
*/ static void @@ -855,7 +874,7 @@ index 40abe6f9f6..9edd6daeff 100644 { dest->startup_cost = src->startup_cost; dest->total_cost = src->total_cost; -@@ -5127,6 +5129,9 @@ copy_generic_path_info(Plan *dest, Path *src) +@@ -5129,6 +5131,9 @@ copy_generic_path_info(Plan *dest, Path *src) dest->plan_width = src->pathtarget->width; dest->parallel_aware = src->parallel_aware; dest->parallel_safe = src->parallel_safe; @@ -866,10 +885,10 @@ index 40abe6f9f6..9edd6daeff 100644 /* diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c -index 76245c1ff3..cac6adf35e 100644 +index 731ff708b9..e862e2a974 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c -@@ -1261,6 +1261,7 @@ find_childrel_parents(PlannerInfo *root, RelOptInfo *rel) +@@ -1260,6 +1260,7 @@ find_childrel_parents(PlannerInfo *root, RelOptInfo *rel) } @@ -877,7 +896,7 @@ index 76245c1ff3..cac6adf35e 100644 /* * get_baserel_parampathinfo * Get the ParamPathInfo for a parameterized path for a base relation, -@@ -1329,6 +1330,10 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, +@@ -1328,6 +1329,10 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, ppi->ppi_req_outer = required_outer; ppi->ppi_rows = rows; ppi->ppi_clauses = pclauses; @@ -888,7 +907,7 @@ index 76245c1ff3..cac6adf35e 100644 baserel->ppilist = lappend(baserel->ppilist, ppi); return ppi; -@@ -1554,6 +1559,10 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, +@@ -1553,6 +1558,10 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, ppi->ppi_req_outer = required_outer; ppi->ppi_rows = rows; ppi->ppi_clauses = NIL; @@ -900,7 +919,7 @@ index 76245c1ff3..cac6adf35e 100644 return ppi; diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h -index ba661d32a6..3c2595d639 100644 +index e94d9e49cf..4404155fbd 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h @@ -75,6 
+75,12 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; @@ -917,10 +936,10 @@ index ba661d32a6..3c2595d639 100644 extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, ParamListInfo params, DestReceiver *dest); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h -index 8f62d61702..cfcd2c249d 100644 +index cde2637798..74ffaa9c8a 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h -@@ -734,6 +734,10 @@ typedef struct RelOptInfo +@@ -739,6 +739,10 @@ typedef struct RelOptInfo Relids top_parent_relids; /* Relids of topmost parents (if "other" * rel) */ @@ -931,7 +950,7 @@ index 8f62d61702..cfcd2c249d 100644 /* used for partitioned relations: */ PartitionScheme part_scheme; /* Partitioning scheme */ int nparts; /* Number of partitions; -1 if not yet set; in -@@ -1101,6 +1105,10 @@ typedef struct ParamPathInfo +@@ -1107,6 +1111,10 @@ typedef struct ParamPathInfo Relids ppi_req_outer; /* rels supplying parameters used by path */ double ppi_rows; /* estimated number of result tuples */ List *ppi_clauses; /* join clauses available from outer rels */ @@ -943,7 +962,7 @@ index 8f62d61702..cfcd2c249d 100644 diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 7e6b10f86b..148720a566 100644 +index 43160439f0..86988ca32d 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -140,6 +140,19 @@ typedef struct Plan @@ -967,18 +986,22 @@ index 7e6b10f86b..148720a566 100644 * Information for management of parameter-change-driven rescanning * diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h -index 6141654e47..0915da8618 100644 +index ed2e4af4be..7e3cbcca14 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h -@@ -39,6 +39,33 @@ typedef enum +@@ -39,6 +39,37 @@ typedef enum } ConstraintExclusionType; +/* Hook for plugins to get control of cardinality estimation */ +typedef void 
(*set_baserel_rows_estimate_hook_type) (PlannerInfo *root, + RelOptInfo *rel); ++typedef void (*set_foreign_rows_estimate_hook_type) (PlannerInfo *root, ++ RelOptInfo *rel); +extern PGDLLIMPORT set_baserel_rows_estimate_hook_type + set_baserel_rows_estimate_hook; ++extern PGDLLIMPORT set_foreign_rows_estimate_hook_type ++ set_foreign_rows_estimate_hook; +typedef double (*get_parameterized_baserel_size_hook_type) (PlannerInfo *root, + RelOptInfo *rel, + List *param_clauses); @@ -1004,10 +1027,11 @@ index 6141654e47..0915da8618 100644 /* * prototypes for costsize.c * routines to compute costs and sizes -@@ -175,10 +202,21 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, +@@ -175,10 +206,22 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, SpecialJoinInfo *sjinfo, List *restrictlist, SemiAntiJoinFactors *semifactors); ++extern void set_foreign_rows_estimate(PlannerInfo *root, RelOptInfo *rel); +extern void set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel); +extern void set_baserel_rows_estimate_standard(PlannerInfo *root, RelOptInfo *rel); extern void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel); @@ -1026,7 +1050,7 @@ index 6141654e47..0915da8618 100644 extern double get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, Path *outer_path, -@@ -190,6 +228,11 @@ extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -190,6 +233,11 @@ extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *inner_rel, SpecialJoinInfo *sjinfo, List *restrictlist); @@ -1038,7 +1062,7 @@ index 6141654e47..0915da8618 100644 extern void set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern void set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern void set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel); -@@ -202,5 +245,7 @@ extern void set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel); +@@ -202,5 +250,7 @@ 
extern void set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern PathTarget *set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target); extern double compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, int loop_count, Cost *cost, double *tuple); @@ -1047,7 +1071,7 @@ index 6141654e47..0915da8618 100644 #endif /* COST_H */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h -index 715a24ad29..7311ba92f4 100644 +index 23dec14cbd..58489cb620 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -18,6 +18,10 @@ @@ -1062,7 +1086,7 @@ index 715a24ad29..7311ba92f4 100644 * prototypes for pathnode.c */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h -index 81c4a7e560..59daf7fb81 100644 +index 777655210b..dac8231291 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -24,6 +24,12 @@ extern double cursor_tuple_fraction; diff --git a/postprocessing.c b/postprocessing.c index 8b385a75..b74701e7 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -581,11 +581,35 @@ aqo_copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src) * I'm not sure what fpinfo structure keeps clauses for sufficient time. * So, copy clauses. 
*/ + dest->path_clauses = list_concat(list_copy(fpinfo->joinclauses), list_copy(fpinfo->remote_conds)); dest->path_clauses = list_concat(dest->path_clauses, list_copy(fpinfo->local_conds)); + dest->path_jointype = ((JoinPath *) src)->jointype; + + dest->path_relids = get_list_of_relids(root, fpinfo->lower_subquery_rels); + + if (fpinfo->outerrel) + { + dest->path_clauses = list_concat(dest->path_clauses, + list_copy(fpinfo->outerrel->baserestrictinfo)); + dest->path_clauses = list_concat(dest->path_clauses, + list_copy(fpinfo->outerrel->joininfo)); + dest->path_relids = list_concat(dest->path_relids, + get_list_of_relids(root, fpinfo->outerrel->relids)); + } + + if (fpinfo->innerrel) + { + dest->path_clauses = list_concat(dest->path_clauses, + list_copy(fpinfo->innerrel->baserestrictinfo)); + dest->path_clauses = list_concat(dest->path_clauses, + list_copy(fpinfo->innerrel->joininfo)); + dest->path_relids = list_concat(dest->path_relids, + get_list_of_relids(root, fpinfo->innerrel->relids)); + } } else { @@ -595,7 +619,8 @@ aqo_copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src) dest->path_jointype = JOIN_INNER; } - dest->path_relids = get_list_of_relids(root, src->parent->relids); + dest->path_relids = list_concat(dest->path_relids, + get_list_of_relids(root, src->parent->relids)); dest->path_parallel_workers = src->parallel_workers; dest->was_parametrized = (src->param_info != NULL); diff --git a/storage.c b/storage.c index cd733a97..af468f30 100644 --- a/storage.c +++ b/storage.c @@ -235,7 +235,8 @@ update_query(int query_hash, bool learn_aqo, bool use_aqo, &TTSOpsBufferHeapTuple); find_ok = index_getnext_slot(query_index_scan, ForwardScanDirection, slot); if (!find_ok) - elog(PANIC, "Query isn't found in AQO learning database."); + elog(PANIC, "[AQO]: Update of non-existed query: query hash: %d, fss hash: %d, use aqo: %s", + query_hash, fspace_hash, use_aqo ? 
"true" : "false"); tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); Assert(shouldFree != true); From f6396e2cb7be7577074d4ec491e954be35cce7c6 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 15 Jan 2021 17:45:17 +0500 Subject: [PATCH 006/203] Add basic regression test on FDW support. Switch from AQO_EXPLAIN define to the two user GUCS: aqo.details and aqo.show_hash. New ExplainOneNode hook allows to add info into explain of each node. --- Makefile | 2 + aqo.c | 33 ++++++- aqo.h | 6 ++ aqo_pg13.patch | 209 ++++++++++++++++++++----------------------- conf.add | 4 +- expected/aqo_fdw.out | 74 +++++++++++++++ postprocessing.c | 126 +++++++++++++++++--------- 7 files changed, 295 insertions(+), 159 deletions(-) create mode 100644 expected/aqo_fdw.out diff --git a/Makefile b/Makefile index 1b1575c0..54aa96a5 100644 --- a/Makefile +++ b/Makefile @@ -14,11 +14,13 @@ REGRESS = aqo_disabled \ aqo_forced \ aqo_learn \ schema \ + aqo_fdw \ aqo_CVE-2020-14350 fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw PG_CPPFLAGS += -I$(libpq_srcdir) -I$(fdw_srcdir) EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/conf.add +EXTRA_INSTALL = contrib/postgres_fdw DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql diff --git a/aqo.c b/aqo.c index 95ef4c12..84c5d39f 100644 --- a/aqo.c +++ b/aqo.c @@ -18,6 +18,8 @@ void _PG_init(void); /* Strategy of determining feature space for new queries. 
*/ int aqo_mode; bool force_collect_stat; +bool aqo_show_hash; +bool aqo_details; /* GUC variables */ static const struct config_enum_entry format_options[] = { @@ -83,6 +85,7 @@ set_joinrel_size_estimates_hook_type prev_set_joinrel_size_estimates_hook; get_parameterized_joinrel_size_hook_type prev_get_parameterized_joinrel_size_hook; copy_generic_path_info_hook_type prev_copy_generic_path_info_hook; ExplainOnePlan_hook_type prev_ExplainOnePlan_hook; +ExplainOneNode_hook_type prev_ExplainOneNode_hook; /***************************************************************************** * @@ -116,7 +119,33 @@ _PG_init(void) NULL, NULL, NULL - ); + ); + + DefineCustomBoolVariable( + "aqo.show_hash", + "Show query and node hash on explain.", + "Hash value depend on each instance and is not good to enable it in regression or TAP tests.", + &aqo_show_hash, + false, + PGC_USERSET, + 0, + NULL, + NULL, + NULL + ); + + DefineCustomBoolVariable( + "aqo.details", + "Show AQO state on a query.", + NULL, + &aqo_details, + false, + PGC_USERSET, + 0, + NULL, + NULL, + NULL + ); prev_planner_hook = planner_hook; planner_hook = aqo_planner; @@ -139,6 +168,8 @@ _PG_init(void) copy_generic_path_info_hook = aqo_copy_generic_path_info; prev_ExplainOnePlan_hook = ExplainOnePlan_hook; ExplainOnePlan_hook = print_into_explain; + prev_ExplainOneNode_hook = ExplainOneNode_hook; + ExplainOneNode_hook = print_node_explain; parampathinfo_postinit_hook = ppi_hook; init_deactivated_queries_storage(); diff --git a/aqo.h b/aqo.h index b6e934f1..0381ba3c 100644 --- a/aqo.h +++ b/aqo.h @@ -174,6 +174,8 @@ typedef enum extern int aqo_mode; extern bool force_collect_stat; +extern bool aqo_show_hash; +extern bool aqo_details; /* * It is mostly needed for auto tuning of query. 
with auto tuning mode aqo @@ -312,6 +314,10 @@ void print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, const char *queryString, ParamListInfo params, const instr_time *planduration, QueryEnvironment *queryEnv); +extern void print_node_explain(ExplainState *es, + PlanState *ps, + Plan *plan, + double rows); void disable_aqo_for_query(void); /* Cardinality estimation hooks */ diff --git a/aqo_pg13.patch b/aqo_pg13.patch index 6925b773..1c30cadc 100644 --- a/aqo_pg13.patch +++ b/aqo_pg13.patch @@ -1,5 +1,5 @@ diff --git a/contrib/Makefile b/contrib/Makefile -index 7a4866e338..47a18b9698 100644 +index 1846d415b6..95519ac11d 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -7,6 +7,7 @@ include $(top_builddir)/src/Makefile.global @@ -11,7 +11,7 @@ index 7a4866e338..47a18b9698 100644 auto_explain \ bloom \ diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index 5d7eb3574c..87402b6859 100644 +index 0ad49612d2..7c0b82bde7 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -24,6 +24,7 @@ @@ -22,17 +22,20 @@ index 5d7eb3574c..87402b6859 100644 #include "parser/parsetree.h" #include "rewrite/rewriteHandler.h" #include "storage/bufmgr.h" -@@ -46,6 +47,9 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; +@@ -46,6 +47,12 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; /* Hook for plugins to get control in explain_get_index_name() */ explain_get_index_name_hook_type explain_get_index_name_hook = NULL; +/* Hook for plugins to get control in ExplainOnePlan() */ +ExplainOnePlan_hook_type ExplainOnePlan_hook = NULL; ++ ++/* Hook for plugins to get control in ExplainOnePlan() */ ++ExplainOneNode_hook_type ExplainOneNode_hook = NULL; + /* OR-able flags for ExplainXMLTag() */ #define X_OPENING 0 -@@ -654,6 +658,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, +@@ -638,6 +645,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, 
ExplainState *es, ExplainPropertyFloat("Execution Time", "ms", 1000.0 * totaltime, 3, es); @@ -43,50 +46,21 @@ index 5d7eb3574c..87402b6859 100644 ExplainCloseGroup("Query", NULL, true, es); } -@@ -1595,6 +1603,38 @@ ExplainNode(PlanState *planstate, List *ancestors, +@@ -1582,6 +1593,9 @@ ExplainNode(PlanState *planstate, List *ancestors, appendStringInfo(es->str, " (actual rows=%.0f loops=%.0f)", rows, nloops); + -+#ifdef AQO_EXPLAIN -+ if (es->verbose && plan && planstate->instrument) -+ { -+ int wrkrs = 1; -+ double error = -1.; -+ -+ if (planstate->worker_instrument && IsParallelTuplesProcessing(plan)) -+ { -+ int i; -+ for (i = 0; i < planstate->worker_instrument->num_workers; i++) -+ { -+ Instrumentation *instrument = &planstate->worker_instrument->instrument[i]; -+ if (instrument->nloops <= 0) -+ continue; -+ wrkrs++; -+ } -+ } -+ -+ if (plan->predicted_cardinality > 0.) -+ { -+ error = 100. * (plan->predicted_cardinality - (rows*wrkrs)) -+ / plan->predicted_cardinality; -+ appendStringInfo(es->str, -+ " (AQO: cardinality=%.0lf, error=%.0lf%%, fsspace_hash=%d)", -+ plan->predicted_cardinality, error, plan->fss_hash); -+ } -+ else -+ appendStringInfo(es->str, " (AQO not used, fsspace_hash=%d)", -+ plan->fss_hash); -+ } -+#endif ++ if (ExplainOneNode_hook) ++ ExplainOneNode_hook(es, planstate, plan, rows); } else { diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index ba3ccc712c..74a090e6f9 100644 +index 256ab54003..cfdc0247ec 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c -@@ -126,6 +126,12 @@ CopyPlanFields(const Plan *from, Plan *newnode) +@@ -127,6 +127,12 @@ CopyPlanFields(const Plan *from, Plan *newnode) COPY_NODE_FIELD(lefttree); COPY_NODE_FIELD(righttree); COPY_NODE_FIELD(initPlan); @@ -100,7 +74,7 @@ index ba3ccc712c..74a090e6f9 100644 COPY_BITMAPSET_FIELD(allParam); } diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index 380336518f..ecf0c45629 
100644 +index ef7e8281cc..93d24b905a 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -97,6 +97,11 @@ @@ -115,7 +89,7 @@ index 380336518f..ecf0c45629 100644 #define LOG2(x) (log(x) / 0.693147180559945) -@@ -185,7 +190,6 @@ static Cost append_nonpartial_cost(List *subpaths, int numpaths, +@@ -178,7 +183,6 @@ static Cost append_nonpartial_cost(List *subpaths, int numpaths, static void set_rel_width(PlannerInfo *root, RelOptInfo *rel); static double relation_byte_size(double tuples, int width); static double page_size(double tuples, int width); @@ -123,7 +97,7 @@ index 380336518f..ecf0c45629 100644 /* -@@ -266,7 +270,7 @@ cost_seqscan(Path *path, PlannerInfo *root, +@@ -256,7 +260,7 @@ cost_seqscan(Path *path, PlannerInfo *root, /* Adjust costing for parallelism, if used. */ if (path->parallel_workers > 0) { @@ -132,7 +106,7 @@ index 380336518f..ecf0c45629 100644 /* The CPU cost is divided among all the workers. */ cpu_run_cost /= parallel_divisor; -@@ -745,7 +749,7 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count, +@@ -735,7 +739,7 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count, /* Adjust costing for parallelism, if used. */ if (path->path.parallel_workers > 0) { @@ -141,7 +115,7 @@ index 380336518f..ecf0c45629 100644 path->path.rows = clamp_row_est(path->path.rows / parallel_divisor); -@@ -1026,7 +1030,7 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, +@@ -1016,7 +1020,7 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, /* Adjust costing for parallelism, if used. */ if (path->parallel_workers > 0) { @@ -150,7 +124,7 @@ index 380336518f..ecf0c45629 100644 /* The CPU cost is divided among all the workers. 
*/ cpu_run_cost /= parallel_divisor; -@@ -2129,7 +2133,7 @@ cost_append(AppendPath *apath) +@@ -2119,7 +2123,7 @@ cost_append(AppendPath *apath) else /* parallel-aware */ { int i = 0; @@ -159,7 +133,7 @@ index 380336518f..ecf0c45629 100644 /* Parallel-aware Append never produces ordered output. */ Assert(apath->path.pathkeys == NIL); -@@ -2163,7 +2167,7 @@ cost_append(AppendPath *apath) +@@ -2153,7 +2157,7 @@ cost_append(AppendPath *apath) { double subpath_parallel_divisor; @@ -168,7 +142,7 @@ index 380336518f..ecf0c45629 100644 apath->path.rows += subpath->rows * (subpath_parallel_divisor / parallel_divisor); apath->path.total_cost += subpath->total_cost; -@@ -2762,7 +2766,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, +@@ -2752,7 +2756,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, /* For partial paths, scale row estimate. */ if (path->path.parallel_workers > 0) { @@ -177,7 +151,7 @@ index 380336518f..ecf0c45629 100644 path->path.rows = clamp_row_est(path->path.rows / parallel_divisor); -@@ -3208,7 +3212,7 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, +@@ -3200,7 +3204,7 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, /* For partial paths, scale row estimate. */ if (path->jpath.path.parallel_workers > 0) { @@ -186,7 +160,7 @@ index 380336518f..ecf0c45629 100644 path->jpath.path.rows = clamp_row_est(path->jpath.path.rows / parallel_divisor); -@@ -3542,7 +3546,7 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, +@@ -3534,7 +3538,7 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, * number, so we need to undo the division. */ if (parallel_hash) @@ -195,7 +169,7 @@ index 380336518f..ecf0c45629 100644 /* * Get hash table size that executor would use for inner relation. -@@ -3639,7 +3643,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, +@@ -3631,7 +3635,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, /* For partial paths, scale row estimate. 
*/ if (path->jpath.path.parallel_workers > 0) { @@ -204,7 +178,7 @@ index 380336518f..ecf0c45629 100644 path->jpath.path.rows = clamp_row_est(path->jpath.path.rows / parallel_divisor); -@@ -4634,6 +4638,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4626,6 +4630,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) } @@ -263,7 +237,7 @@ index 380336518f..ecf0c45629 100644 /* * set_baserel_size_estimates * Set the size estimates for the given base relation. -@@ -4650,19 +4706,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4642,19 +4698,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) { @@ -284,7 +258,7 @@ index 380336518f..ecf0c45629 100644 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); -@@ -4673,13 +4720,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) +@@ -4665,13 +4712,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) * get_parameterized_baserel_size * Make a size estimate for a parameterized scan of a base relation. * @@ -320,7 +294,7 @@ index 380336518f..ecf0c45629 100644 { List *allclauses; double nrows; -@@ -4708,6 +4775,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -4700,6 +4767,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates * Set the size estimates for the given join relation. * @@ -357,7 +331,7 @@ index 380336518f..ecf0c45629 100644 * The rel's targetlist must have been constructed already, and a * restriction clause list that matches the given component rels must * be provided. -@@ -4727,11 +4824,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -4719,11 +4816,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * build_joinrel_tlist, and baserestrictcost is not used for join rels. 
*/ void @@ -374,7 +348,7 @@ index 380336518f..ecf0c45629 100644 { rel->rows = calc_joinrel_size_estimate(root, rel, -@@ -4747,6 +4844,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -4739,6 +4836,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * get_parameterized_joinrel_size * Make a size estimate for a parameterized scan of a join relation. * @@ -410,7 +384,7 @@ index 380336518f..ecf0c45629 100644 * 'rel' is the joinrel under consideration. * 'outer_path', 'inner_path' are (probably also parameterized) Paths that * produce the relations being joined. -@@ -4759,11 +4885,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -4751,11 +4877,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates must have been applied already. */ double @@ -427,7 +401,7 @@ index 380336518f..ecf0c45629 100644 { double nrows; -@@ -5479,7 +5605,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) +@@ -5424,7 +5550,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) /* Should only be applied to base relations */ Assert(rel->relid > 0); @@ -436,7 +410,7 @@ index 380336518f..ecf0c45629 100644 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); -@@ -5761,14 +5887,25 @@ page_size(double tuples, int width) +@@ -5706,14 +5832,25 @@ page_size(double tuples, int width) return ceil(relation_byte_size(tuples, width) / BLCKSZ); } @@ -465,7 +439,7 @@ index 380336518f..ecf0c45629 100644 /* * Early experience with parallel query suggests that when there is only -@@ -5785,7 +5922,7 @@ get_parallel_divisor(Path *path) +@@ -5730,7 +5867,7 @@ get_parallel_divisor(Path *path) { double leader_contribution; @@ -475,7 +449,7 @@ index 380336518f..ecf0c45629 100644 parallel_divisor += leader_contribution; } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 25d4750ca6..d0ea7bd2ff 100644 +index 
84f2d186d9..a35d8ec9ee 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -70,6 +70,8 @@ @@ -505,7 +479,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return plan; } -@@ -1257,7 +1259,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) +@@ -1258,7 +1260,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) plan->first_partial_plan = best_path->first_partial_path; plan->part_prune_info = partpruneinfo; @@ -514,7 +488,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 /* * If prepare_sort_from_pathkeys added sort columns, but we were told to -@@ -1303,7 +1305,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, +@@ -1304,7 +1306,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, * prepare_sort_from_pathkeys on it before we do so on the individual * child plans, to make cross-checking the sort info easier. */ @@ -523,7 +497,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 plan->targetlist = tlist; plan->qual = NIL; plan->lefttree = NULL; -@@ -1456,7 +1458,7 @@ create_group_result_plan(PlannerInfo *root, GroupResultPath *best_path) +@@ -1458,7 +1460,7 @@ create_group_result_plan(PlannerInfo *root, GroupResultPath *best_path) plan = make_result(tlist, (Node *) quals, NULL); @@ -532,7 +506,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return plan; } -@@ -1481,7 +1483,7 @@ create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path) +@@ -1483,7 +1485,7 @@ create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path) plan = make_project_set(tlist, subplan); @@ -541,7 +515,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return plan; } -@@ -1509,7 +1511,7 @@ create_material_plan(PlannerInfo *root, MaterialPath *best_path, int flags) +@@ -1511,7 +1513,7 @@ create_material_plan(PlannerInfo *root, MaterialPath *best_path, int flags) plan = make_material(subplan); @@ -550,7 +524,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return plan; } -@@ -1709,7 
+1711,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) +@@ -1711,7 +1713,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) } /* Copy cost data from Path to Plan */ @@ -577,7 +551,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 /* Assign the rescan Param. */ gm_plan->rescan_param = assign_special_exec_param(root); -@@ -1903,7 +1905,7 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path, int flags) +@@ -1901,7 +1903,7 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path, int flags) /* We need a Result node */ plan = (Plan *) make_result(tlist, NULL, subplan); @@ -586,7 +560,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 } return plan; -@@ -2004,7 +2006,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags) +@@ -2002,7 +2004,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags) IS_OTHER_REL(best_path->subpath->parent) ? best_path->path.parent->relids : NULL); @@ -595,7 +569,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return plan; } -@@ -2030,7 +2032,7 @@ create_incrementalsort_plan(PlannerInfo *root, IncrementalSortPath *best_path, +@@ -2028,7 +2030,7 @@ create_incrementalsort_plan(PlannerInfo *root, IncrementalSortPath *best_path, best_path->spath.path.parent->relids : NULL, best_path->nPresortedCols); @@ -604,7 +578,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return plan; } -@@ -2069,7 +2071,7 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path) +@@ -2067,7 +2069,7 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path) subplan->targetlist), subplan); @@ -613,7 +587,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return plan; } -@@ -2097,7 +2099,7 @@ create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flag +@@ -2095,7 +2097,7 @@ create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flag best_path->path.pathkeys, best_path->numkeys); @@ -622,7 +596,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return plan; } -@@ 
-2141,7 +2143,7 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) +@@ -2139,7 +2141,7 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) best_path->transitionSpace, subplan); @@ -631,7 +605,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return plan; } -@@ -2343,7 +2345,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) +@@ -2341,7 +2343,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) subplan); /* Copy cost data from Path to Plan */ @@ -640,7 +614,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 } return (Plan *) plan; -@@ -2401,7 +2403,7 @@ create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path) +@@ -2399,7 +2401,7 @@ create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path) plan = make_result(tlist, (Node *) best_path->quals, NULL); @@ -649,7 +623,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 /* * During setrefs.c, we'll need to replace references to the Agg nodes -@@ -2520,7 +2522,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) +@@ -2518,7 +2520,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) wc->inRangeNullsFirst, subplan); @@ -658,7 +632,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return plan; } -@@ -2556,7 +2558,7 @@ create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags) +@@ -2554,7 +2556,7 @@ create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags) best_path->firstFlag, numGroups); @@ -667,7 +641,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return plan; } -@@ -2592,7 +2594,7 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path) +@@ -2590,7 +2592,7 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path) best_path->distinctList, numGroups); @@ -676,7 +650,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return plan; } -@@ -2615,7 +2617,7 @@ create_lockrows_plan(PlannerInfo *root, LockRowsPath *best_path, +@@ -2613,7 +2615,7 @@ create_lockrows_plan(PlannerInfo *root, LockRowsPath 
*best_path, plan = make_lockrows(subplan, best_path->rowMarks, best_path->epqParam); @@ -685,7 +659,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return plan; } -@@ -2676,7 +2678,7 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) +@@ -2674,7 +2676,7 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) best_path->onconflict, best_path->epqParam); @@ -694,7 +668,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return plan; } -@@ -2730,7 +2732,7 @@ create_limit_plan(PlannerInfo *root, LimitPath *best_path, int flags) +@@ -2728,7 +2730,7 @@ create_limit_plan(PlannerInfo *root, LimitPath *best_path, int flags) best_path->limitOption, numUniqkeys, uniqColIdx, uniqOperators, uniqCollations); @@ -703,7 +677,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return plan; } -@@ -2776,7 +2778,7 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path, +@@ -2774,7 +2776,7 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path, scan_clauses, scan_relid); @@ -712,7 +686,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return scan_plan; } -@@ -2822,7 +2824,7 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path, +@@ -2820,7 +2822,7 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path, scan_relid, tsc); @@ -721,7 +695,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return scan_plan; } -@@ -3000,7 +3002,7 @@ create_indexscan_plan(PlannerInfo *root, +@@ -2998,7 +3000,7 @@ create_indexscan_plan(PlannerInfo *root, indexorderbyops, best_path->indexscandir); @@ -730,7 +704,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return scan_plan; } -@@ -3115,7 +3117,7 @@ create_bitmap_scan_plan(PlannerInfo *root, +@@ -3113,7 +3115,7 @@ create_bitmap_scan_plan(PlannerInfo *root, bitmapqualorig, baserelid); @@ -739,7 +713,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return scan_plan; } -@@ -3435,7 +3437,7 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path, +@@ -3433,7 +3435,7 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path, scan_relid, tidquals); @@ -748,7 
+722,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return scan_plan; } -@@ -3485,7 +3487,7 @@ create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path, +@@ -3483,7 +3485,7 @@ create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path, scan_relid, subplan); @@ -757,7 +731,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return scan_plan; } -@@ -3528,7 +3530,7 @@ create_functionscan_plan(PlannerInfo *root, Path *best_path, +@@ -3526,7 +3528,7 @@ create_functionscan_plan(PlannerInfo *root, Path *best_path, scan_plan = make_functionscan(tlist, scan_clauses, scan_relid, functions, rte->funcordinality); @@ -766,7 +740,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return scan_plan; } -@@ -3571,7 +3573,7 @@ create_tablefuncscan_plan(PlannerInfo *root, Path *best_path, +@@ -3569,7 +3571,7 @@ create_tablefuncscan_plan(PlannerInfo *root, Path *best_path, scan_plan = make_tablefuncscan(tlist, scan_clauses, scan_relid, tablefunc); @@ -775,7 +749,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return scan_plan; } -@@ -3615,7 +3617,7 @@ create_valuesscan_plan(PlannerInfo *root, Path *best_path, +@@ -3613,7 +3615,7 @@ create_valuesscan_plan(PlannerInfo *root, Path *best_path, scan_plan = make_valuesscan(tlist, scan_clauses, scan_relid, values_lists); @@ -784,7 +758,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return scan_plan; } -@@ -3708,7 +3710,7 @@ create_ctescan_plan(PlannerInfo *root, Path *best_path, +@@ -3706,7 +3708,7 @@ create_ctescan_plan(PlannerInfo *root, Path *best_path, scan_plan = make_ctescan(tlist, scan_clauses, scan_relid, plan_id, cte_param_id); @@ -793,7 +767,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return scan_plan; } -@@ -3747,7 +3749,7 @@ create_namedtuplestorescan_plan(PlannerInfo *root, Path *best_path, +@@ -3745,7 +3747,7 @@ create_namedtuplestorescan_plan(PlannerInfo *root, Path *best_path, scan_plan = make_namedtuplestorescan(tlist, scan_clauses, scan_relid, rte->enrname); @@ -802,7 +776,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return scan_plan; } -@@ 
-3785,7 +3787,7 @@ create_resultscan_plan(PlannerInfo *root, Path *best_path, +@@ -3783,7 +3785,7 @@ create_resultscan_plan(PlannerInfo *root, Path *best_path, scan_plan = make_result(tlist, (Node *) scan_clauses, NULL); @@ -811,7 +785,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return scan_plan; } -@@ -3845,7 +3847,7 @@ create_worktablescan_plan(PlannerInfo *root, Path *best_path, +@@ -3843,7 +3845,7 @@ create_worktablescan_plan(PlannerInfo *root, Path *best_path, scan_plan = make_worktablescan(tlist, scan_clauses, scan_relid, cteroot->wt_param_id); @@ -820,7 +794,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return scan_plan; } -@@ -3905,7 +3907,7 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, +@@ -3903,7 +3905,7 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, outer_plan); /* Copy cost data from Path to Plan; no need to make FDW do this */ @@ -829,7 +803,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 /* Copy foreign server OID; likewise, no need to make FDW do this */ scan_plan->fs_server = rel->serverid; -@@ -4039,7 +4041,7 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path, +@@ -4037,7 +4039,7 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path, * Copy cost data from Path to Plan; no need to make custom-plan providers * do this */ @@ -838,7 +812,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 /* Likewise, copy the relids that are represented by this custom scan */ cplan->custom_relids = best_path->path.parent->relids; -@@ -4141,7 +4143,7 @@ create_nestloop_plan(PlannerInfo *root, +@@ -4139,7 +4141,7 @@ create_nestloop_plan(PlannerInfo *root, best_path->jointype, best_path->inner_unique); @@ -847,7 +821,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return join_plan; } -@@ -4448,7 +4450,7 @@ create_mergejoin_plan(PlannerInfo *root, +@@ -4446,7 +4448,7 @@ create_mergejoin_plan(PlannerInfo *root, best_path->skip_mark_restore); /* Costs of sort and material steps are included in path cost already */ @@ -856,7 +830,7 
@@ index 25d4750ca6..d0ea7bd2ff 100644 return join_plan; } -@@ -4621,7 +4623,7 @@ create_hashjoin_plan(PlannerInfo *root, +@@ -4619,7 +4621,7 @@ create_hashjoin_plan(PlannerInfo *root, best_path->jpath.jointype, best_path->jpath.inner_unique); @@ -865,7 +839,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 return join_plan; } -@@ -5121,7 +5123,7 @@ order_qual_clauses(PlannerInfo *root, List *clauses) +@@ -5119,7 +5121,7 @@ order_qual_clauses(PlannerInfo *root, List *clauses) * Also copy the parallel-related flags, which the executor *will* use. */ static void @@ -874,7 +848,7 @@ index 25d4750ca6..d0ea7bd2ff 100644 { dest->startup_cost = src->startup_cost; dest->total_cost = src->total_cost; -@@ -5129,6 +5131,9 @@ copy_generic_path_info(Plan *dest, Path *src) +@@ -5127,6 +5129,9 @@ copy_generic_path_info(Plan *dest, Path *src) dest->plan_width = src->pathtarget->width; dest->parallel_aware = src->parallel_aware; dest->parallel_safe = src->parallel_safe; @@ -885,10 +859,10 @@ index 25d4750ca6..d0ea7bd2ff 100644 /* diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c -index 731ff708b9..e862e2a974 100644 +index a203e6f1ff..a335ede976 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c -@@ -1260,6 +1260,7 @@ find_childrel_parents(PlannerInfo *root, RelOptInfo *rel) +@@ -1264,6 +1264,7 @@ find_childrel_parents(PlannerInfo *root, RelOptInfo *rel) } @@ -896,7 +870,7 @@ index 731ff708b9..e862e2a974 100644 /* * get_baserel_parampathinfo * Get the ParamPathInfo for a parameterized path for a base relation, -@@ -1328,6 +1329,10 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, +@@ -1332,6 +1333,10 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, ppi->ppi_req_outer = required_outer; ppi->ppi_rows = rows; ppi->ppi_clauses = pclauses; @@ -907,7 +881,7 @@ index 731ff708b9..e862e2a974 100644 baserel->ppilist = lappend(baserel->ppilist, ppi); return ppi; -@@ -1553,6 +1558,10 
@@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, +@@ -1557,6 +1562,10 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, ppi->ppi_req_outer = required_outer; ppi->ppi_rows = rows; ppi->ppi_clauses = NIL; @@ -919,10 +893,10 @@ index 731ff708b9..e862e2a974 100644 return ppi; diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h -index e94d9e49cf..4404155fbd 100644 +index ba661d32a6..74e4f7592c 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h -@@ -75,6 +75,12 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; +@@ -75,6 +75,19 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; typedef const char *(*explain_get_index_name_hook_type) (Oid indexId); extern PGDLLIMPORT explain_get_index_name_hook_type explain_get_index_name_hook; @@ -932,14 +906,21 @@ index e94d9e49cf..4404155fbd 100644 + ParamListInfo params, const instr_time *planduration, + QueryEnvironment *queryEnv); +extern PGDLLIMPORT ExplainOnePlan_hook_type ExplainOnePlan_hook; ++ ++/* Explain a node info */ ++typedef void (*ExplainOneNode_hook_type) (ExplainState *es, ++ PlanState *ps, ++ Plan *plan, ++ double rows); ++extern PGDLLIMPORT ExplainOneNode_hook_type ExplainOneNode_hook; extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, ParamListInfo params, DestReceiver *dest); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h -index cde2637798..74ffaa9c8a 100644 +index 10f0a149e9..fecf543f44 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h -@@ -739,6 +739,10 @@ typedef struct RelOptInfo +@@ -738,6 +738,10 @@ typedef struct RelOptInfo Relids top_parent_relids; /* Relids of topmost parents (if "other" * rel) */ @@ -950,7 +931,7 @@ index cde2637798..74ffaa9c8a 100644 /* used for partitioned relations: */ PartitionScheme part_scheme; /* Partitioning scheme */ int nparts; /* Number of partitions; -1 if not yet set; in -@@ 
-1107,6 +1111,10 @@ typedef struct ParamPathInfo +@@ -1104,6 +1108,10 @@ typedef struct ParamPathInfo Relids ppi_req_outer; /* rels supplying parameters used by path */ double ppi_rows; /* estimated number of result tuples */ List *ppi_clauses; /* join clauses available from outer rels */ @@ -962,10 +943,10 @@ index cde2637798..74ffaa9c8a 100644 diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 43160439f0..86988ca32d 100644 +index 83e01074ed..5f1de775ca 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h -@@ -140,6 +140,19 @@ typedef struct Plan +@@ -146,6 +146,19 @@ typedef struct Plan List *initPlan; /* Init Plan nodes (un-correlated expr * subselects) */ @@ -986,7 +967,7 @@ index 43160439f0..86988ca32d 100644 * Information for management of parameter-change-driven rescanning * diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h -index ed2e4af4be..7e3cbcca14 100644 +index 6141654e47..3288548af6 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -39,6 +39,37 @@ typedef enum @@ -1071,7 +1052,7 @@ index ed2e4af4be..7e3cbcca14 100644 #endif /* COST_H */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h -index 23dec14cbd..58489cb620 100644 +index 3bd7072ae8..21bbaba11c 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -18,6 +18,10 @@ @@ -1086,7 +1067,7 @@ index 23dec14cbd..58489cb620 100644 * prototypes for pathnode.c */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h -index 777655210b..dac8231291 100644 +index f3cefe67b8..6d77f6e871 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -24,6 +24,12 @@ extern double cursor_tuple_fraction; diff --git a/conf.add b/conf.add index 21843d00..3556e4d6 100644 --- a/conf.add +++ b/conf.add @@ -1 +1,3 @@ -shared_preload_libraries = 'aqo' +autovacuum = off +shared_preload_libraries = 
'postgres_fdw, aqo' +max_parallel_workers = 0 # switch off parallel workers because of unsteadiness diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out new file mode 100644 index 00000000..25acc0c8 --- /dev/null +++ b/expected/aqo_fdw.out @@ -0,0 +1,74 @@ +-- Tests on cardinality estimation of FDW-queries: +-- simple ForeignScan. +-- JOIN push-down (check push of baserestrictinfo and joininfo) +-- Aggregate push-down +-- Push-down of groupings with HAVING clause. +CREATE EXTENSION aqo; +CREATE EXTENSION postgres_fdw; +SET aqo.mode = 'learn'; +SET aqo.details = 'true'; -- show AQO info for each node and entire query. +SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. +DO $d$ + BEGIN + EXECUTE $$CREATE SERVER loopback FOREIGN DATA WRAPPER postgres_fdw + OPTIONS (dbname '$$||current_database()||$$', + port '$$||current_setting('port')||$$' + )$$; + END; +$d$; +CREATE USER MAPPING FOR PUBLIC SERVER loopback; +CREATE TABLE local (x int); +CREATE FOREIGN TABLE frgn(x int) SERVER loopback OPTIONS (table_name 'local'); +INSERT INTO frgn (x) VALUES (1); +ANALYZE local; +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn; + QUERY PLAN +------------------------------------------------------------- + Foreign Scan on frgn (actual rows=1 loops=1) (AQO not used) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(4 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn; + QUERY PLAN +----------------------------------------------------------------------------- + Foreign Scan on frgn (actual rows=1 loops=1) (AQO: cardinality=1, error=0%) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(4 rows) + +-- Push down base filters. 
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT x FROM frgn WHERE x < 10; + QUERY PLAN +-------------------------------------------------------------------- + Foreign Scan on public.frgn (actual rows=1 loops=1) (AQO not used) + Output: x + Remote SQL: SELECT x FROM public.local WHERE ((x < 10)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(6 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT x FROM frgn WHERE x < 10; + QUERY PLAN +------------------------------------------------------------------------------------ + Foreign Scan on public.frgn (actual rows=1 loops=1) (AQO: cardinality=1, error=0%) + Output: x + Remote SQL: SELECT x FROM public.local WHERE ((x < 10)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(6 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants + QUERY PLAN +------------------------------------------------------------------------------- + Foreign Scan on frgn (actual rows=0 loops=1) (AQO: cardinality=1, error=100%) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(4 rows) + +DROP EXTENSION aqo; diff --git a/postprocessing.c b/postprocessing.c index b74701e7..1c299326 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -749,59 +749,99 @@ RemoveFromQueryContext(QueryDesc *queryDesc) pfree(enr); } +void +print_node_explain(ExplainState *es, PlanState *ps, Plan *plan, double rows) +{ + int wrkrs = 1; + double error = -1.; + + if (!aqo_details || !plan || !ps->instrument) + return; + + if (ps->worker_instrument && IsParallelTuplesProcessing(plan)) + { + int i; + + for (i = 0; i < ps->worker_instrument->num_workers; i++) + { + Instrumentation *instrument = &ps->worker_instrument->instrument[i]; + + if (instrument->nloops <= 0) + continue; + + wrkrs++; + } + } + + if (plan->predicted_cardinality > 0.) + { + error = 100. 
* (plan->predicted_cardinality - (rows*wrkrs)) + / plan->predicted_cardinality; + appendStringInfo(es->str, + " (AQO: cardinality=%.0lf, error=%.0lf%%", + plan->predicted_cardinality, error); + } + else + appendStringInfo(es->str, " (AQO not used"); + + if (aqo_show_hash) + appendStringInfo(es->str, ", fss hash = %d", plan->fss_hash); + appendStringInfoChar(es->str, ')'); +} + /* * Prints if the plan was constructed with AQO. */ -void print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, - ExplainState *es, const char *queryString, - ParamListInfo params, const instr_time *planduration, - QueryEnvironment *queryEnv) +void +print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, + ExplainState *es, const char *queryString, + ParamListInfo params, const instr_time *planduration, + QueryEnvironment *queryEnv) { if (prev_ExplainOnePlan_hook) prev_ExplainOnePlan_hook(plannedstmt, into, es, queryString, - params, planduration, queryEnv); + params, planduration, queryEnv); + + if (!aqo_details) + return; -#ifdef AQO_EXPLAIN /* Report to user about aqo state only in verbose mode */ - if (es->verbose) - { - ExplainPropertyBool("Using aqo", query_context.use_aqo, es); + ExplainPropertyBool("Using aqo", query_context.use_aqo, es); - switch (aqo_mode) - { - case AQO_MODE_INTELLIGENT: - ExplainPropertyText("AQO mode", "INTELLIGENT", es); - break; - case AQO_MODE_FORCED: - ExplainPropertyText("AQO mode", "FORCED", es); - break; - case AQO_MODE_CONTROLLED: - ExplainPropertyText("AQO mode", "CONTROLLED", es); - break; - case AQO_MODE_LEARN: - ExplainPropertyText("AQO mode", "LEARN", es); - break; - case AQO_MODE_FROZEN: - ExplainPropertyText("AQO mode", "FROZEN", es); - break; - case AQO_MODE_DISABLED: - ExplainPropertyText("AQO mode", "DISABLED", es); - break; - default: - elog(ERROR, "Bad AQO state"); - break; - } + switch (aqo_mode) + { + case AQO_MODE_INTELLIGENT: + ExplainPropertyText("AQO mode", "INTELLIGENT", es); + break; + case AQO_MODE_FORCED: + 
ExplainPropertyText("AQO mode", "FORCED", es); + break; + case AQO_MODE_CONTROLLED: + ExplainPropertyText("AQO mode", "CONTROLLED", es); + break; + case AQO_MODE_LEARN: + ExplainPropertyText("AQO mode", "LEARN", es); + break; + case AQO_MODE_FROZEN: + ExplainPropertyText("AQO mode", "FROZEN", es); + break; + case AQO_MODE_DISABLED: + ExplainPropertyText("AQO mode", "DISABLED", es); + break; + default: + elog(ERROR, "Bad AQO state"); + break; + } - /* - * Query hash provides an user the conveniently use of the AQO - * auxiliary functions. - */ - if (aqo_mode != AQO_MODE_DISABLED || force_collect_stat) - { + /* + * Query hash provides an user the conveniently use of the AQO + * auxiliary functions. + */ + if (aqo_mode != AQO_MODE_DISABLED || force_collect_stat) + { + if (aqo_show_hash) ExplainPropertyInteger("Query hash", NULL, - query_context.query_hash, es); - ExplainPropertyInteger("JOINS", NULL, njoins, es); - } + query_context.query_hash, es); + ExplainPropertyInteger("JOINS", NULL, njoins, es); } -#endif } From 159922175dcd79059b293e0a1308eed69dc1b661 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 19 Jan 2021 08:31:04 +0500 Subject: [PATCH 007/203] Add missed aqo_fdw.sql --- sql/aqo_fdw.sql | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 sql/aqo_fdw.sql diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql new file mode 100644 index 00000000..76e528d0 --- /dev/null +++ b/sql/aqo_fdw.sql @@ -0,0 +1,38 @@ +-- Tests on cardinality estimation of FDW-queries: +-- simple ForeignScan. +-- JOIN push-down (check push of baserestrictinfo and joininfo) +-- Aggregate push-down +-- Push-down of groupings with HAVING clause. + +CREATE EXTENSION aqo; +CREATE EXTENSION postgres_fdw; +SET aqo.mode = 'learn'; +SET aqo.details = 'true'; -- show AQO info for each node and entire query. +SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. 
+ +DO $d$ + BEGIN + EXECUTE $$CREATE SERVER loopback FOREIGN DATA WRAPPER postgres_fdw + OPTIONS (dbname '$$||current_database()||$$', + port '$$||current_setting('port')||$$' + )$$; + END; +$d$; + +CREATE USER MAPPING FOR PUBLIC SERVER loopback; + +CREATE TABLE local (x int); +CREATE FOREIGN TABLE frgn(x int) SERVER loopback OPTIONS (table_name 'local'); +INSERT INTO frgn (x) VALUES (1); +ANALYZE local; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn; +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn; + +-- Push down base filters. +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT x FROM frgn WHERE x < 10; +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT x FROM frgn WHERE x < 10; +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants + +DROP EXTENSION aqo; + From 4f24796c3011d4ecd8f94443107be83488e208f7 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Mon, 18 Jan 2021 20:00:55 +0500 Subject: [PATCH 008/203] Prepare the insert_index routine for PGv14 signature --- storage.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/storage.c b/storage.c index af468f30..735ceda2 100644 --- a/storage.c +++ b/storage.c @@ -968,10 +968,14 @@ my_index_insert(Relation indexRelation, #if PG_VERSION_NUM < 100000 return index_insert(indexRelation, values, isnull, heap_t_ctid, heapRelation, checkUnique); -#else +#elif PG_VERSION_NUM < 140000 return index_insert(indexRelation, values, isnull, heap_t_ctid, heapRelation, checkUnique, BuildIndexInfo(indexRelation)); +#else + return index_insert(indexRelation, values, isnull, heap_t_ctid, + heapRelation, checkUnique, false, + BuildIndexInfo(indexRelation)); #endif } From 38af0df18afdbdef93d8d75eaa0514fbb6170345 Mon Sep 17 00:00:00 2001 From: "Andrey V. 
Lepikhov" Date: Tue, 19 Jan 2021 09:15:39 +0500 Subject: [PATCH 009/203] Change the license sentence in accordance with 2021. --- aqo.c | 2 +- aqo.h | 2 +- auto_tuning.c | 2 +- cardinality_estimation.c | 2 +- cardinality_hooks.c | 2 +- hash.c | 2 +- machine_learning.c | 2 +- path_utils.c | 2 +- postprocessing.c | 2 +- preprocessing.c | 2 +- selectivity_cache.c | 2 +- storage.c | 2 +- utils.c | 2 +- 13 files changed, 13 insertions(+), 13 deletions(-) diff --git a/aqo.c b/aqo.c index 84c5d39f..98c15a43 100644 --- a/aqo.c +++ b/aqo.c @@ -2,7 +2,7 @@ * aqo.c * Adaptive query optimization extension * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2021, Postgres Professional * * IDENTIFICATION * aqo/aqo.c diff --git a/aqo.h b/aqo.h index 0381ba3c..18c12f56 100644 --- a/aqo.h +++ b/aqo.h @@ -105,7 +105,7 @@ * Module storage.c is responsible for storage query settings and models * (i. e. all information which is used in extension). * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2021, Postgres Professional * * IDENTIFICATION * aqo/aqo.h diff --git a/auto_tuning.c b/auto_tuning.c index a19f42d0..dae92e48 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2021, Postgres Professional * * IDENTIFICATION * aqo/auto_tuning.c diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 89ddf1ee..6483a3ec 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2021, Postgres Professional * * IDENTIFICATION * aqo/cardinality_estimation.c diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 76f54d68..8eb9745b 100644 --- a/cardinality_hooks.c +++ 
b/cardinality_hooks.c @@ -18,7 +18,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2021, Postgres Professional * * IDENTIFICATION * aqo/cardinality_hooks.c diff --git a/hash.c b/hash.c index b039be9e..f853c359 100644 --- a/hash.c +++ b/hash.c @@ -12,7 +12,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2021, Postgres Professional * * IDENTIFICATION * aqo/hash.c diff --git a/machine_learning.c b/machine_learning.c index 7b4612cd..9ebbae6a 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -12,7 +12,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2021, Postgres Professional * * IDENTIFICATION * aqo/machine_learning.c diff --git a/path_utils.c b/path_utils.c index 59edc35c..f91d8be8 100644 --- a/path_utils.c +++ b/path_utils.c @@ -5,7 +5,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2021, Postgres Professional * * IDENTIFICATION * aqo/path_utils.c diff --git a/postprocessing.c b/postprocessing.c index 1c299326..c8b42f7a 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -9,7 +9,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2021, Postgres Professional * * IDENTIFICATION * aqo/postprocessing.c diff --git a/preprocessing.c b/preprocessing.c index bb81d31e..ecfaf1a7 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -49,7 +49,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 
2016-2021, Postgres Professional * * IDENTIFICATION * aqo/preprocessing.c diff --git a/selectivity_cache.c b/selectivity_cache.c index 455d13b1..12ecd699 100644 --- a/selectivity_cache.c +++ b/selectivity_cache.c @@ -9,7 +9,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2021, Postgres Professional * * IDENTIFICATION * aqo/selectivity_cache.c diff --git a/storage.c b/storage.c index 735ceda2..acd421e5 100644 --- a/storage.c +++ b/storage.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2021, Postgres Professional * * IDENTIFICATION * aqo/storage.c diff --git a/utils.c b/utils.c index 1ae45abe..62e6d122 100644 --- a/utils.c +++ b/utils.c @@ -5,7 +5,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2020, Postgres Professional + * Copyright (c) 2016-2021, Postgres Professional * * IDENTIFICATION * aqo/utils.c From ce50e4bc5053871c467078ecffc03c1f4f9fab88 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 19 Jan 2021 22:04:29 +0500 Subject: [PATCH 010/203] Fix the state: AQO supports push-down of trivial JOIN and JOIN with mergejoinable clauses. 
--- expected/aqo_fdw.out | 54 +++++++++++++++++++++++++++++++++++++++----- sql/aqo_fdw.sql | 32 ++++++++++++++++++++------ 2 files changed, 73 insertions(+), 13 deletions(-) diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index 25acc0c8..7479faad 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -21,7 +21,9 @@ CREATE TABLE local (x int); CREATE FOREIGN TABLE frgn(x int) SERVER loopback OPTIONS (table_name 'local'); INSERT INTO frgn (x) VALUES (1); ANALYZE local; -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn; +-- Trivial foreign scan.s +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT x FROM frgn; QUERY PLAN ------------------------------------------------------------- Foreign Scan on frgn (actual rows=1 loops=1) (AQO not used) @@ -30,7 +32,8 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn; JOINS: 0 (4 rows) -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn; +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT x FROM frgn; QUERY PLAN ----------------------------------------------------------------------------- Foreign Scan on frgn (actual rows=1 loops=1) (AQO: cardinality=1, error=0%) @@ -39,8 +42,9 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn; JOINS: 0 (4 rows) --- Push down base filters. -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT x FROM frgn WHERE x < 10; +-- Push down base filters. Use verbose mode to see filters. 
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) +SELECT x FROM frgn WHERE x < 10; QUERY PLAN -------------------------------------------------------------------- Foreign Scan on public.frgn (actual rows=1 loops=1) (AQO not used) @@ -51,7 +55,8 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT x FROM frg JOINS: 0 (6 rows) -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT x FROM frgn WHERE x < 10; +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) +SELECT x FROM frgn WHERE x < 10; QUERY PLAN ------------------------------------------------------------------------------------ Foreign Scan on public.frgn (actual rows=1 loops=1) (AQO: cardinality=1, error=0%) @@ -62,7 +67,8 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT x FROM frg JOINS: 0 (6 rows) -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants QUERY PLAN ------------------------------------------------------------------------------- Foreign Scan on frgn (actual rows=0 loops=1) (AQO: cardinality=1, error=100%) @@ -71,4 +77,40 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn WHERE x JOINS: 0 (4 rows) +-- Trivial JOIN push-down. 
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; + QUERY PLAN +--------------------------------------------------------------------------- + Merge Join (actual rows=1 loops=1) (AQO not used) + Merge Cond: (a.x = b.x) + -> Sort (actual rows=1 loops=1) (AQO not used) + Sort Key: a.x + Sort Method: quicksort Memory: 25kB + -> Foreign Scan on frgn a (actual rows=1 loops=1) (AQO not used) + -> Sort (actual rows=1 loops=1) (AQO not used) + Sort Key: b.x + Sort Method: quicksort Memory: 25kB + -> Foreign Scan on frgn b (actual rows=1 loops=1) (AQO not used) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(13 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; + QUERY PLAN +--------------------------------------------------------------------- + Foreign Scan (actual rows=1 loops=1) (AQO: cardinality=1, error=0%) + Relations: (frgn a) INNER JOIN (frgn b) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(5 rows) + +-- Non-mergejoinable join condition +--EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) +--SELECT * FROM frgn AS a, frgn AS b WHERE a.x Date: Wed, 20 Jan 2021 22:37:43 +0500 Subject: [PATCH 011/203] Rename aqo.details to aqo.show_details. --- aqo.c | 2 +- expected/aqo_fdw.out | 2 +- sql/aqo_fdw.sql | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/aqo.c b/aqo.c index 98c15a43..c5eba992 100644 --- a/aqo.c +++ b/aqo.c @@ -135,7 +135,7 @@ _PG_init(void) ); DefineCustomBoolVariable( - "aqo.details", + "aqo.show_details", "Show AQO state on a query.", NULL, &aqo_details, diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index 7479faad..ed0969e5 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -6,7 +6,7 @@ CREATE EXTENSION aqo; CREATE EXTENSION postgres_fdw; SET aqo.mode = 'learn'; -SET aqo.details = 'true'; -- show AQO info for each node and entire query. 
+SET aqo.show_details = 'true'; -- show AQO info for each node and entire query. SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. DO $d$ BEGIN diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index 06984d4f..f6cdb14a 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -7,7 +7,7 @@ CREATE EXTENSION aqo; CREATE EXTENSION postgres_fdw; SET aqo.mode = 'learn'; -SET aqo.details = 'true'; -- show AQO info for each node and entire query. +SET aqo.show_details = 'true'; -- show AQO info for each node and entire query. SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. DO $d$ From b208eebcf4b0005f415db35d8cae52183ad06e4a Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 20 Jan 2021 22:35:07 +0500 Subject: [PATCH 012/203] Update the readme file in accordance with the new AQO version. --- README.md | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 1b5284dd..123e5f55 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,11 @@ complicated queries. ## Installation The module works with PostgreSQL 9.6 and above. +To avoid compatibility issues, the following branches in the git-repository are allocated: +* `stable9_6`. +* `stable11` - for PG v10 and v11. +* `stable12` - for PG v12. +* the `master` branch of the AQO repository correctly works with PGv13 and the PostgreSQL `master` branch. The module contains a patch and an extension. Patch has to be applied to the sources of PostgresSQL. Patch affects header files, that is why PostgreSQL @@ -28,7 +33,7 @@ make check # check whether it works ``` Tag `version` at the patch name corresponds to suitable PostgreSQL release. -For PostgreSQL 10 use aqo_pg10.patch; for PostgreSQL 11 use aqo_pg11.patch and so on. +For PostgreSQL 9.6 use the 'aqo_pg9_6.patch' file; PostgreSQL 10 use aqo_pg10.patch; for PostgreSQL 11 use aqo_pg11.patch and so on. 
Also, you can see git tags at the master branch for more accurate definition of suitable PostgreSQL version. @@ -50,7 +55,7 @@ of per-database. The typical case is follows: you have complicated query, which executes too long. `EXPLAIN ANALYZE` shows, that the possible reason is bad cardinality -estimnation. +estimation. Example: ``` @@ -127,16 +132,16 @@ When the plan stops changing, you can often observe performance improvement: (23 rows) ``` -The settings system in AQO works with normalized queries, i. e. queries with -removed constants. For example, the normalized version of +The settings system in AQO works with normalised queries, i. e. queries with +removed constants. For example, the normalised version of `SELECT * FROM tbl WHERE a < 25 AND b = 'str';` is `SELECT * FROM tbl WHERE a < CONST and b = CONST;` -So the queries have equal normalization if and only if they differ only +So the queries have equal normalisation if and only if they differ only in their constants. -Each normalized query has its own hash. The correspondence between normalized +Each normalised query has its own hash. The correspondence between normalised query hash and query text is stored in aqo_query_texts table: ``` SELECT * FROM aqo_query_texts; @@ -174,6 +179,10 @@ if the data tends to change significantly), you can do `UPDATE SET aqo_learn=false WHERE query_hash = ;` before commit. +The extension includes two GUC's to display the executed cardinality predictions for a query. +The `aqo.show_details = 'on'` (default - off) allows to see the aqo cardinality prediction results for each node of a query plan and an AQO summary. +The `aqo.show_hash = 'on'` (default - off) will print hash signature for each plan node and overall query. It is system-specific information and should be used for situational analysis. + The more detailed reference of AQO settings mechanism is available further. 
## Advanced tuning From 53fde444264516b54ce9ceded56d7de31a7f3257 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 20 Jan 2021 22:53:14 +0500 Subject: [PATCH 013/203] Bugfix of the aqo_fdw regress test --- expected/aqo_fdw.out | 55 ++++++++++++++++++++++++++++---------------- sql/aqo_fdw.sql | 2 +- 2 files changed, 36 insertions(+), 21 deletions(-) diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index ed0969e5..040c004a 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -43,6 +43,11 @@ SELECT x FROM frgn; (4 rows) -- Push down base filters. Use verbose mode to see filters. +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE)) +SELECT x FROM frgn WHERE x < 10; +ERROR: syntax error at or near ")" +LINE 1: ...LAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE)) + ^ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT x FROM frgn WHERE x < 10; QUERY PLAN @@ -55,18 +60,6 @@ SELECT x FROM frgn WHERE x < 10; JOINS: 0 (6 rows) -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) -SELECT x FROM frgn WHERE x < 10; - QUERY PLAN ------------------------------------------------------------------------------------- - Foreign Scan on public.frgn (actual rows=1 loops=1) (AQO: cardinality=1, error=0%) - Output: x - Remote SQL: SELECT x FROM public.local WHERE ((x < 10)) - Using aqo: true - AQO mode: LEARN - JOINS: 0 -(6 rows) - EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants QUERY PLAN @@ -97,20 +90,42 @@ SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; JOINS: 0 (13 rows) -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; - QUERY PLAN ---------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------------------------- 
Foreign Scan (actual rows=1 loops=1) (AQO: cardinality=1, error=0%) + Output: a.x, b.x + Relations: (public.frgn a) INNER JOIN (public.frgn b) + Remote SQL: SELECT r1.x, r2.x FROM (public.local r1 INNER JOIN public.local r2 ON (((r1.x = r2.x)))) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(7 rows) + +-- TODO: Non-mergejoinable join condition. +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM frgn AS a, frgn AS b WHERE a.x Date: Sat, 6 Feb 2021 22:10:45 +0500 Subject: [PATCH 014/203] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 123e5f55..45ea1072 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ installed with `make install`. ``` cd postgresql-9.6 # enter postgresql source directory -git clone https://github.com/tigvarts/aqo.git contrib/aqo # clone aqo into contrib +git clone https://github.com/postgrespro/aqo.git contrib/aqo # clone aqo into contrib patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg.patch # patch postgresql make clean && make && make install # recompile postgresql cd contrib/aqo # enter aqo directory From 3b4e40a23717d8d6974e9f5e557d4faddd621d16 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 18 Feb 2021 08:22:19 +0500 Subject: [PATCH 015/203] Rename the aqo_details to aqo_show_details --- aqo.c | 4 ++-- aqo.h | 2 +- postprocessing.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aqo.c b/aqo.c index c5eba992..10bc23ef 100644 --- a/aqo.c +++ b/aqo.c @@ -19,7 +19,7 @@ void _PG_init(void); int aqo_mode; bool force_collect_stat; bool aqo_show_hash; -bool aqo_details; +bool aqo_show_details; /* GUC variables */ static const struct config_enum_entry format_options[] = { @@ -138,7 +138,7 @@ _PG_init(void) "aqo.show_details", "Show AQO state on a query.", NULL, - &aqo_details, + &aqo_show_details, false, PGC_USERSET, 0, diff --git a/aqo.h b/aqo.h index 
18c12f56..0a5c209b 100644 --- a/aqo.h +++ b/aqo.h @@ -175,7 +175,7 @@ typedef enum extern int aqo_mode; extern bool force_collect_stat; extern bool aqo_show_hash; -extern bool aqo_details; +extern bool aqo_show_details; /* * It is mostly needed for auto tuning of query. with auto tuning mode aqo diff --git a/postprocessing.c b/postprocessing.c index c8b42f7a..68e1d5a4 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -755,7 +755,7 @@ print_node_explain(ExplainState *es, PlanState *ps, Plan *plan, double rows) int wrkrs = 1; double error = -1.; - if (!aqo_details || !plan || !ps->instrument) + if (!aqo_show_details || !plan || !ps->instrument) return; if (ps->worker_instrument && IsParallelTuplesProcessing(plan)) @@ -802,7 +802,7 @@ print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, prev_ExplainOnePlan_hook(plannedstmt, into, es, queryString, params, planduration, queryEnv); - if (!aqo_details) + if (!aqo_show_details) return; /* Report to user about aqo state only in verbose mode */ From d4d5da3eb1d46d3ecfb2ae4daccf653d764dac9f Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 18 Feb 2021 08:38:44 +0500 Subject: [PATCH 016/203] Store selectivity cache data in the cache memory context instead of short-living planner memory context. Some indentations fixed. 
--- cardinality_hooks.c | 6 ++++++ postprocessing.c | 18 +++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 8eb9745b..977e894c 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -208,12 +208,16 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, if (query_context.use_aqo || query_context.learn_aqo) { + MemoryContext mcxt; + allclauses = list_concat(list_copy(param_clauses), list_copy(rel->baserestrictinfo)); selectivities = get_selectivities(root, allclauses, rel->relid, JOIN_INNER, NULL); relid = planner_rt_fetch(rel->relid, root)->relid; get_eclasses(allclauses, &nargs, &args_hash, &eclass_hash); + + mcxt = MemoryContextSwitchTo(CacheMemoryContext); forboth(l, allclauses, l2, selectivities) { current_hash = get_clause_hash( @@ -222,6 +226,8 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, cache_selectivity(current_hash, rel->relid, relid, *((double *) lfirst(l2))); } + + MemoryContextSwitchTo(mcxt); pfree(args_hash); pfree(eclass_hash); } diff --git a/postprocessing.c b/postprocessing.c index 68e1d5a4..475c791f 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -94,9 +94,9 @@ learn_sample(List *clauselist, List *selectivities, List *relidslist, { int fss_hash; int nfeatures; - double *matrix[aqo_K]; - double targets[aqo_K]; - double *features; + double *matrix[aqo_K]; + double targets[aqo_K]; + double *features; double target; int i; @@ -138,14 +138,14 @@ restore_selectivities(List *clauselist, JoinType join_type, bool was_parametrized) { - List *lst = NIL; - ListCell *l; + List *lst = NIL; + ListCell *l; int i = 0; bool parametrized_sel; int nargs; - int *args_hash; - int *eclass_hash; - double *cur_sel; + int *args_hash; + int *eclass_hash; + double *cur_sel; int cur_hash; int cur_relid; @@ -290,7 +290,7 @@ learnOnPlanState(PlanState *p, void *context) } else /* This node does not required to sum tuples of each worker - * to calculate produced rows. 
*/ + * to calculate produced rows. */ learn_rows = p->instrument->ntuples / p->instrument->nloops; if (p->plan->predicted_cardinality > 0.) From 3f7e7044121ffcac2249fba241bac3ef02b86f38 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 18 Feb 2021 09:33:57 +0500 Subject: [PATCH 017/203] Change insert/update scheme for the ML-knowledge base. Big commit. To avoiding UPDATE/UPDATE/DELETE/INSERT conflicts we use user locks on a pair (query_hash, fspace_hash) to guarantee that only one backend can do INSERT/UPDATE/DELETE of a aqo table row. Also we use dirty snapshot to see concurrently inserted/updated tuples. Such conflict may cause deadlocks on end-of-transaction waiting. Currently, if we detected such conflict, we refuse from we ML-base changing. --- Makefile | 7 +- aqo.c | 94 ++++- aqo.h | 71 ++-- auto_tuning.c | 9 +- cardinality_estimation.c | 3 +- cardinality_hooks.c | 3 +- expected/aqo_fdw.out | 8 +- expected/gucs.out | 93 +++++ expected/schema.out | 11 +- ignorance.c | 182 ++++++++++ ignorance.h | 12 + postprocessing.c | 172 +++++---- preprocessing.c | 44 ++- sql/aqo_fdw.sql | 4 +- sql/gucs.sql | 40 ++ storage.c | 761 +++++++++++++++------------------------ t/001_pgbench.pl | 48 +++ 17 files changed, 970 insertions(+), 592 deletions(-) create mode 100644 expected/gucs.out create mode 100644 ignorance.c create mode 100644 ignorance.h create mode 100644 sql/gucs.sql create mode 100644 t/001_pgbench.pl diff --git a/Makefile b/Makefile index 54aa96a5..50aaea9a 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,9 @@ PGFILEDESC = "AQO - adaptive query optimization" MODULES = aqo OBJS = aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ -selectivity_cache.o storage.o utils.o $(WIN32RES) +selectivity_cache.o storage.o utils.o ignorance.o $(WIN32RES) + +TAP_TESTS = 1 REGRESS = aqo_disabled \ aqo_controlled \ @@ -15,7 +17,8 @@ REGRESS = aqo_disabled \ aqo_learn \ schema \ 
aqo_fdw \ - aqo_CVE-2020-14350 + aqo_CVE-2020-14350 \ + gucs fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw PG_CPPFLAGS += -I$(libpq_srcdir) -I$(fdw_srcdir) diff --git a/aqo.c b/aqo.c index 10bc23ef..b35bc2fc 100644 --- a/aqo.c +++ b/aqo.c @@ -9,15 +9,33 @@ */ #include "aqo.h" +#include "ignorance.h" + +#include "access/relation.h" +#include "access/table.h" +#include "catalog/pg_extension.h" +#include "commands/extension.h" PG_MODULE_MAGIC; void _PG_init(void); +#define AQO_MODULE_MAGIC (1234) /* Strategy of determining feature space for new queries. */ int aqo_mode; bool force_collect_stat; + +/* + * Show special info in EXPLAIN mode. + * + * aqo_show_hash - show query class (hash) and a feature space value (hash) + * of each plan node. This is instance-dependent value and can't be used + * in regression and TAP tests. + * + * aqo_show_details - show AQO settings for this class and prediction + * for each plan node. + */ bool aqo_show_hash; bool aqo_show_details; @@ -106,7 +124,8 @@ _PG_init(void) 0, NULL, NULL, - NULL); + NULL + ); DefineCustomBoolVariable( "aqo.force_collect_stat", @@ -147,6 +166,19 @@ _PG_init(void) NULL ); + DefineCustomBoolVariable( + "aqo.log_ignorance", + "Log in a special table all feature spaces for which the AQO prediction was not successful.", + NULL, + &aqo_log_ignorance, + false, + PGC_SUSET, + 0, + NULL, + set_ignorance, + NULL + ); + prev_planner_hook = planner_hook; planner_hook = aqo_planner; prev_post_parse_analyze_hook = post_parse_analyze_hook; @@ -191,3 +223,63 @@ invalidate_deactivated_queries_cache(PG_FUNCTION_ARGS) init_deactivated_queries_storage(); PG_RETURN_POINTER(NULL); } + +/* + * Return AQO schema's Oid or InvalidOid if that's not possible. 
+ */ +Oid +get_aqo_schema(void) +{ + Oid result; + Relation rel; + SysScanDesc scandesc; + HeapTuple tuple; + ScanKeyData entry[1]; + Oid ext_oid; + + /* It's impossible to fetch pg_aqo's schema now */ + if (!IsTransactionState()) + return InvalidOid; + + ext_oid = get_extension_oid("aqo", true); + if (ext_oid == InvalidOid) + return InvalidOid; /* exit if pg_aqo does not exist */ + + ScanKeyInit(&entry[0], +#if PG_VERSION_NUM >= 120000 + Anum_pg_extension_oid, +#else + ObjectIdAttributeNumber, +#endif + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(ext_oid)); + + rel = relation_open(ExtensionRelationId, AccessShareLock); + scandesc = systable_beginscan(rel, ExtensionOidIndexId, true, + NULL, 1, entry); + tuple = systable_getnext(scandesc); + + /* We assume that there can be at most one matching tuple */ + if (HeapTupleIsValid(tuple)) + result = ((Form_pg_extension) GETSTRUCT(tuple))->extnamespace; + else + result = InvalidOid; + + systable_endscan(scandesc); + relation_close(rel, AccessShareLock); + return result; +} + +/* + * Init userlock + */ +void +init_lock_tag(LOCKTAG *tag, uint32 key1, uint32 key2) +{ + tag->locktag_field1 = AQO_MODULE_MAGIC; + tag->locktag_field2 = key1; + tag->locktag_field3 = key2; + tag->locktag_field4 = 0; + tag->locktag_type = LOCKTAG_USERLOCK; + tag->locktag_lockmethodid = USER_LOCKMETHOD; +} diff --git a/aqo.h b/aqo.h index 0a5c209b..1b37a3a7 100644 --- a/aqo.h +++ b/aqo.h @@ -267,6 +267,7 @@ extern get_parameterized_joinrel_size_hook_type prev_get_parameterized_joinrel_size_hook; extern copy_generic_path_info_hook_type prev_copy_generic_path_info_hook; extern ExplainOnePlan_hook_type prev_ExplainOnePlan_hook; +extern ExplainOneNode_hook_type prev_ExplainOneNode_hook; extern void ppi_hook(ParamPathInfo *ppi); @@ -281,44 +282,42 @@ int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); /* Storage interaction */ -bool find_query(int query_hash, - Datum *search_values, - bool *search_nulls); -bool 
add_query(int query_hash, bool learn_aqo, bool use_aqo, - int fspace_hash, bool auto_tuning); -bool update_query(int query_hash, bool learn_aqo, bool use_aqo, - int fspace_hash, bool auto_tuning); -bool add_query_text(int query_hash, const char *query_text); -bool load_fss(int fss_hash, int ncols, - double **matrix, double *targets, int *rows); -extern bool update_fss(int fss_hash, int nrows, int ncols, +extern bool find_query(int qhash, Datum *search_values, bool *search_nulls); +extern bool update_query(int qhash, int fhash, + bool learn_aqo, bool use_aqo, bool auto_tuning); +extern bool add_query_text(int query_hash, const char *query_text); +extern bool load_fss(int fhash, int fss_hash, + int ncols, double **matrix, double *targets, int *rows); +extern bool update_fss(int fhash, int fss_hash, int nrows, int ncols, double **matrix, double *targets); QueryStat *get_aqo_stat(int query_hash); void update_aqo_stat(int query_hash, QueryStat * stat); +extern bool my_index_insert(Relation indexRelation, Datum *values, bool *isnull, + ItemPointer heap_t_ctid, Relation heapRelation, + IndexUniqueCheck checkUnique); void init_deactivated_queries_storage(void); void fini_deactivated_queries_storage(void); bool query_is_deactivated(int query_hash); void add_deactivated_query(int query_hash); /* Query preprocessing hooks */ -void get_query_text(ParseState *pstate, Query *query); -PlannedStmt *call_default_planner(Query *parse, - const char *query_string, - int cursorOptions, - ParamListInfo boundParams); -PlannedStmt *aqo_planner(Query *parse, - const char *query_string, - int cursorOptions, - ParamListInfo boundParams); -void print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, - ExplainState *es, const char *queryString, - ParamListInfo params, const instr_time *planduration, - QueryEnvironment *queryEnv); -extern void print_node_explain(ExplainState *es, - PlanState *ps, - Plan *plan, +extern void get_query_text(ParseState *pstate, Query *query); +extern 
PlannedStmt *call_default_planner(Query *parse, + const char *query_string, + int cursorOptions, + ParamListInfo boundParams); +extern PlannedStmt *aqo_planner(Query *parse, + const char *query_string, + int cursorOptions, + ParamListInfo boundParams); +extern void print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, + ExplainState *es, const char *queryString, + ParamListInfo params, + const instr_time *planduration, + QueryEnvironment *queryEnv); +extern void print_node_explain(ExplainState *es, PlanState *ps, Plan *plan, double rows); -void disable_aqo_for_query(void); +extern void disable_aqo_for_query(void); /* Cardinality estimation hooks */ extern void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel); @@ -364,7 +363,7 @@ extern int OkNNr_learn(int matrix_rows, int matrix_cols, double *features, double target); /* Automatic query tuning */ -void automatical_query_tuning(int query_hash, QueryStat * stat); +extern void automatical_query_tuning(int query_hash, QueryStat * stat); /* Utilities */ int int_cmp(const void *a, const void *b); @@ -376,11 +375,13 @@ QueryStat *palloc_query_stat(void); void pfree_query_stat(QueryStat *stat); /* Selectivity cache for parametrized baserels */ -void cache_selectivity(int clause_hash, - int relid, - int global_relid, - double selectivity); -double *selectivity_cache_find_global_relid(int clause_hash, int global_relid); -void selectivity_cache_clear(void); +extern void cache_selectivity(int clause_hash, int relid, int global_relid, + double selectivity); +extern double *selectivity_cache_find_global_relid(int clause_hash, + int global_relid); +extern void selectivity_cache_clear(void); + +extern Oid get_aqo_schema(void); +extern void init_lock_tag(LOCKTAG *tag, uint32 key1, uint32 key2); #endif diff --git a/auto_tuning.c b/auto_tuning.c index dae92e48..b82b415b 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -187,8 +187,11 @@ automatical_query_tuning(int query_hash, QueryStat * stat) } if 
(num_iterations <= auto_tuning_max_iterations || p_use > 0.5) - update_query(query_hash, query_context.learn_aqo, query_context.use_aqo, - query_context.fspace_hash, true); + update_query(query_hash, + query_context.fspace_hash, + query_context.learn_aqo, + query_context.use_aqo, + true); else - update_query(query_hash, false, false, query_context.fspace_hash, false); + update_query(query_hash, query_context.fspace_hash, false, false, false); } diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 6483a3ec..3b4dda09 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -40,7 +40,8 @@ predict_for_relation(List *restrict_clauses, List *selectivities, for (i = 0; i < aqo_K; ++i) matrix[i] = palloc0(sizeof(**matrix) * nfeatures); - if (load_fss(*fss_hash, nfeatures, matrix, targets, &rows)) + if (load_fss(query_context.fspace_hash, *fss_hash, nfeatures, matrix, + targets, &rows)) result = OkNNr_predict(rows, nfeatures, matrix, targets, features); else { diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 977e894c..dd631161 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -156,7 +156,8 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) relids = list_make1_int(relid); restrict_clauses = list_copy(rel->baserestrictinfo); - predicted = predict_for_relation(restrict_clauses, selectivities, relids, &fss); + predicted = predict_for_relation(restrict_clauses, selectivities, + relids, &fss); rel->fss_hash = fss; if (predicted >= 0) diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index 040c004a..c7cb734d 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -128,4 +128,10 @@ SELECT * FROM frgn AS a, frgn AS b WHERE a.xpredicted_cardinality < 0.) + { + char nodestr[1024]; + char *qplan = nodeToString(plan); + + memset(nodestr, 0, 1024); + strncpy(nodestr, qplan, 1023); + pfree(qplan); + + /* + * AQO failed to predict cardinality for this node. 
+ */ + values[0] = Int32GetDatum(qhash); + values[1] = Int32GetDatum(fhash); + values[2] = Int32GetDatum(fss_hash); + values[3] = Int32GetDatum(nodeTag(plan)); + values[4] = CStringGetTextDatum(nodestr); + tuple = heap_form_tuple(tupDesc, values, isnull); + + simple_heap_insert(hrel, tuple); + my_index_insert(irel, values, isnull, &(tuple->t_self), + hrel, UNIQUE_CHECK_YES); + } + else + { + /* AQO works as expected. */ + } + } + else if (!TransactionIdIsValid(snap.xmin) && + !TransactionIdIsValid(snap.xmax)) + { + /* + * AQO made prediction for this node. Delete it from the ignorance + * table. + */ + tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); + Assert(shouldFree != true); + simple_heap_delete(hrel, &(tuple->t_self)); + } + else + { + /* + * The data exists. We can't do anything for now. + */ + } + + ExecDropSingleTupleTableSlot(slot); + index_endscan(scan); + index_close(irel, RowExclusiveLock); + table_close(hrel, RowExclusiveLock); + + CommandCounterIncrement(); + LockRelease(&tag, ExclusiveLock, false); +} diff --git a/ignorance.h b/ignorance.h new file mode 100644 index 00000000..bceb855b --- /dev/null +++ b/ignorance.h @@ -0,0 +1,12 @@ +#ifndef IGNORANCE_H +#define IGNORANCE_H + +#include "postgres.h" + +extern bool aqo_log_ignorance; + +extern void set_ignorance(bool newval, void *extra); +extern bool create_ignorance_table(bool fail_ok); +extern void update_ignorance(int qhash, int fhash, int fss_hash, Plan *plan); + +#endif /* IGNORANCE_H */ diff --git a/postprocessing.c b/postprocessing.c index 475c791f..c9fc3280 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -17,11 +17,14 @@ */ #include "aqo.h" +#include "ignorance.h" + #include "access/parallel.h" #include "optimizer/optimizer.h" #include "postgres_fdw.h" #include "utils/queryenvironment.h" + typedef struct { List *clauselist; @@ -41,29 +44,30 @@ static char *PlanStateInfo = "PlanStateInfo"; /* Query execution statistics collecting utilities */ -static void 
atomic_fss_learn_step(int fss_hash, int ncols, - double **matrix, double *targets, - double *features, double target); +static void atomic_fss_learn_step(int fhash, int fss_hash, int ncols, + double **matrix, double *targets, + double *features, double target); static void learn_sample(List *clauselist, - List *selectivities, - List *relidslist, - double true_cardinality, - double predicted_cardinality); + List *selectivities, + List *relidslist, + double true_cardinality, + Plan *plan); static List *restore_selectivities(List *clauselist, - List *relidslist, - JoinType join_type, - bool was_parametrized); + List *relidslist, + JoinType join_type, + bool was_parametrized); static void update_query_stat_row(double *et, int *et_size, - double *pt, int *pt_size, - double *ce, int *ce_size, - double planning_time, - double execution_time, - double cardinality_error, - int64 *n_exec); -static void StoreToQueryContext(QueryDesc *queryDesc); + double *pt, int *pt_size, + double *ce, int *ce_size, + double planning_time, + double execution_time, + double cardinality_error, + int64 *n_exec); +static void StoreToQueryEnv(QueryDesc *queryDesc); static void StorePlanInternals(QueryDesc *queryDesc); -static bool ExtractFromQueryContext(QueryDesc *queryDesc); -static void RemoveFromQueryContext(QueryDesc *queryDesc); +static bool ExtractFromQueryEnv(QueryDesc *queryDesc); +static void RemoveFromQueryEnv(QueryDesc *queryDesc); + /* * This is the critical section: only one runner is allowed to be inside this @@ -71,17 +75,23 @@ static void RemoveFromQueryContext(QueryDesc *queryDesc); * matrix and targets are just preallocated memory for computations. 
*/ static void -atomic_fss_learn_step(int fss_hash, int ncols, - double **matrix, double *targets, - double *features, double target) +atomic_fss_learn_step(int fhash, int fss_hash, int ncols, + double **matrix, double *targets, + double *features, double target) { - int nrows; + LOCKTAG tag; + int nrows; - if (!load_fss(fss_hash, ncols, matrix, targets, &nrows)) + init_lock_tag(&tag, (uint32) fhash, (uint32) fss_hash); + LockAcquire(&tag, ExclusiveLock, false, false); + + if (!load_fss(fhash, fss_hash, ncols, matrix, targets, &nrows)) nrows = 0; nrows = OkNNr_learn(nrows, ncols, matrix, targets, features, target); - update_fss(fss_hash, nrows, ncols, matrix, targets); + update_fss(fhash, fss_hash, nrows, ncols, matrix, targets); + + LockRelease(&tag, ExclusiveLock, false); } /* @@ -90,36 +100,38 @@ atomic_fss_learn_step(int fss_hash, int ncols, */ static void learn_sample(List *clauselist, List *selectivities, List *relidslist, - double true_cardinality, double predicted_cardinality) + double true_cardinality, Plan *plan) { - int fss_hash; - int nfeatures; - double *matrix[aqo_K]; - double targets[aqo_K]; - double *features; - double target; - int i; + int fhash = query_context.fspace_hash; + int fss_hash; + int nfeatures; + double *matrix[aqo_K]; + double targets[aqo_K]; + double *features; + double target; + int i; -/* - * Suppress the optimization for debug purposes. - if (fabs(log(predicted_cardinality) - log(true_cardinality)) < - object_selection_prediction_threshold) - { - return; - } -*/ target = log(true_cardinality); - fss_hash = get_fss_for_object(clauselist, selectivities, relidslist, - &nfeatures, &features); + &nfeatures, &features); + + if (aqo_log_ignorance /* && load_fss(fhash, fss_hash, 0, NULL, NULL, NULL) */) + { + /* + * If ignorance logging is enabled and the feature space was existed in + * the ML knowledge base, log this issue. 
+ */ + update_ignorance(query_context.query_hash, fhash, fss_hash, plan); + } if (nfeatures > 0) for (i = 0; i < aqo_K; ++i) matrix[i] = palloc(sizeof(double) * nfeatures); - /* Here should be critical section */ - atomic_fss_learn_step(fss_hash, nfeatures, matrix, targets, features, target); - /* Here should be the end of critical section */ + /* Critical section */ + atomic_fss_learn_step(fhash, fss_hash, + nfeatures, matrix, targets, features, target); + /* End of critical section */ if (nfeatures > 0) for (i = 0; i < aqo_K; ++i) @@ -332,13 +344,14 @@ learnOnPlanState(PlanState *p, void *context) if (ctx->learn) learn_sample(SubplanCtx.clauselist, SubplanCtx.selectivities, - p->plan->path_relids, learn_rows, predicted); + p->plan->path_relids, learn_rows, + p->plan); } } ctx->clauselist = list_concat(ctx->clauselist, SubplanCtx.clauselist); ctx->selectivities = list_concat(ctx->selectivities, - SubplanCtx.selectivities); + SubplanCtx.selectivities); return false; } @@ -414,7 +427,7 @@ aqo_ExecutorStart(QueryDesc *queryDesc, int eflags) queryDesc->instrument_options |= INSTRUMENT_ROWS; /* Save all query-related parameters into the query context. */ - StoreToQueryContext(queryDesc); + StoreToQueryEnv(queryDesc); } if (prev_ExecutorStart_hook) @@ -440,11 +453,12 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) QueryStat *stat = NULL; instr_time endtime; EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); + LOCKTAG tag; cardinality_sum_errors = 0.; cardinality_num_objects = 0; - if (!ExtractFromQueryContext(queryDesc)) + if (!ExtractFromQueryEnv(queryDesc)) /* AQO keep all query-related preferences at the query context. * It is needed to prevent from possible recursive changes, at * preprocessing stage of subqueries. @@ -474,6 +488,11 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) list_free(ctx.selectivities); } + /* Prevent concurrent updates. 
*/ + init_lock_tag(&tag, (uint32) query_context.query_hash, + (uint32) query_context.fspace_hash); + LockAcquire(&tag, ExclusiveLock, false, false); + if (query_context.collect_stat) { INSTR_TIME_SET_CURRENT(endtime); @@ -490,26 +509,26 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) { if (query_context.use_aqo) update_query_stat_row(stat->execution_time_with_aqo, - &stat->execution_time_with_aqo_size, - stat->planning_time_with_aqo, - &stat->planning_time_with_aqo_size, - stat->cardinality_error_with_aqo, - &stat->cardinality_error_with_aqo_size, - query_context.query_planning_time, - totaltime - query_context.query_planning_time, - cardinality_error, - &stat->executions_with_aqo); + &stat->execution_time_with_aqo_size, + stat->planning_time_with_aqo, + &stat->planning_time_with_aqo_size, + stat->cardinality_error_with_aqo, + &stat->cardinality_error_with_aqo_size, + query_context.query_planning_time, + totaltime - query_context.query_planning_time, + cardinality_error, + &stat->executions_with_aqo); else update_query_stat_row(stat->execution_time_without_aqo, - &stat->execution_time_without_aqo_size, - stat->planning_time_without_aqo, - &stat->planning_time_without_aqo_size, - stat->cardinality_error_without_aqo, - &stat->cardinality_error_without_aqo_size, - query_context.query_planning_time, - totaltime - query_context.query_planning_time, - cardinality_error, - &stat->executions_without_aqo); + &stat->execution_time_without_aqo_size, + stat->planning_time_without_aqo, + &stat->planning_time_without_aqo_size, + stat->cardinality_error_without_aqo, + &stat->cardinality_error_without_aqo_size, + query_context.query_planning_time, + totaltime - query_context.query_planning_time, + cardinality_error, + &stat->executions_without_aqo); } } selectivity_cache_clear(); @@ -525,7 +544,9 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) update_aqo_stat(query_context.fspace_hash, stat); pfree_query_stat(stat); } - RemoveFromQueryContext(queryDesc); + + LockRelease(&tag, ExclusiveLock, 
false); + RemoveFromQueryEnv(queryDesc); end: if (prev_ExecutorEnd_hook) @@ -561,7 +582,7 @@ aqo_copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src) * path_parallel_workers, and was_parameterized. */ Assert(dest->path_clauses && dest->path_jointype && - dest->path_relids && dest->path_parallel_workers); + dest->path_relids && dest->path_parallel_workers); return; } @@ -644,7 +665,7 @@ aqo_copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src) * top-level query. */ static void -StoreToQueryContext(QueryDesc *queryDesc) +StoreToQueryEnv(QueryDesc *queryDesc) { EphemeralNamedRelation enr; int qcsize = sizeof(QueryContextData); @@ -712,7 +733,7 @@ StorePlanInternals(QueryDesc *queryDesc) * Restore AQO data, related to the query. */ static bool -ExtractFromQueryContext(QueryDesc *queryDesc) +ExtractFromQueryEnv(QueryDesc *queryDesc) { EphemeralNamedRelation enr; @@ -735,7 +756,7 @@ ExtractFromQueryContext(QueryDesc *queryDesc) } static void -RemoveFromQueryContext(QueryDesc *queryDesc) +RemoveFromQueryEnv(QueryDesc *queryDesc) { EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, AQOPrivateData); unregister_ENR(queryDesc->queryEnv, AQOPrivateData); @@ -787,6 +808,9 @@ print_node_explain(ExplainState *es, PlanState *ps, Plan *plan, double rows) if (aqo_show_hash) appendStringInfo(es->str, ", fss hash = %d", plan->fss_hash); appendStringInfoChar(es->str, ')'); + + if (prev_ExplainOneNode_hook) + prev_ExplainOneNode_hook(es, ps, plan, rows); } /* @@ -834,7 +858,7 @@ print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, } /* - * Query hash provides an user the conveniently use of the AQO + * Query class provides an user the conveniently use of the AQO * auxiliary functions. 
*/ if (aqo_mode != AQO_MODE_DISABLED || force_collect_stat) diff --git a/preprocessing.c b/preprocessing.c index ecfaf1a7..3ef0ac20 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -124,6 +124,7 @@ aqo_planner(Query *parse, bool query_is_stored; Datum query_params[5]; bool query_nulls[5] = {false, false, false, false, false}; + LOCKTAG tag; selectivity_cache_clear(); @@ -163,6 +164,13 @@ aqo_planner(Query *parse, boundParams); } + /* + * find-add query and query text must be atomic operation to prevent + * concurrent insertions. + */ + init_lock_tag(&tag, (uint32) query_context.query_hash, (uint32) 0); + LockAcquire(&tag, ExclusiveLock, false, false); + query_is_stored = find_query(query_context.query_hash, &query_params[0], &query_nulls[0]); @@ -217,9 +225,18 @@ aqo_planner(Query *parse, if (query_context.adding_query || force_collect_stat) { - add_query(query_context.query_hash, query_context.learn_aqo, - query_context.use_aqo, query_context.fspace_hash, - query_context.auto_tuning); + /* + * Add query into the AQO knowledge base. To process an error with + * concurrent addition from another backend we will try to restart + * preprocessing routine. + */ + update_query(query_context.query_hash, + query_context.fspace_hash, + query_context.learn_aqo, + query_context.use_aqo, + query_context.auto_tuning); + + add_query_text(query_context.query_hash, query_text); } } @@ -273,6 +290,8 @@ aqo_planner(Query *parse, } } + LockRelease(&tag, ExclusiveLock, false); + /* * This mode is possible here, because force collect statistics uses AQO * machinery. 
@@ -319,6 +338,22 @@ isQueryUsingSystemRelation(Query *query) return isQueryUsingSystemRelation_walker((Node *) query, NULL); } +static bool +IsAQORelation(Relation rel) +{ + char *relname = NameStr(rel->rd_rel->relname); + + if (strcmp(relname, "aqo_data") == 0 || + strcmp(relname, "aqo_query_texts") == 0 || + strcmp(relname, "aqo_query_stat") == 0 || + strcmp(relname, "aqo_queries") == 0 || + strcmp(relname, "aqo_ignorance") == 0 + ) + return true; + + return false; +} + bool isQueryUsingSystemRelation_walker(Node *node, void *context) { @@ -338,9 +373,10 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) { Relation rel = table_open(rte->relid, AccessShareLock); bool is_catalog = IsCatalogRelation(rel); + bool is_aqo_rel = IsAQORelation(rel); table_close(rel, AccessShareLock); - if (is_catalog) + if (is_catalog || is_aqo_rel) return true; } } diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index fc3d9115..38e31ea1 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -52,5 +52,7 @@ SELECT * FROM frgn AS a, frgn AS b WHERE a.xheapRelation->rd_att, - &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(query_index_scan, ForwardScanDirection, slot); + index_rescan(scan, &key, 1, NULL, 0); + slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); + find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); if (find_ok) { tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); Assert(shouldFree != true); - heap_deform_tuple(tuple, aqo_queries_heap->rd_att, - search_values, search_nulls); + heap_deform_tuple(tuple, hrel->rd_att, search_values, search_nulls); } ExecDropSingleTupleTableSlot(slot); - index_endscan(query_index_scan); - index_close(query_index_rel, lockmode); - table_close(aqo_queries_heap, lockmode); + index_endscan(scan); + index_close(irel, AccessShareLock); + table_close(hrel, AccessShareLock); return find_ok; } /* - * Creates entry for new query in aqo_queries table with given fields. 
- * Returns false if the operation failed, true otherwise. + * Update query status in intelligent mode. + * + * Do it gently: to prevent possible deadlocks, revert this update if any + * concurrent transaction is doing it. + * + * Such logic is possible, because this update is performed by AQO itself. It is + * not break any learning logic besides possible additional learning iterations. */ bool -add_query(int query_hash, bool learn_aqo, bool use_aqo, - int fspace_hash, bool auto_tuning) +update_query(int qhash, int fhash, + bool learn_aqo, bool use_aqo, bool auto_tuning) { - RangeVar *aqo_queries_table_rv; - Relation aqo_queries_heap; - HeapTuple tuple; - - LOCKMODE lockmode = RowExclusiveLock; - - Datum values[5]; - bool nulls[5] = {false, false, false, false, false}; - - Relation query_index_rel; - Oid query_index_rel_oid; - - values[0] = Int32GetDatum(query_hash); - values[1] = BoolGetDatum(learn_aqo); - values[2] = BoolGetDatum(use_aqo); - values[3] = Int32GetDatum(fspace_hash); - values[4] = BoolGetDatum(auto_tuning); - - query_index_rel_oid = RelnameGetRelid("aqo_queries_query_hash_idx"); - if (!OidIsValid(query_index_rel_oid)) - { - disable_aqo_for_query(); - return false; - } - query_index_rel = index_open(query_index_rel_oid, lockmode); - - aqo_queries_table_rv = makeRangeVar("public", "aqo_queries", -1); - aqo_queries_heap = table_openrv(aqo_queries_table_rv, lockmode); - - tuple = heap_form_tuple(RelationGetDescr(aqo_queries_heap), - values, nulls); - PG_TRY(); - { - simple_heap_insert(aqo_queries_heap, tuple); - my_index_insert(query_index_rel, - values, nulls, - &(tuple->t_self), - aqo_queries_heap, - UNIQUE_CHECK_YES); - } - PG_CATCH(); - { - /* - * Main goal is to catch deadlock errors during the index insertion. 
- */ - CommandCounterIncrement(); - simple_heap_delete(aqo_queries_heap, &(tuple->t_self)); - PG_RE_THROW(); - } - PG_END_TRY(); - - index_close(query_index_rel, lockmode); - table_close(aqo_queries_heap, lockmode); - - CommandCounterIncrement(); - - return true; -} - -bool -update_query(int query_hash, bool learn_aqo, bool use_aqo, - int fspace_hash, bool auto_tuning) -{ - RangeVar *aqo_queries_table_rv; - Relation aqo_queries_heap; + RangeVar *rv; + Relation hrel; + Relation irel; + TupleTableSlot *slot; HeapTuple tuple, nw_tuple; - - TupleTableSlot *slot; - bool shouldFree; - bool find_ok = false; - bool update_indexes; - - LOCKMODE lockmode = RowExclusiveLock; - - Relation query_index_rel; - Oid query_index_rel_oid; - IndexScanDesc query_index_scan; - ScanKeyData key; - Datum values[5]; bool isnull[5] = { false, false, false, false, false }; bool replace[5] = { false, true, true, true, true }; + bool shouldFree; + bool result = true; + bool update_indexes; + Oid reloid; + IndexScanDesc scan; + ScanKeyData key; + SnapshotData snap; - query_index_rel_oid = RelnameGetRelid("aqo_queries_query_hash_idx"); - if (!OidIsValid(query_index_rel_oid)) + reloid = RelnameGetRelid("aqo_queries_query_hash_idx"); + if (!OidIsValid(reloid)) { disable_aqo_for_query(); return false; } - aqo_queries_table_rv = makeRangeVar("public", "aqo_queries", -1); - aqo_queries_heap = table_openrv(aqo_queries_table_rv, lockmode); - - query_index_rel = index_open(query_index_rel_oid, lockmode); - query_index_scan = index_beginscan(aqo_queries_heap, - query_index_rel, - SnapshotSelf, - 1, - 0); - - ScanKeyInit(&key, - 1, - BTEqualStrategyNumber, - F_INT4EQ, - Int32GetDatum(query_hash)); - - index_rescan(query_index_scan, &key, 1, NULL, 0); - slot = MakeSingleTupleTableSlot(query_index_scan->heapRelation->rd_att, - &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(query_index_scan, ForwardScanDirection, slot); - if (!find_ok) - elog(PANIC, "[AQO]: Update of non-existed query: query hash: 
%d, fss hash: %d, use aqo: %s", - query_hash, fspace_hash, use_aqo ? "true" : "false"); + rv = makeRangeVar("public", "aqo_queries", -1); + hrel = table_openrv(rv, RowExclusiveLock); + irel = index_open(reloid, RowExclusiveLock); - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); + /* + * Start an index scan. Use dirty snapshot to check concurrent updates that + * can be made before, but still not visible. + */ + InitDirtySnapshot(snap); + scan = index_beginscan(hrel, irel, &snap, 1, 0); + ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(qhash)); - heap_deform_tuple(tuple, aqo_queries_heap->rd_att, - values, isnull); + index_rescan(scan, &key, 1, NULL, 0); + slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); + values[0] = Int32GetDatum(qhash); values[1] = BoolGetDatum(learn_aqo); values[2] = BoolGetDatum(use_aqo); - values[3] = Int32GetDatum(fspace_hash); + values[3] = Int32GetDatum(fhash); values[4] = BoolGetDatum(auto_tuning); - nw_tuple = heap_modify_tuple(tuple, aqo_queries_heap->rd_att, - values, isnull, replace); - if (my_simple_heap_update(aqo_queries_heap, &(nw_tuple->t_self), nw_tuple, - &update_indexes)) + if (!index_getnext_slot(scan, ForwardScanDirection, slot)) { - if (update_indexes) - my_index_insert(query_index_rel, values, isnull, - &(nw_tuple->t_self), - aqo_queries_heap, UNIQUE_CHECK_YES); + /* New tuple for the ML knowledge base */ + tuple = heap_form_tuple(RelationGetDescr(hrel), values, isnull); + simple_heap_insert(hrel, tuple); + my_index_insert(irel, values, isnull, &(tuple->t_self), + hrel, UNIQUE_CHECK_YES); + } + else if (!TransactionIdIsValid(snap.xmin) && + !TransactionIdIsValid(snap.xmax)) + { + /* + * Update existed data. No one concurrent transaction doesn't update this + * right now. 
+ */ + tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); + Assert(shouldFree != true); + nw_tuple = heap_modify_tuple(tuple, hrel->rd_att, values, isnull, replace); + + if (my_simple_heap_update(hrel, &(nw_tuple->t_self), nw_tuple, + &update_indexes)) + { + if (update_indexes) + my_index_insert(irel, values, isnull, + &(nw_tuple->t_self), + hrel, UNIQUE_CHECK_YES); + result = true; + } + else + { + /* + * Ooops, somebody concurrently updated the tuple. It is possible + * only in the case of changes made by third-party code. + */ + elog(ERROR, "AQO feature space data for signature (%d, %d) concurrently" + " updated by a stranger backend.", + qhash, fhash); + result = false; + } } else { /* - * Ooops, somebody concurrently updated the tuple. We have to merge - * our changes somehow, but now we just discard ours. We don't believe - * in high probability of simultaneously finishing of two long, - * complex, and important queries, so we don't loss important data. + * Concurrent update was made. To prevent deadlocks refuse to update. */ + result = false; } ExecDropSingleTupleTableSlot(slot); - index_endscan(query_index_scan); - index_close(query_index_rel, lockmode); - table_close(aqo_queries_heap, lockmode); + index_endscan(scan); + index_close(irel, RowExclusiveLock); + table_close(hrel, RowExclusiveLock); CommandCounterIncrement(); - - return true; + return result; } /* @@ -284,64 +215,39 @@ update_query(int query_hash, bool learn_aqo, bool use_aqo, * Returns false if the operation failed, true otherwise. 
*/ bool -add_query_text(int query_hash, const char *query_text) +add_query_text(int qhash, const char *query_text) { - RangeVar *aqo_query_texts_table_rv; - Relation aqo_query_texts_heap; + RangeVar *rv; + Relation hrel; + Relation irel; HeapTuple tuple; - - LOCKMODE lockmode = RowExclusiveLock; - Datum values[2]; bool isnull[2] = {false, false}; + Oid reloid; - Relation query_index_rel; - Oid query_index_rel_oid; - - values[0] = Int32GetDatum(query_hash); + values[0] = Int32GetDatum(qhash); values[1] = CStringGetTextDatum(query_text); - query_index_rel_oid = RelnameGetRelid("aqo_query_texts_query_hash_idx"); - if (!OidIsValid(query_index_rel_oid)) + reloid = RelnameGetRelid("aqo_query_texts_query_hash_idx"); + if (!OidIsValid(reloid)) { disable_aqo_for_query(); return false; } - query_index_rel = index_open(query_index_rel_oid, lockmode); - - aqo_query_texts_table_rv = makeRangeVar("public", - "aqo_query_texts", - -1); - aqo_query_texts_heap = table_openrv(aqo_query_texts_table_rv, - lockmode); - tuple = heap_form_tuple(RelationGetDescr(aqo_query_texts_heap), - values, isnull); + rv = makeRangeVar("public", "aqo_query_texts", -1); + hrel = table_openrv(rv, RowExclusiveLock); + irel = index_open(reloid, RowExclusiveLock); + tuple = heap_form_tuple(RelationGetDescr(hrel), values, isnull); - PG_TRY(); - { - simple_heap_insert(aqo_query_texts_heap, tuple); - my_index_insert(query_index_rel, - values, isnull, - &(tuple->t_self), - aqo_query_texts_heap, - UNIQUE_CHECK_YES); - } - PG_CATCH(); - { - CommandCounterIncrement(); - simple_heap_delete(aqo_query_texts_heap, &(tuple->t_self)); - index_close(query_index_rel, lockmode); - table_close(aqo_query_texts_heap, lockmode); - PG_RE_THROW(); - } - PG_END_TRY(); + simple_heap_insert(hrel, tuple); + my_index_insert(irel, values, isnull, &(tuple->t_self), hrel, + UNIQUE_CHECK_YES); - index_close(query_index_rel, lockmode); - table_close(aqo_query_texts_heap, lockmode); + index_close(irel, RowExclusiveLock); + 
table_close(hrel, RowExclusiveLock); CommandCounterIncrement(); - return true; } @@ -360,67 +266,52 @@ add_query_text(int query_hash, const char *query_text) * objects in the given feature space */ bool -load_fss(int fss_hash, int ncols, double **matrix, double *targets, int *rows) +load_fss(int fhash, int fss_hash, + int ncols, double **matrix, double *targets, int *rows) { - RangeVar *aqo_data_table_rv; - Relation aqo_data_heap; + RangeVar *rv; + Relation hrel; + Relation irel; HeapTuple tuple; TupleTableSlot *slot; bool shouldFree; bool find_ok = false; - - Relation data_index_rel; - Oid data_index_rel_oid; - IndexScanDesc data_index_scan; + Oid reloid; + IndexScanDesc scan; ScanKeyData key[2]; - - LOCKMODE lockmode = AccessShareLock; - Datum values[5]; bool isnull[5]; - bool success = true; - data_index_rel_oid = RelnameGetRelid("aqo_fss_access_idx"); - if (!OidIsValid(data_index_rel_oid)) + reloid = RelnameGetRelid("aqo_fss_access_idx"); + if (!OidIsValid(reloid)) { disable_aqo_for_query(); return false; } - aqo_data_table_rv = makeRangeVar("public", "aqo_data", -1); - aqo_data_heap = table_openrv(aqo_data_table_rv, lockmode); - - data_index_rel = index_open(data_index_rel_oid, lockmode); - data_index_scan = index_beginscan(aqo_data_heap, - data_index_rel, - SnapshotSelf, - 2, - 0); - - ScanKeyInit(&key[0], - 1, - BTEqualStrategyNumber, - F_INT4EQ, - Int32GetDatum(query_context.fspace_hash)); + rv = makeRangeVar("public", "aqo_data", -1); + hrel = table_openrv(rv, AccessShareLock); + irel = index_open(reloid, AccessShareLock); + scan = index_beginscan(hrel, irel, SnapshotSelf, 2, 0); - ScanKeyInit(&key[1], - 2, - BTEqualStrategyNumber, - F_INT4EQ, - Int32GetDatum(fss_hash)); + ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fhash)); + ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss_hash)); + index_rescan(scan, key, 2, NULL, 0); - index_rescan(data_index_scan, key, 2, NULL, 0); + slot = 
MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); + find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); - slot = MakeSingleTupleTableSlot(data_index_scan->heapRelation->rd_att, - &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(data_index_scan, ForwardScanDirection, slot); - - if (find_ok) + if (matrix == NULL && targets == NULL && rows == NULL) + { + /* Just check availability */ + success = find_ok; + } + else if (find_ok) { tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); Assert(shouldFree != true); - heap_deform_tuple(tuple, aqo_data_heap->rd_att, values, isnull); + heap_deform_tuple(tuple, hrel->rd_att, values, isnull); if (DatumGetInt32(values[2]) == ncols) { @@ -433,21 +324,17 @@ load_fss(int fss_hash, int ncols, double **matrix, double *targets, int *rows) deform_vector(values[4], targets, rows); } else - { - elog(WARNING, "unexpected number of features for hash (%d, %d):\ + elog(ERROR, "unexpected number of features for hash (%d, %d):\ expected %d features, obtained %d", - query_context.fspace_hash, - fss_hash, ncols, DatumGetInt32(values[2])); - success = false; - } + fhash, fss_hash, ncols, DatumGetInt32(values[2])); } else success = false; ExecDropSingleTupleTableSlot(slot); - index_endscan(data_index_scan); - index_close(data_index_rel, lockmode); - table_close(aqo_data_heap, lockmode); + index_endscan(scan); + index_close(irel, AccessShareLock); + table_close(hrel, AccessShareLock); return success; } @@ -456,76 +343,64 @@ load_fss(int fss_hash, int ncols, double **matrix, double *targets, int *rows) * Updates the specified line in the specified feature subspace. * Returns false if the operation failed, true otherwise. 
* - * 'fss_hash' specifies the feature subspace - * 'nrows' x 'ncols' is the shape of 'matrix' - * 'targets' is vector of size 'nrows' + * 'fss_hash' specifies the feature subspace 'nrows' x 'ncols' is the shape + * of 'matrix' 'targets' is vector of size 'nrows' + * + * Necessary to prevent waiting for another transaction to commit in index + * insertion or heap update. + * + * Caller guaranteed that no one AQO process insert or update this data row. */ bool -update_fss(int fss_hash, int nrows, int ncols, double **matrix, double *targets) +update_fss(int fhash, int fsshash, int nrows, int ncols, + double **matrix, double *targets) { - RangeVar *aqo_data_table_rv; - Relation aqo_data_heap; - TupleDesc tuple_desc; + RangeVar *rv; + Relation hrel; + Relation irel; + SnapshotData snap; + TupleTableSlot *slot; + TupleDesc tupDesc; HeapTuple tuple, nw_tuple; - - TupleTableSlot *slot; + Datum values[5]; + bool isnull[5] = { false, false, false, false, false }; + bool replace[5] = { false, false, false, true, true }; bool shouldFree; bool find_ok = false; bool update_indexes; - - LOCKMODE lockmode = RowExclusiveLock; - - Relation data_index_rel; - Oid data_index_rel_oid; - IndexScanDesc data_index_scan; + Oid reloid; + IndexScanDesc scan; ScanKeyData key[2]; + bool result = true; - Datum values[5]; - bool isnull[5] = { false, false, false, false, false }; - bool replace[5] = { false, false, false, true, true }; - - data_index_rel_oid = RelnameGetRelid("aqo_fss_access_idx"); - if (!OidIsValid(data_index_rel_oid)) + reloid = RelnameGetRelid("aqo_fss_access_idx"); + if (!OidIsValid(reloid)) { disable_aqo_for_query(); return false; } - aqo_data_table_rv = makeRangeVar("public", "aqo_data", -1); - aqo_data_heap = table_openrv(aqo_data_table_rv, lockmode); - - tuple_desc = RelationGetDescr(aqo_data_heap); - - data_index_rel = index_open(data_index_rel_oid, lockmode); - data_index_scan = index_beginscan(aqo_data_heap, - data_index_rel, - SnapshotSelf, - 2, - 0); + rv = 
makeRangeVar("public", "aqo_data", -1); + hrel = table_openrv(rv, RowExclusiveLock); + irel = index_open(reloid, RowExclusiveLock); + tupDesc = RelationGetDescr(hrel); - ScanKeyInit(&key[0], - 1, - BTEqualStrategyNumber, - F_INT4EQ, - Int32GetDatum(query_context.fspace_hash)); + InitDirtySnapshot(snap); + scan = index_beginscan(hrel, irel, &snap, 2, 0); - ScanKeyInit(&key[1], - 2, - BTEqualStrategyNumber, - F_INT4EQ, - Int32GetDatum(fss_hash)); + ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fhash)); + ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fsshash)); - index_rescan(data_index_scan, key, 2, NULL, 0); + index_rescan(scan, key, 2, NULL, 0); - slot = MakeSingleTupleTableSlot(data_index_scan->heapRelation->rd_att, - &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(data_index_scan, ForwardScanDirection, slot); + slot = MakeSingleTupleTableSlot(tupDesc, &TTSOpsBufferHeapTuple); + find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); if (!find_ok) { - values[0] = Int32GetDatum(query_context.fspace_hash); - values[1] = Int32GetDatum(fss_hash); + values[0] = Int32GetDatum(fhash); + values[1] = Int32GetDatum(fsshash); values[2] = Int32GetDatum(ncols); if (ncols > 0) @@ -534,26 +409,22 @@ update_fss(int fss_hash, int nrows, int ncols, double **matrix, double *targets) isnull[3] = true; values[4] = PointerGetDatum(form_vector(targets, nrows)); - tuple = heap_form_tuple(tuple_desc, values, isnull); - PG_TRY(); - { - simple_heap_insert(aqo_data_heap, tuple); - my_index_insert(data_index_rel, values, isnull, &(tuple->t_self), - aqo_data_heap, UNIQUE_CHECK_YES); - } - PG_CATCH(); - { - CommandCounterIncrement(); - simple_heap_delete(aqo_data_heap, &(tuple->t_self)); - PG_RE_THROW(); - } - PG_END_TRY(); + tuple = heap_form_tuple(tupDesc, values, isnull); + + /* + * Don't use PG_TRY() section because of dirty snapshot and caller atomic + * prerequisities guarantees to us that no one concurrent insertion can + 
* exists. + */ + simple_heap_insert(hrel, tuple); + my_index_insert(irel, values, isnull, &(tuple->t_self), + hrel, UNIQUE_CHECK_YES); } - else + else if (!TransactionIdIsValid(snap.xmin) && !TransactionIdIsValid(snap.xmax)) { tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); Assert(shouldFree != true); - heap_deform_tuple(tuple, aqo_data_heap->rd_att, values, isnull); + heap_deform_tuple(tuple, hrel->rd_att, values, isnull); if (ncols > 0) values[3] = PointerGetDatum(form_matrix(matrix, nrows, ncols)); @@ -561,36 +432,44 @@ update_fss(int fss_hash, int nrows, int ncols, double **matrix, double *targets) isnull[3] = true; values[4] = PointerGetDatum(form_vector(targets, nrows)); - nw_tuple = heap_modify_tuple(tuple, tuple_desc, + nw_tuple = heap_modify_tuple(tuple, tupDesc, values, isnull, replace); - if (my_simple_heap_update(aqo_data_heap, &(nw_tuple->t_self), nw_tuple, + if (my_simple_heap_update(hrel, &(nw_tuple->t_self), nw_tuple, &update_indexes)) { if (update_indexes) - my_index_insert(data_index_rel, values, isnull, + my_index_insert(irel, values, isnull, &(nw_tuple->t_self), - aqo_data_heap, UNIQUE_CHECK_YES); + hrel, UNIQUE_CHECK_YES); + result = true; } else { /* - * Ooops, somebody concurrently updated the tuple. We have to - * merge our changes somehow, but now we just discard ours. We - * don't believe in high probability of simultaneously finishing - * of two long, complex, and important queries, so we don't loss - * important data. + * Ooops, somebody concurrently updated the tuple. It is possible + * only in the case of changes made by third-party code. */ + elog(ERROR, "AQO data piece (%d %d) concurrently updated" + " by a stranger backend.", + fhash, fsshash); + result = false; } } + else + { + /* + * Concurrent update was made. To prevent deadlocks refuse to update. 
+ */ + result = false; + } ExecDropSingleTupleTableSlot(slot); - index_endscan(data_index_scan); - index_close(data_index_rel, lockmode); - table_close(aqo_data_heap, lockmode); + index_endscan(scan); + index_close(irel, RowExclusiveLock); + table_close(hrel, RowExclusiveLock); CommandCounterIncrement(); - - return true; + return result; } /* @@ -600,62 +479,43 @@ update_fss(int fss_hash, int nrows, int ncols, double **matrix, double *targets) * is not found. */ QueryStat * -get_aqo_stat(int query_hash) +get_aqo_stat(int qhash) { - RangeVar *aqo_stat_table_rv; - Relation aqo_stat_heap; - HeapTuple tuple; - LOCKMODE heap_lock = AccessShareLock; - - Relation stat_index_rel; - Oid stat_index_rel_oid; - IndexScanDesc stat_index_scan; + RangeVar *rv; + Relation hrel; + Relation irel; + TupleTableSlot *slot; + Oid reloid; + IndexScanDesc scan; ScanKeyData key; - LOCKMODE index_lock = AccessShareLock; - - Datum values[9]; - bool nulls[9]; - QueryStat *stat = palloc_query_stat(); - - TupleTableSlot *slot; bool shouldFree; - bool find_ok = false; - stat_index_rel_oid = RelnameGetRelid("aqo_query_stat_idx"); - if (!OidIsValid(stat_index_rel_oid)) + reloid = RelnameGetRelid("aqo_query_stat_idx"); + if (!OidIsValid(reloid)) { disable_aqo_for_query(); return NULL; } - aqo_stat_table_rv = makeRangeVar("public", "aqo_query_stat", -1); - aqo_stat_heap = table_openrv(aqo_stat_table_rv, heap_lock); + rv = makeRangeVar("public", "aqo_query_stat", -1); + hrel = table_openrv(rv, AccessShareLock); + irel = index_open(reloid, AccessShareLock); - stat_index_rel = index_open(stat_index_rel_oid, index_lock); - stat_index_scan = index_beginscan(aqo_stat_heap, - stat_index_rel, - SnapshotSelf, - 1, - 0); + scan = index_beginscan(hrel, irel, SnapshotSelf, 1, 0); + ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(qhash)); + index_rescan(scan, &key, 1, NULL, 0); + slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - ScanKeyInit(&key, - 1, - 
BTEqualStrategyNumber, - F_INT4EQ, - Int32GetDatum(query_hash)); - - index_rescan(stat_index_scan, &key, 1, NULL, 0); - - slot = MakeSingleTupleTableSlot(stat_index_scan->heapRelation->rd_att, - &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(stat_index_scan, ForwardScanDirection, slot); - - if (find_ok) + if (index_getnext_slot(scan, ForwardScanDirection, slot)) { + HeapTuple tuple; + Datum values[9]; + bool nulls[9]; + tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); Assert(shouldFree != true); - heap_deform_tuple(tuple, aqo_stat_heap->rd_att, values, nulls); + heap_deform_tuple(tuple, hrel->rd_att, values, nulls); DeformVectorSz(values[1], stat->execution_time_with_aqo); DeformVectorSz(values[2], stat->execution_time_without_aqo); @@ -669,10 +529,9 @@ get_aqo_stat(int query_hash) } ExecDropSingleTupleTableSlot(slot); - index_endscan(stat_index_scan); - index_close(stat_index_rel, index_lock); - table_close(aqo_stat_heap, heap_lock); - + index_endscan(scan); + index_close(irel, AccessShareLock); + table_close(hrel, AccessShareLock); return stat; } @@ -681,26 +540,16 @@ get_aqo_stat(int query_hash) * Executes disable_aqo_for_query if aqo_query_stat is not found. 
*/ void -update_aqo_stat(int query_hash, QueryStat *stat) +update_aqo_stat(int qhash, QueryStat *stat) { - RangeVar *aqo_stat_table_rv; - Relation aqo_stat_heap; + RangeVar *rv; + Relation hrel; + Relation irel; + SnapshotData snap; + TupleTableSlot *slot; + TupleDesc tupDesc; HeapTuple tuple, nw_tuple; - TupleDesc tuple_desc; - - TupleTableSlot *slot; - bool shouldFree; - bool find_ok = false; - bool update_indexes; - - LOCKMODE lockmode = RowExclusiveLock; - - Relation stat_index_rel; - Oid stat_index_rel_oid; - IndexScanDesc stat_index_scan; - ScanKeyData key; - Datum values[9]; bool isnull[9] = { false, false, false, false, false, false, @@ -708,37 +557,29 @@ update_aqo_stat(int query_hash, QueryStat *stat) bool replace[9] = { false, true, true, true, true, true, true, true, true }; + bool shouldFree; + bool update_indexes; + Oid reloid; + IndexScanDesc scan; + ScanKeyData key; - stat_index_rel_oid = RelnameGetRelid("aqo_query_stat_idx"); - if (!OidIsValid(stat_index_rel_oid)) + reloid = RelnameGetRelid("aqo_query_stat_idx"); + if (!OidIsValid(reloid)) { disable_aqo_for_query(); return; } - aqo_stat_table_rv = makeRangeVar("public", "aqo_query_stat", -1); - aqo_stat_heap = table_openrv(aqo_stat_table_rv, lockmode); - - tuple_desc = RelationGetDescr(aqo_stat_heap); - - stat_index_rel = index_open(stat_index_rel_oid, lockmode); - stat_index_scan = index_beginscan(aqo_stat_heap, - stat_index_rel, - SnapshotSelf, - 1, - 0); + rv = makeRangeVar("public", "aqo_query_stat", -1); + hrel = table_openrv(rv, RowExclusiveLock); + irel = index_open(reloid, RowExclusiveLock); + tupDesc = RelationGetDescr(hrel); - ScanKeyInit(&key, - 1, - BTEqualStrategyNumber, - F_INT4EQ, - Int32GetDatum(query_hash)); - - index_rescan(stat_index_scan, &key, 1, NULL, 0); - - slot = MakeSingleTupleTableSlot(stat_index_scan->heapRelation->rd_att, - &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(stat_index_scan, ForwardScanDirection, slot); + InitDirtySnapshot(snap); + scan = 
index_beginscan(hrel, irel, &snap, 1, 0); + ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(qhash)); + index_rescan(scan, &key, 1, NULL, 0); + slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); /*values[0] will be initialized later */ values[1] = PointerGetDatum(FormVectorSz(stat->execution_time_with_aqo)); @@ -751,57 +592,53 @@ update_aqo_stat(int query_hash, QueryStat *stat) values[7] = Int64GetDatum(stat->executions_with_aqo); values[8] = Int64GetDatum(stat->executions_without_aqo); - if (!find_ok) + if (!index_getnext_slot(scan, ForwardScanDirection, slot)) { - values[0] = Int32GetDatum(query_hash); - tuple = heap_form_tuple(tuple_desc, values, isnull); - PG_TRY(); - { - simple_heap_insert(aqo_stat_heap, tuple); - my_index_insert(stat_index_rel, values, isnull, &(tuple->t_self), - aqo_stat_heap, UNIQUE_CHECK_YES); - } - PG_CATCH(); - { - CommandCounterIncrement(); - simple_heap_delete(aqo_stat_heap, &(tuple->t_self)); - PG_RE_THROW(); - } - PG_END_TRY(); + /* Such signature (hash) doesn't yet exist in the ML knowledge base. */ + values[0] = Int32GetDatum(qhash); + tuple = heap_form_tuple(tupDesc, values, isnull); + simple_heap_insert(hrel, tuple); + my_index_insert(irel, values, isnull, &(tuple->t_self), + hrel, UNIQUE_CHECK_YES); } - else + else if (!TransactionIdIsValid(snap.xmin) && !TransactionIdIsValid(snap.xmax)) { + /* Need to update ML data row and no one backend concurrently doing it. 
*/ tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); Assert(shouldFree != true); - values[0] = heap_getattr(tuple, 1, - RelationGetDescr(aqo_stat_heap), &isnull[0]); - nw_tuple = heap_modify_tuple(tuple, tuple_desc, - values, isnull, replace); - if (my_simple_heap_update(aqo_stat_heap, &(nw_tuple->t_self), nw_tuple, + values[0] = heap_getattr(tuple, 1, tupDesc, &isnull[0]); + nw_tuple = heap_modify_tuple(tuple, tupDesc, values, isnull, replace); + if (my_simple_heap_update(hrel, &(nw_tuple->t_self), nw_tuple, &update_indexes)) { /* NOTE: insert index tuple iff heap update succeeded! */ if (update_indexes) - my_index_insert(stat_index_rel, values, isnull, + my_index_insert(irel, values, isnull, &(nw_tuple->t_self), - aqo_stat_heap, UNIQUE_CHECK_YES); + hrel, UNIQUE_CHECK_YES); } else { /* - * Ooops, somebody concurrently updated the tuple. We have to - * merge our changes somehow, but now we just discard ours. We - * don't believe in high probability of simultaneously finishing - * of two long, complex, and important queries, so we don't loss - * important data. + * Ooops, somebody concurrently updated the tuple. It is possible + * only in the case of changes made by third-party code. */ + elog(ERROR, "AQO statistic data for query signature %d concurrently" + " updated by a stranger backend.", + qhash); } } + else + { + /* + * Concurrent update was made. To prevent deadlocks refuse to update. 
+ */ + } ExecDropSingleTupleTableSlot(slot); - index_endscan(stat_index_scan); - index_close(stat_index_rel, lockmode); - table_close(aqo_stat_heap, lockmode); + index_endscan(scan); + index_close(irel, RowExclusiveLock); + table_close(hrel, RowExclusiveLock); CommandCounterIncrement(); } @@ -954,7 +791,7 @@ my_simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup, /* Provides correct insert in both PostgreQL 9.6.X and 10.X.X */ -static bool +bool my_index_insert(Relation indexRelation, Datum *values, bool *isnull, ItemPointer heap_t_ctid, diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl new file mode 100644 index 00000000..fcb7f3fd --- /dev/null +++ b/t/001_pgbench.pl @@ -0,0 +1,48 @@ +use strict; +use warnings; +use TestLib; +use Test::More tests => 6; +use PostgresNode; + +my $node = get_new_node('aqotest'); +$node->init; +$node->append_conf('postgresql.conf', qq{ + shared_preload_libraries = 'aqo' + aqo.mode = 'intelligent' + aqo.log_ignorance = 'on' + }); + +#my $result1; + +$node->start(); + +# Check conflicts of accessing to the ML knowledge base +# intelligent mode +$node->safe_psql('postgres', "CREATE EXTENSION aqo"); +$node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'intelligent'"); +$node->command_ok([ 'pgbench', '-i', '-s', '1' ], 'init pgbench tables'); +$node->command_ok([ 'pgbench', '-t', "1000", '-c', "20", '-j', "20" ], + 'pgbench in intelligent mode'); + +$node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'controlled'"); +$node->command_ok([ 'pgbench', '-t', "1000", '-c', "20", '-j', "20" ], + 'pgbench in controlled mode'); + +$node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'disabled'"); +$node->command_ok([ 'pgbench', '-t', "1000", '-c', "20", '-j', "20" ], + 'pgbench in disabled mode'); + +$node->safe_psql('postgres', "DROP EXTENSION aqo"); +$node->safe_psql('postgres', "CREATE EXTENSION aqo"); + +$node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'learn'"); +$node->command_ok([ 'pgbench', '-t', 
"1000", '-c', "20", '-j', "20" ], + 'pgbench in learn mode'); + +$node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'frozen'"); +$node->command_ok([ 'pgbench', '-t', "1000", '-c', "20", '-j', "20" ], + 'pgbench in frozen mode'); + +$node->safe_psql('postgres', "DROP EXTENSION aqo"); + +$node->stop(); From 75f47e23782ec7d2fce23d50f3dd777f8ccc7585 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 18 Feb 2021 16:23:06 +0500 Subject: [PATCH 018/203] Improve AQO makefile --- Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 50aaea9a..ff9d7af8 100644 --- a/Makefile +++ b/Makefile @@ -2,8 +2,8 @@ EXTENSION = aqo EXTVERSION = 1.2 -PGFILEDESC = "AQO - adaptive query optimization" -MODULES = aqo +PGFILEDESC = "AQO - Adaptive Query Optimization" +MODULE_big = aqo OBJS = aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ selectivity_cache.o storage.o utils.o ignorance.o $(WIN32RES) @@ -27,7 +27,6 @@ EXTRA_INSTALL = contrib/postgres_fdw DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql -MODULE_big = aqo ifdef USE_PGXS PG_CONFIG ?= pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) From da59897d0115ca379c7f7ef470a4f413917eb4d8 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 18 Feb 2021 16:27:40 +0500 Subject: [PATCH 019/203] Bugfix: don't create ignorance table in parallel worker. --- ignorance.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ignorance.c b/ignorance.c index 84c3f5d9..88bb97d1 100644 --- a/ignorance.c +++ b/ignorance.c @@ -2,6 +2,7 @@ #include "ignorance.h" #include "access/heapam.h" +#include "access/parallel.h" #include "executor/spi.h" #include "utils/lsyscache.h" #include "miscadmin.h" @@ -16,9 +17,10 @@ set_ignorance(bool newval, void *extra) * It is not problem. 
We will check existence at each update and create this * table in dynamic mode, if needed. */ - if (IsUnderPostmaster && newval && (aqo_log_ignorance != newval)) + if (IsUnderPostmaster && !IsParallelWorker() && newval && + (aqo_log_ignorance != newval)) /* Create storage and no error, if it exists already. */ - (bool) create_ignorance_table(true); + create_ignorance_table(true); aqo_log_ignorance = newval; } @@ -101,7 +103,7 @@ update_ignorance(int qhash, int fhash, int fss_hash, Plan *plan) if (!OidIsValid(reloid)) { /* This table doesn't created on instance startup. Create now. */ - (bool) create_ignorance_table(false); + create_ignorance_table(false); reloid = RangeVarGetRelid(rv, NoLock, true); if (!OidIsValid(reloid)) elog(PANIC, "Ignorance table does not exists!"); From 3416ef58cce135fc532d269148777107fd284250 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Mon, 1 Mar 2021 10:22:41 +0300 Subject: [PATCH 020/203] Make the explain of AQO more readable. --- expected/aqo_fdw.out | 76 +++++++++++++++++++++++++------------------- expected/gucs.out | 43 ++++++++++++------------- postprocessing.c | 12 ++++--- sql/gucs.sql | 2 +- 4 files changed, 73 insertions(+), 60 deletions(-) diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index c7cb734d..23cd2f3f 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -24,23 +24,25 @@ ANALYZE local; -- Trivial foreign scan.s EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn; - QUERY PLAN -------------------------------------------------------------- - Foreign Scan on frgn (actual rows=1 loops=1) (AQO not used) + QUERY PLAN +---------------------------------------------- + Foreign Scan on frgn (actual rows=1 loops=1) + AQO not used Using aqo: true AQO mode: LEARN JOINS: 0 -(4 rows) +(5 rows) EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn; - QUERY PLAN ------------------------------------------------------------------------------ - Foreign Scan on frgn 
(actual rows=1 loops=1) (AQO: cardinality=1, error=0%) + QUERY PLAN +---------------------------------------------- + Foreign Scan on frgn (actual rows=1 loops=1) + AQO: rows=1, error=0% Using aqo: true AQO mode: LEARN JOINS: 0 -(4 rows) +(5 rows) -- Push down base filters. Use verbose mode to see filters. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE)) @@ -50,83 +52,93 @@ LINE 1: ...LAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE)) ^ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT x FROM frgn WHERE x < 10; - QUERY PLAN --------------------------------------------------------------------- - Foreign Scan on public.frgn (actual rows=1 loops=1) (AQO not used) + QUERY PLAN +----------------------------------------------------------- + Foreign Scan on public.frgn (actual rows=1 loops=1) + AQO not used Output: x Remote SQL: SELECT x FROM public.local WHERE ((x < 10)) Using aqo: true AQO mode: LEARN JOINS: 0 -(6 rows) +(7 rows) EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants - QUERY PLAN -------------------------------------------------------------------------------- - Foreign Scan on frgn (actual rows=0 loops=1) (AQO: cardinality=1, error=100%) + QUERY PLAN +---------------------------------------------- + Foreign Scan on frgn (actual rows=0 loops=1) + AQO: rows=1, error=100% Using aqo: true AQO mode: LEARN JOINS: 0 -(4 rows) +(5 rows) -- Trivial JOIN push-down. 
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; - QUERY PLAN ---------------------------------------------------------------------------- - Merge Join (actual rows=1 loops=1) (AQO not used) + QUERY PLAN +------------------------------------------------------------ + Merge Join (actual rows=1 loops=1) + AQO not used Merge Cond: (a.x = b.x) - -> Sort (actual rows=1 loops=1) (AQO not used) + -> Sort (actual rows=1 loops=1) + AQO not used Sort Key: a.x Sort Method: quicksort Memory: 25kB - -> Foreign Scan on frgn a (actual rows=1 loops=1) (AQO not used) - -> Sort (actual rows=1 loops=1) (AQO not used) + -> Foreign Scan on frgn a (actual rows=1 loops=1) + AQO not used + -> Sort (actual rows=1 loops=1) + AQO not used Sort Key: b.x Sort Method: quicksort Memory: 25kB - -> Foreign Scan on frgn b (actual rows=1 loops=1) (AQO not used) + -> Foreign Scan on frgn b (actual rows=1 loops=1) + AQO not used Using aqo: true AQO mode: LEARN JOINS: 0 -(13 rows) +(18 rows) EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; QUERY PLAN -------------------------------------------------------------------------------------------------------- - Foreign Scan (actual rows=1 loops=1) (AQO: cardinality=1, error=0%) + Foreign Scan (actual rows=1 loops=1) + AQO: rows=1, error=0% Output: a.x, b.x Relations: (public.frgn a) INNER JOIN (public.frgn b) Remote SQL: SELECT r1.x, r2.x FROM (public.local r1 INNER JOIN public.local r2 ON (((r1.x = r2.x)))) Using aqo: true AQO mode: LEARN JOINS: 0 -(7 rows) +(8 rows) -- TODO: Non-mergejoinable join condition. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM frgn AS a, frgn AS b WHERE a.xstr, '\n'); + Assert(es->format == EXPLAIN_FORMAT_TEXT); + if (es->str->len == 0 || es->str->data[es->str->len - 1] == '\n') + appendStringInfoSpaces(es->str, es->indent * 2); + if (plan->predicted_cardinality > 0.) { error = 100. 
* (plan->predicted_cardinality - (rows*wrkrs)) / plan->predicted_cardinality; appendStringInfo(es->str, - " (AQO: cardinality=%.0lf, error=%.0lf%%", + "AQO: rows=%.0lf, error=%.0lf%%", plan->predicted_cardinality, error); } else - appendStringInfo(es->str, " (AQO not used"); + appendStringInfo(es->str, "AQO not used"); if (aqo_show_hash) - appendStringInfo(es->str, ", fss hash = %d", plan->fss_hash); - appendStringInfoChar(es->str, ')'); + appendStringInfo(es->str, ", fss=%d", plan->fss_hash); if (prev_ExplainOneNode_hook) prev_ExplainOneNode_hook(es, ps, plan, rows); diff --git a/sql/gucs.sql b/sql/gucs.sql index 5121c928..7c04d98f 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -17,7 +17,7 @@ SET aqo.log_ignorance = 'on'; SET aqo.log_ignorance = 'off'; SET aqo.log_ignorance = 'off'; SET aqo.log_ignorance = 'on'; -\d + CREATE EXTENSION aqo; SET aqo.log_ignorance = 'off'; SET aqo.log_ignorance = 'on'; From 9b8c5980e0643f708633f9279a426fb9a6ca2a60 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 2 Mar 2021 13:12:31 +0300 Subject: [PATCH 021/203] Bugfix: zero query error in disabled AQO mode with a forced stat collection. Add some comments on auto tuning algorithm. change the intelligent.sql test to avoid unsteady behaviour in the intelligent mode. Change the logic of a node hash showing. Add regression test on forced stat collection mode. 
--- Makefile | 3 +- auto_tuning.c | 17 ++- expected/aqo_intelligent.out | 9 -- expected/forced_stat_collection.out | 55 +++++++++ postprocessing.c | 170 +++++++++++++++++----------- preprocessing.c | 1 - sql/aqo_intelligent.sql | 5 - sql/forced_stat_collection.sql | 36 ++++++ 8 files changed, 212 insertions(+), 84 deletions(-) create mode 100644 expected/forced_stat_collection.out create mode 100644 sql/forced_stat_collection.sql diff --git a/Makefile b/Makefile index ff9d7af8..d36e732b 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,8 @@ REGRESS = aqo_disabled \ schema \ aqo_fdw \ aqo_CVE-2020-14350 \ - gucs + gucs \ + forced_stat_collection fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw PG_CPPFLAGS += -I$(libpq_srcdir) -I$(fdw_srcdir) diff --git a/auto_tuning.c b/auto_tuning.c index b82b415b..8b7b32b6 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -177,11 +177,26 @@ automatical_query_tuning(int query_hash, QueryStat * stat) stat->planning_time_without_aqo_size); p_use = t_not_aqo / (t_not_aqo + t_aqo); + + /* + * Here p_use<0.5 and p_use->0, if AQO decreases performance, + * Otherwise, p_use>0.5 and p_use->1. + */ + p_use = 1 / (1 + exp((p_use - 0.5) / unstability)); + + /* + * Here p_use in (0.5..max) if AQO decreases preformance. + * p_use in (0..0.5), otherwise. + */ + p_use -= 1 / (1 + exp(-0.5 / unstability)); p_use /= 1 - 2 / (1 + exp(-0.5 / unstability)); - /* borrowed from drandom() in float.c */ + /* + * If our decision is using AQO for this query class, then learn on new + * queries of this type. Otherwise, turn off. 
+ */ query_context.use_aqo = (random() / ((double) MAX_RANDOM_VALUE + 1)) < p_use; query_context.learn_aqo = query_context.use_aqo; } diff --git a/expected/aqo_intelligent.out b/expected/aqo_intelligent.out index 266ff745..1e984a2c 100644 --- a/expected/aqo_intelligent.out +++ b/expected/aqo_intelligent.out @@ -229,15 +229,6 @@ SELECT count(*) FROM tmp1; 4 (1 row) -DROP TABLE tmp1; -CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 -WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; -SELECT count(*) FROM tmp1; - count -------- - 3 -(1 row) - DROP TABLE tmp1; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 WHERE a < 5 AND b < 5 AND c < 5 AND d < 5; diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out new file mode 100644 index 00000000..4569d572 --- /dev/null +++ b/expected/forced_stat_collection.out @@ -0,0 +1,55 @@ +\set citizens 1000 +SET aqo.mode = 'disabled'; +SET aqo.force_collect_stat = 'on'; +CREATE TABLE person ( + id serial PRIMARY KEY, + age integer, + gender text, + passport integer +); +-- Fill the person table with workers data. 
+INSERT INTO person (id,age,gender,passport) + (SELECT q1.id,q1.age, + CASE WHEN q1.id % 4 = 0 THEN 'Female' + ELSE 'Male' + END, + CASE WHEN (q1.age>18) THEN 1E6 + q1.id * 1E3 + ELSE NULL + END + FROM (SELECT *, 14+(id % 60) AS age FROM generate_series(1, :citizens) id) AS q1 + ); +CREATE EXTENSION aqo; +SELECT count(*) FROM person WHERE age<18; + count +------- + 67 +(1 row) + +SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; + count +------- + 0 +(1 row) + +SELECT * FROM aqo_data; + fspace_hash | fsspace_hash | nfeatures | features | targets +-------------+--------------+-----------+----------+--------- +(0 rows) + +SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex +FROM aqo_queries JOIN aqo_query_stat USING (query_hash); + learn_aqo | use_aqo | auto_tuning | ce | nex +-----------+---------+-------------+----------------------+----- + f | f | f | {2.9661225937240054} | 1 + f | f | f | {2.9634630129852053} | 1 +(2 rows) + +SELECT query_text FROM aqo_query_texts ORDER BY (query_text); + query_text +-------------------------------------------------------------------- + COMMON feature space (do not delete!) + SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; + SELECT count(*) FROM person WHERE age<18; +(3 rows) + +DROP EXTENSION aqo; diff --git a/postprocessing.c b/postprocessing.c index 74072fdb..a0979c55 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -236,9 +236,86 @@ learnOnPlanState(PlanState *p, void *context) { aqo_obj_stat *ctx = (aqo_obj_stat *) context; aqo_obj_stat SubplanCtx = {NIL, NIL, NIL, ctx->learn}; + double predicted = 0.; + double learn_rows = 0.; + + if (!p->instrument) + return true; planstate_tree_walker(p, learnOnPlanState, (void *) &SubplanCtx); + if (p->instrument->nloops > 0.) + { + /* If we can strongly calculate produced rows, do it. 
*/ + if (p->worker_instrument && IsParallelTuplesProcessing(p->plan)) + { + double wnloops = 0.; + double wntuples = 0.; + int i; + + for (i = 0; i < p->worker_instrument->num_workers; i++) + { + double t = p->worker_instrument->instrument[i].ntuples; + double l = p->worker_instrument->instrument[i].nloops; + + if (l <= 0) + continue; + + wntuples += t; + wnloops += l; + learn_rows += t/l; + } + + Assert(p->instrument->nloops >= wnloops); + Assert(p->instrument->ntuples >= wntuples); + if (p->instrument->nloops - wnloops > 0.5) + learn_rows += (p->instrument->ntuples - wntuples) / + (p->instrument->nloops - wnloops); + } + else + /* This node does not required to sum tuples of each worker + * to calculate produced rows. */ + learn_rows = p->instrument->ntuples / p->instrument->nloops; + } + + /* Calculate predicted cardinality */ + if (p->plan->predicted_cardinality > 0.) + { + Assert(query_context.use_aqo); + + /* AQO made prediction. use it. */ + predicted = p->plan->predicted_cardinality; + } + else if (IsParallelTuplesProcessing(p->plan)) + /* + * AQO didn't make a prediction and we need to calculate real number + * of tuples passed because of parallel workers. + */ + predicted = p->plan->plan_rows * + get_parallel_divisor(p->plan->path_parallel_workers); + else + /* No AQO prediction. Parallel workers not used for this plan node. */ + predicted = p->plan->plan_rows; + + if (!ctx->learn && query_context.collect_stat) + { + double p,l; + + /* Special case of forced gathering of statistics. */ + Assert(predicted >= 0 && learn_rows >= 0); + p = (predicted < 1) ? 0 : log(predicted); + l = (learn_rows < 1) ? 0 : log(learn_rows); + cardinality_sum_errors += fabs(p - l); + cardinality_num_objects += 1; + return false; + } + else if (!ctx->learn) + return true; + + /* It is needed for correct exp(result) calculation. */ + predicted = clamp_row_est(predicted); + learn_rows = clamp_row_est(learn_rows); + /* * Some nodes inserts after planning step (See T_Hash node type). 
* In this case we have'nt AQO prediction and fss record. @@ -269,60 +346,15 @@ learnOnPlanState(PlanState *p, void *context) IsA(p, ForeignScanState) || IsA(p, AppendState) || IsA(p, MergeAppendState))) { - double learn_rows = 0.; - double predicted = 0.; - - if (p->instrument->nloops > 0.) - { - /* If we can strongly calculate produced rows, do it. */ - if (p->worker_instrument && IsParallelTuplesProcessing(p->plan)) - { - double wnloops = 0.; - double wntuples = 0.; - int i; - - for (i = 0; i < p->worker_instrument->num_workers; i++) - { - double t = p->worker_instrument->instrument[i].ntuples; - double l = p->worker_instrument->instrument[i].nloops; - - if (l <= 0) - continue; - - wntuples += t; - wnloops += l; - learn_rows += t/l; - } - - Assert(p->instrument->nloops >= wnloops); - Assert(p->instrument->ntuples >= wntuples); - if (p->instrument->nloops - wnloops > 0.5) - learn_rows += (p->instrument->ntuples - wntuples) / - (p->instrument->nloops - wnloops); - } - else - /* This node does not required to sum tuples of each worker - * to calculate produced rows. */ - learn_rows = p->instrument->ntuples / p->instrument->nloops; - - if (p->plan->predicted_cardinality > 0.) - predicted = p->plan->predicted_cardinality; - else if (IsParallelTuplesProcessing(p->plan)) - predicted = p->plan->plan_rows * - get_parallel_divisor(p->plan->path_parallel_workers); - else - predicted = p->plan->plan_rows; - - /* It is needed for correct exp(result) calculation. */ - predicted = clamp_row_est(predicted); - learn_rows = clamp_row_est(learn_rows); - } - else + if (p->instrument->nloops <= 0.) { /* * LAV: I found two cases for this code: * 1. if query returns with error. - * 2. plan node has never visited. + * 2. plan node has never visited. In this case we can not teach + * AQO because ntuples value is equal to 0 and we will got + * learn rows == 1. It is false knowledge: at another place of + * a plan, scanning of the node may produce many tuples. 
* Both cases can't be used to learning AQO because give an * incorrect number of rows. */ @@ -330,17 +362,6 @@ learnOnPlanState(PlanState *p, void *context) } Assert(predicted >= 1 && learn_rows >= 1); - cardinality_sum_errors += fabs(log(predicted) - log(learn_rows)); - cardinality_num_objects += 1; - - /* - * A subtree was not visited. In this case we can not teach AQO - * because ntuples value is equal to 0 and we will got - * learn rows == 1. - * It is false knowledge: at another place of a plan, scanning of - * the node may produce many tuples. - */ - Assert(p->instrument->nloops >= 1); if (ctx->learn) learn_sample(SubplanCtx.clauselist, SubplanCtx.selectivities, @@ -356,7 +377,10 @@ learnOnPlanState(PlanState *p, void *context) } /* - * Updates given row of query statistics. + * Updates given row of query statistics: + * et - execution time + * pt - planning time + * ce - cardinality error */ void update_query_stat_row(double *et, int *et_size, @@ -412,7 +436,8 @@ aqo_ExecutorStart(QueryDesc *queryDesc, int eflags) bool use_aqo; use_aqo = !IsParallelWorker() && (query_context.use_aqo || - query_context.learn_aqo || force_collect_stat); + query_context.learn_aqo || + force_collect_stat); if (use_aqo) { @@ -477,11 +502,15 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) query_context.collect_stat = false; } - if ((query_context.learn_aqo || query_context.collect_stat) && - !HasNeverExecutedNodes(queryDesc->planstate, NULL)) + if ((query_context.learn_aqo && + !HasNeverExecutedNodes(queryDesc->planstate, NULL)) || + (!query_context.learn_aqo && query_context.collect_stat)) { aqo_obj_stat ctx = {NIL, NIL, NIL, query_context.learn_aqo}; + /* + * Analyze plan if AQO need to learn or need to collect statistics only. + */ learnOnPlanState(queryDesc->planstate, (void *) &ctx); list_free(ctx.clauselist); list_free(ctx.relidslist); @@ -507,7 +536,9 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) if (stat != NULL) { + /* Calculate AQO statistics. 
*/ if (query_context.use_aqo) + /* For the case, when query executed with AQO predictions. */ update_query_stat_row(stat->execution_time_with_aqo, &stat->execution_time_with_aqo_size, stat->planning_time_with_aqo, @@ -519,6 +550,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) cardinality_error, &stat->executions_with_aqo); else + /* For the case, when query executed without AQO predictions. */ update_query_stat_row(stat->execution_time_without_aqo, &stat->execution_time_without_aqo_size, stat->planning_time_without_aqo, @@ -541,6 +573,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) if (!query_context.adding_query && query_context.auto_tuning) automatical_query_tuning(query_context.query_hash, stat); + /* Write AQO statistics to the aqo_query_stat table */ update_aqo_stat(query_context.fspace_hash, stat); pfree_query_stat(stat); } @@ -777,7 +810,9 @@ print_node_explain(ExplainState *es, PlanState *ps, Plan *plan, double rows) double error = -1.; if (!aqo_show_details || !plan || !ps->instrument) - return; + goto explain_end; + + Assert(es->format == EXPLAIN_FORMAT_TEXT); if (ps->worker_instrument && IsParallelTuplesProcessing(plan)) { @@ -810,7 +845,8 @@ print_node_explain(ExplainState *es, PlanState *ps, Plan *plan, double rows) else appendStringInfo(es->str, "AQO not used"); - if (aqo_show_hash) +explain_end: + if (plan && aqo_show_hash) appendStringInfo(es->str, ", fss=%d", plan->fss_hash); if (prev_ExplainOneNode_hook) diff --git a/preprocessing.c b/preprocessing.c index 3ef0ac20..971f2a80 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -152,7 +152,6 @@ aqo_planner(Query *parse, } INSTR_TIME_SET_CURRENT(query_context.query_starttime); - query_context.query_hash = get_query_hash(parse, query_text); if (query_is_deactivated(query_context.query_hash)) diff --git a/sql/aqo_intelligent.sql b/sql/aqo_intelligent.sql index 7082ca8e..bc3351de 100644 --- a/sql/aqo_intelligent.sql +++ b/sql/aqo_intelligent.sql @@ -109,11 +109,6 @@ WHERE a < 4 AND b < 4 AND c < 4 AND d < 4; 
SELECT count(*) FROM tmp1; DROP TABLE tmp1; -CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 -WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; -SELECT count(*) FROM tmp1; -DROP TABLE tmp1; - CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 WHERE a < 5 AND b < 5 AND c < 5 AND d < 5; SELECT count(*) FROM tmp1; diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql new file mode 100644 index 00000000..df540d04 --- /dev/null +++ b/sql/forced_stat_collection.sql @@ -0,0 +1,36 @@ +\set citizens 1000 + +SET aqo.mode = 'disabled'; +SET aqo.force_collect_stat = 'on'; + +CREATE TABLE person ( + id serial PRIMARY KEY, + age integer, + gender text, + passport integer +); + +-- Fill the person table with workers data. +INSERT INTO person (id,age,gender,passport) + (SELECT q1.id,q1.age, + CASE WHEN q1.id % 4 = 0 THEN 'Female' + ELSE 'Male' + END, + CASE WHEN (q1.age>18) THEN 1E6 + q1.id * 1E3 + ELSE NULL + END + FROM (SELECT *, 14+(id % 60) AS age FROM generate_series(1, :citizens) id) AS q1 + ); + +CREATE EXTENSION aqo; + +SELECT count(*) FROM person WHERE age<18; +SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; +SELECT * FROM aqo_data; + +SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex +FROM aqo_queries JOIN aqo_query_stat USING (query_hash); + +SELECT query_text FROM aqo_query_texts ORDER BY (query_text); + +DROP EXTENSION aqo; From 2667678e39f2c0e864002ae7d93d7513e436d547 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Sun, 13 Jun 2021 16:47:06 +0500 Subject: [PATCH 022/203] Add CI script to automatize 'make check' on each commit. 
Enable ICU and TAP tests in the AQO CI --- .github/workflows/c-cpp.yml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .github/workflows/c-cpp.yml diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml new file mode 100644 index 00000000..8f1dbeba --- /dev/null +++ b/.github/workflows/c-cpp.yml @@ -0,0 +1,29 @@ +name: 'C/C++ CI for the stable13' + +on: + push: + branches: [ stable13 ] + pull_request: + branches: [ stable13 ] + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - name: Install additional packages + run: sudo apt-get install -y libperl-dev libipc-run-perl + - name: pg + run: | + echo "Deploying to production server on branch $GITHUB_REF" + git config --global user.email "ci@postgrespro.ru" + git config --global user.name "CI PgPro admin" + git clone https://github.com/postgres/postgres.git pg + cd pg + git checkout REL_13_STABLE + ./configure --prefix=`pwd`/tmp_install + git subtree add --prefix=contrib/aqo https://github.com/postgrespro/aqo.git $GITHUB_REF + patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg13.patch + make -j4 > /dev/null && make -j4 -C contrib > /dev/null + make -C contrib/aqo check From e6488e2a32c374e0c8ee7689eff6f15a12f5e0de Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 10 Mar 2021 21:44:02 +0300 Subject: [PATCH 023/203] Log into ignorance table only problematic nodes. Add regression test on currently unsupported planner clauses. 
--- Makefile | 3 ++- expected/gucs.out | 7 +++---- expected/unsupported.out | 41 ++++++++++++++++++++++++++++++++++++++++ postprocessing.c | 3 ++- sql/gucs.sql | 4 ++-- sql/unsupported.sql | 15 +++++++++++++++ 6 files changed, 65 insertions(+), 8 deletions(-) create mode 100644 expected/unsupported.out create mode 100644 sql/unsupported.sql diff --git a/Makefile b/Makefile index d36e732b..bff27f38 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,8 @@ REGRESS = aqo_disabled \ aqo_fdw \ aqo_CVE-2020-14350 \ gucs \ - forced_stat_collection + forced_stat_collection \ + unsupported fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw PG_CPPFLAGS += -I$(libpq_srcdir) -I$(fdw_srcdir) diff --git a/expected/gucs.out b/expected/gucs.out index f4b42041..a4f91130 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -60,11 +60,10 @@ SELECT * FROM t; JOINS: 0 (5 rows) -SELECT node_type FROM aqo_ignorance; -- SeqScan on t must be existed in ignorance table +SELECT node_type FROM aqo_ignorance; node_type ----------- - 19 -(1 row) +(0 rows) EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM t; QUERY PLAN @@ -76,7 +75,7 @@ EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM t; JOINS: 0 (5 rows) -SELECT node_type FROM aqo_ignorance; -- SeqScan on t must be excluded from ignorance table +SELECT node_type FROM aqo_ignorance; node_type ----------- (0 rows) diff --git a/expected/unsupported.out b/expected/unsupported.out new file mode 100644 index 00000000..a9cbeca6 --- /dev/null +++ b/expected/unsupported.out @@ -0,0 +1,41 @@ +CREATE EXTENSION aqo; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'on'; +DROP TABLE IF EXISTS t; +CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t GROUP BY (x) HAVING x > 3; + QUERY PLAN +----------------------------------------------- + HashAggregate (actual rows=17 loops=1) + AQO not used + Group Key: x + Batches: 1 Memory 
Usage: 40kB + -> Seq Scan on t (actual rows=801 loops=1) + AQO not used + Filter: (x > 3) + Rows Removed by Filter: 199 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(11 rows) + +-- Do not support having clauses for now. +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t GROUP BY (x) HAVING x > 3; + QUERY PLAN +----------------------------------------------- + HashAggregate (actual rows=17 loops=1) + AQO not used + Group Key: x + Batches: 1 Memory Usage: 40kB + -> Seq Scan on t (actual rows=801 loops=1) + AQO: rows=801, error=0% + Filter: (x > 3) + Rows Removed by Filter: 199 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(11 rows) + +DROP EXTENSION aqo; diff --git a/postprocessing.c b/postprocessing.c index a0979c55..f10e94ea 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -115,7 +115,8 @@ learn_sample(List *clauselist, List *selectivities, List *relidslist, fss_hash = get_fss_for_object(clauselist, selectivities, relidslist, &nfeatures, &features); - if (aqo_log_ignorance /* && load_fss(fhash, fss_hash, 0, NULL, NULL, NULL) */) + if (aqo_log_ignorance && plan->predicted_cardinality <= 0 && + load_fss(fhash, fss_hash, 0, NULL, NULL, NULL) ) { /* * If ignorance logging is enabled and the feature space was existed in diff --git a/sql/gucs.sql b/sql/gucs.sql index 7c04d98f..99804669 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -26,10 +26,10 @@ SET aqo.log_ignorance = 'on'; EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM t; -SELECT node_type FROM aqo_ignorance; -- SeqScan on t must be existed in ignorance table +SELECT node_type FROM aqo_ignorance; EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM t; -SELECT node_type FROM aqo_ignorance; -- SeqScan on t must be excluded from ignorance table +SELECT node_type FROM aqo_ignorance; -- This GUC can be changed by an admin only. 
CREATE ROLE noadmin; diff --git a/sql/unsupported.sql b/sql/unsupported.sql new file mode 100644 index 00000000..320e9adf --- /dev/null +++ b/sql/unsupported.sql @@ -0,0 +1,15 @@ +CREATE EXTENSION aqo; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'on'; + +DROP TABLE IF EXISTS t; +CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t GROUP BY (x) HAVING x > 3; + +-- Do not support having clauses for now. +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t GROUP BY (x) HAVING x > 3; + +DROP EXTENSION aqo; From 4d08b21057ecdfc97363cfbcb80d606cdcd69bfb Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 8 Jun 2021 09:57:38 +0500 Subject: [PATCH 024/203] Stabilize the forced_stat_collection regression test. As a criteria of sorting table rows use md5(string) instead of string itself. Tags: aqo. --- expected/forced_stat_collection.out | 4 ++-- sql/forced_stat_collection.sql | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index 4569d572..294c7fdb 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -44,12 +44,12 @@ FROM aqo_queries JOIN aqo_query_stat USING (query_hash); f | f | f | {2.9634630129852053} | 1 (2 rows) -SELECT query_text FROM aqo_query_texts ORDER BY (query_text); +SELECT query_text FROM aqo_query_texts ORDER BY md5(query_text); query_text -------------------------------------------------------------------- + SELECT count(*) FROM person WHERE age<18; COMMON feature space (do not delete!) 
SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; - SELECT count(*) FROM person WHERE age<18; (3 rows) DROP EXTENSION aqo; diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index df540d04..209edd19 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -31,6 +31,6 @@ SELECT * FROM aqo_data; SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex FROM aqo_queries JOIN aqo_query_stat USING (query_hash); -SELECT query_text FROM aqo_query_texts ORDER BY (query_text); +SELECT query_text FROM aqo_query_texts ORDER BY md5(query_text); DROP EXTENSION aqo; From 95ea28ad2a27c94106a11d9deec7a38708a7d68d Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 8 Jun 2021 14:45:20 +0500 Subject: [PATCH 025/203] Bugfix. According to the SubqueryScanPath description, we need to use path.parent->subroot as the planning context for interpretation of the subpath. Tags: aqo. --- path_utils.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/path_utils.c b/path_utils.c index f91d8be8..022dff32 100644 --- a/path_utils.c +++ b/path_utils.c @@ -153,6 +153,16 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) return get_path_clauses(((SetOpPath *) path)->subpath, root, selectivities); break; + case T_SubqueryScanPath: + /* + * According to the SubqueryScanPath description, we need to use + * path.parent->subroot as the planning context for interpretation + * of the subpath. 
+ */ + return get_path_clauses(((SubqueryScanPath *) path)->subpath, + path->parent->subroot, + selectivities); + break; case T_LockRowsPath: return get_path_clauses(((LockRowsPath *) path)->subpath, root, selectivities); @@ -161,10 +171,6 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) return get_path_clauses(((LimitPath *) path)->subpath, root, selectivities); break; - case T_SubqueryScanPath: - return get_path_clauses(((SubqueryScanPath *) path)->subpath, root, - selectivities); - break; case T_AppendPath: { ListCell *lc; From 1b563c6316ff67c66b4ea8117d1871f1a96fb92d Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 9 Jun 2021 09:49:53 +0500 Subject: [PATCH 026/203] Remove unneeded (I think so) calculation of selectivities in the learning mode without using AQO. In this mode we aren't going to use any such data during a statistics gathering at the end of execution. Also add more safe initialization of a predicted cardinality field. Tags: aqo. --- cardinality_hooks.c | 38 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/cardinality_hooks.c b/cardinality_hooks.c index dd631161..38bfa6c9 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -139,15 +139,12 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) List *restrict_clauses; int fss = 0; - if (query_context.use_aqo || query_context.learn_aqo) + if (query_context.use_aqo) selectivities = get_selectivities(root, rel->baserestrictinfo, 0, JOIN_INNER, NULL); - - if (!query_context.use_aqo) + else { - if (query_context.learn_aqo) - list_free_deep(selectivities); - + rel->predicted_cardinality = -2.; call_default_set_baserel_rows_estimate(root, rel); return; } @@ -207,7 +204,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, int current_hash; int fss = 0; - if (query_context.use_aqo || query_context.learn_aqo) + if (query_context.use_aqo) { MemoryContext mcxt; @@ -232,14 +229,9 @@ 
aqo_get_parameterized_baserel_size(PlannerInfo *root, pfree(args_hash); pfree(eclass_hash); } - - if (!query_context.use_aqo) + else { - if (query_context.learn_aqo) - { - list_free_deep(selectivities); - list_free(allclauses); - } + predicted_ppi_rows = -3.; return call_default_get_parameterized_baserel_size(root, rel, param_clauses); } @@ -281,15 +273,12 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, List *current_selectivities = NULL; int fss = 0; - if (query_context.use_aqo || query_context.learn_aqo) + if (query_context.use_aqo) current_selectivities = get_selectivities(root, restrictlist, 0, sjinfo->jointype, sjinfo); - - if (!query_context.use_aqo) + else { - if (query_context.learn_aqo) - list_free_deep(current_selectivities); - + rel->predicted_cardinality = -2.; call_default_set_joinrel_size_estimates(root, rel, outer_rel, inner_rel, @@ -352,15 +341,12 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, List *current_selectivities = NULL; int fss = 0; - if (query_context.use_aqo || query_context.learn_aqo) + if (query_context.use_aqo) current_selectivities = get_selectivities(root, restrict_clauses, 0, sjinfo->jointype, sjinfo); - - if (!query_context.use_aqo) + else { - if (query_context.learn_aqo) - list_free_deep(current_selectivities); - + predicted_ppi_rows = -3.; return call_default_get_parameterized_joinrel_size(root, rel, outer_path, inner_path, From 1d03734a04f2abcc1bb713b0bec10b531d4f5ede Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 9 Jun 2021 16:10:25 +0500 Subject: [PATCH 027/203] Switch off parallel workers because of unsteadiness. Change variable and remove comment. Induced by a test passing problem on the powerpc node of the buildfarm. Tags: aqo. 
--- conf.add | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf.add b/conf.add index 3556e4d6..936ca166 100644 --- a/conf.add +++ b/conf.add @@ -1,3 +1,3 @@ autovacuum = off shared_preload_libraries = 'postgres_fdw, aqo' -max_parallel_workers = 0 # switch off parallel workers because of unsteadiness +max_parallel_workers_per_gather = 0 From 0482406ac063a818f2fe44449baff76e4b23118e Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 9 Jun 2021 16:34:02 +0500 Subject: [PATCH 028/203] Fix the problem with system-dependent detailed information in the EXPLAIN ANALYZE of a join node. Tags: aqo. --- expected/aqo_fdw.out | 31 +++++++++++++++---------------- sql/aqo_fdw.sql | 3 ++- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index 23cd2f3f..d947c9c7 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -75,29 +75,28 @@ SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants (5 rows) -- Trivial JOIN push-down. 
-EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +EXPLAIN (COSTS OFF) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; - QUERY PLAN ------------------------------------------------------------- - Merge Join (actual rows=1 loops=1) - AQO not used + QUERY PLAN +------------------------------------ + Merge Join Merge Cond: (a.x = b.x) - -> Sort (actual rows=1 loops=1) - AQO not used + -> Sort Sort Key: a.x - Sort Method: quicksort Memory: 25kB - -> Foreign Scan on frgn a (actual rows=1 loops=1) - AQO not used - -> Sort (actual rows=1 loops=1) - AQO not used + -> Foreign Scan on frgn a + -> Sort Sort Key: b.x - Sort Method: quicksort Memory: 25kB - -> Foreign Scan on frgn b (actual rows=1 loops=1) - AQO not used + -> Foreign Scan on frgn b Using aqo: true AQO mode: LEARN JOINS: 0 -(18 rows) +(11 rows) + +SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; + x | x +---+--- + 1 | 1 +(1 row) EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index 38e31ea1..e8612339 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -41,7 +41,8 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants -- Trivial JOIN push-down. -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +EXPLAIN (COSTS OFF) +SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; From 91a78d96d64605e2f4c03204ff341359d87e9c26 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 9 Jun 2021 17:26:25 +0500 Subject: [PATCH 029/203] Reduce logging in the pgbench TAP test. Induced by requirement of comfort observation of a contrib-check in browser (By A.Lakhin report). Tags: aqo.
--- t/001_pgbench.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index fcb7f3fd..6b3f34a4 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -8,6 +8,7 @@ $node->init; $node->append_conf('postgresql.conf', qq{ shared_preload_libraries = 'aqo' + log_statement = 'none' aqo.mode = 'intelligent' aqo.log_ignorance = 'on' }); From 9f1fe2a68dd295e623313820049d79d287645b92 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 18 Jun 2021 08:43:23 +0500 Subject: [PATCH 030/203] Remove unneeded core patches. Arrange the PG13 core patch for smoother applying. --- aqo_pg10.patch | 795 ----------------------------------- aqo_pg11.patch | 909 ---------------------------------------- aqo_pg12.patch | 1059 ----------------------------------------------- aqo_pg13.patch | 140 +++---- aqo_pg9_6.patch | 758 --------------------------------- 5 files changed, 70 insertions(+), 3591 deletions(-) delete mode 100644 aqo_pg10.patch delete mode 100644 aqo_pg11.patch delete mode 100644 aqo_pg12.patch delete mode 100644 aqo_pg9_6.patch diff --git a/aqo_pg10.patch b/aqo_pg10.patch deleted file mode 100644 index 5c0cdf73..00000000 --- a/aqo_pg10.patch +++ /dev/null @@ -1,795 +0,0 @@ -diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index 953e74d..e3f381f 100644 ---- a/src/backend/commands/explain.c -+++ b/src/backend/commands/explain.c -@@ -46,6 +46,9 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; - /* Hook for plugins to get control in explain_get_index_name() */ - explain_get_index_name_hook_type explain_get_index_name_hook = NULL; - -+/* Hook for plugins to get control in ExplainOnePlan() */ -+ExplainOnePlan_hook_type ExplainOnePlan_hook = NULL; -+ - - /* OR-able flags for ExplainXMLTag() */ - #define X_OPENING 0 -@@ -599,6 +602,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, - 3, es); - } - -+ if (ExplainOnePlan_hook) -+ ExplainOnePlan_hook(plannedstmt, into, es, -+ 
queryString, params, planduration); -+ - ExplainCloseGroup("Query", NULL, true, es); - } - -diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index 4d67070..6b98fb5 100644 ---- a/src/backend/nodes/copyfuncs.c -+++ b/src/backend/nodes/copyfuncs.c -@@ -126,6 +126,12 @@ CopyPlanFields(const Plan *from, Plan *newnode) - COPY_NODE_FIELD(lefttree); - COPY_NODE_FIELD(righttree); - COPY_NODE_FIELD(initPlan); -+ COPY_SCALAR_FIELD(had_path); -+ COPY_NODE_FIELD(path_clauses); -+ COPY_NODE_FIELD(path_relids); -+ COPY_SCALAR_FIELD(path_jointype); -+ COPY_SCALAR_FIELD(path_parallel_workers); -+ COPY_SCALAR_FIELD(was_parametrized); - COPY_BITMAPSET_FIELD(extParam); - COPY_BITMAPSET_FIELD(allParam); - } -diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index b35acb7..c3da124 100644 ---- a/src/backend/optimizer/path/costsize.c -+++ b/src/backend/optimizer/path/costsize.c -@@ -100,6 +100,10 @@ - - #define LOG2(x) (log(x) / 0.693147180559945) - -+set_baserel_rows_estimate_hook_type set_baserel_rows_estimate_hook = NULL; -+get_parameterized_baserel_size_hook_type get_parameterized_baserel_size_hook = NULL; -+get_parameterized_joinrel_size_hook_type get_parameterized_joinrel_size_hook = NULL; -+set_joinrel_size_estimates_hook_type set_joinrel_size_estimates_hook = NULL; - - double seq_page_cost = DEFAULT_SEQ_PAGE_COST; - double random_page_cost = DEFAULT_RANDOM_PAGE_COST; -@@ -3996,6 +4000,49 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) - - - /* -+ * set_baserel_rows_estimate -+ * Set the rows estimate for the given base relation. -+ * -+ * Rows is the estimated number of output tuples after applying -+ * restriction clauses. -+ * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ * The hook must set rel->rows. 
-+ */ -+void -+set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) -+{ -+ if (set_baserel_rows_estimate_hook) -+ (*set_baserel_rows_estimate_hook) (root, rel); -+ else -+ set_baserel_rows_estimate_standard(root, rel); -+} -+ -+/* -+ * set_baserel_rows_estimate -+ * Set the rows estimate for the given base relation. -+ * -+ * Rows is the estimated number of output tuples after applying -+ * restriction clauses. -+ */ -+void -+set_baserel_rows_estimate_standard(PlannerInfo *root, RelOptInfo *rel) -+{ -+ double nrows; -+ -+ nrows = rel->tuples * -+ clauselist_selectivity(root, -+ rel->baserestrictinfo, -+ 0, -+ JOIN_INNER, -+ NULL); -+ -+ rel->rows = clamp_row_est(nrows); -+} -+ -+/* - * set_baserel_size_estimates - * Set the size estimates for the given base relation. - * -@@ -4011,19 +4058,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) - void - set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) - { -- double nrows; -- - /* Should only be applied to base relations */ - Assert(rel->relid > 0); - -- nrows = rel->tuples * -- clauselist_selectivity(root, -- rel->baserestrictinfo, -- 0, -- JOIN_INNER, -- NULL); -- -- rel->rows = clamp_row_est(nrows); -+ set_baserel_rows_estimate(root, rel); - - cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); - -@@ -4034,13 +4072,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) - * get_parameterized_baserel_size - * Make a size estimate for a parameterized scan of a base relation. - * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. 
-+ */ -+double -+get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, -+ List *param_clauses) -+{ -+ if (get_parameterized_baserel_size_hook) -+ return (*get_parameterized_baserel_size_hook) (root, rel, -+ param_clauses); -+ else -+ return get_parameterized_baserel_size_standard(root, rel, -+ param_clauses); -+} -+ -+/* -+ * get_parameterized_baserel_size_standard -+ * Make a size estimate for a parameterized scan of a base relation. -+ * - * 'param_clauses' lists the additional join clauses to be used. - * - * set_baserel_size_estimates must have been applied already. - */ - double --get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, -- List *param_clauses) -+get_parameterized_baserel_size_standard(PlannerInfo *root, RelOptInfo *rel, -+ List *param_clauses) - { - List *allclauses; - double nrows; -@@ -4070,6 +4128,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, - * set_joinrel_size_estimates - * Set the size estimates for the given join relation. - * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ * The hook must set rel->rows value. -+ */ -+void -+set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist) -+{ -+ if (set_joinrel_size_estimates_hook) -+ (*set_joinrel_size_estimates_hook) (root, rel, -+ outer_rel, -+ inner_rel, -+ sjinfo, -+ restrictlist); -+ else -+ set_joinrel_size_estimates_standard(root, rel, -+ outer_rel, -+ inner_rel, -+ sjinfo, -+ restrictlist); -+} -+ -+/* -+ * set_joinrel_size_estimates_standard -+ * Set the size estimates for the given join relation. -+ * - * The rel's targetlist must have been constructed already, and a - * restriction clause list that matches the given component rels must - * be provided. 
-@@ -4089,11 +4177,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, - * build_joinrel_tlist, and baserestrictcost is not used for join rels. - */ - void --set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, -- RelOptInfo *outer_rel, -- RelOptInfo *inner_rel, -- SpecialJoinInfo *sjinfo, -- List *restrictlist) -+set_joinrel_size_estimates_standard(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist) - { - rel->rows = calc_joinrel_size_estimate(root, - outer_rel, -@@ -4108,6 +4196,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - * get_parameterized_joinrel_size - * Make a size estimate for a parameterized scan of a join relation. - * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ */ -+double -+get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses) -+{ -+ if (get_parameterized_joinrel_size_hook) -+ return (*get_parameterized_joinrel_size_hook) (root, rel, -+ outer_path, -+ inner_path, -+ sjinfo, -+ restrict_clauses); -+ else -+ return get_parameterized_joinrel_size_standard(root, rel, -+ outer_path, -+ inner_path, -+ sjinfo, -+ restrict_clauses); -+} -+ -+/* -+ * get_parameterized_joinrel_size_standard -+ * Make a size estimate for a parameterized scan of a join relation. -+ * - * 'rel' is the joinrel under consideration. - * 'outer_path', 'inner_path' are (probably also parameterized) Paths that - * produce the relations being joined. -@@ -4120,11 +4237,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - * set_joinrel_size_estimates must have been applied already. 
- */ - double --get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, -- Path *outer_path, -- Path *inner_path, -- SpecialJoinInfo *sjinfo, -- List *restrict_clauses) -+get_parameterized_joinrel_size_standard(PlannerInfo *root, RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses) - { - double nrows; - -diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 2821662..12e643f 100644 ---- a/src/backend/optimizer/plan/createplan.c -+++ b/src/backend/optimizer/plan/createplan.c -@@ -67,6 +67,8 @@ - #define CP_SMALL_TLIST 0x0002 /* Prefer narrower tlists */ - #define CP_LABEL_TLIST 0x0004 /* tlist must contain sortgrouprefs */ - -+/* Hook for plugins to get control in creating plan from path */ -+copy_generic_path_info_hook_type copy_generic_path_info_hook = NULL; - - static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path, - int flags); -@@ -160,7 +162,7 @@ static List *fix_indexorderby_references(PlannerInfo *root, IndexPath *index_pat - static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol); - static List *get_switched_clauses(List *clauses, Relids outerrelids); - static List *order_qual_clauses(PlannerInfo *root, List *clauses); --static void copy_generic_path_info(Plan *dest, Path *src); -+static void copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src); - static void copy_plan_costsize(Plan *dest, Plan *src); - static void label_sort_with_costsize(PlannerInfo *root, Sort *plan, - double limit_tuples); -@@ -1025,7 +1027,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) - false)), - NULL); - -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - - return plan; - } -@@ -1051,7 +1053,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) - - plan = make_append(subplans, tlist, best_path->partitioned_rels); - -- 
copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return (Plan *) plan; - } -@@ -1079,7 +1081,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) - * prepare_sort_from_pathkeys on it before we do so on the individual - * child plans, to make cross-checking the sort info easier. - */ -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - plan->targetlist = tlist; - plan->qual = NIL; - plan->lefttree = NULL; -@@ -1185,7 +1187,7 @@ create_result_plan(PlannerInfo *root, ResultPath *best_path) - - plan = make_result(tlist, (Node *) quals, NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1210,7 +1212,7 @@ create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path) - - plan = make_project_set(tlist, subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1238,7 +1240,7 @@ create_material_plan(PlannerInfo *root, MaterialPath *best_path, int flags) - - plan = make_material(subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1442,7 +1444,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) - } - - /* Copy cost data from Path to Plan */ -- copy_generic_path_info(plan, &best_path->path); -+ copy_generic_path_info(root, plan, &best_path->path); - - return plan; - } -@@ -1475,7 +1477,7 @@ create_gather_plan(PlannerInfo *root, GatherPath *best_path) - best_path->single_copy, - subplan); - -- copy_generic_path_info(&gather_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &gather_plan->plan, &best_path->path); - - /* use parallel mode for parallel 
plans. */ - root->glob->parallelModeNeeded = true; -@@ -1504,7 +1506,7 @@ create_gather_merge_plan(PlannerInfo *root, GatherMergePath *best_path) - gm_plan = makeNode(GatherMerge); - gm_plan->plan.targetlist = tlist; - gm_plan->num_workers = best_path->num_workers; -- copy_generic_path_info(&gm_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &gm_plan->plan, &best_path->path); - - /* Assign the rescan Param. */ - gm_plan->rescan_param = SS_assign_special_param(root); -@@ -1595,7 +1597,7 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path) - /* We need a Result node */ - plan = (Plan *) make_result(tlist, NULL, subplan); - -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - } - - return plan; -@@ -1654,7 +1656,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags) - - plan = make_sort_from_pathkeys(subplan, best_path->path.pathkeys); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1691,7 +1693,7 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path) - extract_grouping_ops(best_path->groupClause), - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1719,7 +1721,7 @@ create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flag - best_path->path.pathkeys, - best_path->numkeys); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1760,7 +1762,7 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) - best_path->numGroups, - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1959,7 +1961,7 @@ 
create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) - subplan); - - /* Copy cost data from Path to Plan */ -- copy_generic_path_info(&plan->plan, &best_path->path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - } - - return (Plan *) plan; -@@ -2015,7 +2017,7 @@ create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path) - - plan = make_result(tlist, (Node *) best_path->quals, NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - /* - * During setrefs.c, we'll need to replace references to the Agg nodes -@@ -2109,7 +2111,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) - wc->endOffset, - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2251,7 +2253,7 @@ create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags) - best_path->firstFlag, - numGroups); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2287,7 +2289,7 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path) - best_path->distinctList, - numGroups); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2310,7 +2312,7 @@ create_lockrows_plan(PlannerInfo *root, LockRowsPath *best_path, - - plan = make_lockrows(subplan, best_path->rowMarks, best_path->epqParam); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2369,7 +2371,7 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) - best_path->onconflict, - best_path->epqParam); - -- copy_generic_path_info(&plan->plan, &best_path->path); -+ 
copy_generic_path_info(root, &plan->plan, &best_path->path); - - return plan; - } -@@ -2393,7 +2395,7 @@ create_limit_plan(PlannerInfo *root, LimitPath *best_path, int flags) - best_path->limitOffset, - best_path->limitCount); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2439,7 +2441,7 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path, - scan_clauses, - scan_relid); - -- copy_generic_path_info(&scan_plan->plan, best_path); -+ copy_generic_path_info(root, &scan_plan->plan, best_path); - - return scan_plan; - } -@@ -2485,7 +2487,7 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path, - scan_relid, - tsc); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -2666,7 +2668,7 @@ create_indexscan_plan(PlannerInfo *root, - indexorderbyops, - best_path->indexscandir); - -- copy_generic_path_info(&scan_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->plan, &best_path->path); - - return scan_plan; - } -@@ -2781,7 +2783,7 @@ create_bitmap_scan_plan(PlannerInfo *root, - bitmapqualorig, - baserelid); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -3046,7 +3048,7 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path, - scan_relid, - tidquals); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -3096,7 +3098,7 @@ create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path, - scan_relid, - subplan); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - 
} -@@ -3139,7 +3141,7 @@ create_functionscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_functionscan(tlist, scan_clauses, scan_relid, - functions, rte->funcordinality); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3182,7 +3184,7 @@ create_tablefuncscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_tablefuncscan(tlist, scan_clauses, scan_relid, - tablefunc); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3226,7 +3228,7 @@ create_valuesscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_valuesscan(tlist, scan_clauses, scan_relid, - values_lists); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3319,7 +3321,7 @@ create_ctescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_ctescan(tlist, scan_clauses, scan_relid, - plan_id, cte_param_id); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3358,7 +3360,7 @@ create_namedtuplestorescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_namedtuplestorescan(tlist, scan_clauses, scan_relid, - rte->enrname); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3418,7 +3420,7 @@ create_worktablescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_worktablescan(tlist, scan_clauses, scan_relid, - cteroot->wt_param_id); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3478,7 +3480,7 @@ 
create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, - outer_plan); - - /* Copy cost data from Path to Plan; no need to make FDW do this */ -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - /* Copy foreign server OID; likewise, no need to make FDW do this */ - scan_plan->fs_server = rel->serverid; -@@ -3612,7 +3614,7 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path, - * Copy cost data from Path to Plan; no need to make custom-plan providers - * do this - */ -- copy_generic_path_info(&cplan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &cplan->scan.plan, &best_path->path); - - /* Likewise, copy the relids that are represented by this custom scan */ - cplan->custom_relids = best_path->path.parent->relids; -@@ -3744,7 +3746,7 @@ create_nestloop_plan(PlannerInfo *root, - best_path->jointype, - best_path->inner_unique); - -- copy_generic_path_info(&join_plan->join.plan, &best_path->path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->path); - - return join_plan; - } -@@ -4049,7 +4051,7 @@ create_mergejoin_plan(PlannerInfo *root, - best_path->skip_mark_restore); - - /* Costs of sort and material steps are included in path cost already */ -- copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->jpath.path); - - return join_plan; - } -@@ -4183,7 +4185,7 @@ create_hashjoin_plan(PlannerInfo *root, - best_path->jpath.jointype, - best_path->jpath.inner_unique); - -- copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->jpath.path); - - return join_plan; - } -@@ -4857,7 +4859,7 @@ order_qual_clauses(PlannerInfo *root, List *clauses) - * Also copy the parallel-related flags, which the executor *will* use. 
- */ - static void --copy_generic_path_info(Plan *dest, Path *src) -+copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src) - { - dest->startup_cost = src->startup_cost; - dest->total_cost = src->total_cost; -@@ -4865,6 +4867,9 @@ copy_generic_path_info(Plan *dest, Path *src) - dest->plan_width = src->pathtarget->width; - dest->parallel_aware = src->parallel_aware; - dest->parallel_safe = src->parallel_safe; -+ -+ if (copy_generic_path_info_hook) -+ (*copy_generic_path_info_hook) (root, dest, src); - } - - /* -diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h -index 78822b7..da814ad 100644 ---- a/src/include/commands/explain.h -+++ b/src/include/commands/explain.h -@@ -60,6 +60,12 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; - typedef const char *(*explain_get_index_name_hook_type) (Oid indexId); - extern PGDLLIMPORT explain_get_index_name_hook_type explain_get_index_name_hook; - -+/* Hook for plugins to get control in ExplainOnePlan() */ -+typedef void (*ExplainOnePlan_hook_type) (PlannedStmt *plannedstmt, IntoClause *into, -+ ExplainState *es, const char *queryString, -+ ParamListInfo params, const instr_time *planduration); -+extern PGDLLIMPORT ExplainOnePlan_hook_type ExplainOnePlan_hook; -+ - - extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, const char *queryString, - ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest); -diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index a382331..a014e17 100644 ---- a/src/include/nodes/plannodes.h -+++ b/src/include/nodes/plannodes.h -@@ -149,6 +149,16 @@ typedef struct Plan - * subselects) */ - - /* -+ * information for adaptive query optimization -+ */ -+ bool had_path; -+ List *path_clauses; -+ List *path_relids; -+ JoinType path_jointype; -+ int path_parallel_workers; -+ bool was_parametrized; -+ -+ /* - * Information for management of parameter-change-driven rescanning - * - * extParam includes 
the paramIDs of all external PARAM_EXEC params -diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h -index 63feba0..9c5fa96 100644 ---- a/src/include/optimizer/cost.h -+++ b/src/include/optimizer/cost.h -@@ -39,6 +39,34 @@ typedef enum - } ConstraintExclusionType; - - -+/* Hook for plugins to get control of cardinality estimation */ -+typedef void (*set_baserel_rows_estimate_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel); -+extern PGDLLIMPORT set_baserel_rows_estimate_hook_type -+ set_baserel_rows_estimate_hook; -+typedef double (*get_parameterized_baserel_size_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ List *param_clauses); -+extern PGDLLIMPORT get_parameterized_baserel_size_hook_type -+ get_parameterized_baserel_size_hook; -+typedef double (*get_parameterized_joinrel_size_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses); -+extern PGDLLIMPORT get_parameterized_joinrel_size_hook_type -+ get_parameterized_joinrel_size_hook; -+typedef void (*set_joinrel_size_estimates_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist); -+extern PGDLLIMPORT set_joinrel_size_estimates_hook_type -+ set_joinrel_size_estimates_hook; -+ -+ - /* - * prototypes for costsize.c - * routines to compute costs and sizes -@@ -164,21 +192,37 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, - SpecialJoinInfo *sjinfo, - List *restrictlist, - SemiAntiJoinFactors *semifactors); -+extern void set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel); -+extern void set_baserel_rows_estimate_standard(PlannerInfo *root, RelOptInfo *rel); - extern void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern double get_parameterized_baserel_size(PlannerInfo *root, - RelOptInfo *rel, - List *param_clauses); -+extern double 
get_parameterized_baserel_size_standard(PlannerInfo *root, -+ RelOptInfo *rel, -+ List *param_clauses); - extern double get_parameterized_joinrel_size(PlannerInfo *root, - RelOptInfo *rel, - Path *outer_path, - Path *inner_path, - SpecialJoinInfo *sjinfo, - List *restrict_clauses); -+extern double get_parameterized_joinrel_size_standard(PlannerInfo *root, -+ RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses); - extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - SpecialJoinInfo *sjinfo, - List *restrictlist); -+extern void set_joinrel_size_estimates_standard(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist); - extern void set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern void set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern void set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel); -diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h -index f1d16cf..4229886 100644 ---- a/src/include/optimizer/planmain.h -+++ b/src/include/optimizer/planmain.h -@@ -33,6 +33,12 @@ extern int force_parallel_mode; - /* query_planner callback to compute query_pathkeys */ - typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra); - -+/* hook for plugins to get control in creating plan from path */ -+typedef void (*copy_generic_path_info_hook_type) (PlannerInfo *root, -+ Plan *dest, Path *src); -+ -+extern PGDLLIMPORT copy_generic_path_info_hook_type copy_generic_path_info_hook; -+ - /* - * prototypes for plan/planmain.c - */ diff --git a/aqo_pg11.patch b/aqo_pg11.patch deleted file mode 100644 index df6a6d79..00000000 --- a/aqo_pg11.patch +++ /dev/null @@ -1,909 +0,0 @@ -diff --git a/contrib/Makefile b/contrib/Makefile -index 92184ed487..9b91ad1952 100644 ---- 
a/contrib/Makefile -+++ b/contrib/Makefile -@@ -7,6 +7,7 @@ include $(top_builddir)/src/Makefile.global - SUBDIRS = \ - adminpack \ - amcheck \ -+ aqo \ - auth_delay \ - auto_explain \ - bloom \ -diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index 799a22e9d5..ece4ffffd9 100644 ---- a/src/backend/commands/explain.c -+++ b/src/backend/commands/explain.c -@@ -47,6 +47,9 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; - /* Hook for plugins to get control in explain_get_index_name() */ - explain_get_index_name_hook_type explain_get_index_name_hook = NULL; - -+/* Hook for plugins to get control in ExplainOnePlan() */ -+ExplainOnePlan_hook_type ExplainOnePlan_hook = NULL; -+ - - /* OR-able flags for ExplainXMLTag() */ - #define X_OPENING 0 -@@ -594,6 +597,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, - ExplainPropertyFloat("Execution Time", "ms", 1000.0 * totaltime, 3, - es); - -+ if (ExplainOnePlan_hook) -+ ExplainOnePlan_hook(plannedstmt, into, es, -+ queryString, params, planduration); -+ - ExplainCloseGroup("Query", NULL, true, es); - } - -@@ -1448,6 +1455,24 @@ ExplainNode(PlanState *planstate, List *ancestors, - appendStringInfo(es->str, - " (actual rows=%.0f loops=%.0f)", - rows, nloops); -+#ifdef AQO_EXPLAIN -+ if (es->verbose && plan) -+ { -+ int wrkrs = 1; -+ double error = -1.; -+ -+ if (planstate->worker_instrument && plan->parallel_aware) -+ wrkrs = planstate->worker_instrument->num_workers + 1; -+ -+ if (plan->predicted_cardinality > 0.) -+ { -+ error = 100. 
* (plan->predicted_cardinality-(rows*wrkrs)) / (rows * wrkrs); -+ appendStringInfo(es->str, -+ " (AQO predicted: cardinality=%.0lf, error=%.0lf%%, fss=%d)", -+ plan->predicted_cardinality, error, plan->fss_hash); -+ } -+ } -+#endif - } - else - { -diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index 648758de4a..5cc1491507 100644 ---- a/src/backend/nodes/copyfuncs.c -+++ b/src/backend/nodes/copyfuncs.c -@@ -127,6 +127,12 @@ CopyPlanFields(const Plan *from, Plan *newnode) - COPY_NODE_FIELD(lefttree); - COPY_NODE_FIELD(righttree); - COPY_NODE_FIELD(initPlan); -+ COPY_SCALAR_FIELD(had_path); -+ COPY_NODE_FIELD(path_clauses); -+ COPY_NODE_FIELD(path_relids); -+ COPY_SCALAR_FIELD(path_jointype); -+ COPY_SCALAR_FIELD(path_parallel_workers); -+ COPY_SCALAR_FIELD(was_parametrized); - COPY_BITMAPSET_FIELD(extParam); - COPY_BITMAPSET_FIELD(allParam); - } -diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index c7400941ee..a07c1551dc 100644 ---- a/src/backend/optimizer/path/costsize.c -+++ b/src/backend/optimizer/path/costsize.c -@@ -97,6 +97,10 @@ - #include "utils/spccache.h" - #include "utils/tuplesort.h" - -+set_baserel_rows_estimate_hook_type set_baserel_rows_estimate_hook = NULL; -+get_parameterized_baserel_size_hook_type get_parameterized_baserel_size_hook = NULL; -+get_parameterized_joinrel_size_hook_type get_parameterized_joinrel_size_hook = NULL; -+set_joinrel_size_estimates_hook_type set_joinrel_size_estimates_hook = NULL; - - #define LOG2(x) (log(x) / 0.693147180559945) - -@@ -4283,6 +4287,49 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) - } - - -+/* -+ * set_baserel_rows_estimate -+ * Set the rows estimate for the given base relation. -+ * -+ * Rows is the estimated number of output tuples after applying -+ * restriction clauses. 
-+ * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ * The hook must set rel->rows. -+ */ -+void -+set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) -+{ -+ if (set_baserel_rows_estimate_hook) -+ (*set_baserel_rows_estimate_hook) (root, rel); -+ else -+ set_baserel_rows_estimate_standard(root, rel); -+} -+ -+/* -+ * set_baserel_rows_estimate -+ * Set the rows estimate for the given base relation. -+ * -+ * Rows is the estimated number of output tuples after applying -+ * restriction clauses. -+ */ -+void -+set_baserel_rows_estimate_standard(PlannerInfo *root, RelOptInfo *rel) -+{ -+ double nrows; -+ -+ nrows = rel->tuples * -+ clauselist_selectivity(root, -+ rel->baserestrictinfo, -+ 0, -+ JOIN_INNER, -+ NULL); -+ -+ rel->rows = clamp_row_est(nrows); -+} -+ - /* - * set_baserel_size_estimates - * Set the size estimates for the given base relation. -@@ -4299,19 +4346,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) - void - set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) - { -- double nrows; -- - /* Should only be applied to base relations */ - Assert(rel->relid > 0); - -- nrows = rel->tuples * -- clauselist_selectivity(root, -- rel->baserestrictinfo, -- 0, -- JOIN_INNER, -- NULL); -- -- rel->rows = clamp_row_est(nrows); -+ set_baserel_rows_estimate(root, rel); - - cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); - -@@ -4322,13 +4360,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) - * get_parameterized_baserel_size - * Make a size estimate for a parameterized scan of a base relation. - * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. 
-+ */ -+double -+get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, -+ List *param_clauses) -+{ -+ if (get_parameterized_baserel_size_hook) -+ return (*get_parameterized_baserel_size_hook) (root, rel, -+ param_clauses); -+ else -+ return get_parameterized_baserel_size_standard(root, rel, -+ param_clauses); -+} -+ -+/* -+ * get_parameterized_baserel_size_standard -+ * Make a size estimate for a parameterized scan of a base relation. -+ * - * 'param_clauses' lists the additional join clauses to be used. - * - * set_baserel_size_estimates must have been applied already. - */ - double --get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, -- List *param_clauses) -+get_parameterized_baserel_size_standard(PlannerInfo *root, RelOptInfo *rel, -+ List *param_clauses) - { - List *allclauses; - double nrows; -@@ -4358,6 +4416,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, - * set_joinrel_size_estimates - * Set the size estimates for the given join relation. - * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ * The hook must set rel->rows value. -+ */ -+void -+set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist) -+{ -+ if (set_joinrel_size_estimates_hook) -+ (*set_joinrel_size_estimates_hook) (root, rel, -+ outer_rel, -+ inner_rel, -+ sjinfo, -+ restrictlist); -+ else -+ set_joinrel_size_estimates_standard(root, rel, -+ outer_rel, -+ inner_rel, -+ sjinfo, -+ restrictlist); -+} -+ -+/* -+ * set_joinrel_size_estimates_standard -+ * Set the size estimates for the given join relation. -+ * - * The rel's targetlist must have been constructed already, and a - * restriction clause list that matches the given component rels must - * be provided. 
-@@ -4377,11 +4465,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, - * build_joinrel_tlist, and baserestrictcost is not used for join rels. - */ - void --set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, -- RelOptInfo *outer_rel, -- RelOptInfo *inner_rel, -- SpecialJoinInfo *sjinfo, -- List *restrictlist) -+set_joinrel_size_estimates_standard(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist) - { - rel->rows = calc_joinrel_size_estimate(root, - rel, -@@ -4397,6 +4485,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - * get_parameterized_joinrel_size - * Make a size estimate for a parameterized scan of a join relation. - * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ */ -+double -+get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses) -+{ -+ if (get_parameterized_joinrel_size_hook) -+ return (*get_parameterized_joinrel_size_hook) (root, rel, -+ outer_path, -+ inner_path, -+ sjinfo, -+ restrict_clauses); -+ else -+ return get_parameterized_joinrel_size_standard(root, rel, -+ outer_path, -+ inner_path, -+ sjinfo, -+ restrict_clauses); -+} -+ -+/* -+ * get_parameterized_joinrel_size_standard -+ * Make a size estimate for a parameterized scan of a join relation. -+ * - * 'rel' is the joinrel under consideration. - * 'outer_path', 'inner_path' are (probably also parameterized) Paths that - * produce the relations being joined. -@@ -4409,11 +4526,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - * set_joinrel_size_estimates must have been applied already. 
- */ - double --get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, -- Path *outer_path, -- Path *inner_path, -- SpecialJoinInfo *sjinfo, -- List *restrict_clauses) -+get_parameterized_joinrel_size_standard(PlannerInfo *root, RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses) - { - double nrows; - -diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 5f6d2bad7b..eecdf53c21 100644 ---- a/src/backend/optimizer/plan/createplan.c -+++ b/src/backend/optimizer/plan/createplan.c -@@ -71,6 +71,8 @@ - #define CP_LABEL_TLIST 0x0004 /* tlist must contain sortgrouprefs */ - #define CP_IGNORE_TLIST 0x0008 /* caller will replace tlist */ - -+/* Hook for plugins to get control in creating plan from path */ -+copy_generic_path_info_hook_type copy_generic_path_info_hook = NULL; - - static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path, - int flags); -@@ -157,7 +159,7 @@ static List *fix_indexorderby_references(PlannerInfo *root, IndexPath *index_pat - static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol); - static List *get_switched_clauses(List *clauses, Relids outerrelids); - static List *order_qual_clauses(PlannerInfo *root, List *clauses); --static void copy_generic_path_info(Plan *dest, Path *src); -+static void copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src); - static void copy_plan_costsize(Plan *dest, Plan *src); - static void label_sort_with_costsize(PlannerInfo *root, Sort *plan, - double limit_tuples); -@@ -1052,7 +1054,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) - false)), - NULL); - -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - - return plan; - } -@@ -1119,7 +1121,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) - tlist, best_path->partitioned_rels, - partpruneinfo); - -- 
copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return (Plan *) plan; - } -@@ -1150,7 +1152,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, - * prepare_sort_from_pathkeys on it before we do so on the individual - * child plans, to make cross-checking the sort info easier. - */ -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - plan->targetlist = tlist; - plan->qual = NIL; - plan->lefttree = NULL; -@@ -1274,7 +1276,7 @@ create_result_plan(PlannerInfo *root, ResultPath *best_path) - - plan = make_result(tlist, (Node *) quals, NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1299,7 +1301,7 @@ create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path) - - plan = make_project_set(tlist, subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1327,7 +1329,7 @@ create_material_plan(PlannerInfo *root, MaterialPath *best_path, int flags) - - plan = make_material(subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1521,7 +1523,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) - } - - /* Copy cost data from Path to Plan */ -- copy_generic_path_info(plan, &best_path->path); -+ copy_generic_path_info(root, plan, &best_path->path); - - return plan; - } -@@ -1554,7 +1556,7 @@ create_gather_plan(PlannerInfo *root, GatherPath *best_path) - best_path->single_copy, - subplan); - -- copy_generic_path_info(&gather_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &gather_plan->plan, &best_path->path); - - /* use parallel mode for parallel 
plans. */ - root->glob->parallelModeNeeded = true; -@@ -1583,7 +1585,7 @@ create_gather_merge_plan(PlannerInfo *root, GatherMergePath *best_path) - gm_plan = makeNode(GatherMerge); - gm_plan->plan.targetlist = tlist; - gm_plan->num_workers = best_path->num_workers; -- copy_generic_path_info(&gm_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &gm_plan->plan, &best_path->path); - - /* Assign the rescan Param. */ - gm_plan->rescan_param = assign_special_exec_param(root); -@@ -1711,7 +1713,7 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path, int flags) - /* We need a Result node */ - plan = (Plan *) make_result(tlist, NULL, subplan); - -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - } - - return plan; -@@ -1812,7 +1814,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags) - IS_OTHER_REL(best_path->subpath->parent) ? - best_path->path.parent->relids : NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1849,7 +1851,7 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path) - extract_grouping_ops(best_path->groupClause), - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1877,7 +1879,7 @@ create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flag - best_path->path.pathkeys, - best_path->numkeys); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1918,7 +1920,7 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) - best_path->numGroups, - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ 
-2117,7 +2119,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) - subplan); - - /* Copy cost data from Path to Plan */ -- copy_generic_path_info(&plan->plan, &best_path->path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - } - - return (Plan *) plan; -@@ -2173,7 +2175,7 @@ create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path) - - plan = make_result(tlist, (Node *) best_path->quals, NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - /* - * During setrefs.c, we'll need to replace references to the Agg nodes -@@ -2281,7 +2283,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) - wc->inRangeNullsFirst, - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2317,7 +2319,7 @@ create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags) - best_path->firstFlag, - numGroups); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2353,7 +2355,7 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path) - best_path->distinctList, - numGroups); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2376,7 +2378,7 @@ create_lockrows_plan(PlannerInfo *root, LockRowsPath *best_path, - - plan = make_lockrows(subplan, best_path->rowMarks, best_path->epqParam); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2437,7 +2439,7 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) - best_path->onconflict, - best_path->epqParam); - -- 
copy_generic_path_info(&plan->plan, &best_path->path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - return plan; - } -@@ -2461,7 +2463,7 @@ create_limit_plan(PlannerInfo *root, LimitPath *best_path, int flags) - best_path->limitOffset, - best_path->limitCount); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2507,7 +2509,7 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path, - scan_clauses, - scan_relid); - -- copy_generic_path_info(&scan_plan->plan, best_path); -+ copy_generic_path_info(root, &scan_plan->plan, best_path); - - return scan_plan; - } -@@ -2553,7 +2555,7 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path, - scan_relid, - tsc); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -2734,7 +2736,7 @@ create_indexscan_plan(PlannerInfo *root, - indexorderbyops, - best_path->indexscandir); - -- copy_generic_path_info(&scan_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->plan, &best_path->path); - - return scan_plan; - } -@@ -2849,7 +2851,7 @@ create_bitmap_scan_plan(PlannerInfo *root, - bitmapqualorig, - baserelid); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -3114,7 +3116,7 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path, - scan_relid, - tidquals); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -3164,7 +3166,7 @@ create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path, - scan_relid, - subplan); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, 
&scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -3207,7 +3209,7 @@ create_functionscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_functionscan(tlist, scan_clauses, scan_relid, - functions, rte->funcordinality); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3250,7 +3252,7 @@ create_tablefuncscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_tablefuncscan(tlist, scan_clauses, scan_relid, - tablefunc); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3294,7 +3296,7 @@ create_valuesscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_valuesscan(tlist, scan_clauses, scan_relid, - values_lists); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3387,7 +3389,7 @@ create_ctescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_ctescan(tlist, scan_clauses, scan_relid, - plan_id, cte_param_id); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3426,7 +3428,7 @@ create_namedtuplestorescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_namedtuplestorescan(tlist, scan_clauses, scan_relid, - rte->enrname); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3486,7 +3488,7 @@ create_worktablescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_worktablescan(tlist, scan_clauses, scan_relid, - cteroot->wt_param_id); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, 
best_path); - - return scan_plan; - } -@@ -3546,7 +3548,7 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, - outer_plan); - - /* Copy cost data from Path to Plan; no need to make FDW do this */ -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - /* Copy foreign server OID; likewise, no need to make FDW do this */ - scan_plan->fs_server = rel->serverid; -@@ -3680,7 +3682,7 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path, - * Copy cost data from Path to Plan; no need to make custom-plan providers - * do this - */ -- copy_generic_path_info(&cplan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &cplan->scan.plan, &best_path->path); - - /* Likewise, copy the relids that are represented by this custom scan */ - cplan->custom_relids = best_path->path.parent->relids; -@@ -3782,7 +3784,7 @@ create_nestloop_plan(PlannerInfo *root, - best_path->jointype, - best_path->inner_unique); - -- copy_generic_path_info(&join_plan->join.plan, &best_path->path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->path); - - return join_plan; - } -@@ -4089,7 +4091,7 @@ create_mergejoin_plan(PlannerInfo *root, - best_path->skip_mark_restore); - - /* Costs of sort and material steps are included in path cost already */ -- copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->jpath.path); - - return join_plan; - } -@@ -4235,7 +4237,7 @@ create_hashjoin_plan(PlannerInfo *root, - best_path->jpath.jointype, - best_path->jpath.inner_unique); - -- copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->jpath.path); - - return join_plan; - } -@@ -4781,7 +4783,7 @@ order_qual_clauses(PlannerInfo *root, List *clauses) - * Also copy the parallel-related flags, which the 
executor *will* use. - */ - static void --copy_generic_path_info(Plan *dest, Path *src) -+copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src) - { - dest->startup_cost = src->startup_cost; - dest->total_cost = src->total_cost; -@@ -4789,6 +4791,9 @@ copy_generic_path_info(Plan *dest, Path *src) - dest->plan_width = src->pathtarget->width; - dest->parallel_aware = src->parallel_aware; - dest->parallel_safe = src->parallel_safe; -+ -+ if (copy_generic_path_info_hook) -+ (*copy_generic_path_info_hook) (root, dest, src); - } - - /* -diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c -index 284215a717..f5249aca95 100644 ---- a/src/backend/optimizer/util/relnode.c -+++ b/src/backend/optimizer/util/relnode.c -@@ -1226,6 +1226,7 @@ find_childrel_parents(PlannerInfo *root, RelOptInfo *rel) - } - - -+set_parampathinfo_postinit_hook_type parampathinfo_postinit_hook = NULL; - /* - * get_baserel_parampathinfo - * Get the ParamPathInfo for a parameterized path for a base relation, -@@ -1294,6 +1295,10 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, - ppi->ppi_req_outer = required_outer; - ppi->ppi_rows = rows; - ppi->ppi_clauses = pclauses; -+ -+ if (parampathinfo_postinit_hook) -+ (*parampathinfo_postinit_hook)(ppi); -+ - baserel->ppilist = lappend(baserel->ppilist, ppi); - - return ppi; -@@ -1519,6 +1524,10 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, - ppi->ppi_req_outer = required_outer; - ppi->ppi_rows = rows; - ppi->ppi_clauses = NIL; -+ -+ if (parampathinfo_postinit_hook) -+ (*parampathinfo_postinit_hook)(ppi); -+ - joinrel->ppilist = lappend(joinrel->ppilist, ppi); - - return ppi; -diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h -index d3f70fda08..2dd4200282 100644 ---- a/src/include/commands/explain.h -+++ b/src/include/commands/explain.h -@@ -61,6 +61,12 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; - typedef const char 
*(*explain_get_index_name_hook_type) (Oid indexId); - extern PGDLLIMPORT explain_get_index_name_hook_type explain_get_index_name_hook; - -+/* Hook for plugins to get control in ExplainOnePlan() */ -+typedef void (*ExplainOnePlan_hook_type) (PlannedStmt *plannedstmt, IntoClause *into, -+ ExplainState *es, const char *queryString, -+ ParamListInfo params, const instr_time *planduration); -+extern PGDLLIMPORT ExplainOnePlan_hook_type ExplainOnePlan_hook; -+ - - extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, const char *queryString, - ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest); -diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 249aa6520a..13d0961ed7 100644 ---- a/src/include/nodes/plannodes.h -+++ b/src/include/nodes/plannodes.h -@@ -151,6 +151,19 @@ typedef struct Plan - List *initPlan; /* Init Plan nodes (un-correlated expr - * subselects) */ - -+ /* -+ * information for adaptive query optimization -+ */ -+ bool had_path; -+ List *path_clauses; -+ List *path_relids; -+ JoinType path_jointype; -+ int path_parallel_workers; -+ bool was_parametrized; -+ /* For Adaptive optimization DEBUG purposes */ -+ double predicted_cardinality; -+ int fss_hash; -+ - /* - * Information for management of parameter-change-driven rescanning - * -diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h -index e61c1a2a29..2be4cad86b 100644 ---- a/src/include/nodes/relation.h -+++ b/src/include/nodes/relation.h -@@ -694,6 +694,10 @@ typedef struct RelOptInfo - Relids top_parent_relids; /* Relids of topmost parents (if "other" - * rel) */ - -+ /* For Adaptive optimization DEBUG purposes */ -+ double predicted_cardinality; -+ int fss_hash; -+ - /* used for partitioned relations */ - PartitionScheme part_scheme; /* Partitioning scheme. 
*/ - int nparts; /* number of partitions */ -@@ -1048,6 +1052,10 @@ typedef struct ParamPathInfo - Relids ppi_req_outer; /* rels supplying parameters used by path */ - double ppi_rows; /* estimated number of result tuples */ - List *ppi_clauses; /* join clauses available from outer rels */ -+ -+ /* AQO DEBUG purposes */ -+ double predicted_ppi_rows; -+ double fss_ppi_hash; - } ParamPathInfo; - - -diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h -index 77ca7ff837..f95ea34063 100644 ---- a/src/include/optimizer/cost.h -+++ b/src/include/optimizer/cost.h -@@ -39,6 +39,34 @@ typedef enum - } ConstraintExclusionType; - - -+/* Hook for plugins to get control of cardinality estimation */ -+typedef void (*set_baserel_rows_estimate_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel); -+extern PGDLLIMPORT set_baserel_rows_estimate_hook_type -+ set_baserel_rows_estimate_hook; -+typedef double (*get_parameterized_baserel_size_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ List *param_clauses); -+extern PGDLLIMPORT get_parameterized_baserel_size_hook_type -+ get_parameterized_baserel_size_hook; -+typedef double (*get_parameterized_joinrel_size_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses); -+extern PGDLLIMPORT get_parameterized_joinrel_size_hook_type -+ get_parameterized_joinrel_size_hook; -+typedef void (*set_joinrel_size_estimates_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist); -+extern PGDLLIMPORT set_joinrel_size_estimates_hook_type -+ set_joinrel_size_estimates_hook; -+ -+ - /* - * prototypes for costsize.c - * routines to compute costs and sizes -@@ -174,21 +202,37 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, - SpecialJoinInfo *sjinfo, - List *restrictlist, - SemiAntiJoinFactors *semifactors); -+extern void 
set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel); -+extern void set_baserel_rows_estimate_standard(PlannerInfo *root, RelOptInfo *rel); - extern void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern double get_parameterized_baserel_size(PlannerInfo *root, - RelOptInfo *rel, - List *param_clauses); -+extern double get_parameterized_baserel_size_standard(PlannerInfo *root, -+ RelOptInfo *rel, -+ List *param_clauses); - extern double get_parameterized_joinrel_size(PlannerInfo *root, - RelOptInfo *rel, - Path *outer_path, - Path *inner_path, - SpecialJoinInfo *sjinfo, - List *restrict_clauses); -+extern double get_parameterized_joinrel_size_standard(PlannerInfo *root, -+ RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses); - extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - SpecialJoinInfo *sjinfo, - List *restrictlist); -+extern void set_joinrel_size_estimates_standard(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist); - extern void set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern void set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern void set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel); -diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h -index 7c5ff22650..178e7888af 100644 ---- a/src/include/optimizer/pathnode.h -+++ b/src/include/optimizer/pathnode.h -@@ -17,6 +17,9 @@ - #include "nodes/bitmapset.h" - #include "nodes/relation.h" - -+typedef void (*set_parampathinfo_postinit_hook_type) (ParamPathInfo *ppi); -+ -+extern PGDLLIMPORT set_parampathinfo_postinit_hook_type parampathinfo_postinit_hook; - - /* - * prototypes for pathnode.c -diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h 
-index a081ca689a..d42da2980f 100644 ---- a/src/include/optimizer/planmain.h -+++ b/src/include/optimizer/planmain.h -@@ -34,6 +34,12 @@ extern bool parallel_leader_participation; - /* query_planner callback to compute query_pathkeys */ - typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra); - -+/* hook for plugins to get control in creating plan from path */ -+typedef void (*copy_generic_path_info_hook_type) (PlannerInfo *root, -+ Plan *dest, Path *src); -+ -+extern PGDLLIMPORT copy_generic_path_info_hook_type copy_generic_path_info_hook; -+ - /* - * prototypes for plan/planmain.c - */ diff --git a/aqo_pg12.patch b/aqo_pg12.patch deleted file mode 100644 index 2075911d..00000000 --- a/aqo_pg12.patch +++ /dev/null @@ -1,1059 +0,0 @@ -diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index 92969636b7..d05b07e037 100644 ---- a/src/backend/commands/explain.c -+++ b/src/backend/commands/explain.c -@@ -24,6 +24,7 @@ - #include "nodes/extensible.h" - #include "nodes/makefuncs.h" - #include "nodes/nodeFuncs.h" -+#include "optimizer/cost.h" - #include "parser/parsetree.h" - #include "rewrite/rewriteHandler.h" - #include "storage/bufmgr.h" -@@ -46,6 +47,9 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; - /* Hook for plugins to get control in explain_get_index_name() */ - explain_get_index_name_hook_type explain_get_index_name_hook = NULL; - -+/* Hook for plugins to get control in ExplainOnePlan() */ -+ExplainOnePlan_hook_type ExplainOnePlan_hook = NULL; -+ - - /* OR-able flags for ExplainXMLTag() */ - #define X_OPENING 0 -@@ -596,6 +600,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, - ExplainPropertyFloat("Execution Time", "ms", 1000.0 * totaltime, 3, - es); - -+ if (ExplainOnePlan_hook) -+ ExplainOnePlan_hook(plannedstmt, into, es, -+ queryString, params, planduration, queryEnv); -+ - ExplainCloseGroup("Query", NULL, true, es); - } - -@@ -1523,6 +1531,38 @@ ExplainNode(PlanState 
*planstate, List *ancestors, - appendStringInfo(es->str, - " (actual rows=%.0f loops=%.0f)", - rows, nloops); -+ -+#ifdef AQO_EXPLAIN -+ if (es->verbose && plan && planstate->instrument) -+ { -+ int wrkrs = 1; -+ double error = -1.; -+ -+ if (planstate->worker_instrument && IsParallelTuplesProcessing(plan)) -+ { -+ int i; -+ for (i = 0; i < planstate->worker_instrument->num_workers; i++) -+ { -+ Instrumentation *instrument = &planstate->worker_instrument->instrument[i]; -+ if (instrument->nloops <= 0) -+ continue; -+ wrkrs++; -+ } -+ } -+ -+ if (plan->predicted_cardinality > 0.) -+ { -+ error = 100. * (plan->predicted_cardinality - (rows*wrkrs)) -+ / plan->predicted_cardinality; -+ appendStringInfo(es->str, -+ " (AQO: cardinality=%.0lf, error=%.0lf%%, fsspace_hash=%d)", -+ plan->predicted_cardinality, error, plan->fss_hash); -+ } -+ else -+ appendStringInfo(es->str, " (AQO not used, fsspace_hash=%d)", -+ plan->fss_hash); -+ } -+#endif - } - else - { -diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index 78deade89b..b1470147e9 100644 ---- a/src/backend/nodes/copyfuncs.c -+++ b/src/backend/nodes/copyfuncs.c -@@ -126,6 +126,12 @@ CopyPlanFields(const Plan *from, Plan *newnode) - COPY_NODE_FIELD(lefttree); - COPY_NODE_FIELD(righttree); - COPY_NODE_FIELD(initPlan); -+ COPY_SCALAR_FIELD(had_path); -+ COPY_NODE_FIELD(path_clauses); -+ COPY_NODE_FIELD(path_relids); -+ COPY_SCALAR_FIELD(path_jointype); -+ COPY_SCALAR_FIELD(path_parallel_workers); -+ COPY_SCALAR_FIELD(was_parametrized); - COPY_BITMAPSET_FIELD(extParam); - COPY_BITMAPSET_FIELD(allParam); - } -diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index a2a9b1f7be..4b766b9885 100644 ---- a/src/backend/optimizer/path/costsize.c -+++ b/src/backend/optimizer/path/costsize.c -@@ -96,6 +96,10 @@ - #include "utils/spccache.h" - #include "utils/tuplesort.h" - -+set_baserel_rows_estimate_hook_type set_baserel_rows_estimate_hook = NULL; 
-+get_parameterized_baserel_size_hook_type get_parameterized_baserel_size_hook = NULL; -+get_parameterized_joinrel_size_hook_type get_parameterized_joinrel_size_hook = NULL; -+set_joinrel_size_estimates_hook_type set_joinrel_size_estimates_hook = NULL; - - #define LOG2(x) (log(x) / 0.693147180559945) - -@@ -176,7 +180,6 @@ static Cost append_nonpartial_cost(List *subpaths, int numpaths, - static void set_rel_width(PlannerInfo *root, RelOptInfo *rel); - static double relation_byte_size(double tuples, int width); - static double page_size(double tuples, int width); --static double get_parallel_divisor(Path *path); - - - /* -@@ -254,7 +257,7 @@ cost_seqscan(Path *path, PlannerInfo *root, - /* Adjust costing for parallelism, if used. */ - if (path->parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(path); -+ double parallel_divisor = get_parallel_divisor(path->parallel_workers); - - /* The CPU cost is divided among all the workers. */ - cpu_run_cost /= parallel_divisor; -@@ -733,7 +736,7 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count, - /* Adjust costing for parallelism, if used. */ - if (path->path.parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(&path->path); -+ double parallel_divisor = get_parallel_divisor(path->path.parallel_workers); - - path->path.rows = clamp_row_est(path->path.rows / parallel_divisor); - -@@ -1014,7 +1017,7 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, - /* Adjust costing for parallelism, if used. */ - if (path->parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(path); -+ double parallel_divisor = get_parallel_divisor(path->parallel_workers); - - /* The CPU cost is divided among all the workers. 
*/ - cpu_run_cost /= parallel_divisor; -@@ -1960,7 +1963,7 @@ cost_append(AppendPath *apath) - else /* parallel-aware */ - { - int i = 0; -- double parallel_divisor = get_parallel_divisor(&apath->path); -+ double parallel_divisor = get_parallel_divisor(apath->path.parallel_workers); - - /* Parallel-aware Append never produces ordered output. */ - Assert(apath->path.pathkeys == NIL); -@@ -1994,7 +1997,7 @@ cost_append(AppendPath *apath) - { - double subpath_parallel_divisor; - -- subpath_parallel_divisor = get_parallel_divisor(subpath); -+ subpath_parallel_divisor = get_parallel_divisor(subpath->parallel_workers); - apath->path.rows += subpath->rows * (subpath_parallel_divisor / - parallel_divisor); - apath->path.total_cost += subpath->total_cost; -@@ -2517,7 +2520,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, - /* For partial paths, scale row estimate. */ - if (path->path.parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(&path->path); -+ double parallel_divisor = get_parallel_divisor(path->path.parallel_workers); - - path->path.rows = - clamp_row_est(path->path.rows / parallel_divisor); -@@ -2963,7 +2966,7 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, - /* For partial paths, scale row estimate. */ - if (path->jpath.path.parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(&path->jpath.path); -+ double parallel_divisor = get_parallel_divisor(path->jpath.path.parallel_workers); - - path->jpath.path.rows = - clamp_row_est(path->jpath.path.rows / parallel_divisor); -@@ -3297,7 +3300,7 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, - * number, so we need to undo the division. - */ - if (parallel_hash) -- inner_path_rows_total *= get_parallel_divisor(inner_path); -+ inner_path_rows_total *= get_parallel_divisor(inner_path->parallel_workers); - - /* - * Get hash table size that executor would use for inner relation. 
-@@ -3393,7 +3396,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, - /* For partial paths, scale row estimate. */ - if (path->jpath.path.parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(&path->jpath.path); -+ double parallel_divisor = get_parallel_divisor(path->jpath.path.parallel_workers); - - path->jpath.path.rows = - clamp_row_est(path->jpath.path.rows / parallel_divisor); -@@ -4387,6 +4390,49 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) - } - - -+/* -+ * set_baserel_rows_estimate -+ * Set the rows estimate for the given base relation. -+ * -+ * Rows is the estimated number of output tuples after applying -+ * restriction clauses. -+ * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ * The hook must set rel->rows. -+ */ -+void -+set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) -+{ -+ if (set_baserel_rows_estimate_hook) -+ (*set_baserel_rows_estimate_hook) (root, rel); -+ else -+ set_baserel_rows_estimate_standard(root, rel); -+} -+ -+/* -+ * set_baserel_rows_estimate -+ * Set the rows estimate for the given base relation. -+ * -+ * Rows is the estimated number of output tuples after applying -+ * restriction clauses. -+ */ -+void -+set_baserel_rows_estimate_standard(PlannerInfo *root, RelOptInfo *rel) -+{ -+ double nrows; -+ -+ nrows = rel->tuples * -+ clauselist_selectivity(root, -+ rel->baserestrictinfo, -+ 0, -+ JOIN_INNER, -+ NULL); -+ -+ rel->rows = clamp_row_est(nrows); -+} -+ - /* - * set_baserel_size_estimates - * Set the size estimates for the given base relation. 
-@@ -4403,19 +4449,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) - void - set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) - { -- double nrows; -- - /* Should only be applied to base relations */ - Assert(rel->relid > 0); - -- nrows = rel->tuples * -- clauselist_selectivity(root, -- rel->baserestrictinfo, -- 0, -- JOIN_INNER, -- NULL); -- -- rel->rows = clamp_row_est(nrows); -+ set_baserel_rows_estimate(root, rel); - - cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); - -@@ -4426,13 +4463,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) - * get_parameterized_baserel_size - * Make a size estimate for a parameterized scan of a base relation. - * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ */ -+double -+get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, -+ List *param_clauses) -+{ -+ if (get_parameterized_baserel_size_hook) -+ return (*get_parameterized_baserel_size_hook) (root, rel, -+ param_clauses); -+ else -+ return get_parameterized_baserel_size_standard(root, rel, -+ param_clauses); -+} -+ -+/* -+ * get_parameterized_baserel_size_standard -+ * Make a size estimate for a parameterized scan of a base relation. -+ * - * 'param_clauses' lists the additional join clauses to be used. - * - * set_baserel_size_estimates must have been applied already. - */ - double --get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, -- List *param_clauses) -+get_parameterized_baserel_size_standard(PlannerInfo *root, RelOptInfo *rel, -+ List *param_clauses) - { - List *allclauses; - double nrows; -@@ -4462,6 +4519,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, - * set_joinrel_size_estimates - * Set the size estimates for the given join relation. 
- * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ * The hook must set rel->rows value. -+ */ -+void -+set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist) -+{ -+ if (set_joinrel_size_estimates_hook) -+ (*set_joinrel_size_estimates_hook) (root, rel, -+ outer_rel, -+ inner_rel, -+ sjinfo, -+ restrictlist); -+ else -+ set_joinrel_size_estimates_standard(root, rel, -+ outer_rel, -+ inner_rel, -+ sjinfo, -+ restrictlist); -+} -+ -+/* -+ * set_joinrel_size_estimates_standard -+ * Set the size estimates for the given join relation. -+ * - * The rel's targetlist must have been constructed already, and a - * restriction clause list that matches the given component rels must - * be provided. -@@ -4481,11 +4568,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, - * build_joinrel_tlist, and baserestrictcost is not used for join rels. - */ - void --set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, -- RelOptInfo *outer_rel, -- RelOptInfo *inner_rel, -- SpecialJoinInfo *sjinfo, -- List *restrictlist) -+set_joinrel_size_estimates_standard(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist) - { - rel->rows = calc_joinrel_size_estimate(root, - rel, -@@ -4501,6 +4588,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - * get_parameterized_joinrel_size - * Make a size estimate for a parameterized scan of a join relation. - * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. 
-+ */ -+double -+get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses) -+{ -+ if (get_parameterized_joinrel_size_hook) -+ return (*get_parameterized_joinrel_size_hook) (root, rel, -+ outer_path, -+ inner_path, -+ sjinfo, -+ restrict_clauses); -+ else -+ return get_parameterized_joinrel_size_standard(root, rel, -+ outer_path, -+ inner_path, -+ sjinfo, -+ restrict_clauses); -+} -+ -+/* -+ * get_parameterized_joinrel_size_standard -+ * Make a size estimate for a parameterized scan of a join relation. -+ * - * 'rel' is the joinrel under consideration. - * 'outer_path', 'inner_path' are (probably also parameterized) Paths that - * produce the relations being joined. -@@ -4513,11 +4629,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - * set_joinrel_size_estimates must have been applied already. - */ - double --get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, -- Path *outer_path, -- Path *inner_path, -- SpecialJoinInfo *sjinfo, -- List *restrict_clauses) -+get_parameterized_joinrel_size_standard(PlannerInfo *root, RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses) - { - double nrows; - -@@ -5474,14 +5590,25 @@ page_size(double tuples, int width) - return ceil(relation_byte_size(tuples, width) / BLCKSZ); - } - -+bool -+IsParallelTuplesProcessing(const Plan *plan) -+{ -+ if (plan->path_parallel_workers > 0 && ( -+ plan->parallel_aware || nodeTag(plan) == T_HashJoin || -+ nodeTag(plan) == T_MergeJoin || -+ nodeTag(plan) == T_NestLoop)) -+ return true; -+ return false; -+} -+ - /* - * Estimate the fraction of the work that each worker will do given the - * number of workers budgeted for the path. 
- */ --static double --get_parallel_divisor(Path *path) -+double -+get_parallel_divisor(int parallel_workers) - { -- double parallel_divisor = path->parallel_workers; -+ double parallel_divisor = parallel_workers; - - /* - * Early experience with parallel query suggests that when there is only -@@ -5498,7 +5625,7 @@ get_parallel_divisor(Path *path) - { - double leader_contribution; - -- leader_contribution = 1.0 - (0.3 * path->parallel_workers); -+ leader_contribution = 1.0 - (0.3 * parallel_workers); - if (leader_contribution > 0) - parallel_divisor += leader_contribution; - } -diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 608d5adfed..222a7a34f3 100644 ---- a/src/backend/optimizer/plan/createplan.c -+++ b/src/backend/optimizer/plan/createplan.c -@@ -70,6 +70,8 @@ - #define CP_LABEL_TLIST 0x0004 /* tlist must contain sortgrouprefs */ - #define CP_IGNORE_TLIST 0x0008 /* caller will replace tlist */ - -+/* Hook for plugins to get control in creating plan from path */ -+copy_generic_path_info_hook_type copy_generic_path_info_hook = NULL; - - static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path, - int flags); -@@ -164,7 +166,7 @@ static Node *fix_indexqual_clause(PlannerInfo *root, - static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol); - static List *get_switched_clauses(List *clauses, Relids outerrelids); - static List *order_qual_clauses(PlannerInfo *root, List *clauses); --static void copy_generic_path_info(Plan *dest, Path *src); -+static void copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src); - static void copy_plan_costsize(Plan *dest, Plan *src); - static void label_sort_with_costsize(PlannerInfo *root, Sort *plan, - double limit_tuples); -@@ -1094,7 +1096,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) - false)), - NULL); - -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path 
*) best_path); - - return plan; - } -@@ -1241,7 +1243,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) - plan->first_partial_plan = best_path->first_partial_path; - plan->part_prune_info = partpruneinfo; - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - /* - * If prepare_sort_from_pathkeys added sort columns, but we were told to -@@ -1287,7 +1289,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, - * prepare_sort_from_pathkeys on it before we do so on the individual - * child plans, to make cross-checking the sort info easier. - */ -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, &best_path->path); - plan->targetlist = tlist; - plan->qual = NIL; - plan->lefttree = NULL; -@@ -1440,7 +1442,7 @@ create_group_result_plan(PlannerInfo *root, GroupResultPath *best_path) - - plan = make_result(tlist, (Node *) quals, NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - return plan; - } -@@ -1465,7 +1467,7 @@ create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path) - - plan = make_project_set(tlist, subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - return plan; - } -@@ -1493,7 +1495,7 @@ create_material_plan(PlannerInfo *root, MaterialPath *best_path, int flags) - - plan = make_material(subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - return plan; - } -@@ -1692,7 +1694,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) - } - - /* Copy cost data from Path to Plan */ -- copy_generic_path_info(plan, &best_path->path); -+ copy_generic_path_info(root, plan, &best_path->path); - - return plan; - } -@@ -1725,7 
+1727,7 @@ create_gather_plan(PlannerInfo *root, GatherPath *best_path) - best_path->single_copy, - subplan); - -- copy_generic_path_info(&gather_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &gather_plan->plan, &best_path->path); - - /* use parallel mode for parallel plans. */ - root->glob->parallelModeNeeded = true; -@@ -1754,7 +1756,7 @@ create_gather_merge_plan(PlannerInfo *root, GatherMergePath *best_path) - gm_plan = makeNode(GatherMerge); - gm_plan->plan.targetlist = tlist; - gm_plan->num_workers = best_path->num_workers; -- copy_generic_path_info(&gm_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &gm_plan->plan, &best_path->path); - - /* Assign the rescan Param. */ - gm_plan->rescan_param = assign_special_exec_param(root); -@@ -1882,7 +1884,7 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path, int flags) - /* We need a Result node */ - plan = (Plan *) make_result(tlist, NULL, subplan); - -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - } - - return plan; -@@ -1983,7 +1985,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags) - IS_OTHER_REL(best_path->subpath->parent) ? 
- best_path->path.parent->relids : NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2022,7 +2024,7 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path) - subplan->targetlist), - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2050,7 +2052,7 @@ create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flag - best_path->path.pathkeys, - best_path->numkeys); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2093,7 +2095,7 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) - best_path->numGroups, - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2294,7 +2296,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) - subplan); - - /* Copy cost data from Path to Plan */ -- copy_generic_path_info(&plan->plan, &best_path->path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - } - - return (Plan *) plan; -@@ -2350,7 +2352,7 @@ create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path) - - plan = make_result(tlist, (Node *) best_path->quals, NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - /* - * During setrefs.c, we'll need to replace references to the Agg nodes -@@ -2466,7 +2468,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) - wc->inRangeNullsFirst, - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2502,7 +2504,7 @@ 
create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags) - best_path->firstFlag, - numGroups); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2538,7 +2540,7 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path) - best_path->distinctList, - numGroups); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2561,7 +2563,7 @@ create_lockrows_plan(PlannerInfo *root, LockRowsPath *best_path, - - plan = make_lockrows(subplan, best_path->rowMarks, best_path->epqParam); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2622,7 +2624,7 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) - best_path->onconflict, - best_path->epqParam); - -- copy_generic_path_info(&plan->plan, &best_path->path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - return plan; - } -@@ -2646,7 +2648,7 @@ create_limit_plan(PlannerInfo *root, LimitPath *best_path, int flags) - best_path->limitOffset, - best_path->limitCount); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2692,7 +2694,7 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path, - scan_clauses, - scan_relid); - -- copy_generic_path_info(&scan_plan->plan, best_path); -+ copy_generic_path_info(root, &scan_plan->plan, best_path); - - return scan_plan; - } -@@ -2738,7 +2740,7 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path, - scan_relid, - tsc); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -2916,7 +2918,7 @@ 
create_indexscan_plan(PlannerInfo *root, - indexorderbyops, - best_path->indexscandir); - -- copy_generic_path_info(&scan_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->plan, &best_path->path); - - return scan_plan; - } -@@ -3031,7 +3033,7 @@ create_bitmap_scan_plan(PlannerInfo *root, - bitmapqualorig, - baserelid); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -3351,7 +3353,7 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path, - scan_relid, - tidquals); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -3401,7 +3403,7 @@ create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path, - scan_relid, - subplan); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -3444,7 +3446,7 @@ create_functionscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_functionscan(tlist, scan_clauses, scan_relid, - functions, rte->funcordinality); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3487,7 +3489,7 @@ create_tablefuncscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_tablefuncscan(tlist, scan_clauses, scan_relid, - tablefunc); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3531,7 +3533,7 @@ create_valuesscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_valuesscan(tlist, scan_clauses, scan_relid, - values_lists); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ 
copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3624,7 +3626,7 @@ create_ctescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_ctescan(tlist, scan_clauses, scan_relid, - plan_id, cte_param_id); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3663,7 +3665,7 @@ create_namedtuplestorescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_namedtuplestorescan(tlist, scan_clauses, scan_relid, - rte->enrname); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3701,7 +3703,7 @@ create_resultscan_plan(PlannerInfo *root, Path *best_path, - - scan_plan = make_result(tlist, (Node *) scan_clauses, NULL); - -- copy_generic_path_info(&scan_plan->plan, best_path); -+ copy_generic_path_info(root, &scan_plan->plan, best_path); - - return scan_plan; - } -@@ -3761,7 +3763,7 @@ create_worktablescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_worktablescan(tlist, scan_clauses, scan_relid, - cteroot->wt_param_id); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3821,7 +3823,7 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, - outer_plan); - - /* Copy cost data from Path to Plan; no need to make FDW do this */ -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - /* Copy foreign server OID; likewise, no need to make FDW do this */ - scan_plan->fs_server = rel->serverid; -@@ -3955,7 +3957,7 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path, - * Copy cost data from Path to Plan; no need to make custom-plan providers - * do this - */ -- 
copy_generic_path_info(&cplan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &cplan->scan.plan, &best_path->path); - - /* Likewise, copy the relids that are represented by this custom scan */ - cplan->custom_relids = best_path->path.parent->relids; -@@ -4057,7 +4059,7 @@ create_nestloop_plan(PlannerInfo *root, - best_path->jointype, - best_path->inner_unique); - -- copy_generic_path_info(&join_plan->join.plan, &best_path->path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->path); - - return join_plan; - } -@@ -4364,7 +4366,7 @@ create_mergejoin_plan(PlannerInfo *root, - best_path->skip_mark_restore); - - /* Costs of sort and material steps are included in path cost already */ -- copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->jpath.path); - - return join_plan; - } -@@ -4510,7 +4512,7 @@ create_hashjoin_plan(PlannerInfo *root, - best_path->jpath.jointype, - best_path->jpath.inner_unique); - -- copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->jpath.path); - - return join_plan; - } -@@ -5010,7 +5012,7 @@ order_qual_clauses(PlannerInfo *root, List *clauses) - * Also copy the parallel-related flags, which the executor *will* use. 
- */ - static void --copy_generic_path_info(Plan *dest, Path *src) -+copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src) - { - dest->startup_cost = src->startup_cost; - dest->total_cost = src->total_cost; -@@ -5018,6 +5020,9 @@ copy_generic_path_info(Plan *dest, Path *src) - dest->plan_width = src->pathtarget->width; - dest->parallel_aware = src->parallel_aware; - dest->parallel_safe = src->parallel_safe; -+ -+ if (copy_generic_path_info_hook) -+ (*copy_generic_path_info_hook) (root, dest, src); - } - - /* -diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c -index 6054bd2b53..1c8434174e 100644 ---- a/src/backend/optimizer/util/relnode.c -+++ b/src/backend/optimizer/util/relnode.c -@@ -1233,6 +1233,7 @@ find_childrel_parents(PlannerInfo *root, RelOptInfo *rel) - } - - -+set_parampathinfo_postinit_hook_type parampathinfo_postinit_hook = NULL; - /* - * get_baserel_parampathinfo - * Get the ParamPathInfo for a parameterized path for a base relation, -@@ -1301,6 +1302,10 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, - ppi->ppi_req_outer = required_outer; - ppi->ppi_rows = rows; - ppi->ppi_clauses = pclauses; -+ -+ if (parampathinfo_postinit_hook) -+ (*parampathinfo_postinit_hook)(ppi); -+ - baserel->ppilist = lappend(baserel->ppilist, ppi); - - return ppi; -@@ -1526,6 +1531,10 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, - ppi->ppi_req_outer = required_outer; - ppi->ppi_rows = rows; - ppi->ppi_clauses = NIL; -+ -+ if (parampathinfo_postinit_hook) -+ (*parampathinfo_postinit_hook)(ppi); -+ - joinrel->ppilist = lappend(joinrel->ppilist, ppi); - - return ppi; -diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h -index f8b79ec120..b5eda01907 100644 ---- a/src/include/commands/explain.h -+++ b/src/include/commands/explain.h -@@ -62,6 +62,12 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; - typedef const char 
*(*explain_get_index_name_hook_type) (Oid indexId); - extern PGDLLIMPORT explain_get_index_name_hook_type explain_get_index_name_hook; - -+/* Hook for plugins to get control in ExplainOnePlan() */ -+typedef void (*ExplainOnePlan_hook_type) (PlannedStmt *plannedstmt, IntoClause *into, -+ ExplainState *es, const char *queryString, -+ ParamListInfo params, const instr_time *planduration, -+ QueryEnvironment *queryEnv); -+extern PGDLLIMPORT ExplainOnePlan_hook_type ExplainOnePlan_hook; - - extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, const char *queryString, - ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest); -diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h -index 441e64eca9..484bca379a 100644 ---- a/src/include/nodes/pathnodes.h -+++ b/src/include/nodes/pathnodes.h -@@ -710,6 +710,10 @@ typedef struct RelOptInfo - Relids top_parent_relids; /* Relids of topmost parents (if "other" - * rel) */ - -+ /* For Adaptive optimization DEBUG purposes */ -+ double predicted_cardinality; -+ int fss_hash; -+ - /* used for partitioned relations */ - PartitionScheme part_scheme; /* Partitioning scheme. 
*/ - int nparts; /* number of partitions */ -@@ -1069,6 +1073,10 @@ typedef struct ParamPathInfo - Relids ppi_req_outer; /* rels supplying parameters used by path */ - double ppi_rows; /* estimated number of result tuples */ - List *ppi_clauses; /* join clauses available from outer rels */ -+ -+ /* AQO DEBUG purposes */ -+ double predicted_ppi_rows; -+ double fss_ppi_hash; - } ParamPathInfo; - - -diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 70f8b8e22b..d188c2596a 100644 ---- a/src/include/nodes/plannodes.h -+++ b/src/include/nodes/plannodes.h -@@ -144,6 +144,19 @@ typedef struct Plan - List *initPlan; /* Init Plan nodes (un-correlated expr - * subselects) */ - -+ /* -+ * information for adaptive query optimization -+ */ -+ bool had_path; -+ List *path_clauses; -+ List *path_relids; -+ JoinType path_jointype; -+ int path_parallel_workers; -+ bool was_parametrized; -+ /* For Adaptive optimization DEBUG purposes */ -+ double predicted_cardinality; -+ int fss_hash; -+ - /* - * Information for management of parameter-change-driven rescanning - * -diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h -index 9b6bdbc518..2a0caa6474 100644 ---- a/src/include/optimizer/cost.h -+++ b/src/include/optimizer/cost.h -@@ -39,6 +39,33 @@ typedef enum - } ConstraintExclusionType; - - -+/* Hook for plugins to get control of cardinality estimation */ -+typedef void (*set_baserel_rows_estimate_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel); -+extern PGDLLIMPORT set_baserel_rows_estimate_hook_type -+ set_baserel_rows_estimate_hook; -+typedef double (*get_parameterized_baserel_size_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ List *param_clauses); -+extern PGDLLIMPORT get_parameterized_baserel_size_hook_type -+ get_parameterized_baserel_size_hook; -+typedef double (*get_parameterized_joinrel_size_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ 
List *restrict_clauses); -+extern PGDLLIMPORT get_parameterized_joinrel_size_hook_type -+ get_parameterized_joinrel_size_hook; -+typedef void (*set_joinrel_size_estimates_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist); -+extern PGDLLIMPORT set_joinrel_size_estimates_hook_type -+ set_joinrel_size_estimates_hook; -+ - /* - * prototypes for costsize.c - * routines to compute costs and sizes -@@ -171,10 +198,21 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, - SpecialJoinInfo *sjinfo, - List *restrictlist, - SemiAntiJoinFactors *semifactors); -+extern void set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel); -+extern void set_baserel_rows_estimate_standard(PlannerInfo *root, RelOptInfo *rel); - extern void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern double get_parameterized_baserel_size(PlannerInfo *root, - RelOptInfo *rel, - List *param_clauses); -+extern double get_parameterized_baserel_size_standard(PlannerInfo *root, -+ RelOptInfo *rel, -+ List *param_clauses); -+extern double get_parameterized_joinrel_size_standard(PlannerInfo *root, -+ RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses); - extern double get_parameterized_joinrel_size(PlannerInfo *root, - RelOptInfo *rel, - Path *outer_path, -@@ -186,6 +224,11 @@ extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - RelOptInfo *inner_rel, - SpecialJoinInfo *sjinfo, - List *restrictlist); -+extern void set_joinrel_size_estimates_standard(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist); - extern void set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern void set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern void 
set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel); -@@ -198,5 +241,7 @@ extern void set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern PathTarget *set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target); - extern double compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, - Path *bitmapqual, int loop_count, Cost *cost, double *tuple); -+extern bool IsParallelTuplesProcessing(const Plan *plan); -+extern double get_parallel_divisor(int parallel_workers); - - #endif /* COST_H */ -diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h -index e70d6a3f18..21dbfee508 100644 ---- a/src/include/optimizer/pathnode.h -+++ b/src/include/optimizer/pathnode.h -@@ -18,6 +18,10 @@ - #include "nodes/pathnodes.h" - - -+typedef void (*set_parampathinfo_postinit_hook_type) (ParamPathInfo *ppi); -+ -+extern PGDLLIMPORT set_parampathinfo_postinit_hook_type parampathinfo_postinit_hook; -+ - /* - * prototypes for pathnode.c - */ -diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h -index e7aaddd50d..56e58dee25 100644 ---- a/src/include/optimizer/planmain.h -+++ b/src/include/optimizer/planmain.h -@@ -24,6 +24,12 @@ extern double cursor_tuple_fraction; - /* query_planner callback to compute query_pathkeys */ - typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra); - -+/* hook for plugins to get control in creating plan from path */ -+typedef void (*copy_generic_path_info_hook_type) (PlannerInfo *root, -+ Plan *dest, Path *src); -+ -+extern PGDLLIMPORT copy_generic_path_info_hook_type copy_generic_path_info_hook; -+ - /* - * prototypes for plan/planmain.c - */ diff --git a/aqo_pg13.patch b/aqo_pg13.patch index 1c30cadc..106fd659 100644 --- a/aqo_pg13.patch +++ b/aqo_pg13.patch @@ -11,7 +11,7 @@ index 1846d415b6..95519ac11d 100644 auto_explain \ bloom \ diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index 0ad49612d2..7c0b82bde7 100644 
+index 20708db9f1..d4659f53a0 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -24,6 +24,7 @@ @@ -74,7 +74,7 @@ index 256ab54003..cfdc0247ec 100644 COPY_BITMAPSET_FIELD(allParam); } diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index ef7e8281cc..93d24b905a 100644 +index ffd2bf8783..84152b43b6 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -97,6 +97,11 @@ @@ -87,9 +87,9 @@ index ef7e8281cc..93d24b905a 100644 +get_parameterized_joinrel_size_hook_type get_parameterized_joinrel_size_hook = NULL; +set_joinrel_size_estimates_hook_type set_joinrel_size_estimates_hook = NULL; - #define LOG2(x) (log(x) / 0.693147180559945) - -@@ -178,7 +183,6 @@ static Cost append_nonpartial_cost(List *subpaths, int numpaths, + /* source-code-compatibility hacks for pull_varnos() API change */ + #define pull_varnos(a,b) pull_varnos_new(a,b) +@@ -181,7 +186,6 @@ static Cost append_nonpartial_cost(List *subpaths, int numpaths, static void set_rel_width(PlannerInfo *root, RelOptInfo *rel); static double relation_byte_size(double tuples, int width); static double page_size(double tuples, int width); @@ -97,7 +97,7 @@ index ef7e8281cc..93d24b905a 100644 /* -@@ -256,7 +260,7 @@ cost_seqscan(Path *path, PlannerInfo *root, +@@ -259,7 +263,7 @@ cost_seqscan(Path *path, PlannerInfo *root, /* Adjust costing for parallelism, if used. */ if (path->parallel_workers > 0) { @@ -106,7 +106,7 @@ index ef7e8281cc..93d24b905a 100644 /* The CPU cost is divided among all the workers. */ cpu_run_cost /= parallel_divisor; -@@ -735,7 +739,7 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count, +@@ -738,7 +742,7 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count, /* Adjust costing for parallelism, if used. 
*/ if (path->path.parallel_workers > 0) { @@ -115,7 +115,7 @@ index ef7e8281cc..93d24b905a 100644 path->path.rows = clamp_row_est(path->path.rows / parallel_divisor); -@@ -1016,7 +1020,7 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, +@@ -1019,7 +1023,7 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, /* Adjust costing for parallelism, if used. */ if (path->parallel_workers > 0) { @@ -124,7 +124,7 @@ index ef7e8281cc..93d24b905a 100644 /* The CPU cost is divided among all the workers. */ cpu_run_cost /= parallel_divisor; -@@ -2119,7 +2123,7 @@ cost_append(AppendPath *apath) +@@ -2122,7 +2126,7 @@ cost_append(AppendPath *apath) else /* parallel-aware */ { int i = 0; @@ -133,7 +133,7 @@ index ef7e8281cc..93d24b905a 100644 /* Parallel-aware Append never produces ordered output. */ Assert(apath->path.pathkeys == NIL); -@@ -2153,7 +2157,7 @@ cost_append(AppendPath *apath) +@@ -2156,7 +2160,7 @@ cost_append(AppendPath *apath) { double subpath_parallel_divisor; @@ -142,7 +142,7 @@ index ef7e8281cc..93d24b905a 100644 apath->path.rows += subpath->rows * (subpath_parallel_divisor / parallel_divisor); apath->path.total_cost += subpath->total_cost; -@@ -2752,7 +2756,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, +@@ -2755,7 +2759,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, /* For partial paths, scale row estimate. */ if (path->path.parallel_workers > 0) { @@ -151,7 +151,7 @@ index ef7e8281cc..93d24b905a 100644 path->path.rows = clamp_row_est(path->path.rows / parallel_divisor); -@@ -3200,7 +3204,7 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, +@@ -3203,7 +3207,7 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, /* For partial paths, scale row estimate. 
*/ if (path->jpath.path.parallel_workers > 0) { @@ -160,7 +160,7 @@ index ef7e8281cc..93d24b905a 100644 path->jpath.path.rows = clamp_row_est(path->jpath.path.rows / parallel_divisor); -@@ -3534,7 +3538,7 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, +@@ -3537,7 +3541,7 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, * number, so we need to undo the division. */ if (parallel_hash) @@ -169,7 +169,7 @@ index ef7e8281cc..93d24b905a 100644 /* * Get hash table size that executor would use for inner relation. -@@ -3631,7 +3635,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, +@@ -3634,7 +3638,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, /* For partial paths, scale row estimate. */ if (path->jpath.path.parallel_workers > 0) { @@ -178,7 +178,7 @@ index ef7e8281cc..93d24b905a 100644 path->jpath.path.rows = clamp_row_est(path->jpath.path.rows / parallel_divisor); -@@ -4626,6 +4630,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4629,6 +4633,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) } @@ -237,7 +237,7 @@ index ef7e8281cc..93d24b905a 100644 /* * set_baserel_size_estimates * Set the size estimates for the given base relation. -@@ -4642,19 +4698,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4645,19 +4701,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) { @@ -258,7 +258,7 @@ index ef7e8281cc..93d24b905a 100644 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); -@@ -4665,13 +4712,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) +@@ -4668,13 +4715,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) * get_parameterized_baserel_size * Make a size estimate for a parameterized scan of a base relation. 
* @@ -294,7 +294,7 @@ index ef7e8281cc..93d24b905a 100644 { List *allclauses; double nrows; -@@ -4700,6 +4767,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -4703,6 +4770,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates * Set the size estimates for the given join relation. * @@ -331,7 +331,7 @@ index ef7e8281cc..93d24b905a 100644 * The rel's targetlist must have been constructed already, and a * restriction clause list that matches the given component rels must * be provided. -@@ -4719,11 +4816,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -4722,11 +4819,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * build_joinrel_tlist, and baserestrictcost is not used for join rels. */ void @@ -348,7 +348,7 @@ index ef7e8281cc..93d24b905a 100644 { rel->rows = calc_joinrel_size_estimate(root, rel, -@@ -4739,6 +4836,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -4742,6 +4839,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * get_parameterized_joinrel_size * Make a size estimate for a parameterized scan of a join relation. * @@ -384,7 +384,7 @@ index ef7e8281cc..93d24b905a 100644 * 'rel' is the joinrel under consideration. * 'outer_path', 'inner_path' are (probably also parameterized) Paths that * produce the relations being joined. -@@ -4751,11 +4877,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -4754,11 +4880,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates must have been applied already. 
*/ double @@ -401,7 +401,7 @@ index ef7e8281cc..93d24b905a 100644 { double nrows; -@@ -5424,7 +5550,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) +@@ -5427,7 +5553,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) /* Should only be applied to base relations */ Assert(rel->relid > 0); @@ -410,7 +410,7 @@ index ef7e8281cc..93d24b905a 100644 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); -@@ -5706,14 +5832,25 @@ page_size(double tuples, int width) +@@ -5709,14 +5835,25 @@ page_size(double tuples, int width) return ceil(relation_byte_size(tuples, width) / BLCKSZ); } @@ -439,7 +439,7 @@ index ef7e8281cc..93d24b905a 100644 /* * Early experience with parallel query suggests that when there is only -@@ -5730,7 +5867,7 @@ get_parallel_divisor(Path *path) +@@ -5733,7 +5870,7 @@ get_parallel_divisor(Path *path) { double leader_contribution; @@ -449,7 +449,7 @@ index ef7e8281cc..93d24b905a 100644 parallel_divisor += leader_contribution; } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 84f2d186d9..a35d8ec9ee 100644 +index 7bf1751e93..e0bbf583fb 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -70,6 +70,8 @@ @@ -470,7 +470,7 @@ index 84f2d186d9..a35d8ec9ee 100644 static void copy_plan_costsize(Plan *dest, Plan *src); static void label_sort_with_costsize(PlannerInfo *root, Sort *plan, double limit_tuples); -@@ -1110,7 +1112,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) +@@ -1107,7 +1109,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) false)), NULL); @@ -479,7 +479,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return plan; } -@@ -1258,7 +1260,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) +@@ -1255,7 +1257,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) plan->first_partial_plan = 
best_path->first_partial_path; plan->part_prune_info = partpruneinfo; @@ -488,7 +488,7 @@ index 84f2d186d9..a35d8ec9ee 100644 /* * If prepare_sort_from_pathkeys added sort columns, but we were told to -@@ -1304,7 +1306,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, +@@ -1301,7 +1303,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, * prepare_sort_from_pathkeys on it before we do so on the individual * child plans, to make cross-checking the sort info easier. */ @@ -497,7 +497,7 @@ index 84f2d186d9..a35d8ec9ee 100644 plan->targetlist = tlist; plan->qual = NIL; plan->lefttree = NULL; -@@ -1458,7 +1460,7 @@ create_group_result_plan(PlannerInfo *root, GroupResultPath *best_path) +@@ -1455,7 +1457,7 @@ create_group_result_plan(PlannerInfo *root, GroupResultPath *best_path) plan = make_result(tlist, (Node *) quals, NULL); @@ -506,7 +506,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return plan; } -@@ -1483,7 +1485,7 @@ create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path) +@@ -1480,7 +1482,7 @@ create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path) plan = make_project_set(tlist, subplan); @@ -515,7 +515,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return plan; } -@@ -1511,7 +1513,7 @@ create_material_plan(PlannerInfo *root, MaterialPath *best_path, int flags) +@@ -1508,7 +1510,7 @@ create_material_plan(PlannerInfo *root, MaterialPath *best_path, int flags) plan = make_material(subplan); @@ -524,7 +524,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return plan; } -@@ -1711,7 +1713,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) +@@ -1708,7 +1710,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) } /* Copy cost data from Path to Plan */ @@ -533,7 +533,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return plan; } -@@ -1744,7 +1746,7 @@ create_gather_plan(PlannerInfo *root, GatherPath *best_path) +@@ -1741,7 +1743,7 @@ create_gather_plan(PlannerInfo 
*root, GatherPath *best_path) best_path->single_copy, subplan); @@ -542,7 +542,7 @@ index 84f2d186d9..a35d8ec9ee 100644 /* use parallel mode for parallel plans. */ root->glob->parallelModeNeeded = true; -@@ -1773,7 +1775,7 @@ create_gather_merge_plan(PlannerInfo *root, GatherMergePath *best_path) +@@ -1770,7 +1772,7 @@ create_gather_merge_plan(PlannerInfo *root, GatherMergePath *best_path) gm_plan = makeNode(GatherMerge); gm_plan->plan.targetlist = tlist; gm_plan->num_workers = best_path->num_workers; @@ -551,7 +551,7 @@ index 84f2d186d9..a35d8ec9ee 100644 /* Assign the rescan Param. */ gm_plan->rescan_param = assign_special_exec_param(root); -@@ -1901,7 +1903,7 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path, int flags) +@@ -1899,7 +1901,7 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path, int flags) /* We need a Result node */ plan = (Plan *) make_result(tlist, NULL, subplan); @@ -560,7 +560,7 @@ index 84f2d186d9..a35d8ec9ee 100644 } return plan; -@@ -2002,7 +2004,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags) +@@ -2000,7 +2002,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags) IS_OTHER_REL(best_path->subpath->parent) ? 
best_path->path.parent->relids : NULL); @@ -569,7 +569,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return plan; } -@@ -2028,7 +2030,7 @@ create_incrementalsort_plan(PlannerInfo *root, IncrementalSortPath *best_path, +@@ -2026,7 +2028,7 @@ create_incrementalsort_plan(PlannerInfo *root, IncrementalSortPath *best_path, best_path->spath.path.parent->relids : NULL, best_path->nPresortedCols); @@ -578,7 +578,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return plan; } -@@ -2067,7 +2069,7 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path) +@@ -2065,7 +2067,7 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path) subplan->targetlist), subplan); @@ -587,7 +587,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return plan; } -@@ -2095,7 +2097,7 @@ create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flag +@@ -2093,7 +2095,7 @@ create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flag best_path->path.pathkeys, best_path->numkeys); @@ -596,7 +596,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return plan; } -@@ -2139,7 +2141,7 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) +@@ -2137,7 +2139,7 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) best_path->transitionSpace, subplan); @@ -605,7 +605,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return plan; } -@@ -2341,7 +2343,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) +@@ -2339,7 +2341,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) subplan); /* Copy cost data from Path to Plan */ @@ -614,7 +614,7 @@ index 84f2d186d9..a35d8ec9ee 100644 } return (Plan *) plan; -@@ -2399,7 +2401,7 @@ create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path) +@@ -2397,7 +2399,7 @@ create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path) plan = make_result(tlist, (Node *) best_path->quals, NULL); @@ -623,7 +623,7 @@ index 84f2d186d9..a35d8ec9ee 100644 /* * During setrefs.c, we'll need to replace references to the 
Agg nodes -@@ -2518,7 +2520,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) +@@ -2516,7 +2518,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) wc->inRangeNullsFirst, subplan); @@ -632,7 +632,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return plan; } -@@ -2554,7 +2556,7 @@ create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags) +@@ -2552,7 +2554,7 @@ create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags) best_path->firstFlag, numGroups); @@ -641,7 +641,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return plan; } -@@ -2590,7 +2592,7 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path) +@@ -2588,7 +2590,7 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path) best_path->distinctList, numGroups); @@ -650,7 +650,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return plan; } -@@ -2613,7 +2615,7 @@ create_lockrows_plan(PlannerInfo *root, LockRowsPath *best_path, +@@ -2611,7 +2613,7 @@ create_lockrows_plan(PlannerInfo *root, LockRowsPath *best_path, plan = make_lockrows(subplan, best_path->rowMarks, best_path->epqParam); @@ -659,7 +659,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return plan; } -@@ -2674,7 +2676,7 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) +@@ -2672,7 +2674,7 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) best_path->onconflict, best_path->epqParam); @@ -668,7 +668,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return plan; } -@@ -2728,7 +2730,7 @@ create_limit_plan(PlannerInfo *root, LimitPath *best_path, int flags) +@@ -2726,7 +2728,7 @@ create_limit_plan(PlannerInfo *root, LimitPath *best_path, int flags) best_path->limitOption, numUniqkeys, uniqColIdx, uniqOperators, uniqCollations); @@ -677,7 +677,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return plan; } -@@ -2774,7 +2776,7 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path, +@@ -2772,7 +2774,7 @@ create_seqscan_plan(PlannerInfo *root, 
Path *best_path, scan_clauses, scan_relid); @@ -686,7 +686,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return scan_plan; } -@@ -2820,7 +2822,7 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path, +@@ -2818,7 +2820,7 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path, scan_relid, tsc); @@ -695,7 +695,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return scan_plan; } -@@ -2998,7 +3000,7 @@ create_indexscan_plan(PlannerInfo *root, +@@ -2996,7 +2998,7 @@ create_indexscan_plan(PlannerInfo *root, indexorderbyops, best_path->indexscandir); @@ -704,7 +704,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return scan_plan; } -@@ -3113,7 +3115,7 @@ create_bitmap_scan_plan(PlannerInfo *root, +@@ -3111,7 +3113,7 @@ create_bitmap_scan_plan(PlannerInfo *root, bitmapqualorig, baserelid); @@ -713,7 +713,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return scan_plan; } -@@ -3433,7 +3435,7 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path, +@@ -3431,7 +3433,7 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path, scan_relid, tidquals); @@ -722,7 +722,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return scan_plan; } -@@ -3483,7 +3485,7 @@ create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path, +@@ -3481,7 +3483,7 @@ create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path, scan_relid, subplan); @@ -731,7 +731,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return scan_plan; } -@@ -3526,7 +3528,7 @@ create_functionscan_plan(PlannerInfo *root, Path *best_path, +@@ -3524,7 +3526,7 @@ create_functionscan_plan(PlannerInfo *root, Path *best_path, scan_plan = make_functionscan(tlist, scan_clauses, scan_relid, functions, rte->funcordinality); @@ -740,7 +740,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return scan_plan; } -@@ -3569,7 +3571,7 @@ create_tablefuncscan_plan(PlannerInfo *root, Path *best_path, +@@ -3567,7 +3569,7 @@ create_tablefuncscan_plan(PlannerInfo *root, Path *best_path, scan_plan = make_tablefuncscan(tlist, scan_clauses, scan_relid, tablefunc); 
@@ -749,7 +749,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return scan_plan; } -@@ -3613,7 +3615,7 @@ create_valuesscan_plan(PlannerInfo *root, Path *best_path, +@@ -3611,7 +3613,7 @@ create_valuesscan_plan(PlannerInfo *root, Path *best_path, scan_plan = make_valuesscan(tlist, scan_clauses, scan_relid, values_lists); @@ -758,7 +758,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return scan_plan; } -@@ -3706,7 +3708,7 @@ create_ctescan_plan(PlannerInfo *root, Path *best_path, +@@ -3704,7 +3706,7 @@ create_ctescan_plan(PlannerInfo *root, Path *best_path, scan_plan = make_ctescan(tlist, scan_clauses, scan_relid, plan_id, cte_param_id); @@ -767,7 +767,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return scan_plan; } -@@ -3745,7 +3747,7 @@ create_namedtuplestorescan_plan(PlannerInfo *root, Path *best_path, +@@ -3743,7 +3745,7 @@ create_namedtuplestorescan_plan(PlannerInfo *root, Path *best_path, scan_plan = make_namedtuplestorescan(tlist, scan_clauses, scan_relid, rte->enrname); @@ -776,7 +776,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return scan_plan; } -@@ -3783,7 +3785,7 @@ create_resultscan_plan(PlannerInfo *root, Path *best_path, +@@ -3781,7 +3783,7 @@ create_resultscan_plan(PlannerInfo *root, Path *best_path, scan_plan = make_result(tlist, (Node *) scan_clauses, NULL); @@ -785,7 +785,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return scan_plan; } -@@ -3843,7 +3845,7 @@ create_worktablescan_plan(PlannerInfo *root, Path *best_path, +@@ -3841,7 +3843,7 @@ create_worktablescan_plan(PlannerInfo *root, Path *best_path, scan_plan = make_worktablescan(tlist, scan_clauses, scan_relid, cteroot->wt_param_id); @@ -794,7 +794,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return scan_plan; } -@@ -3903,7 +3905,7 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, +@@ -3901,7 +3903,7 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, outer_plan); /* Copy cost data from Path to Plan; no need to make FDW do this */ @@ -803,7 +803,7 @@ index 84f2d186d9..a35d8ec9ee 
100644 /* Copy foreign server OID; likewise, no need to make FDW do this */ scan_plan->fs_server = rel->serverid; -@@ -4037,7 +4039,7 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path, +@@ -4035,7 +4037,7 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path, * Copy cost data from Path to Plan; no need to make custom-plan providers * do this */ @@ -812,7 +812,7 @@ index 84f2d186d9..a35d8ec9ee 100644 /* Likewise, copy the relids that are represented by this custom scan */ cplan->custom_relids = best_path->path.parent->relids; -@@ -4139,7 +4141,7 @@ create_nestloop_plan(PlannerInfo *root, +@@ -4137,7 +4139,7 @@ create_nestloop_plan(PlannerInfo *root, best_path->jointype, best_path->inner_unique); @@ -821,7 +821,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return join_plan; } -@@ -4446,7 +4448,7 @@ create_mergejoin_plan(PlannerInfo *root, +@@ -4444,7 +4446,7 @@ create_mergejoin_plan(PlannerInfo *root, best_path->skip_mark_restore); /* Costs of sort and material steps are included in path cost already */ @@ -830,7 +830,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return join_plan; } -@@ -4619,7 +4621,7 @@ create_hashjoin_plan(PlannerInfo *root, +@@ -4617,7 +4619,7 @@ create_hashjoin_plan(PlannerInfo *root, best_path->jpath.jointype, best_path->jpath.inner_unique); @@ -839,7 +839,7 @@ index 84f2d186d9..a35d8ec9ee 100644 return join_plan; } -@@ -5119,7 +5121,7 @@ order_qual_clauses(PlannerInfo *root, List *clauses) +@@ -5117,7 +5119,7 @@ order_qual_clauses(PlannerInfo *root, List *clauses) * Also copy the parallel-related flags, which the executor *will* use. 
*/ static void @@ -848,7 +848,7 @@ index 84f2d186d9..a35d8ec9ee 100644 { dest->startup_cost = src->startup_cost; dest->total_cost = src->total_cost; -@@ -5127,6 +5129,9 @@ copy_generic_path_info(Plan *dest, Path *src) +@@ -5125,6 +5127,9 @@ copy_generic_path_info(Plan *dest, Path *src) dest->plan_width = src->pathtarget->width; dest->parallel_aware = src->parallel_aware; dest->parallel_safe = src->parallel_safe; @@ -917,7 +917,7 @@ index ba661d32a6..74e4f7592c 100644 extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, ParamListInfo params, DestReceiver *dest); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h -index 10f0a149e9..fecf543f44 100644 +index 69150e46eb..15bf1a1160 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -738,6 +738,10 @@ typedef struct RelOptInfo @@ -1067,7 +1067,7 @@ index 3bd7072ae8..21bbaba11c 100644 * prototypes for pathnode.c */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h -index f3cefe67b8..6d77f6e871 100644 +index 8ce60e202e..f066ca6540 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -24,6 +24,12 @@ extern double cursor_tuple_fraction; diff --git a/aqo_pg9_6.patch b/aqo_pg9_6.patch deleted file mode 100644 index 68ceacdb..00000000 --- a/aqo_pg9_6.patch +++ /dev/null @@ -1,758 +0,0 @@ -diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index 3244c76..8229702 100644 ---- a/src/backend/nodes/copyfuncs.c -+++ b/src/backend/nodes/copyfuncs.c -@@ -121,6 +121,12 @@ CopyPlanFields(const Plan *from, Plan *newnode) - COPY_NODE_FIELD(lefttree); - COPY_NODE_FIELD(righttree); - COPY_NODE_FIELD(initPlan); -+ COPY_SCALAR_FIELD(had_path); -+ COPY_NODE_FIELD(path_clauses); -+ COPY_NODE_FIELD(path_relids); -+ COPY_SCALAR_FIELD(path_jointype); -+ COPY_SCALAR_FIELD(path_parallel_workers); -+ COPY_SCALAR_FIELD(was_parametrized); - COPY_BITMAPSET_FIELD(extParam); - 
COPY_BITMAPSET_FIELD(allParam); - } -diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index 2a49639..7b9bdef 100644 ---- a/src/backend/optimizer/path/costsize.c -+++ b/src/backend/optimizer/path/costsize.c -@@ -100,6 +100,10 @@ - - #define LOG2(x) (log(x) / 0.693147180559945) - -+set_baserel_rows_estimate_hook_type set_baserel_rows_estimate_hook = NULL; -+get_parameterized_baserel_size_hook_type get_parameterized_baserel_size_hook = NULL; -+get_parameterized_joinrel_size_hook_type get_parameterized_joinrel_size_hook = NULL; -+set_joinrel_size_estimates_hook_type set_joinrel_size_estimates_hook = NULL; - - double seq_page_cost = DEFAULT_SEQ_PAGE_COST; - double random_page_cost = DEFAULT_RANDOM_PAGE_COST; -@@ -3754,6 +3758,49 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) - - - /* -+ * set_baserel_rows_estimate -+ * Set the rows estimate for the given base relation. -+ * -+ * Rows is the estimated number of output tuples after applying -+ * restriction clauses. -+ * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ * The hook must set rel->rows. -+ */ -+void -+set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) -+{ -+ if (set_baserel_rows_estimate_hook) -+ (*set_baserel_rows_estimate_hook) (root, rel); -+ else -+ set_baserel_rows_estimate_standard(root, rel); -+} -+ -+/* -+ * set_baserel_rows_estimate -+ * Set the rows estimate for the given base relation. -+ * -+ * Rows is the estimated number of output tuples after applying -+ * restriction clauses. 
-+ */ -+void -+set_baserel_rows_estimate_standard(PlannerInfo *root, RelOptInfo *rel) -+{ -+ double nrows; -+ -+ nrows = rel->tuples * -+ clauselist_selectivity(root, -+ rel->baserestrictinfo, -+ 0, -+ JOIN_INNER, -+ NULL); -+ -+ rel->rows = clamp_row_est(nrows); -+} -+ -+/* - * set_baserel_size_estimates - * Set the size estimates for the given base relation. - * -@@ -3769,19 +3816,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) - void - set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) - { -- double nrows; -- - /* Should only be applied to base relations */ - Assert(rel->relid > 0); - -- nrows = rel->tuples * -- clauselist_selectivity(root, -- rel->baserestrictinfo, -- 0, -- JOIN_INNER, -- NULL); -- -- rel->rows = clamp_row_est(nrows); -+ set_baserel_rows_estimate(root, rel); - - cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); - -@@ -3792,13 +3830,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) - * get_parameterized_baserel_size - * Make a size estimate for a parameterized scan of a base relation. - * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ */ -+double -+get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, -+ List *param_clauses) -+{ -+ if (get_parameterized_baserel_size_hook) -+ return (*get_parameterized_baserel_size_hook) (root, rel, -+ param_clauses); -+ else -+ return get_parameterized_baserel_size_standard(root, rel, -+ param_clauses); -+} -+ -+/* -+ * get_parameterized_baserel_size_standard -+ * Make a size estimate for a parameterized scan of a base relation. -+ * - * 'param_clauses' lists the additional join clauses to be used. - * - * set_baserel_size_estimates must have been applied already. 
- */ - double --get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, -- List *param_clauses) -+get_parameterized_baserel_size_standard(PlannerInfo *root, RelOptInfo *rel, -+ List *param_clauses) - { - List *allclauses; - double nrows; -@@ -3828,6 +3886,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, - * set_joinrel_size_estimates - * Set the size estimates for the given join relation. - * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ * The hook must set rel->rows value. -+ */ -+void -+set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist) -+{ -+ if (set_joinrel_size_estimates_hook) -+ (*set_joinrel_size_estimates_hook) (root, rel, -+ outer_rel, -+ inner_rel, -+ sjinfo, -+ restrictlist); -+ else -+ set_joinrel_size_estimates_standard(root, rel, -+ outer_rel, -+ inner_rel, -+ sjinfo, -+ restrictlist); -+} -+ -+/* -+ * set_joinrel_size_estimates_standard -+ * Set the size estimates for the given join relation. -+ * - * The rel's targetlist must have been constructed already, and a - * restriction clause list that matches the given component rels must - * be provided. -@@ -3847,11 +3935,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, - * build_joinrel_tlist, and baserestrictcost is not used for join rels. 
- */ - void --set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, -- RelOptInfo *outer_rel, -- RelOptInfo *inner_rel, -- SpecialJoinInfo *sjinfo, -- List *restrictlist) -+set_joinrel_size_estimates_standard(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist) - { - rel->rows = calc_joinrel_size_estimate(root, - outer_rel, -@@ -3866,6 +3954,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - * get_parameterized_joinrel_size - * Make a size estimate for a parameterized scan of a join relation. - * -+ * To support loadable plugins that monitor or modify cardinality estimation, -+ * we provide a hook variable that lets a plugin get control before and -+ * after the cardinality estimation. -+ */ -+double -+get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses) -+{ -+ if (get_parameterized_joinrel_size_hook) -+ return (*get_parameterized_joinrel_size_hook) (root, rel, -+ outer_path, -+ inner_path, -+ sjinfo, -+ restrict_clauses); -+ else -+ return get_parameterized_joinrel_size_standard(root, rel, -+ outer_path, -+ inner_path, -+ sjinfo, -+ restrict_clauses); -+} -+ -+/* -+ * get_parameterized_joinrel_size_standard -+ * Make a size estimate for a parameterized scan of a join relation. -+ * - * 'rel' is the joinrel under consideration. - * 'outer_path', 'inner_path' are (probably also parameterized) Paths that - * produce the relations being joined. -@@ -3878,11 +3995,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - * set_joinrel_size_estimates must have been applied already. 
- */ - double --get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, -- Path *outer_path, -- Path *inner_path, -- SpecialJoinInfo *sjinfo, -- List *restrict_clauses) -+get_parameterized_joinrel_size_standard(PlannerInfo *root, RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses) - { - double nrows; - -diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 54d601f..b212325 100644 ---- a/src/backend/optimizer/plan/createplan.c -+++ b/src/backend/optimizer/plan/createplan.c -@@ -67,6 +67,8 @@ - #define CP_SMALL_TLIST 0x0002 /* Prefer narrower tlists */ - #define CP_LABEL_TLIST 0x0004 /* tlist must contain sortgrouprefs */ - -+/* Hook for plugins to get control in creating plan from path */ -+copy_generic_path_info_hook_type copy_generic_path_info_hook = NULL; - - static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path, - int flags); -@@ -154,7 +156,7 @@ static List *fix_indexorderby_references(PlannerInfo *root, IndexPath *index_pat - static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol); - static List *get_switched_clauses(List *clauses, Relids outerrelids); - static List *order_qual_clauses(PlannerInfo *root, List *clauses); --static void copy_generic_path_info(Plan *dest, Path *src); -+static void copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src); - static void copy_plan_costsize(Plan *dest, Plan *src); - static void label_sort_with_costsize(PlannerInfo *root, Sort *plan, - double limit_tuples); -@@ -977,7 +979,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) - false)), - NULL); - -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - - return plan; - } -@@ -1003,7 +1005,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) - - plan = make_append(subplans, tlist); - -- copy_generic_path_info(&plan->plan, 
(Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return (Plan *) plan; - } -@@ -1031,7 +1033,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) - * prepare_sort_from_pathkeys on it before we do so on the individual - * child plans, to make cross-checking the sort info easier. - */ -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - plan->targetlist = tlist; - plan->qual = NIL; - plan->lefttree = NULL; -@@ -1136,7 +1138,7 @@ create_result_plan(PlannerInfo *root, ResultPath *best_path) - - plan = make_result(tlist, (Node *) quals, NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1164,7 +1166,7 @@ create_material_plan(PlannerInfo *root, MaterialPath *best_path, int flags) - - plan = make_material(subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1367,7 +1369,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) - } - - /* Copy cost data from Path to Plan */ -- copy_generic_path_info(plan, &best_path->path); -+ copy_generic_path_info(root, plan, &best_path->path); - - return plan; - } -@@ -1399,7 +1401,7 @@ create_gather_plan(PlannerInfo *root, GatherPath *best_path) - best_path->single_copy, - subplan); - -- copy_generic_path_info(&gather_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &gather_plan->plan, &best_path->path); - - /* use parallel mode for parallel plans. 
*/ - root->glob->parallelModeNeeded = true; -@@ -1460,7 +1462,7 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path) - /* We need a Result node */ - plan = (Plan *) make_result(tlist, NULL, subplan); - -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - } - - return plan; -@@ -1515,7 +1517,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags) - - plan = make_sort_from_pathkeys(subplan, best_path->path.pathkeys); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1552,7 +1554,7 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path) - extract_grouping_ops(best_path->groupClause), - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1580,7 +1582,7 @@ create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flag - best_path->path.pathkeys, - best_path->numkeys); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1621,7 +1623,7 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) - best_path->numGroups, - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -1809,7 +1811,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) - subplan); - - /* Copy cost data from Path to Plan */ -- copy_generic_path_info(&plan->plan, &best_path->path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - } - - return (Plan *) plan; -@@ -1864,7 +1866,7 @@ create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path) - - plan = make_result(tlist, (Node *) best_path->quals, NULL); - -- 
copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - /* - * During setrefs.c, we'll need to replace references to the Agg nodes -@@ -1958,7 +1960,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) - wc->endOffset, - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2100,7 +2102,7 @@ create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags) - best_path->firstFlag, - numGroups); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2136,7 +2138,7 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path) - best_path->distinctList, - numGroups); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2159,7 +2161,7 @@ create_lockrows_plan(PlannerInfo *root, LockRowsPath *best_path, - - plan = make_lockrows(subplan, best_path->rowMarks, best_path->epqParam); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2217,7 +2219,7 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) - best_path->onconflict, - best_path->epqParam); - -- copy_generic_path_info(&plan->plan, &best_path->path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - return plan; - } -@@ -2241,7 +2243,7 @@ create_limit_plan(PlannerInfo *root, LimitPath *best_path, int flags) - best_path->limitOffset, - best_path->limitCount); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2287,7 +2289,7 @@ create_seqscan_plan(PlannerInfo *root, 
Path *best_path, - scan_clauses, - scan_relid); - -- copy_generic_path_info(&scan_plan->plan, best_path); -+ copy_generic_path_info(root, &scan_plan->plan, best_path); - - return scan_plan; - } -@@ -2333,7 +2335,7 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path, - scan_relid, - tsc); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -2514,7 +2516,7 @@ create_indexscan_plan(PlannerInfo *root, - indexorderbyops, - best_path->indexscandir); - -- copy_generic_path_info(&scan_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->plan, &best_path->path); - - return scan_plan; - } -@@ -2627,7 +2629,7 @@ create_bitmap_scan_plan(PlannerInfo *root, - bitmapqualorig, - baserelid); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -2888,7 +2890,7 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path, - scan_relid, - tidquals); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -2938,7 +2940,7 @@ create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path, - scan_relid, - subplan); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -2981,7 +2983,7 @@ create_functionscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_functionscan(tlist, scan_clauses, scan_relid, - functions, rte->funcordinality); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3025,7 +3027,7 @@ create_valuesscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = 
make_valuesscan(tlist, scan_clauses, scan_relid, - values_lists); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3118,7 +3120,7 @@ create_ctescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_ctescan(tlist, scan_clauses, scan_relid, - plan_id, cte_param_id); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3178,7 +3180,7 @@ create_worktablescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_worktablescan(tlist, scan_clauses, scan_relid, - cteroot->wt_param_id); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3238,7 +3240,7 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, - outer_plan); - - /* Copy cost data from Path to Plan; no need to make FDW do this */ -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - /* Copy foreign server OID; likewise, no need to make FDW do this */ - scan_plan->fs_server = rel->serverid; -@@ -3365,7 +3367,7 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path, - * Copy cost data from Path to Plan; no need to make custom-plan providers - * do this - */ -- copy_generic_path_info(&cplan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &cplan->scan.plan, &best_path->path); - - /* Likewise, copy the relids that are represented by this custom scan */ - cplan->custom_relids = best_path->path.parent->relids; -@@ -3496,7 +3498,7 @@ create_nestloop_plan(PlannerInfo *root, - inner_plan, - best_path->jointype); - -- copy_generic_path_info(&join_plan->join.plan, &best_path->path); -+ copy_generic_path_info(root, &join_plan->join.plan, 
&best_path->path); - - return join_plan; - } -@@ -3800,7 +3802,7 @@ create_mergejoin_plan(PlannerInfo *root, - best_path->jpath.jointype); - - /* Costs of sort and material steps are included in path cost already */ -- copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->jpath.path); - - return join_plan; - } -@@ -3939,7 +3941,7 @@ create_hashjoin_plan(PlannerInfo *root, - (Plan *) hash_plan, - best_path->jpath.jointype); - -- copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->jpath.path); - - return join_plan; - } -@@ -4578,13 +4580,16 @@ order_qual_clauses(PlannerInfo *root, List *clauses) - * Also copy the parallel-aware flag, which the executor *will* use. - */ - static void --copy_generic_path_info(Plan *dest, Path *src) -+copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src) - { - dest->startup_cost = src->startup_cost; - dest->total_cost = src->total_cost; - dest->plan_rows = src->rows; - dest->plan_width = src->pathtarget->width; - dest->parallel_aware = src->parallel_aware; -+ -+ if (copy_generic_path_info_hook) -+ (*copy_generic_path_info_hook) (root, dest, src); - } - - /* -diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 369179f..6e81ae8 100644 ---- a/src/include/nodes/plannodes.h -+++ b/src/include/nodes/plannodes.h -@@ -125,6 +125,16 @@ typedef struct Plan - * subselects) */ - - /* -+ * information for adaptive query optimization -+ */ -+ bool had_path; -+ List *path_clauses; -+ List *path_relids; -+ JoinType path_jointype; -+ int path_parallel_workers; -+ bool was_parametrized; -+ -+ /* - * Information for management of parameter-change-driven rescanning - * - * extParam includes the paramIDs of all external PARAM_EXEC params -diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h -index 2a4df2f..64994a7 100644 ---- 
a/src/include/optimizer/cost.h -+++ b/src/include/optimizer/cost.h -@@ -39,6 +39,34 @@ typedef enum - } ConstraintExclusionType; - - -+/* Hook for plugins to get control of cardinality estimation */ -+typedef void (*set_baserel_rows_estimate_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel); -+extern PGDLLIMPORT set_baserel_rows_estimate_hook_type -+ set_baserel_rows_estimate_hook; -+typedef double (*get_parameterized_baserel_size_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ List *param_clauses); -+extern PGDLLIMPORT get_parameterized_baserel_size_hook_type -+ get_parameterized_baserel_size_hook; -+typedef double (*get_parameterized_joinrel_size_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses); -+extern PGDLLIMPORT get_parameterized_joinrel_size_hook_type -+ get_parameterized_joinrel_size_hook; -+typedef void (*set_joinrel_size_estimates_hook_type) (PlannerInfo *root, -+ RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist); -+extern PGDLLIMPORT set_joinrel_size_estimates_hook_type -+ set_joinrel_size_estimates_hook; -+ -+ - /* - * prototypes for costsize.c - * routines to compute costs and sizes -@@ -161,21 +189,37 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, - SpecialJoinInfo *sjinfo, - List *restrictlist, - SemiAntiJoinFactors *semifactors); -+extern void set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel); -+extern void set_baserel_rows_estimate_standard(PlannerInfo *root, RelOptInfo *rel); - extern void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern double get_parameterized_baserel_size(PlannerInfo *root, - RelOptInfo *rel, - List *param_clauses); -+extern double get_parameterized_baserel_size_standard(PlannerInfo *root, -+ RelOptInfo *rel, -+ List *param_clauses); - extern double get_parameterized_joinrel_size(PlannerInfo *root, - 
RelOptInfo *rel, - Path *outer_path, - Path *inner_path, - SpecialJoinInfo *sjinfo, - List *restrict_clauses); -+extern double get_parameterized_joinrel_size_standard(PlannerInfo *root, -+ RelOptInfo *rel, -+ Path *outer_path, -+ Path *inner_path, -+ SpecialJoinInfo *sjinfo, -+ List *restrict_clauses); - extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - SpecialJoinInfo *sjinfo, - List *restrictlist); -+extern void set_joinrel_size_estimates_standard(PlannerInfo *root, RelOptInfo *rel, -+ RelOptInfo *outer_rel, -+ RelOptInfo *inner_rel, -+ SpecialJoinInfo *sjinfo, -+ List *restrictlist); - extern void set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern void set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel); - extern void set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel); -diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h -index 4fbb6cc..def55e5 100644 ---- a/src/include/optimizer/planmain.h -+++ b/src/include/optimizer/planmain.h -@@ -33,6 +33,12 @@ extern int force_parallel_mode; - /* query_planner callback to compute query_pathkeys */ - typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra); - -+/* hook for plugins to get control in creating plan from path */ -+typedef void (*copy_generic_path_info_hook_type) (PlannerInfo *root, -+ Plan *dest, Path *src); -+ -+extern PGDLLIMPORT copy_generic_path_info_hook_type copy_generic_path_info_hook; -+ - /* - * prototypes for plan/planmain.c - */ -diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index 7a6545f..42b58c0 100644 ---- a/src/backend/commands/explain.c -+++ b/src/backend/commands/explain.c -@@ -46,6 +46,9 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; - /* Hook for plugins to get control in explain_get_index_name() */ - explain_get_index_name_hook_type explain_get_index_name_hook = NULL; - -+/* Hook for 
plugins to get control in ExplainOnePlan() */ -+ExplainOnePlan_hook_type ExplainOnePlan_hook = NULL; -+ - - /* OR-able flags for ExplainXMLTag() */ - #define X_OPENING 0 -@@ -558,6 +561,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, - 3, es); - } - -+ if (ExplainOnePlan_hook) -+ ExplainOnePlan_hook(plannedstmt, into, es, -+ queryString, params, planduration); -+ - ExplainCloseGroup("Query", NULL, true, es); - } - -diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h -index 1f0bde7..9ca637c 100644 ---- a/src/include/commands/explain.h -+++ b/src/include/commands/explain.h -@@ -58,6 +58,12 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; - typedef const char *(*explain_get_index_name_hook_type) (Oid indexId); - extern PGDLLIMPORT explain_get_index_name_hook_type explain_get_index_name_hook; - -+/* Hook for plugins to get control in ExplainOnePlan() */ -+typedef void (*ExplainOnePlan_hook_type) (PlannedStmt *plannedstmt, IntoClause *into, -+ ExplainState *es, const char *queryString, -+ ParamListInfo params, const instr_time *planduration); -+extern PGDLLIMPORT ExplainOnePlan_hook_type ExplainOnePlan_hook; -+ - - extern void ExplainQuery(ExplainStmt *stmt, const char *queryString, - ParamListInfo params, DestReceiver *dest); From 483944862ee2105e68d88b83af6a0d7a9bed4340 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 15 Jun 2021 09:49:01 +0500 Subject: [PATCH 031/203] Add aqo-regress rule. It is heavily reduced version of regress test for successful passing. Check on passing of this rule is added into CI/CD script. Further we need to implement execution of the regression tests in different modes of AQO repeatedly on the same instance and the same database. 
--- .github/workflows/c-cpp.yml | 1 + Makefile | 17 +++ schedule | 204 ++++++++++++++++++++++++++++++++++++ 3 files changed, 222 insertions(+) create mode 100644 schedule diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 8f1dbeba..e13f20e2 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -27,3 +27,4 @@ jobs: patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg13.patch make -j4 > /dev/null && make -j4 -C contrib > /dev/null make -C contrib/aqo check + make -C contrib/aqo aqo-regress diff --git a/Makefile b/Makefile index bff27f38..46c49231 100644 --- a/Makefile +++ b/Makefile @@ -25,6 +25,13 @@ REGRESS = aqo_disabled \ fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw PG_CPPFLAGS += -I$(libpq_srcdir) -I$(fdw_srcdir) EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/conf.add +EXTRA_CLEAN = $(pg_regress_clean_files) sql/tablespace.sql \ + sql/misc.sql sql/largeobject.sql sql/create_function_2.sql \ + sql/create_function_1.sql sql/copy.sql sql/constraints.sql \ + expected/tablespace.out \ + expected/misc.out expected/largeobject.out expected/largeobject_1.out \ + expected/create_function_2.out expected/create_function_1.out \ + expected/copy.out expected/copy_1.out expected/constraints.out EXTRA_INSTALL = contrib/postgres_fdw DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql @@ -40,3 +47,13 @@ include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif +aqo-regress: + $(with_temp_install) \ + $(top_builddir)/src/test/regress/pg_regress \ + --temp-instance=./tmp_check \ + $(pg_regress_locale_flags) \ + --bindir='' \ + --dlpath=$(CURDIR)/$(top_builddir)/src/test/regress \ + --inputdir=$(abs_top_srcdir)/src/test/regress \ + --schedule=$(CURDIR)/schedule \ + --load-extension=aqo diff --git a/schedule b/schedule new file mode 100644 index 00000000..52f0063d --- /dev/null +++ b/schedule @@ -0,0 +1,204 @@ +# src/test/regress/serial_schedule +# This should 
probably be in an order similar to parallel_schedule. +# test: tablespace +test: boolean +test: char +test: name +test: varchar +test: text +test: int2 +test: int4 +test: int8 +test: oid +test: xid +test: float4 +test: float8 +test: bit +test: numeric +test: txid +test: uuid +test: enum +test: money +test: rangetypes +test: pg_lsn +test: regproc +test: strings +test: numerology +test: point +test: lseg +test: line +test: box +test: path +test: polygon +test: circle +test: date +test: time +test: timetz +test: timestamp +test: timestamptz +test: interval +test: inet +test: macaddr +test: macaddr8 +test: tstypes +test: geometry +test: horology +test: regex +test: oidjoins +test: type_sanity +test: opr_sanity +test: misc_sanity +test: comments +test: expressions +test: unicode +test: create_function_1 +test: create_type +test: create_table +test: create_function_2 +test: copy +test: copyselect +test: copydml +test: insert +test: insert_conflict +test: create_misc +test: create_operator +test: create_procedure +test: create_index +test: create_index_spgist +test: create_view +test: index_including +test: index_including_gist +test: create_aggregate +test: create_function_3 +test: create_cast +test: constraints +test: triggers +test: select +test: inherit +test: typed_table +test: vacuum +test: drop_if_exists +test: updatable_views +test: roleattributes +test: create_am +test: hash_func +test: errors +test: infinite_recurse +# test: sanity_check +test: select_into +test: select_distinct +test: select_distinct_on +test: select_implicit +test: select_having +# test: subselect +test: incremental_sort +# test: union +test: case +# test: join +test: aggregates +test: transactions +ignore: random +test: random +test: portals +test: arrays +test: btree_index +test: hash_index +test: update +test: delete +test: namespace +test: prepared_xacts +test: brin +test: gin +test: gist +test: spgist +test: privileges +test: init_privs +test: security_label +test: collate +test: matview 
+test: lock +test: replica_identity +test: rowsecurity +test: object_address +test: tablesample +test: groupingsets +test: drop_operator +test: password +test: identity +test: generated +test: join_hash +test: create_table_like +test: alter_generic +test: alter_operator +test: misc +test: async +test: dbsize +test: misc_functions +test: sysviews +test: tsrf +test: tid +test: tidscan +test: collate.icu.utf8 +test: rules +test: psql +test: psql_crosstab +test: amutils +test: stats_ext +test: collate.linux.utf8 +# test: select_parallel +test: write_parallel +test: publication +test: subscription +test: select_views +test: portals_p2 +test: foreign_key +test: cluster +test: dependency +test: guc +test: bitmapops +test: combocid +test: tsearch +test: tsdicts +test: foreign_data +test: window +test: xmlmap +test: functional_deps +test: advisory_lock +test: indirect_toast +test: equivclass +test: json +test: jsonb +test: json_encoding +test: jsonpath +test: jsonpath_encoding +test: jsonb_jsonpath +test: plancache +test: limit +test: plpgsql +test: copy2 +test: temp +test: domain +test: rangefuncs +test: prepare +test: conversion +test: truncate +test: alter_table +test: sequence +test: polymorphism +test: rowtypes +test: returning +test: largeobject +# test: with +test: xml +test: partition_join +test: partition_prune +test: reloptions +test: hash_part +test: indexing +test: partition_aggregate +test: partition_info +test: tuplesort +test: explain +test: event_trigger +test: fast_default +test: stats From ac52b5a5cb6705fe7c4917b48a46bf6adaed751c Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Mon, 21 Jun 2021 19:39:42 +0500 Subject: [PATCH 032/203] Bugfix. AQO must be disabled for queries inside a parallel operation, as either the master or a worker. 
--- preprocessing.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/preprocessing.c b/preprocessing.c index 971f2a80..bf440e90 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -139,7 +139,7 @@ aqo_planner(Query *parse, strstr(application_name, "pgfdw:") != NULL || /* caused by fdw */ get_extension_oid("aqo", true) == InvalidOid || creating_extension || - IsParallelWorker() || + IsInParallelMode() || IsParallelWorker() || (aqo_mode == AQO_MODE_DISABLED && !force_collect_stat) || isQueryUsingSystemRelation(parse) || RecoveryInProgress()) From 329a89e4620ef53311ece4c60e2f531833ce648f Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 15 Jun 2021 14:12:16 +0500 Subject: [PATCH 033/203] Add a callback on an action of a transaction cleaning. Now AQO could clean global data, such of query_text at the end of transaction. It is needed because errors during query execution phases. We need to be sure that all such data cleaned before the next query execution. Interface of the add_query_text routine changed. 
--- aqo.c | 17 +++++++++++++++++ aqo.h | 2 +- preprocessing.c | 19 +++++++++++++------ storage.c | 2 +- 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/aqo.c b/aqo.c index b35bc2fc..c84567fd 100644 --- a/aqo.c +++ b/aqo.c @@ -111,6 +111,22 @@ ExplainOneNode_hook_type prev_ExplainOneNode_hook; * *****************************************************************************/ +static void +aqo_free_callback(ResourceReleasePhase phase, + bool isCommit, + bool isTopLevel, + void *arg) +{ + if (phase != RESOURCE_RELEASE_AFTER_LOCKS) + return; + + if (query_text != NULL) + { + pfree(query_text); + query_text = NULL; + } +} + void _PG_init(void) { @@ -208,6 +224,7 @@ _PG_init(void) AQOMemoryContext = AllocSetContextCreate(TopMemoryContext, "AQOMemoryContext", ALLOCSET_DEFAULT_SIZES); + RegisterResourceReleaseCallback(aqo_free_callback, NULL); } PG_FUNCTION_INFO_V1(invalidate_deactivated_queries_cache); diff --git a/aqo.h b/aqo.h index 1b37a3a7..2633ca39 100644 --- a/aqo.h +++ b/aqo.h @@ -285,7 +285,7 @@ int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); extern bool find_query(int qhash, Datum *search_values, bool *search_nulls); extern bool update_query(int qhash, int fhash, bool learn_aqo, bool use_aqo, bool auto_tuning); -extern bool add_query_text(int query_hash, const char *query_text); +extern bool add_query_text(int query_hash); extern bool load_fss(int fhash, int fss_hash, int ncols, double **matrix, double *targets, int *rows); extern bool update_fss(int fhash, int fss_hash, int nrows, int ncols, diff --git a/preprocessing.c b/preprocessing.c index bf440e90..14ebeda5 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -71,16 +71,19 @@ static bool isQueryUsingSystemRelation_walker(Node *node, void *context); void get_query_text(ParseState *pstate, Query *query) { - MemoryContext oldCxt; - /* * Duplicate query string into private AQO memory context for guard * from possible memory context switching. 
*/ - oldCxt = MemoryContextSwitchTo(AQOMemoryContext); if (pstate) + { + MemoryContext oldCxt = MemoryContextSwitchTo(AQOMemoryContext); query_text = pstrdup(pstate->p_sourcetext); - MemoryContextSwitchTo(oldCxt); + MemoryContextSwitchTo(oldCxt); + } + else + /* Can't imagine such case. Still, throw an error. */ + elog(ERROR, "[AQO]: Query text is not found in post-parse step"); if (prev_post_parse_analyze_hook) prev_post_parse_analyze_hook(pstate, query); @@ -235,8 +238,12 @@ aqo_planner(Query *parse, query_context.use_aqo, query_context.auto_tuning); - - add_query_text(query_context.query_hash, query_text); + /* + * Add query text into the ML-knowledge base. Just for further + * analysis. In the case of cached plans we could have NULL query text. + */ + if (query_text != NULL) + add_query_text(query_context.query_hash); } } else diff --git a/storage.c b/storage.c index b19be49f..94320329 100644 --- a/storage.c +++ b/storage.c @@ -215,7 +215,7 @@ update_query(int qhash, int fhash, * Returns false if the operation failed, true otherwise. */ bool -add_query_text(int qhash, const char *query_text) +add_query_text(int qhash) { RangeVar *rv; Relation hrel; From 18e88defa6d065ac6cc6d0250e7498ee6ee3d1ee Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Sun, 27 Jun 2021 13:50:02 +0300 Subject: [PATCH 034/203] Disallow AQO to write into the ML-tables if can be detected that the query must be read-only (by the flag XactReadOnly). --- storage.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/storage.c b/storage.c index 94320329..f8eb255e 100644 --- a/storage.c +++ b/storage.c @@ -125,6 +125,10 @@ update_query(int qhash, int fhash, ScanKeyData key; SnapshotData snap; + /* Couldn't allow to write if xact must be read-only. 
*/ + if (XactReadOnly) + return false; + reloid = RelnameGetRelid("aqo_queries_query_hash_idx"); if (!OidIsValid(reloid)) { @@ -228,6 +232,10 @@ add_query_text(int qhash) values[0] = Int32GetDatum(qhash); values[1] = CStringGetTextDatum(query_text); + /* Couldn't allow to write if xact must be read-only. */ + if (XactReadOnly) + return false; + reloid = RelnameGetRelid("aqo_query_texts_query_hash_idx"); if (!OidIsValid(reloid)) { @@ -374,6 +382,10 @@ update_fss(int fhash, int fsshash, int nrows, int ncols, ScanKeyData key[2]; bool result = true; + /* Couldn't allow to write if xact must be read-only. */ + if (XactReadOnly) + return false; + reloid = RelnameGetRelid("aqo_fss_access_idx"); if (!OidIsValid(reloid)) { @@ -563,6 +575,10 @@ update_aqo_stat(int qhash, QueryStat *stat) IndexScanDesc scan; ScanKeyData key; + /* Couldn't allow to write if xact must be read-only. */ + if (XactReadOnly) + return; + reloid = RelnameGetRelid("aqo_query_stat_idx"); if (!OidIsValid(reloid)) { From 90a2d663d87c495a102ee326929a0c12fc8e435e Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Fri, 25 Jun 2021 07:01:30 +0300 Subject: [PATCH 035/203] Revert support of FDW pushdowns because of unstability. Add ignore option in pg_regress schedule for non-passed tests. --- postprocessing.c | 41 ----------------------------------------- 1 file changed, 41 deletions(-) diff --git a/postprocessing.c b/postprocessing.c index f10e94ea..c73d9eba 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -625,47 +625,6 @@ aqo_copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src) dest->path_clauses = ((JoinPath *) src)->joinrestrictinfo; dest->path_jointype = ((JoinPath *) src)->jointype; } - else if (src->type == T_ForeignPath) - { - ForeignPath *fpath = (ForeignPath *) src; - PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) fpath->path.parent->fdw_private; - - /* - * Pushed down foreign join keeps clauses in special fdw_private - * structure. 
- * I'm not sure what fpinfo structure keeps clauses for sufficient time. - * So, copy clauses. - */ - - dest->path_clauses = list_concat(list_copy(fpinfo->joinclauses), - list_copy(fpinfo->remote_conds)); - dest->path_clauses = list_concat(dest->path_clauses, - list_copy(fpinfo->local_conds)); - - dest->path_jointype = ((JoinPath *) src)->jointype; - - dest->path_relids = get_list_of_relids(root, fpinfo->lower_subquery_rels); - - if (fpinfo->outerrel) - { - dest->path_clauses = list_concat(dest->path_clauses, - list_copy(fpinfo->outerrel->baserestrictinfo)); - dest->path_clauses = list_concat(dest->path_clauses, - list_copy(fpinfo->outerrel->joininfo)); - dest->path_relids = list_concat(dest->path_relids, - get_list_of_relids(root, fpinfo->outerrel->relids)); - } - - if (fpinfo->innerrel) - { - dest->path_clauses = list_concat(dest->path_clauses, - list_copy(fpinfo->innerrel->baserestrictinfo)); - dest->path_clauses = list_concat(dest->path_clauses, - list_copy(fpinfo->innerrel->joininfo)); - dest->path_relids = list_concat(dest->path_relids, - get_list_of_relids(root, fpinfo->innerrel->relids)); - } - } else { dest->path_clauses = list_concat( From e1a7199253b998927676effc4ba345078a72b3c0 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Fri, 25 Jun 2021 21:53:14 +0300 Subject: [PATCH 036/203] Add list of currently used feature spaces in the backend. It is needed to exclude problems with recursive execution induced by cache invalidation. 
--- aqo.c | 6 ++++++ aqo.h | 1 + postprocessing.c | 4 ++++ preprocessing.c | 18 ++++++++++++++++-- 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/aqo.c b/aqo.c index c84567fd..74215c14 100644 --- a/aqo.c +++ b/aqo.c @@ -125,6 +125,12 @@ aqo_free_callback(ResourceReleasePhase phase, pfree(query_text); query_text = NULL; } + + if (isTopLevel) + { + list_free(cur_classes); + cur_classes = NIL; + } } void diff --git a/aqo.h b/aqo.h index 2633ca39..c8885595 100644 --- a/aqo.h +++ b/aqo.h @@ -384,4 +384,5 @@ extern void selectivity_cache_clear(void); extern Oid get_aqo_schema(void); extern void init_lock_tag(LOCKTAG *tag, uint32 key1, uint32 key2); +extern List *cur_classes; #endif diff --git a/postprocessing.c b/postprocessing.c index c73d9eba..c3c5f86b 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -579,7 +579,11 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) pfree_query_stat(stat); } + /* Allow concurrent queries to update this feature space. */ LockRelease(&tag, ExclusiveLock, false); + + cur_classes = list_delete_int(cur_classes, query_context.query_hash); + RemoveFromQueryEnv(queryDesc); end: diff --git a/preprocessing.c b/preprocessing.c index 14ebeda5..1fb22b1c 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -61,6 +61,9 @@ #include "access/table.h" #include "commands/extension.h" +/* List of feature spaces, that are processing in this backend. 
*/ +List *cur_classes = NIL; + static bool isQueryUsingSystemRelation(Query *query); static bool isQueryUsingSystemRelation_walker(Node *node, void *context); @@ -128,6 +131,7 @@ aqo_planner(Query *parse, Datum query_params[5]; bool query_nulls[5] = {false, false, false, false, false}; LOCKTAG tag; + MemoryContext oldCxt; selectivity_cache_clear(); @@ -154,11 +158,15 @@ aqo_planner(Query *parse, boundParams); } - INSTR_TIME_SET_CURRENT(query_context.query_starttime); query_context.query_hash = get_query_hash(parse, query_text); - if (query_is_deactivated(query_context.query_hash)) + if (query_is_deactivated(query_context.query_hash) || + list_member_int(cur_classes, query_context.query_hash)) { + /* Disable AQO for deactivated query or for query belonged to a + * feature space, that is processing yet (disallow invalidation + * recursion, as an example). + */ disable_aqo_for_query(); return call_default_planner(parse, query_string, @@ -166,6 +174,12 @@ aqo_planner(Query *parse, boundParams); } + oldCxt = MemoryContextSwitchTo(AQOMemoryContext); + cur_classes = lappend_int(cur_classes, query_context.query_hash); + MemoryContextSwitchTo(oldCxt); + + INSTR_TIME_SET_CURRENT(query_context.query_starttime); + /* * find-add query and query text must be atomic operation to prevent * concurrent insertions. From 8651c232c324ec6b3c6d0777694f0d57feb9c9c2 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Mon, 28 Jun 2021 13:33:17 +0300 Subject: [PATCH 037/203] Bugfix. The predicted_cardinality field of a plan node could be established by previous execution of this plan in another query. 
--- postprocessing.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/postprocessing.c b/postprocessing.c index c3c5f86b..232668a3 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -279,11 +279,14 @@ learnOnPlanState(PlanState *p, void *context) learn_rows = p->instrument->ntuples / p->instrument->nloops; } - /* Calculate predicted cardinality */ - if (p->plan->predicted_cardinality > 0.) + /* + * Calculate predicted cardinality. + * We could find a positive value of predicted cardinality in the case of + * reusing plan caused by the rewriting procedure. + * Also it may be caused by using of a generic plan. + */ + if (p->plan->predicted_cardinality > 0. && query_context.use_aqo) { - Assert(query_context.use_aqo); - /* AQO made prediction. use it. */ predicted = p->plan->predicted_cardinality; } From e3beec6b8fe48222b444f5c17f5f993d20f07dde Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Mon, 28 Jun 2021 15:08:26 +0300 Subject: [PATCH 038/203] Fix typo --- storage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage.c b/storage.c index f8eb255e..d8873839 100644 --- a/storage.c +++ b/storage.c @@ -43,7 +43,7 @@ static bool my_simple_heap_update(Relation relation, * Returns whether the query with given hash is in aqo_queries. * If yes, returns the content of the first line with given hash. * - * Use dirty snapshot to see all (include in-progess) data. We want to prevent + * Use dirty snapshot to see all (include in-progress) data. We want to prevent * wait in the XactLockTableWait routine. */ bool From 9d5ad309ec49790c06a17ba135eed030cae54660 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 17 Sep 2021 09:04:23 +0500 Subject: [PATCH 039/203] Fix into the pgbench TAP test: reduce number of clients and threads during the stress test on a postgres instance with installed AQO extension. This is enough to check correctness of concurrent access to a ML knowledge base. 
--- t/001_pgbench.pl | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 6b3f34a4..a66edb1f 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -13,8 +13,6 @@ aqo.log_ignorance = 'on' }); -#my $result1; - $node->start(); # Check conflicts of accessing to the ML knowledge base @@ -22,26 +20,26 @@ $node->safe_psql('postgres', "CREATE EXTENSION aqo"); $node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'intelligent'"); $node->command_ok([ 'pgbench', '-i', '-s', '1' ], 'init pgbench tables'); -$node->command_ok([ 'pgbench', '-t', "1000", '-c', "20", '-j', "20" ], +$node->command_ok([ 'pgbench', '-t', "1000", '-c', "10", '-j', "10" ], 'pgbench in intelligent mode'); $node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'controlled'"); -$node->command_ok([ 'pgbench', '-t', "1000", '-c', "20", '-j', "20" ], +$node->command_ok([ 'pgbench', '-t', "1000", '-c', "10", '-j', "10" ], 'pgbench in controlled mode'); $node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'disabled'"); -$node->command_ok([ 'pgbench', '-t', "1000", '-c', "20", '-j', "20" ], +$node->command_ok([ 'pgbench', '-t', "1000", '-c', "10", '-j', "10" ], 'pgbench in disabled mode'); $node->safe_psql('postgres', "DROP EXTENSION aqo"); $node->safe_psql('postgres', "CREATE EXTENSION aqo"); $node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'learn'"); -$node->command_ok([ 'pgbench', '-t', "1000", '-c', "20", '-j', "20" ], +$node->command_ok([ 'pgbench', '-t', "1000", '-c', "10", '-j', "10" ], 'pgbench in learn mode'); $node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'frozen'"); -$node->command_ok([ 'pgbench', '-t', "1000", '-c', "20", '-j', "20" ], +$node->command_ok([ 'pgbench', '-t', "1000", '-c', "10", '-j', "10" ], 'pgbench in frozen mode'); $node->safe_psql('postgres', "DROP EXTENSION aqo"); From d32b3d00d9f7dc3df547cf7317ce777a38b79cad Mon Sep 17 00:00:00 2001 From: "Andrey V. 
Lepikhov" Date: Fri, 3 Dec 2021 16:14:13 +0500 Subject: [PATCH 040/203] Bugfix. Filter system-dependent strings in an explain output of 'aqo_fdw' and 'unsupported' regression tests. --- expected/aqo_fdw.out | 43 ++++++++++++++++++++++++---------------- expected/unsupported.out | 7 ++++--- sql/aqo_fdw.sql | 19 ++++++++++++++---- sql/unsupported.sql | 2 ++ 4 files changed, 47 insertions(+), 24 deletions(-) diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index d947c9c7..66bc4970 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -21,7 +21,15 @@ CREATE TABLE local (x int); CREATE FOREIGN TABLE frgn(x int) SERVER loopback OPTIONS (table_name 'local'); INSERT INTO frgn (x) VALUES (1); ANALYZE local; --- Trivial foreign scan.s +-- Utility tool. Allow to filter system-dependent strings from explain output. +CREATE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +-- Trivial foreign scan. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn; QUERY PLAN @@ -75,28 +83,29 @@ SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants (5 rows) -- Trivial JOIN push-down. 
-EXPLAIN (COSTS OFF) -SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; - QUERY PLAN ------------------------------------- - Merge Join +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; +') AS str WHERE str NOT LIKE '%Sort Method%'; + str +------------------------------------------------------------ + Merge Join (actual rows=1 loops=1) + AQO not used Merge Cond: (a.x = b.x) - -> Sort + -> Sort (actual rows=1 loops=1) + AQO not used Sort Key: a.x - -> Foreign Scan on frgn a - -> Sort + -> Foreign Scan on frgn a (actual rows=1 loops=1) + AQO not used + -> Sort (actual rows=1 loops=1) + AQO not used Sort Key: b.x - -> Foreign Scan on frgn b + -> Foreign Scan on frgn b (actual rows=1 loops=1) + AQO not used Using aqo: true AQO mode: LEARN JOINS: 0 -(11 rows) - -SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; - x | x ----+--- - 1 | 1 -(1 row) +(16 rows) EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; diff --git a/expected/unsupported.out b/expected/unsupported.out index a9cbeca6..9304579e 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -3,14 +3,15 @@ SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; DROP TABLE IF EXISTS t; CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; +SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM t GROUP BY (x) HAVING x > 3; - QUERY PLAN +') AS str WHERE str NOT LIKE '%Memory Usage%'; + str ----------------------------------------------- HashAggregate (actual rows=17 loops=1) AQO not used Group Key: x - Batches: 1 Memory Usage: 40kB -> Seq Scan on t (actual rows=801 loops=1) AQO not used Filter: (x > 3) @@ -18,7 +19,7 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) Using aqo: true AQO mode: LEARN JOINS: 0 -(11 rows) +(10 rows) -- Do not support having clauses for now. 
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index e8612339..80917dec 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -26,7 +26,16 @@ CREATE FOREIGN TABLE frgn(x int) SERVER loopback OPTIONS (table_name 'local'); INSERT INTO frgn (x) VALUES (1); ANALYZE local; --- Trivial foreign scan.s +-- Utility tool. Allow to filter system-dependent strings from explain output. +CREATE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +-- Trivial foreign scan. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn; EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) @@ -41,9 +50,11 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants -- Trivial JOIN push-down. -EXPLAIN (COSTS OFF) -SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; -SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; +') AS str WHERE str NOT LIKE '%Sort Method%'; + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 320e9adf..76edcdb3 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -5,8 +5,10 @@ SET aqo.show_details = 'on'; DROP TABLE IF EXISTS t; CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; +SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM t GROUP BY (x) HAVING x > 3; +') AS str WHERE str NOT LIKE '%Memory Usage%'; -- Do not support having clauses for now. 
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) From 892713f34078fd9cb927eb8df7aacb8ecdb69ed5 Mon Sep 17 00:00:00 2001 From: Alena0704 Date: Fri, 3 Dec 2021 17:42:31 +0300 Subject: [PATCH 041/203] fix problem with test in unsupported --- expected/unsupported.out | 7 ++++--- sql/unsupported.sql | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/expected/unsupported.out b/expected/unsupported.out index 9304579e..95a011bc 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -22,14 +22,15 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) (10 rows) -- Do not support having clauses for now. +SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM t GROUP BY (x) HAVING x > 3; - QUERY PLAN +') AS str WHERE str NOT LIKE '%Memory Usage%'; + str ----------------------------------------------- HashAggregate (actual rows=17 loops=1) AQO not used Group Key: x - Batches: 1 Memory Usage: 40kB -> Seq Scan on t (actual rows=801 loops=1) AQO: rows=801, error=0% Filter: (x > 3) @@ -37,6 +38,6 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) Using aqo: true AQO mode: LEARN JOINS: 0 -(11 rows) +(10 rows) DROP EXTENSION aqo; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 76edcdb3..c49271dc 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -11,7 +11,9 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) ') AS str WHERE str NOT LIKE '%Memory Usage%'; -- Do not support having clauses for now. 
+SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM t GROUP BY (x) HAVING x > 3; +') AS str WHERE str NOT LIKE '%Memory Usage%'; DROP EXTENSION aqo; From a203a01d0f2c7d101ea812567a70cd37d47f3aaa Mon Sep 17 00:00:00 2001 From: Marina Polyakova Date: Wed, 3 Aug 2022 22:35:39 +0300 Subject: [PATCH 042/203] PGPRO-6403: fix conf.add so PostgreSQL installchecks pass with aqo loaded --- conf.add | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf.add b/conf.add index 936ca166..ed455870 100644 --- a/conf.add +++ b/conf.add @@ -1,3 +1,3 @@ autovacuum = off shared_preload_libraries = 'postgres_fdw, aqo' -max_parallel_workers_per_gather = 0 +max_parallel_workers_per_gather = 1 # switch off parallel workers because of unsteadiness From 1675c39ad071351c3d35a5a5b7c722b16cc7c46b Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 5 Aug 2022 15:59:27 +0300 Subject: [PATCH 043/203] Add generated subdirectories in .gitignore. --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index e2fcd401..238f170c 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,6 @@ regression.out *.gcov tags +# Generated subdirectories +/log/ +/tmp_check/ \ No newline at end of file From e9c93641c2f9634d76de76432b8c69aa3dd5f914 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 5 Aug 2022 16:13:41 +0300 Subject: [PATCH 044/203] Add empty line in .gitignore aqo --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 238f170c..1811e98d 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,4 @@ tags # Generated subdirectories /log/ -/tmp_check/ \ No newline at end of file +/tmp_check/ From b5cf6716f911f17a98161586f828bab5205961bb Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Wed, 10 Aug 2022 15:50:45 +0300 Subject: [PATCH 045/203] Clear AQO_cache_mem_ctx memory context. 
--- aqo.c | 4 ++++ aqo.h | 1 + cardinality_hooks.c | 7 ++++--- selectivity_cache.c | 1 + 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/aqo.c b/aqo.c index 74215c14..b038f0dd 100644 --- a/aqo.c +++ b/aqo.c @@ -85,6 +85,7 @@ double log_selectivity_lower_bound = -30; * after a query parsing and is used during the query planning. */ MemoryContext AQOMemoryContext; +MemoryContext AQO_cache_mem_ctx; QueryContextData query_context; /* Additional plan info */ int njoins; @@ -230,6 +231,9 @@ _PG_init(void) AQOMemoryContext = AllocSetContextCreate(TopMemoryContext, "AQOMemoryContext", ALLOCSET_DEFAULT_SIZES); + AQO_cache_mem_ctx = AllocSetContextCreate(TopMemoryContext, + "AQO_cache_mem_ctx", + ALLOCSET_DEFAULT_SIZES); RegisterResourceReleaseCallback(aqo_free_callback, NULL); } diff --git a/aqo.h b/aqo.h index c8885595..99e9e1d4 100644 --- a/aqo.h +++ b/aqo.h @@ -249,6 +249,7 @@ extern char *query_text; /* Memory context for long-live data */ extern MemoryContext AQOMemoryContext; +extern MemoryContext AQO_cache_mem_ctx; /* Saved hook values in case of unload */ extern post_parse_analyze_hook_type prev_post_parse_analyze_hook; diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 38bfa6c9..b6c2c985 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -206,7 +206,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, if (query_context.use_aqo) { - MemoryContext mcxt; + MemoryContext old_ctx_m; allclauses = list_concat(list_copy(param_clauses), list_copy(rel->baserestrictinfo)); @@ -215,7 +215,8 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, relid = planner_rt_fetch(rel->relid, root)->relid; get_eclasses(allclauses, &nargs, &args_hash, &eclass_hash); - mcxt = MemoryContextSwitchTo(CacheMemoryContext); + old_ctx_m = MemoryContextSwitchTo(AQO_cache_mem_ctx); + forboth(l, allclauses, l2, selectivities) { current_hash = get_clause_hash( @@ -225,7 +226,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, *((double *) 
lfirst(l2))); } - MemoryContextSwitchTo(mcxt); + MemoryContextSwitchTo(old_ctx_m); pfree(args_hash); pfree(eclass_hash); } diff --git a/selectivity_cache.c b/selectivity_cache.c index 12ecd699..a57682db 100644 --- a/selectivity_cache.c +++ b/selectivity_cache.c @@ -87,5 +87,6 @@ selectivity_cache_find_global_relid(int clause_hash, int global_relid) void selectivity_cache_clear(void) { + MemoryContextReset(AQO_cache_mem_ctx); objects = NIL; } From a384832638cc2547a6b7d7e853a01d3b2008a390 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 22 Mar 2022 09:39:35 +0500 Subject: [PATCH 046/203] Remove an ignored node detection feature. --- Makefile | 2 +- aqo.c | 14 ---- expected/gucs.out | 58 --------------- ignorance.c | 184 ---------------------------------------------- ignorance.h | 12 --- postprocessing.c | 11 --- preprocessing.c | 3 +- sql/gucs.sql | 26 ------- t/001_pgbench.pl | 1 - 9 files changed, 2 insertions(+), 309 deletions(-) delete mode 100644 ignorance.c delete mode 100644 ignorance.h diff --git a/Makefile b/Makefile index 46c49231..ef12bc7d 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ PGFILEDESC = "AQO - Adaptive Query Optimization" MODULE_big = aqo OBJS = aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ -selectivity_cache.o storage.o utils.o ignorance.o $(WIN32RES) +selectivity_cache.o storage.o utils.o $(WIN32RES) TAP_TESTS = 1 diff --git a/aqo.c b/aqo.c index b038f0dd..374895dc 100644 --- a/aqo.c +++ b/aqo.c @@ -9,7 +9,6 @@ */ #include "aqo.h" -#include "ignorance.h" #include "access/relation.h" #include "access/table.h" @@ -189,19 +188,6 @@ _PG_init(void) NULL ); - DefineCustomBoolVariable( - "aqo.log_ignorance", - "Log in a special table all feature spaces for which the AQO prediction was not successful.", - NULL, - &aqo_log_ignorance, - false, - PGC_SUSET, - 0, - NULL, - set_ignorance, - NULL - ); - prev_planner_hook = planner_hook; 
planner_hook = aqo_planner; prev_post_parse_analyze_hook = post_parse_analyze_hook; diff --git a/expected/gucs.out b/expected/gucs.out index a4f91130..6a28de78 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -29,61 +29,3 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) (6 rows) DROP EXTENSION aqo; -SET aqo.log_ignorance = 'on'; -SET aqo.log_ignorance = 'off'; -SET aqo.log_ignorance = 'off'; -SET aqo.log_ignorance = 'on'; -CREATE EXTENSION aqo; -SET aqo.log_ignorance = 'off'; -SET aqo.log_ignorance = 'on'; -SET aqo.log_ignorance = 'on'; -\d aqo_ignorance - Table "public.aqo_ignorance" - Column | Type | Collation | Nullable | Default ------------+---------+-----------+----------+--------- - qhash | integer | | | - fhash | integer | | | - fss_hash | integer | | | - node_type | integer | | | - node | text | | | -Indexes: - "aqo_ignorance_idx" UNIQUE, btree (qhash, fhash, fss_hash) - -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) -SELECT * FROM t; - QUERY PLAN ------------------------------------------ - Seq Scan on t (actual rows=100 loops=1) - AQO not used - Using aqo: true - AQO mode: LEARN - JOINS: 0 -(5 rows) - -SELECT node_type FROM aqo_ignorance; - node_type ------------ -(0 rows) - -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM t; - QUERY PLAN ------------------------------------------ - Seq Scan on t (actual rows=100 loops=1) - AQO: rows=100, error=0% - Using aqo: true - AQO mode: LEARN - JOINS: 0 -(5 rows) - -SELECT node_type FROM aqo_ignorance; - node_type ------------ -(0 rows) - --- This GUC can be changed by an admin only. 
-CREATE ROLE noadmin; -SET ROLE noadmin; -SET aqo.log_ignorance = 'off'; -ERROR: permission denied to set parameter "aqo.log_ignorance" -RESET ROLE; -DROP EXTENSION aqo; diff --git a/ignorance.c b/ignorance.c deleted file mode 100644 index 88bb97d1..00000000 --- a/ignorance.c +++ /dev/null @@ -1,184 +0,0 @@ -#include "aqo.h" -#include "ignorance.h" - -#include "access/heapam.h" -#include "access/parallel.h" -#include "executor/spi.h" -#include "utils/lsyscache.h" -#include "miscadmin.h" - -bool aqo_log_ignorance; - -void -set_ignorance(bool newval, void *extra) -{ - /* - * On postgres start we can't create any table. - * It is not problem. We will check existence at each update and create this - * table in dynamic mode, if needed. - */ - if (IsUnderPostmaster && !IsParallelWorker() && newval && - (aqo_log_ignorance != newval)) - /* Create storage and no error, if it exists already. */ - create_ignorance_table(true); - - aqo_log_ignorance = newval; -} - -bool -create_ignorance_table(bool fail_ok) -{ - Oid nspid = get_aqo_schema(); - char *nspname; - char *sql; - int rc; - - if (nspid == InvalidOid) - { - if (!fail_ok) - ereport(ERROR, - (errmsg("AQO extension is not installed"), - errdetail("AQO shared library is enabled but extension isn't installed."))); - else - return false; - } - - nspname = get_namespace_name(nspid); - Assert(nspname != NULL); - - /* Check the table existence. */ - if (get_relname_relid("aqo_ignorance", nspid) != InvalidOid) - { - if (!fail_ok) - elog(PANIC, "aqo_ignorance table exists yet."); - else - return false; - } - - sql = psprintf("CREATE TABLE %s.aqo_ignorance (qhash int, fhash int, fss_hash int, node_type int, node text);" - "CREATE UNIQUE INDEX aqo_ignorance_idx ON aqo_ignorance (qhash, fhash, fss_hash);", - nspname); - - SPI_connect(); - rc = SPI_execute(sql, false, 0); - SPI_finish(); - - if (rc < 0) - /* Can't ignore this problem. */ - elog(ERROR, "Failed to create aqo_ignorance table %s. 
status: %d", - sql, rc); - - pfree(nspname); - pfree(sql); - return true; -} - -void -update_ignorance(int qhash, int fhash, int fss_hash, Plan *plan) -{ - RangeVar *rv; - Relation hrel; - Relation irel; - SnapshotData snap; - TupleTableSlot *slot; - TupleDesc tupDesc; - HeapTuple tuple; - Datum values[5]; - bool isnull[5] = { false, false, false, false, false }; - bool shouldFree; - Oid reloid; - IndexScanDesc scan; - ScanKeyData key[3]; - LOCKTAG tag; - Oid nspid = get_aqo_schema(); - char *nspname; - - if (!OidIsValid(nspid)) - elog(PANIC, "AQO schema does not exists!"); - nspname = get_namespace_name(nspid); - Assert(nspname != 0); - - rv = makeRangeVar(nspname, "aqo_ignorance_idx", -1); - reloid = RangeVarGetRelid(rv, NoLock, true); - if (!OidIsValid(reloid)) - { - /* This table doesn't created on instance startup. Create now. */ - create_ignorance_table(false); - reloid = RangeVarGetRelid(rv, NoLock, true); - if (!OidIsValid(reloid)) - elog(PANIC, "Ignorance table does not exists!"); - } - - init_lock_tag(&tag, (uint32) fhash, (uint32) fss_hash); - LockAcquire(&tag, ExclusiveLock, false, false); - - rv = makeRangeVar(nspname, "aqo_ignorance", -1); - hrel = table_openrv(rv, RowExclusiveLock); - irel = index_open(reloid, RowExclusiveLock); - tupDesc = RelationGetDescr(hrel); - - InitDirtySnapshot(snap); - scan = index_beginscan(hrel, irel, &snap, 3, 0); - - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(qhash)); - ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fhash)); - ScanKeyInit(&key[2], 3, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss_hash)); - index_rescan(scan, key, 3, NULL, 0); - slot = MakeSingleTupleTableSlot(tupDesc, &TTSOpsBufferHeapTuple); - - if (!index_getnext_slot(scan, ForwardScanDirection, slot)) - { - if (plan->predicted_cardinality < 0.) 
- { - char nodestr[1024]; - char *qplan = nodeToString(plan); - - memset(nodestr, 0, 1024); - strncpy(nodestr, qplan, 1023); - pfree(qplan); - - /* - * AQO failed to predict cardinality for this node. - */ - values[0] = Int32GetDatum(qhash); - values[1] = Int32GetDatum(fhash); - values[2] = Int32GetDatum(fss_hash); - values[3] = Int32GetDatum(nodeTag(plan)); - values[4] = CStringGetTextDatum(nodestr); - tuple = heap_form_tuple(tupDesc, values, isnull); - - simple_heap_insert(hrel, tuple); - my_index_insert(irel, values, isnull, &(tuple->t_self), - hrel, UNIQUE_CHECK_YES); - } - else - { - /* AQO works as expected. */ - } - } - else if (!TransactionIdIsValid(snap.xmin) && - !TransactionIdIsValid(snap.xmax)) - { - /* - * AQO made prediction for this node. Delete it from the ignorance - * table. - */ - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - simple_heap_delete(hrel, &(tuple->t_self)); - } - else - { - /* - * The data exists. We can't do anything for now. 
- */ - } - - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, RowExclusiveLock); - table_close(hrel, RowExclusiveLock); - - CommandCounterIncrement(); - LockRelease(&tag, ExclusiveLock, false); -} diff --git a/ignorance.h b/ignorance.h deleted file mode 100644 index bceb855b..00000000 --- a/ignorance.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef IGNORANCE_H -#define IGNORANCE_H - -#include "postgres.h" - -extern bool aqo_log_ignorance; - -extern void set_ignorance(bool newval, void *extra); -extern bool create_ignorance_table(bool fail_ok); -extern void update_ignorance(int qhash, int fhash, int fss_hash, Plan *plan); - -#endif /* IGNORANCE_H */ diff --git a/postprocessing.c b/postprocessing.c index 232668a3..05e92737 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -17,7 +17,6 @@ */ #include "aqo.h" -#include "ignorance.h" #include "access/parallel.h" #include "optimizer/optimizer.h" @@ -115,16 +114,6 @@ learn_sample(List *clauselist, List *selectivities, List *relidslist, fss_hash = get_fss_for_object(clauselist, selectivities, relidslist, &nfeatures, &features); - if (aqo_log_ignorance && plan->predicted_cardinality <= 0 && - load_fss(fhash, fss_hash, 0, NULL, NULL, NULL) ) - { - /* - * If ignorance logging is enabled and the feature space was existed in - * the ML knowledge base, log this issue. 
- */ - update_ignorance(query_context.query_hash, fhash, fss_hash, plan); - } - if (nfeatures > 0) for (i = 0; i < aqo_K; ++i) matrix[i] = palloc(sizeof(double) * nfeatures); diff --git a/preprocessing.c b/preprocessing.c index 1fb22b1c..e1dd92b1 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -366,8 +366,7 @@ IsAQORelation(Relation rel) if (strcmp(relname, "aqo_data") == 0 || strcmp(relname, "aqo_query_texts") == 0 || strcmp(relname, "aqo_query_stat") == 0 || - strcmp(relname, "aqo_queries") == 0 || - strcmp(relname, "aqo_ignorance") == 0 + strcmp(relname, "aqo_queries") == 0 ) return true; diff --git a/sql/gucs.sql b/sql/gucs.sql index 99804669..c8cc8f36 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -12,29 +12,3 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; DROP EXTENSION aqo; - -SET aqo.log_ignorance = 'on'; -SET aqo.log_ignorance = 'off'; -SET aqo.log_ignorance = 'off'; -SET aqo.log_ignorance = 'on'; - -CREATE EXTENSION aqo; -SET aqo.log_ignorance = 'off'; -SET aqo.log_ignorance = 'on'; -SET aqo.log_ignorance = 'on'; -\d aqo_ignorance - -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) -SELECT * FROM t; -SELECT node_type FROM aqo_ignorance; - -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM t; -SELECT node_type FROM aqo_ignorance; - --- This GUC can be changed by an admin only. -CREATE ROLE noadmin; -SET ROLE noadmin; -SET aqo.log_ignorance = 'off'; -RESET ROLE; - -DROP EXTENSION aqo; diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index a66edb1f..7c3992d3 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -10,7 +10,6 @@ shared_preload_libraries = 'aqo' log_statement = 'none' aqo.mode = 'intelligent' - aqo.log_ignorance = 'on' }); $node->start(); From f3b81a63fae15c4e9d6adfcca5b0caca4727f1a8 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 22 Mar 2022 10:04:41 +0500 Subject: [PATCH 047/203] Arrange stable13 with the stable14 code. 
--- .github/workflows/c-cpp.yml | 2 - Makefile | 31 +- README.md | 2 +- aqo--1.0.sql | 10 +- aqo--1.1--1.2.sql | 12 +- aqo--1.2--1.3.sql | 138 +++++ aqo--1.2.sql | 22 +- aqo.c | 69 ++- aqo.control | 2 +- aqo.h | 96 +--- aqo_pg13.patch | 856 +++++++++------------------- auto_tuning.c | 4 +- cardinality_estimation.c | 57 +- cardinality_hooks.c | 287 +++++++--- cardinality_hooks.h | 31 + expected/aqo_CVE-2020-14350.out | 36 +- expected/aqo_fdw.out | 2 +- expected/clean_aqo_data.out | 321 +++++++++++ expected/forced_stat_collection.out | 8 +- expected/plancache.out | 46 ++ expected/top_queries.out | 51 ++ expected/unsupported.out | 543 +++++++++++++++++- hash.c | 162 +++++- hash.h | 16 + machine_learning.c | 2 + path_utils.c | 481 +++++++++++++++- path_utils.h | 68 +++ postprocessing.c | 534 ++++++++++------- preprocessing.c | 199 ++++--- preprocessing.h | 12 + schedule | 204 ------- selectivity_cache.c | 2 + sql/aqo_CVE-2020-14350.sql | 36 +- sql/aqo_fdw.sql | 3 +- sql/clean_aqo_data.sql | 143 +++++ sql/forced_stat_collection.sql | 2 +- sql/plancache.sql | 46 ++ sql/top_queries.sql | 27 + sql/unsupported.sql | 149 ++++- storage.c | 278 +++++---- t/001_pgbench.pl | 285 ++++++++- t/002_pg_stat_statements_aqo.pl | 66 +++ utils.c | 2 + 43 files changed, 3808 insertions(+), 1535 deletions(-) mode change 100644 => 100755 Makefile create mode 100755 aqo--1.2--1.3.sql create mode 100644 cardinality_hooks.h create mode 100644 expected/clean_aqo_data.out create mode 100644 expected/plancache.out create mode 100644 expected/top_queries.out create mode 100644 hash.h create mode 100644 path_utils.h create mode 100644 preprocessing.h delete mode 100644 schedule create mode 100644 sql/clean_aqo_data.sql create mode 100644 sql/plancache.sql create mode 100755 sql/top_queries.sql create mode 100644 t/002_pg_stat_statements_aqo.pl diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index e13f20e2..a23ec574 100644 --- a/.github/workflows/c-cpp.yml +++ 
b/.github/workflows/c-cpp.yml @@ -12,8 +12,6 @@ jobs: runs-on: ubuntu-latest steps: - - name: Install additional packages - run: sudo apt-get install -y libperl-dev libipc-run-perl - name: pg run: | echo "Deploying to production server on branch $GITHUB_REF" diff --git a/Makefile b/Makefile old mode 100644 new mode 100755 index ef12bc7d..b351ae0e --- a/Makefile +++ b/Makefile @@ -20,21 +20,19 @@ REGRESS = aqo_disabled \ aqo_CVE-2020-14350 \ gucs \ forced_stat_collection \ - unsupported + unsupported \ + clean_aqo_data \ + plancache \ + top_queries fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw -PG_CPPFLAGS += -I$(libpq_srcdir) -I$(fdw_srcdir) +stat_srcdir = $(top_srcdir)/contrib/pg_stat_statements +PG_CPPFLAGS += -I$(libpq_srcdir) -I$(fdw_srcdir) -I$(stat_srcdir) EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/conf.add -EXTRA_CLEAN = $(pg_regress_clean_files) sql/tablespace.sql \ - sql/misc.sql sql/largeobject.sql sql/create_function_2.sql \ - sql/create_function_1.sql sql/copy.sql sql/constraints.sql \ - expected/tablespace.out \ - expected/misc.out expected/largeobject.out expected/largeobject_1.out \ - expected/create_function_2.out expected/create_function_1.out \ - expected/copy.out expected/copy_1.out expected/constraints.out -EXTRA_INSTALL = contrib/postgres_fdw +EXTRA_INSTALL = contrib/postgres_fdw contrib/pg_stat_statements -DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql +DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql \ + aqo--1.2--1.3.sql ifdef USE_PGXS PG_CONFIG ?= pg_config @@ -46,14 +44,3 @@ top_builddir = ../.. 
include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif - -aqo-regress: - $(with_temp_install) \ - $(top_builddir)/src/test/regress/pg_regress \ - --temp-instance=./tmp_check \ - $(pg_regress_locale_flags) \ - --bindir='' \ - --dlpath=$(CURDIR)/$(top_builddir)/src/test/regress \ - --inputdir=$(abs_top_srcdir)/src/test/regress \ - --schedule=$(CURDIR)/schedule \ - --load-extension=aqo diff --git a/README.md b/README.md index 45ea1072..b3c0216e 100644 --- a/README.md +++ b/README.md @@ -325,7 +325,7 @@ Dynamically generated constants are okay. ## License -© [Postgres Professional](https://postgrespro.com/), 2016-2020. Licensed under +© [Postgres Professional](https://postgrespro.com/), 2016-2021. Licensed under [The PostgreSQL License](LICENSE). ## Reference diff --git a/aqo--1.0.sql b/aqo--1.0.sql index 1f207718..67395744 100644 --- a/aqo--1.0.sql +++ b/aqo--1.0.sql @@ -2,20 +2,20 @@ \echo Use "CREATE EXTENSION aqo" to load this file. 
\quit CREATE TABLE public.aqo_queries ( - query_hash int PRIMARY KEY, + query_hash bigint PRIMARY KEY, learn_aqo boolean NOT NULL, use_aqo boolean NOT NULL, - fspace_hash int NOT NULL, + fspace_hash bigint NOT NULL, auto_tuning boolean NOT NULL ); CREATE TABLE public.aqo_query_texts ( - query_hash int PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, + query_hash bigint PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, query_text varchar NOT NULL ); CREATE TABLE public.aqo_query_stat ( - query_hash int PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, + query_hash bigint PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, execution_time_with_aqo double precision[], execution_time_without_aqo double precision[], planning_time_with_aqo double precision[], @@ -27,7 +27,7 @@ CREATE TABLE public.aqo_query_stat ( ); CREATE TABLE public.aqo_data ( - fspace_hash int NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, + fspace_hash bigint NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, fsspace_hash int NOT NULL, nfeatures int NOT NULL, features double precision[][], diff --git a/aqo--1.1--1.2.sql b/aqo--1.1--1.2.sql index 719f455b..9291e7b7 100644 --- a/aqo--1.1--1.2.sql +++ b/aqo--1.1--1.2.sql @@ -28,12 +28,12 @@ DROP FUNCTION aqo_migrate_to_1_2_get_pk(regclass); -- -- Show query state at the AQO knowledge base -CREATE OR REPLACE FUNCTION public.aqo_status(hash int) +CREATE OR REPLACE FUNCTION public.aqo_status(hash bigint) RETURNS TABLE ( "learn" BOOL, "use aqo" BOOL, "auto tune" BOOL, - "fspace hash" INT, + "fspace hash" bigINT, "t_naqo" TEXT, "err_naqo" TEXT, "iters" BIGINT, @@ -63,7 +63,7 @@ WHERE (aqs.query_hash = aq.query_hash) AND aqs.query_hash = $1; $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION public.aqo_enable_query(hash int) +CREATE OR REPLACE FUNCTION public.aqo_enable_query(hash bigint) RETURNS VOID AS $func$ UPDATE public.aqo_queries SET @@ -72,7 +72,7 @@ UPDATE public.aqo_queries SET WHERE 
query_hash = $1; $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION public.aqo_disable_query(hash int) +CREATE OR REPLACE FUNCTION public.aqo_disable_query(hash bigint) RETURNS VOID AS $func$ UPDATE public.aqo_queries SET @@ -82,7 +82,7 @@ UPDATE public.aqo_queries SET WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION public.aqo_clear_hist(hash int) +CREATE OR REPLACE FUNCTION public.aqo_clear_hist(hash bigint) RETURNS VOID AS $func$ DELETE FROM public.aqo_data WHERE fspace_hash=$1; @@ -96,7 +96,7 @@ SELECT query_hash FROM public.aqo_query_stat aqs WHERE -1 = ANY (cardinality_error_with_aqo::double precision[]); $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION public.aqo_drop(hash int) +CREATE OR REPLACE FUNCTION public.aqo_drop(hash bigint) RETURNS VOID AS $func$ DELETE FROM public.aqo_queries aq WHERE (aq.query_hash = $1); diff --git a/aqo--1.2--1.3.sql b/aqo--1.2--1.3.sql new file mode 100755 index 00000000..f8bd3e49 --- /dev/null +++ b/aqo--1.2--1.3.sql @@ -0,0 +1,138 @@ +ALTER TABLE public.aqo_data ADD COLUMN oids OID [] DEFAULT NULL; + +-- +-- Remove data, related to previously dropped tables, from the AQO tables. 
+-- +CREATE OR REPLACE FUNCTION public.clean_aqo_data() RETURNS void AS $$ +DECLARE + aqo_data_row aqo_data%ROWTYPE; + aqo_queries_row aqo_queries%ROWTYPE; + aqo_query_texts_row aqo_query_texts%ROWTYPE; + aqo_query_stat_row aqo_query_stat%ROWTYPE; + oid_var oid; + fspace_hash_var bigint; + delete_row boolean DEFAULT false; +BEGIN + RAISE NOTICE 'Cleaning aqo_data records'; + + FOR aqo_data_row IN (SELECT * FROM aqo_data) + LOOP + delete_row = false; + SELECT aqo_data_row.fspace_hash INTO fspace_hash_var FROM aqo_data; + + IF (aqo_data_row.oids IS NOT NULL) THEN + FOREACH oid_var IN ARRAY aqo_data_row.oids + LOOP + IF NOT EXISTS (SELECT relname FROM pg_class WHERE oid = oid_var) THEN + delete_row = true; + END IF; + END LOOP; + END IF; + + FOR aqo_queries_row IN (SELECT * FROM aqo_queries) + LOOP + IF (delete_row = true AND fspace_hash_var <> 0 AND + fspace_hash_var = aqo_queries_row.fspace_hash AND + aqo_queries_row.fspace_hash = aqo_queries_row.query_hash) THEN + DELETE FROM aqo_data WHERE aqo_data = aqo_data_row; + DELETE FROM aqo_queries WHERE aqo_queries = aqo_queries_row; + + FOR aqo_query_texts_row IN (SELECT * FROM aqo_query_texts) + LOOP + DELETE FROM aqo_query_texts + WHERE aqo_query_texts_row.query_hash = fspace_hash_var AND + aqo_query_texts = aqo_query_texts_row; + END LOOP; + + FOR aqo_query_stat_row IN (SELECT * FROM aqo_query_stat) + LOOP + DELETE FROM aqo_query_stat + WHERE aqo_query_stat_row.query_hash = fspace_hash_var AND + aqo_query_stat = aqo_query_stat_row; + END LOOP; + END IF; + END LOOP; + END LOOP; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION array_avg(arr double precision[]) RETURNS double precision as $$ +BEGIN + RETURN (SELECT AVG(a) FROM UNNEST(arr) AS a); +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION array_mse(arr double precision[]) RETURNS double precision as $$ +DECLARE + mean double precision; +BEGIN + mean = array_avg(arr); + RETURN (SELECT AVG(POWER(a - mean, 2)) FROM UNNEST(arr) AS a); +END; +$$ 
LANGUAGE plpgsql; + + +-- +-- Show top N of 'bad' queries. +-- +-- The AQO extension must be installed, but disabled. +-- Strictly speaking, these functions shows 'query classes' that includes all +-- queries of the same structure. An query example of a class can be found in the +-- aqo_query_texts table. +-- This functions can be used to gentle search of 'bad' queries. User must set: +-- aqo.mode = 'disabled' +-- aqo.force_collect_stat = 'on' +-- + +-- +-- Top of queries with the highest value of execution time. +-- +CREATE OR REPLACE FUNCTION public.top_time_queries(n int) + RETURNS TABLE(num bigint, + fspace_hash bigint, + query_hash bigint, + execution_time float, + deviation float + ) +AS $$ +BEGIN + RAISE NOTICE 'Top % execution time queries', n; + RETURN QUERY + SELECT row_number() OVER(ORDER BY execution_time_without_aqo DESC) num, + aqo_queries.fspace_hash, + aqo_queries.query_hash, + to_char(array_avg(execution_time_without_aqo), '9.99EEEE')::float, + to_char(array_mse(execution_time_without_aqo), '9.99EEEE')::float + FROM aqo_queries INNER JOIN aqo_query_stat + ON aqo_queries.query_hash = aqo_query_stat.query_hash + GROUP BY (execution_time_without_aqo, aqo_queries.fspace_hash, aqo_queries.query_hash) + ORDER BY execution_time DESC LIMIT n; +END; +$$ LANGUAGE plpgsql; + +-- +-- Top of queries with largest value of total cardinality error. 
+-- +CREATE OR REPLACE FUNCTION public.top_error_queries(n int) + RETURNS TABLE(num bigint, + fspace_hash bigint, + query_hash bigint, + error float, + deviation float + ) +AS $$ +BEGIN + RAISE NOTICE 'Top % cardinality error queries', n; + RETURN QUERY + SELECT row_number() OVER (ORDER BY cardinality_error_without_aqo DESC) num, + aqo_queries.fspace_hash, + aqo_queries.query_hash, + to_char(array_avg(cardinality_error_without_aqo), '9.99EEEE')::float, + to_char(array_mse(cardinality_error_without_aqo), '9.99EEEE')::float + FROM aqo_queries INNER JOIN aqo_query_stat + ON aqo_queries.query_hash = aqo_query_stat.query_hash + GROUP BY (cardinality_error_without_aqo, aqo_queries.fspace_hash, aqo_queries.query_hash) + ORDER BY error DESC LIMIT n; +END; +$$ LANGUAGE plpgsql; + diff --git a/aqo--1.2.sql b/aqo--1.2.sql index 3d96f0cc..7e3abf4a 100644 --- a/aqo--1.2.sql +++ b/aqo--1.2.sql @@ -2,20 +2,20 @@ \echo Use "CREATE EXTENSION aqo" to load this file. \quit CREATE TABLE public.aqo_queries ( - query_hash int CONSTRAINT aqo_queries_query_hash_idx PRIMARY KEY, + query_hash bigint CONSTRAINT aqo_queries_query_hash_idx PRIMARY KEY, learn_aqo boolean NOT NULL, use_aqo boolean NOT NULL, - fspace_hash int NOT NULL, + fspace_hash bigint NOT NULL, auto_tuning boolean NOT NULL ); CREATE TABLE public.aqo_query_texts ( - query_hash int CONSTRAINT aqo_query_texts_query_hash_idx PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, + query_hash bigint CONSTRAINT aqo_query_texts_query_hash_idx PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, query_text text NOT NULL ); CREATE TABLE public.aqo_query_stat ( - query_hash int CONSTRAINT aqo_query_stat_idx PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, + query_hash bigint CONSTRAINT aqo_query_stat_idx PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, execution_time_with_aqo double precision[], execution_time_without_aqo double precision[], planning_time_with_aqo double precision[], @@ -27,7 
+27,7 @@ CREATE TABLE public.aqo_query_stat ( ); CREATE TABLE public.aqo_data ( - fspace_hash int NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, + fspace_hash bigint NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, fsspace_hash int NOT NULL, nfeatures int NOT NULL, features double precision[][], @@ -52,12 +52,12 @@ CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE -- -- Show query state at the AQO knowledge base -CREATE FUNCTION public.aqo_status(hash int) +CREATE FUNCTION public.aqo_status(hash bigint) RETURNS TABLE ( "learn" BOOL, "use aqo" BOOL, "auto tune" BOOL, - "fspace hash" INT, + "fspace hash" bigINT, "t_naqo" TEXT, "err_naqo" TEXT, "iters" BIGINT, @@ -87,7 +87,7 @@ WHERE (aqs.query_hash = aq.query_hash) AND aqs.query_hash = $1; $func$ LANGUAGE SQL; -CREATE FUNCTION public.aqo_enable_query(hash int) +CREATE FUNCTION public.aqo_enable_query(hash bigint) RETURNS VOID AS $func$ UPDATE public.aqo_queries SET @@ -96,7 +96,7 @@ UPDATE public.aqo_queries SET WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE FUNCTION public.aqo_disable_query(hash int) +CREATE FUNCTION public.aqo_disable_query(hash bigint) RETURNS VOID AS $func$ UPDATE public.aqo_queries SET @@ -106,7 +106,7 @@ UPDATE public.aqo_queries SET WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE FUNCTION public.aqo_clear_hist(hash int) +CREATE FUNCTION public.aqo_clear_hist(hash bigint) RETURNS VOID AS $func$ DELETE FROM public.aqo_data WHERE fspace_hash=$1; @@ -120,7 +120,7 @@ SELECT query_hash FROM public.aqo_query_stat aqs WHERE -1 = ANY (cardinality_error_with_aqo::double precision[]); $func$ LANGUAGE SQL; -CREATE FUNCTION public.aqo_drop(hash int) +CREATE FUNCTION public.aqo_drop(hash bigint) RETURNS VOID AS $func$ DELETE FROM public.aqo_queries aq WHERE (aq.query_hash = $1); diff --git a/aqo.c b/aqo.c index 374895dc..e36e8e5d 100644 --- a/aqo.c +++ b/aqo.c @@ -8,12 +8,20 @@ * aqo/aqo.c */ -#include "aqo.h" +#include "postgres.h" #include 
"access/relation.h" #include "access/table.h" #include "catalog/pg_extension.h" #include "commands/extension.h" +#include "miscadmin.h" +#include "utils/selfuncs.h" + +#include "aqo.h" +#include "cardinality_hooks.h" +#include "path_utils.h" +#include "preprocessing.h" + PG_MODULE_MAGIC; @@ -23,6 +31,8 @@ void _PG_init(void); /* Strategy of determining feature space for new queries. */ int aqo_mode; +bool aqo_enabled = false; /* Signals that CREATE EXTENSION have executed and + all extension tables is ready for use. */ bool force_collect_stat; /* @@ -89,8 +99,6 @@ QueryContextData query_context; /* Additional plan info */ int njoins; -char *query_text = NULL; - /* Saved hook values */ post_parse_analyze_hook_type prev_post_parse_analyze_hook; planner_hook_type prev_planner_hook; @@ -101,7 +109,7 @@ set_baserel_rows_estimate_hook_type prev_set_baserel_rows_estimate_hook; get_parameterized_baserel_size_hook_type prev_get_parameterized_baserel_size_hook; set_joinrel_size_estimates_hook_type prev_set_joinrel_size_estimates_hook; get_parameterized_joinrel_size_hook_type prev_get_parameterized_joinrel_size_hook; -copy_generic_path_info_hook_type prev_copy_generic_path_info_hook; +create_plan_hook_type prev_create_plan_hook; ExplainOnePlan_hook_type prev_ExplainOnePlan_hook; ExplainOneNode_hook_type prev_ExplainOneNode_hook; @@ -120,15 +128,9 @@ aqo_free_callback(ResourceReleasePhase phase, if (phase != RESOURCE_RELEASE_AFTER_LOCKS) return; - if (query_text != NULL) - { - pfree(query_text); - query_text = NULL; - } - if (isTopLevel) { - list_free(cur_classes); + list_free_deep(cur_classes); cur_classes = NIL; } } @@ -136,6 +138,16 @@ aqo_free_callback(ResourceReleasePhase phase, void _PG_init(void) { + /* + * In order to create our shared memory area, we have to be loaded via + * shared_preload_libraries. If not, report an ERROR. 
+ */ + if (!process_shared_preload_libraries_in_progress) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("AQO module could be loaded only on startup."), + errdetail("Add 'aqo' into the shared_preload_libraries list."))); + DefineCustomEnumVariable("aqo.mode", "Mode of aqo usage.", NULL, @@ -190,12 +202,12 @@ _PG_init(void) prev_planner_hook = planner_hook; planner_hook = aqo_planner; - prev_post_parse_analyze_hook = post_parse_analyze_hook; - post_parse_analyze_hook = get_query_text; prev_ExecutorStart_hook = ExecutorStart_hook; ExecutorStart_hook = aqo_ExecutorStart; prev_ExecutorEnd_hook = ExecutorEnd_hook; ExecutorEnd_hook = aqo_ExecutorEnd; + + /* Cardinality prediction hooks. */ prev_set_baserel_rows_estimate_hook = set_baserel_rows_estimate_hook; set_foreign_rows_estimate_hook = aqo_set_baserel_rows_estimate; set_baserel_rows_estimate_hook = aqo_set_baserel_rows_estimate; @@ -205,13 +217,21 @@ _PG_init(void) set_joinrel_size_estimates_hook = aqo_set_joinrel_size_estimates; prev_get_parameterized_joinrel_size_hook = get_parameterized_joinrel_size_hook; get_parameterized_joinrel_size_hook = aqo_get_parameterized_joinrel_size; - prev_copy_generic_path_info_hook = copy_generic_path_info_hook; - copy_generic_path_info_hook = aqo_copy_generic_path_info; + prev_estimate_num_groups_hook = estimate_num_groups_hook; + estimate_num_groups_hook = aqo_estimate_num_groups_hook; + parampathinfo_postinit_hook = ppi_hook; + + prev_create_plan_hook = create_plan_hook; + create_plan_hook = aqo_create_plan_hook; + + /* Service hooks. 
*/ prev_ExplainOnePlan_hook = ExplainOnePlan_hook; ExplainOnePlan_hook = print_into_explain; prev_ExplainOneNode_hook = ExplainOneNode_hook; ExplainOneNode_hook = print_node_explain; - parampathinfo_postinit_hook = ppi_hook; + + prev_create_upper_paths_hook = create_upper_paths_hook; + create_upper_paths_hook = aqo_store_upper_signature_hook; init_deactivated_queries_storage(); AQOMemoryContext = AllocSetContextCreate(TopMemoryContext, @@ -221,6 +241,7 @@ _PG_init(void) "AQO_cache_mem_ctx", ALLOCSET_DEFAULT_SIZES); RegisterResourceReleaseCallback(aqo_free_callback, NULL); + RegisterAQOPlanNodeMethods(); } PG_FUNCTION_INFO_V1(invalidate_deactivated_queries_cache); @@ -296,3 +317,19 @@ init_lock_tag(LOCKTAG *tag, uint32 key1, uint32 key2) tag->locktag_type = LOCKTAG_USERLOCK; tag->locktag_lockmethodid = USER_LOCKMETHOD; } + +/* + * AQO is really needed for any activity? + */ +bool +IsQueryDisabled(void) +{ + if (!query_context.learn_aqo && !query_context.use_aqo && + !query_context.auto_tuning && !query_context.collect_stat && + !query_context.adding_query && !query_context.explain_only && + INSTR_TIME_IS_ZERO(query_context.start_planning_time) && + query_context.planning_time < 0.) 
+ return true; + + return false; +} diff --git a/aqo.control b/aqo.control index 8edc5fc7..14bb3b50 100644 --- a/aqo.control +++ b/aqo.control @@ -1,5 +1,5 @@ # AQO extension comment = 'machine learning for cardinality estimation in optimizer' -default_version = '1.2' +default_version = '1.3' module_pathname = '$libdir/aqo' relocatable = false diff --git a/aqo.h b/aqo.h index 99e9e1d4..04f18994 100644 --- a/aqo.h +++ b/aqo.h @@ -116,10 +116,6 @@ #include -#include "postgres.h" - -#include "fmgr.h" - #include "access/hash.h" #include "access/htup_details.h" #include "access/xact.h" @@ -135,7 +131,6 @@ #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "optimizer/pathnode.h" -#include "optimizer/planmain.h" #include "optimizer/planner.h" #include "optimizer/cost.h" #include "parser/analyze.h" @@ -173,6 +168,7 @@ typedef enum } AQO_MODE; extern int aqo_mode; +extern bool aqo_enabled; extern bool force_collect_stat; extern bool aqo_show_hash; extern bool aqo_show_details; @@ -206,18 +202,26 @@ typedef struct /* Parameters for current query */ typedef struct QueryContextData { - int query_hash; + uint64 query_hash; + uint64 fspace_hash; bool learn_aqo; bool use_aqo; - int fspace_hash; bool auto_tuning; bool collect_stat; bool adding_query; bool explain_only; - /* Query execution time */ - instr_time query_starttime; - double query_planning_time; + /* + * Timestamp of start of query planning process. Must be zeroed on execution + * start or in the case of ERROR. Query context is stored in an query env + * field. So, if query has a cached plan, a planning step could be skipped + * by an optimizer. We should realize it at an execution stage by zero value + * of this field. 
+ */ + instr_time start_planning_time; + + instr_time start_execution_time; + double planning_time; } QueryContextData; extern double predicted_ppi_rows; @@ -245,7 +249,6 @@ extern double log_selectivity_lower_bound; /* Parameters for current query */ extern QueryContextData query_context; extern int njoins; -extern char *query_text; /* Memory context for long-live data */ extern MemoryContext AQOMemoryContext; @@ -266,85 +269,44 @@ extern set_joinrel_size_estimates_hook_type prev_set_joinrel_size_estimates_hook; extern get_parameterized_joinrel_size_hook_type prev_get_parameterized_joinrel_size_hook; -extern copy_generic_path_info_hook_type prev_copy_generic_path_info_hook; extern ExplainOnePlan_hook_type prev_ExplainOnePlan_hook; extern ExplainOneNode_hook_type prev_ExplainOneNode_hook; extern void ppi_hook(ParamPathInfo *ppi); /* Hash functions */ -int get_query_hash(Query *parse, const char *query_text); -extern int get_fss_for_object(List *clauselist, List *selectivities, - List *relidslist, int *nfeatures, - double **features); void get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash); int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); /* Storage interaction */ -extern bool find_query(int qhash, Datum *search_values, bool *search_nulls); -extern bool update_query(int qhash, int fhash, +extern bool find_query(uint64 qhash, Datum *search_values, bool *search_nulls); +extern bool update_query(uint64 qhash, uint64 fhash, bool learn_aqo, bool use_aqo, bool auto_tuning); -extern bool add_query_text(int query_hash); -extern bool load_fss(int fhash, int fss_hash, - int ncols, double **matrix, double *targets, int *rows); -extern bool update_fss(int fhash, int fss_hash, int nrows, int ncols, - double **matrix, double *targets); -QueryStat *get_aqo_stat(int query_hash); -void update_aqo_stat(int query_hash, QueryStat * stat); +extern bool add_query_text(uint64 query_hash, const char *query_string); +extern bool 
load_fss(uint64 fhash, int fss_hash, + int ncols, double **matrix, double *targets, int *rows, + List **relids); +extern bool update_fss(uint64 fhash, int fss_hash, int nrows, int ncols, + double **matrix, double *targets, List *relids); +QueryStat *get_aqo_stat(uint64 query_hash); +void update_aqo_stat(uint64 query_hash, QueryStat * stat); extern bool my_index_insert(Relation indexRelation, Datum *values, bool *isnull, ItemPointer heap_t_ctid, Relation heapRelation, IndexUniqueCheck checkUnique); void init_deactivated_queries_storage(void); void fini_deactivated_queries_storage(void); -bool query_is_deactivated(int query_hash); -void add_deactivated_query(int query_hash); +extern bool query_is_deactivated(uint64 query_hash); +extern void add_deactivated_query(uint64 query_hash); /* Query preprocessing hooks */ -extern void get_query_text(ParseState *pstate, Query *query); -extern PlannedStmt *call_default_planner(Query *parse, - const char *query_string, - int cursorOptions, - ParamListInfo boundParams); -extern PlannedStmt *aqo_planner(Query *parse, - const char *query_string, - int cursorOptions, - ParamListInfo boundParams); extern void print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, const char *queryString, ParamListInfo params, const instr_time *planduration, QueryEnvironment *queryEnv); -extern void print_node_explain(ExplainState *es, PlanState *ps, Plan *plan, - double rows); -extern void disable_aqo_for_query(void); - -/* Cardinality estimation hooks */ -extern void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel); -double aqo_get_parameterized_baserel_size(PlannerInfo *root, - RelOptInfo *rel, - List *param_clauses); -void aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - SpecialJoinInfo *sjinfo, - List *restrictlist); -double aqo_get_parameterized_joinrel_size(PlannerInfo *root, - RelOptInfo *rel, - Path *outer_path, - Path *inner_path, 
- SpecialJoinInfo *sjinfo, - List *restrict_clauses); - -/* Extracting path information utilities */ -List *get_selectivities(PlannerInfo *root, - List *clauses, - int varRelid, - JoinType jointype, - SpecialJoinInfo *sjinfo); -List *get_list_of_relids(PlannerInfo *root, Relids relids); -List *get_path_clauses(Path *path, PlannerInfo *root, List **selectivities); +extern void print_node_explain(ExplainState *es, PlanState *ps, Plan *plan); /* Cardinality estimation */ double predict_for_relation(List *restrict_clauses, List *selectivities, @@ -352,7 +314,6 @@ double predict_for_relation(List *restrict_clauses, List *selectivities, /* Query execution statistics collecting hooks */ void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags); -void aqo_copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src); void aqo_ExecutorEnd(QueryDesc *queryDesc); /* Machine learning techniques */ @@ -364,7 +325,7 @@ extern int OkNNr_learn(int matrix_rows, int matrix_cols, double *features, double target); /* Automatic query tuning */ -extern void automatical_query_tuning(int query_hash, QueryStat * stat); +extern void automatical_query_tuning(uint64 query_hash, QueryStat * stat); /* Utilities */ int int_cmp(const void *a, const void *b); @@ -384,6 +345,7 @@ extern void selectivity_cache_clear(void); extern Oid get_aqo_schema(void); extern void init_lock_tag(LOCKTAG *tag, uint32 key1, uint32 key2); +extern bool IsQueryDisabled(void); extern List *cur_classes; #endif diff --git a/aqo_pg13.patch b/aqo_pg13.patch index 106fd659..afb43aba 100644 --- a/aqo_pg13.patch +++ b/aqo_pg13.patch @@ -11,7 +11,7 @@ index 1846d415b6..95519ac11d 100644 auto_explain \ bloom \ diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index 20708db9f1..d4659f53a0 100644 +index bc05c96b4c..b6a3abe0d2 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -24,6 +24,7 @@ @@ -46,50 +46,74 @@ index 20708db9f1..d4659f53a0 100644 
ExplainCloseGroup("Query", NULL, true, es); } -@@ -1582,6 +1593,9 @@ ExplainNode(PlanState *planstate, List *ancestors, - appendStringInfo(es->str, - " (actual rows=%.0f loops=%.0f)", - rows, nloops); -+ -+ if (ExplainOneNode_hook) -+ ExplainOneNode_hook(es, planstate, plan, rows); +@@ -1612,6 +1623,9 @@ ExplainNode(PlanState *planstate, List *ancestors, } - else - { + } + ++ if (ExplainOneNode_hook) ++ ExplainOneNode_hook(es, planstate, plan); ++ + /* in text format, first line ends here */ + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoChar(es->str, '\n'); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index 256ab54003..cfdc0247ec 100644 +index 682b28ed72..e64ea3ff46 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c -@@ -127,6 +127,12 @@ CopyPlanFields(const Plan *from, Plan *newnode) - COPY_NODE_FIELD(lefttree); - COPY_NODE_FIELD(righttree); +@@ -132,6 +132,7 @@ CopyPlanFields(const Plan *from, Plan *newnode) COPY_NODE_FIELD(initPlan); -+ COPY_SCALAR_FIELD(had_path); -+ COPY_NODE_FIELD(path_clauses); -+ COPY_NODE_FIELD(path_relids); -+ COPY_SCALAR_FIELD(path_jointype); -+ COPY_SCALAR_FIELD(path_parallel_workers); -+ COPY_SCALAR_FIELD(was_parametrized); COPY_BITMAPSET_FIELD(extParam); COPY_BITMAPSET_FIELD(allParam); ++ COPY_NODE_FIELD(private); + } + + /* +diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c +index 7237b52e96..025b4fde2b 100644 +--- a/src/backend/nodes/outfuncs.c ++++ b/src/backend/nodes/outfuncs.c +@@ -342,6 +342,7 @@ _outPlanInfo(StringInfo str, const Plan *node) + WRITE_NODE_FIELD(initPlan); + WRITE_BITMAPSET_FIELD(extParam); + WRITE_BITMAPSET_FIELD(allParam); ++ /*WRITE_NODE_FIELD(private); */ } + + /* +diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c +index 62c945b6c5..23ab51fb9b 100644 +--- a/src/backend/nodes/readfuncs.c ++++ b/src/backend/nodes/readfuncs.c +@@ -1580,6 +1580,11 @@ ReadCommonPlan(Plan *local_node) + 
READ_NODE_FIELD(initPlan); + READ_BITMAPSET_FIELD(extParam); + READ_BITMAPSET_FIELD(allParam); ++ local_node->private = NIL; ++ /* READ_NODE_FIELD(private); ++ * Don't serialize this field. It is required to serialize RestrictInfo and ++ * EqualenceClass. ++ */ + } + + /* diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index ffd2bf8783..84152b43b6 100644 +index 92b5223fee..a533c2cada 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c -@@ -97,6 +97,11 @@ - #include "utils/spccache.h" +@@ -98,6 +98,12 @@ #include "utils/tuplesort.h" + +set_baserel_rows_estimate_hook_type set_baserel_rows_estimate_hook = NULL; +set_foreign_rows_estimate_hook_type set_foreign_rows_estimate_hook = NULL; +get_parameterized_baserel_size_hook_type get_parameterized_baserel_size_hook = NULL; +get_parameterized_joinrel_size_hook_type get_parameterized_joinrel_size_hook = NULL; +set_joinrel_size_estimates_hook_type set_joinrel_size_estimates_hook = NULL; - ++ /* source-code-compatibility hacks for pull_varnos() API change */ #define pull_varnos(a,b) pull_varnos_new(a,b) -@@ -181,7 +186,6 @@ static Cost append_nonpartial_cost(List *subpaths, int numpaths, + +@@ -181,7 +187,6 @@ static Cost append_nonpartial_cost(List *subpaths, int numpaths, static void set_rel_width(PlannerInfo *root, RelOptInfo *rel); static double relation_byte_size(double tuples, int width); static double page_size(double tuples, int width); @@ -97,88 +121,7 @@ index ffd2bf8783..84152b43b6 100644 /* -@@ -259,7 +263,7 @@ cost_seqscan(Path *path, PlannerInfo *root, - /* Adjust costing for parallelism, if used. */ - if (path->parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(path); -+ double parallel_divisor = get_parallel_divisor(path->parallel_workers); - - /* The CPU cost is divided among all the workers. 
*/ - cpu_run_cost /= parallel_divisor; -@@ -738,7 +742,7 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count, - /* Adjust costing for parallelism, if used. */ - if (path->path.parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(&path->path); -+ double parallel_divisor = get_parallel_divisor(path->path.parallel_workers); - - path->path.rows = clamp_row_est(path->path.rows / parallel_divisor); - -@@ -1019,7 +1023,7 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, - /* Adjust costing for parallelism, if used. */ - if (path->parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(path); -+ double parallel_divisor = get_parallel_divisor(path->parallel_workers); - - /* The CPU cost is divided among all the workers. */ - cpu_run_cost /= parallel_divisor; -@@ -2122,7 +2126,7 @@ cost_append(AppendPath *apath) - else /* parallel-aware */ - { - int i = 0; -- double parallel_divisor = get_parallel_divisor(&apath->path); -+ double parallel_divisor = get_parallel_divisor(apath->path.parallel_workers); - - /* Parallel-aware Append never produces ordered output. */ - Assert(apath->path.pathkeys == NIL); -@@ -2156,7 +2160,7 @@ cost_append(AppendPath *apath) - { - double subpath_parallel_divisor; - -- subpath_parallel_divisor = get_parallel_divisor(subpath); -+ subpath_parallel_divisor = get_parallel_divisor(subpath->parallel_workers); - apath->path.rows += subpath->rows * (subpath_parallel_divisor / - parallel_divisor); - apath->path.total_cost += subpath->total_cost; -@@ -2755,7 +2759,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, - /* For partial paths, scale row estimate. 
*/ - if (path->path.parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(&path->path); -+ double parallel_divisor = get_parallel_divisor(path->path.parallel_workers); - - path->path.rows = - clamp_row_est(path->path.rows / parallel_divisor); -@@ -3203,7 +3207,7 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, - /* For partial paths, scale row estimate. */ - if (path->jpath.path.parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(&path->jpath.path); -+ double parallel_divisor = get_parallel_divisor(path->jpath.path.parallel_workers); - - path->jpath.path.rows = - clamp_row_est(path->jpath.path.rows / parallel_divisor); -@@ -3537,7 +3541,7 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, - * number, so we need to undo the division. - */ - if (parallel_hash) -- inner_path_rows_total *= get_parallel_divisor(inner_path); -+ inner_path_rows_total *= get_parallel_divisor(inner_path->parallel_workers); - - /* - * Get hash table size that executor would use for inner relation. -@@ -3634,7 +3638,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, - /* For partial paths, scale row estimate. */ - if (path->jpath.path.parallel_workers > 0) - { -- double parallel_divisor = get_parallel_divisor(&path->jpath.path); -+ double parallel_divisor = get_parallel_divisor(path->jpath.path.parallel_workers); - - path->jpath.path.rows = - clamp_row_est(path->jpath.path.rows / parallel_divisor); -@@ -4629,6 +4633,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4626,6 +4631,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) } @@ -237,7 +180,7 @@ index ffd2bf8783..84152b43b6 100644 /* * set_baserel_size_estimates * Set the size estimates for the given base relation. 
-@@ -4645,19 +4701,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4642,19 +4699,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) { @@ -258,7 +201,7 @@ index ffd2bf8783..84152b43b6 100644 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); -@@ -4668,13 +4715,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) +@@ -4665,13 +4713,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) * get_parameterized_baserel_size * Make a size estimate for a parameterized scan of a base relation. * @@ -294,7 +237,7 @@ index ffd2bf8783..84152b43b6 100644 { List *allclauses; double nrows; -@@ -4703,6 +4770,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -4700,6 +4768,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates * Set the size estimates for the given join relation. * @@ -331,7 +274,7 @@ index ffd2bf8783..84152b43b6 100644 * The rel's targetlist must have been constructed already, and a * restriction clause list that matches the given component rels must * be provided. -@@ -4722,11 +4819,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -4719,11 +4817,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * build_joinrel_tlist, and baserestrictcost is not used for join rels. */ void @@ -348,7 +291,7 @@ index ffd2bf8783..84152b43b6 100644 { rel->rows = calc_joinrel_size_estimate(root, rel, -@@ -4742,6 +4839,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -4739,6 +4837,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * get_parameterized_joinrel_size * Make a size estimate for a parameterized scan of a join relation. * @@ -384,7 +327,7 @@ index ffd2bf8783..84152b43b6 100644 * 'rel' is the joinrel under consideration. 
* 'outer_path', 'inner_path' are (probably also parameterized) Paths that * produce the relations being joined. -@@ -4754,11 +4880,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -4751,11 +4878,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates must have been applied already. */ double @@ -401,7 +344,7 @@ index ffd2bf8783..84152b43b6 100644 { double nrows; -@@ -5427,7 +5553,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) +@@ -5424,7 +5551,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) /* Should only be applied to base relations */ Assert(rel->relid > 0); @@ -410,459 +353,164 @@ index ffd2bf8783..84152b43b6 100644 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); -@@ -5709,14 +5835,25 @@ page_size(double tuples, int width) - return ceil(relation_byte_size(tuples, width) / BLCKSZ); - } - -+bool -+IsParallelTuplesProcessing(const Plan *plan) -+{ -+ if (plan->path_parallel_workers > 0 && ( -+ plan->parallel_aware || nodeTag(plan) == T_HashJoin || -+ nodeTag(plan) == T_MergeJoin || -+ nodeTag(plan) == T_NestLoop)) -+ return true; -+ return false; -+} -+ - /* +@@ -5710,7 +5837,7 @@ page_size(double tuples, int width) * Estimate the fraction of the work that each worker will do given the * number of workers budgeted for the path. 
*/ -static double --get_parallel_divisor(Path *path) +double -+get_parallel_divisor(int parallel_workers) + get_parallel_divisor(Path *path) { -- double parallel_divisor = path->parallel_workers; -+ double parallel_divisor = parallel_workers; - - /* - * Early experience with parallel query suggests that when there is only -@@ -5733,7 +5870,7 @@ get_parallel_divisor(Path *path) - { - double leader_contribution; - -- leader_contribution = 1.0 - (0.3 * path->parallel_workers); -+ leader_contribution = 1.0 - (0.3 * parallel_workers); - if (leader_contribution > 0) - parallel_divisor += leader_contribution; - } + double parallel_divisor = path->parallel_workers; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 7bf1751e93..e0bbf583fb 100644 +index e445debe57..365b7aa319 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c -@@ -70,6 +70,8 @@ +@@ -70,6 +70,7 @@ #define CP_LABEL_TLIST 0x0004 /* tlist must contain sortgrouprefs */ #define CP_IGNORE_TLIST 0x0008 /* caller will replace tlist */ -+/* Hook for plugins to get control in creating plan from path */ -+copy_generic_path_info_hook_type copy_generic_path_info_hook = NULL; ++create_plan_hook_type create_plan_hook = NULL; static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path, int flags); -@@ -166,7 +168,7 @@ static Node *fix_indexqual_clause(PlannerInfo *root, - static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol); - static List *get_switched_clauses(List *clauses, Relids outerrelids); - static List *order_qual_clauses(PlannerInfo *root, List *clauses); --static void copy_generic_path_info(Plan *dest, Path *src); -+static void copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src); - static void copy_plan_costsize(Plan *dest, Plan *src); - static void label_sort_with_costsize(PlannerInfo *root, Sort *plan, - double limit_tuples); -@@ -1107,7 +1109,7 @@ 
create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) - false)), - NULL); - -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - - return plan; +@@ -524,6 +525,10 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) + break; } -@@ -1255,7 +1257,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) - plan->first_partial_plan = best_path->first_partial_path; - plan->part_prune_info = partpruneinfo; - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - /* - * If prepare_sort_from_pathkeys added sort columns, but we were told to -@@ -1301,7 +1303,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, - * prepare_sort_from_pathkeys on it before we do so on the individual - * child plans, to make cross-checking the sort info easier. - */ -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, &best_path->path); - plan->targetlist = tlist; - plan->qual = NIL; - plan->lefttree = NULL; -@@ -1455,7 +1457,7 @@ create_group_result_plan(PlannerInfo *root, GroupResultPath *best_path) - - plan = make_result(tlist, (Node *) quals, NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - return plan; - } -@@ -1480,7 +1482,7 @@ create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path) - - plan = make_project_set(tlist, subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - return plan; - } -@@ -1508,7 +1510,7 @@ create_material_plan(PlannerInfo *root, MaterialPath *best_path, int flags) - - plan = make_material(subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - return plan; 
- } -@@ -1708,7 +1710,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) - } - - /* Copy cost data from Path to Plan */ -- copy_generic_path_info(plan, &best_path->path); -+ copy_generic_path_info(root, plan, &best_path->path); - - return plan; - } -@@ -1741,7 +1743,7 @@ create_gather_plan(PlannerInfo *root, GatherPath *best_path) - best_path->single_copy, - subplan); - -- copy_generic_path_info(&gather_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &gather_plan->plan, &best_path->path); - - /* use parallel mode for parallel plans. */ - root->glob->parallelModeNeeded = true; -@@ -1770,7 +1772,7 @@ create_gather_merge_plan(PlannerInfo *root, GatherMergePath *best_path) - gm_plan = makeNode(GatherMerge); - gm_plan->plan.targetlist = tlist; - gm_plan->num_workers = best_path->num_workers; -- copy_generic_path_info(&gm_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &gm_plan->plan, &best_path->path); - - /* Assign the rescan Param. */ - gm_plan->rescan_param = assign_special_exec_param(root); -@@ -1899,7 +1901,7 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path, int flags) - /* We need a Result node */ - plan = (Plan *) make_result(tlist, NULL, subplan); - -- copy_generic_path_info(plan, (Path *) best_path); -+ copy_generic_path_info(root, plan, (Path *) best_path); - } - - return plan; -@@ -2000,7 +2002,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags) - IS_OTHER_REL(best_path->subpath->parent) ? 
- best_path->path.parent->relids : NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2026,7 +2028,7 @@ create_incrementalsort_plan(PlannerInfo *root, IncrementalSortPath *best_path, - best_path->spath.path.parent->relids : NULL, - best_path->nPresortedCols); - -- copy_generic_path_info(&plan->sort.plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->sort.plan, (Path *) best_path); - - return plan; - } -@@ -2065,7 +2067,7 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path) - subplan->targetlist), - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2093,7 +2095,7 @@ create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flag - best_path->path.pathkeys, - best_path->numkeys); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2137,7 +2139,7 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) - best_path->transitionSpace, - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2339,7 +2341,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) - subplan); - - /* Copy cost data from Path to Plan */ -- copy_generic_path_info(&plan->plan, &best_path->path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - } - - return (Plan *) plan; -@@ -2397,7 +2399,7 @@ create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path) - - plan = make_result(tlist, (Node *) best_path->quals, NULL); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - /* - * During setrefs.c, we'll need to 
replace references to the Agg nodes -@@ -2516,7 +2518,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) - wc->inRangeNullsFirst, - subplan); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2552,7 +2554,7 @@ create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags) - best_path->firstFlag, - numGroups); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2588,7 +2590,7 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path) - best_path->distinctList, - numGroups); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2611,7 +2613,7 @@ create_lockrows_plan(PlannerInfo *root, LockRowsPath *best_path, - - plan = make_lockrows(subplan, best_path->rowMarks, best_path->epqParam); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); - - return plan; - } -@@ -2672,7 +2674,7 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) - best_path->onconflict, - best_path->epqParam); - -- copy_generic_path_info(&plan->plan, &best_path->path); -+ copy_generic_path_info(root, &plan->plan, &best_path->path); - - return plan; - } -@@ -2726,7 +2728,7 @@ create_limit_plan(PlannerInfo *root, LimitPath *best_path, int flags) - best_path->limitOption, - numUniqkeys, uniqColIdx, uniqOperators, uniqCollations); - -- copy_generic_path_info(&plan->plan, (Path *) best_path); -+ copy_generic_path_info(root, &plan->plan, (Path *) best_path); ++ if (create_plan_hook) ++ /* Give an extension a chance to do something */ ++ (*create_plan_hook)(root, best_path, &plan); ++ return plan; } -@@ -2772,7 +2774,7 @@ create_seqscan_plan(PlannerInfo *root, Path 
*best_path, - scan_clauses, - scan_relid); - -- copy_generic_path_info(&scan_plan->plan, best_path); -+ copy_generic_path_info(root, &scan_plan->plan, best_path); - - return scan_plan; - } -@@ -2818,7 +2820,7 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path, - scan_relid, - tsc); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -2996,7 +2998,7 @@ create_indexscan_plan(PlannerInfo *root, - indexorderbyops, - best_path->indexscandir); - -- copy_generic_path_info(&scan_plan->plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->plan, &best_path->path); - - return scan_plan; - } -@@ -3111,7 +3113,7 @@ create_bitmap_scan_plan(PlannerInfo *root, - bitmapqualorig, - baserelid); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -3431,7 +3433,7 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path, - scan_relid, - tidquals); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -3481,7 +3483,7 @@ create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path, - scan_relid, - subplan); - -- copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - return scan_plan; - } -@@ -3524,7 +3526,7 @@ create_functionscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_functionscan(tlist, scan_clauses, scan_relid, - functions, rte->funcordinality); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3567,7 +3569,7 @@ create_tablefuncscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = 
make_tablefuncscan(tlist, scan_clauses, scan_relid, - tablefunc); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3611,7 +3613,7 @@ create_valuesscan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_valuesscan(tlist, scan_clauses, scan_relid, - values_lists); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3704,7 +3706,7 @@ create_ctescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_ctescan(tlist, scan_clauses, scan_relid, - plan_id, cte_param_id); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3743,7 +3745,7 @@ create_namedtuplestorescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_namedtuplestorescan(tlist, scan_clauses, scan_relid, - rte->enrname); - -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3781,7 +3783,7 @@ create_resultscan_plan(PlannerInfo *root, Path *best_path, - - scan_plan = make_result(tlist, (Node *) scan_clauses, NULL); - -- copy_generic_path_info(&scan_plan->plan, best_path); -+ copy_generic_path_info(root, &scan_plan->plan, best_path); - - return scan_plan; - } -@@ -3841,7 +3843,7 @@ create_worktablescan_plan(PlannerInfo *root, Path *best_path, - scan_plan = make_worktablescan(tlist, scan_clauses, scan_relid, - cteroot->wt_param_id); -- copy_generic_path_info(&scan_plan->scan.plan, best_path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, best_path); - - return scan_plan; - } -@@ -3901,7 +3903,7 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, - outer_plan); - - /* Copy cost data from Path to Plan; no need to make FDW do this */ -- 
copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &scan_plan->scan.plan, &best_path->path); - - /* Copy foreign server OID; likewise, no need to make FDW do this */ - scan_plan->fs_server = rel->serverid; -@@ -4035,7 +4037,7 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path, - * Copy cost data from Path to Plan; no need to make custom-plan providers - * do this - */ -- copy_generic_path_info(&cplan->scan.plan, &best_path->path); -+ copy_generic_path_info(root, &cplan->scan.plan, &best_path->path); - - /* Likewise, copy the relids that are represented by this custom scan */ - cplan->custom_relids = best_path->path.parent->relids; -@@ -4137,7 +4139,7 @@ create_nestloop_plan(PlannerInfo *root, - best_path->jointype, - best_path->inner_unique); - -- copy_generic_path_info(&join_plan->join.plan, &best_path->path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->path); - - return join_plan; - } -@@ -4444,7 +4446,7 @@ create_mergejoin_plan(PlannerInfo *root, - best_path->skip_mark_restore); - - /* Costs of sort and material steps are included in path cost already */ -- copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->jpath.path); - - return join_plan; - } -@@ -4617,7 +4619,7 @@ create_hashjoin_plan(PlannerInfo *root, - best_path->jpath.jointype, - best_path->jpath.inner_unique); - -- copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); -+ copy_generic_path_info(root, &join_plan->join.plan, &best_path->jpath.path); - - return join_plan; - } -@@ -5117,7 +5119,7 @@ order_qual_clauses(PlannerInfo *root, List *clauses) - * Also copy the parallel-related flags, which the executor *will* use. 
- */ - static void --copy_generic_path_info(Plan *dest, Path *src) -+copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src) - { - dest->startup_cost = src->startup_cost; - dest->total_cost = src->total_cost; -@@ -5125,6 +5127,9 @@ copy_generic_path_info(Plan *dest, Path *src) +@@ -5162,6 +5167,7 @@ copy_generic_path_info(Plan *dest, Path *src) dest->plan_width = src->pathtarget->width; dest->parallel_aware = src->parallel_aware; dest->parallel_safe = src->parallel_safe; -+ -+ if (copy_generic_path_info_hook) -+ (*copy_generic_path_info_hook) (root, dest, src); ++ dest->private = NIL; } /* +diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c +index 60e7fda6a9..5732c7a685 100644 +--- a/src/backend/optimizer/plan/planner.c ++++ b/src/backend/optimizer/plan/planner.c +@@ -145,7 +145,8 @@ static List *extract_rollup_sets(List *groupingSets); + static List *reorder_grouping_sets(List *groupingSets, List *sortclause); + static void standard_qp_callback(PlannerInfo *root, void *extra); + static double get_number_of_groups(PlannerInfo *root, +- double path_rows, ++ Path *subpath, ++ RelOptInfo *grouped_rel, + grouping_sets_data *gd, + List *target_list); + static RelOptInfo *create_grouping_paths(PlannerInfo *root, +@@ -3682,7 +3683,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) + */ + static double + get_number_of_groups(PlannerInfo *root, +- double path_rows, ++ Path *subpath, ++ RelOptInfo *grouped_rel, + grouping_sets_data *gd, + List *target_list) + { +@@ -3719,7 +3721,7 @@ get_number_of_groups(PlannerInfo *root, + GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); + double numGroups = estimate_num_groups(root, + groupExprs, +- path_rows, ++ subpath->rows, + &gset); + + gs->numGroups = numGroups; +@@ -3744,7 +3746,7 @@ get_number_of_groups(PlannerInfo *root, + GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); + double numGroups = estimate_num_groups(root, + groupExprs, +- path_rows, ++ 
subpath->rows, + &gset); + + gs->numGroups = numGroups; +@@ -3760,8 +3762,8 @@ get_number_of_groups(PlannerInfo *root, + groupExprs = get_sortgrouplist_exprs(parse->groupClause, + target_list); + +- dNumGroups = estimate_num_groups(root, groupExprs, path_rows, +- NULL); ++ dNumGroups = estimate_num_groups_ext(root, groupExprs, subpath, ++ grouped_rel, NULL); + } + } + else if (parse->groupingSets) +@@ -4147,7 +4149,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, + * Estimate number of groups. + */ + dNumGroups = get_number_of_groups(root, +- cheapest_path->rows, ++ cheapest_path, ++ grouped_rel, + gd, + extra->targetList); + +@@ -6931,13 +6934,15 @@ create_partial_grouping_paths(PlannerInfo *root, + if (cheapest_total_path != NULL) + dNumPartialGroups = + get_number_of_groups(root, +- cheapest_total_path->rows, ++ cheapest_total_path, ++ partially_grouped_rel, + gd, + extra->targetList); + if (cheapest_partial_path != NULL) + dNumPartialPartialGroups = + get_number_of_groups(root, +- cheapest_partial_path->rows, ++ cheapest_partial_path, ++ partially_grouped_rel, + gd, + extra->targetList); + diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c -index a203e6f1ff..a335ede976 100644 +index a203e6f1ff..f8db135be0 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c -@@ -1264,6 +1264,7 @@ find_childrel_parents(PlannerInfo *root, RelOptInfo *rel) +@@ -258,6 +258,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) + rel->partexprs = NULL; + rel->nullable_partexprs = NULL; + rel->partitioned_child_rels = NIL; ++ rel->private = NULL; + + /* + * Pass assorted information down the inheritance hierarchy. 
+@@ -383,7 +384,6 @@ find_base_rel(PlannerInfo *root, int relid) + if (rel) + return rel; + } +- + elog(ERROR, "no relation entry for relid %d", relid); + + return NULL; /* keep compiler quiet */ +@@ -673,6 +673,7 @@ build_join_rel(PlannerInfo *root, + joinrel->partexprs = NULL; + joinrel->nullable_partexprs = NULL; + joinrel->partitioned_child_rels = NIL; ++ joinrel->private = NULL; + + /* Compute information relevant to the foreign relations. */ + set_foreign_rel_properties(joinrel, outer_rel, inner_rel); +@@ -851,6 +852,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, + joinrel->partexprs = NULL; + joinrel->nullable_partexprs = NULL; + joinrel->partitioned_child_rels = NIL; ++ joinrel->private = NULL; + + joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids, + inner_rel->top_parent_relids); +@@ -1264,6 +1266,7 @@ find_childrel_parents(PlannerInfo *root, RelOptInfo *rel) } @@ -870,7 +518,7 @@ index a203e6f1ff..a335ede976 100644 /* * get_baserel_parampathinfo * Get the ParamPathInfo for a parameterized path for a base relation, -@@ -1332,6 +1333,10 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, +@@ -1332,6 +1335,10 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, ppi->ppi_req_outer = required_outer; ppi->ppi_rows = rows; ppi->ppi_clauses = pclauses; @@ -881,7 +529,7 @@ index a203e6f1ff..a335ede976 100644 baserel->ppilist = lappend(baserel->ppilist, ppi); return ppi; -@@ -1557,6 +1562,10 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, +@@ -1557,6 +1564,10 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, ppi->ppi_req_outer = required_outer; ppi->ppi_rows = rows; ppi->ppi_clauses = NIL; @@ -892,11 +540,43 @@ index a203e6f1ff..a335ede976 100644 joinrel->ppilist = lappend(joinrel->ppilist, ppi); return ppi; +diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c +index 821844ada3..85b2482114 100644 +--- 
a/src/backend/utils/adt/selfuncs.c ++++ b/src/backend/utils/adt/selfuncs.c +@@ -147,6 +147,7 @@ + /* Hooks for plugins to get control when we ask for stats */ + get_relation_stats_hook_type get_relation_stats_hook = NULL; + get_index_stats_hook_type get_index_stats_hook = NULL; ++estimate_num_groups_hook_type estimate_num_groups_hook = NULL; + + static double eqsel_internal(PG_FUNCTION_ARGS, bool negate); + static double eqjoinsel_inner(Oid opfuncoid, Oid collation, +@@ -3295,6 +3296,19 @@ add_unique_group_var(PlannerInfo *root, List *varinfos, + return varinfos; + } + ++double ++estimate_num_groups_ext(PlannerInfo *root, List *groupExprs, Path *subpath, ++ RelOptInfo *grouped_rel, List **pgset) ++{ ++ double input_rows = subpath->rows; ++ ++ if (estimate_num_groups_hook != NULL) ++ return (*estimate_num_groups_hook)(root, groupExprs, subpath, ++ grouped_rel, pgset); ++ ++ return estimate_num_groups(root, groupExprs, input_rows, pgset); ++} ++ + /* + * estimate_num_groups - Estimate number of groups in a grouped query + * diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h -index ba661d32a6..74e4f7592c 100644 +index ba661d32a6..09d0abe58b 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h -@@ -75,6 +75,19 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; +@@ -75,6 +75,18 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; typedef const char *(*explain_get_index_name_hook_type) (Oid indexId); extern PGDLLIMPORT explain_get_index_name_hook_type explain_get_index_name_hook; @@ -910,14 +590,13 @@ index ba661d32a6..74e4f7592c 100644 +/* Explain a node info */ +typedef void (*ExplainOneNode_hook_type) (ExplainState *es, + PlanState *ps, -+ Plan *plan, -+ double rows); ++ Plan *plan); +extern PGDLLIMPORT ExplainOneNode_hook_type ExplainOneNode_hook; extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, ParamListInfo params, DestReceiver *dest); diff --git 
a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h -index 69150e46eb..15bf1a1160 100644 +index 69150e46eb..c7361a7ef4 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -738,6 +738,10 @@ typedef struct RelOptInfo @@ -931,7 +610,16 @@ index 69150e46eb..15bf1a1160 100644 /* used for partitioned relations: */ PartitionScheme part_scheme; /* Partitioning scheme */ int nparts; /* Number of partitions; -1 if not yet set; in -@@ -1104,6 +1108,10 @@ typedef struct ParamPathInfo +@@ -753,6 +757,8 @@ typedef struct RelOptInfo + List **partexprs; /* Non-nullable partition key expressions */ + List **nullable_partexprs; /* Nullable partition key expressions */ + List *partitioned_child_rels; /* List of RT indexes */ ++ ++ List *private; + } RelOptInfo; + + /* +@@ -1104,6 +1110,10 @@ typedef struct ParamPathInfo Relids ppi_req_outer; /* rels supplying parameters used by path */ double ppi_rows; /* estimated number of result tuples */ List *ppi_clauses; /* join clauses available from outer rels */ @@ -943,31 +631,21 @@ index 69150e46eb..15bf1a1160 100644 diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 83e01074ed..5f1de775ca 100644 +index 90f02ce6fd..b093dc46ce 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h -@@ -146,6 +146,19 @@ typedef struct Plan - List *initPlan; /* Init Plan nodes (un-correlated expr - * subselects) */ - -+ /* -+ * information for adaptive query optimization -+ */ -+ bool had_path; -+ List *path_clauses; -+ List *path_relids; -+ JoinType path_jointype; -+ int path_parallel_workers; -+ bool was_parametrized; -+ /* For Adaptive optimization DEBUG purposes */ -+ double predicted_cardinality; -+ int fss_hash; +@@ -159,6 +159,9 @@ typedef struct Plan + */ + Bitmapset *extParam; + Bitmapset *allParam; + - /* - * Information for management of parameter-change-driven rescanning - * ++ /* Additional field for an extension purposes. 
*/ ++ List *private; + } Plan; + + /* ---------------- diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h -index 6141654e47..3288548af6 100644 +index 6141654e47..e6b28cbb05 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -39,6 +39,37 @@ typedef enum @@ -1043,12 +721,11 @@ index 6141654e47..3288548af6 100644 extern void set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern void set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern void set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel); -@@ -202,5 +250,7 @@ extern void set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel); +@@ -202,5 +250,6 @@ extern void set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern PathTarget *set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target); extern double compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, int loop_count, Cost *cost, double *tuple); -+extern bool IsParallelTuplesProcessing(const Plan *plan); -+extern double get_parallel_divisor(int parallel_workers); ++extern double get_parallel_divisor(Path *path); #endif /* COST_H */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h @@ -1067,19 +744,46 @@ index 3bd7072ae8..21bbaba11c 100644 * prototypes for pathnode.c */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h -index 8ce60e202e..f066ca6540 100644 +index 8ce60e202e..75415102c2 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -24,6 +24,12 @@ extern double cursor_tuple_fraction; /* query_planner callback to compute query_pathkeys */ typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra); -+/* hook for plugins to get control in creating plan from path */ -+typedef void (*copy_generic_path_info_hook_type) (PlannerInfo *root, -+ Plan *dest, Path *src); -+ -+extern PGDLLIMPORT 
copy_generic_path_info_hook_type copy_generic_path_info_hook; + ++/* Hook for plugins to get control in ExecutorRun() */ ++typedef void (*create_plan_hook_type) (PlannerInfo *root, ++ Path *best_path, ++ Plan **plan); ++extern PGDLLIMPORT create_plan_hook_type create_plan_hook; /* * prototypes for plan/planmain.c */ +diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h +index 7ac4a06391..def3522881 100644 +--- a/src/include/utils/selfuncs.h ++++ b/src/include/utils/selfuncs.h +@@ -127,6 +127,12 @@ typedef bool (*get_index_stats_hook_type) (PlannerInfo *root, + AttrNumber indexattnum, + VariableStatData *vardata); + extern PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook; ++typedef double (*estimate_num_groups_hook_type) (PlannerInfo *root, ++ List *groupExprs, ++ Path *subpath, ++ RelOptInfo *grouped_rel, ++ List **pgset); ++extern PGDLLIMPORT estimate_num_groups_hook_type estimate_num_groups_hook; + + /* Functions in selfuncs.c */ + +@@ -195,6 +201,9 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause, + + extern double estimate_num_groups(PlannerInfo *root, List *groupExprs, + double input_rows, List **pgset); ++extern double estimate_num_groups_ext(PlannerInfo *root, List *groupExprs, ++ Path *subpath, RelOptInfo *grouped_rel, ++ List **pgset); + + extern void estimate_hash_bucket_stats(PlannerInfo *root, + Node *hashkey, double nbuckets, diff --git a/auto_tuning.c b/auto_tuning.c index 8b7b32b6..a98578cf 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -15,6 +15,8 @@ * */ +#include "postgres.h" + #include "aqo.h" /* @@ -142,7 +144,7 @@ is_in_infinite_loop_cq(double *elems, int nelems) * this query to false. 
*/ void -automatical_query_tuning(int query_hash, QueryStat * stat) +automatical_query_tuning(uint64 query_hash, QueryStat * stat) { double unstability = auto_tuning_exploration; double t_aqo, diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 3b4dda09..c3e5d7a4 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -15,14 +15,50 @@ * */ -#include "aqo.h" +#include "postgres.h" + #include "optimizer/optimizer.h" +#include "aqo.h" +#include "hash.h" + +#ifdef AQO_DEBUG_PRINT +static void +predict_debug_output(List *clauses, List *selectivities, + List *relids, int fss_hash, double result) +{ + StringInfoData debug_str; + ListCell *lc; + + initStringInfo(&debug_str); + appendStringInfo(&debug_str, "fss: %d, clausesNum: %d, ", + fss_hash, list_length(clauses)); + + appendStringInfoString(&debug_str, ", selectivities: { "); + foreach(lc, selectivities) + { + Selectivity *s = (Selectivity *) lfirst(lc); + appendStringInfo(&debug_str, "%lf ", *s); + } + + appendStringInfoString(&debug_str, "}, relids: { "); + foreach(lc, relids) + { + int relid = lfirst_int(lc); + appendStringInfo(&debug_str, "%d ", relid); + } + + appendStringInfo(&debug_str, "}, result: %lf", result); + elog(DEBUG1, "Prediction: %s", debug_str.data); + pfree(debug_str.data); +} +#endif + /* * General method for prediction the cardinality of given relation. */ double -predict_for_relation(List *restrict_clauses, List *selectivities, +predict_for_relation(List *clauses, List *selectivities, List *relids, int *fss_hash) { int nfeatures; @@ -33,15 +69,22 @@ predict_for_relation(List *restrict_clauses, List *selectivities, int rows; int i; - *fss_hash = get_fss_for_object(restrict_clauses, selectivities, relids, - &nfeatures, &features); + if (relids == NIL) + /* + * Don't make prediction for query plans without any underlying plane + * tables. Use return value -4 for debug purposes. 
+ */ + return -4.; + + *fss_hash = get_fss_for_object(relids, clauses, + selectivities, &nfeatures, &features); if (nfeatures > 0) for (i = 0; i < aqo_K; ++i) matrix[i] = palloc0(sizeof(**matrix) * nfeatures); if (load_fss(query_context.fspace_hash, *fss_hash, nfeatures, matrix, - targets, &rows)) + targets, &rows, NULL)) result = OkNNr_predict(rows, nfeatures, matrix, targets, features); else { @@ -53,7 +96,9 @@ predict_for_relation(List *restrict_clauses, List *selectivities, */ result = -1; } - +#ifdef AQO_DEBUG_PRINT + predict_debug_output(clauses, selectivities, relids, *fss_hash, result); +#endif pfree(features); if (nfeatures > 0) { diff --git a/cardinality_hooks.c b/cardinality_hooks.c index b6c2c985..7d962c04 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -25,35 +25,24 @@ * */ +#include "postgres.h" + #include "aqo.h" +#include "cardinality_hooks.h" +#include "hash.h" +#include "path_utils.h" + +estimate_num_groups_hook_type prev_estimate_num_groups_hook = NULL; double predicted_ppi_rows; double fss_ppi_hash; -static void call_default_set_baserel_rows_estimate(PlannerInfo *root, - RelOptInfo *rel); -static double call_default_get_parameterized_baserel_size(PlannerInfo *root, - RelOptInfo *rel, - List *param_clauses); -static void call_default_set_joinrel_size_estimates(PlannerInfo *root, - RelOptInfo *rel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - SpecialJoinInfo *sjinfo, - List *restrictlist); -static double call_default_get_parameterized_joinrel_size(PlannerInfo *root, - RelOptInfo *rel, - Path *outer_path, - Path *inner_path, - SpecialJoinInfo *sjinfo, - List *restrict_clauses); - /* * Calls standard set_baserel_rows_estimate or its previous hook. 
*/ -void -call_default_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) +static void +default_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) { if (prev_set_baserel_rows_estimate_hook) prev_set_baserel_rows_estimate_hook(root, rel); @@ -64,8 +53,8 @@ call_default_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) /* * Calls standard get_parameterized_baserel_size or its previous hook. */ -double -call_default_get_parameterized_baserel_size(PlannerInfo *root, +static double +default_get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, List *param_clauses) { @@ -78,8 +67,8 @@ call_default_get_parameterized_baserel_size(PlannerInfo *root, /* * Calls standard get_parameterized_joinrel_size or its previous hook. */ -double -call_default_get_parameterized_joinrel_size(PlannerInfo *root, +static double +default_get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, Path *outer_path, Path *inner_path, @@ -103,8 +92,8 @@ call_default_get_parameterized_joinrel_size(PlannerInfo *root, /* * Calls standard set_joinrel_size_estimates or its previous hook. */ -void -call_default_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +static void +default_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *outer_rel, RelOptInfo *inner_rel, SpecialJoinInfo *sjinfo, @@ -124,6 +113,22 @@ call_default_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, restrictlist); } +static double +default_estimate_num_groups(PlannerInfo *root, List *groupExprs, + Path *subpath, RelOptInfo *grouped_rel, + List **pgset) +{ + double input_rows = subpath->rows; + + if (prev_estimate_num_groups_hook != NULL) + return (*prev_estimate_num_groups_hook)(root, groupExprs, + subpath, + grouped_rel, + pgset); + else + return estimate_num_groups(root, groupExprs, input_rows, pgset); +} + /* * Our hook for setting baserel rows estimate. 
 * Extracts clauses, their selectivities and list of relation relids and @@ -134,49 +139,60 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) { double predicted; Oid relid; - List *relids; + List *relids = NIL; List *selectivities = NULL; - List *restrict_clauses; + List *clauses; int fss = 0; - if (query_context.use_aqo) + if (IsQueryDisabled()) + /* Fast path. */ + goto default_estimator; + + if (query_context.use_aqo || query_context.learn_aqo) selectivities = get_selectivities(root, rel->baserestrictinfo, 0, JOIN_INNER, NULL); - else + + if (!query_context.use_aqo) { - rel->predicted_cardinality = -2.; - call_default_set_baserel_rows_estimate(root, rel); - return; + if (query_context.learn_aqo) + list_free_deep(selectivities); + + goto default_estimator; } relid = planner_rt_fetch(rel->relid, root)->relid; - relids = list_make1_int(relid); + if (OidIsValid(relid)) + /* Predict for a plain table only. */ + relids = list_make1_int(relid); - restrict_clauses = list_copy(rel->baserestrictinfo); - predicted = predict_for_relation(restrict_clauses, selectivities, + clauses = aqo_get_clauses(root, rel->baserestrictinfo); + predicted = predict_for_relation(clauses, selectivities, relids, &fss); rel->fss_hash = fss; + list_free_deep(selectivities); + list_free(clauses); + list_free(relids); + if (predicted >= 0) { rel->rows = predicted; rel->predicted_cardinality = predicted; - } - else - { - call_default_set_baserel_rows_estimate(root, rel); - rel->predicted_cardinality = -1.; + return; } - list_free_deep(selectivities); - list_free(restrict_clauses); - list_free(relids); +default_estimator: + rel->predicted_cardinality = -1.; + default_set_baserel_rows_estimate(root, rel); } void ppi_hook(ParamPathInfo *ppi) { + if (IsQueryDisabled()) + return; + + ppi->predicted_ppi_rows = predicted_ppi_rows; ppi->fss_ppi_hash = fss_ppi_hash; } @@ -193,7 +209,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, { double predicted; Oid relid = InvalidOid; - List
 *relids = NULL; + List *relids = NIL; List *allclauses = NULL; List *selectivities = NULL; ListCell *l; @@ -204,12 +220,16 @@ aqo_get_parameterized_baserel_size, int current_hash; int fss = 0; - if (query_context.use_aqo) + if (IsQueryDisabled()) + /* Fast path */ + goto default_estimator; + + if (query_context.use_aqo || query_context.learn_aqo) { MemoryContext old_ctx_m; - allclauses = list_concat(list_copy(param_clauses), - list_copy(rel->baserestrictinfo)); + allclauses = list_concat(aqo_get_clauses(root, param_clauses), + aqo_get_clauses(root, rel->baserestrictinfo)); selectivities = get_selectivities(root, allclauses, rel->relid, JOIN_INNER, NULL); relid = planner_rt_fetch(rel->relid, root)->relid; @@ -230,14 +250,21 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, pfree(args_hash); pfree(eclass_hash); } - else + + if (!query_context.use_aqo) { - predicted_ppi_rows = -3.; - return call_default_get_parameterized_baserel_size(root, rel, - param_clauses); + if (query_context.learn_aqo) + { + list_free_deep(selectivities); + list_free(allclauses); + } + + goto default_estimator; } - relids = list_make1_int(relid); + if (OidIsValid(relid)) + /* Predict for a plain table only. 
*/ + relids = list_make1_int(relid); predicted = predict_for_relation(allclauses, selectivities, relids, &fss); @@ -246,9 +273,9 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, if (predicted >= 0) return predicted; - else - return call_default_get_parameterized_baserel_size(root, rel, - param_clauses); + +default_estimator: + return default_get_parameterized_baserel_size(root, rel, param_clauses); } /* @@ -272,20 +299,22 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, List *inner_selectivities; List *outer_selectivities; List *current_selectivities = NULL; - int fss = 0; + int fss = 0; + + if (IsQueryDisabled()) + /* Fast path */ + goto default_estimator; - if (query_context.use_aqo) + if (query_context.use_aqo || query_context.learn_aqo) current_selectivities = get_selectivities(root, restrictlist, 0, sjinfo->jointype, sjinfo); - else + + if (!query_context.use_aqo) { - rel->predicted_cardinality = -2.; - call_default_set_joinrel_size_estimates(root, rel, - outer_rel, - inner_rel, - sjinfo, - restrictlist); - return; + if (query_context.learn_aqo) + list_free_deep(current_selectivities); + + goto default_estimator; } relids = get_list_of_relids(root, rel->relids); @@ -293,7 +322,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, &outer_selectivities); inner_clauses = get_path_clauses(inner_rel->cheapest_total_path, root, &inner_selectivities); - allclauses = list_concat(list_copy(restrictlist), + allclauses = list_concat(aqo_get_clauses(root, restrictlist), list_concat(outer_clauses, inner_clauses)); selectivities = list_concat(current_selectivities, list_concat(outer_selectivities, @@ -306,16 +335,14 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, { rel->predicted_cardinality = predicted; rel->rows = predicted; + return; } - else - { - rel->predicted_cardinality = -1; - call_default_set_joinrel_size_estimates(root, rel, - outer_rel, - inner_rel, - sjinfo, - restrictlist); - } + 
+default_estimator: + rel->predicted_cardinality = -1; + default_set_joinrel_size_estimates(root, rel, + outer_rel, inner_rel, + sjinfo, restrictlist); } /* @@ -329,7 +356,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, Path *outer_path, Path *inner_path, SpecialJoinInfo *sjinfo, - List *restrict_clauses) + List *clauses) { double predicted; List *relids; @@ -342,23 +369,26 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, List *current_selectivities = NULL; int fss = 0; - if (query_context.use_aqo) - current_selectivities = get_selectivities(root, restrict_clauses, 0, + if (IsQueryDisabled()) + /* Fast path */ + goto default_estimator; + + if (query_context.use_aqo || query_context.learn_aqo) + current_selectivities = get_selectivities(root, clauses, 0, sjinfo->jointype, sjinfo); - else + + if (!query_context.use_aqo) { - predicted_ppi_rows = -3.; - return call_default_get_parameterized_joinrel_size(root, rel, - outer_path, - inner_path, - sjinfo, - restrict_clauses); + if (query_context.learn_aqo) + list_free_deep(current_selectivities); + + goto default_estimator; } relids = get_list_of_relids(root, rel->relids); outer_clauses = get_path_clauses(outer_path, root, &outer_selectivities); inner_clauses = get_path_clauses(inner_path, root, &inner_selectivities); - allclauses = list_concat(list_copy(restrict_clauses), + allclauses = list_concat(aqo_get_clauses(root, clauses), list_concat(outer_clauses, inner_clauses)); selectivities = list_concat(current_selectivities, list_concat(outer_selectivities, @@ -371,10 +401,83 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, if (predicted >= 0) return predicted; + +default_estimator: + return default_get_parameterized_joinrel_size(root, rel, + outer_path, inner_path, + sjinfo, clauses); +} + +static double +predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, + int *fss) +{ + int child_fss = 0; + double prediction; + int rows; + double target; + + if 
(subpath->parent->predicted_cardinality > 0.) + /* A fast path. Here we can use a fss hash of a leaf. */ + child_fss = subpath->parent->fss_hash; + else + { + List *relids; + List *clauses; + List *selectivities = NIL; + + relids = get_list_of_relids(root, subpath->parent->relids); + clauses = get_path_clauses(subpath, root, &selectivities); + (void) predict_for_relation(clauses, selectivities, relids, &child_fss); + } + + *fss = get_grouped_exprs_hash(child_fss, group_exprs); + + if (!load_fss(query_context.fspace_hash, *fss, 0, NULL, &target, &rows, NULL)) + return -1; + + Assert(rows == 1); + prediction = exp(target); + return (prediction <= 0) ? -1 : prediction; +} + +double +aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, + Path *subpath, RelOptInfo *grouped_rel, + List **pgset) +{ + int fss; + double predicted; + + if (!query_context.use_aqo) + goto default_estimator; + + if (pgset || groupExprs == NIL) + /* XXX: Don't support some GROUPING options */ + goto default_estimator; + + if (prev_estimate_num_groups_hook != NULL) + elog(WARNING, "AQO replaced another estimator of a groups number"); + + if (groupExprs == NIL) + return 1.0; + + predicted = predict_num_groups(root, subpath, groupExprs, &fss); + if (predicted > 0.) + { + grouped_rel->predicted_cardinality = predicted; + grouped_rel->rows = predicted; + grouped_rel->fss_hash = fss; + return predicted; + } else - return call_default_get_parameterized_joinrel_size(root, rel, - outer_path, - inner_path, - sjinfo, - restrict_clauses); + /* + * Some nodes AQO doesn't know yet, some nodes are ignored by AQO + * permanently - as an example, SubqueryScan. 
+ */ + grouped_rel->predicted_cardinality = -1; + +default_estimator: + return default_estimate_num_groups(root, groupExprs, subpath, grouped_rel, + pgset); } diff --git a/cardinality_hooks.h b/cardinality_hooks.h new file mode 100644 index 00000000..0e8c65c0 --- /dev/null +++ b/cardinality_hooks.h @@ -0,0 +1,31 @@ +#ifndef CARDINALITY_HOOKS_H +#define CARDINALITY_HOOKS_H + +#include "optimizer/planner.h" +#include "utils/selfuncs.h" + +extern estimate_num_groups_hook_type prev_estimate_num_groups_hook; + + +/* Cardinality estimation hooks */ +extern void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel); +extern double aqo_get_parameterized_baserel_size(PlannerInfo *root, + RelOptInfo *rel, + List *param_clauses); +extern void aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel, + SpecialJoinInfo *sjinfo, + List *restrictlist); +extern double aqo_get_parameterized_joinrel_size(PlannerInfo *root, + RelOptInfo *rel, + Path *outer_path, + Path *inner_path, + SpecialJoinInfo *sjinfo, + List *restrict_clauses); +extern double aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, + Path *subpath, + RelOptInfo *grouped_rel, + List **pgset); + +#endif /* CARDINALITY_HOOKS_H */ diff --git a/expected/aqo_CVE-2020-14350.out b/expected/aqo_CVE-2020-14350.out index 5dafac09..de90beaa 100644 --- a/expected/aqo_CVE-2020-14350.out +++ b/expected/aqo_CVE-2020-14350.out @@ -9,6 +9,7 @@ CREATE ROLE regress_hacker LOGIN; -- Test 1 RESET ROLE; ALTER ROLE regress_hacker NOSUPERUSER; +GRANT CREATE ON SCHEMA public TO regress_hacker; SET ROLE regress_hacker; SHOW is_superuser; is_superuser @@ -47,12 +48,12 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_status(hash int) +CREATE FUNCTION aqo_status(hash bigint) RETURNS TABLE ( "learn" BOOL, "use aqo" BOOL, "auto tune" BOOL, - "fspace hash" INT, + "fspace hash" bigINT, "t_naqo" TEXT, "err_naqo" TEXT, "iters" BIGINT, @@ -69,12 +70,12 @@ RESET 
ROLE; CREATE EXTENSION aqo; ERROR: function "aqo_status" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_status(hash int) +CREATE OR REPLACE FUNCTION aqo_status(hash bigint) RETURNS TABLE ( "learn" BOOL, "use aqo" BOOL, "auto tune" BOOL, - "fspace hash" INT, + "fspace hash" bigINT, "t_naqo" TEXT, "err_naqo" TEXT, "iters" BIGINT, @@ -101,7 +102,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_status(int); +DROP FUNCTION aqo_status(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 3 @@ -114,7 +115,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_enable_query(hash int) +CREATE FUNCTION aqo_enable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -125,7 +126,7 @@ RESET ROLE; CREATE EXTENSION aqo; ERROR: function "aqo_enable_query" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_query(hash int) +CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -147,7 +148,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_enable_query(int); +DROP FUNCTION aqo_enable_query(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 4 @@ -160,7 +161,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_disable_query(hash int) +CREATE FUNCTION aqo_disable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -171,7 +172,7 @@ RESET ROLE; CREATE EXTENSION aqo; ERROR: function "aqo_disable_query" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_query(hash int) +CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -193,7 +194,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_disable_query(int); +DROP FUNCTION aqo_disable_query(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 5 @@ -206,7 +207,7 @@ SHOW 
is_superuser; off (1 row) -CREATE FUNCTION aqo_clear_hist(hash int) +CREATE FUNCTION aqo_clear_hist(hash bigint) RETURNS VOID AS $$ BEGIN @@ -217,7 +218,7 @@ RESET ROLE; CREATE EXTENSION aqo; ERROR: function "aqo_clear_hist" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_clear_hist(hash int) +CREATE OR REPLACE FUNCTION aqo_clear_hist(hash bigint) RETURNS VOID AS $$ BEGIN @@ -239,7 +240,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_clear_hist(int); +DROP FUNCTION aqo_clear_hist(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 6 @@ -252,7 +253,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_drop(hash int) +CREATE FUNCTION aqo_drop(hash bigint) RETURNS VOID AS $$ BEGIN @@ -263,7 +264,7 @@ RESET ROLE; CREATE EXTENSION aqo; ERROR: function "aqo_drop" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_drop(hash int) +CREATE OR REPLACE FUNCTION aqo_drop(hash bigint) RETURNS VOID AS $$ BEGIN @@ -285,7 +286,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_drop(int); +DROP FUNCTION aqo_drop(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 7 @@ -369,4 +370,5 @@ DROP FUNCTION aqo_migrate_to_1_1_get_pk(text); DROP EXTENSION IF EXISTS aqo; -- Cleanup RESET ROLE; +DROP OWNED BY regress_hacker CASCADE; DROP ROLE regress_hacker; diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index 66bc4970..7956f649 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -85,7 +85,7 @@ SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants -- Trivial JOIN push-down. 
SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) - SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; +SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; ') AS str WHERE str NOT LIKE '%Sort Method%'; str ------------------------------------------------------------ diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out new file mode 100644 index 00000000..bc143be7 --- /dev/null +++ b/expected/clean_aqo_data.out @@ -0,0 +1,321 @@ +CREATE EXTENSION aqo; +SET aqo.mode = 'learn'; +DROP TABLE IF EXISTS a; +NOTICE: table "a" does not exist, skipping +DROP TABLE IF EXISTS b; +NOTICE: table "b" does not exist, skipping +CREATE TABLE a(); +SELECT * FROM a; +-- +(0 rows) + +SELECT 'a'::regclass::oid AS a_oid \gset +SELECT clean_aqo_data(); +NOTICE: Cleaning aqo_data records + clean_aqo_data +---------------- + +(1 row) + +/* + * lines with a_oid in aqo_data, + * lines with fspace_hash corresponding to a_oid in aqo_queries, + * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_texts, + * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat + * should remain + */ +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); + count +------- + 1 +(1 row) + +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + count +------- + 1 +(1 row) + +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + count +------- + 1 +(1 row) + +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + count +------- + 1 +(1 row) + +DROP TABLE a; +SELECT clean_aqo_data(); +NOTICE: Cleaning 
aqo_data records + clean_aqo_data +---------------- + +(1 row) + +/* + * lines with a_oid in aqo_data, + * lines with a_oid's fspace_hash EQUAL TO query_hash in aqo_queries, + * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_texts, + * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat, + * should be deleted +*/ +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash); + count +------- + 0 +(1 row) + +CREATE TABLE a(); +SELECT * FROM a; +-- +(0 rows) + +SELECT 'a'::regclass::oid AS a_oid \gset +-- add manually line with different fspace_hash and query_hash to aqo_queries +INSERT INTO aqo_queries VALUES (:a_oid + 1, 't', 't', :a_oid, 'f'); +DROP TABLE a; +SELECT clean_aqo_data(); +NOTICE: Cleaning aqo_data records + clean_aqo_data +---------------- + +(1 row) + +-- this line should remain +SELECT count(*) FROM aqo_queries WHERE (fspace_hash = :a_oid AND query_hash = :a_oid + 1); + count +------- + 1 +(1 row) + +CREATE TABLE a(); +CREATE TABLE b(); +SELECT * FROM a; +-- +(0 rows) + +SELECT * FROM b; +-- +(0 rows) + +SELECT * FROM b CROSS JOIN a; +-- +(0 rows) + +SELECT 
'a'::regclass::oid AS a_oid \gset +SELECT 'b'::regclass::oid AS b_oid \gset +-- new lines added to aqo_data +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); + count +------- + 3 +(1 row) + +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + count +------- + 2 +(1 row) + +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + count +------- + 2 +(1 row) + +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + count +------- + 2 +(1 row) + +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); + count +------- + 3 +(1 row) + +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)); + count +------- + 2 +(1 row) + +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + count +------- + 2 +(1 row) + +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + count +------- + 2 +(1 row) + +DROP TABLE a; +SELECT clean_aqo_data(); +NOTICE: Cleaning aqo_data records + clean_aqo_data +---------------- + +(1 row) + +/* + * lines corresponding to a_oid and both a_oid's fspace_hash deleted in aqo_data, + * lines with fspace_hash corresponding to a_oid deleted in aqo_queries, + * lines with query_hash 
corresponding to a_oid's fspace_hash deleted in aqo_query_texts, + * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_stat, + */ +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash); + count +------- + 0 +(1 row) + +-- lines corresponding to b_oid in all theese tables should remain +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); + count +------- + 1 +(1 row) + +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash; + count +------- + 1 +(1 row) + +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash); + count +------- + 1 +(1 row) + +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash 
FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash); + count +------- + 1 +(1 row) + +DROP TABLE b; +SELECT clean_aqo_data(); +NOTICE: Cleaning aqo_data records + clean_aqo_data +---------------- + +(1 row) + +-- lines corresponding to b_oid in theese tables deleted +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash); + count +------- + 0 +(1 row) + +DROP EXTENSION aqo; diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index 294c7fdb..fa40fcf6 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -32,19 +32,19 @@ SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; (1 row) SELECT * FROM aqo_data; - fspace_hash | fsspace_hash | nfeatures | features | targets --------------+--------------+-----------+----------+--------- + fspace_hash | fsspace_hash | nfeatures | features | targets | oids +-------------+--------------+-----------+----------+---------+------ (0 rows) SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex FROM aqo_queries JOIN 
aqo_query_stat USING (query_hash); learn_aqo | use_aqo | auto_tuning | ce | nex -----------+---------+-------------+----------------------+----- - f | f | f | {2.9661225937240054} | 1 + f | f | f | {0.8637762840285226} | 1 f | f | f | {2.9634630129852053} | 1 (2 rows) -SELECT query_text FROM aqo_query_texts ORDER BY md5(query_text); +SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); query_text -------------------------------------------------------------------- SELECT count(*) FROM person WHERE age<18; diff --git a/expected/plancache.out b/expected/plancache.out new file mode 100644 index 00000000..64eecf99 --- /dev/null +++ b/expected/plancache.out @@ -0,0 +1,46 @@ +-- Tests on interaction of AQO with cached plans. +CREATE EXTENSION aqo; +SET aqo.mode = 'intelligent'; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +CREATE TABLE test AS SELECT x FROM generate_series(1,10) AS x; +ANALYZE test; +-- Function which implements a test where AQO is used for both situations where +-- a query is planned or got from a plan cache. +-- Use a function to hide a system dependent hash value. +CREATE FUNCTION f1() RETURNS TABLE ( + nnex bigint, + nex bigint, + pt double precision[] +) AS $$ +DECLARE + i integer; + qhash bigint; +BEGIN + PREPARE fooplan (int) AS SELECT count(*) FROM test WHERE x = $1; + + FOR i IN 1..10 LOOP + execute 'EXECUTE fooplan(1)'; + END LOOP; + + SELECT query_hash FROM aqo_query_texts + WHERE query_text LIKE '%count(*) FROM test WHERE x%' INTO qhash; + + RETURN QUERY SELECT executions_without_aqo nnex, + executions_with_aqo nex, + planning_time_with_aqo pt + FROM aqo_query_stat WHERE query_hash = qhash; +END $$ LANGUAGE 'plpgsql'; +-- The function shows 6 executions without an AQO support (nnex) and +-- 4 executions with usage of an AQO knowledge base (nex). Planning time in the +-- case of AQO support (pt) is equal to '-1', because the query plan is exracted +-- from the plan cache. 
+SELECT * FROM f1(); + nnex | nex | pt +------+-----+--------------- + 6 | 4 | {-1,-1,-1,-1} +(1 row) + +DROP FUNCTION f1; +DROP TABLE test CASCADE; +DROP EXTENSION aqo; diff --git a/expected/top_queries.out b/expected/top_queries.out new file mode 100644 index 00000000..ebf6d21b --- /dev/null +++ b/expected/top_queries.out @@ -0,0 +1,51 @@ +CREATE EXTENSION aqo; +SET aqo.mode = 'disabled'; +SET aqo.force_collect_stat = 'on'; +-- +-- num of generate_series(1,1000000) query should be the first +-- +SELECT count(*) FROM generate_series(1,1000000); + count +--------- + 1000000 +(1 row) + +SELECT num FROM top_time_queries(10) AS tt WHERE + tt.fspace_hash = (SELECT fspace_hash FROM aqo_queries WHERE + aqo_queries.query_hash = (SELECT aqo_query_texts.query_hash FROM aqo_query_texts + WHERE query_text = 'SELECT count(*) FROM generate_series(1,1000000);')); +NOTICE: Top 10 execution time queries + num +----- + 1 +(1 row) + +-- +-- num of query uses table t2 should be bigger than num of query uses table t1 and be the first +-- +CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + FROM generate_series(1,1000) AS gs; +CREATE TABLE t2 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + FROM generate_series(1,100000) AS gs; +SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + count +------- + 31 +(1 row) + +SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + count +------- + 31 +(1 row) + +SELECT num FROM top_error_queries(10) AS te WHERE + te.fspace_hash = (SELECT fspace_hash FROM aqo_queries WHERE + aqo_queries.query_hash = (SELECT aqo_query_texts.query_hash FROM aqo_query_texts + WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;')); +NOTICE: Top 10 cardinality error queries + num +----- + 1 +(1 row) + diff --git a/expected/unsupported.out b/expected/unsupported.out index 95a011bc..30de424d 100644 --- 
a/expected/unsupported.out +++ b/expected/unsupported.out @@ -3,41 +3,540 @@ SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; DROP TABLE IF EXISTS t; CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; -SELECT str FROM expln(' +ANALYZE t; +CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + FROM generate_series(1,1000) AS gs; +ANALYZE t, t1; +-- +-- Do not support HAVING clause for now. +-- +SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + count +------- + 17 +(1 row) + +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + QUERY PLAN +------------------------------- + Aggregate + AQO not used + -> HashAggregate + AQO: rows=17 + Group Key: t.x + -> Seq Scan on t + AQO: rows=801 + Filter: (x > 3) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(11 rows) + +-- +-- Doesn't estimates GROUP BY clause +-- +SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + count +------- + 10 +(1 row) + +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + QUERY PLAN +------------------------------- + Aggregate + AQO not used + -> HashAggregate + AQO: rows=10 + Group Key: t1.x, t1.y + -> Seq Scan on t1 + AQO: rows=1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(10 rows) + +SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + count +------- + 10 +(1 row) + +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + QUERY PLAN +---------------------------------------- + Aggregate + AQO not used + -> HashAggregate + AQO: rows=10 + Group Key: t1.x, (t1.x * t1.y) + -> Seq Scan on t1 + AQO: rows=1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(10 rows) + +SELECT count(*) FROM ( + SELECT count(*) AS x FROM ( + SELECT count(*) FROM t1 GROUP BY (x,y) + ) AS q1 +) AS q2 +WHERE q2.x > 1; + count +------- + 1 +(1 row) + +SELECT count(*) FROM ( + SELECT count(*) AS x 
FROM ( + SELECT count(*) FROM t1 GROUP BY (x,y) + ) AS q1 +) AS q2 +WHERE q2.x > 1; + count +------- + 1 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM ( + SELECT count(*) AS x FROM ( + SELECT count(*) FROM t1 GROUP BY (x,y) + ) AS q1 +) AS q2 +WHERE q2.x > 1; + QUERY PLAN +------------------------------------- + Aggregate + AQO not used + -> Aggregate + AQO not used + Filter: (count(*) > 1) + -> HashAggregate + AQO: rows=10 + Group Key: t1.x, t1.y + -> Seq Scan on t1 + AQO: rows=1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(13 rows) + +-- +-- Doesn't support GROUPING SETS clause +-- +SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + count +------- + 31 +(1 row) + +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + QUERY PLAN +------------------------------ + Aggregate + AQO not used + -> MixedAggregate + AQO not used + Hash Key: t1.x, t1.y + Hash Key: t1.x + Hash Key: t1.y + Group Key: () + -> Seq Scan on t1 + AQO: rows=1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(13 rows) + +-- +-- The subplans issue +-- +SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); + count +------- + 50 +(1 row) + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) - SELECT * FROM t GROUP BY (x) HAVING x > 3; -') AS str WHERE str NOT LIKE '%Memory Usage%'; - str ------------------------------------------------ - HashAggregate (actual rows=17 loops=1) + SELECT count(*) FROM t WHERE x = ( + SELECT avg(x) FROM t WHERE x = 1 + ); + QUERY PLAN +---------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + InitPlan 1 (returns $0) + -> Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t t_1 (actual rows=50 loops=1) + AQO: rows=50, error=0% + Filter: (x = 1) + Rows Removed by Filter: 950 + -> Seq Scan on t (actual rows=50 loops=1) + AQO: rows=50, error=0% + Filter: 
((x)::numeric = $0) + Rows Removed by Filter: 950 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(16 rows) + +SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + count +------- + 1000 +(1 row) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE x = ( + SELECT avg(x) FROM t t0 WHERE t0.x = t.x + ); + QUERY PLAN +------------------------------------------------------------------ + Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: ((x)::numeric = (SubPlan 1)) + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=50 loops=1000) + AQO: rows=50, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 950 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(15 rows) + +-- Two identical subplans in a clause list +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE + x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + QUERY PLAN +------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) AQO not used - Group Key: x - -> Seq Scan on t (actual rows=801 loops=1) + -> Seq Scan on t (actual rows=0 loops=1) AQO not used - Filter: (x > 3) - Rows Removed by Filter: 199 + Filter: (((x)::numeric = (SubPlan 1)) OR (SubPlan 2)) + Rows Removed by Filter: 1000 + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=0 loops=1000) + AQO not used + Filter: (x = (t.x + 21)) + Rows Removed by Filter: 1000 + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=0 loops=1000) + AQO not used + Filter: (x = (t.x + 21)) + Rows Removed by Filter: 1000 Using aqo: true AQO mode: LEARN JOINS: 0 -(10 rows) +(23 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT 
count(*) FROM t WHERE + x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + QUERY PLAN +------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t (actual rows=0 loops=1) + AQO: rows=1, error=100% + Filter: (((x)::numeric = (SubPlan 1)) OR (SubPlan 2)) + Rows Removed by Filter: 1000 + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=0 loops=1000) + AQO: rows=1, error=100% + Filter: (x = (t.x + 21)) + Rows Removed by Filter: 1000 + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=0 loops=1000) + AQO: rows=1, error=100% + Filter: (x = (t.x + 21)) + Rows Removed by Filter: 1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(23 rows) + +-- It's OK to use the knowledge for a query with different constants. +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE + x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 22) OR + x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 23); + QUERY PLAN +------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t (actual rows=0 loops=1) + AQO: rows=1, error=100% + Filter: (((x)::numeric = (SubPlan 1)) OR (SubPlan 2)) + Rows Removed by Filter: 1000 + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=0 loops=1000) + AQO: rows=1, error=100% + Filter: (x = (t.x + 22)) + Rows Removed by Filter: 1000 + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=0 loops=1000) + AQO: rows=1, error=100% + Filter: (x = (t.x + 23)) + Rows Removed by Filter: 1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(23 rows) + +-- Different SubPlans in the quals of leafs of JOIN. 
+SELECT count(*) FROM + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + JOIN + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + ON q1.x = q2.x+1; + count +------- + 42550 +(1 row) --- Do not support having clauses for now. SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) - SELECT * FROM t GROUP BY (x) HAVING x > 3; +SELECT count(*) FROM + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + JOIN + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + ON q1.x = q2.x+1; ') AS str WHERE str NOT LIKE '%Memory Usage%'; - str ------------------------------------------------ - HashAggregate (actual rows=17 loops=1) + str +------------------------------------------------------------------------------ + Aggregate (actual rows=1 loops=1) + AQO not used + -> Hash Join (actual rows=42550 loops=1) + AQO: rows=42550, error=0% + Hash Cond: ((t_1.x + 1) = t.x) + -> Seq Scan on t t_1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (((x % 3))::numeric < (SubPlan 2)) + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=950 loops=1000) + AQO: rows=950, error=-0% + Filter: (x <> t_1.x) + Rows Removed by Filter: 50 + -> Hash (actual rows=851 loops=1) + AQO not used + -> Seq Scan on t (actual rows=851 loops=1) + AQO: rows=851, error=0% + Filter: (((x % 3))::numeric < (SubPlan 1)) + Rows Removed by Filter: 149 + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=50 loops=1000) + AQO: rows=50, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 950 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(31 rows) + +-- Two identical subplans in a clause +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + (SELECT avg(x) FROM t t0 WHERE t0.x = 
t.x); + QUERY PLAN +-------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) AQO not used - Group Key: x - -> Seq Scan on t (actual rows=801 loops=1) - AQO: rows=801, error=0% - Filter: (x > 3) - Rows Removed by Filter: 199 + -> Seq Scan on t (actual rows=1000 loops=1) + AQO not used + Filter: ((SubPlan 1) = (SubPlan 2)) + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=50 loops=1000) + AQO not used + Filter: (x = t.x) + Rows Removed by Filter: 950 + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=50 loops=1000) + AQO not used + Filter: (x = t.x) + Rows Removed by Filter: 950 Using aqo: true AQO mode: LEARN JOINS: 0 -(10 rows) +(22 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + QUERY PLAN +-------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: ((SubPlan 1) = (SubPlan 2)) + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=50 loops=1000) + AQO: rows=50, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 950 + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=50 loops=1000) + AQO: rows=50, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 950 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(22 rows) + +-- +-- Not executed nodes +-- +SELECT * FROM + (SELECT * FROM t WHERE x < 0) AS t0 + JOIN + (SELECT * FROM t WHERE x > 20) AS t1 + USING(x); + x +--- +(0 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM + (SELECT * FROM t WHERE x < 0) AS t0 + JOIN + (SELECT * FROM t WHERE x > 20) AS t1 + 
USING(x); + QUERY PLAN +--------------------------------------------- + Nested Loop (actual rows=0 loops=1) + AQO: rows=1, error=100% + Join Filter: (t.x = t_1.x) + -> Seq Scan on t (actual rows=0 loops=1) + AQO: rows=1, error=100% + Filter: (x < 0) + Rows Removed by Filter: 1000 + -> Seq Scan on t t_1 (never executed) + AQO: rows=1 + Filter: (x > 20) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(13 rows) + +-- AQO need to predict total fetched tuples in a table. +-- +-- At a non-leaf node we have prediction about input tuples - is a number of +-- predicted output rows in underlying node. But for Scan nodes we don't have +-- any prediction on number of fetched tuples. +-- So, if selectivity was wrong we could make bad choice of Scan operation. +-- For example, we could choose suboptimal index. +-- Turn off statistics gathering for simple demonstration of filtering problem. +ALTER TABLE t SET (autovacuum_enabled = 'false'); +CREATE INDEX ind1 ON t(x); +SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + count +------- + 50 +(1 row) + +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + QUERY PLAN +---------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Index Only Scan using ind1 on t (actual rows=50 loops=1) + AQO: rows=50, error=0% + Index Cond: (x < 3) + Filter: (mod(x, 3) = 1) + Rows Removed by Filter: 99 + Heap Fetches: 149 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(11 rows) + +-- Because of bad statistics we use a last created index instead of best choice. +-- Here we filter more tuples than with the ind1 index. 
+CREATE INDEX ind2 ON t(mod(x,3)); +SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + count +------- + 50 +(1 row) + +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + QUERY PLAN +----------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Bitmap Heap Scan on t (actual rows=50 loops=1) + AQO: rows=50, error=0% + Recheck Cond: (mod(x, 3) = 1) + Filter: (x < 3) + Rows Removed by Filter: 300 + Heap Blocks: exact=5 + -> Bitmap Index Scan on ind2 (actual rows=350 loops=1) + AQO not used + Index Cond: (mod(x, 3) = 1) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(14 rows) + +-- Best choice is ... +ANALYZE t; +EXPLAIN (COSTS OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + QUERY PLAN +--------------------------------------- + Aggregate + AQO not used + -> Index Only Scan using ind1 on t + AQO: rows=50 + Index Cond: (x < 3) + Filter: (mod(x, 3) = 1) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(9 rows) +DROP TABLE t,t1 CASCADE; DROP EXTENSION aqo; diff --git a/hash.c b/hash.c index f853c359..0daad6e6 100644 --- a/hash.c +++ b/hash.c @@ -19,11 +19,15 @@ * */ +#include "postgres.h" + +#include "math.h" + #include "aqo.h" +#include "hash.h" static int get_str_hash(const char *str); static int get_node_hash(Node *node); -static int get_int_array_hash(int *arr, int len); static int get_unsorted_unsafe_int_array_hash(int *arr, int len); static int get_unordered_int_list_hash(List *lst); @@ -51,35 +55,111 @@ static List **get_clause_args_ptr(Expr *clause); static bool clause_is_eq_clause(Expr *clause); /* - * Computes hash for given query. + * Computes hash for given query.Query Identifier: = * Hash is supposed to be constant-insensitive. * XXX: Hashing depend on Oids of database objects. It is restrict usability of * the AQO knowledge base by current database at current Postgres instance. 
*/ -int +uint64 get_query_hash(Query *parse, const char *query_text) { char *str_repr; - int hash; + uint64 hash; + /* XXX: remove_locations and remove_consts are heavy routines. */ str_repr = remove_locations(remove_consts(nodeToString(parse))); - hash = DatumGetInt32(hash_any((const unsigned char *) str_repr, - strlen(str_repr) * sizeof(*str_repr))); + hash = DatumGetUInt64(hash_any_extended((void *) str_repr, strlen(str_repr),0)); pfree(str_repr); return hash; } +/********************************************************************************* + * + * Because List natively works with OID, integer and a postgres node types, + * implement separate set of functions which manages list of uint64 values + * (need for the query hash type). + * + ********************************************************************************/ + +bool +list_member_uint64(const List *list, uint64 datum) +{ + const ListCell *cell; + + foreach(cell, list) + { + if (*((uint64 *)lfirst(cell)) == datum) + return true; + } + + return false; +} + +List * +lappend_uint64(List *list, uint64 datum) +{ + uint64 *val = palloc(sizeof(uint64)); + + *val = datum; + list = lappend(list, (void *) val); + return list; +} + +List * +ldelete_uint64(List *list, uint64 datum) +{ + ListCell *cell; + + foreach(cell, list) + { + if (*((uint64 *)lfirst(cell)) == datum) + { + list = list_delete_ptr(list, lfirst(cell)); + return list; + } + } + return list; +} + +/********************************************************************************/ + +int +get_grouped_exprs_hash(int child_fss, List *group_exprs) +{ + ListCell *lc; + int *hashes = palloc(list_length(group_exprs) * sizeof(int)); + int i = 0; + int final_hashes[2]; + + /* Calculate hash of each grouping expression. */ + foreach(lc, group_exprs) + { + Node *clause = (Node *) lfirst(lc); + + hashes[i++] = get_node_hash(clause); + } + + /* Sort to get rid of expressions permutation. 
*/ + qsort(hashes, i, sizeof(int), int_cmp); + + final_hashes[0] = child_fss; + final_hashes[1] = get_int_array_hash(hashes, i); + return get_int_array_hash(final_hashes, 2); +} + /* * For given object (clauselist, selectivities, relidslist) creates feature * subspace: * sets nfeatures * creates and computes fss_hash * transforms selectivities to features + * + * Special case for nfeatures == NULL: don't calculate features. */ int -get_fss_for_object(List *clauselist, List *selectivities, List *relidslist, - int *nfeatures, double **features) +get_fss_for_object(List *relidslist, List *clauselist, + List *selectivities, int *nfeatures, double **features) { int n; int *clause_hashes; @@ -94,7 +174,7 @@ get_fss_for_object(List *clauselist, List *selectivities, List *relidslist, int eclasses_hash; int relidslist_hash; List **args; - ListCell *l; + ListCell *lc; int i, j, k, @@ -105,20 +185,27 @@ get_fss_for_object(List *clauselist, List *selectivities, List *relidslist, n = list_length(clauselist); + /* Check parameters state invariant. 
*/ + Assert(n == list_length(selectivities) || + (nfeatures == NULL && features == NULL)); + get_eclasses(clauselist, &nargs, &args_hash, &eclass_hash); clause_hashes = palloc(sizeof(*clause_hashes) * n); clause_has_consts = palloc(sizeof(*clause_has_consts) * n); sorted_clauses = palloc(sizeof(*sorted_clauses) * n); - *features = palloc0(sizeof(**features) * n); + + if (nfeatures != NULL) + *features = palloc0(sizeof(**features) * n); i = 0; - foreach(l, clauselist) + foreach(lc, clauselist) { - clause_hashes[i] = get_clause_hash( - ((RestrictInfo *) lfirst(l))->clause, + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + + clause_hashes[i] = get_clause_hash(rinfo->clause, nargs, args_hash, eclass_hash); - args = get_clause_args_ptr(((RestrictInfo *) lfirst(l))->clause); + args = get_clause_args_ptr(rinfo->clause); clause_has_consts[i] = (args != NULL && has_consts(*args)); i++; } @@ -127,15 +214,26 @@ get_fss_for_object(List *clauselist, List *selectivities, List *relidslist, inverse_idx = inverse_permutation(idx, n); i = 0; - foreach(l, selectivities) + foreach(lc, clauselist) { - (*features)[inverse_idx[i]] = log(*((double *) (lfirst(l)))); - if ((*features)[inverse_idx[i]] < log_selectivity_lower_bound) - (*features)[inverse_idx[i]] = log_selectivity_lower_bound; sorted_clauses[inverse_idx[i]] = clause_hashes[i]; i++; } + i = 0; + foreach(lc, selectivities) + { + Selectivity *s = (Selectivity *) lfirst(lc); + + if (nfeatures != NULL) + { + (*features)[inverse_idx[i]] = log(*s); + if ((*features)[inverse_idx[i]] < log_selectivity_lower_bound) + (*features)[inverse_idx[i]] = log_selectivity_lower_bound; + } + i++; + } + for (i = 0; i < n;) { k = 0; @@ -146,25 +244,25 @@ get_fss_for_object(List *clauselist, List *selectivities, List *relidslist, for (j = i; j < n && sorted_clauses[j] == sorted_clauses[i]; ++j) if (clause_has_consts[idx[j]] || k + 1 == m - i) { - (*features)[j - sh] = (*features)[j]; + if (nfeatures != NULL) + (*features)[j - sh] = 
(*features)[j]; sorted_clauses[j - sh] = sorted_clauses[j]; } else sh++; - qsort(&((*features)[i - old_sh]), j - sh - (i - old_sh), - sizeof(**features), double_cmp); + + if (nfeatures != NULL) + qsort(&((*features)[i - old_sh]), j - sh - (i - old_sh), + sizeof(**features), double_cmp); i = j; } - *nfeatures = n - sh; - (*features) = repalloc(*features, (*nfeatures) * sizeof(**features)); - /* * Generate feature subspace hash. * XXX: Remember! that relidslist_hash isn't portable between postgres * instances. */ - clauses_hash = get_int_array_hash(sorted_clauses, *nfeatures); + clauses_hash = get_int_array_hash(sorted_clauses, n - sh); eclasses_hash = get_int_array_hash(eclass_hash, nargs); relidslist_hash = get_relidslist_hash(relidslist); fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relidslist_hash); @@ -176,6 +274,12 @@ get_fss_for_object(List *clauselist, List *selectivities, List *relidslist, pfree(clause_has_consts); pfree(args_hash); pfree(eclass_hash); + + if (nfeatures != NULL) + { + *nfeatures = n - sh; + (*features) = repalloc(*features, (*nfeatures) * sizeof(**features)); + } return fss_hash; } @@ -225,7 +329,7 @@ get_str_hash(const char *str) /* * Computes hash for given node. */ -int +static int get_node_hash(Node *node) { char *str; @@ -290,7 +394,7 @@ get_unordered_int_list_hash(List *lst) * "[^]*" are replaced with substring * "". */ -char * +static char * replace_patterns(const char *str, const char *start_pattern, bool (*end_pattern) (char ch)) { @@ -345,7 +449,7 @@ get_relidslist_hash(List *relidslist) * Returns the C-string in which the substrings of kind "{CONST.*}" are * replaced with substring "{CONST}". */ -char * +static char * remove_consts(const char *str) { char *res; @@ -359,7 +463,7 @@ remove_consts(const char *str) * Returns the C-string in which the substrings of kind " :location.*}" are * replaced with substring " :location}". 
*/ -char * +static char * remove_locations(const char *str) { return replace_patterns(str, " :location", is_brace); diff --git a/hash.h b/hash.h new file mode 100644 index 00000000..0a98814b --- /dev/null +++ b/hash.h @@ -0,0 +1,16 @@ +#ifndef AQO_HASH_H +#define AQO_HASH_H + +#include "nodes/pg_list.h" + +extern uint64 get_query_hash(Query *parse, const char *query_text); +extern bool list_member_uint64(const List *list, uint64 datum); +extern List *lappend_uint64(List *list, uint64 datum); +extern List *ldelete_uint64(List *list, uint64 datum); +extern int get_fss_for_object(List *relidslist, List *clauselist, + List *selectivities, int *nfeatures, + double **features); +extern int get_int_array_hash(int *arr, int len); +extern int get_grouped_exprs_hash(int fss, List *group_exprs); + +#endif /* AQO_HASH_H */ \ No newline at end of file diff --git a/machine_learning.c b/machine_learning.c index 9ebbae6a..a9889868 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -19,6 +19,8 @@ * */ +#include "postgres.h" + #include "aqo.h" static double fs_distance(double *a, double *b, int len); diff --git a/path_utils.c b/path_utils.c index 022dff32..e35fce41 100644 --- a/path_utils.c +++ b/path_utils.c @@ -12,8 +12,82 @@ * */ -#include "aqo.h" +#include "postgres.h" + +#include "nodes/readfuncs.h" #include "optimizer/optimizer.h" +#include "path_utils.h" + +#include "aqo.h" +#include "hash.h" + + +/* + * Hook on creation of a plan node. We need to store AQO-specific data to + * support learning stage. 
+ */ +create_plan_hook_type prev_create_plan_hook = NULL; + +create_upper_paths_hook_type prev_create_upper_paths_hook = NULL; + +static AQOPlanNode DefaultAQOPlanNode = +{ + .node.type = T_ExtensibleNode, + .node.extnodename = AQO_PLAN_NODE, + .had_path = false, + .relids = NIL, + .clauses = NIL, + .selectivities = NIL, + .grouping_exprs = NIL, + .jointype = -1, + .parallel_divisor = -1., + .was_parametrized = false, + .fss = INT_MAX, + .prediction = -1 +}; + +static AQOPlanNode * +create_aqo_plan_node() +{ + AQOPlanNode *node = (AQOPlanNode *) newNode(sizeof(AQOPlanNode), + T_ExtensibleNode); + + memcpy(node, &DefaultAQOPlanNode, sizeof(AQOPlanNode)); + return node; +} + +AQOPlanNode * +get_aqo_plan_node(Plan *plan, bool create) +{ + AQOPlanNode *node = NULL; + ListCell *lc; + + foreach(lc, plan->private) + { + AQOPlanNode *candidate = (AQOPlanNode *) lfirst(lc); + + if (!IsA(candidate, ExtensibleNode)) + continue; + + if (strcmp(candidate->node.extnodename, AQO_PLAN_NODE) != 0) + continue; + + node = candidate; + break; + } + + if (node == NULL) + { + if (!create) + return &DefaultAQOPlanNode; + + node = create_aqo_plan_node(); + plan->private = lappend(plan->private, node); + } + + Assert(node); + return node; +} /* * Returns list of marginal selectivities using as an arguments for each clause @@ -57,15 +131,78 @@ get_list_of_relids(PlannerInfo *root, Relids relids) if (relids == NULL) return NIL; + /* + * Check: don't take into account relations without underlying plane + * source table. + */ + Assert(!bms_is_member(0, relids)); + i = -1; while ((i = bms_next_member(relids, i)) >= 0) { entry = planner_rt_fetch(i, root); - l = lappend_int(l, entry->relid); + if (OidIsValid(entry->relid)) + l = lappend_int(l, entry->relid); } return l; } +/* + * Search for any subplans or initplans. + * if subplan is found, replace it by the feature space value of this subplan. 
+ */ +static Node * +subplan_hunter(Node *node, void *context) +{ + if (node == NULL) + /* Continue recursion in other subtrees. */ + return false; + + if (IsA(node, SubPlan)) + { + SubPlan *splan = (SubPlan *) node; + PlannerInfo *root = (PlannerInfo *) context; + PlannerInfo *subroot; + RelOptInfo *upper_rel; + A_Const *fss; + + subroot = (PlannerInfo *) list_nth(root->glob->subroots, + splan->plan_id - 1); + upper_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL); + + Assert(list_length(upper_rel->private) == 1); + Assert(IsA((Node *) linitial(upper_rel->private), A_Const)); + + fss = (A_Const *) linitial(upper_rel->private); + return (Node *) copyObject(fss); + } + return expression_tree_mutator(node, subplan_hunter, context); +} + +/* + * Get independent copy of the clauses list. + * During this operation clauses could be changed and we couldn't walk across + * this list next. + */ +List * +aqo_get_clauses(PlannerInfo *root, List *restrictlist) +{ + List *clauses = NIL; + ListCell *lc; + + foreach(lc, restrictlist) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + + rinfo = copyObject(rinfo); + rinfo->clause = (Expr *) expression_tree_mutator((Node *) rinfo->clause, + subplan_hunter, + (void *) root); + clauses = lappend(clauses, (void *) rinfo); + } + return clauses; +} + /* * For given path returns the list of all clauses used in it. * Also returns selectivities for the clauses throw the selectivities variable. 
@@ -125,10 +262,21 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) return get_path_clauses(((ProjectionPath *) path)->subpath, root, selectivities); break; + case T_ProjectSetPath: + return get_path_clauses(((ProjectSetPath *) path)->subpath, root, + selectivities); + break; case T_SortPath: return get_path_clauses(((SortPath *) path)->subpath, root, selectivities); break; + case T_IncrementalSortPath: + { + IncrementalSortPath *p = (IncrementalSortPath *) path; + return get_path_clauses(p->spath.subpath, root, + selectivities); + } + break; case T_GroupPath: return get_path_clauses(((GroupPath *) path)->subpath, root, selectivities); @@ -153,16 +301,6 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) return get_path_clauses(((SetOpPath *) path)->subpath, root, selectivities); break; - case T_SubqueryScanPath: - /* - * According to the SubqueryScanPath description, we need to use - * path.parent->subroot as the planning context for interpretation - * of the subpath. 
- */ - return get_path_clauses(((SubqueryScanPath *) path)->subpath, - path->parent->subroot, - selectivities); - break; case T_LockRowsPath: return get_path_clauses(((LockRowsPath *) path)->subpath, root, selectivities); @@ -171,10 +309,41 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) return get_path_clauses(((LimitPath *) path)->subpath, root, selectivities); break; + case T_SubqueryScanPath: + return get_path_clauses(((SubqueryScanPath *) path)->subpath, root, + selectivities); + break; + case T_ModifyTablePath: + { + ListCell *lc; + + foreach (lc, ((ModifyTablePath *) path)->subpaths) + { + Path *subpath = lfirst(lc); + + cur = list_concat(cur, list_copy( + get_path_clauses(subpath, root, selectivities))); + cur_sel = list_concat(cur_sel, *selectivities); + } + cur = list_concat(cur, aqo_get_clauses(root, + path->parent->baserestrictinfo)); + *selectivities = list_concat(cur_sel, + get_selectivities(root, + path->parent->baserestrictinfo, + 0, JOIN_INNER, NULL)); + return cur; + } + break; + /* TODO: RecursiveUnionPath */ case T_AppendPath: + case T_MergeAppendPath: { ListCell *lc; + /* + * It isn't a safe style, but we use the only subpaths field that is + * the first at both Append and MergeAppend nodes. 
+ */ foreach (lc, ((AppendPath *) path)->subpaths) { Path *subpath = lfirst(lc); @@ -183,7 +352,8 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) get_path_clauses(subpath, root, selectivities))); cur_sel = list_concat(cur_sel, *selectivities); } - cur = list_concat(cur, list_copy(path->parent->baserestrictinfo)); + cur = list_concat(cur, aqo_get_clauses(root, + path->parent->baserestrictinfo)); *selectivities = list_concat(cur_sel, get_selectivities(root, path->parent->baserestrictinfo, @@ -194,9 +364,12 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) case T_ForeignPath: /* The same as in the default case */ default: - cur = list_concat(list_copy(path->parent->baserestrictinfo), + cur = list_concat(aqo_get_clauses(root, + path->parent->baserestrictinfo), path->param_info ? - list_copy(path->param_info->ppi_clauses) : NIL); + aqo_get_clauses(root, + path->param_info->ppi_clauses) : + NIL); if (path->param_info) cur_sel = get_selectivities(root, cur, path->parent->relid, JOIN_INNER, NULL); @@ -207,3 +380,281 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) break; } } + +/* + * Some of paths are kind of utility path. I mean, It isn't corresponding to + * specific RelOptInfo node. So, it should be omitted in process of clauses + * gathering to avoid duplication of the same clauses. + * XXX: only a dump plug implemented for now. + */ +static bool +is_appropriate_path(Path *path) +{ + bool appropriate = true; + + switch (path->type) + { + case T_SortPath: + case T_IncrementalSortPath: + case T_GatherPath: + case T_GatherMergePath: + appropriate = false; + break; + default: + break; + } + + return appropriate; +} + +/* + * Converts path info into plan node for collecting it after query execution. 
+ */ +void +aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) +{ + bool is_join_path; + Plan *plan = *dest; + AQOPlanNode *node; + + if (prev_create_plan_hook) + prev_create_plan_hook(root, src, dest); + + if (!query_context.use_aqo && !query_context.learn_aqo) + return; + + is_join_path = (src->type == T_NestPath || src->type == T_MergePath || + src->type == T_HashPath); + + node = get_aqo_plan_node(plan, true); + + if (node->had_path) + { + /* + * The convention is that any extension that sets had_path is also + * responsible for setting path_clauses, path_jointype, path_relids, + * path_parallel_workers, and was_parameterized. + */ + return; + } + + if (is_join_path) + { + node->clauses = aqo_get_clauses(root, ((JoinPath *) src)->joinrestrictinfo); + node->jointype = ((JoinPath *) src)->jointype; + } + else if (IsA(src, AggPath)) + /* Aggregation node must store grouping clauses. */ + { + AggPath *ap = (AggPath *) src; + + /* Get TLE's from child target list corresponding to the list of exprs. */ + List *groupExprs = get_sortgrouplist_exprs(ap->groupClause, + (*dest)->lefttree->targetlist); + /* Copy bare expressions for further AQO learning case. */ + node->grouping_exprs = copyObject(groupExprs); + node->relids = get_list_of_relids(root, ap->subpath->parent->relids); + node->jointype = JOIN_INNER; + } + else if (is_appropriate_path(src)) + { + node->clauses = list_concat( + aqo_get_clauses(root, src->parent->baserestrictinfo), + src->param_info ? 
aqo_get_clauses(root, src->param_info->ppi_clauses) : NIL); + node->jointype = JOIN_INNER; + } + + node->relids = list_concat(node->relids, + get_list_of_relids(root, src->parent->relids)); + + if (src->parallel_workers > 0) + node->parallel_divisor = get_parallel_divisor(src); + node->was_parametrized = (src->param_info != NULL); + + if (src->param_info) + { + node->prediction = src->param_info->predicted_ppi_rows; + node->fss = src->param_info->fss_ppi_hash; + } + else + { + node->prediction = src->parent->predicted_cardinality; + node->fss = src->parent->fss_hash; + } + + node->had_path = true; +} + +static void +AQOnodeCopy(struct ExtensibleNode *enew, const struct ExtensibleNode *eold) +{ + AQOPlanNode *new = (AQOPlanNode *) enew; + AQOPlanNode *old = (AQOPlanNode *) eold; + + Assert(IsA(old, ExtensibleNode)); + Assert(strcmp(old->node.extnodename, AQO_PLAN_NODE) == 0); + + /* Copy static fields in one command */ + memcpy(new, old, sizeof(AQOPlanNode)); + + /* These lists couldn't contain AQO nodes. 
Use basic machinery */ + new->relids = copyObject(old->relids); + new->clauses = copyObject(old->clauses); + new->grouping_exprs = copyObject(old->grouping_exprs); + new->selectivities = copyObject(old->selectivities); + enew = (ExtensibleNode *) new; +} + +static bool +AQOnodeEqual(const struct ExtensibleNode *a, const struct ExtensibleNode *b) +{ + return false; +} + +#define WRITE_INT_FIELD(fldname) \ + appendStringInfo(str, " :" CppAsString(fldname) " %d", node->fldname) + +/* Write a boolean field */ +#define WRITE_BOOL_FIELD(fldname) \ + appendStringInfo(str, " :" CppAsString(fldname) " %s", \ + booltostr(node->fldname)) + +#define WRITE_NODE_FIELD(fldname) \ + (appendStringInfoString(str, " :" CppAsString(fldname) " "), \ + outNode(str, node->fldname)) + +/* Write an enumerated-type field as an integer code */ +#define WRITE_ENUM_FIELD(fldname, enumtype) \ + appendStringInfo(str, " :" CppAsString(fldname) " %d", \ + (int) node->fldname) + +/* Write a float field --- caller must give format to define precision */ +#define WRITE_FLOAT_FIELD(fldname,format) \ + appendStringInfo(str, " :" CppAsString(fldname) " " format, node->fldname) + +static void +AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) +{ + AQOPlanNode *node = (AQOPlanNode *) enode; + + Assert(0); + WRITE_BOOL_FIELD(had_path); + WRITE_NODE_FIELD(relids); + WRITE_NODE_FIELD(clauses); + WRITE_NODE_FIELD(selectivities); + WRITE_NODE_FIELD(grouping_exprs); + + WRITE_ENUM_FIELD(jointype, JoinType); + WRITE_FLOAT_FIELD(parallel_divisor, "%.5f"); + WRITE_BOOL_FIELD(was_parametrized); + + /* For Adaptive optimization DEBUG purposes */ + WRITE_INT_FIELD(fss); + WRITE_FLOAT_FIELD(prediction, "%.0f"); +} + +/* Read an integer field (anything written as ":fldname %d") */ +#define READ_INT_FIELD(fldname) \ + token = pg_strtok(&length); /* skip :fldname */ \ + token = pg_strtok(&length); /* get field value */ \ + local_node->fldname = atoi(token) + +/* Read an enumerated-type field that 
was written as an integer code */ +#define READ_ENUM_FIELD(fldname, enumtype) \ + token = pg_strtok(&length); /* skip :fldname */ \ + token = pg_strtok(&length); /* get field value */ \ + local_node->fldname = (enumtype) atoi(token) + +/* Read a float field */ +#define READ_FLOAT_FIELD(fldname) \ + token = pg_strtok(&length); /* skip :fldname */ \ + token = pg_strtok(&length); /* get field value */ \ + local_node->fldname = atof(token) + +/* Read a boolean field */ +#define READ_BOOL_FIELD(fldname) \ + token = pg_strtok(&length); /* skip :fldname */ \ + token = pg_strtok(&length); /* get field value */ \ + local_node->fldname = strtobool(token) + +/* Read a Node field */ +#define READ_NODE_FIELD(fldname) \ + token = pg_strtok(&length); /* skip :fldname */ \ + (void) token; /* in case not used elsewhere */ \ + local_node->fldname = nodeRead(NULL, 0) + +static void +AQOnodeRead(struct ExtensibleNode *enode) +{ + AQOPlanNode *local_node = (AQOPlanNode *) enode; + const char *token; + int length; + + Assert(0); + READ_BOOL_FIELD(had_path); + READ_NODE_FIELD(relids); + READ_NODE_FIELD(clauses); + READ_NODE_FIELD(selectivities); + READ_NODE_FIELD(grouping_exprs); + + READ_ENUM_FIELD(jointype, JoinType); + READ_FLOAT_FIELD(parallel_divisor); + READ_BOOL_FIELD(was_parametrized); + + /* For Adaptive optimization DEBUG purposes */ + READ_INT_FIELD(fss); + READ_FLOAT_FIELD(prediction); +} + +static const ExtensibleNodeMethods method = +{ + .extnodename = AQO_PLAN_NODE, + .node_size = sizeof(AQOPlanNode), + .nodeCopy = AQOnodeCopy, + .nodeEqual = AQOnodeEqual, + .nodeOut = AQOnodeOut, + .nodeRead = AQOnodeRead +}; + +void +RegisterAQOPlanNodeMethods(void) +{ + RegisterExtensibleNodeMethods(&method); +} + +/* + * Hook for create_upper_paths_hook + * + * Assume, that we are last in the chain of path creators. 
+ */ +void +aqo_store_upper_signature_hook(PlannerInfo *root, + UpperRelationKind stage, + RelOptInfo *input_rel, + RelOptInfo *output_rel, + void *extra) +{ + A_Const *fss_node = makeNode(A_Const); + List *relids; + List *clauses; + List *selectivities; + + if (prev_create_upper_paths_hook) + (*prev_create_upper_paths_hook)(root, stage, input_rel, output_rel, extra); + + if (!query_context.use_aqo && !query_context.learn_aqo && !force_collect_stat) + /* Includes 'disabled query' state. */ + return; + + if (stage != UPPERREL_FINAL) + return; + + set_cheapest(input_rel); + clauses = get_path_clauses(input_rel->cheapest_total_path, + root, &selectivities); + relids = get_list_of_relids(root, input_rel->relids); + fss_node->val.type = T_Integer; + fss_node->location = -1; + fss_node->val.val.ival = get_fss_for_object(relids, clauses, NIL, NULL, NULL); + output_rel->private = lappend(output_rel->private, (void *) fss_node); +} diff --git a/path_utils.h b/path_utils.h new file mode 100644 index 00000000..5ee4bba5 --- /dev/null +++ b/path_utils.h @@ -0,0 +1,68 @@ +#ifndef PATH_UTILS_H +#define PATH_UTILS_H + +#include "nodes/extensible.h" +#include "nodes/pathnodes.h" +#include "optimizer/planmain.h" +#include "optimizer/planner.h" + +#define AQO_PLAN_NODE "AQOPlanNode" + +/* + * information for adaptive query optimization + */ +typedef struct AQOPlanNode +{ + ExtensibleNode node; + bool had_path; + List *relids; + List *clauses; + List *selectivities; + + /* Grouping expressions from a target list. */ + List *grouping_exprs; + + JoinType jointype; + double parallel_divisor; + bool was_parametrized; + + /* For Adaptive optimization DEBUG purposes */ + int fss; + double prediction; +} AQOPlanNode; + + +#define strtobool(x) ((*(x) == 't') ? true : false) + +#define nullable_string(token,length) \ + ((length) == 0 ? NULL : debackslash(token, length)) + +#define booltostr(x) ((x) ? 
"true" : "false") + +extern create_plan_hook_type prev_create_plan_hook; + +/* Extracting path information utilities */ +extern List *get_selectivities(PlannerInfo *root, + List *clauses, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo); +extern List *get_list_of_relids(PlannerInfo *root, Relids relids); + +extern List *get_path_clauses(Path *path, + PlannerInfo *root, + List **selectivities); + +extern void aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest); +extern AQOPlanNode *get_aqo_plan_node(Plan *plan, bool create); +extern void RegisterAQOPlanNodeMethods(void); + +extern create_upper_paths_hook_type prev_create_upper_paths_hook; +extern void aqo_store_upper_signature_hook(PlannerInfo *root, + UpperRelationKind stage, + RelOptInfo *input_rel, + RelOptInfo *output_rel, + void *extra); +extern List *aqo_get_clauses(PlannerInfo *root, List *restrictlist); + +#endif /* PATH_UTILS_H */ diff --git a/postprocessing.c b/postprocessing.c index 05e92737..6c2b0b82 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -16,13 +16,18 @@ * */ -#include "aqo.h" +#include "postgres.h" #include "access/parallel.h" #include "optimizer/optimizer.h" #include "postgres_fdw.h" #include "utils/queryenvironment.h" +#include "aqo.h" +#include "hash.h" +#include "path_utils.h" +#include "preprocessing.h" + typedef struct { @@ -35,7 +40,12 @@ typedef struct static double cardinality_sum_errors; static int cardinality_num_objects; -/* It is needed to recognize stored Query-related aqo data in the query +/* + * Store an AQO-related query data into the Query Environment structure. + * + * It is very sad that we have to use such unsuitable field, but alternative is + * to introduce a private field in a PlannedStmt struct. + * It is needed to recognize stored Query-related aqo data in the query * environment field. 
*/ static char *AQOPrivateData = "AQOPrivateData"; @@ -43,14 +53,17 @@ static char *PlanStateInfo = "PlanStateInfo"; /* Query execution statistics collecting utilities */ -static void atomic_fss_learn_step(int fhash, int fss_hash, int ncols, +static void atomic_fss_learn_step(uint64 fhash, int fss_hash, int ncols, double **matrix, double *targets, - double *features, double target); + double *features, double target, + List *relids); +static bool learnOnPlanState(PlanState *p, void *context); static void learn_sample(List *clauselist, List *selectivities, List *relidslist, double true_cardinality, - Plan *plan); + Plan *plan, + bool notExecuted); static List *restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, @@ -65,7 +78,6 @@ static void update_query_stat_row(double *et, int *et_size, static void StoreToQueryEnv(QueryDesc *queryDesc); static void StorePlanInternals(QueryDesc *queryDesc); static bool ExtractFromQueryEnv(QueryDesc *queryDesc); -static void RemoveFromQueryEnv(QueryDesc *queryDesc); /* @@ -74,34 +86,68 @@ static void RemoveFromQueryEnv(QueryDesc *queryDesc); * matrix and targets are just preallocated memory for computations. 
*/ static void -atomic_fss_learn_step(int fhash, int fss_hash, int ncols, +atomic_fss_learn_step(uint64 fhash, int fss_hash, int ncols, double **matrix, double *targets, - double *features, double target) + double *features, double target, + List *relids) { LOCKTAG tag; int nrows; - init_lock_tag(&tag, (uint32) fhash, (uint32) fss_hash); + init_lock_tag(&tag, (uint32) fhash, fss_hash); LockAcquire(&tag, ExclusiveLock, false, false); - if (!load_fss(fhash, fss_hash, ncols, matrix, targets, &nrows)) + if (!load_fss(fhash, fss_hash, ncols, matrix, targets, &nrows, NULL)) nrows = 0; nrows = OkNNr_learn(nrows, ncols, matrix, targets, features, target); - update_fss(fhash, fss_hash, nrows, ncols, matrix, targets); + update_fss(fhash, fss_hash, nrows, ncols, matrix, targets, relids); LockRelease(&tag, ExclusiveLock, false); } +static void +learn_agg_sample(List *clauselist, List *selectivities, List *relidslist, + double true_cardinality, Plan *plan, bool notExecuted) +{ + uint64 fhash = query_context.fspace_hash; + int child_fss; + int fss; + double target; + double *matrix[aqo_K]; + double targets[aqo_K]; + AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); + int i; + + /* + * Learn 'not executed' nodes only once, if no one another knowledge exists + * for current feature subspace. + */ + if (notExecuted && aqo_node->prediction > 0) + return; + + target = log(true_cardinality); + child_fss = get_fss_for_object(relidslist, clauselist, NIL, NULL, NULL); + fss = get_grouped_exprs_hash(child_fss, aqo_node->grouping_exprs); + + for (i = 0; i < aqo_K; i++) + matrix[i] = NULL; + /* Critical section */ + atomic_fss_learn_step(fhash, fss, + 0, matrix, targets, NULL, target, + relidslist); + /* End of critical section */ +} + /* * For given object (i. e. clauselist, selectivities, relidslist, predicted and * true cardinalities) performs learning procedure. 
*/ static void learn_sample(List *clauselist, List *selectivities, List *relidslist, - double true_cardinality, Plan *plan) + double true_cardinality, Plan *plan, bool notExecuted) { - int fhash = query_context.fspace_hash; + uint64 fhash = query_context.fspace_hash; int fss_hash; int nfeatures; double *matrix[aqo_K]; @@ -109,10 +155,21 @@ learn_sample(List *clauselist, List *selectivities, List *relidslist, double *features; double target; int i; + AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); target = log(true_cardinality); - fss_hash = get_fss_for_object(clauselist, selectivities, relidslist, - &nfeatures, &features); + fss_hash = get_fss_for_object(relidslist, clauselist, + selectivities, &nfeatures, &features); + + /* Only Agg nodes can have non-empty a grouping expressions list. */ + Assert(!IsA(plan, Agg) || aqo_node->grouping_exprs != NIL); + + /* + * Learn 'not executed' nodes only once, if no one another knowledge exists + * for current feature subspace. + */ + if (notExecuted && aqo_node->prediction > 0) + return; if (nfeatures > 0) for (i = 0; i < aqo_K; ++i) @@ -120,7 +177,8 @@ learn_sample(List *clauselist, List *selectivities, List *relidslist, /* Critical section */ atomic_fss_learn_step(fhash, fss_hash, - nfeatures, matrix, targets, features, target); + nfeatures, matrix, targets, features, target, + relidslist); /* End of critical section */ if (nfeatures > 0) @@ -194,22 +252,63 @@ restore_selectivities(List *clauselist, return lst; } +static bool +IsParallelTuplesProcessing(const Plan *plan, bool IsParallel) +{ + if (IsParallel && (plan->parallel_aware || nodeTag(plan) == T_HashJoin || + nodeTag(plan) == T_MergeJoin || nodeTag(plan) == T_NestLoop)) + return true; + return false; +} + /* - * Check for the nodes that never executed. If at least one node exists in the - * plan than actual rows of any another node can be false. - * Suppress such knowledge because it can worsen the query execution time. 
+ * learn_subplan_recurse + * + * Emphasise recursion operation into separate function because of increasing + * complexity of this logic. */ static bool -HasNeverExecutedNodes(PlanState *ps, void *context) +learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) { - Assert(context == NULL); + List *saved_subplan_list = NIL; + List *saved_initplan_list = NIL; + ListCell *lc; + + if (!p->instrument) + return true; + InstrEndLoop(p->instrument); - InstrEndLoop(ps->instrument); - if (ps->instrument == NULL || ps->instrument->nloops == 0) + saved_subplan_list = p->subPlan; + saved_initplan_list = p->initPlan; + p->subPlan = NIL; + p->initPlan = NIL; + + if (planstate_tree_walker(p, learnOnPlanState, (void *) ctx)) return true; - return planstate_tree_walker(ps, HasNeverExecutedNodes, NULL); + foreach(lc, saved_subplan_list) + { + SubPlanState *sps = lfirst_node(SubPlanState, lc); + aqo_obj_stat SPCtx = {NIL, NIL, NIL, ctx->learn}; + + if (learnOnPlanState(sps->planstate, (void *) &SPCtx)) + return true; + } + + foreach(lc, saved_initplan_list) + { + SubPlanState *sps = lfirst_node(SubPlanState, lc); + aqo_obj_stat SPCtx = {NIL, NIL, NIL, ctx->learn}; + + if (learnOnPlanState(sps->planstate, (void *) &SPCtx)) + return true; + } + + p->subPlan = saved_subplan_list; + p->initPlan = saved_initplan_list; + return false; } + /* * Walks over obtained PlanState tree, collects relation objects with their * clauses, selectivities and relids and passes each object to learn_sample. @@ -217,7 +316,7 @@ HasNeverExecutedNodes(PlanState *ps, void *context) * Returns clauselist, selectivities and relids. * Store observed subPlans into other_plans list. * - * We use list_copy() of p->plan->path_clauses and p->plan->path_relids + * We use list_copy() of AQOPlanNode->clauses and AQOPlanNode->relids * because the plan may be stored in the cache after this. Operation * list_concat() changes input lists and may destruct cached plan. 
*/ @@ -228,16 +327,27 @@ learnOnPlanState(PlanState *p, void *context) aqo_obj_stat SubplanCtx = {NIL, NIL, NIL, ctx->learn}; double predicted = 0.; double learn_rows = 0.; + AQOPlanNode *aqo_node; + bool notExecuted = false; - if (!p->instrument) + /* Recurse into subtree and collect clauses. */ + if (learn_subplan_recurse(p, &SubplanCtx)) + /* If something goes wrong, return quckly. */ return true; - planstate_tree_walker(p, learnOnPlanState, (void *) &SubplanCtx); + aqo_node = get_aqo_plan_node(p->plan, false); + /* + * Compute real value of rows, passed through this node. Summarize rows + * for parallel workers. + * If 'never executed' node will be found - set specific sign, because we + * allow to learn on such node only once. + */ if (p->instrument->nloops > 0.) { /* If we can strongly calculate produced rows, do it. */ - if (p->worker_instrument && IsParallelTuplesProcessing(p->plan)) + if (p->worker_instrument && + IsParallelTuplesProcessing(p->plan, aqo_node->parallel_divisor > 0)) { double wnloops = 0.; double wntuples = 0.; @@ -267,6 +377,12 @@ learnOnPlanState(PlanState *p, void *context) * to calculate produced rows. */ learn_rows = p->instrument->ntuples / p->instrument->nloops; } + else + { + /* The case of 'not executed' node. */ + learn_rows = 1.; + notExecuted = true; + } /* * Calculate predicted cardinality. @@ -274,18 +390,17 @@ learnOnPlanState(PlanState *p, void *context) * reusing plan caused by the rewriting procedure. * Also it may be caused by using of a generic plan. */ - if (p->plan->predicted_cardinality > 0. && query_context.use_aqo) + if (aqo_node->prediction > 0. && query_context.use_aqo) { /* AQO made prediction. use it. 
*/ - predicted = p->plan->predicted_cardinality; + predicted = aqo_node->prediction; } - else if (IsParallelTuplesProcessing(p->plan)) + else if (IsParallelTuplesProcessing(p->plan, aqo_node->parallel_divisor > 0)) /* * AQO didn't make a prediction and we need to calculate real number * of tuples passed because of parallel workers. */ - predicted = p->plan->plan_rows * - get_parallel_divisor(p->plan->path_parallel_workers); + predicted = p->plan->plan_rows * aqo_node->parallel_divisor; else /* No AQO prediction. Parallel workers not used for this plan node. */ predicted = p->plan->plan_rows; @@ -305,61 +420,69 @@ learnOnPlanState(PlanState *p, void *context) else if (!ctx->learn) return true; - /* It is needed for correct exp(result) calculation. */ + /* + * Need learn. + */ + + /* + * It is needed for correct exp(result) calculation. + * Do it before cardinality error estimation because we can predict no less + * than 1 tuple, but get zero tuples. + */ predicted = clamp_row_est(predicted); learn_rows = clamp_row_est(learn_rows); + /* Exclude "not executed" nodes from error calculation to reduce fluctuations. */ + if (!notExecuted) + { + cardinality_sum_errors += fabs(predicted - learn_rows); + cardinality_num_objects += 1; + } + /* * Some nodes inserts after planning step (See T_Hash node type). * In this case we have'nt AQO prediction and fss record. 
*/ - if (p->plan->had_path) + if (aqo_node->had_path) { List *cur_selectivities; - cur_selectivities = restore_selectivities(p->plan->path_clauses, - p->plan->path_relids, - p->plan->path_jointype, - p->plan->was_parametrized); + cur_selectivities = restore_selectivities(aqo_node->clauses, + aqo_node->relids, + aqo_node->jointype, + aqo_node->was_parametrized); SubplanCtx.selectivities = list_concat(SubplanCtx.selectivities, cur_selectivities); SubplanCtx.clauselist = list_concat(SubplanCtx.clauselist, - list_copy(p->plan->path_clauses)); + list_copy(aqo_node->clauses)); - if (p->plan->path_relids != NIL) + if (aqo_node->relids != NIL) + { /* - * This plan can be stored as cached plan. In the case we will have + * This plan can be stored as a cached plan. In the case we will have * bogus path_relids field (changed by list_concat routine) at the * next usage (and aqo-learn) of this plan. */ - ctx->relidslist = list_copy(p->plan->path_relids); + ctx->relidslist = list_copy(aqo_node->relids); - if (p->instrument && (p->righttree != NULL || p->lefttree == NULL || - p->plan->path_clauses != NIL || - IsA(p, ForeignScanState) || - IsA(p, AppendState) || IsA(p, MergeAppendState))) - { - if (p->instrument->nloops <= 0.) + if (p->instrument) { - /* - * LAV: I found two cases for this code: - * 1. if query returns with error. - * 2. plan node has never visited. In this case we can not teach - * AQO because ntuples value is equal to 0 and we will got - * learn rows == 1. It is false knowledge: at another place of - * a plan, scanning of the node may produce many tuples. - * Both cases can't be used to learning AQO because give an - * incorrect number of rows. - */ - elog(PANIC, "AQO: impossible situation"); + Assert(predicted >= 1. 
&& learn_rows >= 1.); + + if (ctx->learn) + { + if (IsA(p, AggState)) + learn_agg_sample(SubplanCtx.clauselist, NULL, + aqo_node->relids, learn_rows, + p->plan, notExecuted); + + else + learn_sample(SubplanCtx.clauselist, + SubplanCtx.selectivities, + aqo_node->relids, learn_rows, + p->plan, notExecuted); + } } - - Assert(predicted >= 1 && learn_rows >= 1); - - if (ctx->learn) - learn_sample(SubplanCtx.clauselist, SubplanCtx.selectivities, - p->plan->path_relids, learn_rows, - p->plan); } } @@ -409,7 +532,7 @@ update_query_stat_row(double *et, int *et_size, pt[i - 1] = pt[i]; *pt_size = (*pt_size >= aqo_stat_size) ? aqo_stat_size : (*pt_size + 1); - pt[*pt_size - 1] = planning_time; + pt[*pt_size - 1] = planning_time; /* Just remember: planning time can be negative. */ (*n_exec)++; } @@ -425,18 +548,44 @@ update_query_stat_row(double *et, int *et_size, void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags) { - instr_time current_time; + instr_time now; bool use_aqo; - use_aqo = !IsParallelWorker() && (query_context.use_aqo || - query_context.learn_aqo || - force_collect_stat); + /* + * If the plan pulled from a plan cache, planning don't needed. Restore + * query context from the query environment. + */ + if (ExtractFromQueryEnv(queryDesc)) + Assert(INSTR_TIME_IS_ZERO(query_context.start_planning_time)); + + use_aqo = !IsQueryDisabled() && !IsParallelWorker() && + (query_context.use_aqo || query_context.learn_aqo || + force_collect_stat); if (use_aqo) { - INSTR_TIME_SET_CURRENT(current_time); - INSTR_TIME_SUBTRACT(current_time, query_context.query_starttime); - query_context.query_planning_time = INSTR_TIME_GET_DOUBLE(current_time); + if (!INSTR_TIME_IS_ZERO(query_context.start_planning_time)) + { + INSTR_TIME_SET_CURRENT(now); + INSTR_TIME_SUBTRACT(now, query_context.start_planning_time); + query_context.planning_time = INSTR_TIME_GET_DOUBLE(now); + } + else + /* + * Should set anyway. It will be stored in a query env. 
The query + * can be reused later by extracting from a plan cache. + */ + query_context.planning_time = -1; + + /* + * To zero this timestamp preventing a false time calculation in the + * case, when the plan was got from a plan cache. + */ + INSTR_TIME_SET_ZERO(query_context.start_planning_time); + + /* Make a timestamp for execution stage. */ + INSTR_TIME_SET_CURRENT(now); + query_context.start_execution_time = now; query_context.explain_only = ((eflags & EXEC_FLAG_EXPLAIN_ONLY) != 0); @@ -453,7 +602,6 @@ aqo_ExecutorStart(QueryDesc *queryDesc, int eflags) else standard_ExecutorStart(queryDesc, eflags); - /* Plan state has initialized */ if (use_aqo) StorePlanInternals(queryDesc); } @@ -466,7 +614,7 @@ aqo_ExecutorStart(QueryDesc *queryDesc, int eflags) void aqo_ExecutorEnd(QueryDesc *queryDesc) { - double totaltime; + double execution_time; double cardinality_error; QueryStat *stat = NULL; instr_time endtime; @@ -487,6 +635,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) njoins = (enr != NULL) ? *(int *) enr->reldata : -1; + Assert(!IsQueryDisabled()); Assert(!IsParallelWorker()); if (query_context.explain_only) @@ -495,8 +644,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) query_context.collect_stat = false; } - if ((query_context.learn_aqo && - !HasNeverExecutedNodes(queryDesc->planstate, NULL)) || + if (query_context.learn_aqo || (!query_context.learn_aqo && query_context.collect_stat)) { aqo_obj_stat ctx = {NIL, NIL, NIL, query_context.learn_aqo}; @@ -510,22 +658,24 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) list_free(ctx.selectivities); } - /* Prevent concurrent updates. */ - init_lock_tag(&tag, (uint32) query_context.query_hash, - (uint32) query_context.fspace_hash); - LockAcquire(&tag, ExclusiveLock, false, false); - if (query_context.collect_stat) + stat = get_aqo_stat(query_context.query_hash); + { + /* Calculate execution time. 
*/ INSTR_TIME_SET_CURRENT(endtime); - INSTR_TIME_SUBTRACT(endtime, query_context.query_starttime); - totaltime = INSTR_TIME_GET_DOUBLE(endtime); + INSTR_TIME_SUBTRACT(endtime, query_context.start_execution_time); + execution_time = INSTR_TIME_GET_DOUBLE(endtime); + if (cardinality_num_objects > 0) cardinality_error = cardinality_sum_errors / cardinality_num_objects; else cardinality_error = -1; - - stat = get_aqo_stat(query_context.query_hash); + Assert(query_context.query_hash>=0); + /* Prevent concurrent updates. */ + init_lock_tag(&tag, (uint32) query_context.query_hash,//my code + (uint32) query_context.fspace_hash);//possible here + LockAcquire(&tag, ExclusiveLock, false, false); if (stat != NULL) { @@ -538,8 +688,8 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) &stat->planning_time_with_aqo_size, stat->cardinality_error_with_aqo, &stat->cardinality_error_with_aqo_size, - query_context.query_planning_time, - totaltime - query_context.query_planning_time, + query_context.planning_time, + execution_time, cardinality_error, &stat->executions_with_aqo); else @@ -550,33 +700,27 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) &stat->planning_time_without_aqo_size, stat->cardinality_error_without_aqo, &stat->cardinality_error_without_aqo_size, - query_context.query_planning_time, - totaltime - query_context.query_planning_time, + query_context.planning_time, + execution_time, cardinality_error, &stat->executions_without_aqo); - } - } - selectivity_cache_clear(); - - /* - * Store all learn data into the AQO service relations. - */ - if ((query_context.collect_stat) && (stat != NULL)) - { - if (!query_context.adding_query && query_context.auto_tuning) - automatical_query_tuning(query_context.query_hash, stat); - /* Write AQO statistics to the aqo_query_stat table */ - update_aqo_stat(query_context.fspace_hash, stat); - pfree_query_stat(stat); - } + /* Store all learn data into the AQO service relations. 
*/ + Assert(query_context.query_hash>=0); + if (!query_context.adding_query && query_context.auto_tuning) + automatical_query_tuning(query_context.query_hash, stat); - /* Allow concurrent queries to update this feature space. */ - LockRelease(&tag, ExclusiveLock, false); + /* Write AQO statistics to the aqo_query_stat table */ + update_aqo_stat(query_context.fspace_hash, stat); + pfree_query_stat(stat); + } - cur_classes = list_delete_int(cur_classes, query_context.query_hash); + /* Allow concurrent queries to update this feature space. */ + LockRelease(&tag, ExclusiveLock, false); + } - RemoveFromQueryEnv(queryDesc); + selectivity_cache_clear(); + cur_classes = ldelete_uint64(cur_classes, query_context.query_hash); end: if (prev_ExecutorEnd_hook) @@ -591,67 +735,11 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) } /* - * Converts path info into plan node for collecting it after query execution. - */ -void -aqo_copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src) -{ - bool is_join_path; - - if (prev_copy_generic_path_info_hook) - prev_copy_generic_path_info_hook(root, dest, src); - - is_join_path = (src->type == T_NestPath || src->type == T_MergePath || - src->type == T_HashPath); - - if (dest->had_path) - { - /* - * The convention is that any extension that sets had_path is also - * responsible for setting path_clauses, path_jointype, path_relids, - * path_parallel_workers, and was_parameterized. - */ - Assert(dest->path_clauses && dest->path_jointype && - dest->path_relids && dest->path_parallel_workers); - return; - } - - if (is_join_path) - { - dest->path_clauses = ((JoinPath *) src)->joinrestrictinfo; - dest->path_jointype = ((JoinPath *) src)->jointype; - } - else - { - dest->path_clauses = list_concat( - list_copy(src->parent->baserestrictinfo), - src->param_info ? 
src->param_info->ppi_clauses : NIL); - dest->path_jointype = JOIN_INNER; - } - - dest->path_relids = list_concat(dest->path_relids, - get_list_of_relids(root, src->parent->relids)); - dest->path_parallel_workers = src->parallel_workers; - dest->was_parametrized = (src->param_info != NULL); - - if (src->param_info) - { - dest->predicted_cardinality = src->param_info->predicted_ppi_rows; - dest->fss_hash = src->param_info->fss_ppi_hash; - } - else - { - dest->predicted_cardinality = src->parent->predicted_cardinality; - dest->fss_hash = src->parent->fss_hash; - } - - dest->had_path = true; -} - -/* - * Store into query environment field AQO data related to the query. + * Store into a query environment field an AQO data related to the query. * We introduce this machinery to avoid problems with subqueries, induced by * top-level query. + * If such enr exists, routine will replace it with current value of the + * query context. */ static void StoreToQueryEnv(QueryDesc *queryDesc) @@ -659,22 +747,32 @@ StoreToQueryEnv(QueryDesc *queryDesc) EphemeralNamedRelation enr; int qcsize = sizeof(QueryContextData); MemoryContext oldCxt; + bool newentry = false; + + oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc->plannedstmt)); - oldCxt = MemoryContextSwitchTo(AQOMemoryContext); - enr = palloc0(sizeof(EphemeralNamedRelationData)); if (queryDesc->queryEnv == NULL) - queryDesc->queryEnv = create_queryEnv(); + queryDesc->queryEnv = create_queryEnv(); + + enr = get_ENR(queryDesc->queryEnv, AQOPrivateData); + if (enr == NULL) + { + /* If such query environment don't exists, allocate new. 
*/ + enr = palloc0(sizeof(EphemeralNamedRelationData)); + newentry = true; + } enr->md.name = AQOPrivateData; enr->md.enrtuples = 0; enr->md.enrtype = 0; enr->md.reliddesc = InvalidOid; enr->md.tupdesc = NULL; - enr->reldata = palloc0(qcsize); memcpy(enr->reldata, &query_context, qcsize); - register_ENR(queryDesc->queryEnv, enr); + if (newentry) + register_ENR(queryDesc->queryEnv, enr); + MemoryContextSwitchTo(oldCxt); } @@ -698,15 +796,24 @@ StorePlanInternals(QueryDesc *queryDesc) { EphemeralNamedRelation enr; MemoryContext oldCxt; + bool newentry = false; njoins = 0; planstate_tree_walker(queryDesc->planstate, calculateJoinNum, &njoins); - oldCxt = MemoryContextSwitchTo(AQOMemoryContext); - enr = palloc0(sizeof(EphemeralNamedRelationData)); + oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc->plannedstmt)); + if (queryDesc->queryEnv == NULL) queryDesc->queryEnv = create_queryEnv(); + enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); + if (enr == NULL) + { + /* If such query environment field doesn't exist, allocate new. 
*/ + enr = palloc0(sizeof(EphemeralNamedRelationData)); + newentry = true; + } + enr->md.name = PlanStateInfo; enr->md.enrtuples = 0; enr->md.enrtype = 0; @@ -714,7 +821,10 @@ StorePlanInternals(QueryDesc *queryDesc) enr->md.tupdesc = NULL; enr->reldata = palloc0(sizeof(int)); memcpy(enr->reldata, &njoins, sizeof(int)); - register_ENR(queryDesc->queryEnv, enr); + + if (newentry) + register_ENR(queryDesc->queryEnv, enr); + MemoryContextSwitchTo(oldCxt); } @@ -744,33 +854,35 @@ ExtractFromQueryEnv(QueryDesc *queryDesc) return true; } -static void -RemoveFromQueryEnv(QueryDesc *queryDesc) -{ - EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, AQOPrivateData); - unregister_ENR(queryDesc->queryEnv, AQOPrivateData); - pfree(enr->reldata); - pfree(enr); - - /* Remove the plan state internals */ - enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); - unregister_ENR(queryDesc->queryEnv, PlanStateInfo); - pfree(enr->reldata); - pfree(enr); -} - void -print_node_explain(ExplainState *es, PlanState *ps, Plan *plan, double rows) +print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) { int wrkrs = 1; double error = -1.; + AQOPlanNode *aqo_node; + + /* Extension, which took a hook early can be executed early too. */ + if (prev_ExplainOneNode_hook) + prev_ExplainOneNode_hook(es, ps, plan); + + if (IsQueryDisabled()) + return; + + if (es->format != EXPLAIN_FORMAT_TEXT) + /* Only text format is supported. 
*/ + return; - if (!aqo_show_details || !plan || !ps->instrument) + if (!aqo_show_details || !plan || !ps) goto explain_end; - Assert(es->format == EXPLAIN_FORMAT_TEXT); + aqo_node = get_aqo_plan_node(plan, false); - if (ps->worker_instrument && IsParallelTuplesProcessing(plan)) + if (!ps->instrument) + /* We can show only prediction, without error calculation */ + goto explain_print; + + if (ps->worker_instrument && + IsParallelTuplesProcessing(plan, aqo_node->parallel_divisor > 0)) { int i; @@ -785,28 +897,31 @@ print_node_explain(ExplainState *es, PlanState *ps, Plan *plan, double rows) } } +explain_print: appendStringInfoChar(es->str, '\n'); - Assert(es->format == EXPLAIN_FORMAT_TEXT); if (es->str->len == 0 || es->str->data[es->str->len - 1] == '\n') appendStringInfoSpaces(es->str, es->indent * 2); - if (plan->predicted_cardinality > 0.) + if (aqo_node->prediction > 0.) { - error = 100. * (plan->predicted_cardinality - (rows*wrkrs)) - / plan->predicted_cardinality; - appendStringInfo(es->str, - "AQO: rows=%.0lf, error=%.0lf%%", - plan->predicted_cardinality, error); + appendStringInfo(es->str, "AQO: rows=%.0lf", aqo_node->prediction); + + if (ps->instrument && ps->instrument->nloops > 0.) + { + double rows = ps->instrument->ntuples / ps->instrument->nloops; + + error = 100. * (aqo_node->prediction - (rows*wrkrs)) + / aqo_node->prediction; + appendStringInfo(es->str, ", error=%.0lf%%", error); + } } else appendStringInfo(es->str, "AQO not used"); explain_end: + /* XXX: Do we really have situations than plan is NULL? 
*/ if (plan && aqo_show_hash) - appendStringInfo(es->str, ", fss=%d", plan->fss_hash); - - if (prev_ExplainOneNode_hook) - prev_ExplainOneNode_hook(es, ps, plan, rows); + appendStringInfo(es->str, ", fss=%d", aqo_node->fss); } /* @@ -822,7 +937,7 @@ print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, prev_ExplainOnePlan_hook(plannedstmt, into, es, queryString, params, planduration, queryEnv); - if (!aqo_show_details) + if (IsQueryDisabled() || !aqo_show_details) return; /* Report to user about aqo state only in verbose mode */ @@ -859,6 +974,7 @@ print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, */ if (aqo_mode != AQO_MODE_DISABLED || force_collect_stat) { + Assert(query_context.query_hash>=0); if (aqo_show_hash) ExplainPropertyInteger("Query hash", NULL, query_context.query_hash, es); diff --git a/preprocessing.c b/preprocessing.c index e1dd92b1..30462552 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -56,10 +56,16 @@ * */ -#include "aqo.h" +#include "postgres.h" + #include "access/parallel.h" #include "access/table.h" #include "commands/extension.h" +#include "parser/scansup.h" +#include "aqo.h" +#include "hash.h" +#include "preprocessing.h" + /* List of feature spaces, that are processing in this backend. */ List *cur_classes = NIL; @@ -67,35 +73,10 @@ List *cur_classes = NIL; static bool isQueryUsingSystemRelation(Query *query); static bool isQueryUsingSystemRelation_walker(Node *node, void *context); -/* - * Saves query text into query_text variable. - * Query text field in aqo_queries table is for user. - */ -void -get_query_text(ParseState *pstate, Query *query) -{ - /* - * Duplicate query string into private AQO memory context for guard - * from possible memory context switching. - */ - if (pstate) - { - MemoryContext oldCxt = MemoryContextSwitchTo(AQOMemoryContext); - query_text = pstrdup(pstate->p_sourcetext); - MemoryContextSwitchTo(oldCxt); - } - else - /* Can't imagine such case. Still, throw an error. 
*/ - elog(ERROR, "[AQO]: Query text is not found in post-parse step"); - - if (prev_post_parse_analyze_hook) - prev_post_parse_analyze_hook(pstate, query); -} - /* * Calls standard query planner or its previous hook. */ -PlannedStmt * +static PlannedStmt * call_default_planner(Query *parse, const char *query_string, int cursorOptions, @@ -113,6 +94,33 @@ call_default_planner(Query *parse, boundParams); } +/* + * Check, that a 'CREATE EXTENSION aqo' command has been executed. + * This function allows us to execute the get_extension_oid routine only once + * at each backend. + * If any AQO-related table is missed we will set aqo_enabled to false (see + * a storage implementation module). + */ +static bool +aqoIsEnabled(void) +{ + if (creating_extension) + /* Nothing to tell in this mode. */ + return false; + + if (aqo_enabled) + /* + * Fast path. Dropping should be detected by absence of any AQO-related + * table. + */ + return true; + + if (get_extension_oid("aqo", true) != InvalidOid) + aqo_enabled = true; + + return aqo_enabled; +} + /* * Before query optimization we determine machine learning settings * for the query. @@ -127,43 +135,48 @@ aqo_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) { - bool query_is_stored; + bool query_is_stored = false; Datum query_params[5]; bool query_nulls[5] = {false, false, false, false, false}; LOCKTAG tag; MemoryContext oldCxt; - selectivity_cache_clear(); - /* * We do not work inside an parallel worker now by reason of insert into * the heap during planning. Transactions is synchronized between parallel * sections. See GetCurrentCommandId() comments also. 
*/ - if ((parse->commandType != CMD_SELECT && parse->commandType != CMD_INSERT && + if (!aqoIsEnabled() || + (parse->commandType != CMD_SELECT && parse->commandType != CMD_INSERT && parse->commandType != CMD_UPDATE && parse->commandType != CMD_DELETE) || - strstr(application_name, "postgres_fdw") != NULL || /* Prevent distributed deadlocks */ - strstr(application_name, "pgfdw:") != NULL || /* caused by fdw */ - get_extension_oid("aqo", true) == InvalidOid || creating_extension || IsInParallelMode() || IsParallelWorker() || (aqo_mode == AQO_MODE_DISABLED && !force_collect_stat) || + strstr(application_name, "postgres_fdw") != NULL || /* Prevent distributed deadlocks */ + strstr(application_name, "pgfdw:") != NULL || /* caused by fdw */ isQueryUsingSystemRelation(parse) || RecoveryInProgress()) { + /* + * We should disable AQO for this query to remember this decision along + * all execution stages. + */ disable_aqo_for_query(); + return call_default_planner(parse, query_string, cursorOptions, boundParams); } - query_context.query_hash = get_query_hash(parse, query_text); + selectivity_cache_clear(); + query_context.query_hash = get_query_hash(parse, query_string); if (query_is_deactivated(query_context.query_hash) || - list_member_int(cur_classes, query_context.query_hash)) + list_member_uint64(cur_classes,query_context.query_hash)) { - /* Disable AQO for deactivated query or for query belonged to a + /* + * Disable AQO for deactivated query or for query belonged to a * feature space, that is processing yet (disallow invalidation * recursion, as an example). */ @@ -174,18 +187,19 @@ aqo_planner(Query *parse, boundParams); } + elog(DEBUG1, "AQO will be used for query '%s', class %ld", + query_string ? 
query_string : "null string", query_context.query_hash); + oldCxt = MemoryContextSwitchTo(AQOMemoryContext); - cur_classes = lappend_int(cur_classes, query_context.query_hash); + cur_classes = lappend_uint64(cur_classes, query_context.query_hash); MemoryContextSwitchTo(oldCxt); - INSTR_TIME_SET_CURRENT(query_context.query_starttime); - - /* - * find-add query and query text must be atomic operation to prevent - * concurrent insertions. - */ - init_lock_tag(&tag, (uint32) query_context.query_hash, (uint32) 0); - LockAcquire(&tag, ExclusiveLock, false, false); + if (aqo_mode == AQO_MODE_DISABLED) + { + /* Skip access to a database in this mode. */ + disable_aqo_for_query(); + goto ignore_query_settings; + } query_is_stored = find_query(query_context.query_hash, &query_params[0], &query_nulls[0]); @@ -232,45 +246,28 @@ aqo_planner(Query *parse, break; case AQO_MODE_DISABLED: /* Should never happen */ - query_context.fspace_hash = query_context.query_hash; + Assert(0); break; default: elog(ERROR, "unrecognized mode in AQO: %d", aqo_mode); break; } - - if (query_context.adding_query || force_collect_stat) - { - /* - * Add query into the AQO knowledge base. To process an error with - * concurrent addition from another backend we will try to restart - * preprocessing routine. - */ - update_query(query_context.query_hash, - query_context.fspace_hash, - query_context.learn_aqo, - query_context.use_aqo, - query_context.auto_tuning); - - /* - * Add query text into the ML-knowledge base. Just for further - * analysis. In the case of cached plans we could have NULL query text. - */ - if (query_text != NULL) - add_query_text(query_context.query_hash); - } } - else + else /* Query class exists in a ML knowledge base. 
*/ { query_context.adding_query = false; query_context.learn_aqo = DatumGetBool(query_params[1]); query_context.use_aqo = DatumGetBool(query_params[2]); - query_context.fspace_hash = DatumGetInt32(query_params[3]); + query_context.fspace_hash = DatumGetInt64(query_params[3]); query_context.auto_tuning = DatumGetBool(query_params[4]); query_context.collect_stat = query_context.auto_tuning; + /* + * Deactivate query if no one reason exists for usage of an AQO machinery. + */ + Assert(query_context.query_hash>=0); if (!query_context.learn_aqo && !query_context.use_aqo && - !query_context.auto_tuning) + !query_context.auto_tuning && !force_collect_stat) add_deactivated_query(query_context.query_hash); /* @@ -294,6 +291,7 @@ aqo_planner(Query *parse, * In this mode we want to learn with incoming query (if it is not * suppressed manually) and collect stats. */ + Assert(query_context.query_hash>=0); query_context.collect_stat = true; query_context.fspace_hash = query_context.query_hash; break; @@ -310,14 +308,36 @@ aqo_planner(Query *parse, } } - LockRelease(&tag, ExclusiveLock, false); +ignore_query_settings: + if (!query_is_stored && (query_context.adding_query || force_collect_stat)) + { + /* + * find-add query and query text must be atomic operation to prevent + * concurrent insertions. + */ + Assert(query_context.query_hash>=0); + init_lock_tag(&tag, (uint32) query_context.query_hash, (uint32) 0);//my code + LockAcquire(&tag, ExclusiveLock, false, false); + /* + * Add query into the AQO knowledge base. To process an error with + * concurrent addition from another backend we will try to restart + * preprocessing routine. + */ + Assert(query_context.query_hash>=0); + update_query(query_context.query_hash, query_context.fspace_hash, + query_context.learn_aqo, query_context.use_aqo, + query_context.auto_tuning); - /* - * This mode is possible here, because force collect statistics uses AQO - * machinery. 
- */ - if (aqo_mode == AQO_MODE_DISABLED) - disable_aqo_for_query(); + /* + * Add query text into the ML-knowledge base. Just for further + * analysis. In the case of cached plans we could have NULL query text. + */ + Assert(query_context.query_hash>=0); + if (query_string != NULL) + add_query_text(query_context.query_hash, query_string); + + LockRelease(&tag, ExclusiveLock, false); + } if (force_collect_stat) { @@ -326,9 +346,14 @@ aqo_planner(Query *parse, * query execution statistics in any mode. */ query_context.collect_stat = true; + Assert(query_context.query_hash>=0); query_context.fspace_hash = query_context.query_hash; } + if (!IsQueryDisabled()) + /* It's good place to set timestamp of start of a planning process. */ + INSTR_TIME_SET_CURRENT(query_context.start_planning_time); + return call_default_planner(parse, query_string, cursorOptions, @@ -341,23 +366,29 @@ aqo_planner(Query *parse, void disable_aqo_for_query(void) { - query_context.adding_query = false; + query_context.learn_aqo = false; query_context.use_aqo = false; query_context.auto_tuning = false; query_context.collect_stat = false; + query_context.adding_query = false; + query_context.explain_only = false; + + INSTR_TIME_SET_ZERO(query_context.start_planning_time); + query_context.planning_time = -1.; } /* * Examine a fully-parsed query, and return TRUE iff any relation underlying * the query is a system relation. */ -bool +static bool isQueryUsingSystemRelation(Query *query) { return isQueryUsingSystemRelation_walker((Node *) query, NULL); } + static bool IsAQORelation(Relation rel) { @@ -373,7 +404,7 @@ IsAQORelation(Relation rel) return false; } -bool +static bool isQueryUsingSystemRelation_walker(Node *node, void *context) { if (node == NULL) @@ -398,6 +429,12 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) if (is_catalog || is_aqo_rel) return true; } + else if (rte->rtekind == RTE_FUNCTION) + { + /* + * TODO: Exclude queries with AQO functions. 
+ */ + } } return query_tree_walker(query, diff --git a/preprocessing.h b/preprocessing.h new file mode 100644 index 00000000..f27deb91 --- /dev/null +++ b/preprocessing.h @@ -0,0 +1,12 @@ +#ifndef __PREPROCESSING_H__ +#define __PREPROCESSING_H__ + +#include "nodes/pathnodes.h" +#include "nodes/plannodes.h" +extern PlannedStmt *aqo_planner(Query *parse, + const char *query_string, + int cursorOptions, + ParamListInfo boundParams); +extern void disable_aqo_for_query(void); + +#endif /* __PREPROCESSING_H__ */ diff --git a/schedule b/schedule deleted file mode 100644 index 52f0063d..00000000 --- a/schedule +++ /dev/null @@ -1,204 +0,0 @@ -# src/test/regress/serial_schedule -# This should probably be in an order similar to parallel_schedule. -# test: tablespace -test: boolean -test: char -test: name -test: varchar -test: text -test: int2 -test: int4 -test: int8 -test: oid -test: xid -test: float4 -test: float8 -test: bit -test: numeric -test: txid -test: uuid -test: enum -test: money -test: rangetypes -test: pg_lsn -test: regproc -test: strings -test: numerology -test: point -test: lseg -test: line -test: box -test: path -test: polygon -test: circle -test: date -test: time -test: timetz -test: timestamp -test: timestamptz -test: interval -test: inet -test: macaddr -test: macaddr8 -test: tstypes -test: geometry -test: horology -test: regex -test: oidjoins -test: type_sanity -test: opr_sanity -test: misc_sanity -test: comments -test: expressions -test: unicode -test: create_function_1 -test: create_type -test: create_table -test: create_function_2 -test: copy -test: copyselect -test: copydml -test: insert -test: insert_conflict -test: create_misc -test: create_operator -test: create_procedure -test: create_index -test: create_index_spgist -test: create_view -test: index_including -test: index_including_gist -test: create_aggregate -test: create_function_3 -test: create_cast -test: constraints -test: triggers -test: select -test: inherit -test: typed_table -test: vacuum 
-test: drop_if_exists -test: updatable_views -test: roleattributes -test: create_am -test: hash_func -test: errors -test: infinite_recurse -# test: sanity_check -test: select_into -test: select_distinct -test: select_distinct_on -test: select_implicit -test: select_having -# test: subselect -test: incremental_sort -# test: union -test: case -# test: join -test: aggregates -test: transactions -ignore: random -test: random -test: portals -test: arrays -test: btree_index -test: hash_index -test: update -test: delete -test: namespace -test: prepared_xacts -test: brin -test: gin -test: gist -test: spgist -test: privileges -test: init_privs -test: security_label -test: collate -test: matview -test: lock -test: replica_identity -test: rowsecurity -test: object_address -test: tablesample -test: groupingsets -test: drop_operator -test: password -test: identity -test: generated -test: join_hash -test: create_table_like -test: alter_generic -test: alter_operator -test: misc -test: async -test: dbsize -test: misc_functions -test: sysviews -test: tsrf -test: tid -test: tidscan -test: collate.icu.utf8 -test: rules -test: psql -test: psql_crosstab -test: amutils -test: stats_ext -test: collate.linux.utf8 -# test: select_parallel -test: write_parallel -test: publication -test: subscription -test: select_views -test: portals_p2 -test: foreign_key -test: cluster -test: dependency -test: guc -test: bitmapops -test: combocid -test: tsearch -test: tsdicts -test: foreign_data -test: window -test: xmlmap -test: functional_deps -test: advisory_lock -test: indirect_toast -test: equivclass -test: json -test: jsonb -test: json_encoding -test: jsonpath -test: jsonpath_encoding -test: jsonb_jsonpath -test: plancache -test: limit -test: plpgsql -test: copy2 -test: temp -test: domain -test: rangefuncs -test: prepare -test: conversion -test: truncate -test: alter_table -test: sequence -test: polymorphism -test: rowtypes -test: returning -test: largeobject -# test: with -test: xml -test: 
partition_join -test: partition_prune -test: reloptions -test: hash_part -test: indexing -test: partition_aggregate -test: partition_info -test: tuplesort -test: explain -test: event_trigger -test: fast_default -test: stats diff --git a/selectivity_cache.c b/selectivity_cache.c index a57682db..b59da933 100644 --- a/selectivity_cache.c +++ b/selectivity_cache.c @@ -16,6 +16,8 @@ * */ +#include "postgres.h" + #include "aqo.h" typedef struct diff --git a/sql/aqo_CVE-2020-14350.sql b/sql/aqo_CVE-2020-14350.sql index 7fff18a4..f7dd4e23 100644 --- a/sql/aqo_CVE-2020-14350.sql +++ b/sql/aqo_CVE-2020-14350.sql @@ -12,6 +12,7 @@ CREATE ROLE regress_hacker LOGIN; -- Test 1 RESET ROLE; ALTER ROLE regress_hacker NOSUPERUSER; +GRANT CREATE ON SCHEMA public TO regress_hacker; SET ROLE regress_hacker; SHOW is_superuser; @@ -42,12 +43,12 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_status(hash int) +CREATE FUNCTION aqo_status(hash bigint) RETURNS TABLE ( "learn" BOOL, "use aqo" BOOL, "auto tune" BOOL, - "fspace hash" INT, + "fspace hash" bigINT, "t_naqo" TEXT, "err_naqo" TEXT, "iters" BIGINT, @@ -65,12 +66,12 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_status(hash int) +CREATE OR REPLACE FUNCTION aqo_status(hash bigint) RETURNS TABLE ( "learn" BOOL, "use aqo" BOOL, "auto tune" BOOL, - "fspace hash" INT, + "fspace hash" bigINT, "t_naqo" TEXT, "err_naqo" TEXT, "iters" BIGINT, @@ -91,7 +92,7 @@ SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_status(int); +DROP FUNCTION aqo_status(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 3 @@ -101,7 +102,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_enable_query(hash int) +CREATE FUNCTION aqo_enable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -113,7 +114,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION 
aqo_enable_query(hash int) +CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -128,7 +129,7 @@ SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_enable_query(int); +DROP FUNCTION aqo_enable_query(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 4 @@ -138,7 +139,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_disable_query(hash int) +CREATE FUNCTION aqo_disable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -150,7 +151,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_query(hash int) +CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -165,7 +166,7 @@ SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_disable_query(int); +DROP FUNCTION aqo_disable_query(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 5 @@ -175,7 +176,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_clear_hist(hash int) +CREATE FUNCTION aqo_clear_hist(hash bigint) RETURNS VOID AS $$ BEGIN @@ -187,7 +188,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_clear_hist(hash int) +CREATE OR REPLACE FUNCTION aqo_clear_hist(hash bigint) RETURNS VOID AS $$ BEGIN @@ -202,7 +203,7 @@ SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_clear_hist(int); +DROP FUNCTION aqo_clear_hist(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 6 @@ -212,7 +213,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_drop(hash int) +CREATE FUNCTION aqo_drop(hash bigint) RETURNS VOID AS $$ BEGIN @@ -224,7 +225,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_drop(hash int) +CREATE OR REPLACE FUNCTION aqo_drop(hash bigint) RETURNS VOID AS $$ BEGIN @@ -239,7 +240,7 @@ SET ROLE 
regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_drop(int); +DROP FUNCTION aqo_drop(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 7 @@ -311,5 +312,6 @@ DROP EXTENSION IF EXISTS aqo; -- Cleanup RESET ROLE; +DROP OWNED BY regress_hacker CASCADE; DROP ROLE regress_hacker; diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index 80917dec..e31923d9 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -52,9 +52,8 @@ SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants -- Trivial JOIN push-down. SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) - SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; +SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; ') AS str WHERE str NOT LIKE '%Sort Method%'; - EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql new file mode 100644 index 00000000..acd64b16 --- /dev/null +++ b/sql/clean_aqo_data.sql @@ -0,0 +1,143 @@ +CREATE EXTENSION aqo; +SET aqo.mode = 'learn'; + +DROP TABLE IF EXISTS a; +DROP TABLE IF EXISTS b; +CREATE TABLE a(); +SELECT * FROM a; +SELECT 'a'::regclass::oid AS a_oid \gset +SELECT clean_aqo_data(); + +/* + * lines with a_oid in aqo_data, + * lines with fspace_hash corresponding to a_oid in aqo_queries, + * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_texts, + * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat + * should remain + */ +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); +SELECT count(*) FROM aqo_query_stat WHERE + 
aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + +DROP TABLE a; +SELECT clean_aqo_data(); + +/* + * lines with a_oid in aqo_data, + * lines with a_oid's fspace_hash EQUAL TO query_hash in aqo_queries, + * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_texts, + * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat, + * should be deleted +*/ +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash; +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash); +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash); + +CREATE TABLE a(); +SELECT * FROM a; +SELECT 'a'::regclass::oid AS a_oid \gset +-- add manually line with different fspace_hash and query_hash to aqo_queries +INSERT INTO aqo_queries VALUES (:a_oid + 1, 't', 't', :a_oid, 'f'); +DROP TABLE a; +SELECT clean_aqo_data(); +-- this line should remain +SELECT count(*) FROM aqo_queries WHERE (fspace_hash = :a_oid AND query_hash = :a_oid + 1); + +CREATE TABLE a(); +CREATE TABLE b(); +SELECT * FROM a; +SELECT * FROM b; +SELECT * FROM b CROSS JOIN a; +SELECT 'a'::regclass::oid AS a_oid \gset +SELECT 'b'::regclass::oid AS b_oid \gset + +-- new lines added to aqo_data +SELECT count(*) FROM aqo_data 
WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)); +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + +DROP TABLE a; +SELECT clean_aqo_data(); + +/* + * lines corresponding to a_oid and both a_oid's fspace_hash deleted in aqo_data, + * lines with fspace_hash corresponding to a_oid deleted in aqo_queries, + * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_texts, + * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_stat, + */ +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash; +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.query_hash = 
ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash); +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash); + +-- lines corresponding to b_oid in all theese tables should remain +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash; +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash); +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash); + +DROP TABLE b; +SELECT clean_aqo_data(); + +-- lines corresponding to b_oid in theese tables deleted +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); +SELECT count(*) FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash; +SELECT count(*) FROM aqo_query_texts WHERE + aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash 
= aqo_queries.query_hash); +SELECT count(*) FROM aqo_query_stat WHERE + aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = aqo_queries.query_hash); + +DROP EXTENSION aqo; \ No newline at end of file diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index 209edd19..9c169a26 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -31,6 +31,6 @@ SELECT * FROM aqo_data; SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex FROM aqo_queries JOIN aqo_query_stat USING (query_hash); -SELECT query_text FROM aqo_query_texts ORDER BY md5(query_text); +SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); DROP EXTENSION aqo; diff --git a/sql/plancache.sql b/sql/plancache.sql new file mode 100644 index 00000000..8208b1d3 --- /dev/null +++ b/sql/plancache.sql @@ -0,0 +1,46 @@ +-- Tests on interaction of AQO with cached plans. + +CREATE EXTENSION aqo; +SET aqo.mode = 'intelligent'; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; + +CREATE TABLE test AS SELECT x FROM generate_series(1,10) AS x; +ANALYZE test; + +-- Function which implements a test where AQO is used for both situations where +-- a query is planned or got from a plan cache. +-- Use a function to hide a system dependent hash value. 
+CREATE FUNCTION f1() RETURNS TABLE ( + nnex bigint, + nex bigint, + pt double precision[] +) AS $$ +DECLARE + i integer; + qhash bigint; +BEGIN + PREPARE fooplan (int) AS SELECT count(*) FROM test WHERE x = $1; + + FOR i IN 1..10 LOOP + execute 'EXECUTE fooplan(1)'; + END LOOP; + + SELECT query_hash FROM aqo_query_texts + WHERE query_text LIKE '%count(*) FROM test WHERE x%' INTO qhash; + + RETURN QUERY SELECT executions_without_aqo nnex, + executions_with_aqo nex, + planning_time_with_aqo pt + FROM aqo_query_stat WHERE query_hash = qhash; +END $$ LANGUAGE 'plpgsql'; + +-- The function shows 6 executions without an AQO support (nnex) and +-- 4 executions with usage of an AQO knowledge base (nex). Planning time in the +-- case of AQO support (pt) is equal to '-1', because the query plan is exracted +-- from the plan cache. +SELECT * FROM f1(); + +DROP FUNCTION f1; +DROP TABLE test CASCADE; +DROP EXTENSION aqo; \ No newline at end of file diff --git a/sql/top_queries.sql b/sql/top_queries.sql new file mode 100755 index 00000000..bfacdd38 --- /dev/null +++ b/sql/top_queries.sql @@ -0,0 +1,27 @@ +CREATE EXTENSION aqo; +SET aqo.mode = 'disabled'; +SET aqo.force_collect_stat = 'on'; + +-- +-- num of generate_series(1,1000000) query should be the first +-- +SELECT count(*) FROM generate_series(1,1000000); +SELECT num FROM top_time_queries(10) AS tt WHERE + tt.fspace_hash = (SELECT fspace_hash FROM aqo_queries WHERE + aqo_queries.query_hash = (SELECT aqo_query_texts.query_hash FROM aqo_query_texts + WHERE query_text = 'SELECT count(*) FROM generate_series(1,1000000);')); + +-- +-- num of query uses table t2 should be bigger than num of query uses table t1 and be the first +-- +CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + FROM generate_series(1,1000) AS gs; +CREATE TABLE t2 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + FROM generate_series(1,100000) AS gs; +SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; 
+SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + +SELECT num FROM top_error_queries(10) AS te WHERE + te.fspace_hash = (SELECT fspace_hash FROM aqo_queries WHERE + aqo_queries.query_hash = (SELECT aqo_query_texts.query_hash FROM aqo_query_texts + WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;')); \ No newline at end of file diff --git a/sql/unsupported.sql b/sql/unsupported.sql index c49271dc..472ea5d9 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -4,16 +4,155 @@ SET aqo.show_details = 'on'; DROP TABLE IF EXISTS t; CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; +ANALYZE t; -SELECT str FROM expln(' +CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + FROM generate_series(1,1000) AS gs; +ANALYZE t, t1; + +-- +-- Do not support HAVING clause for now. +-- +SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + +-- +-- Doesn't estimates GROUP BY clause +-- +SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + +SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + +SELECT count(*) FROM ( + SELECT count(*) AS x FROM ( + SELECT count(*) FROM t1 GROUP BY (x,y) + ) AS q1 +) AS q2 +WHERE q2.x > 1; + +SELECT count(*) FROM ( + SELECT count(*) AS x FROM ( + SELECT count(*) FROM t1 GROUP BY (x,y) + ) AS q1 +) AS q2 +WHERE q2.x > 1; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM ( + SELECT count(*) AS x FROM ( + SELECT count(*) FROM t1 GROUP BY (x,y) + ) AS q1 +) AS q2 +WHERE q2.x > 1; + +-- +-- Doesn't support GROUPING SETS clause +-- +SELECT count(*) FROM (SELECT x, y FROM t1 
GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + +-- +-- The subplans issue +-- +SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) - SELECT * FROM t GROUP BY (x) HAVING x > 3; -') AS str WHERE str NOT LIKE '%Memory Usage%'; + SELECT count(*) FROM t WHERE x = ( + SELECT avg(x) FROM t WHERE x = 1 + ); --- Do not support having clauses for now. +SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE x = ( + SELECT avg(x) FROM t t0 WHERE t0.x = t.x + ); + +-- Two identical subplans in a clause list +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE + x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE + x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + +-- It's OK to use the knowledge for a query with different constants. +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE + x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 22) OR + x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 23); + +-- Different SubPlans in the quals of leafs of JOIN. 
+SELECT count(*) FROM + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + JOIN + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + ON q1.x = q2.x+1; SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) - SELECT * FROM t GROUP BY (x) HAVING x > 3; +SELECT count(*) FROM + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + JOIN + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + ON q1.x = q2.x+1; ') AS str WHERE str NOT LIKE '%Memory Usage%'; +-- Two identical subplans in a clause +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + +-- +-- Not executed nodes +-- +SELECT * FROM + (SELECT * FROM t WHERE x < 0) AS t0 + JOIN + (SELECT * FROM t WHERE x > 20) AS t1 + USING(x); +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM + (SELECT * FROM t WHERE x < 0) AS t0 + JOIN + (SELECT * FROM t WHERE x > 20) AS t1 + USING(x); + +-- AQO need to predict total fetched tuples in a table. +-- +-- At a non-leaf node we have prediction about input tuples - is a number of +-- predicted output rows in underlying node. But for Scan nodes we don't have +-- any prediction on number of fetched tuples. +-- So, if selectivity was wrong we could make bad choice of Scan operation. +-- For example, we could choose suboptimal index. + +-- Turn off statistics gathering for simple demonstration of filtering problem. 
+ALTER TABLE t SET (autovacuum_enabled = 'false'); +CREATE INDEX ind1 ON t(x); + +SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + +-- Because of bad statistics we use a last created index instead of best choice. +-- Here we filter more tuples than with the ind1 index. +CREATE INDEX ind2 ON t(mod(x,3)); +SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + +-- Best choice is ... +ANALYZE t; +EXPLAIN (COSTS OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + +DROP TABLE t,t1 CASCADE; DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index d8873839..5c62896f 100644 --- a/storage.c +++ b/storage.c @@ -15,12 +15,16 @@ * */ -#include "aqo.h" +#include "postgres.h" #include "access/heapam.h" #include "access/table.h" #include "access/tableam.h" +#include "aqo.h" +#include "preprocessing.h" + + HTAB *deactivated_queries = NULL; static ArrayType *form_matrix(double **matrix, int nrows, int ncols); @@ -38,49 +42,75 @@ static bool my_simple_heap_update(Relation relation, HeapTuple tup, bool *update_indexes); +/* + * Open an AQO-related relation. + * It should be done carefully because of a possible concurrent DROP EXTENSION + * command. In such case AQO must be disabled in this backend. + */ +static bool +open_aqo_relation(char *heaprelnspname, char *heaprelname, + char *indrelname, LOCKMODE lockmode, + Relation *hrel, Relation *irel) +{ + Oid reloid; + RangeVar *rv; + + reloid = RelnameGetRelid(indrelname); + rv = makeRangeVar(heaprelnspname, heaprelname, -1); + *hrel = table_openrv_extended(rv, lockmode, true); + if (!OidIsValid(reloid) || *hrel == NULL) + { + /* + * Absence of any AQO-related table tell us that someone executed + * a 'DROP EXTENSION aqo' command. We disable AQO for all future queries + * in this backend. 
For performance reasons we do it locally. + * Clear profiling hash table. + * Also, we gently disable AQO for the rest of the current query + * execution process. + */ + aqo_enabled = false; + disable_aqo_for_query(); + + return false; + } + + *irel = index_open(reloid, lockmode); + return true; +} /* * Returns whether the query with given hash is in aqo_queries. * If yes, returns the content of the first line with given hash. * - * Use dirty snapshot to see all (include in-progress) data. We want to prevent + * Use dirty snapshot to see all (include in-progess) data. We want to prevent * wait in the XactLockTableWait routine. */ bool -find_query(int qhash, Datum *search_values, bool *search_nulls) +find_query(uint64 qhash, Datum *search_values, bool *search_nulls) { - RangeVar *rv; Relation hrel; Relation irel; HeapTuple tuple; TupleTableSlot *slot; bool shouldFree; - Oid reloid; IndexScanDesc scan; ScanKeyData key; SnapshotData snap; bool find_ok = false; - reloid = RelnameGetRelid("aqo_queries_query_hash_idx"); - if (!OidIsValid(reloid)) - { - disable_aqo_for_query(); + if (!open_aqo_relation("public", "aqo_queries", "aqo_queries_query_hash_idx", + AccessShareLock, &hrel, &irel)) return false; - } - - rv = makeRangeVar("public", "aqo_queries", -1); - hrel = table_openrv(rv, AccessShareLock); - irel = index_open(reloid, AccessShareLock); InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(qhash)); + ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int64GetDatum(qhash)); index_rescan(scan, &key, 1, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); - if (find_ok) + if (find_ok && search_values != NULL) { tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); Assert(shouldFree != true); @@ -105,10 +135,9 @@ find_query(int qhash, Datum *search_values, bool *search_nulls) * not 
break any learning logic besides possible additional learning iterations. */ bool -update_query(int qhash, int fhash, +update_query(uint64 qhash, uint64 fhash, bool learn_aqo, bool use_aqo, bool auto_tuning) { - RangeVar *rv; Relation hrel; Relation irel; TupleTableSlot *slot; @@ -120,7 +149,6 @@ update_query(int qhash, int fhash, bool shouldFree; bool result = true; bool update_indexes; - Oid reloid; IndexScanDesc scan; ScanKeyData key; SnapshotData snap; @@ -129,16 +157,9 @@ update_query(int qhash, int fhash, if (XactReadOnly) return false; - reloid = RelnameGetRelid("aqo_queries_query_hash_idx"); - if (!OidIsValid(reloid)) - { - disable_aqo_for_query(); + if (!open_aqo_relation("public", "aqo_queries", "aqo_queries_query_hash_idx", + RowExclusiveLock, &hrel, &irel)) return false; - } - - rv = makeRangeVar("public", "aqo_queries", -1); - hrel = table_openrv(rv, RowExclusiveLock); - irel = index_open(reloid, RowExclusiveLock); /* * Start an index scan. Use dirty snapshot to check concurrent updates that @@ -151,10 +172,10 @@ update_query(int qhash, int fhash, index_rescan(scan, &key, 1, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - values[0] = Int32GetDatum(qhash); + values[0] = Int64GetDatum(qhash); values[1] = BoolGetDatum(learn_aqo); values[2] = BoolGetDatum(use_aqo); - values[3] = Int32GetDatum(fhash); + values[3] = Int64GetDatum(fhash); values[4] = BoolGetDatum(auto_tuning); if (!index_getnext_slot(scan, ForwardScanDirection, slot)) @@ -191,7 +212,7 @@ update_query(int qhash, int fhash, * Ooops, somebody concurrently updated the tuple. It is possible * only in the case of changes made by third-party code. 
*/ - elog(ERROR, "AQO feature space data for signature (%d, %d) concurrently" + elog(ERROR, "AQO feature space data for signature (%ld, %ld) concurrently" " updated by a stranger backend.", qhash, fhash); result = false; @@ -219,39 +240,56 @@ update_query(int qhash, int fhash, * Returns false if the operation failed, true otherwise. */ bool -add_query_text(int qhash) +add_query_text(uint64 qhash, const char *query_string) { - RangeVar *rv; Relation hrel; Relation irel; HeapTuple tuple; Datum values[2]; bool isnull[2] = {false, false}; - Oid reloid; - values[0] = Int32GetDatum(qhash); - values[1] = CStringGetTextDatum(query_text); + /* Variables for checking of concurrent writings. */ + TupleTableSlot *slot; + IndexScanDesc scan; + ScanKeyData key; + SnapshotData snap; + + values[0] = Int64GetDatum(qhash); + values[1] = CStringGetTextDatum(query_string); /* Couldn't allow to write if xact must be read-only. */ if (XactReadOnly) return false; - reloid = RelnameGetRelid("aqo_query_texts_query_hash_idx"); - if (!OidIsValid(reloid)) - { - disable_aqo_for_query(); + if (!open_aqo_relation("public", "aqo_query_texts", + "aqo_query_texts_query_hash_idx", + RowExclusiveLock, &hrel, &irel)) return false; - } - rv = makeRangeVar("public", "aqo_query_texts", -1); - hrel = table_openrv(rv, RowExclusiveLock); - irel = index_open(reloid, RowExclusiveLock); tuple = heap_form_tuple(RelationGetDescr(hrel), values, isnull); - simple_heap_insert(hrel, tuple); - my_index_insert(irel, values, isnull, &(tuple->t_self), hrel, + /* + * Start an index scan. Use dirty snapshot to check concurrent updates that + * can be made before, but still not visible. 
+ */ + InitDirtySnapshot(snap); + scan = index_beginscan(hrel, irel, &snap, 1, 0); + ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(qhash)); + + index_rescan(scan, &key, 1, NULL, 0); + slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); + + if (!index_getnext_slot(scan, ForwardScanDirection, slot)) + { + tuple = heap_form_tuple(RelationGetDescr(hrel), values, isnull); + + simple_heap_insert(hrel, tuple); + my_index_insert(irel, values, isnull, &(tuple->t_self), hrel, UNIQUE_CHECK_YES); + } + ExecDropSingleTupleTableSlot(slot); + index_endscan(scan); index_close(irel, RowExclusiveLock); table_close(hrel, RowExclusiveLock); @@ -259,6 +297,53 @@ add_query_text(int qhash) return true; } + +static ArrayType * +form_oids_vector(List *relids) +{ + Datum *oids; + ArrayType *array; + ListCell *lc; + int i = 0; + + if (relids == NIL) + return NULL; + + oids = (Datum *) palloc(list_length(relids) * sizeof(Datum)); + + foreach(lc, relids) + { + Oid relid = lfirst_oid(lc); + + oids[i++] = ObjectIdGetDatum(relid); + } + + Assert(i == list_length(relids)); + array = construct_array(oids, i, OIDOID, sizeof(Oid), true, TYPALIGN_INT); + pfree(oids); + return array; +} + +static List * +deform_oids_vector(Datum datum) +{ + ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); + Datum *values; + int i; + int nelems = 0; + List *relids = NIL; + + deconstruct_array(array, + OIDOID, sizeof(Oid), true, TYPALIGN_INT, + &values, NULL, &nelems); + for (i = 0; i < nelems; ++i) + relids = lappend_oid(relids, DatumGetObjectId(values[i])); + + pfree(values); + pfree(array); + return relids; +} + /* * Loads feature subspace (fss) from table aqo_data into memory. * The last column of the returned matrix is for target values of objects. 
@@ -274,35 +359,28 @@ add_query_text(int qhash) * objects in the given feature space */ bool -load_fss(int fhash, int fss_hash, - int ncols, double **matrix, double *targets, int *rows) +load_fss(uint64 fhash, int fss_hash, + int ncols, double **matrix, double *targets, int *rows, + List **relids) { - RangeVar *rv; Relation hrel; Relation irel; HeapTuple tuple; TupleTableSlot *slot; bool shouldFree; bool find_ok = false; - Oid reloid; IndexScanDesc scan; ScanKeyData key[2]; - Datum values[5]; - bool isnull[5]; + Datum values[6]; + bool isnull[6]; bool success = true; - reloid = RelnameGetRelid("aqo_fss_access_idx"); - if (!OidIsValid(reloid)) - { - disable_aqo_for_query(); + if (!open_aqo_relation("public", "aqo_data", + "aqo_fss_access_idx", + AccessShareLock, &hrel, &irel)) return false; - } - rv = makeRangeVar("public", "aqo_data", -1); - hrel = table_openrv(rv, AccessShareLock); - irel = index_open(reloid, AccessShareLock); scan = index_beginscan(hrel, irel, SnapshotSelf, 2, 0); - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fhash)); ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss_hash)); index_rescan(scan, key, 2, NULL, 0); @@ -330,9 +408,12 @@ load_fss(int fhash, int fss_hash, deform_matrix(values[3], matrix); deform_vector(values[4], targets, rows); + + if (relids != NULL) + *relids = deform_oids_vector(values[5]); } else - elog(ERROR, "unexpected number of features for hash (%d, %d):\ + elog(ERROR, "unexpected number of features for hash (%ld, %d):\ expected %d features, obtained %d", fhash, fss_hash, ncols, DatumGetInt32(values[2])); } @@ -360,10 +441,9 @@ load_fss(int fhash, int fss_hash, * Caller guaranteed that no one AQO process insert or update this data row. 
*/ bool -update_fss(int fhash, int fsshash, int nrows, int ncols, - double **matrix, double *targets) +update_fss(uint64 fhash, int fsshash, int nrows, int ncols, + double **matrix, double *targets, List *relids) { - RangeVar *rv; Relation hrel; Relation irel; SnapshotData snap; @@ -371,13 +451,12 @@ update_fss(int fhash, int fsshash, int nrows, int ncols, TupleDesc tupDesc; HeapTuple tuple, nw_tuple; - Datum values[5]; - bool isnull[5] = { false, false, false, false, false }; - bool replace[5] = { false, false, false, true, true }; + Datum values[6]; + bool isnull[6] = { false, false, false, false, false, false }; + bool replace[6] = { false, false, false, true, true, false }; bool shouldFree; bool find_ok = false; bool update_indexes; - Oid reloid; IndexScanDesc scan; ScanKeyData key[2]; bool result = true; @@ -386,18 +465,12 @@ update_fss(int fhash, int fsshash, int nrows, int ncols, if (XactReadOnly) return false; - reloid = RelnameGetRelid("aqo_fss_access_idx"); - if (!OidIsValid(reloid)) - { - disable_aqo_for_query(); + if (!open_aqo_relation("public", "aqo_data", + "aqo_fss_access_idx", + RowExclusiveLock, &hrel, &irel)) return false; - } - rv = makeRangeVar("public", "aqo_data", -1); - hrel = table_openrv(rv, RowExclusiveLock); - irel = index_open(reloid, RowExclusiveLock); tupDesc = RelationGetDescr(hrel); - InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 2, 0); @@ -421,6 +494,11 @@ update_fss(int fhash, int fsshash, int nrows, int ncols, isnull[3] = true; values[4] = PointerGetDatum(form_vector(targets, nrows)); + + /* Form array of relids. Only once. */ + values[5] = PointerGetDatum(form_oids_vector(relids)); + if ((void *) values[5] == NULL) + isnull[5] = true; tuple = heap_form_tuple(tupDesc, values, isnull); /* @@ -461,7 +539,7 @@ update_fss(int fhash, int fsshash, int nrows, int ncols, * Ooops, somebody concurrently updated the tuple. It is possible * only in the case of changes made by third-party code. 
*/ - elog(ERROR, "AQO data piece (%d %d) concurrently updated" + elog(ERROR, "AQO data piece (%ld %d) concurrently updated" " by a stranger backend.", fhash, fsshash); result = false; @@ -491,28 +569,21 @@ update_fss(int fhash, int fsshash, int nrows, int ncols, * is not found. */ QueryStat * -get_aqo_stat(int qhash) +get_aqo_stat(uint64 qhash) { - RangeVar *rv; Relation hrel; Relation irel; TupleTableSlot *slot; - Oid reloid; IndexScanDesc scan; ScanKeyData key; QueryStat *stat = palloc_query_stat(); bool shouldFree; - reloid = RelnameGetRelid("aqo_query_stat_idx"); - if (!OidIsValid(reloid)) - { - disable_aqo_for_query(); - return NULL; - } - rv = makeRangeVar("public", "aqo_query_stat", -1); - hrel = table_openrv(rv, AccessShareLock); - irel = index_open(reloid, AccessShareLock); + if (!open_aqo_relation("public", "aqo_query_stat", + "aqo_query_stat_idx", + AccessShareLock, &hrel, &irel)) + return false; scan = index_beginscan(hrel, irel, SnapshotSelf, 1, 0); ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(qhash)); @@ -552,9 +623,8 @@ get_aqo_stat(int qhash) * Executes disable_aqo_for_query if aqo_query_stat is not found. 
*/ void -update_aqo_stat(int qhash, QueryStat *stat) +update_aqo_stat(uint64 qhash, QueryStat *stat) { - RangeVar *rv; Relation hrel; Relation irel; SnapshotData snap; @@ -571,7 +641,6 @@ update_aqo_stat(int qhash, QueryStat *stat) true, true, true }; bool shouldFree; bool update_indexes; - Oid reloid; IndexScanDesc scan; ScanKeyData key; @@ -579,21 +648,16 @@ update_aqo_stat(int qhash, QueryStat *stat) if (XactReadOnly) return; - reloid = RelnameGetRelid("aqo_query_stat_idx"); - if (!OidIsValid(reloid)) - { - disable_aqo_for_query(); + if (!open_aqo_relation("public", "aqo_query_stat", + "aqo_query_stat_idx", + RowExclusiveLock, &hrel, &irel)) return; - } - rv = makeRangeVar("public", "aqo_query_stat", -1); - hrel = table_openrv(rv, RowExclusiveLock); - irel = index_open(reloid, RowExclusiveLock); tupDesc = RelationGetDescr(hrel); InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(qhash)); + ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int64GetDatum(qhash)); index_rescan(scan, &key, 1, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); @@ -611,7 +675,7 @@ update_aqo_stat(int qhash, QueryStat *stat) if (!index_getnext_slot(scan, ForwardScanDirection, slot)) { /* Such signature (hash) doesn't yet exist in the ML knowledge base. */ - values[0] = Int32GetDatum(qhash); + values[0] = Int64GetDatum(qhash); tuple = heap_form_tuple(tupDesc, values, isnull); simple_heap_insert(hrel, tuple); my_index_insert(irel, values, isnull, &(tuple->t_self), @@ -639,7 +703,7 @@ update_aqo_stat(int qhash, QueryStat *stat) * Ooops, somebody concurrently updated the tuple. It is possible * only in the case of changes made by third-party code. 
*/ - elog(ERROR, "AQO statistic data for query signature %d concurrently" + elog(ERROR, "AQO statistic data for query signature %ld concurrently" " updated by a stranger backend.", qhash); } @@ -858,7 +922,7 @@ fini_deactivated_queries_storage(void) /* Checks whether the query with given hash is deactivated */ bool -query_is_deactivated(int query_hash) +query_is_deactivated(uint64 query_hash) { bool found; @@ -868,7 +932,7 @@ query_is_deactivated(int query_hash) /* Adds given query hash into the set of hashes of deactivated queries*/ void -add_deactivated_query(int query_hash) +add_deactivated_query(uint64 query_hash) { hash_search(deactivated_queries, &query_hash, HASH_ENTER, NULL); } diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 7c3992d3..cdc18d38 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -1,46 +1,301 @@ use strict; use warnings; -use TestLib; -use Test::More tests => 6; + +use Config; use PostgresNode; +use TestLib; +use Test::More tests => 21; my $node = get_new_node('aqotest'); $node->init; $node->append_conf('postgresql.conf', qq{ shared_preload_libraries = 'aqo' - log_statement = 'none' aqo.mode = 'intelligent' + log_statement = 'ddl' }); +# Test constants. +my $TRANSACTIONS = 1000; +my $CLIENTS = 10; +my $THREADS = 10; + +# General purpose variables. +my $res; +my $fss_count; +my $fs_count; +my $fs_samples_count; +my $stat_count; + $node->start(); +# The AQO module loaded, but extension still not created. 
+$node->command_ok([ 'pgbench', '-i', '-s', '1' ], 'init pgbench tables');
+$node->command_ok([ 'pgbench', '-t',
+ "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ],
+ 'pgbench without enabled AQO');
+
 # Check conflicts of accessing to the ML knowledge base
 # intelligent mode
 $node->safe_psql('postgres', "CREATE EXTENSION aqo");
 $node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'intelligent'");
-$node->command_ok([ 'pgbench', '-i', '-s', '1' ], 'init pgbench tables');
-$node->command_ok([ 'pgbench', '-t', "1000", '-c', "10", '-j', "10" ],
- 'pgbench in intelligent mode');
+$node->safe_psql('postgres', "SELECT pg_reload_conf()");
+$node->command_ok([ 'pgbench', '-t',
+ "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ],
+ 'pgbench in intelligent mode');
 
 $node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'controlled'");
-$node->command_ok([ 'pgbench', '-t', "1000", '-c', "10", '-j', "10" ],
- 'pgbench in controlled mode');
+$node->safe_psql('postgres', "SELECT pg_reload_conf()");
+$node->command_ok([ 'pgbench', '-t',
+ "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ],
+ 'pgbench in controlled mode');
 
-$node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'disabled'");
-$node->command_ok([ 'pgbench', '-t', "1000", '-c', "10", '-j', "10" ],
- 'pgbench in disabled mode');
+# ##############################################################################
+#
+# pgbench on a database with AQO extension in 'disabled' mode.
+#
+# ##############################################################################
+
+# Cleanup of AQO knowledge base. Also test correctness of DROP procedure.
+$node->safe_psql('postgres', "DROP EXTENSION aqo");
+$node->safe_psql('postgres', "CREATE EXTENSION aqo");
+
+# Check: no problems with concurrency in disabled mode.
+$node->safe_psql('postgres', " + ALTER SYSTEM SET aqo.mode = 'disabled'; + SELECT pg_reload_conf(); +"); +$node->command_ok([ 'pgbench', '-t', + "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], + 'pgbench in disabled mode'); + +# Check: no any data added into AQO-related tables. +# Each of aqo_queries and aqo_query_texts tables contains one predefined record. +$fss_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_data;"); +$fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries;"); +$fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); +$stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); +is( (($fss_count == 0) and ($fs_count == 1) and ($fs_samples_count == 1) and ($stat_count == 0)), 1); + +# Check: no problems with stats collection in highly concurrent environment. +$node->safe_psql('postgres', " + ALTER SYSTEM SET aqo.force_collect_stat = 'on'; + SELECT pg_reload_conf(); +"); +$node->command_ok([ 'pgbench', '-t', + "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], + 'pgbench in disabled mode'); + +# Check: no any tuples added into the aqo_data table in this mode. +$fss_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_data;"); +is( ($fss_count == 0), 1); + +# Check: in forced stat collection state AQO writes into aqo_query_stat, +# aqo_queries and aqo_query_texts to give user a chance to find problematic +# queries. +$fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries"); +$fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat"); +$stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts"); +# This constants looks like magic numbers. But query set of the pgbench test +# is fixed for a long time. 
+is( (($fs_count == 7) and ($fs_samples_count == 6) and ($stat_count == 7)), 1); + +my $analytics = File::Temp->new(); +append_to_file($analytics, q{ + \set border random(1, 1E5) + SELECT count(aid) FROM pgbench_accounts GROUP BY abalance ORDER BY abalance DESC; + SELECT count(aid) FROM pgbench_accounts GROUP BY abalance HAVING abalance < :border; + + SELECT count(*) FROM pgbench_branches pgbb, + (SELECT count(aid) AS x FROM pgbench_accounts GROUP BY abalance HAVING abalance < :border) AS q1 + WHERE pgbb.bid = q1.x; +}); +# Look for top of problematic queries. +$node->command_ok([ 'pgbench', '-t', "10", '-c', "$CLIENTS", '-j', "$THREADS", + '-f', "$analytics" ], + 'analytical queries in pgbench (disabled mode)'); + +$res = $node->safe_psql('postgres', + "SELECT count(*) FROM top_error_queries(10) v + JOIN aqo_query_texts t ON (t.query_hash = v.fspace_hash) + WHERE v.error > 0. AND t.query_text LIKE '%pgbench_accounts%'"); +is($res, 3); +$res = $node->safe_psql('postgres', + "SELECT v.error, t.query_text FROM top_error_queries(10) v + JOIN aqo_query_texts t ON (t.query_hash = v.fspace_hash) + WHERE v.error > 0."); +note("\n Queries: \n $res \n"); +$res = $node->safe_psql('postgres', + "SELECT count(*) FROM top_time_queries(10) v + WHERE v.execution_time > 0."); +is($res, 10); + +# ############################################################################## +# +# pgbench on a database with AQO in 'learn' mode. 
+#
+# ##############################################################################
 
 $node->safe_psql('postgres', "DROP EXTENSION aqo");
 $node->safe_psql('postgres', "CREATE EXTENSION aqo");
 $node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'learn'");
-$node->command_ok([ 'pgbench', '-t', "1000", '-c', "10", '-j', "10" ],
- 'pgbench in learn mode');
+$node->safe_psql('postgres', "ALTER SYSTEM SET aqo.force_collect_stat = 'off'");
+$node->safe_psql('postgres', "SELECT pg_reload_conf()");
+$node->command_ok([ 'pgbench', '-t',
+ "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ],
+ 'pgbench in learn mode');
+
 $node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'frozen'");
-$node->command_ok([ 'pgbench', '-t', "1000", '-c', "10", '-j', "10" ],
- 'pgbench in frozen mode');
+$node->safe_psql('postgres', "SELECT pg_reload_conf()");
+$node->command_ok([ 'pgbench', '-t',
+ "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ],
+ 'pgbench in frozen mode');
+
+# ##############################################################################
+#
+# Check procedure of ML-knowledge data cleaning.
+#
+# ##############################################################################
+
+# Store OIDs of pgbench tables
+my $aoid = $node->safe_psql('postgres',
+ "SELECT ('pgbench_accounts'::regclass)::oid");
+my $boid = $node->safe_psql('postgres',
+ "SELECT ('pgbench_branches'::regclass)::oid");
+my $toid = $node->safe_psql('postgres',
+ "SELECT ('pgbench_tellers'::regclass)::oid");
+my $hoid = $node->safe_psql('postgres',
+ "SELECT ('pgbench_history'::regclass)::oid");
+note("oids: $aoid, $boid, $toid, $hoid");
+
+# Add data into AQO to control that cleaning procedure won't delete anything extra
+$node->safe_psql('postgres', "
+ CREATE TABLE detector(a int);
+ INSERT INTO detector (a) VALUES (1);
+ UPDATE detector SET a = a + 1;
+ DELETE FROM detector;
+ SELECT count(*) FROM detector;
+");
+
+# New queries won't add rows into AQO knowledge base.
+$node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'disabled'"); +$node->restart(); +$res = $node->safe_psql('postgres', "SHOW aqo.mode"); +is($res, 'disabled'); + +# Number of rows in aqo_data: related to pgbench test and total value. +my $pgb_fss_count = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_data + WHERE $aoid = ANY(oids) OR + $boid = ANY(oids) OR + $toid = ANY(oids) OR + $hoid = ANY(oids) +"); +$fss_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_data;"); + +# Number of rows in aqo_queries: related to pgbench test and total value. +my $pgb_fs_count = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_queries + WHERE fspace_hash IN ( + SELECT fspace_hash FROM aqo_data + WHERE + $aoid = ANY(oids) OR + $boid = ANY(oids) OR + $toid = ANY(oids) OR + $hoid = ANY(oids) + ) +"); +$fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries;"); + +# Number of rows in aqo_query_texts: related to pgbench test and total value. +my $pgb_fs_samples_count = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_query_texts + WHERE query_hash IN ( + SELECT fspace_hash FROM aqo_data + WHERE $aoid = ANY(oids) OR $boid = ANY(oids) OR $toid = ANY(oids) OR $hoid = ANY(oids) + ) +"); +$fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); + +# Number of rows in aqo_query_stat: related to pgbench test and total value. 
+my $pgb_stat_count = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_query_texts + WHERE query_hash IN ( + SELECT fspace_hash FROM aqo_data + WHERE $aoid = ANY(oids) OR $boid = ANY(oids) OR $toid = ANY(oids) OR $hoid = ANY(oids) + ) +"); +$stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); + +note("pgbench-related rows: aqo_data - $pgb_fss_count/$fss_count, + aqo_queries: $pgb_fs_count/$fs_count, aqo_query_texts: $pgb_fs_samples_count/$fs_samples_count, + aqo_query_stat: $pgb_stat_count/$stat_count"); + +$node->safe_psql('postgres', " + DROP TABLE pgbench_accounts, pgbench_branches, pgbench_tellers, + pgbench_history CASCADE;"); + +# Clean unneeded AQO knowledge +$node->safe_psql('postgres', "SELECT clean_aqo_data()"); + +# Calculate total number of rows in AQO-related tables. +my $new_fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries;"); +my $new_fss_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_data;"); +my $new_fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); +my $new_stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); +note("Total AQO rows after dropping pgbench-related tables: + aqo_queries: $new_fs_count, aqo_data: $new_fss_count, + aqo_query_texts: $new_fs_samples_count, aqo_query_stat: $new_stat_count"); + +# Check total number of rows in AQO knowledge base after removing of +# pgbench-related data. 
+is($new_fs_count == $fs_count - $pgb_fs_count, 1, 'Total number of feature spaces'); +is($new_fss_count == $fss_count - $pgb_fss_count, 1, 'Total number of feature subspaces'); +is($new_fs_samples_count == $fs_samples_count - $pgb_fs_samples_count, 1, 'Total number of samples in aqo_query_texts'); +is($new_stat_count == $stat_count - $pgb_stat_count, 1, 'Total number of samples in aqo_query_texts'); $node->safe_psql('postgres', "DROP EXTENSION aqo"); +# ############################################################################## +# +# Check CREATE/DROP AQO extension commands in a highly concurrent environment. +# +# ############################################################################## + +$node->command_ok([ 'pgbench', '-i', '-s', '1' ], 'init pgbench tables'); +my $bank = File::Temp->new(); +append_to_file($bank, q{ + \set aid random(1, 100000 * :scale) + \set bid random(1, 1 * :scale) + \set tid random(1, 10 * :scale) + \set delta random(-5000, 5000) + \set drop_aqo random(0, 5) + \if :client_id = 0 AND :drop_aqo = 0 + DROP EXTENSION aqo; + \sleep 10 ms + CREATE EXTENSION aqo; + \else + BEGIN; + UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid; + SELECT abalance FROM pgbench_accounts WHERE aid = :aid; + UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid; + UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid; + INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP); + END; + \endif +}); + +$node->safe_psql('postgres', " + CREATE EXTENSION aqo; + ALTER SYSTEM SET aqo.mode = 'intelligent'; + ALTER SYSTEM SET log_statement = 'all'; + SELECT pg_reload_conf(); +"); +$node->restart(); + +$node->command_ok([ 'pgbench', '-T', + "5", '-c', "$CLIENTS", '-j', "$THREADS" , '-f', "$bank"], + 'Conflicts with an AQO dropping command.'); + $node->stop(); diff --git a/t/002_pg_stat_statements_aqo.pl b/t/002_pg_stat_statements_aqo.pl new file mode 
100644 index 00000000..c0bc5127 --- /dev/null +++ b/t/002_pg_stat_statements_aqo.pl @@ -0,0 +1,66 @@ +use strict; +use warnings; + +use PostgresNode; +use TestLib; +use Test::More tests => 3; + +my $node = get_new_node('profiling'); +$node->init; +print "create conf"; + +$node->append_conf('postgresql.conf', qq{ + aqo.mode = 'disabled' + aqo.profile_classes = -1 + aqo.profile_enable = 'true' + aqo.force_collect_stat = 'false' + log_statement = 'ddl' # reduce size of logs. + }); +# Test constants. +my $TRANSACTIONS = 100; +my $CLIENTS = 10; +my $THREADS = 10; +my $query_id; + +# General purpose variables. +my $res; +my $total_classes; +$node->start(); + # ERROR: AQO allow to load library only on startup +print "create extantion aqo"; +$node->psql('postgres', "CREATE EXTENSION aqo"); +$node->psql('postgres', "CREATE EXTENSION pg_stat_statements"); +print "create preload libraries"; +$node->append_conf('postgresql.conf', qq{shared_preload_libraries = 'aqo, pg_stat_statements'}); +$node->restart(); +$node->psql('postgres', "CREATE EXTENSION aqo"); +$node->psql('postgres', "CREATE EXTENSION pg_stat_statements"); +$node->psql('postgres', " + ALTER SYSTEM SET aqo.profile_enable = 'true'; + SELECT pg_reload_conf(); +"); + +$node->psql('postgres', "CREATE TABLE aqo_test0(a int, b int, c int, d int); +WITH RECURSIVE t(a, b, c, d) +AS ( + VALUES (0, 0, 0, 0) + UNION ALL + SELECT t.a + 1, t.b + 1, t.c + 1, t.d + 1 FROM t WHERE t.a < 2000 +) INSERT INTO aqo_test0 (SELECT * FROM t); +CREATE INDEX aqo_test0_idx_a ON aqo_test0 (a); +ANALYZE aqo_test0;"); +$node->psql('postgres', " + ALTER SYSTEM SET aqo.mode = 'controlled'; +"); +$res = $node->safe_psql('postgres', "SELECT * FROM aqo_test0"); +$res = $node->safe_psql('postgres', "SELECT count(*) FROM pg_stat_statements where query = 'SELECT * FROM aqo_test0'"); +is($res, 1); # The same query add in pg_stat_statements +$res = $node->safe_psql('postgres', "SELECT count(*) from aqo_query_texts where query_text = 'SELECT * FROM 
aqo_test0'"); +is($res, 0); # The same query isn't add in aqo_query_texts +$query_id = $node->safe_psql('postgres', "SELECT queryid FROM pg_stat_statements where query = 'SELECT * FROM aqo_test0'"); +$res = $node->safe_psql('postgres', "insert into aqo_queries values ($query_id,'f','f',$query_id,'f')"); +# Add query in aqo_query_texts +$res = $node->safe_psql('postgres', "insert into aqo_query_texts values ($query_id,'SELECT * FROM aqo_test0')"); +$res = $node->safe_psql('postgres', "SELECT count(*) from aqo_query_texts where query_text = 'SELECT * FROM aqo_test0'"); # The same query is in aqo_query_texts +is($res, 1); +$node->stop(); \ No newline at end of file diff --git a/utils.c b/utils.c index 62e6d122..34bcd2f9 100644 --- a/utils.c +++ b/utils.c @@ -12,6 +12,8 @@ * */ +#include "postgres.h" + #include "aqo.h" /* TODO: get rid of those static vars */ From a59779725007178136db5cd430069ba397848c7c Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 23 Mar 2022 09:41:28 +0500 Subject: [PATCH 048/203] Bugfix. Recursing into subquery we must use subroot instead of root to translate relids in this subtree.
--- cardinality_hooks.c | 2 +- path_utils.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 7d962c04..4a0f4d8d 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -299,7 +299,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, List *inner_selectivities; List *outer_selectivities; List *current_selectivities = NULL; - int fss = 0; + int fss = 0; if (IsQueryDisabled()) /* Fast path */ diff --git a/path_utils.c b/path_utils.c index e35fce41..d0865773 100644 --- a/path_utils.c +++ b/path_utils.c @@ -310,7 +310,10 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) selectivities); break; case T_SubqueryScanPath: - return get_path_clauses(((SubqueryScanPath *) path)->subpath, root, + /* Recursing into Subquery we must use subroot */ + Assert(path->parent->subroot != NULL); + return get_path_clauses(((SubqueryScanPath *) path)->subpath, + path->parent->subroot, selectivities); break; case T_ModifyTablePath: From 22afc7d5ee40b91b25b3b9b4390752d1b5ea207c Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 23 Mar 2022 17:09:58 +0500 Subject: [PATCH 049/203] Remove duplicating definition of prev_create_plan_hook in aqo.c --- aqo.c | 1 - 1 file changed, 1 deletion(-) diff --git a/aqo.c b/aqo.c index e36e8e5d..eec011b1 100644 --- a/aqo.c +++ b/aqo.c @@ -109,7 +109,6 @@ set_baserel_rows_estimate_hook_type prev_set_baserel_rows_estimate_hook; get_parameterized_baserel_size_hook_type prev_get_parameterized_baserel_size_hook; set_joinrel_size_estimates_hook_type prev_set_joinrel_size_estimates_hook; get_parameterized_joinrel_size_hook_type prev_get_parameterized_joinrel_size_hook; -create_plan_hook_type prev_create_plan_hook; ExplainOnePlan_hook_type prev_ExplainOnePlan_hook; ExplainOneNode_hook_type prev_ExplainOneNode_hook; From fad26db3e14ec98b79192dc964fc5cda27fb9814 Mon Sep 17 00:00:00 2001 From: "Andrey V. 
Lepikhov" Date: Tue, 5 Apr 2022 11:28:55 +0300 Subject: [PATCH 050/203] Fix print_node_explain. Avoid situation where an AQO node isn't initialized. --- path_utils.c | 6 ++++++ postprocessing.c | 18 +++++++----------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/path_utils.c b/path_utils.c index d0865773..804aeb5f 100644 --- a/path_utils.c +++ b/path_utils.c @@ -56,6 +56,12 @@ create_aqo_plan_node() return node; } +/* + * Extract an AQO node from the plan private field. + * If no one node was found, return pointer to the default value or allocate new + * node (with default value) according to 'create' field. + * Can't return NULL value at all. + */ AQOPlanNode * get_aqo_plan_node(Plan *plan, bool create) { diff --git a/postprocessing.c b/postprocessing.c index 6c2b0b82..d437a444 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -857,26 +857,22 @@ ExtractFromQueryEnv(QueryDesc *queryDesc) void print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) { - int wrkrs = 1; - double error = -1.; - AQOPlanNode *aqo_node; + int wrkrs = 1; + double error = -1.; + AQOPlanNode *aqo_node; /* Extension, which took a hook early can be executed early too. */ if (prev_ExplainOneNode_hook) prev_ExplainOneNode_hook(es, ps, plan); - if (IsQueryDisabled()) + if (IsQueryDisabled() || !plan || es->format != EXPLAIN_FORMAT_TEXT) return; - if (es->format != EXPLAIN_FORMAT_TEXT) - /* Only text format is supported. */ - return; + aqo_node = get_aqo_plan_node(plan, false); - if (!aqo_show_details || !plan || !ps) + if (!aqo_show_details || !ps) goto explain_end; - aqo_node = get_aqo_plan_node(plan, false); - if (!ps->instrument) /* We can show only prediction, without error calculation */ goto explain_print; @@ -919,7 +915,7 @@ print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) appendStringInfo(es->str, "AQO not used"); explain_end: - /* XXX: Do we really have situations than plan is NULL? 
*/ + /* XXX: Do we really have situations when the plan is a NULL pointer? */ if (plan && aqo_show_hash) appendStringInfo(es->str, ", fss=%d", aqo_node->fss); } From a032873ffacd4aa27d55edb18832b0542492de4b Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 19 Apr 2022 14:21:03 +0500 Subject: [PATCH 051/203] Bugfix. Do not try to open an AQO heap relation if an index does not exist. --- aqo--1.2--1.3.sql | 6 +++--- storage.c | 34 +++++++++++++++++++--------------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/aqo--1.2--1.3.sql b/aqo--1.2--1.3.sql index f8bd3e49..605e6b99 100755 --- a/aqo--1.2--1.3.sql +++ b/aqo--1.2--1.3.sql @@ -29,7 +29,7 @@ BEGIN END LOOP; END IF; - FOR aqo_queries_row IN (SELECT * FROM aqo_queries) + FOR aqo_queries_row IN (SELECT * FROM public.aqo_queries) LOOP IF (delete_row = true AND fspace_hash_var <> 0 AND fspace_hash_var = aqo_queries_row.fspace_hash AND @@ -103,7 +103,7 @@ BEGIN aqo_queries.query_hash, to_char(array_avg(execution_time_without_aqo), '9.99EEEE')::float, to_char(array_mse(execution_time_without_aqo), '9.99EEEE')::float - FROM aqo_queries INNER JOIN aqo_query_stat + FROM public.aqo_queries INNER JOIN aqo_query_stat ON aqo_queries.query_hash = aqo_query_stat.query_hash GROUP BY (execution_time_without_aqo, aqo_queries.fspace_hash, aqo_queries.query_hash) ORDER BY execution_time DESC LIMIT n; @@ -129,7 +129,7 @@ BEGIN aqo_queries.query_hash, to_char(array_avg(cardinality_error_without_aqo), '9.99EEEE')::float, to_char(array_mse(cardinality_error_without_aqo), '9.99EEEE')::float - FROM aqo_queries INNER JOIN aqo_query_stat + FROM public.aqo_queries INNER JOIN aqo_query_stat ON aqo_queries.query_hash = aqo_query_stat.query_hash GROUP BY (cardinality_error_without_aqo, aqo_queries.fspace_hash, aqo_queries.query_hash) ORDER BY error DESC LIMIT n; diff --git a/storage.c b/storage.c index 5c62896f..0b7cbf63 100644 --- a/storage.c +++ b/storage.c @@ -56,26 +56,30 @@ open_aqo_relation(char
*heaprelnspname, char *heaprelname, RangeVar *rv; reloid = RelnameGetRelid(indrelname); + if (!OidIsValid(reloid)) + goto cleanup; + rv = makeRangeVar(heaprelnspname, heaprelname, -1); *hrel = table_openrv_extended(rv, lockmode, true); - if (!OidIsValid(reloid) || *hrel == NULL) - { - /* - * Absence of any AQO-related table tell us that someone executed - * a 'DROP EXTENSION aqo' command. We disable AQO for all future queries - * in this backend. For performance reasons we do it locally. - * Clear profiling hash table. - * Also, we gently disable AQO for the rest of the current query - * execution process. - */ - aqo_enabled = false; - disable_aqo_for_query(); - - return false; - } + if (*hrel == NULL) + goto cleanup; *irel = index_open(reloid, lockmode); return true; + +cleanup: + /* + * Absence of any AQO-related table tell us that someone executed + * a 'DROP EXTENSION aqo' command. We disable AQO for all future queries + * in this backend. For performance reasons we do it locally. + * Clear profiling hash table. + * Also, we gently disable AQO for the rest of the current query + * execution process. + */ + aqo_enabled = false; + disable_aqo_for_query(); + return false; + } /* From ad258cc4d96bdbb56bf4a227685680955460caf9 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 19 Apr 2022 16:46:46 +0500 Subject: [PATCH 052/203] Bugfixes: 1. Increase stability of the pgbench test. 2. Open subsidiary AQO relations more carefully. 
--- storage.c | 9 ++++++--- t/001_pgbench.pl | 11 ++++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/storage.c b/storage.c index 0b7cbf63..259d725b 100644 --- a/storage.c +++ b/storage.c @@ -52,8 +52,8 @@ open_aqo_relation(char *heaprelnspname, char *heaprelname, char *indrelname, LOCKMODE lockmode, Relation *hrel, Relation *irel) { - Oid reloid; - RangeVar *rv; + Oid reloid; + RangeVar *rv; reloid = RelnameGetRelid(indrelname); if (!OidIsValid(reloid)) @@ -64,7 +64,10 @@ open_aqo_relation(char *heaprelnspname, char *heaprelname, if (*hrel == NULL) goto cleanup; - *irel = index_open(reloid, lockmode); + /* Try to open index relation carefully. */ + *irel = try_relation_open(reloid, lockmode); + if (*irel == NULL) + goto cleanup; return true; cleanup: diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index cdc18d38..624e6cdc 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -109,6 +109,10 @@ (SELECT count(aid) AS x FROM pgbench_accounts GROUP BY abalance HAVING abalance < :border) AS q1 WHERE pgbb.bid = q1.x; }); + +# Avoid problems with an error fluctuations during the test above. +$node->safe_psql('postgres', "TRUNCATE aqo_query_stat"); + # Look for top of problematic queries. 
$node->command_ok([ 'pgbench', '-t', "10", '-c', "$CLIENTS", '-j', "$THREADS", '-f', "$analytics" ], @@ -127,7 +131,7 @@ $res = $node->safe_psql('postgres', "SELECT count(*) FROM top_time_queries(10) v WHERE v.execution_time > 0."); -is($res, 10); +is($res, 5); # ############################################################################## # @@ -281,7 +285,8 @@ SELECT abalance FROM pgbench_accounts WHERE aid = :aid; UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid; UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid; - INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP); + INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) + VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP); END; \endif }); @@ -295,7 +300,7 @@ $node->restart(); $node->command_ok([ 'pgbench', '-T', - "5", '-c', "$CLIENTS", '-j', "$THREADS" , '-f', "$bank"], + "50", '-c', "$CLIENTS", '-j', "$THREADS" , '-f', "$bank"], 'Conflicts with an AQO dropping command.'); $node->stop(); From 4d009b72037561d5b75d4788d2bee210ec40d4fe Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 20 Apr 2022 15:09:00 +0500 Subject: [PATCH 053/203] Parameterize 001_pgbench.pl: allow to define a number of transactions, clients and threads from the environment. --- t/001_pgbench.pl | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 624e6cdc..c4ddb7ae 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -14,11 +14,25 @@ log_statement = 'ddl' }); -# Test constants. +# Test constants. Default values. my $TRANSACTIONS = 1000; my $CLIENTS = 10; my $THREADS = 10; +# Change pgbench parameters according to the environment variable. 
+if (defined $ENV{TRANSACTIONS}) +{ + $TRANSACTIONS = $ENV{TRANSACTIONS}; +} +if (defined $ENV{CLIENTS}) +{ + $CLIENTS = $ENV{CLIENTS}; +} +if (defined $ENV{THREADS}) +{ + $THREADS = $ENV{THREADS}; +} + # General purpose variables. my $res; my $fss_count; From 045357204635544e2a2f70d4c323043e1865b24b Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 20 Apr 2022 15:23:34 +0500 Subject: [PATCH 054/203] Update c-cpp.yml Change CI to drastically increase concurrency among pgbench clients --- .github/workflows/c-cpp.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index a23ec574..3c987855 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -24,5 +24,4 @@ jobs: git subtree add --prefix=contrib/aqo https://github.com/postgrespro/aqo.git $GITHUB_REF patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg13.patch make -j4 > /dev/null && make -j4 -C contrib > /dev/null - make -C contrib/aqo check - make -C contrib/aqo aqo-regress + env CLIENTS=50 THREADS=50 make -C contrib/aqo check From af515eafbbcd868d8ba886b7fb6f4ec7f087ac91 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 20 Apr 2022 16:21:29 +0500 Subject: [PATCH 055/203] Bugfix. close heap relation in the case of races between backend and 'DROP EXTENSION aqo'. --- storage.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/storage.c b/storage.c index 259d725b..cf2ee59e 100644 --- a/storage.c +++ b/storage.c @@ -67,7 +67,10 @@ open_aqo_relation(char *heaprelnspname, char *heaprelname, /* Try to open index relation carefully. */ *irel = try_relation_open(reloid, lockmode); if (*irel == NULL) + { + relation_close(*hrel, lockmode); goto cleanup; + } return true; cleanup: From 506683c4de06a6e6471f64afef25f11605fe55e2 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 28 Apr 2022 09:24:30 +0500 Subject: [PATCH 056/203] Bugfix. 
Fix omissions related to shifting from 32-bit query hash to 64-bit hash --- aqo.c | 8 +++-- aqo.h | 4 +-- auto_tuning.c | 6 ++-- expected/plancache.out | 2 +- postprocessing.c | 9 ++---- preprocessing.c | 22 +++---------- sql/plancache.sql | 2 +- storage.c | 71 ++++++++++++++++++++++++------------------ 8 files changed, 61 insertions(+), 63 deletions(-) diff --git a/aqo.c b/aqo.c index eec011b1..4cffc94f 100644 --- a/aqo.c +++ b/aqo.c @@ -307,11 +307,13 @@ get_aqo_schema(void) * Init userlock */ void -init_lock_tag(LOCKTAG *tag, uint32 key1, uint32 key2) +init_lock_tag(LOCKTAG *tag, uint64 key1, int32 key2) { + uint32 key = key1 % UINT32_MAX; + tag->locktag_field1 = AQO_MODULE_MAGIC; - tag->locktag_field2 = key1; - tag->locktag_field3 = key2; + tag->locktag_field2 = key; + tag->locktag_field3 = (uint32) key2; tag->locktag_field4 = 0; tag->locktag_type = LOCKTAG_USERLOCK; tag->locktag_lockmethodid = USER_LOCKMETHOD; diff --git a/aqo.h b/aqo.h index 04f18994..fff0bb06 100644 --- a/aqo.h +++ b/aqo.h @@ -281,7 +281,7 @@ int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); /* Storage interaction */ -extern bool find_query(uint64 qhash, Datum *search_values, bool *search_nulls); +extern bool find_query(uint64 qhash, QueryContextData *ctx); extern bool update_query(uint64 qhash, uint64 fhash, bool learn_aqo, bool use_aqo, bool auto_tuning); extern bool add_query_text(uint64 query_hash, const char *query_string); @@ -344,7 +344,7 @@ extern double *selectivity_cache_find_global_relid(int clause_hash, extern void selectivity_cache_clear(void); extern Oid get_aqo_schema(void); -extern void init_lock_tag(LOCKTAG *tag, uint32 key1, uint32 key2); +extern void init_lock_tag(LOCKTAG *tag, uint64 key1, int32 key2); extern bool IsQueryDisabled(void); extern List *cur_classes; diff --git a/auto_tuning.c b/auto_tuning.c index a98578cf..43fbe439 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -144,7 +144,7 @@ is_in_infinite_loop_cq(double *elems, int 
nelems) * this query to false. */ void -automatical_query_tuning(uint64 query_hash, QueryStat * stat) +automatical_query_tuning(uint64 qhash, QueryStat * stat) { double unstability = auto_tuning_exploration; double t_aqo, @@ -204,11 +204,11 @@ automatical_query_tuning(uint64 query_hash, QueryStat * stat) } if (num_iterations <= auto_tuning_max_iterations || p_use > 0.5) - update_query(query_hash, + update_query(qhash, query_context.fspace_hash, query_context.learn_aqo, query_context.use_aqo, true); else - update_query(query_hash, query_context.fspace_hash, false, false, false); + update_query(qhash, query_context.fspace_hash, false, false, false); } diff --git a/expected/plancache.out b/expected/plancache.out index 64eecf99..0d019334 100644 --- a/expected/plancache.out +++ b/expected/plancache.out @@ -33,7 +33,7 @@ BEGIN END $$ LANGUAGE 'plpgsql'; -- The function shows 6 executions without an AQO support (nnex) and -- 4 executions with usage of an AQO knowledge base (nex). Planning time in the --- case of AQO support (pt) is equal to '-1', because the query plan is exracted +-- case of AQO support (pt) is equal to '-1', because the query plan is extracted -- from the plan cache. SELECT * FROM f1(); nnex | nex | pt diff --git a/postprocessing.c b/postprocessing.c index d437a444..f9c00b50 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -94,7 +94,7 @@ atomic_fss_learn_step(uint64 fhash, int fss_hash, int ncols, LOCKTAG tag; int nrows; - init_lock_tag(&tag, (uint32) fhash, fss_hash); + init_lock_tag(&tag, fhash, fss_hash); LockAcquire(&tag, ExclusiveLock, false, false); if (!load_fss(fhash, fss_hash, ncols, matrix, targets, &nrows, NULL)) @@ -671,10 +671,9 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) cardinality_error = cardinality_sum_errors / cardinality_num_objects; else cardinality_error = -1; - Assert(query_context.query_hash>=0); + /* Prevent concurrent updates. 
*/ - init_lock_tag(&tag, (uint32) query_context.query_hash,//my code - (uint32) query_context.fspace_hash);//possible here + init_lock_tag(&tag, query_context.query_hash, query_context.fspace_hash); LockAcquire(&tag, ExclusiveLock, false, false); if (stat != NULL) @@ -706,7 +705,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) &stat->executions_without_aqo); /* Store all learn data into the AQO service relations. */ - Assert(query_context.query_hash>=0); if (!query_context.adding_query && query_context.auto_tuning) automatical_query_tuning(query_context.query_hash, stat); @@ -970,7 +968,6 @@ print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, */ if (aqo_mode != AQO_MODE_DISABLED || force_collect_stat) { - Assert(query_context.query_hash>=0); if (aqo_show_hash) ExplainPropertyInteger("Query hash", NULL, query_context.query_hash, es); diff --git a/preprocessing.c b/preprocessing.c index 30462552..6bb19632 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -136,8 +136,6 @@ aqo_planner(Query *parse, ParamListInfo boundParams) { bool query_is_stored = false; - Datum query_params[5]; - bool query_nulls[5] = {false, false, false, false, false}; LOCKTAG tag; MemoryContext oldCxt; @@ -187,7 +185,7 @@ aqo_planner(Query *parse, boundParams); } - elog(DEBUG1, "AQO will be used for query '%s', class %ld", + elog(DEBUG1, "AQO will be used for query '%s', class "UINT64_FORMAT, query_string ? query_string : "null string", query_context.query_hash); oldCxt = MemoryContextSwitchTo(AQOMemoryContext); @@ -201,8 +199,7 @@ aqo_planner(Query *parse, goto ignore_query_settings; } - query_is_stored = find_query(query_context.query_hash, &query_params[0], - &query_nulls[0]); + query_is_stored = find_query(query_context.query_hash, &query_context); if (!query_is_stored) { @@ -256,16 +253,12 @@ aqo_planner(Query *parse, else /* Query class exists in a ML knowledge base. 
*/ { query_context.adding_query = false; - query_context.learn_aqo = DatumGetBool(query_params[1]); - query_context.use_aqo = DatumGetBool(query_params[2]); - query_context.fspace_hash = DatumGetInt64(query_params[3]); - query_context.auto_tuning = DatumGetBool(query_params[4]); - query_context.collect_stat = query_context.auto_tuning; + + /* Other query_context fields filled in the find_query() routine. */ /* * Deactivate query if no one reason exists for usage of an AQO machinery. */ - Assert(query_context.query_hash>=0); if (!query_context.learn_aqo && !query_context.use_aqo && !query_context.auto_tuning && !force_collect_stat) add_deactivated_query(query_context.query_hash); @@ -291,7 +284,6 @@ aqo_planner(Query *parse, * In this mode we want to learn with incoming query (if it is not * suppressed manually) and collect stats. */ - Assert(query_context.query_hash>=0); query_context.collect_stat = true; query_context.fspace_hash = query_context.query_hash; break; @@ -315,15 +307,13 @@ aqo_planner(Query *parse, * find-add query and query text must be atomic operation to prevent * concurrent insertions. */ - Assert(query_context.query_hash>=0); - init_lock_tag(&tag, (uint32) query_context.query_hash, (uint32) 0);//my code + init_lock_tag(&tag, query_context.query_hash, 0); LockAcquire(&tag, ExclusiveLock, false, false); /* * Add query into the AQO knowledge base. To process an error with * concurrent addition from another backend we will try to restart * preprocessing routine. */ - Assert(query_context.query_hash>=0); update_query(query_context.query_hash, query_context.fspace_hash, query_context.learn_aqo, query_context.use_aqo, query_context.auto_tuning); @@ -332,7 +322,6 @@ aqo_planner(Query *parse, * Add query text into the ML-knowledge base. Just for further * analysis. In the case of cached plans we could have NULL query text. 
*/ - Assert(query_context.query_hash>=0); if (query_string != NULL) add_query_text(query_context.query_hash, query_string); @@ -346,7 +335,6 @@ aqo_planner(Query *parse, * query execution statistics in any mode. */ query_context.collect_stat = true; - Assert(query_context.query_hash>=0); query_context.fspace_hash = query_context.query_hash; } diff --git a/sql/plancache.sql b/sql/plancache.sql index 8208b1d3..035b8904 100644 --- a/sql/plancache.sql +++ b/sql/plancache.sql @@ -37,7 +37,7 @@ END $$ LANGUAGE 'plpgsql'; -- The function shows 6 executions without an AQO support (nnex) and -- 4 executions with usage of an AQO knowledge base (nex). Planning time in the --- case of AQO support (pt) is equal to '-1', because the query plan is exracted +-- case of AQO support (pt) is equal to '-1', because the query plan is extracted -- from the plan cache. SELECT * FROM f1(); diff --git a/storage.c b/storage.c index cf2ee59e..48fa8064 100644 --- a/storage.c +++ b/storage.c @@ -94,19 +94,22 @@ open_aqo_relation(char *heaprelnspname, char *heaprelname, * * Use dirty snapshot to see all (include in-progess) data. We want to prevent * wait in the XactLockTableWait routine. + * If query is found in the knowledge base, fill the query context struct. 
*/ bool -find_query(uint64 qhash, Datum *search_values, bool *search_nulls) +find_query(uint64 qhash, QueryContextData *ctx) { - Relation hrel; - Relation irel; - HeapTuple tuple; + Relation hrel; + Relation irel; + HeapTuple tuple; TupleTableSlot *slot; - bool shouldFree; - IndexScanDesc scan; - ScanKeyData key; - SnapshotData snap; - bool find_ok = false; + bool shouldFree = true; + IndexScanDesc scan; + ScanKeyData key; + SnapshotData snap; + bool find_ok = false; + Datum values[5]; + bool nulls[5] = {false, false, false, false, false}; if (!open_aqo_relation("public", "aqo_queries", "aqo_queries_query_hash_idx", AccessShareLock, &hrel, &irel)) @@ -114,24 +117,30 @@ find_query(uint64 qhash, Datum *search_values, bool *search_nulls) InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int64GetDatum(qhash)); + ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); index_rescan(scan, &key, 1, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); - if (find_ok && search_values != NULL) + if (find_ok) { tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); Assert(shouldFree != true); - heap_deform_tuple(tuple, hrel->rd_att, search_values, search_nulls); + heap_deform_tuple(tuple, hrel->rd_att, values, nulls); + + /* Fill query context data */ + ctx->learn_aqo = DatumGetBool(values[1]); + ctx->use_aqo = DatumGetBool(values[2]); + ctx->fspace_hash = DatumGetInt64(values[3]); + ctx->auto_tuning = DatumGetBool(values[4]); + ctx->collect_stat = query_context.auto_tuning; } ExecDropSingleTupleTableSlot(slot); index_endscan(scan); index_close(irel, AccessShareLock); table_close(hrel, AccessShareLock); - return find_ok; } @@ -177,7 +186,7 @@ update_query(uint64 qhash, uint64 fhash, */ InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, 
BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(qhash)); + ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); index_rescan(scan, &key, 1, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); @@ -222,7 +231,8 @@ update_query(uint64 qhash, uint64 fhash, * Ooops, somebody concurrently updated the tuple. It is possible * only in the case of changes made by third-party code. */ - elog(ERROR, "AQO feature space data for signature (%ld, %ld) concurrently" + elog(ERROR, "AQO feature space data for signature ("UINT64_FORMAT \ + ", "UINT64_FORMAT") concurrently" " updated by a stranger backend.", qhash, fhash); result = false; @@ -284,7 +294,7 @@ add_query_text(uint64 qhash, const char *query_string) */ InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(qhash)); + ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); index_rescan(scan, &key, 1, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); @@ -391,7 +401,7 @@ load_fss(uint64 fhash, int fss_hash, return false; scan = index_beginscan(hrel, irel, SnapshotSelf, 2, 0); - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fhash)); + ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fhash)); ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss_hash)); index_rescan(scan, key, 2, NULL, 0); @@ -423,9 +433,10 @@ load_fss(uint64 fhash, int fss_hash, *relids = deform_oids_vector(values[5]); } else - elog(ERROR, "unexpected number of features for hash (%ld, %d):\ - expected %d features, obtained %d", - fhash, fss_hash, ncols, DatumGetInt32(values[2])); + elog(ERROR, "unexpected number of features for hash (" \ + UINT64_FORMAT", %d):\ + expected %d features, obtained %d", + fhash, fss_hash, ncols, DatumGetInt32(values[2])); } else success = false; @@ -484,7 +495,7 @@ 
update_fss(uint64 fhash, int fsshash, int nrows, int ncols, InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 2, 0); - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fhash)); + ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fhash)); ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fsshash)); index_rescan(scan, key, 2, NULL, 0); @@ -494,7 +505,7 @@ update_fss(uint64 fhash, int fsshash, int nrows, int ncols, if (!find_ok) { - values[0] = Int32GetDatum(fhash); + values[0] = Int64GetDatum(fhash); values[1] = Int32GetDatum(fsshash); values[2] = Int32GetDatum(ncols); @@ -549,8 +560,8 @@ update_fss(uint64 fhash, int fsshash, int nrows, int ncols, * Ooops, somebody concurrently updated the tuple. It is possible * only in the case of changes made by third-party code. */ - elog(ERROR, "AQO data piece (%ld %d) concurrently updated" - " by a stranger backend.", + elog(ERROR, "AQO data piece ("UINT64_FORMAT" %d) concurrently" + " updated by a stranger backend.", fhash, fsshash); result = false; } @@ -596,7 +607,7 @@ get_aqo_stat(uint64 qhash) return false; scan = index_beginscan(hrel, irel, SnapshotSelf, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(qhash)); + ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); index_rescan(scan, &key, 1, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); @@ -667,7 +678,7 @@ update_aqo_stat(uint64 qhash, QueryStat *stat) InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int64GetDatum(qhash)); + ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); index_rescan(scan, &key, 1, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); @@ -713,8 +724,8 @@ update_aqo_stat(uint64 qhash, QueryStat *stat) * Ooops, somebody concurrently updated the tuple. 
It is possible * only in the case of changes made by third-party code. */ - elog(ERROR, "AQO statistic data for query signature %ld concurrently" - " updated by a stranger backend.", + elog(ERROR, "AQO statistic data for query signature "UINT64_FORMAT + " concurrently updated by a stranger backend.", qhash); } } @@ -914,8 +925,8 @@ init_deactivated_queries_storage(void) /* Create the hashtable proper */ MemSet(&hash_ctl, 0, sizeof(hash_ctl)); - hash_ctl.keysize = sizeof(int); - hash_ctl.entrysize = sizeof(int); + hash_ctl.keysize = sizeof(uint64); + hash_ctl.entrysize = sizeof(uint64); deactivated_queries = hash_create("aqo_deactivated_queries", 128, /* start small and extend */ &hash_ctl, From 62220606b867afb4dbd63495750d18b0d0a5a3dd Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 29 Apr 2022 15:03:11 +0500 Subject: [PATCH 057/203] Bugfix: we can't use C++ reserved words as identifiers for shared variables or routines. --- aqo_pg13.patch | 73 +++++++++++++++++++++++++++----------------------- path_utils.c | 12 ++++----- 2 files changed, 46 insertions(+), 39 deletions(-) diff --git a/aqo_pg13.patch b/aqo_pg13.patch index afb43aba..3755bbf5 100644 --- a/aqo_pg13.patch +++ b/aqo_pg13.patch @@ -57,39 +57,39 @@ index bc05c96b4c..b6a3abe0d2 100644 if (es->format == EXPLAIN_FORMAT_TEXT) appendStringInfoChar(es->str, '\n'); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index 682b28ed72..e64ea3ff46 100644 +index 682b28ed72..3a5c615deb 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -132,6 +132,7 @@ CopyPlanFields(const Plan *from, Plan *newnode) COPY_NODE_FIELD(initPlan); COPY_BITMAPSET_FIELD(extParam); COPY_BITMAPSET_FIELD(allParam); -+ COPY_NODE_FIELD(private); ++ COPY_NODE_FIELD(ext_nodes); } /* diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c -index 7237b52e96..025b4fde2b 100644 +index 7237b52e96..5e2ee2732a 100644 --- a/src/backend/nodes/outfuncs.c +++ 
b/src/backend/nodes/outfuncs.c @@ -342,6 +342,7 @@ _outPlanInfo(StringInfo str, const Plan *node) WRITE_NODE_FIELD(initPlan); WRITE_BITMAPSET_FIELD(extParam); WRITE_BITMAPSET_FIELD(allParam); -+ /*WRITE_NODE_FIELD(private); */ ++ /*WRITE_NODE_FIELD(ext_nodes); */ } /* diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c -index 62c945b6c5..23ab51fb9b 100644 +index 62c945b6c5..a39046ca56 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1580,6 +1580,11 @@ ReadCommonPlan(Plan *local_node) READ_NODE_FIELD(initPlan); READ_BITMAPSET_FIELD(extParam); READ_BITMAPSET_FIELD(allParam); -+ local_node->private = NIL; -+ /* READ_NODE_FIELD(private); ++ local_node->ext_nodes = NIL; ++ /* READ_NODE_FIELD(ext_nodes); + * Don't serialize this field. It is required to serialize RestrictInfo and + * EqualenceClass. + */ @@ -97,7 +97,7 @@ index 62c945b6c5..23ab51fb9b 100644 /* diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index 92b5223fee..a533c2cada 100644 +index 4edc859cb5..988f2e6ab7 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -98,6 +98,12 @@ @@ -121,7 +121,7 @@ index 92b5223fee..a533c2cada 100644 /* -@@ -4626,6 +4631,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4632,6 +4637,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) } @@ -180,7 +180,7 @@ index 92b5223fee..a533c2cada 100644 /* * set_baserel_size_estimates * Set the size estimates for the given base relation. 
-@@ -4642,19 +4699,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4648,19 +4705,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) { @@ -201,7 +201,7 @@ index 92b5223fee..a533c2cada 100644 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); -@@ -4665,13 +4713,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) +@@ -4671,13 +4719,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) * get_parameterized_baserel_size * Make a size estimate for a parameterized scan of a base relation. * @@ -237,7 +237,7 @@ index 92b5223fee..a533c2cada 100644 { List *allclauses; double nrows; -@@ -4700,6 +4768,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -4706,6 +4774,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates * Set the size estimates for the given join relation. * @@ -274,7 +274,7 @@ index 92b5223fee..a533c2cada 100644 * The rel's targetlist must have been constructed already, and a * restriction clause list that matches the given component rels must * be provided. -@@ -4719,11 +4817,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -4725,11 +4823,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * build_joinrel_tlist, and baserestrictcost is not used for join rels. */ void @@ -291,7 +291,7 @@ index 92b5223fee..a533c2cada 100644 { rel->rows = calc_joinrel_size_estimate(root, rel, -@@ -4739,6 +4837,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -4745,6 +4843,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * get_parameterized_joinrel_size * Make a size estimate for a parameterized scan of a join relation. * @@ -327,7 +327,7 @@ index 92b5223fee..a533c2cada 100644 * 'rel' is the joinrel under consideration. 
* 'outer_path', 'inner_path' are (probably also parameterized) Paths that * produce the relations being joined. -@@ -4751,11 +4878,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -4757,11 +4884,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates must have been applied already. */ double @@ -344,7 +344,7 @@ index 92b5223fee..a533c2cada 100644 { double nrows; -@@ -5424,7 +5551,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) +@@ -5430,7 +5557,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) /* Should only be applied to base relations */ Assert(rel->relid > 0); @@ -353,7 +353,7 @@ index 92b5223fee..a533c2cada 100644 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); -@@ -5710,7 +5837,7 @@ page_size(double tuples, int width) +@@ -5716,7 +5843,7 @@ page_size(double tuples, int width) * Estimate the fraction of the work that each worker will do given the * number of workers budgeted for the path. 
*/ @@ -363,7 +363,7 @@ index 92b5223fee..a533c2cada 100644 { double parallel_divisor = path->parallel_workers; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index e445debe57..365b7aa319 100644 +index 917713c163..5b7bf1cec6 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -70,6 +70,7 @@ @@ -385,11 +385,11 @@ index e445debe57..365b7aa319 100644 return plan; } -@@ -5162,6 +5167,7 @@ copy_generic_path_info(Plan *dest, Path *src) +@@ -5163,6 +5168,7 @@ copy_generic_path_info(Plan *dest, Path *src) dest->plan_width = src->pathtarget->width; dest->parallel_aware = src->parallel_aware; dest->parallel_safe = src->parallel_safe; -+ dest->private = NIL; ++ dest->ext_nodes = NIL; } /* @@ -475,14 +475,14 @@ index 60e7fda6a9..5732c7a685 100644 extra->targetList); diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c -index a203e6f1ff..f8db135be0 100644 +index a203e6f1ff..d31bf5bae6 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -258,6 +258,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->partexprs = NULL; rel->nullable_partexprs = NULL; rel->partitioned_child_rels = NIL; -+ rel->private = NULL; ++ rel->ext_nodes = NULL; /* * Pass assorted information down the inheritance hierarchy. @@ -498,7 +498,7 @@ index a203e6f1ff..f8db135be0 100644 joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; joinrel->partitioned_child_rels = NIL; -+ joinrel->private = NULL; ++ joinrel->ext_nodes = NULL; /* Compute information relevant to the foreign relations. 
*/ set_foreign_rel_properties(joinrel, outer_rel, inner_rel); @@ -506,7 +506,7 @@ index a203e6f1ff..f8db135be0 100644 joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; joinrel->partitioned_child_rels = NIL; -+ joinrel->private = NULL; ++ joinrel->ext_nodes = NULL; joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids, inner_rel->top_parent_relids); @@ -596,10 +596,10 @@ index ba661d32a6..09d0abe58b 100644 extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, ParamListInfo params, DestReceiver *dest); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h -index 69150e46eb..c7361a7ef4 100644 +index 5ebf070979..5b2acd7de2 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h -@@ -738,6 +738,10 @@ typedef struct RelOptInfo +@@ -739,6 +739,10 @@ typedef struct RelOptInfo Relids top_parent_relids; /* Relids of topmost parents (if "other" * rel) */ @@ -610,16 +610,20 @@ index 69150e46eb..c7361a7ef4 100644 /* used for partitioned relations: */ PartitionScheme part_scheme; /* Partitioning scheme */ int nparts; /* Number of partitions; -1 if not yet set; in -@@ -753,6 +757,8 @@ typedef struct RelOptInfo +@@ -754,6 +758,12 @@ typedef struct RelOptInfo List **partexprs; /* Non-nullable partition key expressions */ List **nullable_partexprs; /* Nullable partition key expressions */ List *partitioned_child_rels; /* List of RT indexes */ + -+ List *private; ++ /* ++ * At this list an extension can add additional nodes to pass an info along ++ * the planning and executing stages. 
++ */ ++ List *ext_nodes; } RelOptInfo; /* -@@ -1104,6 +1110,10 @@ typedef struct ParamPathInfo +@@ -1105,6 +1115,10 @@ typedef struct ParamPathInfo Relids ppi_req_outer; /* rels supplying parameters used by path */ double ppi_rows; /* estimated number of result tuples */ List *ppi_clauses; /* join clauses available from outer rels */ @@ -631,16 +635,19 @@ index 69150e46eb..c7361a7ef4 100644 diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 90f02ce6fd..b093dc46ce 100644 +index 90f02ce6fd..f3e2138ee2 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h -@@ -159,6 +159,9 @@ typedef struct Plan +@@ -159,6 +159,12 @@ typedef struct Plan */ Bitmapset *extParam; Bitmapset *allParam; + -+ /* Additional field for an extension purposes. */ -+ List *private; ++ /* ++ * Additional fields for an extension purposes. ++ * TODO: allow to serialize/deserialize this list. ++ */ ++ List *ext_nodes; } Plan; /* ---------------- diff --git a/path_utils.c b/path_utils.c index 804aeb5f..decf6d93 100644 --- a/path_utils.c +++ b/path_utils.c @@ -68,7 +68,7 @@ get_aqo_plan_node(Plan *plan, bool create) AQOPlanNode *node = NULL; ListCell *lc; - foreach(lc, plan->private) + foreach(lc, plan->ext_nodes) { AQOPlanNode *candidate = (AQOPlanNode *) lfirst(lc); @@ -88,7 +88,7 @@ get_aqo_plan_node(Plan *plan, bool create) return &DefaultAQOPlanNode; node = create_aqo_plan_node(); - plan->private = lappend(plan->private, node); + plan->ext_nodes = lappend(plan->ext_nodes, node); } Assert(node); @@ -176,10 +176,10 @@ subplan_hunter(Node *node, void *context) splan->plan_id - 1); upper_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL); - Assert(list_length(upper_rel->private) == 1); - Assert(IsA((Node *) linitial(upper_rel->private), A_Const)); + Assert(list_length(upper_rel->ext_nodes) == 1); + Assert(IsA((Node *) linitial(upper_rel->ext_nodes), A_Const)); - fss = (A_Const *) linitial(upper_rel->private); + fss = (A_Const *) 
linitial(upper_rel->ext_nodes); return (Node *) copyObject(fss); } return expression_tree_mutator(node, subplan_hunter, context); @@ -665,5 +665,5 @@ aqo_store_upper_signature_hook(PlannerInfo *root, fss_node->val.type = T_Integer; fss_node->location = -1; fss_node->val.val.ival = get_fss_for_object(relids, clauses, NIL, NULL, NULL); - output_rel->private = lappend(output_rel->private, (void *) fss_node); + output_rel->ext_nodes = lappend(output_rel->ext_nodes, (void *) fss_node); } From 137b8157a0baa95c8aba8f1b71543468422f594b Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 11 May 2022 15:14:12 +0500 Subject: [PATCH 058/203] Bugfix. Normalize cardinality error. --- postprocessing.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/postprocessing.c b/postprocessing.c index f9c00b50..524f41e0 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -435,7 +435,7 @@ learnOnPlanState(PlanState *p, void *context) /* Exclude "not executed" nodes from error calculation to reduce fluctuations. */ if (!notExecuted) { - cardinality_sum_errors += fabs(predicted - learn_rows); + cardinality_sum_errors += fabs(log(predicted) - log(learn_rows)); cardinality_num_objects += 1; } From fc8504c2233495e90cc9d0247a7cc3c1f59c6f89 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 24 Feb 2022 13:54:23 +0500 Subject: [PATCH 059/203] Introduce usage of statement timeout. In the case when a user sets a statement timeout, AQO adds one more timeout right before it. If the timeout expires, AQO walks across the PlanState tree and learns on partially executed nodes. TODO: 1. We should somehow remember that partial knowledge isn't real and use it only before the first successful execution. 2. We can distinguish already finished nodes and partially finished nodes. For nodes which really had time to finish execution, we should store cardinality "AS IS". In other situations we should use some extrapolation formula. 3.
Maybe we shouldn't change instrumentation during partial walk? 4. Think about parallel workers. --- Makefile | 2 +- README.md | 2 +- aqo.c | 7 +- aqo.h | 15 ++- auto_tuning.c | 2 +- cardinality_estimation.c | 6 +- cardinality_hooks.c | 5 +- hash.c | 2 +- learn_cache.c | 157 +++++++++++++++++++++++++++++++ learn_cache.h | 15 +++ machine_learning.c | 2 +- path_utils.c | 2 +- postprocessing.c | 193 +++++++++++++++++++++++++++++++++------ preprocessing.c | 2 +- selectivity_cache.c | 2 +- storage.c | 32 ++++++- t/001_pgbench.pl | 2 +- utils.c | 2 +- 18 files changed, 400 insertions(+), 50 deletions(-) create mode 100644 learn_cache.c create mode 100644 learn_cache.h diff --git a/Makefile b/Makefile index b351ae0e..2845854f 100755 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ PGFILEDESC = "AQO - Adaptive Query Optimization" MODULE_big = aqo OBJS = aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ -selectivity_cache.o storage.o utils.o $(WIN32RES) +selectivity_cache.o storage.o utils.o learn_cache.o $(WIN32RES) TAP_TESTS = 1 diff --git a/README.md b/README.md index b3c0216e..aa7946cf 100644 --- a/README.md +++ b/README.md @@ -325,7 +325,7 @@ Dynamically generated constants are okay. ## License -© [Postgres Professional](https://postgrespro.com/), 2016-2021. Licensed under +© [Postgres Professional](https://postgrespro.com/), 2016-2022. Licensed under [The PostgreSQL License](LICENSE). 
## Reference diff --git a/aqo.c b/aqo.c index 4cffc94f..2acc4729 100644 --- a/aqo.c +++ b/aqo.c @@ -2,7 +2,7 @@ * aqo.c * Adaptive query optimization extension * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/aqo.c @@ -21,6 +21,7 @@ #include "cardinality_hooks.h" #include "path_utils.h" #include "preprocessing.h" +#include "learn_cache.h" PG_MODULE_MAGIC; @@ -103,6 +104,7 @@ int njoins; post_parse_analyze_hook_type prev_post_parse_analyze_hook; planner_hook_type prev_planner_hook; ExecutorStart_hook_type prev_ExecutorStart_hook; +ExecutorRun_hook_type prev_ExecutorRun; ExecutorEnd_hook_type prev_ExecutorEnd_hook; set_baserel_rows_estimate_hook_type prev_set_foreign_rows_estimate_hook; set_baserel_rows_estimate_hook_type prev_set_baserel_rows_estimate_hook; @@ -203,6 +205,8 @@ _PG_init(void) planner_hook = aqo_planner; prev_ExecutorStart_hook = ExecutorStart_hook; ExecutorStart_hook = aqo_ExecutorStart; + prev_ExecutorRun = ExecutorRun_hook; + ExecutorRun_hook = aqo_ExecutorRun; prev_ExecutorEnd_hook = ExecutorEnd_hook; ExecutorEnd_hook = aqo_ExecutorEnd; @@ -241,6 +245,7 @@ _PG_init(void) ALLOCSET_DEFAULT_SIZES); RegisterResourceReleaseCallback(aqo_free_callback, NULL); RegisterAQOPlanNodeMethods(); + lc_init(); } PG_FUNCTION_INFO_V1(invalidate_deactivated_queries_cache); diff --git a/aqo.h b/aqo.h index fff0bb06..d47a855f 100644 --- a/aqo.h +++ b/aqo.h @@ -105,7 +105,7 @@ * Module storage.c is responsible for storage query settings and models * (i. e. all information which is used in extension). 
* - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/aqo.h @@ -258,6 +258,7 @@ extern MemoryContext AQO_cache_mem_ctx; extern post_parse_analyze_hook_type prev_post_parse_analyze_hook; extern planner_hook_type prev_planner_hook; extern ExecutorStart_hook_type prev_ExecutorStart_hook; +extern ExecutorRun_hook_type prev_ExecutorRun; extern ExecutorEnd_hook_type prev_ExecutorEnd_hook; extern set_baserel_rows_estimate_hook_type prev_set_foreign_rows_estimate_hook; @@ -285,9 +286,15 @@ extern bool find_query(uint64 qhash, QueryContextData *ctx); extern bool update_query(uint64 qhash, uint64 fhash, bool learn_aqo, bool use_aqo, bool auto_tuning); extern bool add_query_text(uint64 query_hash, const char *query_string); +extern bool load_fss_ext(uint64 fs, int fss, + int ncols, double **matrix, double *targets, int *rows, + List **relids, bool isSafe); extern bool load_fss(uint64 fhash, int fss_hash, int ncols, double **matrix, double *targets, int *rows, List **relids); +extern bool update_fss_ext(uint64 fhash, int fsshash, int nrows, int ncols, + double **matrix, double *targets, List *relids, + bool isTimedOut); extern bool update_fss(uint64 fhash, int fss_hash, int nrows, int ncols, double **matrix, double *targets, List *relids); QueryStat *get_aqo_stat(uint64 query_hash); @@ -313,8 +320,10 @@ double predict_for_relation(List *restrict_clauses, List *selectivities, List *relids, int *fss_hash); /* Query execution statistics collecting hooks */ -void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags); -void aqo_ExecutorEnd(QueryDesc *queryDesc); +void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags); +void aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, + uint64 count, bool execute_once); +void aqo_ExecutorEnd(QueryDesc *queryDesc); /* Machine learning techniques */ extern double OkNNr_predict(int nrows, int ncols, diff --git a/auto_tuning.c b/auto_tuning.c index 
43fbe439..29930db0 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/auto_tuning.c diff --git a/cardinality_estimation.c b/cardinality_estimation.c index c3e5d7a4..e5b9f593 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/cardinality_estimation.c @@ -83,8 +83,8 @@ predict_for_relation(List *clauses, List *selectivities, for (i = 0; i < aqo_K; ++i) matrix[i] = palloc0(sizeof(**matrix) * nfeatures); - if (load_fss(query_context.fspace_hash, *fss_hash, nfeatures, matrix, - targets, &rows, NULL)) + if (load_fss_ext(query_context.fspace_hash, *fss_hash, nfeatures, matrix, + targets, &rows, NULL, true)) result = OkNNr_predict(rows, nfeatures, matrix, targets, features); else { diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 4a0f4d8d..7216fcd3 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -18,7 +18,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/cardinality_hooks.c @@ -433,7 +433,8 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, *fss = get_grouped_exprs_hash(child_fss, group_exprs); - if (!load_fss(query_context.fspace_hash, *fss, 0, NULL, &target, &rows, NULL)) + if (!load_fss_ext(query_context.fspace_hash, *fss, 0, NULL, + &target, &rows, NULL, true)) return -1; Assert(rows == 1); diff --git a/hash.c b/hash.c index 0daad6e6..4510032e 100644 --- a/hash.c +++ b/hash.c @@ -12,7 +12,7 @@ * 
******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/hash.c diff --git a/learn_cache.c b/learn_cache.c new file mode 100644 index 00000000..f2b59323 --- /dev/null +++ b/learn_cache.c @@ -0,0 +1,157 @@ +/* + ******************************************************************************* + * + * + * + ******************************************************************************* + * + * Copyright (c) 2016-2022, Postgres Professional + * + * IDENTIFICATION + * aqo/learn_cache.c + * + */ + +#include "postgres.h" + +#include "aqo.h" +#include "learn_cache.h" + +typedef struct +{ + /* XXX we assume this struct contains no padding bytes */ + uint64 fs; + int64 fss; +} htab_key; + +typedef struct +{ + htab_key key; + + /* Store ML data "AS IS". */ + int nrows; + int ncols; + double *matrix[aqo_K]; + double *targets; + List *relids; +} htab_entry; + +static HTAB *fss_htab = NULL; +MemoryContext LearnCacheMemoryContext = NULL; + +void +lc_init(void) +{ + HASHCTL ctl; + + Assert(!LearnCacheMemoryContext); + LearnCacheMemoryContext = AllocSetContextCreate(TopMemoryContext, + "lcache context", + ALLOCSET_DEFAULT_SIZES); + + ctl.keysize = sizeof(htab_key); + ctl.entrysize = sizeof(htab_entry); + ctl.hcxt = LearnCacheMemoryContext; + + fss_htab = hash_create("Remote Con hash", 32, &ctl, HASH_ELEM | HASH_BLOBS); +} + +bool +lc_update_fss(uint64 fs, int fss, int nrows, int ncols, + double **matrix, double *targets, List *relids) +{ + htab_key key = {fs, fss}; + htab_entry *entry; + bool found; + int i; + MemoryContext memctx = MemoryContextSwitchTo(LearnCacheMemoryContext); + + Assert(fss_htab); + + entry = (htab_entry *) hash_search(fss_htab, &key, HASH_ENTER, &found); + if (found) + { + /* Clear previous version of the cached data. 
*/ + for (i = 0; i < entry->nrows; ++i) + pfree(entry->matrix[i]); + pfree(entry->targets); + list_free(entry->relids); + } + + entry->nrows = nrows; + entry->ncols = ncols; + for (i = 0; i < entry->nrows; ++i) + { + entry->matrix[i] = palloc(sizeof(double) * ncols); + memcpy(entry->matrix[i], matrix[i], sizeof(double) * ncols); + } + entry->targets = palloc(sizeof(double) * nrows); + memcpy(entry->targets, targets, sizeof(double) * nrows); + entry->relids = list_copy(relids); + + MemoryContextSwitchTo(memctx); + return true; +} + +bool +lc_has_fss(uint64 fs, int fss) +{ + htab_key key = {fs, fss}; + bool found; + + Assert(fss_htab); + + (void) hash_search(fss_htab, &key, HASH_FIND, &found); + if (!found) + return false; + return true; +} + +bool +lc_load_fss(uint64 fs, int fss, int ncols, double **matrix, + double *targets, int *nrows, List **relids) +{ + htab_key key = {fs, fss}; + htab_entry *entry; + bool found; + int i; + + Assert(fss_htab); + + entry = (htab_entry *) hash_search(fss_htab, &key, HASH_FIND, &found); + if (!found) + return false; + + *nrows = entry->nrows; + Assert(entry->ncols == ncols); + for (i = 0; i < entry->nrows; ++i) + memcpy(matrix[i], entry->matrix[i], sizeof(double) * ncols); + memcpy(targets, entry->targets, sizeof(double) * entry->nrows); + if (relids) + *relids = list_copy(entry->relids); + return true; +} + +/* + * Remove record from fss cache. Should be done at learning stage of successfully + * finished query execution. 
+*/ +void +lc_remove_fss(uint64 fs, int fss) +{ + htab_key key = {fs, fss}; + htab_entry *entry; + bool found; + int i; + + Assert(fss_htab); + + entry = (htab_entry *) hash_search(fss_htab, &key, HASH_FIND, &found); + if (!found) + return; + + for (i = 0; i < entry->nrows; ++i) + pfree(entry->matrix[i]); + pfree(entry->targets); + hash_search(fss_htab, &key, HASH_REMOVE, NULL); +} diff --git a/learn_cache.h b/learn_cache.h new file mode 100644 index 00000000..876a106e --- /dev/null +++ b/learn_cache.h @@ -0,0 +1,15 @@ +#ifndef LEARN_CACHE_H +#define LEARN_CACHE_H + +#include "nodes/pg_list.h" + +extern void lc_init(void); +extern bool lc_update_fss(uint64 fhash, int fsshash, int nrows, int ncols, + double **matrix, double *targets, List *relids); +extern bool lc_has_fss(uint64 fhash, int fss); +extern bool lc_load_fss(uint64 fhash, int fsshash, int ncols, + double **matrix, double *targets, int *nrows, + List **relids); +extern void lc_remove_fss(uint64 fhash, int fss_hash); + +#endif /* LEARN_CACHE_H */ diff --git a/machine_learning.c b/machine_learning.c index a9889868..91c72d3e 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -12,7 +12,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/machine_learning.c diff --git a/path_utils.c b/path_utils.c index decf6d93..e5b4e0ad 100644 --- a/path_utils.c +++ b/path_utils.c @@ -5,7 +5,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/path_utils.c diff --git a/postprocessing.c b/postprocessing.c index 524f41e0..1eab3d95 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -9,7 +9,7 @@ * ******************************************************************************* * - * Copyright (c) 
2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/postprocessing.c @@ -27,6 +27,7 @@ #include "hash.h" #include "path_utils.h" #include "preprocessing.h" +#include "learn_cache.h" typedef struct @@ -35,6 +36,7 @@ typedef struct List *selectivities; List *relidslist; bool learn; + bool isTimedOut; /* Is execution was interrupted by timeout? */ } aqo_obj_stat; static double cardinality_sum_errors; @@ -56,14 +58,13 @@ static char *PlanStateInfo = "PlanStateInfo"; static void atomic_fss_learn_step(uint64 fhash, int fss_hash, int ncols, double **matrix, double *targets, double *features, double target, - List *relids); + List *relids, bool isTimedOut); static bool learnOnPlanState(PlanState *p, void *context); -static void learn_sample(List *clauselist, - List *selectivities, - List *relidslist, - double true_cardinality, - Plan *plan, - bool notExecuted); +static void learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, + double true_cardinality, Plan *plan, + bool notExecuted); +static void learn_sample(aqo_obj_stat *ctx, List *relidslist, + double true_cardinality, Plan *plan, bool notExecuted); static List *restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, @@ -89,7 +90,7 @@ static void atomic_fss_learn_step(uint64 fhash, int fss_hash, int ncols, double **matrix, double *targets, double *features, double target, - List *relids) + List *relids, bool isTimedOut) { LOCKTAG tag; int nrows; @@ -97,17 +98,18 @@ atomic_fss_learn_step(uint64 fhash, int fss_hash, int ncols, init_lock_tag(&tag, fhash, fss_hash); LockAcquire(&tag, ExclusiveLock, false, false); - if (!load_fss(fhash, fss_hash, ncols, matrix, targets, &nrows, NULL)) + if (!load_fss_ext(fhash, fss_hash, ncols, matrix, targets, &nrows, NULL, !isTimedOut)) nrows = 0; nrows = OkNNr_learn(nrows, ncols, matrix, targets, features, target); - update_fss(fhash, fss_hash, nrows, ncols, matrix, targets, relids); + update_fss_ext(fhash, 
fss_hash, nrows, ncols, matrix, targets, relids, + isTimedOut); LockRelease(&tag, ExclusiveLock, false); } static void -learn_agg_sample(List *clauselist, List *selectivities, List *relidslist, +learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, double true_cardinality, Plan *plan, bool notExecuted) { uint64 fhash = query_context.fspace_hash; @@ -123,11 +125,11 @@ learn_agg_sample(List *clauselist, List *selectivities, List *relidslist, * Learn 'not executed' nodes only once, if no one another knowledge exists * for current feature subspace. */ - if (notExecuted && aqo_node->prediction > 0) + if (notExecuted && aqo_node->prediction > 0.) return; target = log(true_cardinality); - child_fss = get_fss_for_object(relidslist, clauselist, NIL, NULL, NULL); + child_fss = get_fss_for_object(relidslist, ctx->clauselist, NIL, NULL, NULL); fss = get_grouped_exprs_hash(child_fss, aqo_node->grouping_exprs); for (i = 0; i < aqo_K; i++) @@ -135,7 +137,7 @@ learn_agg_sample(List *clauselist, List *selectivities, List *relidslist, /* Critical section */ atomic_fss_learn_step(fhash, fss, 0, matrix, targets, NULL, target, - relidslist); + relidslist, ctx->isTimedOut); /* End of critical section */ } @@ -144,7 +146,7 @@ learn_agg_sample(List *clauselist, List *selectivities, List *relidslist, * true cardinalities) performs learning procedure. 
*/ static void -learn_sample(List *clauselist, List *selectivities, List *relidslist, +learn_sample(aqo_obj_stat *ctx, List *relidslist, double true_cardinality, Plan *plan, bool notExecuted) { uint64 fhash = query_context.fspace_hash; @@ -158,8 +160,8 @@ learn_sample(List *clauselist, List *selectivities, List *relidslist, AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); target = log(true_cardinality); - fss_hash = get_fss_for_object(relidslist, clauselist, - selectivities, &nfeatures, &features); + fss_hash = get_fss_for_object(relidslist, ctx->clauselist, + ctx->selectivities, &nfeatures, &features); /* Only Agg nodes can have non-empty a grouping expressions list. */ Assert(!IsA(plan, Agg) || aqo_node->grouping_exprs != NIL); @@ -178,7 +180,7 @@ learn_sample(List *clauselist, List *selectivities, List *relidslist, /* Critical section */ atomic_fss_learn_step(fhash, fss_hash, nfeatures, matrix, targets, features, target, - relidslist); + relidslist, ctx->isTimedOut); /* End of critical section */ if (nfeatures > 0) @@ -264,7 +266,7 @@ IsParallelTuplesProcessing(const Plan *plan, bool IsParallel) /* * learn_subplan_recurse * - * Emphasise recursion operation into separate function because of increasing + * Emphasize recursion operation into separate function because of increasing * complexity of this logic. */ static bool @@ -276,6 +278,13 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) if (!p->instrument) return true; + + if (!INSTR_TIME_IS_ZERO(p->instrument->starttime)) + { + Assert(ctx->isTimedOut); + InstrStopNode(p->instrument, 0); + } + InstrEndLoop(p->instrument); saved_subplan_list = p->subPlan; @@ -286,19 +295,22 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) if (planstate_tree_walker(p, learnOnPlanState, (void *) ctx)) return true; + /* + * Learn on subplans and initplans separately. Discard learn context of these + * subplans because we will use their fss'es directly. 
+ */ foreach(lc, saved_subplan_list) { SubPlanState *sps = lfirst_node(SubPlanState, lc); - aqo_obj_stat SPCtx = {NIL, NIL, NIL, ctx->learn}; + aqo_obj_stat SPCtx = {NIL, NIL, NIL, ctx->learn, ctx->isTimedOut}; if (learnOnPlanState(sps->planstate, (void *) &SPCtx)) return true; } - foreach(lc, saved_initplan_list) { SubPlanState *sps = lfirst_node(SubPlanState, lc); - aqo_obj_stat SPCtx = {NIL, NIL, NIL, ctx->learn}; + aqo_obj_stat SPCtx = {NIL, NIL, NIL, ctx->learn, ctx->isTimedOut}; if (learnOnPlanState(sps->planstate, (void *) &SPCtx)) return true; @@ -309,6 +321,23 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) return false; } +static bool +should_learn(aqo_obj_stat *ctx, double predicted, double *nrows) +{ + if (ctx->isTimedOut) + { + if (ctx->learn && *nrows > predicted * 1.2) + { + *nrows += (*nrows - predicted) * 3.; + return true; + } + } + else if (ctx->learn) + return true; + + return false; +} + /* * Walks over obtained PlanState tree, collects relation objects with their * clauses, selectivities and relids and passes each object to learn_sample. @@ -324,7 +353,7 @@ static bool learnOnPlanState(PlanState *p, void *context) { aqo_obj_stat *ctx = (aqo_obj_stat *) context; - aqo_obj_stat SubplanCtx = {NIL, NIL, NIL, ctx->learn}; + aqo_obj_stat SubplanCtx = {NIL, NIL, NIL, ctx->learn, ctx->isTimedOut}; double predicted = 0.; double learn_rows = 0.; AQOPlanNode *aqo_node; @@ -332,7 +361,7 @@ learnOnPlanState(PlanState *p, void *context) /* Recurse into subtree and collect clauses. */ if (learn_subplan_recurse(p, &SubplanCtx)) - /* If something goes wrong, return quckly. */ + /* If something goes wrong, return quickly. */ return true; aqo_node = get_aqo_plan_node(p->plan, false); @@ -469,18 +498,24 @@ learnOnPlanState(PlanState *p, void *context) { Assert(predicted >= 1. && learn_rows >= 1.); - if (ctx->learn) + if (should_learn(ctx, predicted, &learn_rows)) { + if (ctx->isTimedOut) + elog(DEBUG1, "[AQO] Learn on partially executed plan node. 
fs: %lu, fss: %d, predicted rows: %.0lf, updated prediction: %.0lf", + query_context.query_hash, aqo_node->fss, predicted, learn_rows); + if (IsA(p, AggState)) - learn_agg_sample(SubplanCtx.clauselist, NULL, + learn_agg_sample(&SubplanCtx, aqo_node->relids, learn_rows, p->plan, notExecuted); else - learn_sample(SubplanCtx.clauselist, - SubplanCtx.selectivities, + learn_sample(&SubplanCtx, aqo_node->relids, learn_rows, p->plan, notExecuted); + + if (!ctx->isTimedOut) + lc_remove_fss(query_context.query_hash, aqo_node->fss); } } } @@ -606,6 +641,102 @@ aqo_ExecutorStart(QueryDesc *queryDesc, int eflags) StorePlanInternals(queryDesc); } +#include "utils/timeout.h" + +static struct +{ + TimeoutId id; + QueryDesc *queryDesc; +} timeoutCtl = {0, NULL}; + +static int exec_nested_level = 0; + +static void +aqo_timeout_handler(void) +{ + aqo_obj_stat ctx = {NIL, NIL, NIL, false, false}; + + if (!timeoutCtl.queryDesc || !ExtractFromQueryEnv(timeoutCtl.queryDesc)) + return; + + /* Now we can analyze execution state of the query. */ + + ctx.learn = query_context.learn_aqo; + ctx.isTimedOut = true; + + elog(DEBUG1, "AQO timeout was expired. Try to learn on partial data."); + learnOnPlanState(timeoutCtl.queryDesc->planstate, (void *) &ctx); +} + +static bool +set_timeout_if_need(QueryDesc *queryDesc) +{ + TimestampTz fin_time; + + if (!get_timeout_active(STATEMENT_TIMEOUT)) + return false; + + if (!ExtractFromQueryEnv(queryDesc)) + return false; + + if (IsQueryDisabled() || IsParallelWorker() || + !(query_context.use_aqo || query_context.learn_aqo)) + return false; + + /* + * Statement timeout exists. AQO should create user timeout right before the + * statement timeout. + */ + + if (timeoutCtl.id < USER_TIMEOUT) + /* Register once per backend, because of timeouts implementation. 
*/ + timeoutCtl.id = RegisterTimeout(USER_TIMEOUT, aqo_timeout_handler); + else + Assert(!get_timeout_active(timeoutCtl.id)); + + fin_time = get_timeout_finish_time(STATEMENT_TIMEOUT); + enable_timeout_at(timeoutCtl.id, fin_time - 1); + + /* Save pointer to queryDesc to use at learning after a timeout interruption. */ + timeoutCtl.queryDesc = queryDesc; + return true; +} + +/* + * ExecutorRun hook. + */ +void +aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, + bool execute_once) +{ + bool timeout_enabled = false; + + if (exec_nested_level <= 0) + timeout_enabled = set_timeout_if_need(queryDesc); + + Assert(!timeout_enabled || + (timeoutCtl.queryDesc && timeoutCtl.id >= USER_TIMEOUT)); + + exec_nested_level++; + + PG_TRY(); + { + if (prev_ExecutorRun) + prev_ExecutorRun(queryDesc, direction, count, execute_once); + else + standard_ExecutorRun(queryDesc, direction, count, execute_once); + } + PG_FINALLY(); + { + exec_nested_level--; + timeoutCtl.queryDesc = NULL; + + if (timeout_enabled) + disable_timeout(timeoutCtl.id, false); + } + PG_END_TRY(); +} + /* * General hook which runs before ExecutorEnd and collects query execution * cardinality statistics. @@ -647,7 +778,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) if (query_context.learn_aqo || (!query_context.learn_aqo && query_context.collect_stat)) { - aqo_obj_stat ctx = {NIL, NIL, NIL, query_context.learn_aqo}; + aqo_obj_stat ctx = {NIL, NIL, NIL, query_context.learn_aqo, false}; /* * Analyze plan if AQO need to learn or need to collect statistics only. @@ -730,6 +861,8 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) * standard_ExecutorEnd clears the queryDesc->planstate. After this point no * one operation with the plan can be made. 
*/ + + timeoutCtl.queryDesc = NULL; } /* diff --git a/preprocessing.c b/preprocessing.c index 6bb19632..a09a584c 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -49,7 +49,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/preprocessing.c diff --git a/selectivity_cache.c b/selectivity_cache.c index b59da933..0b354ba0 100644 --- a/selectivity_cache.c +++ b/selectivity_cache.c @@ -9,7 +9,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/selectivity_cache.c diff --git a/storage.c b/storage.c index 48fa8064..740513cb 100644 --- a/storage.c +++ b/storage.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/storage.c @@ -23,6 +23,7 @@ #include "aqo.h" #include "preprocessing.h" +#include "learn_cache.h" HTAB *deactivated_queries = NULL; @@ -364,6 +365,23 @@ deform_oids_vector(Datum datum) return relids; } +bool +load_fss_ext(uint64 fs, int fss, + int ncols, double **matrix, double *targets, int *rows, + List **relids, bool isSafe) +{ + if (isSafe && !lc_has_fss(fs, fss)) + return load_fss(fs, fss, ncols, matrix, targets, rows, relids); + else + { + if (matrix == NULL && targets == NULL && rows == NULL) + return true; + + elog(DEBUG1, "Load ML data for fs %lu, fss %d", fs, fss); + return lc_load_fss(fs, fss, ncols, matrix, targets, rows, relids); + } +} + /* * Loads feature subspace (fss) from table aqo_data into memory. * The last column of the returned matrix is for target values of objects. 
@@ -449,6 +467,18 @@ load_fss(uint64 fhash, int fss_hash, return success; } +bool +update_fss_ext(uint64 fhash, int fsshash, int nrows, int ncols, + double **matrix, double *targets, List *relids, bool isTimedOut) +{ + if (!isTimedOut) + return update_fss(fhash, fsshash, nrows, ncols, matrix, targets, + relids); + else + return lc_update_fss(fhash, fsshash, nrows, ncols, matrix, targets, + relids); +} + /* * Updates the specified line in the specified feature subspace. * Returns false if the operation failed, true otherwise. diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index c4ddb7ae..f21b8a98 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -97,7 +97,7 @@ "); $node->command_ok([ 'pgbench', '-t', "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], - 'pgbench in disabled mode'); + 'pgbench in disabled mode - 2'); # Check: no any tuples added into the aqo_data table in this mode. $fss_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_data;"); diff --git a/utils.c b/utils.c index 34bcd2f9..8fc0d186 100644 --- a/utils.c +++ b/utils.c @@ -5,7 +5,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/utils.c From 7d36f880d7ad5baced5bee00a7b6a9a5dc108cfd Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Fri, 25 Feb 2022 11:54:12 +0500 Subject: [PATCH 060/203] Resolve a problem with gathering of instrumentation data on a partially executed query plan. Fix some issues. 
--- postprocessing.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/postprocessing.c b/postprocessing.c index 1eab3d95..ff466542 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -279,13 +279,24 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) if (!p->instrument) return true; - if (!INSTR_TIME_IS_ZERO(p->instrument->starttime)) + if (!ctx->isTimedOut) + InstrEndLoop(p->instrument); + else if (p->instrument->running) { - Assert(ctx->isTimedOut); - InstrStopNode(p->instrument, 0); - } + /* + * We can't use node instrumentation functions because after the end + * of this timeout handler query can work for some time. + * We change ntuples and nloops to unify walking logic and because we + * know that the query execution results meaningless. + */ + p->instrument->ntuples += p->instrument->tuplecount; + p->instrument->nloops += 1; - InstrEndLoop(p->instrument); + /* + * TODO: can we simply use ExecParallelCleanup to implement gathering of + * instrument data in the case of parallel workers? + */ + } saved_subplan_list = p->subPlan; saved_initplan_list = p->initPlan; @@ -328,7 +339,7 @@ should_learn(aqo_obj_stat *ctx, double predicted, double *nrows) { if (ctx->learn && *nrows > predicted * 1.2) { - *nrows += (*nrows - predicted) * 3.; + *nrows += (*nrows - predicted) * 10.; return true; } } @@ -500,8 +511,8 @@ learnOnPlanState(PlanState *p, void *context) if (should_learn(ctx, predicted, &learn_rows)) { - if (ctx->isTimedOut) - elog(DEBUG1, "[AQO] Learn on partially executed plan node. fs: %lu, fss: %d, predicted rows: %.0lf, updated prediction: %.0lf", + if (ctx->isTimedOut && aqo_show_details) + elog(NOTICE, "[AQO] Learn on partially executed plan node. 
fs: %lu, fss: %d, predicted rows: %.0lf, updated prediction: %.0lf", query_context.query_hash, aqo_node->fss, predicted, learn_rows); if (IsA(p, AggState)) @@ -664,7 +675,7 @@ aqo_timeout_handler(void) ctx.learn = query_context.learn_aqo; ctx.isTimedOut = true; - elog(DEBUG1, "AQO timeout was expired. Try to learn on partial data."); + elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. Try to learn on partial data."); learnOnPlanState(timeoutCtl.queryDesc->planstate, (void *) &ctx); } From 08a510600b08942070b1ba69c7275628cd6bbc59 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 25 Feb 2022 12:10:06 +0500 Subject: [PATCH 061/203] An iteration of the code improvement. --- learn_cache.c | 16 +++++++++++----- storage.c | 1 - 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/learn_cache.c b/learn_cache.c index f2b59323..0feeb5dc 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -53,7 +53,7 @@ lc_init(void) ctl.entrysize = sizeof(htab_entry); ctl.hcxt = LearnCacheMemoryContext; - fss_htab = hash_create("Remote Con hash", 32, &ctl, HASH_ELEM | HASH_BLOBS); + fss_htab = hash_create("ML AQO cache", 256, &ctl, HASH_ELEM | HASH_BLOBS); } bool @@ -102,11 +102,13 @@ lc_has_fss(uint64 fs, int fss) Assert(fss_htab); (void) hash_search(fss_htab, &key, HASH_FIND, &found); - if (!found) - return false; - return true; + return found; } +/* + * Load ML data from a memory cache, not from a table. + * XXX That to do with learning tails, living in the cache? 
+ */ bool lc_load_fss(uint64 fs, int fss, int ncols, double **matrix, double *targets, int *nrows, List **relids) @@ -122,11 +124,15 @@ lc_load_fss(uint64 fs, int fss, int ncols, double **matrix, if (!found) return false; + if (aqo_show_details) + elog(NOTICE, "[AQO] Load ML data for fs %lu, fss %d from the cache", + fs, fss); + *nrows = entry->nrows; Assert(entry->ncols == ncols); for (i = 0; i < entry->nrows; ++i) memcpy(matrix[i], entry->matrix[i], sizeof(double) * ncols); - memcpy(targets, entry->targets, sizeof(double) * entry->nrows); + memcpy(targets, entry->targets, sizeof(double) * entry->nrows); if (relids) *relids = list_copy(entry->relids); return true; diff --git a/storage.c b/storage.c index 740513cb..b1d5d695 100644 --- a/storage.c +++ b/storage.c @@ -377,7 +377,6 @@ load_fss_ext(uint64 fs, int fss, if (matrix == NULL && targets == NULL && rows == NULL) return true; - elog(DEBUG1, "Load ML data for fs %lu, fss %d", fs, fss); return lc_load_fss(fs, fss, ncols, matrix, targets, rows, relids); } } From 8ae5af7689e47bf2e9f54719fa8a7b4f3fa935c9 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 25 Feb 2022 13:17:18 +0500 Subject: [PATCH 062/203] Hide the AQO Statement Timeout feature under a GUC. Use aqo.learn_statement_timeout to enable this feature. On more function here is to do cleanup on this cache and memory context. 
--- aqo.c | 13 +++++++++++++ learn_cache.c | 41 +++++++++++++++++++++++++++++++++++++++-- learn_cache.h | 3 +++ postprocessing.c | 4 ++-- storage.c | 4 +++- 5 files changed, 60 insertions(+), 5 deletions(-) diff --git a/aqo.c b/aqo.c index 2acc4729..146cca54 100644 --- a/aqo.c +++ b/aqo.c @@ -201,6 +201,19 @@ _PG_init(void) NULL ); + DefineCustomBoolVariable( + "aqo.learn_statement_timeout", + "Learn on a plan interrupted by statement timeout.", + "ML data stored in a backend cache, so it works only locally.", + &aqo_learn_statement_timeout, + false, + PGC_USERSET, + 0, + NULL, + lc_assign_hook, + NULL + ); + prev_planner_hook = planner_hook; planner_hook = aqo_planner; prev_ExecutorStart_hook = ExecutorStart_hook; diff --git a/learn_cache.c b/learn_cache.c index 0feeb5dc..bc7bf935 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -13,6 +13,7 @@ */ #include "postgres.h" +#include "miscadmin.h" #include "aqo.h" #include "learn_cache.h" @@ -39,6 +40,8 @@ typedef struct static HTAB *fss_htab = NULL; MemoryContext LearnCacheMemoryContext = NULL; +bool aqo_learn_statement_timeout = false; + void lc_init(void) { @@ -66,7 +69,7 @@ lc_update_fss(uint64 fs, int fss, int nrows, int ncols, int i; MemoryContext memctx = MemoryContextSwitchTo(LearnCacheMemoryContext); - Assert(fss_htab); + Assert(fss_htab && aqo_learn_statement_timeout); entry = (htab_entry *) hash_search(fss_htab, &key, HASH_ENTER, &found); if (found) @@ -99,6 +102,9 @@ lc_has_fss(uint64 fs, int fss) htab_key key = {fs, fss}; bool found; + if (!aqo_learn_statement_timeout) + return false; + Assert(fss_htab); (void) hash_search(fss_htab, &key, HASH_FIND, &found); @@ -118,7 +124,7 @@ lc_load_fss(uint64 fs, int fss, int ncols, double **matrix, bool found; int i; - Assert(fss_htab); + Assert(fss_htab && aqo_learn_statement_timeout); entry = (htab_entry *) hash_search(fss_htab, &key, HASH_FIND, &found); if (!found) @@ -150,6 +156,9 @@ lc_remove_fss(uint64 fs, int fss) bool found; int i; + if 
(!aqo_learn_statement_timeout) + return; + Assert(fss_htab); entry = (htab_entry *) hash_search(fss_htab, &key, HASH_FIND, &found); @@ -161,3 +170,31 @@ pfree(entry->targets); hash_search(fss_htab, &key, HASH_REMOVE, NULL); } + +/* + * Main purpose of this hook is to clean up a backend cache in some way to prevent + * memory leaks - in large queries we could have many unused fss nodes. + */ +void +lc_assign_hook(bool newval, void *extra) +{ + HASH_SEQ_STATUS status; + htab_entry *entry; + + if (!fss_htab || !IsUnderPostmaster) + return; + + /* Remove all entries, reset memory context. */ + + elog(DEBUG5, "[AQO] Cleanup local cache of ML data."); + + /* Remove all entries from the local ML cache. */ + hash_seq_init(&status, fss_htab); + while ((entry = (htab_entry *) hash_seq_search(&status)) != NULL) + { + if (!hash_search(fss_htab, (void *) &entry->key, HASH_REMOVE, NULL)) + elog(ERROR, "[AQO] The local ML cache is corrupted."); + } + + MemoryContextReset(LearnCacheMemoryContext); +} \ No newline at end of file diff --git a/learn_cache.h b/learn_cache.h index 876a106e..e597c0f1 100644 --- a/learn_cache.h +++ b/learn_cache.h @@ -3,6 +3,8 @@ #include "nodes/pg_list.h" +extern bool aqo_learn_statement_timeout; + extern void lc_init(void); @@ -11,5 +13,6 @@ extern bool lc_load_fss(uint64 fhash, int fsshash, int ncols, double **matrix, double *targets, int *nrows, List **relids); extern void lc_remove_fss(uint64 fhash, int fss_hash); +extern void lc_assign_hook(bool newval, void *extra); #endif /* LEARN_CACHE_H */ diff --git a/postprocessing.c b/postprocessing.c index ff466542..d7a6e572 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -684,7 +684,7 @@ set_timeout_if_need(QueryDesc *queryDesc) { TimestampTz fin_time; - if (!get_timeout_active(STATEMENT_TIMEOUT)) + if (!get_timeout_active(STATEMENT_TIMEOUT) || 
!aqo_learn_statement_timeout) return false; if (!ExtractFromQueryEnv(queryDesc)) return false; @@ -696,7 +696,7 @@ /* * Statement timeout exists. AQO should create user timeout right before the - * statement timeout. + * timeout. */ if (timeoutCtl.id < USER_TIMEOUT) diff --git a/storage.c b/storage.c index b1d5d695..d96fdb04 100644 --- a/storage.c +++ b/storage.c @@ -370,10 +370,12 @@ load_fss_ext(uint64 fs, int fss, int ncols, double **matrix, double *targets, int *rows, List **relids, bool isSafe) { - if (isSafe && !lc_has_fss(fs, fss)) + if (isSafe && (!aqo_learn_statement_timeout || !lc_has_fss(fs, fss))) return load_fss(fs, fss, ncols, matrix, targets, rows, relids); else { + Assert(aqo_learn_statement_timeout); + if (matrix == NULL && targets == NULL && rows == NULL) return true; From ddcf450c3ffd342c2229530b4ac3f7011e7f11f0 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Sat, 5 Mar 2022 11:49:01 +0500 Subject: [PATCH 063/203] Distinguish finished and running plan nodes. --- postprocessing.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/postprocessing.c b/postprocessing.c index d7a6e572..8b9f0e3a 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -333,13 +333,34 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) } static bool -should_learn(aqo_obj_stat *ctx, double predicted, double *nrows) +should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, + double predicted, double *nrows) { if (ctx->isTimedOut) { if (ctx->learn && *nrows > predicted * 1.2) { - *nrows += (*nrows - predicted) * 10.; + /* The node produced significantly more tuples than predicted. */ + if (aqo_show_details) + elog(NOTICE, + "[AQO] Learn on a plan node (%lu, %d), " + "predicted rows: %.0lf, updated prediction: %.0lf", + query_context.query_hash, node->fss, predicted, *nrows); + + return true; + } + + /* Has the executor finished its work? */ + if (TupIsNull(ps->ps_ResultTupleSlot) && + ps->instrument->nloops > 0.) 
/* Node was visited by executor at least once. */ + { + /* This is much more reliable data. So we can correct our prediction. */ + if (ctx->learn && aqo_show_details && fabs(*nrows - predicted) / predicted > 0.2) + elog(NOTICE, + "[AQO] Learn on a finished plan node (%lu, %d), " + "predicted rows: %.0lf, updated prediction: %.0lf", + query_context.query_hash, node->fss, predicted, *nrows); + return true; } } @@ -509,12 +530,8 @@ learnOnPlanState(PlanState *p, void *context) { Assert(predicted >= 1. && learn_rows >= 1.); - if (should_learn(ctx, predicted, &learn_rows)) + if (should_learn(p, aqo_node, ctx, predicted, &learn_rows)) { - if (ctx->isTimedOut && aqo_show_details) - elog(NOTICE, "[AQO] Learn on partially executed plan node. fs: %lu, fss: %d, predicted rows: %.0lf, updated prediction: %.0lf", - query_context.query_hash, aqo_node->fss, predicted, learn_rows); - if (IsA(p, AggState)) learn_agg_sample(&SubplanCtx, aqo_node->relids, learn_rows, From c274a4b59c6335a38891085d27c34f4e14a029b1 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Sat, 5 Mar 2022 15:05:09 +0500 Subject: [PATCH 064/203] Add reliability factor (rfactor) into interface of learning procedures. 
--- aqo.c | 15 ------ aqo.h | 31 +++-------- cardinality_estimation.c | 31 +++++------ cardinality_hooks.c | 28 +++++----- learn_cache.c | 26 +++++---- learn_cache.h | 11 ++-- machine_learning.c | 97 +++++++++++++++++++-------------- machine_learning.h | 29 ++++++++++ postprocessing.c | 112 ++++++++++++++++++++------------------- storage.c | 71 ++++++++++--------------- 10 files changed, 225 insertions(+), 226 deletions(-) create mode 100644 machine_learning.h diff --git a/aqo.c b/aqo.c index 146cca54..b5520a66 100644 --- a/aqo.c +++ b/aqo.c @@ -71,21 +71,6 @@ int auto_tuning_infinite_loop = 8; /* Machine learning parameters */ -/* - * Defines where we do not perform learning procedure - */ -const double object_selection_prediction_threshold = 0.3; - -/* - * This parameter tell us that the new learning sample object has very small - * distance from one whose features stored in matrix already. - * In this case we will not to add new line in matrix, but will modify this - * nearest neighbor features and cardinality with linear smoothing by - * learning_rate coefficient. - */ -const double object_selection_threshold = 0.1; -const double learning_rate = 1e-1; - /* The number of nearest neighbors which will be chosen for ML-operations */ int aqo_k = 3; double log_selectivity_lower_bound = -30; diff --git a/aqo.h b/aqo.h index d47a855f..6f3f9018 100644 --- a/aqo.h +++ b/aqo.h @@ -144,6 +144,7 @@ #include "utils/fmgroids.h" #include "utils/snapmgr.h" +#include "machine_learning.h" /* Check PostgreSQL version (9.6.0 contains important changes in planner) */ #if PG_VERSION_NUM < 90600 @@ -237,12 +238,6 @@ extern double auto_tuning_convergence_error; /* Machine learning parameters */ -/* Max number of matrix rows - max number of possible neighbors. 
*/ -#define aqo_K (30) - -extern const double object_selection_prediction_threshold; -extern const double object_selection_threshold; -extern const double learning_rate; extern int aqo_k; extern double log_selectivity_lower_bound; @@ -286,17 +281,13 @@ extern bool find_query(uint64 qhash, QueryContextData *ctx); extern bool update_query(uint64 qhash, uint64 fhash, bool learn_aqo, bool use_aqo, bool auto_tuning); extern bool add_query_text(uint64 query_hash, const char *query_string); -extern bool load_fss_ext(uint64 fs, int fss, - int ncols, double **matrix, double *targets, int *rows, +extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relids, bool isSafe); -extern bool load_fss(uint64 fhash, int fss_hash, - int ncols, double **matrix, double *targets, int *rows, - List **relids); -extern bool update_fss_ext(uint64 fhash, int fsshash, int nrows, int ncols, - double **matrix, double *targets, List *relids, - bool isTimedOut); -extern bool update_fss(uint64 fhash, int fss_hash, int nrows, int ncols, - double **matrix, double *targets, List *relids); +extern bool load_fss(uint64 fhash, int fss_hash, OkNNrdata *data, List **relids); +extern bool update_fss_ext(uint64 fhash, int fsshash, OkNNrdata *data, + List *relids, bool isTimedOut); +extern bool update_fss(uint64 fhash, int fss_hash, OkNNrdata *data, + List *relids); QueryStat *get_aqo_stat(uint64 query_hash); void update_aqo_stat(uint64 query_hash, QueryStat * stat); extern bool my_index_insert(Relation indexRelation, Datum *values, bool *isnull, @@ -325,14 +316,6 @@ void aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once); void aqo_ExecutorEnd(QueryDesc *queryDesc); -/* Machine learning techniques */ -extern double OkNNr_predict(int nrows, int ncols, - double **matrix, const double *targets, - double *features); -extern int OkNNr_learn(int matrix_rows, int matrix_cols, - double **matrix, double *targets, - double *features, double target); - /* 
Automatic query tuning */ extern void automatical_query_tuning(uint64 query_hash, QueryStat * stat); diff --git a/cardinality_estimation.c b/cardinality_estimation.c index e5b9f593..9bdaff5d 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -21,6 +21,7 @@ #include "aqo.h" #include "hash.h" +#include "machine_learning.h" #ifdef AQO_DEBUG_PRINT static void @@ -59,15 +60,12 @@ predict_debug_output(List *clauses, List *selectivities, */ double predict_for_relation(List *clauses, List *selectivities, - List *relids, int *fss_hash) + List *relids, int *fss) { - int nfeatures; - double *matrix[aqo_K]; - double targets[aqo_K]; - double *features; - double result; - int rows; - int i; + double *features; + double result; + int i; + OkNNrdata data; if (relids == NIL) /* @@ -76,16 +74,15 @@ predict_for_relation(List *clauses, List *selectivities, */ return -4.; - *fss_hash = get_fss_for_object(relids, clauses, - selectivities, &nfeatures, &features); + *fss = get_fss_for_object(relids, clauses, + selectivities, &data.cols, &features); - if (nfeatures > 0) + if (data.cols > 0) for (i = 0; i < aqo_K; ++i) - matrix[i] = palloc0(sizeof(**matrix) * nfeatures); + data.matrix[i] = palloc0(sizeof(double) * data.cols); - if (load_fss_ext(query_context.fspace_hash, *fss_hash, nfeatures, matrix, - targets, &rows, NULL, true)) - result = OkNNr_predict(rows, nfeatures, matrix, targets, features); + if (load_fss_ext(query_context.fspace_hash, *fss, &data, NULL, true)) + result = OkNNr_predict(&data, features); else { /* @@ -100,10 +97,10 @@ predict_for_relation(List *clauses, List *selectivities, predict_debug_output(clauses, selectivities, relids, *fss_hash, result); #endif pfree(features); - if (nfeatures > 0) + if (data.cols > 0) { for (i = 0; i < aqo_K; ++i) - pfree(matrix[i]); + pfree(data.matrix[i]); } if (result < 0) diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 7216fcd3..c63bb57c 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ 
-30,6 +30,7 @@ #include "aqo.h" #include "cardinality_hooks.h" #include "hash.h" +#include "machine_learning.h" #include "path_utils.h" estimate_num_groups_hook_type prev_estimate_num_groups_hook = NULL; @@ -137,12 +138,12 @@ default_estimate_num_groups(PlannerInfo *root, List *groupExprs, void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) { - double predicted; - Oid relid; - List *relids = NIL; - List *selectivities = NULL; - List *clauses; - int fss = 0; + double predicted; + Oid relid; + List *relids = NIL; + List *selectivities = NULL; + List *clauses; + int fss = 0; if (IsQueryDisabled()) /* Fast path. */ @@ -412,10 +413,9 @@ static double predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, int *fss) { - int child_fss = 0; - double prediction; - int rows; - double target; + int child_fss = 0; + double prediction; + OkNNrdata data; if (subpath->parent->predicted_cardinality > 0.) /* A fast path. Here we can use a fss hash of a leaf. */ @@ -432,13 +432,13 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, } *fss = get_grouped_exprs_hash(child_fss, group_exprs); + memset(&data, 0, sizeof(OkNNrdata)); - if (!load_fss_ext(query_context.fspace_hash, *fss, 0, NULL, - &target, &rows, NULL, true)) + if (!load_fss_ext(query_context.fspace_hash, *fss, &data, NULL, true)) return -1; - Assert(rows == 1); - prediction = exp(target); + Assert(data.rows == 1); + prediction = exp(data.targets[0]); return (prediction <= 0) ? 
-1 : prediction; } diff --git a/learn_cache.c b/learn_cache.c index bc7bf935..156f04a5 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -60,8 +60,7 @@ lc_init(void) } bool -lc_update_fss(uint64 fs, int fss, int nrows, int ncols, - double **matrix, double *targets, List *relids) +lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) { htab_key key = {fs, fss}; htab_entry *entry; @@ -81,15 +80,15 @@ lc_update_fss(uint64 fs, int fss, int nrows, int ncols, list_free(entry->relids); } - entry->nrows = nrows; - entry->ncols = ncols; + entry->nrows = data->rows; + entry->ncols = data->cols; for (i = 0; i < entry->nrows; ++i) { - entry->matrix[i] = palloc(sizeof(double) * ncols); - memcpy(entry->matrix[i], matrix[i], sizeof(double) * ncols); + entry->matrix[i] = palloc(sizeof(double) * data->cols); + memcpy(entry->matrix[i], data->matrix[i], sizeof(double) * data->cols); } - entry->targets = palloc(sizeof(double) * nrows); - memcpy(entry->targets, targets, sizeof(double) * nrows); + entry->targets = palloc(sizeof(double) * data->rows); + memcpy(entry->targets, data->targets, sizeof(double) * data->rows); entry->relids = list_copy(relids); MemoryContextSwitchTo(memctx); @@ -116,8 +115,7 @@ lc_has_fss(uint64 fs, int fss) * XXX That to do with learning tails, living in the cache? 
*/ bool -lc_load_fss(uint64 fs, int fss, int ncols, double **matrix, - double *targets, int *nrows, List **relids) +lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) { htab_key key = {fs, fss}; htab_entry *entry; @@ -134,11 +132,11 @@ lc_load_fss(uint64 fs, int fss, int ncols, double **matrix, elog(NOTICE, "[AQO] Load ML data for fs %lu, fss %d from the cache", fs, fss); - *nrows = entry->nrows; - Assert(entry->ncols == ncols); + data->rows = entry->nrows; + Assert(entry->ncols == data->cols); for (i = 0; i < entry->nrows; ++i) - memcpy(matrix[i], entry->matrix[i], sizeof(double) * ncols); - memcpy(targets, entry->targets, sizeof(double) * entry->nrows); + memcpy(data->matrix[i], entry->matrix[i], sizeof(double) * data->cols); + memcpy(data->targets, entry->targets, sizeof(double) * entry->nrows); if (relids) *relids = list_copy(entry->relids); return true; diff --git a/learn_cache.h b/learn_cache.h index e597c0f1..52e4bec2 100644 --- a/learn_cache.h +++ b/learn_cache.h @@ -3,15 +3,16 @@ #include "nodes/pg_list.h" +#include "machine_learning.h" + extern bool aqo_learn_statement_timeout; extern void lc_init(void); -extern bool lc_update_fss(uint64 fhash, int fsshash, int nrows, int ncols, - double **matrix, double *targets, List *relids); +extern bool lc_update_fss(uint64 fhash, int fsshash, OkNNrdata *data, + List *relids); extern bool lc_has_fss(uint64 fhash, int fss); -extern bool lc_load_fss(uint64 fhash, int fsshash, int ncols, - double **matrix, double *targets, int *nrows, - List **relids); +extern bool lc_load_fss(uint64 fhash, int fsshash, OkNNrdata *data, + List **relids); extern void lc_remove_fss(uint64 fhash, int fss_hash); extern void lc_assign_hook(bool newval, void *extra); diff --git a/machine_learning.c b/machine_learning.c index 91c72d3e..380c9e42 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -22,6 +22,19 @@ #include "postgres.h" #include "aqo.h" +#include "machine_learning.h" + + +/* + * This parameter tell us that the 
new learning sample object has very small + * distance from one whose features stored in matrix already. + * In this case we will not to add new line in matrix, but will modify this + * nearest neighbor features and cardinality with linear smoothing by + * learning_rate coefficient. + */ +const double object_selection_threshold = 0.1; +const double learning_rate = 1e-1; + static double fs_distance(double *a, double *b, int len); static double fs_similarity(double dist); @@ -31,7 +44,7 @@ static double compute_weights(double *distances, int nrows, double *w, int *idx) /* * Computes L2-distance between two given vectors. */ -double +static double fs_distance(double *a, double *b, int len) { double res = 0; @@ -47,7 +60,7 @@ fs_distance(double *a, double *b, int len) /* * Returns similarity between objects based on distance between them. */ -double +static double fs_similarity(double dist) { return 1.0 / (0.001 + dist); @@ -60,7 +73,7 @@ fs_similarity(double dist) * Appeared as a separate function because of "don't repeat your code" * principle. */ -double +static double compute_weights(double *distances, int nrows, double *w, int *idx) { int i, @@ -103,31 +116,30 @@ compute_weights(double *distances, int nrows, double *w, int *idx) * positive targets are assumed. 
*/ double -OkNNr_predict(int nrows, int ncols, double **matrix, const double *targets, - double *features) +OkNNr_predict(OkNNrdata *data, double *features) { double distances[aqo_K]; int i; int idx[aqo_K]; /* indexes of nearest neighbors */ double w[aqo_K]; double w_sum; - double result = 0; + double result = 0.; - for (i = 0; i < nrows; ++i) - distances[i] = fs_distance(matrix[i], features, ncols); + for (i = 0; i < data->rows; ++i) + distances[i] = fs_distance(data->matrix[i], features, data->cols); - w_sum = compute_weights(distances, nrows, w, idx); + w_sum = compute_weights(distances, data->rows, w, idx); for (i = 0; i < aqo_k; ++i) if (idx[i] != -1) - result += targets[idx[i]] * w[i] / w_sum; + result += data->targets[idx[i]] * w[i] / w_sum; - if (result < 0) - result = 0; + if (result < 0.) + result = 0.; /* this should never happen */ if (idx[0] == -1) - result = -1; + result = -1.; return result; } @@ -139,23 +151,26 @@ OkNNr_predict(int nrows, int ncols, double **matrix, const double *targets, * updates this line in database, otherwise adds new line with given index. * It is supposed that indexes of new lines are consequent numbers * starting from matrix_rows. + * reliability: 1 - value after normal end of a query; 0.1 - data from partially + * executed node (we don't want this part); 0.9 - from finished node, but + * partially executed statement. */ int -OkNNr_learn(int nrows, int nfeatures, double **matrix, double *targets, - double *features, double target) +OkNNr_learn(OkNNrdata *data, + double *features, double target, double rfactor) { - double distances[aqo_K]; - int i, - j; - int mid = 0; /* index of row with minimum distance value */ - int idx[aqo_K]; + double distances[aqo_K]; + int i; + int j; + int mid = 0; /* index of row with minimum distance value */ + int idx[aqo_K]; /* * For each neighbor compute distance and search for nearest object. 
*/ - for (i = 0; i < nrows; ++i) + for (i = 0; i < data->rows; ++i) { - distances[i] = fs_distance(matrix[i], features, nfeatures); + distances[i] = fs_distance(data->matrix[i], features, data->cols); if (distances[i] < distances[mid]) mid = i; } @@ -165,16 +180,16 @@ OkNNr_learn(int nrows, int nfeatures, double **matrix, double *targets, * replace data for the neighbor to avoid some fluctuations. * We will change it's row with linear smoothing by learning_rate. */ - if (nrows > 0 && distances[mid] < object_selection_threshold) + if (data->rows > 0 && distances[mid] < object_selection_threshold) { - for (j = 0; j < nfeatures; ++j) - matrix[mid][j] += learning_rate * (features[j] - matrix[mid][j]); - targets[mid] += learning_rate * (target - targets[mid]); + for (j = 0; j < data->cols; ++j) + data->matrix[mid][j] += learning_rate * (features[j] - data->matrix[mid][j]); + data->targets[mid] += learning_rate * (target - data->targets[mid]); - return nrows; + return data->rows; } - if (nrows < aqo_K) + if (data->rows < aqo_K) { /* We can't reached limit of stored neighbors */ @@ -182,11 +197,12 @@ OkNNr_learn(int nrows, int nfeatures, double **matrix, double *targets, * Add new line into the matrix. We can do this because matrix_rows * is not the boundary of matrix. Matrix has aqo_K free lines */ - for (j = 0; j < nfeatures; ++j) - matrix[nrows][j] = features[j]; - targets[nrows] = target; + for (j = 0; j < data->cols; ++j) + data->matrix[data->rows][j] = features[j]; + data->targets[data->rows] = target; + data->rfactors[data->rows] = rfactor; - return nrows+1; + return data->rows + 1; } else { @@ -208,7 +224,7 @@ OkNNr_learn(int nrows, int nfeatures, double **matrix, double *targets, * idx array. Compute weight for each nearest neighbor and total weight * of all nearest neighbor. */ - w_sum = compute_weights(distances, nrows, w, idx); + w_sum = compute_weights(distances, data->rows, w, idx); /* * Compute average value for target by nearest neighbors. 
We need to @@ -216,26 +232,27 @@ OkNNr_learn(int nrows, int nfeatures, double **matrix, double *targets, * neighbors than aqo_k. * Semantics of coef1: it is defined distance between new object and * this superposition value (with linear smoothing). + * fc_coef - feature changing rate. * */ for (i = 0; i < aqo_k && idx[i] != -1; ++i) - avg_target += targets[idx[i]] * w[i] / w_sum; + avg_target += data->targets[idx[i]] * w[i] / w_sum; tc_coef = learning_rate * (avg_target - target); /* Modify targets and features of each nearest neighbor row. */ for (i = 0; i < aqo_k && idx[i] != -1; ++i) { - fc_coef = tc_coef * (targets[idx[i]] - avg_target) * w[i] * w[i] / - sqrt(nfeatures) / w_sum; + fc_coef = tc_coef * (data->targets[idx[i]] - avg_target) * w[i] * w[i] / + sqrt(data->cols) / w_sum; - targets[idx[i]] -= tc_coef * w[i] / w_sum; - for (j = 0; j < nfeatures; ++j) + data->targets[idx[i]] -= tc_coef * w[i] / w_sum; + for (j = 0; j < data->cols; ++j) { - feature = matrix[idx[i]]; + feature = data->matrix[idx[i]]; feature[j] -= fc_coef * (features[j] - feature[j]) / distances[idx[i]]; } } } - return nrows; + return data->rows; } diff --git a/machine_learning.h b/machine_learning.h new file mode 100644 index 00000000..a09b3102 --- /dev/null +++ b/machine_learning.h @@ -0,0 +1,29 @@ +#ifndef MACHINE_LEARNING_H +#define MACHINE_LEARNING_H + +/* Max number of matrix rows - max number of possible neighbors. */ +#define aqo_K (30) + +extern const double object_selection_threshold; +extern const double learning_rate; + +#define RELIABILITY_MIN (0.1) +#define RELIABILITY_MAX (1.0) + +typedef struct OkNNrdata +{ + int rows; /* Number of filled rows in the matrix */ + int cols; /* Number of columns in the matrix */ + + double *matrix[aqo_K]; /* Contains the matrix - learning data for the same + * value of (fs, fss), but different features. 
*/ + double targets[aqo_K]; /* Right side of the equations system */ + double rfactors[aqo_K]; +} OkNNrdata; + +/* Machine learning techniques */ +extern double OkNNr_predict(OkNNrdata *data, double *features); +extern int OkNNr_learn(OkNNrdata *data, + double *features, double target, double rfactor); + +#endif /* MACHINE_LEARNING_H */ diff --git a/postprocessing.c b/postprocessing.c index 8b9f0e3a..9a8ab192 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -26,6 +26,7 @@ #include "aqo.h" #include "hash.h" #include "path_utils.h" +#include "machine_learning.h" #include "preprocessing.h" #include "learn_cache.h" @@ -55,16 +56,17 @@ static char *PlanStateInfo = "PlanStateInfo"; /* Query execution statistics collecting utilities */ -static void atomic_fss_learn_step(uint64 fhash, int fss_hash, int ncols, - double **matrix, double *targets, - double *features, double target, +static void atomic_fss_learn_step(uint64 fhash, int fss_hash, OkNNrdata *data, + double *features, + double target, double rfactor, List *relids, bool isTimedOut); static bool learnOnPlanState(PlanState *p, void *context); static void learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, - double true_cardinality, Plan *plan, + double learned, double rfactor, Plan *plan, bool notExecuted); static void learn_sample(aqo_obj_stat *ctx, List *relidslist, - double true_cardinality, Plan *plan, bool notExecuted); + double learned, double rfactor, + Plan *plan, bool notExecuted); static List *restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, @@ -87,39 +89,35 @@ static bool ExtractFromQueryEnv(QueryDesc *queryDesc); * matrix and targets are just preallocated memory for computations. 
*/ static void -atomic_fss_learn_step(uint64 fhash, int fss_hash, int ncols, - double **matrix, double *targets, - double *features, double target, +atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, + double *features, double target, double rfactor, List *relids, bool isTimedOut) { - LOCKTAG tag; - int nrows; + LOCKTAG tag; - init_lock_tag(&tag, fhash, fss_hash); + init_lock_tag(&tag, fs, fss); LockAcquire(&tag, ExclusiveLock, false, false); - if (!load_fss_ext(fhash, fss_hash, ncols, matrix, targets, &nrows, NULL, !isTimedOut)) - nrows = 0; + if (!load_fss_ext(fs, fss, data, NULL, !isTimedOut)) + data->rows = 0; - nrows = OkNNr_learn(nrows, ncols, matrix, targets, features, target); - update_fss_ext(fhash, fss_hash, nrows, ncols, matrix, targets, relids, - isTimedOut); + data->rows = OkNNr_learn(data, features, target, rfactor); + update_fss_ext(fs, fss, data, relids, isTimedOut); LockRelease(&tag, ExclusiveLock, false); } static void learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, - double true_cardinality, Plan *plan, bool notExecuted) + double learned, double rfactor, Plan *plan, bool notExecuted) { - uint64 fhash = query_context.fspace_hash; - int child_fss; - int fss; - double target; - double *matrix[aqo_K]; - double targets[aqo_K]; - AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); - int i; + AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); + uint64 fhash = query_context.fspace_hash; + int child_fss; + double target; + OkNNrdata data; + int fss; + int i; /* * Learn 'not executed' nodes only once, if no one another knowledge exists @@ -128,16 +126,17 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, if (notExecuted && aqo_node->prediction > 0.) 
return; - target = log(true_cardinality); + target = log(learned); child_fss = get_fss_for_object(relidslist, ctx->clauselist, NIL, NULL, NULL); fss = get_grouped_exprs_hash(child_fss, aqo_node->grouping_exprs); + memset(&data, 0, sizeof(OkNNrdata)); for (i = 0; i < aqo_K; i++) - matrix[i] = NULL; + data.matrix[i] = NULL; + /* Critical section */ - atomic_fss_learn_step(fhash, fss, - 0, matrix, targets, NULL, target, - relidslist, ctx->isTimedOut); + atomic_fss_learn_step(fhash, fss, &data, NULL, + target, rfactor, relidslist, ctx->isTimedOut); /* End of critical section */ } @@ -147,21 +146,20 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, */ static void learn_sample(aqo_obj_stat *ctx, List *relidslist, - double true_cardinality, Plan *plan, bool notExecuted) + double learned, double rfactor, Plan *plan, bool notExecuted) { - uint64 fhash = query_context.fspace_hash; - int fss_hash; - int nfeatures; - double *matrix[aqo_K]; - double targets[aqo_K]; - double *features; - double target; - int i; - AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); - - target = log(true_cardinality); - fss_hash = get_fss_for_object(relidslist, ctx->clauselist, - ctx->selectivities, &nfeatures, &features); + AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); + uint64 fs = query_context.fspace_hash; + double *features; + double target; + OkNNrdata data; + int fss; + int i; + + memset(&data, 0, sizeof(OkNNrdata)); + target = log(learned); + fss = get_fss_for_object(relidslist, ctx->clauselist, + ctx->selectivities, &data.cols, &features); /* Only Agg nodes can have non-empty a grouping expressions list. 
*/ Assert(!IsA(plan, Agg) || aqo_node->grouping_exprs != NIL); @@ -173,19 +171,18 @@ learn_sample(aqo_obj_stat *ctx, List *relidslist, if (notExecuted && aqo_node->prediction > 0) return; - if (nfeatures > 0) + if (data.cols > 0) for (i = 0; i < aqo_K; ++i) - matrix[i] = palloc(sizeof(double) * nfeatures); + data.matrix[i] = palloc(sizeof(double) * data.cols); /* Critical section */ - atomic_fss_learn_step(fhash, fss_hash, - nfeatures, matrix, targets, features, target, + atomic_fss_learn_step(fs, fss, &data, features, target, rfactor, relidslist, ctx->isTimedOut); /* End of critical section */ - if (nfeatures > 0) + if (data.cols > 0) for (i = 0; i < aqo_K; ++i) - pfree(matrix[i]); + pfree(data.matrix[i]); pfree(features); } @@ -334,7 +331,7 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) static bool should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, - double predicted, double *nrows) + double predicted, double *nrows, double *rfactor) { if (ctx->isTimedOut) { @@ -347,6 +344,7 @@ should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, "predicted rows: %.0lf, updated prediction: %.0lf", query_context.query_hash, node->fss, predicted, *nrows); + *rfactor = RELIABILITY_MIN; return true; } @@ -361,11 +359,15 @@ should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, "predicted rows: %.0lf, updated prediction: %.0lf", query_context.query_hash, node->fss, predicted, *nrows); + *rfactor = 0.9 * (RELIABILITY_MAX - RELIABILITY_MIN); return true; } } else if (ctx->learn) + { + *rfactor = RELIABILITY_MAX; return true; + } return false; } @@ -528,18 +530,20 @@ learnOnPlanState(PlanState *p, void *context) if (p->instrument) { + double rfactor = 1.; + Assert(predicted >= 1. 
&& learn_rows >= 1.); - if (should_learn(p, aqo_node, ctx, predicted, &learn_rows)) + if (should_learn(p, aqo_node, ctx, predicted, &learn_rows, &rfactor)) { if (IsA(p, AggState)) learn_agg_sample(&SubplanCtx, - aqo_node->relids, learn_rows, + aqo_node->relids, learn_rows, rfactor, p->plan, notExecuted); else learn_sample(&SubplanCtx, - aqo_node->relids, learn_rows, + aqo_node->relids, learn_rows, rfactor, p->plan, notExecuted); if (!ctx->isTimedOut) diff --git a/storage.c b/storage.c index d96fdb04..134915aa 100644 --- a/storage.c +++ b/storage.c @@ -22,6 +22,7 @@ #include "access/tableam.h" #include "aqo.h" +#include "machine_learning.h" #include "preprocessing.h" #include "learn_cache.h" @@ -348,11 +349,11 @@ form_oids_vector(List *relids) static List * deform_oids_vector(Datum datum) { - ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); - Datum *values; + ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); + Datum *values; int i; int nelems = 0; - List *relids = NIL; + List *relids = NIL; deconstruct_array(array, OIDOID, sizeof(Oid), true, TYPALIGN_INT, @@ -366,20 +367,14 @@ deform_oids_vector(Datum datum) } bool -load_fss_ext(uint64 fs, int fss, - int ncols, double **matrix, double *targets, int *rows, - List **relids, bool isSafe) +load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relids, bool isSafe) { if (isSafe && (!aqo_learn_statement_timeout || !lc_has_fss(fs, fss))) - return load_fss(fs, fss, ncols, matrix, targets, rows, relids); + return load_fss(fs, fss, data, relids); else { Assert(aqo_learn_statement_timeout); - - if (matrix == NULL && targets == NULL && rows == NULL) - return true; - - return lc_load_fss(fs, fss, ncols, matrix, targets, rows, relids); + return lc_load_fss(fs, fss, data, relids); } } @@ -398,9 +393,7 @@ load_fss_ext(uint64 fs, int fss, * objects in the given feature space */ bool -load_fss(uint64 fhash, int fss_hash, - int ncols, double **matrix, double *targets, int *rows, - List **relids) 
+load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) { Relation hrel; Relation irel; @@ -420,33 +413,28 @@ load_fss(uint64 fhash, int fss_hash, return false; scan = index_beginscan(hrel, irel, SnapshotSelf, 2, 0); - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fhash)); - ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss_hash)); + ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fs)); + ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss)); index_rescan(scan, key, 2, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); - if (matrix == NULL && targets == NULL && rows == NULL) - { - /* Just check availability */ - success = find_ok; - } - else if (find_ok) + if (find_ok) { tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); Assert(shouldFree != true); heap_deform_tuple(tuple, hrel->rd_att, values, isnull); - if (DatumGetInt32(values[2]) == ncols) + if (DatumGetInt32(values[2]) == data->cols) { - if (ncols > 0) + if (data->cols > 0) /* * The case than an object has not any filters and selectivities */ - deform_matrix(values[3], matrix); + deform_matrix(values[3], data->matrix); - deform_vector(values[4], targets, rows); + deform_vector(values[4], data->targets, &(data->rows)); if (relids != NULL) *relids = deform_oids_vector(values[5]); @@ -455,7 +443,7 @@ load_fss(uint64 fhash, int fss_hash, elog(ERROR, "unexpected number of features for hash (" \ UINT64_FORMAT", %d):\ expected %d features, obtained %d", - fhash, fss_hash, ncols, DatumGetInt32(values[2])); + fs, fss, ncols, DatumGetInt32(values[2])); } else success = false; @@ -469,15 +457,13 @@ load_fss(uint64 fhash, int fss_hash, } bool -update_fss_ext(uint64 fhash, int fsshash, int nrows, int ncols, - double **matrix, double *targets, List *relids, bool isTimedOut) +update_fss_ext(uint64 fs, int fsshash, OkNNrdata 
*data, List *relids, + bool isTimedOut) { if (!isTimedOut) - return update_fss(fhash, fsshash, nrows, ncols, matrix, targets, - relids); + return update_fss(fs, fsshash, data, relids); else - return lc_update_fss(fhash, fsshash, nrows, ncols, matrix, targets, - relids); + return lc_update_fss(fs, fsshash, data, relids); } /* @@ -493,8 +479,7 @@ update_fss_ext(uint64 fhash, int fsshash, int nrows, int ncols, * Caller guaranteed that no one AQO process insert or update this data row. */ bool -update_fss(uint64 fhash, int fsshash, int nrows, int ncols, - double **matrix, double *targets, List *relids) +update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) { Relation hrel; Relation irel; @@ -538,14 +523,14 @@ update_fss(uint64 fhash, int fsshash, int nrows, int ncols, { values[0] = Int64GetDatum(fhash); values[1] = Int32GetDatum(fsshash); - values[2] = Int32GetDatum(ncols); + values[2] = Int32GetDatum(data->cols); - if (ncols > 0) - values[3] = PointerGetDatum(form_matrix(matrix, nrows, ncols)); + if (data->cols > 0) + values[3] = PointerGetDatum(form_matrix(data->matrix, data->rows, data->cols)); else isnull[3] = true; - values[4] = PointerGetDatum(form_vector(targets, nrows)); + values[4] = PointerGetDatum(form_vector(data->targets, data->rows)); /* Form array of relids. Only once. 
*/ values[5] = PointerGetDatum(form_oids_vector(relids)); @@ -568,12 +553,12 @@ update_fss(uint64 fhash, int fsshash, int nrows, int ncols, Assert(shouldFree != true); heap_deform_tuple(tuple, hrel->rd_att, values, isnull); - if (ncols > 0) - values[3] = PointerGetDatum(form_matrix(matrix, nrows, ncols)); + if (data->cols > 0) + values[3] = PointerGetDatum(form_matrix(data->matrix, data->rows, data->cols)); else isnull[3] = true; - values[4] = PointerGetDatum(form_vector(targets, nrows)); + values[4] = PointerGetDatum(form_vector(data->targets, data->rows)); nw_tuple = heap_modify_tuple(tuple, tupDesc, values, isnull, replace); if (my_simple_heap_update(hrel, &(nw_tuple->t_self), nw_tuple, From 65b8dbad7c7f52a4842faeb52339f1942496494e Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Sat, 5 Mar 2022 23:33:52 +0500 Subject: [PATCH 065/203] Introduce AQO v.1.4. Add reliability field into the aqo_data table. --- Makefile | 4 ++-- aqo--1.3--1.4.sql | 6 ++++++ aqo.control | 2 +- expected/forced_stat_collection.out | 4 ++-- learn_cache.c | 10 ++++++---- machine_learning.c | 9 ++++----- storage.c | 18 +++++++++++------- 7 files changed, 32 insertions(+), 21 deletions(-) create mode 100755 aqo--1.3--1.4.sql diff --git a/Makefile b/Makefile index 2845854f..766c98ca 100755 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # contrib/aqo/Makefile EXTENSION = aqo -EXTVERSION = 1.2 +EXTVERSION = 1.4 PGFILEDESC = "AQO - Adaptive Query Optimization" MODULE_big = aqo OBJS = aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ @@ -32,7 +32,7 @@ EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/conf.add EXTRA_INSTALL = contrib/postgres_fdw contrib/pg_stat_statements DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql \ - aqo--1.2--1.3.sql + aqo--1.2--1.3.sql aqo--1.3--1.4.sql ifdef USE_PGXS PG_CONFIG ?= pg_config diff --git a/aqo--1.3--1.4.sql b/aqo--1.3--1.4.sql new file mode 100755 index 00000000..517a6911 --- /dev/null +++ 
b/aqo--1.3--1.4.sql @@ -0,0 +1,6 @@ +/* contrib/aqo/aqo--1.3--1.4.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION aqo UPDATE TO '1.4'" to load this file. \quit + +ALTER TABLE public.aqo_data ADD COLUMN reliability double precision []; diff --git a/aqo.control b/aqo.control index 14bb3b50..dfdd815d 100644 --- a/aqo.control +++ b/aqo.control @@ -1,5 +1,5 @@ # AQO extension comment = 'machine learning for cardinality estimation in optimizer' -default_version = '1.3' +default_version = '1.4' module_pathname = '$libdir/aqo' relocatable = false diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index fa40fcf6..716517a2 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -32,8 +32,8 @@ SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; (1 row) SELECT * FROM aqo_data; - fspace_hash | fsspace_hash | nfeatures | features | targets | oids --------------+--------------+-----------+----------+---------+------ + fspace_hash | fsspace_hash | nfeatures | features | targets | oids | reliability +-------------+--------------+-----------+----------+---------+------+------------- (0 rows) SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex diff --git a/learn_cache.c b/learn_cache.c index 156f04a5..471ea058 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -33,7 +33,8 @@ typedef struct int nrows; int ncols; double *matrix[aqo_K]; - double *targets; + double targets[aqo_K]; + double rfactors[aqo_K]; List *relids; } htab_entry; @@ -76,7 +77,6 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) /* Clear previous version of the cached data. 
*/ for (i = 0; i < entry->nrows; ++i) pfree(entry->matrix[i]); - pfree(entry->targets); list_free(entry->relids); } @@ -87,8 +87,9 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) entry->matrix[i] = palloc(sizeof(double) * data->cols); memcpy(entry->matrix[i], data->matrix[i], sizeof(double) * data->cols); } - entry->targets = palloc(sizeof(double) * data->rows); + memcpy(entry->targets, data->targets, sizeof(double) * data->rows); + memcpy(entry->rfactors, data->rfactors, sizeof(double) * data->rows); entry->relids = list_copy(relids); MemoryContextSwitchTo(memctx); @@ -137,6 +138,7 @@ lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) for (i = 0; i < entry->nrows; ++i) memcpy(data->matrix[i], entry->matrix[i], sizeof(double) * data->cols); memcpy(data->targets, entry->targets, sizeof(double) * entry->nrows); + memcpy(data->rfactors, entry->rfactors, sizeof(double) * entry->nrows); if (relids) *relids = list_copy(entry->relids); return true; @@ -165,7 +167,7 @@ lc_remove_fss(uint64 fs, int fss) for (i = 0; i < entry->nrows; ++i) pfree(entry->matrix[i]); - pfree(entry->targets); + hash_search(fss_htab, &key, HASH_REMOVE, NULL); } diff --git a/machine_learning.c b/machine_learning.c index 380c9e42..d0683334 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -156,8 +156,7 @@ OkNNr_predict(OkNNrdata *data, double *features) * partially executed statement. */ int -OkNNr_learn(OkNNrdata *data, - double *features, double target, double rfactor) +OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor) { double distances[aqo_K]; int i; @@ -191,10 +190,10 @@ OkNNr_learn(OkNNrdata *data, if (data->rows < aqo_K) { - /* We can't reached limit of stored neighbors */ + /* We don't reach a limit of stored neighbors */ /* - * Add new line into the matrix. We can do this because matrix_rows + * Add new line into the matrix. We can do this because data->rows * is not the boundary of matrix. 
Matrix has aqo_K free lines */ for (j = 0; j < data->cols; ++j) @@ -206,7 +205,7 @@ OkNNr_learn(OkNNrdata *data, } else { - double *feature; + double *feature; double avg_target = 0; double tc_coef; /* Target correction coefficient */ double fc_coef; /* Feature correction coefficient */ diff --git a/storage.c b/storage.c index 134915aa..46f67e87 100644 --- a/storage.c +++ b/storage.c @@ -27,6 +27,7 @@ #include "learn_cache.h" +#define AQO_DATA_COLUMNS (7) HTAB *deactivated_queries = NULL; static ArrayType *form_matrix(double **matrix, int nrows, int ncols); @@ -403,8 +404,8 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) bool find_ok = false; IndexScanDesc scan; ScanKeyData key[2]; - Datum values[6]; - bool isnull[6]; + Datum values[AQO_DATA_COLUMNS]; + bool isnull[AQO_DATA_COLUMNS]; bool success = true; if (!open_aqo_relation("public", "aqo_data", @@ -435,6 +436,7 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) deform_matrix(values[3], data->matrix); deform_vector(values[4], data->targets, &(data->rows)); + deform_vector(values[6], data->rfactors, &(data->rows)); if (relids != NULL) *relids = deform_oids_vector(values[5]); @@ -488,9 +490,9 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) TupleDesc tupDesc; HeapTuple tuple, nw_tuple; - Datum values[6]; - bool isnull[6] = { false, false, false, false, false, false }; - bool replace[6] = { false, false, false, true, true, false }; + Datum values[AQO_DATA_COLUMNS]; + bool isnull[AQO_DATA_COLUMNS]; + bool replace[AQO_DATA_COLUMNS] = { false, false, false, true, true, false, true }; bool shouldFree; bool find_ok = false; bool update_indexes; @@ -507,6 +509,7 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) RowExclusiveLock, &hrel, &irel)) return false; + memset(isnull, 0, sizeof(bool) * AQO_DATA_COLUMNS); tupDesc = RelationGetDescr(hrel); InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 2, 0); @@ -536,6 +539,7 @@ 
update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) values[5] = PointerGetDatum(form_oids_vector(relids)); if ((void *) values[5] == NULL) isnull[5] = true; + values[6] = PointerGetDatum(form_vector(data->rfactors, data->rows)); tuple = heap_form_tuple(tupDesc, values, isnull); /* @@ -559,8 +563,8 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) isnull[3] = true; values[4] = PointerGetDatum(form_vector(data->targets, data->rows)); - nw_tuple = heap_modify_tuple(tuple, tupDesc, - values, isnull, replace); + values[6] = PointerGetDatum(form_vector(data->rfactors, data->rows)); + nw_tuple = heap_modify_tuple(tuple, tupDesc, values, isnull, replace); if (my_simple_heap_update(hrel, &(nw_tuple->t_self), nw_tuple, &update_indexes)) { From 79c488b6ea10686d36e67fb6834066d934d59dc1 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Mon, 7 Mar 2022 21:28:51 +0500 Subject: [PATCH 066/203] Add reliability into the ML model. --- machine_learning.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/machine_learning.c b/machine_learning.c index d0683334..1894a266 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -181,9 +181,21 @@ OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor) */ if (data->rows > 0 && distances[mid] < object_selection_threshold) { + double lr = learning_rate * rfactor / data->rfactors[mid]; + + if (lr > 1.) + { + elog(WARNING, "[AQO] Something goes wrong in the ML core: learning rate = %lf", lr); + lr = 1.; + } + + Assert(lr > 0.); + Assert(data->rfactors[mid] > 0. 
&& data->rfactors[mid] <= 1.); + for (j = 0; j < data->cols; ++j) - data->matrix[mid][j] += learning_rate * (features[j] - data->matrix[mid][j]); - data->targets[mid] += learning_rate * (target - data->targets[mid]); + data->matrix[mid][j] += lr * (features[j] - data->matrix[mid][j]); + data->targets[mid] += lr * (target - data->targets[mid]); + data->rfactors[mid] += lr * (rfactor - data->rfactors[mid]); return data->rows; } @@ -229,7 +241,7 @@ OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor) * Compute average value for target by nearest neighbors. We need to * check idx[i] != -1 because we may have smaller value of nearest * neighbors than aqo_k. - * Semantics of coef1: it is defined distance between new object and + * Semantics of tc_coef: it is defined distance between new object and * this superposition value (with linear smoothing). * fc_coef - feature changing rate. * */ @@ -240,10 +252,21 @@ OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor) /* Modify targets and features of each nearest neighbor row. */ for (i = 0; i < aqo_k && idx[i] != -1; ++i) { - fc_coef = tc_coef * (data->targets[idx[i]] - avg_target) * w[i] * w[i] / - sqrt(data->cols) / w_sum; + double lr = learning_rate * rfactor / data->rfactors[mid]; + + if (lr > 1.) + { + elog(WARNING, "[AQO] Something goes wrong in the ML core: learning rate = %lf", lr); + lr = 1.; + } + + Assert(lr > 0.); + Assert(data->rfactors[mid] > 0. && data->rfactors[mid] <= 1.); + + fc_coef = tc_coef * lr * (data->targets[idx[i]] - avg_target) * + w[i] * w[i] / sqrt(data->cols) / w_sum; - data->targets[idx[i]] -= tc_coef * w[i] / w_sum; + data->targets[idx[i]] -= tc_coef * lr * w[i] / w_sum; for (j = 0; j < data->cols; ++j) { feature = data->matrix[idx[i]]; From 523b46d4cb20d49634ca4be8833b63c0382bc9c9 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 10 Mar 2022 12:49:02 +0500 Subject: [PATCH 067/203] Add basic code for support of DSM cache. 
--- Makefile | 2 +- aqo.c | 9 +++++++- aqo_shared.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++ aqo_shared.h | 22 +++++++++++++++++++ 4 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 aqo_shared.c create mode 100644 aqo_shared.h diff --git a/Makefile b/Makefile index 766c98ca..d5dfd1c5 100755 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ PGFILEDESC = "AQO - Adaptive Query Optimization" MODULE_big = aqo OBJS = aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ -selectivity_cache.o storage.o utils.o learn_cache.o $(WIN32RES) +selectivity_cache.o storage.o utils.o learn_cache.o aqo_shared.o $(WIN32RES) TAP_TESTS = 1 diff --git a/aqo.c b/aqo.c index b5520a66..35bff2e8 100644 --- a/aqo.c +++ b/aqo.c @@ -18,6 +18,7 @@ #include "utils/selfuncs.h" #include "aqo.h" +#include "aqo_shared.h" #include "cardinality_hooks.h" #include "path_utils.h" #include "preprocessing.h" @@ -126,7 +127,7 @@ _PG_init(void) { /* * In order to create our shared memory area, we have to be loaded via - * shared_preload_libraries. If not, report an ERROR. + * shared_preload_libraries. If not, report an ERROR. 
*/ if (!process_shared_preload_libraries_in_progress) ereport(ERROR, @@ -199,6 +200,8 @@ _PG_init(void) NULL ); + prev_shmem_startup_hook = shmem_startup_hook; + shmem_startup_hook = aqo_init_shmem; prev_planner_hook = planner_hook; planner_hook = aqo_planner; prev_ExecutorStart_hook = ExecutorStart_hook; @@ -243,6 +246,10 @@ _PG_init(void) ALLOCSET_DEFAULT_SIZES); RegisterResourceReleaseCallback(aqo_free_callback, NULL); RegisterAQOPlanNodeMethods(); + + MarkGUCPrefixReserved("aqo"); + RequestAddinShmemSpace(MAXALIGN(sizeof(AQOSharedState))); + lc_init(); } diff --git a/aqo_shared.c b/aqo_shared.c new file mode 100644 index 00000000..1d6983f0 --- /dev/null +++ b/aqo_shared.c @@ -0,0 +1,61 @@ +/* + * + */ + +#include "postgres.h" + +#include "storage/shmem.h" + +#include "aqo_shared.h" + +shmem_startup_hook_type prev_shmem_startup_hook = NULL; +static AQOSharedState *aqo_state = NULL; +unsigned long temp_storage_size = 1024 * 1024; /* Storage size, in bytes */ +void *temp_storage = NULL; + +static void +attach_dsm_segment(void) +{ + dsm_segment *seg; + + LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); + + if (aqo_state->dsm_handler != DSM_HANDLE_INVALID) + { + seg = dsm_attach(aqo_state->dsm_handler); + } + else + { + seg = dsm_create(temp_storage_size, 0); + aqo_state->dsm_handler = dsm_segment_handle(seg); + } + + temp_storage = dsm_segment_address(seg); + LWLockRelease(&aqo_state->lock); +} + +static void +aqo_detach_shmem(int code, Datum arg) +{ + dsm_handle handler = *(dsm_handle *) arg; + dsm_detach(dsm_find_mapping(handler)); +} + +void +aqo_init_shmem(void) +{ + bool found; + + LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); + aqo_state = ShmemInitStruct("aqo", sizeof(AQOSharedState), &found); + if (!found) + { + /* First time through ... 
*/ + LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId()); + aqo_state->dsm_handler = DSM_HANDLE_INVALID; + } + LWLockRelease(AddinShmemInitLock); + + LWLockRegisterTranche(aqo_state->lock.tranche, "aqo"); + on_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); +} diff --git a/aqo_shared.h b/aqo_shared.h new file mode 100644 index 00000000..ce5b436f --- /dev/null +++ b/aqo_shared.h @@ -0,0 +1,22 @@ +#ifndef AQO_SHARED_H +#define AQO_SHARED_H + + +#include "storage/dsm.h" +#include "storage/ipc.h" +#include "storage/lwlock.h" + + +typedef struct AQOSharedState +{ + LWLock lock; /* mutual exclusion */ + dsm_handle dsm_handler; +} AQOSharedState; + + +extern shmem_startup_hook_type prev_shmem_startup_hook; + + +extern void aqo_init_shmem(void); + +#endif /* AQO_SHARED_H */ From 70b57440c5b76c00831d1228056e68c6e84ac6d4 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 10 Mar 2022 16:50:03 +0500 Subject: [PATCH 068/203] Cumulative commit on the 'learn on statement timeout' feature. Now it works quite stable, merge it into master branch. 
--- aqo.c | 4 +- aqo_shared.c | 170 +++++++++++++++++++++++++++--- aqo_shared.h | 21 ++++ learn_cache.c | 261 +++++++++++++++++++++++++++++++++-------------- learn_cache.h | 2 +- postprocessing.c | 8 +- storage.c | 1 + t/001_pgbench.pl | 5 + 8 files changed, 373 insertions(+), 99 deletions(-) diff --git a/aqo.c b/aqo.c index 35bff2e8..a0b2dccf 100644 --- a/aqo.c +++ b/aqo.c @@ -248,9 +248,7 @@ _PG_init(void) RegisterAQOPlanNodeMethods(); MarkGUCPrefixReserved("aqo"); - RequestAddinShmemSpace(MAXALIGN(sizeof(AQOSharedState))); - - lc_init(); + RequestAddinShmemSpace(aqo_memsize()); } PG_FUNCTION_INFO_V1(invalidate_deactivated_queries_cache); diff --git a/aqo_shared.c b/aqo_shared.c index 1d6983f0..5d4edb6f 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -8,43 +8,169 @@ #include "aqo_shared.h" + +typedef struct +{ + int magic; + uint32 total_size; + uint32 delta; +} dsm_seg_hdr; + +#define free_space(hdr) (uint32) (temp_storage_size - sizeof(dsm_seg_hdr) - hdr->delta) +#define addr(delta) ((char *) dsm_segment_address(seg) + sizeof(dsm_seg_hdr) + delta) + shmem_startup_hook_type prev_shmem_startup_hook = NULL; -static AQOSharedState *aqo_state = NULL; -unsigned long temp_storage_size = 1024 * 1024; /* Storage size, in bytes */ -void *temp_storage = NULL; +AQOSharedState *aqo_state = NULL; +HTAB *fss_htab = NULL; +static int aqo_htab_max_items = 1000; +static uint32 temp_storage_size = 1024 * 1024 * 10; /* Storage size, in bytes */ +static dsm_segment *seg = NULL; -static void -attach_dsm_segment(void) + +static void aqo_detach_shmem(int code, Datum arg); + + +void * +get_dsm_all(uint32 *size) { - dsm_segment *seg; + dsm_seg_hdr *hdr; - LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); + Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE)); - if (aqo_state->dsm_handler != DSM_HANDLE_INVALID) + if (aqo_state->dsm_handler == DSM_HANDLE_INVALID) + { + /* Fast path. No any cached data exists. 
*/ + *size = 0; + return NULL; + } + + if (!seg) { + /* if segment exists we should connect to */ seg = dsm_attach(aqo_state->dsm_handler); + Assert(seg); + dsm_pin_mapping(seg); + on_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); + } + + hdr = (dsm_seg_hdr *) dsm_segment_address(seg); + *size = hdr->delta; + return (char *) hdr + sizeof(dsm_seg_hdr); +} + +/* + * Cleanup of DSM cache: set header into default state and zero the memory block. + * This operation can be coupled with the cache dump, so we do it under an external + * hold of the lock. + */ +void +reset_dsm_cache(void) +{ + dsm_seg_hdr *hdr; + char *start; + + Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE)); + + if (aqo_state->dsm_handler == DSM_HANDLE_INVALID) + /* Fast path. No any cached data exists. */ + return; + + Assert(seg); + + hdr = (dsm_seg_hdr *) dsm_segment_address(seg); + start = (char *) hdr + sizeof(dsm_seg_hdr); + + /* Reset the cache */ + memset(start, 0, hdr->delta); + + hdr->delta = 0; + hdr->total_size = temp_storage_size - sizeof(dsm_seg_hdr); +} + +char * +get_cache_address(void) +{ + dsm_seg_hdr *hdr; + + Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || + LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); + + if (aqo_state->dsm_handler != DSM_HANDLE_INVALID) + { + if (!seg) + { + /* Another process created the segment yet. Just attach to. */ + seg = dsm_attach(aqo_state->dsm_handler); + dsm_pin_mapping(seg); + on_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); + } + + hdr = (dsm_seg_hdr *) dsm_segment_address(seg); } else { + /* + * First request for DSM cache in this instance. + * Create the DSM segment. Pin it to live up to instance shutdown. + * Don't forget to detach DSM segment before an exit. 
+ */ seg = dsm_create(temp_storage_size, 0); + dsm_pin_mapping(seg); + dsm_pin_segment(seg); aqo_state->dsm_handler = dsm_segment_handle(seg); + on_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); + + hdr = (dsm_seg_hdr *) dsm_segment_address(seg); + hdr->magic = AQO_SHARED_MAGIC; + hdr->delta = 0; + hdr->total_size = temp_storage_size - sizeof(dsm_seg_hdr); } - temp_storage = dsm_segment_address(seg); - LWLockRelease(&aqo_state->lock); + Assert(seg); + Assert(hdr->magic == AQO_SHARED_MAGIC && hdr->total_size > 0); + + return (char *) hdr + sizeof(dsm_seg_hdr); +} + +uint32 +get_dsm_cache_pos(uint32 size) +{ + dsm_seg_hdr *hdr; + uint32 pos; + + Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || + LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); + + (void) get_cache_address(); + hdr = (dsm_seg_hdr *) dsm_segment_address(seg); + + if (free_space(hdr) < size || size == 0) + elog(ERROR, + "DSM cache can't allcoate a mem block. Required: %u, free: %u", + size, free_space(hdr)); + + pos = hdr->delta; + hdr->delta += size; + Assert(free_space(hdr) >= 0); + return pos; } static void aqo_detach_shmem(int code, Datum arg) { - dsm_handle handler = *(dsm_handle *) arg; - dsm_detach(dsm_find_mapping(handler)); + if (seg != NULL) + dsm_detach(seg); + seg = NULL; } void aqo_init_shmem(void) { bool found; + HASHCTL info; + + aqo_state = NULL; + fss_htab = NULL; LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); aqo_state = ShmemInitStruct("aqo", sizeof(AQOSharedState), &found); @@ -54,8 +180,26 @@ aqo_init_shmem(void) LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId()); aqo_state->dsm_handler = DSM_HANDLE_INVALID; } + + info.keysize = sizeof(htab_key); + info.entrysize = sizeof(htab_entry); + fss_htab = ShmemInitHash("aqo hash", + aqo_htab_max_items, aqo_htab_max_items, + &info, + HASH_ELEM | HASH_BLOBS); + LWLockRelease(AddinShmemInitLock); LWLockRegisterTranche(aqo_state->lock.tranche, "aqo"); - on_shmem_exit(aqo_detach_shmem, (Datum) 
&aqo_state->dsm_handler); +} + +Size +aqo_memsize(void) +{ + Size size; + + size = MAXALIGN(sizeof(AQOSharedState)); + size = add_size(size, hash_estimate_size(aqo_htab_max_items, sizeof(htab_entry))); + + return size; } diff --git a/aqo_shared.h b/aqo_shared.h index ce5b436f..eb5323e0 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -6,6 +6,20 @@ #include "storage/ipc.h" #include "storage/lwlock.h" +#define AQO_SHARED_MAGIC 0x053163 + +typedef struct +{ + /* XXX we assume this struct contains no padding bytes */ + uint64 fs; + int64 fss; +} htab_key; + +typedef struct +{ + htab_key key; + uint32 hdr_off; /* offset of data in DSM cache */ +} htab_entry; typedef struct AQOSharedState { @@ -15,8 +29,15 @@ typedef struct AQOSharedState extern shmem_startup_hook_type prev_shmem_startup_hook; +extern AQOSharedState *aqo_state; +extern HTAB *fss_htab; +extern Size aqo_memsize(void); +extern void reset_dsm_cache(void); +extern void *get_dsm_all(uint32 *size); +extern char *get_cache_address(void); +extern uint32 get_dsm_cache_pos(uint32 size); extern void aqo_init_shmem(void); #endif /* AQO_SHARED_H */ diff --git a/learn_cache.c b/learn_cache.c index 471ea058..dc07c959 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -16,48 +16,43 @@ #include "miscadmin.h" #include "aqo.h" +#include "aqo_shared.h" #include "learn_cache.h" -typedef struct -{ - /* XXX we assume this struct contains no padding bytes */ - uint64 fs; - int64 fss; -} htab_key; typedef struct { + int magic; htab_key key; + int rows; + int cols; + int nrelids; - /* Store ML data "AS IS". 
*/ - int nrows; - int ncols; - double *matrix[aqo_K]; - double targets[aqo_K]; - double rfactors[aqo_K]; - List *relids; -} htab_entry; + /* + * Links to variable data: + * double *matrix[aqo_K]; + * double *targets; + * double *rfactors; + * int *relids; + */ +} dsm_block_hdr; -static HTAB *fss_htab = NULL; -MemoryContext LearnCacheMemoryContext = NULL; bool aqo_learn_statement_timeout = false; -void -lc_init(void) -{ - HASHCTL ctl; +static uint32 init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relids); - Assert(!LearnCacheMemoryContext); - LearnCacheMemoryContext = AllocSetContextCreate(TopMemoryContext, - "lcache context", - ALLOCSET_DEFAULT_SIZES); - ctl.keysize = sizeof(htab_key); - ctl.entrysize = sizeof(htab_entry); - ctl.hcxt = LearnCacheMemoryContext; +/* Calculate, how many data we need to store an ML record. */ +static uint32 +calculate_size(int cols, int nrelids) +{ + uint32 size = sizeof(dsm_block_hdr); /* header's size */ - fss_htab = hash_create("ML AQO cache", 256, &ctl, HASH_ELEM | HASH_BLOBS); + size += sizeof(double) * cols * aqo_K; /* matrix */ + size += 2 * sizeof(double) * aqo_K; /* targets, rfactors */ + size += sizeof(int) * nrelids; /* relids */ + return size; } bool @@ -65,34 +60,81 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) { htab_key key = {fs, fss}; htab_entry *entry; + dsm_block_hdr *hdr; + char *ptr; bool found; int i; - MemoryContext memctx = MemoryContextSwitchTo(LearnCacheMemoryContext); + ListCell *lc; + uint32 size; Assert(fss_htab && aqo_learn_statement_timeout); + size = calculate_size(data->cols, list_length(relids)); + LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); + entry = (htab_entry *) hash_search(fss_htab, &key, HASH_ENTER, &found); if (found) { - /* Clear previous version of the cached data. 
*/ - for (i = 0; i < entry->nrows; ++i) - pfree(entry->matrix[i]); - list_free(entry->relids); + hdr = (dsm_block_hdr *) (get_cache_address() + entry->hdr_off); + + Assert(hdr->magic == AQO_SHARED_MAGIC); + Assert(hdr->key.fs == fs && hdr->key.fss == fss); + + if (data->cols != hdr->cols || list_length(relids) != hdr->nrelids) + { + /* + * Collision found: the same {fs,fss}, but something different. + * For simplicity - just don't update. + */ + LWLockRelease(&aqo_state->lock); + return false; + } + } + else + { + /* Get new block of DSM */ + entry->hdr_off = get_dsm_cache_pos(size); + hdr = (dsm_block_hdr *) (get_cache_address() + entry->hdr_off); + + /* These fields shouldn't change */ + hdr->magic = AQO_SHARED_MAGIC; + hdr->key.fs = fs; + hdr->key.fss = fss; + hdr->cols = data->cols; + hdr->nrelids = list_length(relids); } - entry->nrows = data->rows; - entry->ncols = data->cols; - for (i = 0; i < entry->nrows; ++i) + hdr->rows = data->rows; + ptr = (char *) hdr + sizeof(dsm_block_hdr); /* start point of variable data */ + + /* copy the matrix into DSM storage */ + for (i = 0; i < aqo_K; ++i) { - entry->matrix[i] = palloc(sizeof(double) * data->cols); - memcpy(entry->matrix[i], data->matrix[i], sizeof(double) * data->cols); + if (i < hdr->rows) + memcpy(ptr, data->matrix[i], sizeof(double) * hdr->cols); + ptr += sizeof(double) * data->cols; } - memcpy(entry->targets, data->targets, sizeof(double) * data->rows); - memcpy(entry->rfactors, data->rfactors, sizeof(double) * data->rows); - entry->relids = list_copy(relids); + /* copy targets into DSM storage */ + memcpy(ptr, data->targets, sizeof(double) * hdr->rows); + ptr += sizeof(double) * aqo_K; + + /* copy rfactors into DSM storage */ + memcpy(ptr, data->rfactors, sizeof(double) * hdr->rows); + ptr += sizeof(double) * aqo_K; - MemoryContextSwitchTo(memctx); + /* store relids */ + i = 0; + foreach(lc, relids) + { + memcpy(ptr, &lfirst_int(lc), sizeof(int)); + ptr += sizeof(int); + } + + /* Check the invariant */ 
+ Assert((uint32)(ptr - (char *) hdr) == size); + + LWLockRelease(&aqo_state->lock); return true; } @@ -107,68 +149,129 @@ lc_has_fss(uint64 fs, int fss) Assert(fss_htab); + LWLockAcquire(&aqo_state->lock, LW_SHARED); (void) hash_search(fss_htab, &key, HASH_FIND, &found); + LWLockRelease(&aqo_state->lock); + return found; } /* * Load ML data from a memory cache, not from a table. - * XXX That to do with learning tails, living in the cache? */ bool lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) { - htab_key key = {fs, fss}; - htab_entry *entry; - bool found; - int i; + htab_key key = {fs, fss}; + htab_entry *entry; + bool found; + dsm_block_hdr *hdr; Assert(fss_htab && aqo_learn_statement_timeout); + if (aqo_show_details) + elog(NOTICE, "[AQO] Load ML data for fs %lu, fss %d from the cache", + fs, fss); + + LWLockAcquire(&aqo_state->lock, LW_SHARED); entry = (htab_entry *) hash_search(fss_htab, &key, HASH_FIND, &found); if (!found) + { + LWLockRelease(&aqo_state->lock); return false; + } - if (aqo_show_details) - elog(NOTICE, "[AQO] Load ML data for fs %lu, fss %d from the cache", - fs, fss); + hdr = (dsm_block_hdr *) (get_cache_address() + entry->hdr_off); + Assert(hdr->magic == AQO_SHARED_MAGIC); + Assert(hdr->key.fs == fs && hdr->key.fss == fss); - data->rows = entry->nrows; - Assert(entry->ncols == data->cols); - for (i = 0; i < entry->nrows; ++i) - memcpy(data->matrix[i], entry->matrix[i], sizeof(double) * data->cols); - memcpy(data->targets, entry->targets, sizeof(double) * entry->nrows); - memcpy(data->rfactors, entry->rfactors, sizeof(double) * entry->nrows); - if (relids) - *relids = list_copy(entry->relids); + /* XXX */ + if (hdr->cols != data->cols) + { + LWLockRelease(&aqo_state->lock); + return false; + } + + init_with_dsm(data, hdr, relids); + LWLockRelease(&aqo_state->lock); return true; } -/* - * Remove record from fss cache. Should be done at learning stage of successfully - * finished query execution. 
-*/ -void -lc_remove_fss(uint64 fs, int fss) +static uint32 +init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relids) { - htab_key key = {fs, fss}; - htab_entry *entry; - bool found; - int i; + int i; + char *ptr = (char *) hdr + sizeof(dsm_block_hdr); - if (!aqo_learn_statement_timeout) - return; + Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || + LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); + Assert(hdr->magic == AQO_SHARED_MAGIC); - Assert(fss_htab); + data->rows = hdr->rows; + data->cols = hdr->cols; - entry = (htab_entry *) hash_search(fss_htab, &key, HASH_FIND, &found); - if (!found) + if (data->cols > 0) + { + for (i = 0; i < aqo_K; ++i) + { + if (i < data->rows) + { + data->matrix[i] = palloc(sizeof(double) * data->cols); + memcpy(data->matrix[i], ptr, sizeof(double) * data->cols); + } + ptr += sizeof(double) * data->cols; + } + } + memcpy(data->targets, ptr, sizeof(double) * hdr->rows); + ptr += sizeof(double) * aqo_K; + memcpy(data->rfactors, ptr, sizeof(double) * hdr->rows); + ptr += sizeof(double) * aqo_K; + + if (relids) + { + *relids = NIL; + for (i = 0; i < hdr->nrelids; i++) + { + *relids = lappend_int(*relids, *((int *)ptr)); + ptr += sizeof(int); + } + } + + return calculate_size(hdr->cols, hdr->nrelids); +} + +void +lc_flush_data(void) +{ + char *ptr; + uint32 size; + + if (aqo_state->dsm_handler == DSM_HANDLE_INVALID) + /* Fast path. No any cached data exists. 
*/ return; - for (i = 0; i < entry->nrows; ++i) - pfree(entry->matrix[i]); + LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); + ptr = get_dsm_all(&size); - hash_search(fss_htab, &key, HASH_REMOVE, NULL); + /* Iterate through records and store them into the aqo_data table */ + while(size > 0) + { + dsm_block_hdr *hdr = (dsm_block_hdr *) ptr; + OkNNrdata data; + List *relids; + uint32 delta = 0; + + delta = init_with_dsm(&data, hdr, &relids); + ptr += delta; + size -= delta; + update_fss(hdr->key.fs, hdr->key.fss, &data, relids); + + if (!hash_search(fss_htab, (void *) &hdr->key, HASH_REMOVE, NULL)) + elog(ERROR, "[AQO] Flush: local ML cache is corrupted."); + } + + reset_dsm_cache(); + LWLockRelease(&aqo_state->lock); } /* @@ -189,12 +292,12 @@ lc_assign_hook(bool newval, void *extra) elog(DEBUG5, "[AQO] Cleanup local cache of ML data."); /* Remove all frozen plans from a plancache. */ + LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); hash_seq_init(&status, fss_htab); while ((entry = (htab_entry *) hash_seq_search(&status)) != NULL) { if (!hash_search(fss_htab, (void *) &entry->key, HASH_REMOVE, NULL)) elog(ERROR, "[AQO] The local ML cache is corrupted."); } - - MemoryContextReset(LearnCacheMemoryContext); -} \ No newline at end of file + LWLockRelease(&aqo_state->lock); +} diff --git a/learn_cache.h b/learn_cache.h index 52e4bec2..194f92c2 100644 --- a/learn_cache.h +++ b/learn_cache.h @@ -7,13 +7,13 @@ extern bool aqo_learn_statement_timeout; -extern void lc_init(void); extern bool lc_update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids); extern bool lc_has_fss(uint64 fhash, int fss); extern bool lc_load_fss(uint64 fhash, int fsshash, OkNNrdata *data, List **relids); extern void lc_remove_fss(uint64 fhash, int fss_hash); +extern void lc_flush_data(void); extern void lc_assign_hook(bool newval, void *extra); #endif /* LEARN_CACHE_H */ diff --git a/postprocessing.c b/postprocessing.c index 9a8ab192..7237102f 100644 --- a/postprocessing.c +++ 
b/postprocessing.c @@ -545,9 +545,6 @@ learnOnPlanState(PlanState *p, void *context) learn_sample(&SubplanCtx, aqo_node->relids, learn_rows, rfactor, p->plan, notExecuted); - - if (!ctx->isTimedOut) - lc_remove_fss(query_context.query_hash, aqo_node->fss); } } } @@ -812,6 +809,11 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) { aqo_obj_stat ctx = {NIL, NIL, NIL, query_context.learn_aqo, false}; + /* + * Before learn phase, flush all cached data down to ML base. + */ + lc_flush_data(); + /* * Analyze plan if AQO need to learn or need to collect statistics only. */ diff --git a/storage.c b/storage.c index 46f67e87..44e060e3 100644 --- a/storage.c +++ b/storage.c @@ -74,6 +74,7 @@ open_aqo_relation(char *heaprelnspname, char *heaprelname, relation_close(*hrel, lockmode); goto cleanup; } + return true; cleanup: diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index f21b8a98..d6d24458 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -137,6 +137,11 @@ JOIN aqo_query_texts t ON (t.query_hash = v.fspace_hash) WHERE v.error > 0. AND t.query_text LIKE '%pgbench_accounts%'"); is($res, 3); +$res = $node->safe_psql('postgres', + "SELECT * FROM top_error_queries(10) v + JOIN aqo_query_texts t ON (t.query_hash = v.fspace_hash) + WHERE v.error > 0. AND t.query_text LIKE '%pgbench_accounts%'"); +note("\n TopN: \n $res \n"); $res = $node->safe_psql('postgres', "SELECT v.error, t.query_text FROM top_error_queries(10) v JOIN aqo_query_texts t ON (t.query_hash = v.fspace_hash) From 2f3a259e8d1f02b0393253c6c400af07c990151c Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 14 Apr 2022 10:00:33 +0500 Subject: [PATCH 069/203] Add tests for the 'Learn after an query interruption by timeout' feature. Fix the bug with false finished node. Add some DEBUG messages. Just for conveniency. 
--- Makefile | 1 + expected/statement_timeout.out | 109 +++++++++++++++++++++++++++++++++ learn_cache.c | 3 + machine_learning.c | 4 +- postprocessing.c | 7 ++- sql/statement_timeout.sql | 64 +++++++++++++++++++ 6 files changed, 182 insertions(+), 6 deletions(-) create mode 100644 expected/statement_timeout.out create mode 100644 sql/statement_timeout.sql diff --git a/Makefile b/Makefile index d5dfd1c5..0a03ac48 100755 --- a/Makefile +++ b/Makefile @@ -23,6 +23,7 @@ REGRESS = aqo_disabled \ unsupported \ clean_aqo_data \ plancache \ + statement_timeout \ top_queries fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out new file mode 100644 index 00000000..9d91de22 --- /dev/null +++ b/expected/statement_timeout.out @@ -0,0 +1,109 @@ +-- Check the learning-on-timeout feature +-- For stabilized reproduction autovacuum must be disabled. +CREATE FUNCTION check_estimated_rows(text) RETURNS TABLE (estimated int) +LANGUAGE plpgsql AS $$ +DECLARE + ln text; + tmp text[]; + first_row bool := true; +BEGIN + FOR ln IN + execute format('explain %s', $1) + LOOP + IF first_row THEN + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*)'); + RETURN QUERY SELECT tmp[1]::int; + END IF; + END LOOP; +END; $$; +CREATE TABLE t AS SELECT * FROM generate_series(1,100) AS x; +ANALYZE t; +DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'off'; +SET aqo.learn_statement_timeout = 'on'; +SET statement_timeout = 800; -- [0.8s] +SELECT *, pg_sleep(1) FROM t; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. 
+ERROR: canceling statement due to statement timeout +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- haven't any partial data + check_estimated_rows +---------------------- + 100 +(1 row) + +-- Don't learn because running node has smaller cardinality than an optimizer prediction +SET statement_timeout = 3500; +SELECT *, pg_sleep(1) FROM t; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. +ERROR: canceling statement due to statement timeout +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + check_estimated_rows +---------------------- + 100 +(1 row) + +-- We have a real learning data. +SET statement_timeout = 10000; +SELECT *, pg_sleep(1) FROM t; + x | pg_sleep +---+---------- + 1 | + 2 | + 3 | + 4 | + 5 | +(5 rows) + +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + check_estimated_rows +---------------------- + 5 +(1 row) + +-- Force to make an underestimated prediction +DELETE FROM t WHERE x > 2; +ANALYZE t; +INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); +TRUNCATE aqo_data; +SET statement_timeout = 800; +SELECT *, pg_sleep(1) FROM t; -- Not learned +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. +ERROR: canceling statement due to statement timeout +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + check_estimated_rows +---------------------- + 2 +(1 row) + +SET statement_timeout = 3500; +SELECT *, pg_sleep(1) FROM t; -- Learn! +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. 
+ERROR: canceling statement due to statement timeout +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + check_estimated_rows +---------------------- + 3 +(1 row) + +SET statement_timeout = 5500; +SELECT *, pg_sleep(1) FROM t; -- Get reliable data + x | pg_sleep +---+---------- + 1 | + 2 | + 3 | + 4 | + 5 | +(5 rows) + +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + check_estimated_rows +---------------------- + 5 +(1 row) + +DROP TABLE t; +DROP EXTENSION aqo; diff --git a/learn_cache.c b/learn_cache.c index dc07c959..c3f65d3f 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -86,6 +86,7 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) * Collision found: the same {fs,fss}, but something different. * For simplicity - just don't update. */ + elog(DEBUG5, "[AQO]: A collision found in the temporary storage."); LWLockRelease(&aqo_state->lock); return false; } @@ -134,6 +135,8 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) /* Check the invariant */ Assert((uint32)(ptr - (char *) hdr) == size); + elog(DEBUG5, "DSM entry: %s, targets: %d.", + found ? "Reused" : "New entry", hdr->rows); LWLockRelease(&aqo_state->lock); return true; } diff --git a/machine_learning.c b/machine_learning.c index 1894a266..52c1ab40 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -199,8 +199,7 @@ OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor) return data->rows; } - - if (data->rows < aqo_K) + else if (data->rows < aqo_K) { /* We don't reach a limit of stored neighbors */ @@ -275,6 +274,5 @@ OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor) } } } - return data->rows; } diff --git a/postprocessing.c b/postprocessing.c index 7237102f..d2eee036 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -349,11 +349,12 @@ should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, } /* Has the executor finished its work? 
*/ - if (TupIsNull(ps->ps_ResultTupleSlot) && + if (!ps->instrument->running && TupIsNull(ps->ps_ResultTupleSlot) && ps->instrument->nloops > 0.) /* Node was visited by executor at least once. */ { /* This is much more reliable data. So we can correct our prediction. */ - if (ctx->learn && aqo_show_details && fabs(*nrows - predicted) / predicted > 0.2) + if (ctx->learn && aqo_show_details && + fabs(*nrows - predicted) / predicted > 0.2) elog(NOTICE, "[AQO] Learn on a finished plan node (%lu, %d), " "predicted rows: %.0lf, updated prediction: %.0lf", @@ -693,7 +694,7 @@ aqo_timeout_handler(void) ctx.learn = query_context.learn_aqo; ctx.isTimedOut = true; - elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. Try to learn on partial data."); + elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data."); learnOnPlanState(timeoutCtl.queryDesc->planstate, (void *) &ctx); } diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql new file mode 100644 index 00000000..419d85de --- /dev/null +++ b/sql/statement_timeout.sql @@ -0,0 +1,64 @@ +-- Check the learning-on-timeout feature +-- For stabilized reproduction autovacuum must be disabled. + +CREATE FUNCTION check_estimated_rows(text) RETURNS TABLE (estimated int) +LANGUAGE plpgsql AS $$ +DECLARE + ln text; + tmp text[]; + first_row bool := true; +BEGIN + FOR ln IN + execute format('explain %s', $1) + LOOP + IF first_row THEN + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*)'); + RETURN QUERY SELECT tmp[1]::int; + END IF; + END LOOP; +END; $$; + +CREATE TABLE t AS SELECT * FROM generate_series(1,100) AS x; +ANALYZE t; +DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. 
+ +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'off'; +SET aqo.learn_statement_timeout = 'on'; + +SET statement_timeout = 800; -- [0.8s] +SELECT *, pg_sleep(1) FROM t; +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- haven't any partial data + +-- Don't learn because running node has smaller cardinality than an optimizer prediction +SET statement_timeout = 3500; +SELECT *, pg_sleep(1) FROM t; +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + +-- We have a real learning data. +SET statement_timeout = 10000; +SELECT *, pg_sleep(1) FROM t; +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + +-- Force to make an underestimated prediction +DELETE FROM t WHERE x > 2; +ANALYZE t; +INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); +TRUNCATE aqo_data; + +SET statement_timeout = 800; +SELECT *, pg_sleep(1) FROM t; -- Not learned +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + +SET statement_timeout = 3500; +SELECT *, pg_sleep(1) FROM t; -- Learn! +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + +SET statement_timeout = 5500; +SELECT *, pg_sleep(1) FROM t; -- Get reliable data +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + +DROP TABLE t; +DROP EXTENSION aqo; From a8e4a79a87fa454a67caf1de4a0032fe33ab8e95 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Fri, 8 Apr 2022 16:01:10 +0300 Subject: [PATCH 070/203] Move AQO from a relid based approach to a relation name based approach. It allows us to reuse ML data at different instance and learn on temporary tables. 
--- aqo--1.2--1.3.sql | 6 +-- aqo.h | 30 +++++++-------- cardinality_estimation.c | 20 +++++----- cardinality_hooks.c | 65 ++++++++++++++++++------------- expected/clean_aqo_data.out | 61 +++++++++++++++-------------- hash.c | 50 +++++++++++++++++------- hash.h | 2 +- learn_cache.c | 60 ++++++++++++++++++----------- learn_cache.h | 10 ++--- path_utils.c | 33 +++++++++------- path_utils.h | 3 +- postprocessing.c | 28 +++++++------- sql/clean_aqo_data.sql | 61 +++++++++++++++-------------- storage.c | 77 ++++++++++++++++++++----------------- t/001_pgbench.pl | 28 +++++++++----- utils.c | 16 ++++++++ 16 files changed, 314 insertions(+), 236 deletions(-) diff --git a/aqo--1.2--1.3.sql b/aqo--1.2--1.3.sql index 605e6b99..c29a6f10 100755 --- a/aqo--1.2--1.3.sql +++ b/aqo--1.2--1.3.sql @@ -1,4 +1,4 @@ -ALTER TABLE public.aqo_data ADD COLUMN oids OID [] DEFAULT NULL; +ALTER TABLE public.aqo_data ADD COLUMN oids text [] DEFAULT NULL; -- -- Remove data, related to previously dropped tables, from the AQO tables. 
@@ -9,7 +9,7 @@ DECLARE aqo_queries_row aqo_queries%ROWTYPE; aqo_query_texts_row aqo_query_texts%ROWTYPE; aqo_query_stat_row aqo_query_stat%ROWTYPE; - oid_var oid; + oid_var text; fspace_hash_var bigint; delete_row boolean DEFAULT false; BEGIN @@ -23,7 +23,7 @@ BEGIN IF (aqo_data_row.oids IS NOT NULL) THEN FOREACH oid_var IN ARRAY aqo_data_row.oids LOOP - IF NOT EXISTS (SELECT relname FROM pg_class WHERE oid = oid_var) THEN + IF NOT EXISTS (SELECT relname FROM pg_class WHERE oid::regclass::text = oid_var) THEN delete_row = true; END IF; END LOOP; diff --git a/aqo.h b/aqo.h index 6f3f9018..b43e01a9 100644 --- a/aqo.h +++ b/aqo.h @@ -281,13 +281,12 @@ extern bool find_query(uint64 qhash, QueryContextData *ctx); extern bool update_query(uint64 qhash, uint64 fhash, bool learn_aqo, bool use_aqo, bool auto_tuning); extern bool add_query_text(uint64 query_hash, const char *query_string); -extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, - List **relids, bool isSafe); -extern bool load_fss(uint64 fhash, int fss_hash, OkNNrdata *data, List **relids); -extern bool update_fss_ext(uint64 fhash, int fsshash, OkNNrdata *data, - List *relids, bool isTimedOut); -extern bool update_fss(uint64 fhash, int fss_hash, OkNNrdata *data, - List *relids); +extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relnames, + bool isSafe); +extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames); +extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, + List *relnames, bool isTimedOut); +extern bool update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames); QueryStat *get_aqo_stat(uint64 query_hash); void update_aqo_stat(uint64 query_hash, QueryStat * stat); extern bool my_index_insert(Relation indexRelation, Datum *values, bool *isnull, @@ -308,7 +307,7 @@ extern void print_node_explain(ExplainState *es, PlanState *ps, Plan *plan); /* Cardinality estimation */ double predict_for_relation(List *restrict_clauses, List *selectivities, - 
List *relids, int *fss_hash); + List *relnames, int *fss); /* Query execution statistics collecting hooks */ void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags); @@ -320,13 +319,14 @@ void aqo_ExecutorEnd(QueryDesc *queryDesc); extern void automatical_query_tuning(uint64 query_hash, QueryStat * stat); /* Utilities */ -int int_cmp(const void *a, const void *b); -int double_cmp(const void *a, const void *b); -int *argsort(void *a, int n, size_t es, - int (*cmp) (const void *, const void *)); -int *inverse_permutation(int *a, int n); -QueryStat *palloc_query_stat(void); -void pfree_query_stat(QueryStat *stat); +extern int int64_compare(const void *a, const void *b); +extern int int_cmp(const void *a, const void *b); +extern int double_cmp(const void *a, const void *b); +extern int *argsort(void *a, int n, size_t es, + int (*cmp) (const void *, const void *)); +extern int *inverse_permutation(int *a, int n); +extern QueryStat *palloc_query_stat(void); +extern void pfree_query_stat(QueryStat *stat); /* Selectivity cache for parametrized baserels */ extern void cache_selectivity(int clause_hash, int relid, int global_relid, diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 9bdaff5d..f5202f22 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -26,7 +26,7 @@ #ifdef AQO_DEBUG_PRINT static void predict_debug_output(List *clauses, List *selectivities, - List *relids, int fss_hash, double result) + List *relnames, int fss, double result) { StringInfoData debug_str; ListCell *lc; @@ -42,11 +42,11 @@ predict_debug_output(List *clauses, List *selectivities, appendStringInfo(&debug_str, "%lf ", *s); } - appendStringInfoString(&debug_str, "}, relids: { "); - foreach(lc, relids) + appendStringInfoString(&debug_str, "}, relnames: { "); + foreach(lc, relnames) { - int relid = lfirst_int(lc); - appendStringInfo(&debug_str, "%d ", relid); + String *relname = lfirst_node(String, lc); + appendStringInfo(&debug_str, "%s ", relname->sval); } 
appendStringInfo(&debug_str, "}, result: %lf", result); @@ -60,22 +60,22 @@ predict_debug_output(List *clauses, List *selectivities, */ double predict_for_relation(List *clauses, List *selectivities, - List *relids, int *fss) + List *relnames, int *fss) { double *features; double result; int i; OkNNrdata data; - if (relids == NIL) + if (relnames == NIL) /* * Don't make prediction for query plans without any underlying plane * tables. Use return value -4 for debug purposes. */ return -4.; - *fss = get_fss_for_object(relids, clauses, - selectivities, &data.cols, &features); + *fss = get_fss_for_object(relnames, clauses, selectivities, + &data.cols, &features); if (data.cols > 0) for (i = 0; i < aqo_K; ++i) @@ -94,7 +94,7 @@ predict_for_relation(List *clauses, List *selectivities, result = -1; } #ifdef AQO_DEBUG_PRINT - predict_debug_output(clauses, selectivities, relids, *fss_hash, result); + predict_debug_output(clauses, selectivities, relnames, *fss, result); #endif pfree(features); if (data.cols > 0) diff --git a/cardinality_hooks.c b/cardinality_hooks.c index c63bb57c..00290029 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -139,8 +139,8 @@ void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) { double predicted; - Oid relid; - List *relids = NIL; + RangeTblEntry *rte; + List *relnames = NIL; List *selectivities = NULL; List *clauses; int fss = 0; @@ -161,19 +161,24 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) goto default_estimator; } - relid = planner_rt_fetch(rel->relid, root)->relid; - if (OidIsValid(relid)) - /* Predict for a plane table only. */ - relids = list_make1_int(relid); + rte = planner_rt_fetch(rel->relid, root); + if (rte && OidIsValid(rte->relid)) + { + String *s = makeNode(String); + + /* Predict for a plane table. 
*/ + Assert(rte->eref && rte->eref->aliasname); + s->sval = pstrdup(rte->eref->aliasname); + relnames = list_make1(s); + } clauses = aqo_get_clauses(root, rel->baserestrictinfo); - predicted = predict_for_relation(clauses, selectivities, - relids, &fss); + predicted = predict_for_relation(clauses, selectivities, relnames, &fss); rel->fss_hash = fss; list_free_deep(selectivities); list_free(clauses); - list_free(relids); + list_free(relnames); if (predicted >= 0) { @@ -209,8 +214,8 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, List *param_clauses) { double predicted; - Oid relid = InvalidOid; - List *relids = NIL; + RangeTblEntry *rte = NULL; + List *relnames = NIL; List *allclauses = NULL; List *selectivities = NULL; ListCell *l; @@ -219,7 +224,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, int *args_hash; int *eclass_hash; int current_hash; - int fss = 0; + int fss = 0; if (IsQueryDisabled()) /* Fast path */ @@ -233,7 +238,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, aqo_get_clauses(root, rel->baserestrictinfo)); selectivities = get_selectivities(root, allclauses, rel->relid, JOIN_INNER, NULL); - relid = planner_rt_fetch(rel->relid, root)->relid; + rte = planner_rt_fetch(rel->relid, root); get_eclasses(allclauses, &nargs, &args_hash, &eclass_hash); old_ctx_m = MemoryContextSwitchTo(AQO_cache_mem_ctx); @@ -243,7 +248,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, current_hash = get_clause_hash( ((RestrictInfo *) lfirst(l))->clause, nargs, args_hash, eclass_hash); - cache_selectivity(current_hash, rel->relid, relid, + cache_selectivity(current_hash, rel->relid, rte->relid, *((double *) lfirst(l2))); } @@ -263,11 +268,17 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, goto default_estimator; } - if (OidIsValid(relid)) - /* Predict for a plane table only. */ - relids = list_make1_int(relid); + if (rte && OidIsValid(rte->relid)) + { + String *s = makeNode(String); + + /* Predict for a plane table. 
*/ + Assert(rte->eref && rte->eref->aliasname); + s->sval = pstrdup(rte->eref->aliasname); + relnames = list_make1(s); + } - predicted = predict_for_relation(allclauses, selectivities, relids, &fss); + predicted = predict_for_relation(allclauses, selectivities, relnames, &fss); predicted_ppi_rows = predicted; fss_ppi_hash = fss; @@ -292,7 +303,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, List *restrictlist) { double predicted; - List *relids; + List *relnames; List *outer_clauses; List *inner_clauses; List *allclauses; @@ -318,7 +329,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, goto default_estimator; } - relids = get_list_of_relids(root, rel->relids); + relnames = get_relnames(root, rel->relids); outer_clauses = get_path_clauses(outer_rel->cheapest_total_path, root, &outer_selectivities); inner_clauses = get_path_clauses(inner_rel->cheapest_total_path, root, @@ -329,7 +340,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, list_concat(outer_selectivities, inner_selectivities)); - predicted = predict_for_relation(allclauses, selectivities, relids, &fss); + predicted = predict_for_relation(allclauses, selectivities, relnames, &fss); rel->fss_hash = fss; if (predicted >= 0) @@ -360,7 +371,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, List *clauses) { double predicted; - List *relids; + List *relnames; List *outer_clauses; List *inner_clauses; List *allclauses; @@ -386,7 +397,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, goto default_estimator; } - relids = get_list_of_relids(root, rel->relids); + relnames = get_relnames(root, rel->relids); outer_clauses = get_path_clauses(outer_path, root, &outer_selectivities); inner_clauses = get_path_clauses(inner_path, root, &inner_selectivities); allclauses = list_concat(aqo_get_clauses(root, clauses), @@ -395,7 +406,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, list_concat(outer_selectivities, inner_selectivities)); 
- predicted = predict_for_relation(allclauses, selectivities, relids, &fss); + predicted = predict_for_relation(allclauses, selectivities, relnames, &fss); predicted_ppi_rows = predicted; fss_ppi_hash = fss; @@ -422,13 +433,13 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, child_fss = subpath->parent->fss_hash; else { - List *relids; + List *relnames; List *clauses; List *selectivities = NIL; - relids = get_list_of_relids(root, subpath->parent->relids); + relnames = get_relnames(root, subpath->parent->relids); clauses = get_path_clauses(subpath, root, &selectivities); - (void) predict_for_relation(clauses, selectivities, relids, &child_fss); + (void) predict_for_relation(clauses, selectivities, relnames, &child_fss); } *fss = get_grouped_exprs_hash(child_fss, group_exprs); diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index bc143be7..94551d7d 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -9,7 +9,6 @@ SELECT * FROM a; -- (0 rows) -SELECT 'a'::regclass::oid AS a_oid \gset SELECT clean_aqo_data(); NOTICE: Cleaning aqo_data records clean_aqo_data @@ -24,14 +23,14 @@ NOTICE: Cleaning aqo_data records * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat * should remain */ -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); count ------- 1 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); count ------- 1 @@ -39,7 +38,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = 
ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); count ------- 1 @@ -47,7 +46,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); count ------- 1 @@ -68,14 +67,14 @@ NOTICE: Cleaning aqo_data records * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat, * should be deleted */ -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); count ------- 0 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -84,7 +83,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -93,7 +92,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ 
-137,17 +136,17 @@ SELECT * FROM b CROSS JOIN a; -- (0 rows) -SELECT 'a'::regclass::oid AS a_oid \gset -SELECT 'b'::regclass::oid AS b_oid \gset +-- SELECT 'a'::regclass::oid AS a_oid \gset +-- SELECT 'b'::regclass::oid AS b_oid \gset -- new lines added to aqo_data -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); count ------- 3 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); count ------- 2 @@ -155,7 +154,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); count ------- 2 @@ -163,20 +162,20 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); count ------- 2 (1 row) -SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); count ------- 3 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)); count ------- 2 @@ -184,7 +183,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT 
aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); count ------- 2 @@ -192,7 +191,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); count ------- 2 @@ -212,14 +211,14 @@ NOTICE: Cleaning aqo_data records * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_texts, * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_stat, */ -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); count ------- 0 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -228,7 +227,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -237,7 +236,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - 
aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -245,14 +244,14 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) -- lines corresponding to b_oid in all theese tables should remain -SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); count ------- 1 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -261,7 +260,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -270,7 +269,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -286,14 +285,14 @@ NOTICE: Cleaning aqo_data records (1 row) -- lines corresponding to b_oid in theese tables deleted -SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); count 
------- 0 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -302,7 +301,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -311,7 +310,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- diff --git a/hash.c b/hash.c index 4510032e..d8083fce 100644 --- a/hash.c +++ b/hash.c @@ -31,7 +31,7 @@ static int get_node_hash(Node *node); static int get_unsorted_unsafe_int_array_hash(int *arr, int len); static int get_unordered_int_list_hash(List *lst); -static int get_relidslist_hash(List *relidslist); +static int64 get_relations_hash(List *relnames); static int get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash); @@ -149,7 +149,7 @@ get_grouped_exprs_hash(int child_fss, List *group_exprs) } /* - * For given object (clauselist, selectivities, relidslist) creates feature + * For given object (clauselist, selectivities, relnames) creates feature * subspace: * sets nfeatures * creates and computes fss_hash @@ -158,7 +158,7 @@ 
get_grouped_exprs_hash(int child_fss, List *group_exprs) * Special case for nfeatures == NULL: don't calculate features. */ int -get_fss_for_object(List *relidslist, List *clauselist, +get_fss_for_object(List *relnames, List *clauselist, List *selectivities, int *nfeatures, double **features) { int n; @@ -172,7 +172,7 @@ get_fss_for_object(List *relidslist, List *clauselist, int *eclass_hash; int clauses_hash; int eclasses_hash; - int relidslist_hash; + int relnames_hash; List **args; ListCell *lc; int i, @@ -181,7 +181,7 @@ get_fss_for_object(List *relidslist, List *clauselist, m; int sh = 0, old_sh; - int fss_hash; + int fss_hash; n = list_length(clauselist); @@ -259,13 +259,11 @@ get_fss_for_object(List *relidslist, List *clauselist, /* * Generate feature subspace hash. - * XXX: Remember! that relidslist_hash isn't portable between postgres - * instances. */ clauses_hash = get_int_array_hash(sorted_clauses, n - sh); eclasses_hash = get_int_array_hash(eclass_hash, nargs); - relidslist_hash = get_relidslist_hash(relidslist); - fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relidslist_hash); + relnames_hash = (int) get_relations_hash(relnames); + fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relnames_hash); pfree(clause_hashes); pfree(sorted_clauses); @@ -436,13 +434,37 @@ get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash) } /* - * Computes hash for given list of relids. - * Hash is supposed to be relids-order-insensitive. + * Computes hash for given list of relations. + * Hash is supposed to be relations-order-insensitive. + * Each element of a list must have a String type, */ -int -get_relidslist_hash(List *relidslist) +static int64 +get_relations_hash(List *relnames) { - return get_unordered_int_list_hash(relidslist); + int64 *hashes = palloc(list_length(relnames) * sizeof(int64)); + ListCell *lc; + int64 hash = 0; + int i = 0; + + /* generate array of hashes. 
*/ + foreach(lc, relnames) + { + String *relname = lfirst_node(String, lc); + + hashes[i++] = DatumGetInt64(hash_any_extended( + (unsigned char *) relname->sval, + strlen(relname->sval), 0)); + } + + /* Sort the array to make query insensitive to input order of relations. */ + qsort(hashes, i, sizeof(int64), int64_compare); + + /* Make a final hash value */ + hash = DatumGetInt64(hash_any_extended((unsigned char *) hashes, + i * sizeof(int64), 0)); + + pfree(hashes); + return hash; } /* diff --git a/hash.h b/hash.h index 0a98814b..b33b1990 100644 --- a/hash.h +++ b/hash.h @@ -7,7 +7,7 @@ extern uint64 get_query_hash(Query *parse, const char *query_text); extern bool list_member_uint64(const List *list, uint64 datum); extern List *lappend_uint64(List *list, uint64 datum); extern List *ldelete_uint64(List *list, uint64 datum); -extern int get_fss_for_object(List *relidslist, List *clauselist, +extern int get_fss_for_object(List *relnames, List *clauselist, List *selectivities, int *nfeatures, double **features); extern int get_int_array_hash(int *arr, int len); diff --git a/learn_cache.c b/learn_cache.c index c3f65d3f..f2bbeca5 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -45,18 +45,25 @@ static uint32 init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relids); /* Calculate, how many data we need to store an ML record. 
*/ static uint32 -calculate_size(int cols, int nrelids) +calculate_size(int cols, List *relnames) { - uint32 size = sizeof(dsm_block_hdr); /* header's size */ + uint32 size = sizeof(dsm_block_hdr); /* header's size */ + ListCell *lc; size += sizeof(double) * cols * aqo_K; /* matrix */ size += 2 * sizeof(double) * aqo_K; /* targets, rfactors */ - size += sizeof(int) * nrelids; /* relids */ + + /* Calculate memory size needed to store relation names */ + foreach(lc, relnames) + { + size += strlen(lfirst_node(String, lc)->sval) + 1; + } + return size; } bool -lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) +lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) { htab_key key = {fs, fss}; htab_entry *entry; @@ -69,7 +76,7 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) Assert(fss_htab && aqo_learn_statement_timeout); - size = calculate_size(data->cols, list_length(relids)); + size = calculate_size(data->cols, relnames); LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); entry = (htab_entry *) hash_search(fss_htab, &key, HASH_ENTER, &found); @@ -80,7 +87,7 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) Assert(hdr->magic == AQO_SHARED_MAGIC); Assert(hdr->key.fs == fs && hdr->key.fss == fss); - if (data->cols != hdr->cols || list_length(relids) != hdr->nrelids) + if (data->cols != hdr->cols || list_length(relnames) != hdr->nrelids) { /* * Collision found: the same {fs,fss}, but something different. @@ -102,7 +109,7 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) hdr->key.fs = fs; hdr->key.fss = fss; hdr->cols = data->cols; - hdr->nrelids = list_length(relids); + hdr->nrelids = list_length(relnames); } hdr->rows = data->rows; @@ -124,12 +131,14 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) memcpy(ptr, data->rfactors, sizeof(double) * hdr->rows); ptr += sizeof(double) * aqo_K; - /* store relids */ - i = 0; - foreach(lc, relids) + /* store strings of relation names. 
Each string ends with 0-byte */ + foreach(lc, relnames) { - memcpy(ptr, &lfirst_int(lc), sizeof(int)); - ptr += sizeof(int); + char *relname = lfirst_node(String, lc)->sval; + int len = strlen(relname) + 1; + + memcpy(ptr, relname, len); + ptr += len; } /* Check the invariant */ @@ -163,7 +172,7 @@ lc_has_fss(uint64 fs, int fss) * Load ML data from a memory cache, not from a table. */ bool -lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) +lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) { htab_key key = {fs, fss}; htab_entry *entry; @@ -195,13 +204,13 @@ lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) return false; } - init_with_dsm(data, hdr, relids); + init_with_dsm(data, hdr, relnames); LWLockRelease(&aqo_state->lock); return true; } static uint32 -init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relids) +init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relnames) { int i; char *ptr = (char *) hdr + sizeof(dsm_block_hdr); @@ -225,22 +234,27 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relids) ptr += sizeof(double) * data->cols; } } + memcpy(data->targets, ptr, sizeof(double) * hdr->rows); ptr += sizeof(double) * aqo_K; memcpy(data->rfactors, ptr, sizeof(double) * hdr->rows); ptr += sizeof(double) * aqo_K; - if (relids) + if (relnames) { - *relids = NIL; + *relnames = NIL; for (i = 0; i < hdr->nrelids; i++) { - *relids = lappend_int(*relids, *((int *)ptr)); - ptr += sizeof(int); + String *s = makeNode(String); + int len = strlen(ptr) + 1; + + s->sval = pstrdup(ptr); + *relnames = lappend(*relnames, s); + ptr += len; } } - return calculate_size(hdr->cols, hdr->nrelids); + return calculate_size(hdr->cols, *relnames); } void @@ -261,13 +275,13 @@ lc_flush_data(void) { dsm_block_hdr *hdr = (dsm_block_hdr *) ptr; OkNNrdata data; - List *relids; + List *relnames; uint32 delta = 0; - delta = init_with_dsm(&data, hdr, &relids); + delta = init_with_dsm(&data, hdr, &relnames); ptr += delta; 
size -= delta; - update_fss(hdr->key.fs, hdr->key.fss, &data, relids); + update_fss(hdr->key.fs, hdr->key.fss, &data, relnames); if (!hash_search(fss_htab, (void *) &hdr->key, HASH_REMOVE, NULL)) elog(ERROR, "[AQO] Flush: local ML cache is corrupted."); diff --git a/learn_cache.h b/learn_cache.h index 194f92c2..eccca22a 100644 --- a/learn_cache.h +++ b/learn_cache.h @@ -7,12 +7,10 @@ extern bool aqo_learn_statement_timeout; -extern bool lc_update_fss(uint64 fhash, int fsshash, OkNNrdata *data, - List *relids); -extern bool lc_has_fss(uint64 fhash, int fss); -extern bool lc_load_fss(uint64 fhash, int fsshash, OkNNrdata *data, - List **relids); -extern void lc_remove_fss(uint64 fhash, int fss_hash); +extern bool lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames); +extern bool lc_has_fss(uint64 fs, int fss); +extern bool lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames); +extern void lc_remove_fss(uint64 fs, int fss); extern void lc_flush_data(void); extern void lc_assign_hook(bool newval, void *extra); diff --git a/path_utils.c b/path_utils.c index e5b4e0ad..089b6133 100644 --- a/path_utils.c +++ b/path_utils.c @@ -125,14 +125,14 @@ get_selectivities(PlannerInfo *root, /* * Transforms given relids from path optimization stage format to list of - * an absolute (independent on query optimization context) relids. + * an absolute (independent on query optimization context) relnames. 
*/ List * -get_list_of_relids(PlannerInfo *root, Relids relids) +get_relnames(PlannerInfo *root, Relids relids) { - int i; - RangeTblEntry *entry; - List *l = NIL; + int i; + RangeTblEntry *rte; + List *l = NIL; if (relids == NULL) return NIL; @@ -146,9 +146,14 @@ get_list_of_relids(PlannerInfo *root, Relids relids) i = -1; while ((i = bms_next_member(relids, i)) >= 0) { - entry = planner_rt_fetch(i, root); - if (OidIsValid(entry->relid)) - l = lappend_int(l, entry->relid); + rte = planner_rt_fetch(i, root); + if (OidIsValid(rte->relid)) + { + String *s = makeNode(String); + + s->sval = pstrdup(rte->eref->aliasname); + l = lappend(l, s); + } } return l; } @@ -422,9 +427,9 @@ is_appropriate_path(Path *path) void aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) { - bool is_join_path; - Plan *plan = *dest; - AQOPlanNode *node; + bool is_join_path; + Plan *plan = *dest; + AQOPlanNode *node; if (prev_create_plan_hook) prev_create_plan_hook(root, src, dest); @@ -462,7 +467,7 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) (*dest)->lefttree->targetlist); /* Copy bare expressions for further AQO learning case. 
*/ node->grouping_exprs = copyObject(groupExprs); - node->relids = get_list_of_relids(root, ap->subpath->parent->relids); + node->relids = get_relnames(root, ap->subpath->parent->relids); node->jointype = JOIN_INNER; } else if (is_appropriate_path(src)) @@ -474,7 +479,7 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) } node->relids = list_concat(node->relids, - get_list_of_relids(root, src->parent->relids)); + get_relnames(root, src->parent->relids)); if (src->parallel_workers > 0) node->parallel_divisor = get_parallel_divisor(src); @@ -644,7 +649,7 @@ aqo_store_upper_signature_hook(PlannerInfo *root, void *extra) { A_Const *fss_node = makeNode(A_Const); - List *relids; + List *relnames; List *clauses; List *selectivities; diff --git a/path_utils.h b/path_utils.h index 5ee4bba5..54ee181d 100644 --- a/path_utils.h +++ b/path_utils.h @@ -16,6 +16,7 @@ typedef struct AQOPlanNode ExtensibleNode node; bool had_path; List *relids; + List *temp_relnames; /* We store name of temporary table because OID by-default haven't sense at other backends. 
*/ List *clauses; List *selectivities; @@ -47,7 +48,7 @@ extern List *get_selectivities(PlannerInfo *root, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo); -extern List *get_list_of_relids(PlannerInfo *root, Relids relids); +extern List *get_relnames(PlannerInfo *root, Relids relids); extern List *get_path_clauses(Path *path, PlannerInfo *root, diff --git a/postprocessing.c b/postprocessing.c index d2eee036..dd420bce 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -56,10 +56,10 @@ static char *PlanStateInfo = "PlanStateInfo"; /* Query execution statistics collecting utilities */ -static void atomic_fss_learn_step(uint64 fhash, int fss_hash, OkNNrdata *data, +static void atomic_fss_learn_step(uint64 fhash, int fss, OkNNrdata *data, double *features, double target, double rfactor, - List *relids, bool isTimedOut); + List *relnames, bool isTimedOut); static bool learnOnPlanState(PlanState *p, void *context); static void learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, double learned, double rfactor, Plan *plan, @@ -90,8 +90,8 @@ static bool ExtractFromQueryEnv(QueryDesc *queryDesc); */ static void atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, - double *features, double target, double rfactor, - List *relids, bool isTimedOut) + double *features, double target, double rfactor, + List *relnames, bool isTimedOut) { LOCKTAG tag; @@ -102,13 +102,13 @@ atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, data->rows = 0; data->rows = OkNNr_learn(data, features, target, rfactor); - update_fss_ext(fs, fss, data, relids, isTimedOut); + update_fss_ext(fs, fss, data, relnames, isTimedOut); LockRelease(&tag, ExclusiveLock, false); } static void -learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, +learn_agg_sample(aqo_obj_stat *ctx, List *relnames, double learned, double rfactor, Plan *plan, bool notExecuted) { AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); @@ -127,7 +127,7 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, 
return; target = log(learned); - child_fss = get_fss_for_object(relidslist, ctx->clauselist, NIL, NULL, NULL); + child_fss = get_fss_for_object(relnames, ctx->clauselist, NIL, NULL, NULL); fss = get_grouped_exprs_hash(child_fss, aqo_node->grouping_exprs); memset(&data, 0, sizeof(OkNNrdata)); @@ -136,7 +136,7 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, /* Critical section */ atomic_fss_learn_step(fhash, fss, &data, NULL, - target, rfactor, relidslist, ctx->isTimedOut); + target, rfactor, relnames, ctx->isTimedOut); /* End of critical section */ } @@ -145,7 +145,7 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, * true cardinalities) performs learning procedure. */ static void -learn_sample(aqo_obj_stat *ctx, List *relidslist, +learn_sample(aqo_obj_stat *ctx, List *relnames, double learned, double rfactor, Plan *plan, bool notExecuted) { AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); @@ -158,8 +158,8 @@ learn_sample(aqo_obj_stat *ctx, List *relidslist, memset(&data, 0, sizeof(OkNNrdata)); target = log(learned); - fss = get_fss_for_object(relidslist, ctx->clauselist, - ctx->selectivities, &data.cols, &features); + fss = get_fss_for_object(relnames, ctx->clauselist, + ctx->selectivities, &data.cols, &features); /* Only Agg nodes can have non-empty a grouping expressions list. */ Assert(!IsA(plan, Agg) || aqo_node->grouping_exprs != NIL); @@ -177,7 +177,7 @@ learn_sample(aqo_obj_stat *ctx, List *relidslist, /* Critical section */ atomic_fss_learn_step(fs, fss, &data, features, target, rfactor, - relidslist, ctx->isTimedOut); + relnames, ctx->isTimedOut); /* End of critical section */ if (data.cols > 0) @@ -192,9 +192,7 @@ learn_sample(aqo_obj_stat *ctx, List *relidslist, * the same selectivities of clauses as were used at query optimization stage. 
*/ List * -restore_selectivities(List *clauselist, - List *relidslist, - JoinType join_type, +restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, bool was_parametrized) { List *lst = NIL; diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index acd64b16..6f09d62f 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -5,7 +5,6 @@ DROP TABLE IF EXISTS a; DROP TABLE IF EXISTS b; CREATE TABLE a(); SELECT * FROM a; -SELECT 'a'::regclass::oid AS a_oid \gset SELECT clean_aqo_data(); /* @@ -15,15 +14,15 @@ SELECT clean_aqo_data(); * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat * should remain */ -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); DROP TABLE a; SELECT clean_aqo_data(); @@ -35,17 +34,17 @@ SELECT clean_aqo_data(); * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat, * should be deleted */ -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); SELECT count(*) FROM 
aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); CREATE TABLE a(); @@ -63,29 +62,29 @@ CREATE TABLE b(); SELECT * FROM a; SELECT * FROM b; SELECT * FROM b CROSS JOIN a; -SELECT 'a'::regclass::oid AS a_oid \gset -SELECT 'b'::regclass::oid AS b_oid \gset +-- SELECT 'a'::regclass::oid AS a_oid \gset +-- SELECT 'b'::regclass::oid AS b_oid \gset -- new lines added to aqo_data -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM 
aqo_data WHERE 'a' = ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); -SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); DROP TABLE a; SELECT clean_aqo_data(); @@ -96,48 +95,48 @@ SELECT clean_aqo_data(); * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_texts, * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_stat, */ -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = 
aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); -- lines corresponding to b_oid in all theese tables should remain -SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = 
aqo_queries.query_hash); DROP TABLE b; SELECT clean_aqo_data(); -- lines corresponding to b_oid in theese tables deleted -SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); DROP EXTENSION aqo; \ No newline at end of file diff --git a/storage.c b/storage.c index 44e060e3..060c2134 100644 --- a/storage.c +++ b/storage.c @@ -323,60 +323,63 @@ add_query_text(uint64 qhash, const char *query_string) static ArrayType * -form_oids_vector(List *relids) +form_strings_vector(List *relnames) { - Datum *oids; + Datum *rels; ArrayType *array; ListCell *lc; int i = 0; - if (relids == NIL) + if (relnames == NIL) return NULL; - oids = (Datum *) palloc(list_length(relids) * sizeof(Datum)); + rels = (Datum *) palloc(list_length(relnames) * sizeof(Datum)); - foreach(lc, relids) + foreach(lc, relnames) { - Oid relid = lfirst_oid(lc); + char *relname = (lfirst_node(String, lc))->sval; - 
oids[i++] = ObjectIdGetDatum(relid); + rels[i++] = CStringGetTextDatum(relname); } - Assert(i == list_length(relids)); - array = construct_array(oids, i, OIDOID, sizeof(Oid), true, TYPALIGN_INT); - pfree(oids); + array = construct_array(rels, i, TEXTOID, -1, false, TYPALIGN_INT); + pfree(rels); return array; } static List * -deform_oids_vector(Datum datum) +deform_strings_vector(Datum datum) { ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); Datum *values; int i; int nelems = 0; - List *relids = NIL; + List *relnames = NIL; - deconstruct_array(array, - OIDOID, sizeof(Oid), true, TYPALIGN_INT, + deconstruct_array(array, TEXTOID, -1, false, TYPALIGN_INT, &values, NULL, &nelems); for (i = 0; i < nelems; ++i) - relids = lappend_oid(relids, DatumGetObjectId(values[i])); + { + String *s = makeNode(String); + + s->sval = pstrdup(TextDatumGetCString(values[i])); + relnames = lappend(relnames, s); + } pfree(values); pfree(array); - return relids; + return relnames; } bool -load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relids, bool isSafe) +load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relnames, bool isSafe) { if (isSafe && (!aqo_learn_statement_timeout || !lc_has_fss(fs, fss))) - return load_fss(fs, fss, data, relids); + return load_fss(fs, fss, data, relnames); else { Assert(aqo_learn_statement_timeout); - return lc_load_fss(fs, fss, data, relids); + return lc_load_fss(fs, fss, data, relnames); } } @@ -395,7 +398,7 @@ load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relids, bool isSafe) * objects in the given feature space */ bool -load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) +load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) { Relation hrel; Relation irel; @@ -439,8 +442,8 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) deform_vector(values[4], data->targets, &(data->rows)); deform_vector(values[6], data->rfactors, &(data->rows)); - if (relids != NULL) - *relids = 
deform_oids_vector(values[5]); + if (relnames != NULL) + *relnames = deform_strings_vector(values[5]); } else elog(ERROR, "unexpected number of features for hash (" \ @@ -460,13 +463,13 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) } bool -update_fss_ext(uint64 fs, int fsshash, OkNNrdata *data, List *relids, +update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *relnames, bool isTimedOut) { if (!isTimedOut) - return update_fss(fs, fsshash, data, relids); + return update_fss(fs, fss, data, relnames); else - return lc_update_fss(fs, fsshash, data, relids); + return lc_update_fss(fs, fss, data, relnames); } /* @@ -482,7 +485,7 @@ update_fss_ext(uint64 fs, int fsshash, OkNNrdata *data, List *relids, * Caller guaranteed that no one AQO process insert or update this data row. */ bool -update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) +update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) { Relation hrel; Relation irel; @@ -514,9 +517,8 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) tupDesc = RelationGetDescr(hrel); InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 2, 0); - - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fhash)); - ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fsshash)); + ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fs)); + ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss)); index_rescan(scan, key, 2, NULL, 0); @@ -525,8 +527,8 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) if (!find_ok) { - values[0] = Int64GetDatum(fhash); - values[1] = Int32GetDatum(fsshash); + values[0] = Int64GetDatum(fs); + values[1] = Int32GetDatum(fss); values[2] = Int32GetDatum(data->cols); if (data->cols > 0) @@ -537,7 +539,7 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) values[4] = PointerGetDatum(form_vector(data->targets, 
data->rows)); /* Form array of relids. Only once. */ - values[5] = PointerGetDatum(form_oids_vector(relids)); + values[5] = PointerGetDatum(form_strings_vector(relnames)); if ((void *) values[5] == NULL) isnull[5] = true; values[6] = PointerGetDatum(form_vector(data->rfactors, data->rows)); @@ -550,7 +552,7 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) */ simple_heap_insert(hrel, tuple); my_index_insert(irel, values, isnull, &(tuple->t_self), - hrel, UNIQUE_CHECK_YES); + hrel, UNIQUE_CHECK_YES); } else if (!TransactionIdIsValid(snap.xmin) && !TransactionIdIsValid(snap.xmax)) { @@ -570,8 +572,7 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) &update_indexes)) { if (update_indexes) - my_index_insert(irel, values, isnull, - &(nw_tuple->t_self), + my_index_insert(irel, values, isnull, &(nw_tuple->t_self), hrel, UNIQUE_CHECK_YES); result = true; } @@ -581,9 +582,15 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) * Ooops, somebody concurrently updated the tuple. It is possible * only in the case of changes made by third-party code. */ +<<<<<<< HEAD elog(ERROR, "AQO data piece ("UINT64_FORMAT" %d) concurrently" " updated by a stranger backend.", fhash, fsshash); +======= + elog(ERROR, "AQO data piece (%ld %d) concurrently updated" + " by a stranger backend.", + fs, fss); +>>>>>>> ecac693 (Move AQO from a relid based approach to a relation name based approach.) result = false; } } diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index d6d24458..c29174fe 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -210,10 +210,10 @@ # Number of rows in aqo_data: related to pgbench test and total value. 
my $pgb_fss_count = $node->safe_psql('postgres', " SELECT count(*) FROM aqo_data - WHERE $aoid = ANY(oids) OR - $boid = ANY(oids) OR - $toid = ANY(oids) OR - $hoid = ANY(oids) + WHERE $aoid ::regclass::text = ANY(oids) OR + $boid ::regclass::text = ANY(oids) OR + $toid ::regclass::text = ANY(oids) OR + $hoid ::regclass::text = ANY(oids) "); $fss_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_data;"); @@ -223,10 +223,10 @@ WHERE fspace_hash IN ( SELECT fspace_hash FROM aqo_data WHERE - $aoid = ANY(oids) OR - $boid = ANY(oids) OR - $toid = ANY(oids) OR - $hoid = ANY(oids) + $aoid ::regclass::text = ANY(oids) OR + $boid ::regclass::text = ANY(oids) OR + $toid ::regclass::text = ANY(oids) OR + $hoid ::regclass::text = ANY(oids) ) "); $fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries;"); @@ -236,7 +236,11 @@ SELECT count(*) FROM aqo_query_texts WHERE query_hash IN ( SELECT fspace_hash FROM aqo_data - WHERE $aoid = ANY(oids) OR $boid = ANY(oids) OR $toid = ANY(oids) OR $hoid = ANY(oids) + WHERE + $aoid ::regclass::text = ANY(oids) OR + $boid ::regclass::text = ANY(oids) OR + $toid ::regclass::text = ANY(oids) OR + $hoid ::regclass::text = ANY(oids) ) "); $fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); @@ -246,7 +250,11 @@ SELECT count(*) FROM aqo_query_texts WHERE query_hash IN ( SELECT fspace_hash FROM aqo_data - WHERE $aoid = ANY(oids) OR $boid = ANY(oids) OR $toid = ANY(oids) OR $hoid = ANY(oids) + WHERE + $aoid ::regclass::text = ANY(oids) OR + $boid ::regclass::text = ANY(oids) OR + $toid ::regclass::text = ANY(oids) OR + $hoid ::regclass::text = ANY(oids) ) "); $stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); diff --git a/utils.c b/utils.c index 8fc0d186..3fda40d6 100644 --- a/utils.c +++ b/utils.c @@ -24,6 +24,22 @@ static int (*argsort_value_cmp) (const void *, const void *); static int argsort_cmp(const void *a, const void *b); +/* + * qsort 
comparator functions + */ + +/* int64 comparator for pg_qsort. */ +int +int64_compare(const void *va, const void *vb) +{ + int64 a = *((const int64 *) va); + int64 b = *((const int64 *) vb); + + if (a == b) + return 0; + return (a > b) ? 1 : -1; +} + /* * Function for qsorting an integer arrays */ From 3ba92a2a4b7864f46833eef945fa073c3601e81b Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 12 Apr 2022 11:02:57 +0500 Subject: [PATCH 071/203] Bugfix. Detach DSM segment earlier, before cleaning of memory context. Bugfix. Small mistake during calculation of DSM segment size. --- aqo_shared.c | 7 +++---- learn_cache.c | 7 +++++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/aqo_shared.c b/aqo_shared.c index 5d4edb6f..84e6eadb 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -50,7 +50,7 @@ get_dsm_all(uint32 *size) seg = dsm_attach(aqo_state->dsm_handler); Assert(seg); dsm_pin_mapping(seg); - on_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); + before_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); } hdr = (dsm_seg_hdr *) dsm_segment_address(seg); @@ -102,7 +102,7 @@ get_cache_address(void) /* Another process created the segment yet. Just attach to. 
*/ seg = dsm_attach(aqo_state->dsm_handler); dsm_pin_mapping(seg); - on_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); + before_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); } hdr = (dsm_seg_hdr *) dsm_segment_address(seg); @@ -118,7 +118,7 @@ get_cache_address(void) dsm_pin_mapping(seg); dsm_pin_segment(seg); aqo_state->dsm_handler = dsm_segment_handle(seg); - on_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); + before_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); hdr = (dsm_seg_hdr *) dsm_segment_address(seg); hdr->magic = AQO_SHARED_MAGIC; @@ -189,7 +189,6 @@ aqo_init_shmem(void) HASH_ELEM | HASH_BLOBS); LWLockRelease(AddinShmemInitLock); - LWLockRegisterTranche(aqo_state->lock.tranche, "aqo"); } diff --git a/learn_cache.c b/learn_cache.c index f2bbeca5..35cfd57a 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -252,9 +252,11 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relnames) *relnames = lappend(*relnames, s); ptr += len; } + return calculate_size(hdr->cols, *relnames); } - return calculate_size(hdr->cols, *relnames); + /* It is just read operation. No any interest in size calculation. */ + return 0; } void @@ -275,10 +277,11 @@ lc_flush_data(void) { dsm_block_hdr *hdr = (dsm_block_hdr *) ptr; OkNNrdata data; - List *relnames; + List *relnames = NIL; uint32 delta = 0; delta = init_with_dsm(&data, hdr, &relnames); + Assert(delta > 0); ptr += delta; size -= delta; update_fss(hdr->key.fs, hdr->key.fss, &data, relnames); From ff05962d6560642064cf14868682bf4374558700 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 20 Apr 2022 11:28:19 +0500 Subject: [PATCH 072/203] Add the show_cardinality_errors routine. Add into AQO SQL interface one more function for an quick check of cardinality errors of last execution of each controlled query. 
--- aqo--1.3--1.4.sql | 29 ++++++++++++++++++++++++ expected/gucs.out | 7 ++++++ expected/unsupported.out | 49 ++++++++++++++++++++++++++++++++++++++++ sql/gucs.sql | 3 +++ sql/unsupported.sql | 16 +++++++++++++ 5 files changed, 104 insertions(+) diff --git a/aqo--1.3--1.4.sql b/aqo--1.3--1.4.sql index 517a6911..16891d34 100755 --- a/aqo--1.3--1.4.sql +++ b/aqo--1.3--1.4.sql @@ -4,3 +4,32 @@ \echo Use "ALTER EXTENSION aqo UPDATE TO '1.4'" to load this file. \quit ALTER TABLE public.aqo_data ADD COLUMN reliability double precision []; + +-- +-- Get IDs of queries having the largest cardinality error when last executed. +-- num - sequental number. Smaller number corresponds to higher error. +-- qhash - ID of a query. +-- error - AQO error calculated over plan nodes of the query. +-- +CREATE OR REPLACE FUNCTION public.show_cardinality_errors() +RETURNS TABLE(num bigint, id bigint, error float) +AS $$ +BEGIN + RETURN QUERY + SELECT + row_number() OVER (ORDER BY (cerror, qhash) DESC) AS nn, + qhash, cerror + FROM ( + SELECT + aq.query_hash AS qhash, + cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + ON aq.query_hash = aqs.query_hash + WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) + ) AS q1 + ORDER BY nn ASC; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION public.show_cardinality_errors() IS +'Get cardinality error of last query execution. Return queries having the largest error.'; diff --git a/expected/gucs.out b/expected/gucs.out index 6a28de78..095ea9f1 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -28,4 +28,11 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) JOINS: 0 (6 rows) +-- Check existence of the interface functions. 
+SELECT obj_description('public.show_cardinality_errors'::regproc::oid); + obj_description +----------------------------------------------------------------------------------------- + Get cardinality error of last query execution. Return queries having the largest error. +(1 row) + DROP EXTENSION aqo; diff --git a/expected/unsupported.out b/expected/unsupported.out index 30de424d..fc01998c 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -538,5 +538,54 @@ EXPLAIN (COSTS OFF) JOINS: 0 (9 rows) +-- XXX: Do we stuck into an unstable behavior of an error value? +-- Live with this variant of the test for some time. +SELECT + num, + to_char(error, '9.99EEEE')::text AS error +FROM public.show_cardinality_errors() +WHERE error > 0.; + num | error +-----+----------- + 1 | 9.69e+02 + 2 | 1.15e+02 + 3 | 3.00e+01 + 4 | 3.00e+01 + 5 | 3.00e+01 + 6 | 1.33e+00 +(6 rows) + DROP TABLE t,t1 CASCADE; +SELECT public.clean_aqo_data(); +NOTICE: Cleaning aqo_data records + clean_aqo_data +---------------- + +(1 row) + +-- TODO: figure out with remaining queries in the ML storage. 
+SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text +FROM public.show_cardinality_errors() cef, aqo_query_texts aqt +WHERE aqt.query_hash = cef.id; + num | error | query_text +-----+-----------+------------------------------------------------------------------------------------------- + 1 | 9.69e+02 | SELECT str FROM expln(' + + | | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | | SELECT count(*) FROM + + | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + + | | JOIN + + | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2+ + | | ON q1.x = q2.x+1; + + | | ') AS str WHERE str NOT LIKE '%Memory Usage%'; + 2 | 3.27e+02 | SELECT + + | | num, + + | | to_char(error, '9.99EEEE')::text AS error + + | | FROM public.show_cardinality_errors() + + | | WHERE error > 0.; + 3 | 0.00e+00 | CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; + 4 | 0.00e+00 | SELECT public.clean_aqo_data(); + 5 | 0.00e+00 | CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + + | | FROM generate_series(1,1000) AS gs; +(5 rows) + DROP EXTENSION aqo; diff --git a/sql/gucs.sql b/sql/gucs.sql index c8cc8f36..a5c999a4 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -11,4 +11,7 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; +-- Check existence of the interface functions. +SELECT obj_description('public.show_cardinality_errors'::regproc::oid); + DROP EXTENSION aqo; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 472ea5d9..e87fda31 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -154,5 +154,21 @@ ANALYZE t; EXPLAIN (COSTS OFF) SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; +-- XXX: Do we stuck into an unstable behavior of an error value? +-- Live with this variant of the test for some time. 
+SELECT + num, + to_char(error, '9.99EEEE')::text AS error +FROM public.show_cardinality_errors() +WHERE error > 0.; + DROP TABLE t,t1 CASCADE; + +SELECT public.clean_aqo_data(); + +-- TODO: figure out with remaining queries in the ML storage. +SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text +FROM public.show_cardinality_errors() cef, aqo_query_texts aqt +WHERE aqt.query_hash = cef.id; + DROP EXTENSION aqo; From a149a7af0c2f5e5e5e707c7b61112ca7ebfa90da Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 19 Apr 2022 16:46:46 +0500 Subject: [PATCH 073/203] Bugfixes: 1. Increase stability of the pgbench test. 2. Open subsidiary AQO relations more carefully. --- storage.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/storage.c b/storage.c index 060c2134..4b05bf11 100644 --- a/storage.c +++ b/storage.c @@ -582,15 +582,9 @@ update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) * Ooops, somebody concurrently updated the tuple. It is possible * only in the case of changes made by third-party code. */ -<<<<<<< HEAD elog(ERROR, "AQO data piece ("UINT64_FORMAT" %d) concurrently" " updated by a stranger backend.", fhash, fsshash); -======= - elog(ERROR, "AQO data piece (%ld %d) concurrently updated" - " by a stranger backend.", - fs, fss); ->>>>>>> ecac693 (Move AQO from a relid based approach to a relation name based approach.) result = false; } } From 27e47723cb79774ac0d8392416bc47a28df6ee94 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 4 May 2022 09:39:23 +0500 Subject: [PATCH 074/203] Reconcile backpatched (PG 15 -> 13) features with the code of PG13. 
--- aqo.c | 2 +- cardinality_estimation.c | 4 ++-- cardinality_hooks.c | 10 ++-------- expected/unsupported.out | 7 ++++--- hash.c | 6 +++--- learn_cache.c | 8 +++----- path_utils.c | 11 +++-------- sql/unsupported.sql | 3 ++- storage.c | 13 ++++++++----- 9 files changed, 28 insertions(+), 36 deletions(-) diff --git a/aqo.c b/aqo.c index a0b2dccf..3e0210e8 100644 --- a/aqo.c +++ b/aqo.c @@ -247,7 +247,7 @@ _PG_init(void) RegisterResourceReleaseCallback(aqo_free_callback, NULL); RegisterAQOPlanNodeMethods(); - MarkGUCPrefixReserved("aqo"); + EmitWarningsOnPlaceholders("aqo"); RequestAddinShmemSpace(aqo_memsize()); } diff --git a/cardinality_estimation.c b/cardinality_estimation.c index f5202f22..7740528a 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -45,8 +45,8 @@ predict_debug_output(List *clauses, List *selectivities, appendStringInfoString(&debug_str, "}, relnames: { "); foreach(lc, relnames) { - String *relname = lfirst_node(String, lc); - appendStringInfo(&debug_str, "%s ", relname->sval); + Value *relname = lfirst_node(String, lc); + appendStringInfo(&debug_str, "%s ", valStr(relname)); } appendStringInfo(&debug_str, "}, result: %lf", result); diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 00290029..1da20880 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -164,12 +164,9 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) rte = planner_rt_fetch(rel->relid, root); if (rte && OidIsValid(rte->relid)) { - String *s = makeNode(String); - /* Predict for a plane table. */ Assert(rte->eref && rte->eref->aliasname); - s->sval = pstrdup(rte->eref->aliasname); - relnames = list_make1(s); + relnames = list_make1(makeString(pstrdup(rte->eref->aliasname))); } clauses = aqo_get_clauses(root, rel->baserestrictinfo); @@ -270,12 +267,9 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, if (rte && OidIsValid(rte->relid)) { - String *s = makeNode(String); - /* Predict for a plane table. 
*/ Assert(rte->eref && rte->eref->aliasname); - s->sval = pstrdup(rte->eref->aliasname); - relnames = list_make1(s); + relnames = list_make1(makeString(pstrdup(rte->eref->aliasname))); } predicted = predict_for_relation(allclauses, selectivities, relnames, &fss); diff --git a/expected/unsupported.out b/expected/unsupported.out index fc01998c..9b5e67f7 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -566,7 +566,8 @@ NOTICE: Cleaning aqo_data records -- TODO: figure out with remaining queries in the ML storage. SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text FROM public.show_cardinality_errors() cef, aqo_query_texts aqt -WHERE aqt.query_hash = cef.id; +WHERE aqt.query_hash = cef.id +ORDER BY (error, md5(query_text)) DESC; num | error | query_text -----+-----------+------------------------------------------------------------------------------------------- 1 | 9.69e+02 | SELECT str FROM expln(' + @@ -582,9 +583,9 @@ WHERE aqt.query_hash = cef.id; | | to_char(error, '9.99EEEE')::text AS error + | | FROM public.show_cardinality_errors() + | | WHERE error > 0.; - 3 | 0.00e+00 | CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; + 5 | 0.00e+00 | CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; 4 | 0.00e+00 | SELECT public.clean_aqo_data(); - 5 | 0.00e+00 | CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + + 3 | 0.00e+00 | CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + | | FROM generate_series(1,1000) AS gs; (5 rows) diff --git a/hash.c b/hash.c index d8083fce..1f8f8112 100644 --- a/hash.c +++ b/hash.c @@ -449,11 +449,11 @@ get_relations_hash(List *relnames) /* generate array of hashes. 
*/ foreach(lc, relnames) { - String *relname = lfirst_node(String, lc); + Value *relname = (Value *) lfirst(lc); hashes[i++] = DatumGetInt64(hash_any_extended( - (unsigned char *) relname->sval, - strlen(relname->sval), 0)); + (unsigned char *) strVal(relname), + strlen(strVal(relname)), 0)); } /* Sort the array to make query insensitive to input order of relations. */ diff --git a/learn_cache.c b/learn_cache.c index 35cfd57a..316968b0 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -56,7 +56,7 @@ calculate_size(int cols, List *relnames) /* Calculate memory size needed to store relation names */ foreach(lc, relnames) { - size += strlen(lfirst_node(String, lc)->sval) + 1; + size += strlen(strVal(lfirst(lc))) + 1; } return size; @@ -134,7 +134,7 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) /* store strings of relation names. Each string ends with 0-byte */ foreach(lc, relnames) { - char *relname = lfirst_node(String, lc)->sval; + char *relname = strVal(lfirst(lc)); int len = strlen(relname) + 1; memcpy(ptr, relname, len); @@ -245,11 +245,9 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relnames) *relnames = NIL; for (i = 0; i < hdr->nrelids; i++) { - String *s = makeNode(String); int len = strlen(ptr) + 1; - s->sval = pstrdup(ptr); - *relnames = lappend(*relnames, s); + *relnames = lappend(*relnames, makeString(pstrdup(ptr))); ptr += len; } return calculate_size(hdr->cols, *relnames); diff --git a/path_utils.c b/path_utils.c index 089b6133..78d0512d 100644 --- a/path_utils.c +++ b/path_utils.c @@ -148,12 +148,7 @@ get_relnames(PlannerInfo *root, Relids relids) { rte = planner_rt_fetch(i, root); if (OidIsValid(rte->relid)) - { - String *s = makeNode(String); - - s->sval = pstrdup(rte->eref->aliasname); - l = lappend(l, s); - } + l = lappend(l, makeString(pstrdup(rte->eref->aliasname))); } return l; } @@ -666,9 +661,9 @@ aqo_store_upper_signature_hook(PlannerInfo *root, set_cheapest(input_rel); clauses = 
get_path_clauses(input_rel->cheapest_total_path, root, &selectivities); - relids = get_list_of_relids(root, input_rel->relids); + relnames = get_relnames(root, input_rel->relids); fss_node->val.type = T_Integer; fss_node->location = -1; - fss_node->val.val.ival = get_fss_for_object(relids, clauses, NIL, NULL, NULL); + fss_node->val.val.ival = get_fss_for_object(relnames, clauses, NIL, NULL, NULL); output_rel->ext_nodes = lappend(output_rel->ext_nodes, (void *) fss_node); } diff --git a/sql/unsupported.sql b/sql/unsupported.sql index e87fda31..b1fb6a1e 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -169,6 +169,7 @@ SELECT public.clean_aqo_data(); -- TODO: figure out with remaining queries in the ML storage. SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text FROM public.show_cardinality_errors() cef, aqo_query_texts aqt -WHERE aqt.query_hash = cef.id; +WHERE aqt.query_hash = cef.id +ORDER BY (error, md5(query_text)) DESC; DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index 4b05bf11..7a03a840 100644 --- a/storage.c +++ b/storage.c @@ -17,6 +17,9 @@ #include "postgres.h" +#include "nodes/value.h" +#include "postgres.h" + #include "access/heapam.h" #include "access/table.h" #include "access/tableam.h" @@ -337,7 +340,7 @@ form_strings_vector(List *relnames) foreach(lc, relnames) { - char *relname = (lfirst_node(String, lc))->sval; + char *relname = strVal(lfirst(lc)); rels[i++] = CStringGetTextDatum(relname); } @@ -360,9 +363,9 @@ deform_strings_vector(Datum datum) &values, NULL, &nelems); for (i = 0; i < nelems; ++i) { - String *s = makeNode(String); + Value *s; - s->sval = pstrdup(TextDatumGetCString(values[i])); + s = makeString(pstrdup(TextDatumGetCString(values[i]))); relnames = lappend(relnames, s); } @@ -449,7 +452,7 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) elog(ERROR, "unexpected number of features for hash (" \ UINT64_FORMAT", %d):\ expected %d features, obtained %d", - fs, fss, ncols, 
DatumGetInt32(values[2])); + fs, fss, data->cols, DatumGetInt32(values[2])); } else success = false; @@ -584,7 +587,7 @@ update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) */ elog(ERROR, "AQO data piece ("UINT64_FORMAT" %d) concurrently" " updated by a stranger backend.", - fhash, fsshash); + fs, fss); result = false; } } From 6402dd33a6d42607732908b20b59f125cb0bcdef Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 21 Apr 2022 07:19:22 +0500 Subject: [PATCH 075/203] Fix some problems found during underwent of the AQO by Join-Order-Benchmark: 1. Minor code improvements 2. Introduce the show_cardinality_errors(bool) routine that can show cardinality errors detected by the AQO that made during last execution under or without AQO control. 3. Ignore queries that don't touch any database relations. --- aqo--1.3--1.4.sql | 67 ++++++++++++++++++++++++---------- expected/gucs.out | 6 ++-- expected/top_queries.out | 56 +++++++++++++++++++---------- expected/unsupported.out | 77 ++++++++++++++++++++++------------------ preprocessing.c | 14 ++++++-- sql/top_queries.sql | 29 +++++++++------ sql/unsupported.sql | 12 +++---- t/001_pgbench.pl | 14 ++++---- 8 files changed, 174 insertions(+), 101 deletions(-) diff --git a/aqo--1.3--1.4.sql b/aqo--1.3--1.4.sql index 16891d34..f6df0263 100755 --- a/aqo--1.3--1.4.sql +++ b/aqo--1.3--1.4.sql @@ -5,31 +5,60 @@ ALTER TABLE public.aqo_data ADD COLUMN reliability double precision []; +DROP FUNCTION public.top_error_queries(int); + +-- +-- Get cardinality error of queries the last time they were executed. +-- IN: +-- controlled - show queries executed under a control of AQO (true); +-- executed without an AQO control, but AQO has a stat on the query (false). -- --- Get IDs of queries having the largest cardinality error when last executed. +-- OUT: -- num - sequental number. Smaller number corresponds to higher error. --- qhash - ID of a query. --- error - AQO error calculated over plan nodes of the query. 
+-- id - ID of a query. +-- fshash - feature space. Usually equal to zero or ID. +-- error - AQO error that calculated on plan nodes of the query. +-- nexecs - number of executions of queries associated with this ID. -- -CREATE OR REPLACE FUNCTION public.show_cardinality_errors() -RETURNS TABLE(num bigint, id bigint, error float) +CREATE OR REPLACE FUNCTION public.show_cardinality_errors(controlled boolean) +RETURNS TABLE(num bigint, id bigint, fshash bigint, error float, nexecs bigint) AS $$ BEGIN +IF (controlled) THEN + RETURN QUERY + SELECT + row_number() OVER (ORDER BY (cerror, query_id, fs_hash) DESC) AS nn, + query_id, fs_hash, cerror, execs + FROM ( + SELECT + aq.query_hash AS query_id, + aq.fspace_hash AS fs_hash, + cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror, + executions_with_aqo AS execs + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + ON aq.query_hash = aqs.query_hash + WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) + ) AS q1 + ORDER BY nn ASC; +ELSE RETURN QUERY - SELECT - row_number() OVER (ORDER BY (cerror, qhash) DESC) AS nn, - qhash, cerror - FROM ( - SELECT - aq.query_hash AS qhash, - cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror - FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs - ON aq.query_hash = aqs.query_hash - WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) - ) AS q1 - ORDER BY nn ASC; + SELECT + row_number() OVER (ORDER BY (cerror, query_id, fs_hash) DESC) AS nn, + query_id, fs_hash, cerror, execs + FROM ( + SELECT + aq.query_hash AS query_id, + aq.fspace_hash AS fs_hash, + array_avg(cardinality_error_without_aqo) AS cerror, + executions_without_aqo AS execs + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + ON aq.query_hash = aqs.query_hash + WHERE TRUE = ANY (SELECT unnest(cardinality_error_without_aqo) IS NOT NULL) + ) AS q1 + ORDER BY (nn) ASC; +END IF; END; $$ LANGUAGE plpgsql; 
-COMMENT ON FUNCTION public.show_cardinality_errors() IS -'Get cardinality error of last query execution. Return queries having the largest error.'; +COMMENT ON FUNCTION public.show_cardinality_errors(boolean) IS +'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; diff --git a/expected/gucs.out b/expected/gucs.out index 095ea9f1..1a036f64 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -30,9 +30,9 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) -- Check existence of the interface functions. SELECT obj_description('public.show_cardinality_errors'::regproc::oid); - obj_description ------------------------------------------------------------------------------------------ - Get cardinality error of last query execution. Return queries having the largest error. + obj_description +--------------------------------------------------------------------------------------------------------------- + Get cardinality error of queries the last time they were executed. Order queries according to an error value. (1 row) DROP EXTENSION aqo; diff --git a/expected/top_queries.out b/expected/top_queries.out index ebf6d21b..36df518f 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -2,23 +2,31 @@ CREATE EXTENSION aqo; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; -- --- num of generate_series(1,1000000) query should be the first +-- Dummy test. CREATE TABLE shouldn't find in the ML storage. But a simple +-- select must be in. Also here we test on gathering a stat on temp and plain +-- relations. 
-- -SELECT count(*) FROM generate_series(1,1000000); - count ---------- - 1000000 +CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); +CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); +SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; + cnt +----- + 0 +(1 row) + +SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; + cnt +----- + 0 (1 row) -SELECT num FROM top_time_queries(10) AS tt WHERE - tt.fspace_hash = (SELECT fspace_hash FROM aqo_queries WHERE - aqo_queries.query_hash = (SELECT aqo_query_texts.query_hash FROM aqo_query_texts - WHERE query_text = 'SELECT count(*) FROM generate_series(1,1000000);')); -NOTICE: Top 10 execution time queries +SELECT num FROM top_time_queries(3); +NOTICE: Top 3 execution time queries num ----- 1 -(1 row) + 2 +(2 rows) -- -- num of query uses table t2 should be bigger than num of query uses table t1 and be the first @@ -39,13 +47,23 @@ SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y 31 (1 row) -SELECT num FROM top_error_queries(10) AS te WHERE - te.fspace_hash = (SELECT fspace_hash FROM aqo_queries WHERE - aqo_queries.query_hash = (SELECT aqo_query_texts.query_hash FROM aqo_query_texts - WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;')); -NOTICE: Top 10 cardinality error queries - num ------ - 1 +SELECT num, to_char(error, '9.99EEEE') FROM show_cardinality_errors(false) AS te +WHERE te.fshash = ( + SELECT fspace_hash FROM aqo_queries + WHERE aqo_queries.query_hash = ( + SELECT aqo_query_texts.query_hash FROM aqo_query_texts + WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' + ) +); + num | to_char +-----+----------- + 1 | 1.94e+00 +(1 row) + +-- Should return zero +SELECT count(*) FROM show_cardinality_errors(true); + count +------- + 0 (1 row) diff --git a/expected/unsupported.out b/expected/unsupported.out index 
9b5e67f7..b716e11f 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -541,19 +541,43 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? -- Live with this variant of the test for some time. SELECT - num, - to_char(error, '9.99EEEE')::text AS error -FROM public.show_cardinality_errors() -WHERE error > 0.; - num | error ------+----------- - 1 | 9.69e+02 - 2 | 1.15e+02 - 3 | 3.00e+01 - 4 | 3.00e+01 - 5 | 3.00e+01 - 6 | 1.33e+00 -(6 rows) + num, to_char(error, '9.99EEEE')::text AS error, query_text +FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt +WHERE aqt.query_hash = cef.id +ORDER BY (error, md5(query_text)) DESC; + num | error | query_text +-----+-----------+------------------------------------------------------------------------------------------------ + 1 | 1.15e+02 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + 3 | 3.00e+01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + 4 | 3.00e+01 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + 2 | 3.00e+01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + 5 | 1.33e+00 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + 11 | 0.00e+00 | SELECT * FROM + + | | (SELECT * FROM t WHERE x < 0) AS t0 + + | | JOIN + + | | (SELECT * FROM t WHERE x > 20) AS t1 + + | | USING(x); + 10 | 0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 12 | 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | | SELECT count(*) FROM t WHERE + + | | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + + | | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + 8 | 0.00e+00 | SELECT count(*) FROM ( + + | | SELECT count(*) AS x FROM ( + + | | SELECT count(*) FROM t1 GROUP BY (x,y) + + | | ) AS q1 + + | | ) AS q2 + + | | WHERE q2.x > 1; + 9 | 0.00e+00 | SELECT count(*) FROM t WHERE x = 
(SELECT avg(x) FROM t WHERE x = 1); + 6 | 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + + | | (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 7 | 0.00e+00 | SELECT count(*) FROM + + | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + + | | JOIN + + | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + + | | ON q1.x = q2.x+1; +(12 rows) DROP TABLE t,t1 CASCADE; SELECT public.clean_aqo_data(); @@ -563,30 +587,13 @@ NOTICE: Cleaning aqo_data records (1 row) --- TODO: figure out with remaining queries in the ML storage. +-- Look for any remaining queries in the ML storage. SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.show_cardinality_errors() cef, aqo_query_texts aqt +FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id ORDER BY (error, md5(query_text)) DESC; - num | error | query_text ------+-----------+------------------------------------------------------------------------------------------- - 1 | 9.69e+02 | SELECT str FROM expln(' + - | | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + - | | SELECT count(*) FROM + - | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + - | | JOIN + - | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2+ - | | ON q1.x = q2.x+1; + - | | ') AS str WHERE str NOT LIKE '%Memory Usage%'; - 2 | 3.27e+02 | SELECT + - | | num, + - | | to_char(error, '9.99EEEE')::text AS error + - | | FROM public.show_cardinality_errors() + - | | WHERE error > 0.; - 5 | 0.00e+00 | CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; - 4 | 0.00e+00 | SELECT public.clean_aqo_data(); - 3 | 0.00e+00 | CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + - | | FROM generate_series(1,1000) AS gs; -(5 rows) + num | 
error | query_text +-----+-------+------------ +(0 rows) DROP EXTENSION aqo; diff --git a/preprocessing.c b/preprocessing.c index a09a584c..0d376d7b 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -368,12 +368,19 @@ disable_aqo_for_query(void) /* * Examine a fully-parsed query, and return TRUE iff any relation underlying - * the query is a system relation. + * the query is a system relation or no one relation touched by the query. */ static bool isQueryUsingSystemRelation(Query *query) { - return isQueryUsingSystemRelation_walker((Node *) query, NULL); + bool trivQuery = true; + bool result; + + result = isQueryUsingSystemRelation_walker((Node *) query, &trivQuery); + + if (result || trivQuery) + return true; + return false; } @@ -412,10 +419,13 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) Relation rel = table_open(rte->relid, AccessShareLock); bool is_catalog = IsCatalogRelation(rel); bool is_aqo_rel = IsAQORelation(rel); + bool *trivQuery = (bool *) context; table_close(rel, AccessShareLock); if (is_catalog || is_aqo_rel) return true; + + *trivQuery = false; } else if (rte->rtekind == RTE_FUNCTION) { diff --git a/sql/top_queries.sql b/sql/top_queries.sql index bfacdd38..da04e682 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -3,13 +3,15 @@ SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; -- --- num of generate_series(1,1000000) query should be the first +-- Dummy test. CREATE TABLE shouldn't find in the ML storage. But a simple +-- select must be in. Also here we test on gathering a stat on temp and plain +-- relations. 
-- -SELECT count(*) FROM generate_series(1,1000000); -SELECT num FROM top_time_queries(10) AS tt WHERE - tt.fspace_hash = (SELECT fspace_hash FROM aqo_queries WHERE - aqo_queries.query_hash = (SELECT aqo_query_texts.query_hash FROM aqo_query_texts - WHERE query_text = 'SELECT count(*) FROM generate_series(1,1000000);')); +CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); +CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); +SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; +SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; +SELECT num FROM top_time_queries(3); -- -- num of query uses table t2 should be bigger than num of query uses table t1 and be the first @@ -21,7 +23,14 @@ CREATE TABLE t2 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; -SELECT num FROM top_error_queries(10) AS te WHERE - te.fspace_hash = (SELECT fspace_hash FROM aqo_queries WHERE - aqo_queries.query_hash = (SELECT aqo_query_texts.query_hash FROM aqo_query_texts - WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;')); \ No newline at end of file +SELECT num, to_char(error, '9.99EEEE') FROM show_cardinality_errors(false) AS te +WHERE te.fshash = ( + SELECT fspace_hash FROM aqo_queries + WHERE aqo_queries.query_hash = ( + SELECT aqo_query_texts.query_hash FROM aqo_query_texts + WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' + ) +); + +-- Should return zero +SELECT count(*) FROM show_cardinality_errors(true); diff --git a/sql/unsupported.sql b/sql/unsupported.sql index b1fb6a1e..059f5c64 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -157,18 +157,18 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of 
an error value? -- Live with this variant of the test for some time. SELECT - num, - to_char(error, '9.99EEEE')::text AS error -FROM public.show_cardinality_errors() -WHERE error > 0.; + num, to_char(error, '9.99EEEE')::text AS error, query_text +FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt +WHERE aqt.query_hash = cef.id +ORDER BY (error, md5(query_text)) DESC; DROP TABLE t,t1 CASCADE; SELECT public.clean_aqo_data(); --- TODO: figure out with remaining queries in the ML storage. +-- Look for any remaining queries in the ML storage. SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.show_cardinality_errors() cef, aqo_query_texts aqt +FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id ORDER BY (error, md5(query_text)) DESC; diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index c29174fe..bc486184 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -133,24 +133,24 @@ 'analytical queries in pgbench (disabled mode)'); $res = $node->safe_psql('postgres', - "SELECT count(*) FROM top_error_queries(10) v - JOIN aqo_query_texts t ON (t.query_hash = v.fspace_hash) + "SELECT count(*) FROM show_cardinality_errors(false) v + JOIN aqo_query_texts t ON (t.query_hash = v.id) WHERE v.error > 0. AND t.query_text LIKE '%pgbench_accounts%'"); is($res, 3); $res = $node->safe_psql('postgres', - "SELECT * FROM top_error_queries(10) v - JOIN aqo_query_texts t ON (t.query_hash = v.fspace_hash) + "SELECT * FROM show_cardinality_errors(false) v + JOIN aqo_query_texts t ON (t.query_hash = v.id) WHERE v.error > 0. 
AND t.query_text LIKE '%pgbench_accounts%'"); note("\n TopN: \n $res \n"); $res = $node->safe_psql('postgres', - "SELECT v.error, t.query_text FROM top_error_queries(10) v - JOIN aqo_query_texts t ON (t.query_hash = v.fspace_hash) + "SELECT v.error, t.query_text FROM show_cardinality_errors(false) v + JOIN aqo_query_texts t ON (t.query_hash = v.id) WHERE v.error > 0."); note("\n Queries: \n $res \n"); $res = $node->safe_psql('postgres', "SELECT count(*) FROM top_time_queries(10) v WHERE v.execution_time > 0."); -is($res, 5); +is($res, 3); # ############################################################################## # From ac615aa52739e6b9e49d81354b7ac62c6240203b Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 4 May 2022 23:18:26 +0500 Subject: [PATCH 076/203] Another attempt to resolve contradictory between oid-based and relname-based approaches to organize ML storage base. In this patch we store list of oids of persistent tables for each record in the aqo_data table to have a possibility of cleaning records which depends on removed tables. On the other hand, we use relnames (tupDesc hash for TEMP tables) to form a kind of signature of a table. This signature is used for a feature subspace generation. 
--- Makefile | 5 +- aqo--1.4--1.5.sql | 75 +++++++++++++ aqo.control | 2 +- aqo.h | 12 +-- cardinality_estimation.c | 16 +-- cardinality_hooks.c | 60 +++++++---- expected/aqo_learn.out | 177 +++++++++++++++++++++--------- expected/clean_aqo_data.out | 66 ++++++------ expected/statement_timeout.out | 1 + expected/temp_tables.out | 189 +++++++++++++++++++++++++++++++++ expected/top_queries.out | 5 +- expected/unsupported.out | 78 +++++++------- hash.c | 41 +++---- hash.h | 2 +- learn_cache.c | 52 ++++----- learn_cache.h | 4 +- path_utils.c | 141 ++++++++++++++++++------ path_utils.h | 27 +++-- postprocessing.c | 32 +++--- preprocessing.c | 10 +- sql/aqo_learn.sql | 67 +++++++++--- sql/clean_aqo_data.sql | 61 +++++------ sql/statement_timeout.sql | 2 +- sql/temp_tables.sql | 95 +++++++++++++++++ sql/top_queries.sql | 5 +- sql/unsupported.sql | 9 +- storage.c | 77 ++++++++++---- t/001_pgbench.pl | 32 +++--- 28 files changed, 967 insertions(+), 376 deletions(-) create mode 100644 aqo--1.4--1.5.sql create mode 100644 expected/temp_tables.out create mode 100644 sql/temp_tables.sql diff --git a/Makefile b/Makefile index 0a03ac48..5beaba9a 100755 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # contrib/aqo/Makefile EXTENSION = aqo -EXTVERSION = 1.4 +EXTVERSION = 1.5 PGFILEDESC = "AQO - Adaptive Query Optimization" MODULE_big = aqo OBJS = aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ @@ -24,6 +24,7 @@ REGRESS = aqo_disabled \ clean_aqo_data \ plancache \ statement_timeout \ + temp_tables \ top_queries fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw @@ -33,7 +34,7 @@ EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/conf.add EXTRA_INSTALL = contrib/postgres_fdw contrib/pg_stat_statements DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql \ - aqo--1.2--1.3.sql aqo--1.3--1.4.sql + aqo--1.2--1.3.sql aqo--1.3--1.4.sql aqo--1.4--1.5.sql ifdef USE_PGXS PG_CONFIG ?= pg_config diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql new file 
mode 100644 index 00000000..b0d97594 --- /dev/null +++ b/aqo--1.4--1.5.sql @@ -0,0 +1,75 @@ +/* contrib/aqo/aqo--1.4--1.5.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION aqo UPDATE TO '1.5'" to load this file. \quit + +-- +-- Re-create the aqo_data table. Do so to keep the columns order. +-- +DROP TABLE public.aqo_data CASCADE; +CREATE TABLE public.aqo_data ( + fspace_hash bigint NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, + fsspace_hash int NOT NULL, + nfeatures int NOT NULL, + features double precision[][], + targets double precision[], + oids oid [] DEFAULT NULL, + reliability double precision [] +); +CREATE UNIQUE INDEX aqo_fss_access_idx ON public.aqo_data (fspace_hash, fsspace_hash); + + +-- +-- Remove rows from the AQO ML knowledge base, related to previously dropped +-- tables of the database. +-- +CREATE OR REPLACE FUNCTION public.clean_aqo_data() RETURNS void AS $$ +DECLARE + aqo_data_row aqo_data%ROWTYPE; + aqo_queries_row aqo_queries%ROWTYPE; + aqo_query_texts_row aqo_query_texts%ROWTYPE; + aqo_query_stat_row aqo_query_stat%ROWTYPE; + oid_var oid; + fspace_hash_var bigint; + delete_row boolean DEFAULT false; +BEGIN + FOR aqo_data_row IN (SELECT * FROM aqo_data) + LOOP + delete_row = false; + SELECT aqo_data_row.fspace_hash INTO fspace_hash_var FROM aqo_data; + + IF (aqo_data_row.oids IS NOT NULL) THEN + FOREACH oid_var IN ARRAY aqo_data_row.oids + LOOP + IF NOT EXISTS (SELECT relname FROM pg_class WHERE oid = oid_var) THEN + delete_row = true; + END IF; + END LOOP; + END IF; + + FOR aqo_queries_row IN (SELECT * FROM public.aqo_queries) + LOOP + IF (delete_row = true AND fspace_hash_var <> 0 AND + fspace_hash_var = aqo_queries_row.fspace_hash AND + aqo_queries_row.fspace_hash = aqo_queries_row.query_hash) THEN + DELETE FROM aqo_data WHERE aqo_data = aqo_data_row; + DELETE FROM aqo_queries WHERE aqo_queries = aqo_queries_row; + + FOR aqo_query_texts_row IN (SELECT * FROM 
aqo_query_texts) + LOOP + DELETE FROM aqo_query_texts + WHERE aqo_query_texts_row.query_hash = fspace_hash_var AND + aqo_query_texts = aqo_query_texts_row; + END LOOP; + + FOR aqo_query_stat_row IN (SELECT * FROM aqo_query_stat) + LOOP + DELETE FROM aqo_query_stat + WHERE aqo_query_stat_row.query_hash = fspace_hash_var AND + aqo_query_stat = aqo_query_stat_row; + END LOOP; + END IF; + END LOOP; + END LOOP; +END; +$$ LANGUAGE plpgsql; \ No newline at end of file diff --git a/aqo.control b/aqo.control index dfdd815d..9c6c65b3 100644 --- a/aqo.control +++ b/aqo.control @@ -1,5 +1,5 @@ # AQO extension comment = 'machine learning for cardinality estimation in optimizer' -default_version = '1.4' +default_version = '1.5' module_pathname = '$libdir/aqo' relocatable = false diff --git a/aqo.h b/aqo.h index b43e01a9..92db265b 100644 --- a/aqo.h +++ b/aqo.h @@ -281,12 +281,12 @@ extern bool find_query(uint64 qhash, QueryContextData *ctx); extern bool update_query(uint64 qhash, uint64 fhash, bool learn_aqo, bool use_aqo, bool auto_tuning); extern bool add_query_text(uint64 query_hash, const char *query_string); -extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relnames, +extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe); -extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames); +extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids); extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, - List *relnames, bool isTimedOut); -extern bool update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames); + List *reloids, bool isTimedOut); +extern bool update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids); QueryStat *get_aqo_stat(uint64 query_hash); void update_aqo_stat(uint64 query_hash, QueryStat * stat); extern bool my_index_insert(Relation indexRelation, Datum *values, bool *isnull, @@ -306,8 +306,8 @@ extern void print_into_explain(PlannedStmt *plannedstmt, 
IntoClause *into, extern void print_node_explain(ExplainState *es, PlanState *ps, Plan *plan); /* Cardinality estimation */ -double predict_for_relation(List *restrict_clauses, List *selectivities, - List *relnames, int *fss); +extern double predict_for_relation(List *restrict_clauses, List *selectivities, + List *relsigns, int *fss); /* Query execution statistics collecting hooks */ void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags); diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 7740528a..48630754 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -26,7 +26,7 @@ #ifdef AQO_DEBUG_PRINT static void predict_debug_output(List *clauses, List *selectivities, - List *relnames, int fss, double result) + List *reloids, int fss, double result) { StringInfoData debug_str; ListCell *lc; @@ -42,8 +42,8 @@ predict_debug_output(List *clauses, List *selectivities, appendStringInfo(&debug_str, "%lf ", *s); } - appendStringInfoString(&debug_str, "}, relnames: { "); - foreach(lc, relnames) + appendStringInfoString(&debug_str, "}, reloids: { "); + foreach(lc, reloids) { Value *relname = lfirst_node(String, lc); appendStringInfo(&debug_str, "%s ", valStr(relname)); @@ -59,22 +59,22 @@ predict_debug_output(List *clauses, List *selectivities, * General method for prediction the cardinality of given relation. */ double -predict_for_relation(List *clauses, List *selectivities, - List *relnames, int *fss) +predict_for_relation(List *clauses, List *selectivities, List *relsigns, + int *fss) { double *features; double result; int i; OkNNrdata data; - if (relnames == NIL) + if (relsigns == NIL) /* * Don't make prediction for query plans without any underlying plane * tables. Use return value -4 for debug purposes. 
*/ return -4.; - *fss = get_fss_for_object(relnames, clauses, selectivities, + *fss = get_fss_for_object(relsigns, clauses, selectivities, &data.cols, &features); if (data.cols > 0) @@ -94,7 +94,7 @@ predict_for_relation(List *clauses, List *selectivities, result = -1; } #ifdef AQO_DEBUG_PRINT - predict_debug_output(clauses, selectivities, relnames, *fss, result); + predict_debug_output(clauses, selectivities, relsigns, *fss, result); #endif pfree(features); if (data.cols > 0) diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 1da20880..64d0fe14 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -138,12 +138,12 @@ default_estimate_num_groups(PlannerInfo *root, List *groupExprs, void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) { - double predicted; - RangeTblEntry *rte; - List *relnames = NIL; - List *selectivities = NULL; - List *clauses; - int fss = 0; + double predicted; + RangeTblEntry *rte; + RelSortOut rels = {NIL, NIL}; + List *selectivities = NULL; + List *clauses; + int fss = 0; if (IsQueryDisabled()) /* Fast path. */ @@ -166,16 +166,18 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) { /* Predict for a plane table. 
*/ Assert(rte->eref && rte->eref->aliasname); - relnames = list_make1(makeString(pstrdup(rte->eref->aliasname))); + get_list_of_relids(root, rel->relids, &rels); } clauses = aqo_get_clauses(root, rel->baserestrictinfo); - predicted = predict_for_relation(clauses, selectivities, relnames, &fss); + predicted = predict_for_relation(clauses, selectivities, rels.signatures, + &fss); rel->fss_hash = fss; + list_free(rels.hrels); + list_free(rels.signatures); list_free_deep(selectivities); list_free(clauses); - list_free(relnames); if (predicted >= 0) { @@ -212,7 +214,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, { double predicted; RangeTblEntry *rte = NULL; - List *relnames = NIL; + RelSortOut rels = {NIL, NIL}; List *allclauses = NULL; List *selectivities = NULL; ListCell *l; @@ -269,10 +271,12 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, { /* Predict for a plane table. */ Assert(rte->eref && rte->eref->aliasname); - relnames = list_make1(makeString(pstrdup(rte->eref->aliasname))); + get_list_of_relids(root, rel->relids, &rels); } - predicted = predict_for_relation(allclauses, selectivities, relnames, &fss); + predicted = predict_for_relation(allclauses, selectivities, rels.signatures, &fss); + list_free(rels.hrels); + list_free(rels.signatures); predicted_ppi_rows = predicted; fss_ppi_hash = fss; @@ -297,7 +301,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, List *restrictlist) { double predicted; - List *relnames; + RelSortOut rels = {NIL, NIL}; List *outer_clauses; List *inner_clauses; List *allclauses; @@ -323,7 +327,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, goto default_estimator; } - relnames = get_relnames(root, rel->relids); + get_list_of_relids(root, rel->relids, &rels); outer_clauses = get_path_clauses(outer_rel->cheapest_total_path, root, &outer_selectivities); inner_clauses = get_path_clauses(inner_rel->cheapest_total_path, root, @@ -334,7 +338,11 @@ 
aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, list_concat(outer_selectivities, inner_selectivities)); - predicted = predict_for_relation(allclauses, selectivities, relnames, &fss); + predicted = predict_for_relation(allclauses, selectivities, rels.signatures, + &fss); + list_free(rels.hrels); + list_free(rels.signatures); + rel->fss_hash = fss; if (predicted >= 0) @@ -365,7 +373,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, List *clauses) { double predicted; - List *relnames; + RelSortOut rels = {NIL, NIL}; List *outer_clauses; List *inner_clauses; List *allclauses; @@ -391,7 +399,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, goto default_estimator; } - relnames = get_relnames(root, rel->relids); + get_list_of_relids(root, rel->relids, &rels); outer_clauses = get_path_clauses(outer_path, root, &outer_selectivities); inner_clauses = get_path_clauses(inner_path, root, &inner_selectivities); allclauses = list_concat(aqo_get_clauses(root, clauses), @@ -400,7 +408,10 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, list_concat(outer_selectivities, inner_selectivities)); - predicted = predict_for_relation(allclauses, selectivities, relnames, &fss); + predicted = predict_for_relation(allclauses, selectivities, rels.signatures, + &fss); + list_free(rels.hrels); + list_free(rels.signatures); predicted_ppi_rows = predicted; fss_ppi_hash = fss; @@ -427,13 +438,16 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, child_fss = subpath->parent->fss_hash; else { - List *relnames; - List *clauses; - List *selectivities = NIL; + RelSortOut rels = {NIL, NIL}; + List *clauses; + List *selectivities = NIL; - relnames = get_relnames(root, subpath->parent->relids); + get_list_of_relids(root, subpath->parent->relids, &rels); clauses = get_path_clauses(subpath, root, &selectivities); - (void) predict_for_relation(clauses, selectivities, relnames, &child_fss); + (void) predict_for_relation(clauses, 
selectivities, rels.signatures, + &child_fss); + list_free(rels.hrels); + list_free(rels.signatures); } *fss = get_grouped_exprs_hash(child_fss, group_exprs); diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 9e6c21ee..088a5c60 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -1,3 +1,23 @@ +-- The function just copied from stats_ext.sql +create function check_estimated_rows(text) returns table (estimated int, actual int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain analyze %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)'); + return query select tmp[1]::int, tmp[2]::int; + end if; + end loop; +end; +$$; CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -214,6 +234,82 @@ SELECT count(*) FROM tmp1; 17 (1 row) +-- Remove data on some unneeded instances of tmp1 table. +SELECT public.clean_aqo_data(); + clean_aqo_data +---------------- + +(1 row) + +-- Result of the query below should be empty +SELECT * FROM aqo_query_texts aqt1, aqo_query_texts aqt2 +WHERE aqt1.query_text = aqt2.query_text AND aqt1.query_hash <> aqt2.query_hash; + query_hash | query_text | query_hash | query_text +------------+------------+------------+------------ +(0 rows) + +-- Fix the state of the AQO data +SELECT reliability,nfeatures,query_text +FROM aqo_data ad, aqo_query_texts aqt +WHERE aqt.query_hash = ad.fspace_hash +ORDER BY (md5(query_text)) +; + reliability | nfeatures | query_text +-------------+-----------+---------------------------------------------------------------------------------------- + {1} | 1 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; + {1} | 5 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 
AND t2.d < 1 AND t1.a = t2.a; + {1} | 4 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; + {1,1,1} | 4 | EXPLAIN SELECT * FROM aqo_test0 + + | | WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; + {1} | 0 | SELECT count(*) FROM tmp1; + {1} | 0 | SELECT count(*) FROM tmp1; + {1} | 6 | EXPlAIN SELECT t1.a, t2.b, t3.c + + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; + {1} | 1 | EXPlAIN SELECT t1.a, t2.b, t3.c + + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; + {1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c + + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; + {1} | 3 | EXPlAIN SELECT t1.a, t2.b, t3.c + + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; + {1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c + + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; + {1} | 2 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; + {1} | 2 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; + {1} | 4 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; + {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + + | | WHERE t1.a = t2.b AND t2.a = t3.b; + {1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS 
b, t3.a AS c + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + + | | WHERE t1.a = t2.b AND t2.a = t3.b; + {1} | 0 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + + | | WHERE t1.a = t2.b AND t2.a = t3.b; + {1} | 3 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; + {1} | 0 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; + {1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; + {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +(21 rows) + DROP TABLE tmp1; SET aqo.mode = 'controlled'; UPDATE aqo_queries SET learn_aqo = false, use_aqo = false, auto_tuning = false; @@ -268,7 +364,7 @@ WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; Filter: ((b < 1) AND (c < 1) AND (d < 1)) (6 rows) -EXPlAIN SELECT t1.a, t2.b, t3.c +EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; QUERY PLAN @@ -285,21 +381,15 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 -WHERE t1.a = t2.b AND t2.a = t3.b; - QUERY PLAN -------------------------------------------------------------------------------- - 
Hash Join (cost=2.90..4.65 rows=20 width=12) - Hash Cond: (t2.a = t3.b) - -> Hash Join (cost=1.45..2.92 rows=20 width=8) - Hash Cond: (t1.a = t2.b) - -> Seq Scan on aqo_test1 t1 (cost=0.00..1.20 rows=20 width=4) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t2 (cost=0.00..1.20 rows=20 width=8) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t3 (cost=0.00..1.20 rows=20 width=8) -(9 rows) +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + WHERE t1.a = t2.b AND t2.a = t3.b; +'); + estimated | actual +-----------+-------- + 20 | 18 +(1 row) EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 @@ -373,7 +463,7 @@ WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; Filter: ((b < 1) AND (c < 1) AND (d < 1)) (6 rows) -EXPlAIN SELECT t1.a, t2.b, t3.c +EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; QUERY PLAN @@ -390,44 +480,29 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 -WHERE t1.a = t2.b AND t2.a = t3.b; - QUERY PLAN -------------------------------------------------------------------------------- - Hash Join (cost=2.90..4.64 rows=18 width=12) - Hash Cond: (t2.a = t3.b) - -> Hash Join (cost=1.45..2.92 rows=19 width=8) - Hash Cond: (t1.a = t2.b) - -> Seq Scan on aqo_test1 t1 (cost=0.00..1.20 rows=20 width=4) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t2 (cost=0.00..1.20 rows=20 width=8) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t3 (cost=0.00..1.20 rows=20 width=8) -(9 rows) +SELECT * 
FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + WHERE t1.a = t2.b AND t2.a = t3.b; +'); + estimated | actual +-----------+-------- + 18 | 18 +(1 row) -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 -WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; - QUERY PLAN -------------------------------------------------------------------------------------- - Hash Join (cost=4.35..6.33 rows=17 width=16) - Hash Cond: (t3.a = t4.b) - -> Hash Join (cost=2.90..4.64 rows=18 width=12) - Hash Cond: (t2.a = t3.b) - -> Hash Join (cost=1.45..2.92 rows=19 width=8) - Hash Cond: (t1.a = t2.b) - -> Seq Scan on aqo_test1 t1 (cost=0.00..1.20 rows=20 width=4) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t2 (cost=0.00..1.20 rows=20 width=8) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t3 (cost=0.00..1.20 rows=20 width=8) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t4 (cost=0.00..1.20 rows=20 width=8) -(13 rows) +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +'); + estimated | actual +-----------+-------- + 17 | 17 +(1 row) DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; DROP EXTENSION aqo; +DROP FUNCTION check_estimated_rows; diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index 94551d7d..acee95bd 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -9,8 +9,8 @@ SELECT * FROM a; -- (0 rows) +SELECT 'a'::regclass::oid AS a_oid \gset SELECT clean_aqo_data(); -NOTICE: Cleaning aqo_data records clean_aqo_data ---------------- @@ -23,14 +23,14 @@ NOTICE: Cleaning aqo_data records * lines with 
query_hash corresponding to a_oid's fspace_hash in aqo_query_stat * should remain */ -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); count ------- 1 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); count ------- 1 @@ -38,7 +38,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 1 @@ -46,7 +46,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 1 @@ -54,7 +54,6 @@ SELECT count(*) FROM aqo_query_stat WHERE DROP TABLE a; SELECT clean_aqo_data(); -NOTICE: Cleaning aqo_data records clean_aqo_data ---------------- @@ -67,14 +66,14 @@ NOTICE: Cleaning aqo_data records * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat, * should be deleted */ -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); count ------- 0 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = 
aqo_queries.query_hash; count ------- @@ -83,7 +82,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -92,7 +91,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -109,7 +108,6 @@ SELECT 'a'::regclass::oid AS a_oid \gset INSERT INTO aqo_queries VALUES (:a_oid + 1, 't', 't', :a_oid, 'f'); DROP TABLE a; SELECT clean_aqo_data(); -NOTICE: Cleaning aqo_data records clean_aqo_data ---------------- @@ -136,17 +134,17 @@ SELECT * FROM b CROSS JOIN a; -- (0 rows) --- SELECT 'a'::regclass::oid AS a_oid \gset --- SELECT 'b'::regclass::oid AS b_oid \gset +SELECT 'a'::regclass::oid AS a_oid \gset +SELECT 'b'::regclass::oid AS b_oid \gset -- new lines added to aqo_data -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); count ------- 3 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); count ------- 2 @@ -154,7 +152,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT 
aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 2 @@ -162,20 +160,20 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 2 (1 row) -SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); count ------- 3 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)); count ------- 2 @@ -183,7 +181,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- 2 @@ -191,7 +189,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- 2 @@ -199,7 +197,6 @@ SELECT count(*) FROM aqo_query_stat WHERE DROP TABLE a; SELECT clean_aqo_data(); -NOTICE: Cleaning aqo_data 
records clean_aqo_data ---------------- @@ -211,14 +208,14 @@ NOTICE: Cleaning aqo_data records * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_texts, * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_stat, */ -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); count ------- 0 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -227,7 +224,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -236,7 +233,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -244,14 +241,14 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) -- lines corresponding to b_oid in all theese tables should remain -SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); count ------- 1 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash 
FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -260,7 +257,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -269,7 +266,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -278,21 +275,20 @@ SELECT count(*) FROM aqo_query_stat WHERE DROP TABLE b; SELECT clean_aqo_data(); -NOTICE: Cleaning aqo_data records clean_aqo_data ---------------- (1 row) -- lines corresponding to b_oid in theese tables deleted -SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); count ------- 0 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -301,7 +297,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - 
aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -310,7 +306,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index 9d91de22..c8c9f50c 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -107,3 +107,4 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); DROP TABLE t; DROP EXTENSION aqo; +DROP FUNCTION check_estimated_rows; diff --git a/expected/temp_tables.out b/expected/temp_tables.out new file mode 100644 index 00000000..daf2602f --- /dev/null +++ b/expected/temp_tables.out @@ -0,0 +1,189 @@ +CREATE EXTENSION aqo; +SET aqo.mode = 'learn'; +CREATE TEMP TABLE tt(); +CREATE TABLE pt(); +-- Ignore queries with the only temp tables +SELECT count(*) FROM tt; + count +------- + 0 +(1 row) + +SELECT count(*) FROM tt AS t1, tt AS t2; + count +------- + 0 +(1 row) + +SELECT * FROM aqo_data; + fspace_hash | fsspace_hash | nfeatures | features | targets | oids | reliability +-------------+--------------+-----------+----------+---------+------+------------- +(0 rows) + +-- Should be stored in the ML base +SELECT count(*) FROM pt; + count +------- + 0 +(1 row) + +SELECT count(*) FROM pt, tt; + count +------- + 0 +(1 row) + +SELECT count(*) FROM pt AS pt1, tt AS tt1, tt AS tt2, pt AS pt2; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_data; + 
count +------- + 10 +(1 row) + +DROP TABLE tt; +SELECT clean_aqo_data(); + clean_aqo_data +---------------- + +(1 row) + +SELECT count(*) FROM aqo_data; -- Should be the same as above + count +------- + 10 +(1 row) + +DROP TABLE pt; +SELECT clean_aqo_data(); + clean_aqo_data +---------------- + +(1 row) + +SELECT count(*) FROM aqo_data; -- Should be 0 + count +------- + 0 +(1 row) + +SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt +ON aq.query_hash = aqt.query_hash +; -- TODO: should contain just one row + query_text +------------------------------------------ + COMMON feature space (do not delete!) + SELECT count(*) FROM tt; + SELECT count(*) FROM tt AS t1, tt AS t2; +(3 rows) + +-- Test learning on temporary table +CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE tt AS SELECT -x AS x, (x % 7) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE ttd AS -- the same structure as tt + SELECT -(x*3) AS x, (x % 9) AS y FROM generate_series(1,100) AS x; +ANALYZE pt,tt,ttd; +create function check_estimated_rows(text) returns table (estimated int, actual int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain analyze %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)'); + return query select tmp[1]::int, tmp[2]::int; + end if; + end loop; +end; +$$; +-- Check: AQO learns on queries with temp tables +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Estimation failed. Learn. 
+ estimated | actual +-----------+-------- + 100 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Should use AQO estimation + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); +'); -- TODO: Should use AQO estimation with another temp table of the same structure + estimated | actual +-----------+-------- + 100 | 0 +(1 row) + +SET aqo.mode = 'forced'; -- Now we use all fss records for each query +DROP TABLE pt; +SELECT clean_aqo_data(); + clean_aqo_data +---------------- + +(1 row) + +CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE ttd1 AS + SELECT -(x*3) AS x, (x % 9) AS y1 FROM generate_series(1,100) AS x; +ANALYZE; +-- Check: use AQO knowledge with different temp table of the same structure +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Estimation failed. Learn. 
+ estimated | actual +-----------+-------- + 100 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Should use AQO estimation + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); +'); -- Should use AQO estimation with another temp table of the same structure + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt1.x, avg(pt1.y) FROM pt AS pt1,ttd WHERE pt1.x = ttd.x GROUP BY (pt1.x); +'); -- Alias doesn't influence feature space + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd1 WHERE pt.x = ttd1.x GROUP BY (pt.x); +'); -- Don't use AQO for temp table because of different attname + estimated | actual +-----------+-------- + 100 | 0 +(1 row) + +DROP TABLE pt CASCADE; +DROP EXTENSION aqo; +DROP FUNCTION check_estimated_rows; diff --git a/expected/top_queries.out b/expected/top_queries.out index 36df518f..19c57543 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -2,9 +2,10 @@ CREATE EXTENSION aqo; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; -- --- Dummy test. CREATE TABLE shouldn't find in the ML storage. But a simple --- select must be in. Also here we test on gathering a stat on temp and plain +-- Dummy test. CREATE TABLE shouldn't be found in the ML storage. But a simple +-- select must recorded. Also here we test on gathering a stat on temp and plain -- relations. +-- XXX: Right now we ignore queries if no one permanent table is touched. 
-- CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); diff --git a/expected/unsupported.out b/expected/unsupported.out index b716e11f..56015cb7 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -540,60 +540,58 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? -- Live with this variant of the test for some time. -SELECT - num, to_char(error, '9.99EEEE')::text AS error, query_text +SELECT to_char(error, '9.99EEEE')::text AS error, query_text FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id -ORDER BY (error, md5(query_text)) DESC; - num | error | query_text ------+-----------+------------------------------------------------------------------------------------------------ - 1 | 1.15e+02 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; - 3 | 3.00e+01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; - 4 | 3.00e+01 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; - 2 | 3.00e+01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; - 5 | 1.33e+00 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; - 11 | 0.00e+00 | SELECT * FROM + - | | (SELECT * FROM t WHERE x < 0) AS t0 + - | | JOIN + - | | (SELECT * FROM t WHERE x > 20) AS t1 + - | | USING(x); - 10 | 0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); - 12 | 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + - | | SELECT count(*) FROM t WHERE + - | | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + - | | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); - 8 | 0.00e+00 | SELECT count(*) FROM ( + - | | SELECT count(*) AS x FROM ( + - | | SELECT count(*) FROM t1 GROUP BY (x,y) + - | | ) AS q1 + - | | ) AS q2 + - | | WHERE q2.x > 1; - 9 | 
0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); - 6 | 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + - | | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + - | | (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); - 7 | 0.00e+00 | SELECT count(*) FROM + - | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + - | | JOIN + - | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + - | | ON q1.x = q2.x+1; +ORDER BY (md5(query_text),error) DESC; + error | query_text +-----------+------------------------------------------------------------------------------------------------ + 3.00e+01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + 1.33e+00 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + 1.15e+02 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + 0.00e+00 | SELECT * FROM + + | (SELECT * FROM t WHERE x < 0) AS t0 + + | JOIN + + | (SELECT * FROM t WHERE x > 20) AS t1 + + | USING(x); + 0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT count(*) FROM t WHERE + + | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + + | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + 3.00e+01 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + 0.00e+00 | SELECT count(*) FROM ( + + | SELECT count(*) AS x FROM ( + + | SELECT count(*) FROM t1 GROUP BY (x,y) + + | ) AS q1 + + | ) AS q2 + + | WHERE q2.x > 1; + 3.00e+01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + 0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); + 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + + | (SELECT 
avg(x) FROM t t0 WHERE t0.x = t.x); + 0.00e+00 | SELECT count(*) FROM + + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + + | JOIN + + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + + | ON q1.x = q2.x+1; (12 rows) DROP TABLE t,t1 CASCADE; SELECT public.clean_aqo_data(); -NOTICE: Cleaning aqo_data records clean_aqo_data ---------------- (1 row) -- Look for any remaining queries in the ML storage. -SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text +SELECT to_char(error, '9.99EEEE')::text AS error, query_text FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id -ORDER BY (error, md5(query_text)) DESC; - num | error | query_text ------+-------+------------ +ORDER BY (md5(query_text),error) DESC; + error | query_text +-------+------------ (0 rows) DROP EXTENSION aqo; diff --git a/hash.c b/hash.c index 1f8f8112..c1b16404 100644 --- a/hash.c +++ b/hash.c @@ -18,9 +18,11 @@ * aqo/hash.c * */ - #include "postgres.h" +#include "access/htup.h" +#include "common/fe_memutils.h" + #include "math.h" #include "aqo.h" @@ -31,7 +33,7 @@ static int get_node_hash(Node *node); static int get_unsorted_unsafe_int_array_hash(int *arr, int len); static int get_unordered_int_list_hash(List *lst); -static int64 get_relations_hash(List *relnames); +static int64 get_relations_hash(List *relsigns); static int get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash); @@ -149,7 +151,7 @@ get_grouped_exprs_hash(int child_fss, List *group_exprs) } /* - * For given object (clauselist, selectivities, relnames) creates feature + * For given object (clauselist, selectivities, reloids) creates feature * subspace: * sets nfeatures * creates and computes fss_hash @@ -158,7 +160,7 @@ get_grouped_exprs_hash(int child_fss, List *group_exprs) * Special case for nfeatures == NULL: don't calculate features. 
*/ int -get_fss_for_object(List *relnames, List *clauselist, +get_fss_for_object(List *relsigns, List *clauselist, List *selectivities, int *nfeatures, double **features) { int n; @@ -172,7 +174,7 @@ get_fss_for_object(List *relnames, List *clauselist, int *eclass_hash; int clauses_hash; int eclasses_hash; - int relnames_hash; + int relations_hash; List **args; ListCell *lc; int i, @@ -262,8 +264,8 @@ get_fss_for_object(List *relnames, List *clauselist, */ clauses_hash = get_int_array_hash(sorted_clauses, n - sh); eclasses_hash = get_int_array_hash(eclass_hash, nargs); - relnames_hash = (int) get_relations_hash(relnames); - fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relnames_hash); + relations_hash = (int) get_relations_hash(relsigns); + fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relations_hash); pfree(clause_hashes); pfree(sorted_clauses); @@ -439,32 +441,23 @@ get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash) * Each element of a list must have a String type, */ static int64 -get_relations_hash(List *relnames) +get_relations_hash(List *relsigns) { - int64 *hashes = palloc(list_length(relnames) * sizeof(int64)); + int nhashes = 0; + int64 *hashes = palloc(list_length(relsigns) * sizeof(uint64)); ListCell *lc; - int64 hash = 0; - int i = 0; - /* generate array of hashes. */ - foreach(lc, relnames) + foreach(lc, relsigns) { - Value *relname = (Value *) lfirst(lc); - - hashes[i++] = DatumGetInt64(hash_any_extended( - (unsigned char *) strVal(relname), - strlen(strVal(relname)), 0)); + hashes[nhashes++] = *(int64 *) lfirst(lc); } /* Sort the array to make query insensitive to input order of relations. 
*/ - qsort(hashes, i, sizeof(int64), int64_compare); + qsort(hashes, nhashes, sizeof(int64), int64_compare); /* Make a final hash value */ - hash = DatumGetInt64(hash_any_extended((unsigned char *) hashes, - i * sizeof(int64), 0)); - - pfree(hashes); - return hash; + return DatumGetInt64(hash_any_extended((const unsigned char *) hashes, + nhashes * sizeof(int64), 0)); } /* diff --git a/hash.h b/hash.h index b33b1990..a218c9a4 100644 --- a/hash.h +++ b/hash.h @@ -7,7 +7,7 @@ extern uint64 get_query_hash(Query *parse, const char *query_text); extern bool list_member_uint64(const List *list, uint64 datum); extern List *lappend_uint64(List *list, uint64 datum); extern List *ldelete_uint64(List *list, uint64 datum); -extern int get_fss_for_object(List *relnames, List *clauselist, +extern int get_fss_for_object(List *relsigns, List *clauselist, List *selectivities, int *nfeatures, double **features); extern int get_int_array_hash(int *arr, int len); diff --git a/learn_cache.c b/learn_cache.c index 316968b0..3f75a4a9 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -45,25 +45,20 @@ static uint32 init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relids); /* Calculate, how many data we need to store an ML record. 
*/ static uint32 -calculate_size(int cols, List *relnames) +calculate_size(int cols, List *reloids) { uint32 size = sizeof(dsm_block_hdr); /* header's size */ - ListCell *lc; size += sizeof(double) * cols * aqo_K; /* matrix */ size += 2 * sizeof(double) * aqo_K; /* targets, rfactors */ /* Calculate memory size needed to store relation names */ - foreach(lc, relnames) - { - size += strlen(strVal(lfirst(lc))) + 1; - } - + size += list_length(reloids) * sizeof(Oid); return size; } bool -lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) +lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids) { htab_key key = {fs, fss}; htab_entry *entry; @@ -76,7 +71,7 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) Assert(fss_htab && aqo_learn_statement_timeout); - size = calculate_size(data->cols, relnames); + size = calculate_size(data->cols, reloids); LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); entry = (htab_entry *) hash_search(fss_htab, &key, HASH_ENTER, &found); @@ -87,7 +82,7 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) Assert(hdr->magic == AQO_SHARED_MAGIC); Assert(hdr->key.fs == fs && hdr->key.fss == fss); - if (data->cols != hdr->cols || list_length(relnames) != hdr->nrelids) + if (data->cols != hdr->cols || list_length(reloids) != hdr->nrelids) { /* * Collision found: the same {fs,fss}, but something different. @@ -109,7 +104,7 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) hdr->key.fs = fs; hdr->key.fss = fss; hdr->cols = data->cols; - hdr->nrelids = list_length(relnames); + hdr->nrelids = list_length(reloids); } hdr->rows = data->rows; @@ -131,14 +126,13 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) memcpy(ptr, data->rfactors, sizeof(double) * hdr->rows); ptr += sizeof(double) * aqo_K; - /* store strings of relation names. 
Each string ends with 0-byte */ - foreach(lc, relnames) + /* store list of relations */ + foreach(lc, reloids) { - char *relname = strVal(lfirst(lc)); - int len = strlen(relname) + 1; + Oid reloid = lfirst_oid(lc); - memcpy(ptr, relname, len); - ptr += len; + memcpy(ptr, &reloid, sizeof(Oid)); + ptr += sizeof(Oid); } /* Check the invariant */ @@ -172,7 +166,7 @@ lc_has_fss(uint64 fs, int fss) * Load ML data from a memory cache, not from a table. */ bool -lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) +lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids) { htab_key key = {fs, fss}; htab_entry *entry; @@ -204,13 +198,13 @@ lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) return false; } - init_with_dsm(data, hdr, relnames); + init_with_dsm(data, hdr, reloids); LWLockRelease(&aqo_state->lock); return true; } static uint32 -init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relnames) +init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **reloids) { int i; char *ptr = (char *) hdr + sizeof(dsm_block_hdr); @@ -240,17 +234,15 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relnames) memcpy(data->rfactors, ptr, sizeof(double) * hdr->rows); ptr += sizeof(double) * aqo_K; - if (relnames) + if (reloids) { - *relnames = NIL; + *reloids = NIL; for (i = 0; i < hdr->nrelids; i++) { - int len = strlen(ptr) + 1; - - *relnames = lappend(*relnames, makeString(pstrdup(ptr))); - ptr += len; + *reloids = lappend_oid(*reloids, *(Oid *)(ptr)); + ptr += sizeof(Oid); } - return calculate_size(hdr->cols, *relnames); + return calculate_size(hdr->cols, *reloids); } /* It is just read operation. No any interest in size calculation. 
*/ @@ -275,14 +267,14 @@ lc_flush_data(void) { dsm_block_hdr *hdr = (dsm_block_hdr *) ptr; OkNNrdata data; - List *relnames = NIL; + List *reloids = NIL; uint32 delta = 0; - delta = init_with_dsm(&data, hdr, &relnames); + delta = init_with_dsm(&data, hdr, &reloids); Assert(delta > 0); ptr += delta; size -= delta; - update_fss(hdr->key.fs, hdr->key.fss, &data, relnames); + update_fss(hdr->key.fs, hdr->key.fss, &data, reloids); if (!hash_search(fss_htab, (void *) &hdr->key, HASH_REMOVE, NULL)) elog(ERROR, "[AQO] Flush: local ML cache is corrupted."); diff --git a/learn_cache.h b/learn_cache.h index eccca22a..df61700e 100644 --- a/learn_cache.h +++ b/learn_cache.h @@ -7,9 +7,9 @@ extern bool aqo_learn_statement_timeout; -extern bool lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames); +extern bool lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids); extern bool lc_has_fss(uint64 fs, int fss); -extern bool lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames); +extern bool lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids); extern void lc_remove_fss(uint64 fs, int fss); extern void lc_flush_data(void); extern void lc_assign_hook(bool newval, void *extra); diff --git a/path_utils.c b/path_utils.c index 78d0512d..fbd21299 100644 --- a/path_utils.c +++ b/path_utils.c @@ -11,12 +11,14 @@ * aqo/path_utils.c * */ - #include "postgres.h" +#include "access/relation.h" #include "nodes/readfuncs.h" #include "optimizer/optimizer.h" #include "path_utils.h" +#include "utils/syscache.h" +#include "utils/lsyscache.h" #include "aqo.h" #include "hash.h" @@ -35,7 +37,7 @@ static AQOPlanNode DefaultAQOPlanNode = .node.type = T_ExtensibleNode, .node.extnodename = AQO_PLAN_NODE, .had_path = false, - .relids = NIL, + .rels = NULL, .clauses = NIL, .selectivities = NIL, .grouping_exprs = NIL, @@ -53,6 +55,9 @@ create_aqo_plan_node() T_ExtensibleNode); memcpy(node, &DefaultAQOPlanNode, sizeof(AQOPlanNode)); + node->rels = 
palloc(sizeof(RelSortOut)); + node->rels->hrels = NIL; + node->rels->signatures = NIL; return node; } @@ -124,33 +129,98 @@ get_selectivities(PlannerInfo *root, } /* - * Transforms given relids from path optimization stage format to list of - * an absolute (independent on query optimization context) relnames. + * Based on the hashTupleDesc() routine */ -List * -get_relnames(PlannerInfo *root, Relids relids) +static uint64 +hashTempTupleDesc(TupleDesc desc) { - int i; - RangeTblEntry *rte; - List *l = NIL; + uint64 s; + int i; - if (relids == NULL) - return NIL; + s = hash_combine(0, hash_uint32(desc->natts)); - /* - * Check: don't take into account relations without underlying plane - * source table. - */ - Assert(!bms_is_member(0, relids)); + for (i = 0; i < desc->natts; ++i) + { + const char *attname = NameStr(TupleDescAttr(desc, i)->attname); + uint64 s1; - i = -1; - while ((i = bms_next_member(relids, i)) >= 0) + s = hash_combine64(s, hash_uint32(TupleDescAttr(desc, i)->atttypid)); + s1 = hash_bytes_extended((const unsigned char *) attname, strlen(attname), 0); + s = hash_combine64(s, s1); + } + return s; +} + +/* + * Get list of relation indexes and prepare list of permanent table reloids, + * list of temporary table reloids (can be changed between query launches) and + * array of table signatures. 
+ */ +void +get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) +{ + int index; + RangeTblEntry *entry; + List *hrels = NIL; + List *hashes = NIL; + + if (relids == NULL) + return; + + index = -1; + while ((index = bms_next_member(relids, index)) >= 0) { - rte = planner_rt_fetch(i, root); - if (OidIsValid(rte->relid)) - l = lappend(l, makeString(pstrdup(rte->eref->aliasname))); + HeapTuple htup; + Form_pg_class classForm; + char *relname = NULL; + + entry = planner_rt_fetch(index, root); + + if (!OidIsValid(entry->relid)) + { + /* Invalid oid */ + hashes = lappend_uint64(hashes, (UINT64_MAX / 7)); + continue; + } + + htup = SearchSysCache1(RELOID, ObjectIdGetDatum(entry->relid)); + if (!HeapTupleIsValid(htup)) + elog(PANIC, "cache lookup failed for reloid %u", entry->relid); + + classForm = (Form_pg_class) GETSTRUCT(htup); + + if (classForm->relpersistence == RELPERSISTENCE_TEMP) + { + /* The case of temporary table */ + + Relation trel = relation_open(entry->relid, NoLock); + TupleDesc tdesc = RelationGetDescr(trel); + + hashes = lappend_uint64(hashes, hashTempTupleDesc(tdesc)); + relation_close(trel, NoLock); + } + else + { + /* The case of regular table */ + relname = quote_qualified_identifier( + get_namespace_name(get_rel_namespace(entry->relid)), + classForm->relrewrite ? + get_rel_name(classForm->relrewrite) : + NameStr(classForm->relname)); + hashes = lappend_uint64(hashes, DatumGetInt64(hash_any_extended( + (unsigned char *) relname, + strlen(relname), 0))); + + hrels = lappend_oid(hrels, entry->relid); + pfree(relname); + } + + ReleaseSysCache(htup); } - return l; + + rels->hrels = list_concat(rels->hrels, hrels); + rels->signatures = list_concat(rels->signatures, hashes); + return; } /* @@ -462,7 +532,7 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) (*dest)->lefttree->targetlist); /* Copy bare expressions for further AQO learning case. 
*/ node->grouping_exprs = copyObject(groupExprs); - node->relids = get_relnames(root, ap->subpath->parent->relids); + get_list_of_relids(root, ap->subpath->parent->relids, node->rels); node->jointype = JOIN_INNER; } else if (is_appropriate_path(src)) @@ -473,8 +543,7 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) node->jointype = JOIN_INNER; } - node->relids = list_concat(node->relids, - get_relnames(root, src->parent->relids)); + get_list_of_relids(root, src->parent->relids, node->rels); if (src->parallel_workers > 0) node->parallel_divisor = get_parallel_divisor(src); @@ -507,7 +576,10 @@ AQOnodeCopy(struct ExtensibleNode *enew, const struct ExtensibleNode *eold) memcpy(new, old, sizeof(AQOPlanNode)); /* These lists couldn't contain AQO nodes. Use basic machinery */ - new->relids = copyObject(old->relids); + new->rels = palloc(sizeof(RelSortOut)); + new->rels->hrels = list_copy(old->rels->hrels); + new->rels->signatures = list_copy(old->rels->signatures); + new->clauses = copyObject(old->clauses); new->grouping_exprs = copyObject(old->grouping_exprs); new->selectivities = copyObject(old->selectivities); @@ -548,7 +620,7 @@ AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) Assert(0); WRITE_BOOL_FIELD(had_path); - WRITE_NODE_FIELD(relids); + WRITE_NODE_FIELD(rels); WRITE_NODE_FIELD(clauses); WRITE_NODE_FIELD(selectivities); WRITE_NODE_FIELD(grouping_exprs); @@ -601,7 +673,7 @@ AQOnodeRead(struct ExtensibleNode *enode) Assert(0); READ_BOOL_FIELD(had_path); - READ_NODE_FIELD(relids); + READ_NODE_FIELD(rels); READ_NODE_FIELD(clauses); READ_NODE_FIELD(selectivities); READ_NODE_FIELD(grouping_exprs); @@ -643,10 +715,10 @@ aqo_store_upper_signature_hook(PlannerInfo *root, RelOptInfo *output_rel, void *extra) { - A_Const *fss_node = makeNode(A_Const); - List *relnames; - List *clauses; - List *selectivities; + A_Const *fss_node = makeNode(A_Const); + RelSortOut rels = {NIL, NIL}; + List *clauses; + List *selectivities; if 
(prev_create_upper_paths_hook) (*prev_create_upper_paths_hook)(root, stage, input_rel, output_rel, extra); @@ -661,9 +733,10 @@ aqo_store_upper_signature_hook(PlannerInfo *root, set_cheapest(input_rel); clauses = get_path_clauses(input_rel->cheapest_total_path, root, &selectivities); - relnames = get_relnames(root, input_rel->relids); + get_list_of_relids(root, input_rel->relids, &rels); fss_node->val.type = T_Integer; fss_node->location = -1; - fss_node->val.val.ival = get_fss_for_object(relnames, clauses, NIL, NULL, NULL); + fss_node->val.val.ival = get_fss_for_object(rels.signatures, clauses, NIL, + NULL, NULL); output_rel->ext_nodes = lappend(output_rel->ext_nodes, (void *) fss_node); } diff --git a/path_utils.h b/path_utils.h index 54ee181d..1803e08d 100644 --- a/path_utils.h +++ b/path_utils.h @@ -8,17 +8,29 @@ #define AQO_PLAN_NODE "AQOPlanNode" +/* + * Find and sort out relations that used in the query: + * Use oids of relations to store dependency of ML row on a set of tables. + * Use oids of temporary tables to get access to these structure for preparing + * a kind of signature. + */ +typedef struct +{ + List *hrels; /* oids of persistent relations */ + List *signatures; /* list of hashes: on qualified name of a persistent + * table or on a table structure for temp table */ +} RelSortOut; + /* * information for adaptive query optimization */ typedef struct AQOPlanNode { - ExtensibleNode node; - bool had_path; - List *relids; - List *temp_relnames; /* We store name of temporary table because OID by-default haven't sense at other backends. */ - List *clauses; - List *selectivities; + ExtensibleNode node; + bool had_path; + RelSortOut *rels; + List *clauses; + List *selectivities; /* Grouping expressions from a target list. 
*/ List *grouping_exprs; @@ -48,7 +60,8 @@ extern List *get_selectivities(PlannerInfo *root, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo); -extern List *get_relnames(PlannerInfo *root, Relids relids); +extern void get_list_of_relids(PlannerInfo *root, Relids relids, + RelSortOut *rels); extern List *get_path_clauses(Path *path, PlannerInfo *root, diff --git a/postprocessing.c b/postprocessing.c index dd420bce..fcf820bf 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -59,12 +59,12 @@ static char *PlanStateInfo = "PlanStateInfo"; static void atomic_fss_learn_step(uint64 fhash, int fss, OkNNrdata *data, double *features, double target, double rfactor, - List *relnames, bool isTimedOut); + List *reloids, bool isTimedOut); static bool learnOnPlanState(PlanState *p, void *context); -static void learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, +static void learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, double learned, double rfactor, Plan *plan, bool notExecuted); -static void learn_sample(aqo_obj_stat *ctx, List *relidslist, +static void learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, double learned, double rfactor, Plan *plan, bool notExecuted); static List *restore_selectivities(List *clauselist, @@ -91,7 +91,7 @@ static bool ExtractFromQueryEnv(QueryDesc *queryDesc); static void atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, double *features, double target, double rfactor, - List *relnames, bool isTimedOut) + List *reloids, bool isTimedOut) { LOCKTAG tag; @@ -102,13 +102,13 @@ atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, data->rows = 0; data->rows = OkNNr_learn(data, features, target, rfactor); - update_fss_ext(fs, fss, data, relnames, isTimedOut); + update_fss_ext(fs, fss, data, reloids, isTimedOut); LockRelease(&tag, ExclusiveLock, false); } static void -learn_agg_sample(aqo_obj_stat *ctx, List *relnames, +learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, double learned, double rfactor, Plan *plan, 
bool notExecuted) { AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); @@ -127,7 +127,7 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relnames, return; target = log(learned); - child_fss = get_fss_for_object(relnames, ctx->clauselist, NIL, NULL, NULL); + child_fss = get_fss_for_object(rels->signatures, ctx->clauselist, NIL, NULL, NULL); fss = get_grouped_exprs_hash(child_fss, aqo_node->grouping_exprs); memset(&data, 0, sizeof(OkNNrdata)); @@ -136,7 +136,7 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relnames, /* Critical section */ atomic_fss_learn_step(fhash, fss, &data, NULL, - target, rfactor, relnames, ctx->isTimedOut); + target, rfactor, rels->hrels, ctx->isTimedOut); /* End of critical section */ } @@ -145,7 +145,7 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relnames, * true cardinalities) performs learning procedure. */ static void -learn_sample(aqo_obj_stat *ctx, List *relnames, +learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, double learned, double rfactor, Plan *plan, bool notExecuted) { AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); @@ -158,7 +158,7 @@ learn_sample(aqo_obj_stat *ctx, List *relnames, memset(&data, 0, sizeof(OkNNrdata)); target = log(learned); - fss = get_fss_for_object(relnames, ctx->clauselist, + fss = get_fss_for_object(rels->signatures, ctx->clauselist, ctx->selectivities, &data.cols, &features); /* Only Agg nodes can have non-empty a grouping expressions list. 
*/ @@ -177,7 +177,7 @@ learn_sample(aqo_obj_stat *ctx, List *relnames, /* Critical section */ atomic_fss_learn_step(fs, fss, &data, features, target, rfactor, - relnames, ctx->isTimedOut); + rels->hrels, ctx->isTimedOut); /* End of critical section */ if (data.cols > 0) @@ -510,7 +510,7 @@ learnOnPlanState(PlanState *p, void *context) List *cur_selectivities; cur_selectivities = restore_selectivities(aqo_node->clauses, - aqo_node->relids, + aqo_node->rels->hrels, aqo_node->jointype, aqo_node->was_parametrized); SubplanCtx.selectivities = list_concat(SubplanCtx.selectivities, @@ -518,14 +518,14 @@ learnOnPlanState(PlanState *p, void *context) SubplanCtx.clauselist = list_concat(SubplanCtx.clauselist, list_copy(aqo_node->clauses)); - if (aqo_node->relids != NIL) + if (aqo_node->rels->hrels != NIL) { /* * This plan can be stored as a cached plan. In the case we will have * bogus path_relids field (changed by list_concat routine) at the * next usage (and aqo-learn) of this plan. */ - ctx->relidslist = list_copy(aqo_node->relids); + ctx->relidslist = list_copy(aqo_node->rels->hrels); if (p->instrument) { @@ -537,12 +537,12 @@ learnOnPlanState(PlanState *p, void *context) { if (IsA(p, AggState)) learn_agg_sample(&SubplanCtx, - aqo_node->relids, learn_rows, rfactor, + aqo_node->rels, learn_rows, rfactor, p->plan, notExecuted); else learn_sample(&SubplanCtx, - aqo_node->relids, learn_rows, rfactor, + aqo_node->rels, learn_rows, rfactor, p->plan, notExecuted); } } diff --git a/preprocessing.c b/preprocessing.c index 0d376d7b..d768104d 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -135,13 +135,13 @@ aqo_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) { - bool query_is_stored = false; - LOCKTAG tag; - MemoryContext oldCxt; + bool query_is_stored = false; + LOCKTAG tag; + MemoryContext oldCxt; /* * We do not work inside an parallel worker now by reason of insert into - * the heap during planning. 
Transactions is synchronized between parallel + * the heap during planning. Transactions are synchronized between parallel * sections. See GetCurrentCommandId() comments also. */ if (!aqoIsEnabled() || @@ -419,7 +419,7 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) Relation rel = table_open(rte->relid, AccessShareLock); bool is_catalog = IsCatalogRelation(rel); bool is_aqo_rel = IsAQORelation(rel); - bool *trivQuery = (bool *) context; + bool *trivQuery = (bool *) context; table_close(rel, AccessShareLock); if (is_catalog || is_aqo_rel) diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index e1ffe7e5..139daf14 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -1,3 +1,24 @@ +-- The function just copied from stats_ext.sql +create function check_estimated_rows(text) returns table (estimated int, actual int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain analyze %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)'); + return query select tmp[1]::int, tmp[2]::int; + end if; + end loop; +end; +$$; + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -100,6 +121,21 @@ CREATE TABLE tmp1 AS SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; SELECT count(*) FROM tmp1; + +-- Remove data on some unneeded instances of tmp1 table. 
+SELECT public.clean_aqo_data(); + +-- Result of the query below should be empty +SELECT * FROM aqo_query_texts aqt1, aqo_query_texts aqt2 +WHERE aqt1.query_text = aqt2.query_text AND aqt1.query_hash <> aqt2.query_hash; + +-- Fix the state of the AQO data +SELECT reliability,nfeatures,query_text +FROM aqo_data ad, aqo_query_texts aqt +WHERE aqt.query_hash = ad.fspace_hash +ORDER BY (md5(query_text)) +; + DROP TABLE tmp1; SET aqo.mode = 'controlled'; @@ -121,13 +157,15 @@ WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; -EXPlAIN SELECT t1.a, t2.b, t3.c +EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 -WHERE t1.a = t2.b AND t2.a = t3.b; +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + WHERE t1.a = t2.b AND t2.a = t3.b; +'); EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 @@ -150,22 +188,25 @@ WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; -EXPlAIN SELECT t1.a, t2.b, t3.c +EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 -WHERE t1.a = t2.b AND t2.a = t3.b; +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c + FROM aqo_test1 AS t1, aqo_test1 AS 
t2, aqo_test1 AS t3 + WHERE t1.a = t2.b AND t2.a = t3.b; +'); -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 -WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +'); DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; - DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; - DROP EXTENSION aqo; +DROP FUNCTION check_estimated_rows; diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index 6f09d62f..acd64b16 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -5,6 +5,7 @@ DROP TABLE IF EXISTS a; DROP TABLE IF EXISTS b; CREATE TABLE a(); SELECT * FROM a; +SELECT 'a'::regclass::oid AS a_oid \gset SELECT clean_aqo_data(); /* @@ -14,15 +15,15 @@ SELECT clean_aqo_data(); * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat * should remain */ -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + 
aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); DROP TABLE a; SELECT clean_aqo_data(); @@ -34,17 +35,17 @@ SELECT clean_aqo_data(); * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat, * should be deleted */ -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); CREATE TABLE a(); @@ -62,29 +63,29 @@ CREATE TABLE b(); SELECT * FROM a; SELECT * FROM b; SELECT * FROM b CROSS JOIN a; --- SELECT 'a'::regclass::oid AS a_oid \gset --- SELECT 'b'::regclass::oid AS b_oid \gset +SELECT 'a'::regclass::oid AS a_oid \gset +SELECT 'b'::regclass::oid AS b_oid \gset -- new lines added to aqo_data -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data 
WHERE 'a' = ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); -SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); DROP TABLE a; SELECT clean_aqo_data(); @@ -95,48 +96,48 @@ SELECT clean_aqo_data(); * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_texts, * lines with query_hash corresponding to a_oid's 
fspace_hash deleted in aqo_query_stat, */ -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); -- lines corresponding to b_oid in all theese tables should remain -SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 
:b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); DROP TABLE b; SELECT clean_aqo_data(); -- lines corresponding to b_oid in theese tables deleted -SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); DROP EXTENSION aqo; \ No newline at end of file diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql index 419d85de..6885ab91 100644 --- a/sql/statement_timeout.sql +++ b/sql/statement_timeout.sql @@ -1,6 +1,5 @@ -- Check the learning-on-timeout feature -- For 
stabilized reproduction autovacuum must be disabled. - CREATE FUNCTION check_estimated_rows(text) RETURNS TABLE (estimated int) LANGUAGE plpgsql AS $$ DECLARE @@ -62,3 +61,4 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); DROP TABLE t; DROP EXTENSION aqo; +DROP FUNCTION check_estimated_rows; diff --git a/sql/temp_tables.sql b/sql/temp_tables.sql new file mode 100644 index 00000000..cd24a051 --- /dev/null +++ b/sql/temp_tables.sql @@ -0,0 +1,95 @@ +CREATE EXTENSION aqo; +SET aqo.mode = 'learn'; + +CREATE TEMP TABLE tt(); +CREATE TABLE pt(); + +-- Ignore queries with the only temp tables +SELECT count(*) FROM tt; +SELECT count(*) FROM tt AS t1, tt AS t2; +SELECT * FROM aqo_data; + +-- Should be stored in the ML base +SELECT count(*) FROM pt; +SELECT count(*) FROM pt, tt; +SELECT count(*) FROM pt AS pt1, tt AS tt1, tt AS tt2, pt AS pt2; +SELECT count(*) FROM aqo_data; + +DROP TABLE tt; +SELECT clean_aqo_data(); +SELECT count(*) FROM aqo_data; -- Should be the same as above +DROP TABLE pt; +SELECT clean_aqo_data(); +SELECT count(*) FROM aqo_data; -- Should be 0 +SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt +ON aq.query_hash = aqt.query_hash +; -- TODO: should contain just one row + +-- Test learning on temporary table +CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE tt AS SELECT -x AS x, (x % 7) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE ttd AS -- the same structure as tt + SELECT -(x*3) AS x, (x % 9) AS y FROM generate_series(1,100) AS x; +ANALYZE pt,tt,ttd; + +create function check_estimated_rows(text) returns table (estimated int, actual int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain analyze %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)'); + return query select tmp[1]::int, tmp[2]::int; + end if; + end loop; 
+end; +$$; + +-- Check: AQO learns on queries with temp tables + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Estimation failed. Learn. +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Should use AQO estimation +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); +'); -- TODO: Should use AQO estimation with another temp table of the same structure + +SET aqo.mode = 'forced'; -- Now we use all fss records for each query +DROP TABLE pt; +SELECT clean_aqo_data(); +CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE ttd1 AS + SELECT -(x*3) AS x, (x % 9) AS y1 FROM generate_series(1,100) AS x; +ANALYZE; + +-- Check: use AQO knowledge with different temp table of the same structure + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Estimation failed. Learn. 
+SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Should use AQO estimation +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); +'); -- Should use AQO estimation with another temp table of the same structure +SELECT * FROM check_estimated_rows(' + SELECT pt1.x, avg(pt1.y) FROM pt AS pt1,ttd WHERE pt1.x = ttd.x GROUP BY (pt1.x); +'); -- Alias doesn't influence feature space +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd1 WHERE pt.x = ttd1.x GROUP BY (pt.x); +'); -- Don't use AQO for temp table because of different attname + +DROP TABLE pt CASCADE; +DROP EXTENSION aqo; +DROP FUNCTION check_estimated_rows; diff --git a/sql/top_queries.sql b/sql/top_queries.sql index da04e682..9f4c9074 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -3,9 +3,10 @@ SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; -- --- Dummy test. CREATE TABLE shouldn't find in the ML storage. But a simple --- select must be in. Also here we test on gathering a stat on temp and plain +-- Dummy test. CREATE TABLE shouldn't be found in the ML storage. But a simple +-- select must recorded. Also here we test on gathering a stat on temp and plain -- relations. +-- XXX: Right now we ignore queries if no one permanent table is touched. -- CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 059f5c64..5168079c 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -156,20 +156,19 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? -- Live with this variant of the test for some time. 
-SELECT - num, to_char(error, '9.99EEEE')::text AS error, query_text +SELECT to_char(error, '9.99EEEE')::text AS error, query_text FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id -ORDER BY (error, md5(query_text)) DESC; +ORDER BY (md5(query_text),error) DESC; DROP TABLE t,t1 CASCADE; SELECT public.clean_aqo_data(); -- Look for any remaining queries in the ML storage. -SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text +SELECT to_char(error, '9.99EEEE')::text AS error, query_text FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id -ORDER BY (error, md5(query_text)) DESC; +ORDER BY (md5(query_text),error) DESC; DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index 7a03a840..9f9fd418 100644 --- a/storage.c +++ b/storage.c @@ -159,6 +159,7 @@ find_query(uint64 qhash, QueryContextData *ctx) * * Such logic is possible, because this update is performed by AQO itself. It is * not break any learning logic besides possible additional learning iterations. + * Pass NIL as a value of the relations field to avoid updating it. 
*/ bool update_query(uint64 qhash, uint64 fhash, @@ -324,21 +325,21 @@ add_query_text(uint64 qhash, const char *query_string) return true; } - +/* static ArrayType * -form_strings_vector(List *relnames) +form_strings_vector(List *reloids) { Datum *rels; ArrayType *array; ListCell *lc; int i = 0; - if (relnames == NIL) + if (reloids == NIL) return NULL; - rels = (Datum *) palloc(list_length(relnames) * sizeof(Datum)); + rels = (Datum *) palloc(list_length(reloids) * sizeof(Datum)); - foreach(lc, relnames) + foreach(lc, reloids) { char *relname = strVal(lfirst(lc)); @@ -357,7 +358,7 @@ deform_strings_vector(Datum datum) Datum *values; int i; int nelems = 0; - List *relnames = NIL; + List *reloids = NIL; deconstruct_array(array, TEXTOID, -1, false, TYPALIGN_INT, &values, NULL, &nelems); @@ -366,23 +367,24 @@ deform_strings_vector(Datum datum) Value *s; s = makeString(pstrdup(TextDatumGetCString(values[i]))); - relnames = lappend(relnames, s); + reloids = lappend(reloids, s); } pfree(values); pfree(array); - return relnames; + return reloids; } +*/ bool -load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relnames, bool isSafe) +load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe) { if (isSafe && (!aqo_learn_statement_timeout || !lc_has_fss(fs, fss))) - return load_fss(fs, fss, data, relnames); + return load_fss(fs, fss, data, reloids); else { Assert(aqo_learn_statement_timeout); - return lc_load_fss(fs, fss, data, relnames); + return lc_load_fss(fs, fss, data, reloids); } } @@ -401,7 +403,7 @@ load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relnames, bool isSafe) * objects in the given feature space */ bool -load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) +load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids) { Relation hrel; Relation irel; @@ -445,11 +447,24 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) deform_vector(values[4], data->targets, &(data->rows)); deform_vector(values[6], 
data->rfactors, &(data->rows)); - if (relnames != NULL) - *relnames = deform_strings_vector(values[5]); + if (reloids != NULL) + { + ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(values[5])); + Datum *values; + int nrows; + int i; + + deconstruct_array(array, OIDOID, sizeof(Oid), true, + TYPALIGN_INT, &values, NULL, &nrows); + for (i = 0; i < nrows; ++i) + *reloids = lappend_oid(*reloids, DatumGetObjectId(values[i])); + + pfree(values); + pfree(array); + } } else - elog(ERROR, "unexpected number of features for hash (" \ + elog(ERROR, "[AQO] Unexpected number of features for hash (" \ UINT64_FORMAT", %d):\ expected %d features, obtained %d", fs, fss, data->cols, DatumGetInt32(values[2])); @@ -466,13 +481,13 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) } bool -update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *relnames, +update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids, bool isTimedOut) { if (!isTimedOut) - return update_fss(fs, fss, data, relnames); + return update_fss(fs, fss, data, reloids); else - return lc_update_fss(fs, fss, data, relnames); + return lc_update_fss(fs, fss, data, reloids); } /* @@ -488,7 +503,7 @@ update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *relnames, * Caller guaranteed that no one AQO process insert or update this data row. */ bool -update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) +update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids) { Relation hrel; Relation irel; @@ -541,10 +556,28 @@ update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) values[4] = PointerGetDatum(form_vector(data->targets, data->rows)); - /* Form array of relids. Only once. */ - values[5] = PointerGetDatum(form_strings_vector(relnames)); - if ((void *) values[5] == NULL) + /* Serialize list of reloids. Only once. 
*/ + if (reloids != NIL) + { + int nrows = list_length(reloids); + ListCell *lc; + Datum *elems; + ArrayType *array; + int i = 0; + + elems = palloc(sizeof(*elems) * nrows); + foreach (lc, reloids) + elems[i++] = ObjectIdGetDatum(lfirst_oid(lc)); + + array = construct_array(elems, nrows, OIDOID, sizeof(Oid), true, + TYPALIGN_INT); + values[5] = PointerGetDatum(array); + pfree(elems); + } + else + /* XXX: Is it really possible? */ isnull[5] = true; + values[6] = PointerGetDatum(form_vector(data->rfactors, data->rows)); tuple = heap_form_tuple(tupDesc, values, isnull); diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index bc486184..5bb14e9e 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -210,10 +210,10 @@ # Number of rows in aqo_data: related to pgbench test and total value. my $pgb_fss_count = $node->safe_psql('postgres', " SELECT count(*) FROM aqo_data - WHERE $aoid ::regclass::text = ANY(oids) OR - $boid ::regclass::text = ANY(oids) OR - $toid ::regclass::text = ANY(oids) OR - $hoid ::regclass::text = ANY(oids) + WHERE $aoid = ANY(oids) OR + $boid = ANY(oids) OR + $toid = ANY(oids) OR + $hoid = ANY(oids) "); $fss_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_data;"); @@ -223,10 +223,10 @@ WHERE fspace_hash IN ( SELECT fspace_hash FROM aqo_data WHERE - $aoid ::regclass::text = ANY(oids) OR - $boid ::regclass::text = ANY(oids) OR - $toid ::regclass::text = ANY(oids) OR - $hoid ::regclass::text = ANY(oids) + $aoid = ANY(oids) OR + $boid = ANY(oids) OR + $toid = ANY(oids) OR + $hoid = ANY(oids) ) "); $fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries;"); @@ -237,10 +237,10 @@ WHERE query_hash IN ( SELECT fspace_hash FROM aqo_data WHERE - $aoid ::regclass::text = ANY(oids) OR - $boid ::regclass::text = ANY(oids) OR - $toid ::regclass::text = ANY(oids) OR - $hoid ::regclass::text = ANY(oids) + $aoid = ANY(oids) OR + $boid = ANY(oids) OR + $toid = ANY(oids) OR + $hoid = ANY(oids) ) "); $fs_samples_count = 
$node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); @@ -251,10 +251,10 @@ WHERE query_hash IN ( SELECT fspace_hash FROM aqo_data WHERE - $aoid ::regclass::text = ANY(oids) OR - $boid ::regclass::text = ANY(oids) OR - $toid ::regclass::text = ANY(oids) OR - $hoid ::regclass::text = ANY(oids) + $aoid = ANY(oids) OR + $boid = ANY(oids) OR + $toid = ANY(oids) OR + $hoid = ANY(oids) ) "); $stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); From 7ef2c18d91ff01ecfa0dd7589e6f9eef5c873c43 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Mon, 16 May 2022 14:14:42 +0500 Subject: [PATCH 077/203] Fix cardinality error calculation. Switch from the top_queries(n) routine to show_execution_time(controlled) to unify AQO interface. --- aqo--1.4--1.5.sql | 68 +++++++++++++++++++++++++++++++++++++--- expected/gucs.out | 20 ++++++++++++ expected/top_queries.out | 35 +++++++++++++++++++-- expected/unsupported.out | 10 +++--- sql/gucs.sql | 4 +++ sql/top_queries.sql | 18 +++++++++-- t/001_pgbench.pl | 7 +++-- 7 files changed, 145 insertions(+), 17 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index b0d97594..907ed610 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -5,15 +5,17 @@ -- -- Re-create the aqo_data table. Do so to keep the columns order. +-- The oids array contains oids of permanent tables only. It is used for cleanup +-- ML knowledge base from queries that refer to removed tables. 
-- DROP TABLE public.aqo_data CASCADE; CREATE TABLE public.aqo_data ( fspace_hash bigint NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, fsspace_hash int NOT NULL, - nfeatures int NOT NULL, - features double precision[][], - targets double precision[], - oids oid [] DEFAULT NULL, + nfeatures int NOT NULL, + features double precision[][], + targets double precision[], + oids oid [] DEFAULT NULL, reliability double precision [] ); CREATE UNIQUE INDEX aqo_fss_access_idx ON public.aqo_data (fspace_hash, fsspace_hash); @@ -72,4 +74,60 @@ BEGIN END LOOP; END LOOP; END; -$$ LANGUAGE plpgsql; \ No newline at end of file +$$ LANGUAGE plpgsql; + +DROP FUNCTION public.top_time_queries; + +-- +-- Show execution time of queries, for which AQO has statistics. +-- controlled - show stat on executions where AQO was used for cardinality +-- estimations, or not used (controlled = false). +-- Last case is possible in disabled mode with aqo.force_collect_stat = 'on'. +-- +CREATE OR REPLACE FUNCTION public.show_execution_time(controlled boolean) +RETURNS TABLE(num bigint, id bigint, fshash bigint, exec_time float, nexecs bigint) +AS $$ +BEGIN +IF (controlled) THEN + -- Show a query execution time made with AQO support for the planner + -- cardinality estimations. Here we return result of last execution. + RETURN QUERY + SELECT + row_number() OVER (ORDER BY (exectime, queryid, fs_hash) DESC) AS nn, + queryid, fs_hash, exectime, execs + FROM ( + SELECT + aq.query_hash AS queryid, + aq.fspace_hash AS fs_hash, + execution_time_with_aqo[array_length(execution_time_with_aqo, 1)] AS exectime, + executions_with_aqo AS execs + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + ON aq.query_hash = aqs.query_hash + WHERE TRUE = ANY (SELECT unnest(execution_time_with_aqo) IS NOT NULL) + ) AS q1 + ORDER BY nn ASC; + +ELSE + -- Show a query execution time made without any AQO advise. + -- Return an average value across all executions. 
+ RETURN QUERY + SELECT + row_number() OVER (ORDER BY (exectime, queryid, fs_hash) DESC) AS nn, + queryid, fs_hash, exectime, execs + FROM ( + SELECT + aq.query_hash AS queryid, + aq.fspace_hash AS fs_hash, + array_avg(execution_time_without_aqo) AS exectime, + executions_without_aqo AS execs + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + ON aq.query_hash = aqs.query_hash + WHERE TRUE = ANY (SELECT unnest(execution_time_without_aqo) IS NOT NULL) + ) AS q1 + ORDER BY (nn) ASC; +END IF; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION public.show_execution_time(boolean) IS +'Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions.'; diff --git a/expected/gucs.out b/expected/gucs.out index 1a036f64..c56fc91a 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -35,4 +35,24 @@ SELECT obj_description('public.show_cardinality_errors'::regproc::oid); Get cardinality error of queries the last time they were executed. Order queries according to an error value. (1 row) +SELECT obj_description('public.show_execution_time'::regproc::oid); + obj_description +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions. 
+(1 row) + +\df show_cardinality_errors + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-------------------------+------------------------------------------------------------------------------------+---------------------+------ + public | show_cardinality_errors | TABLE(num bigint, id bigint, fshash bigint, error double precision, nexecs bigint) | controlled boolean | func +(1 row) + +\df show_execution_time + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+---------------------+----------------------------------------------------------------------------------------+---------------------+------ + public | show_execution_time | TABLE(num bigint, id bigint, fshash bigint, exec_time double precision, nexecs bigint) | controlled boolean | func +(1 row) + DROP EXTENSION aqo; diff --git a/expected/top_queries.out b/expected/top_queries.out index 19c57543..dc5ccb95 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -21,16 +21,32 @@ SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; 0 (1 row) -SELECT num FROM top_time_queries(3); -NOTICE: Top 3 execution time queries +SELECT num FROM show_execution_time(true); -- Just for checking, return zero. + num +----- +(0 rows) + +SELECT num FROM show_execution_time(false); num ----- 1 2 (2 rows) +-- Without the AQO control queries with and without temp tables are logged. 
+SELECT query_text,nexecs +FROM show_execution_time(false) ce, aqo_query_texts aqt +WHERE ce.id = aqt.query_hash +ORDER BY (md5(query_text)); + query_text | nexecs +------------------------------------------------------+-------- + SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; | 1 + SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; | 1 +(2 rows) + -- --- num of query uses table t2 should be bigger than num of query uses table t1 and be the first +-- num of query which uses the table t2 should be bigger than num of query which +-- uses the table t1 and must be the first -- CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y FROM generate_series(1,1000) AS gs; @@ -68,3 +84,16 @@ SELECT count(*) FROM show_cardinality_errors(true); 0 (1 row) +-- Fix list of logged queries +SELECT query_text,nexecs +FROM show_cardinality_errors(false) ce, aqo_query_texts aqt +WHERE ce.id = aqt.query_hash +ORDER BY (md5(query_text)); + query_text | nexecs +------------------------------------------------------------------------------------------------+-------- + SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 + SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; | 1 + SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; | 1 + SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 +(4 rows) + diff --git a/expected/unsupported.out b/expected/unsupported.out index 56015cb7..01af838a 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -546,9 +546,9 @@ WHERE aqt.query_hash = cef.id ORDER BY (md5(query_text),error) DESC; error | query_text -----------+------------------------------------------------------------------------------------------------ - 3.00e+01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; - 1.33e+00 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; - 1.15e+02 | SELECT count(*) FROM t WHERE x < 3 AND 
mod(x,3) = 1; + 7.68e-01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + 7.04e-02 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + 1.42e+00 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; 0.00e+00 | SELECT * FROM + | (SELECT * FROM t WHERE x < 0) AS t0 + | JOIN + @@ -559,14 +559,14 @@ ORDER BY (md5(query_text),error) DESC; | SELECT count(*) FROM t WHERE + | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); - 3.00e+01 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + 4.54e-01 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; 0.00e+00 | SELECT count(*) FROM ( + | SELECT count(*) AS x FROM ( + | SELECT count(*) FROM t1 GROUP BY (x,y) + | ) AS q1 + | ) AS q2 + | WHERE q2.x > 1; - 3.00e+01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + 7.68e-01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; 0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + diff --git a/sql/gucs.sql b/sql/gucs.sql index a5c999a4..15269b95 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -13,5 +13,9 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) -- Check existence of the interface functions. 
SELECT obj_description('public.show_cardinality_errors'::regproc::oid); +SELECT obj_description('public.show_execution_time'::regproc::oid); + +\df show_cardinality_errors +\df show_execution_time DROP EXTENSION aqo; diff --git a/sql/top_queries.sql b/sql/top_queries.sql index 9f4c9074..11bebdc5 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -12,10 +12,18 @@ CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; -SELECT num FROM top_time_queries(3); +SELECT num FROM show_execution_time(true); -- Just for checking, return zero. +SELECT num FROM show_execution_time(false); + +-- Without the AQO control queries with and without temp tables are logged. +SELECT query_text,nexecs +FROM show_execution_time(false) ce, aqo_query_texts aqt +WHERE ce.id = aqt.query_hash +ORDER BY (md5(query_text)); -- --- num of query uses table t2 should be bigger than num of query uses table t1 and be the first +-- num of query which uses the table t2 should be bigger than num of query which +-- uses the table t1 and must be the first -- CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y FROM generate_series(1,1000) AS gs; @@ -35,3 +43,9 @@ WHERE te.fshash = ( -- Should return zero SELECT count(*) FROM show_cardinality_errors(true); + +-- Fix list of logged queries +SELECT query_text,nexecs +FROM show_cardinality_errors(false) ce, aqo_query_texts aqt +WHERE ce.id = aqt.query_hash +ORDER BY (md5(query_text)); diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 5bb14e9e..cedc101d 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -147,9 +147,12 @@ JOIN aqo_query_texts t ON (t.query_hash = v.id) WHERE v.error > 0."); note("\n Queries: \n $res \n"); +$res = $node->safe_psql('postgres', "SELECT * FROM public.show_execution_time(false) v"); +note("\n TIMES: \n $res 
\n"); + $res = $node->safe_psql('postgres', - "SELECT count(*) FROM top_time_queries(10) v - WHERE v.execution_time > 0."); + "SELECT count(*) FROM public.show_execution_time(false) v + WHERE v.exec_time > 0."); is($res, 3); # ############################################################################## From 28b3b0d57c0e70754b16b3cad9bfb7b3ad101e06 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Mon, 16 May 2022 16:10:04 +0500 Subject: [PATCH 078/203] Add into AQO a GUC on minimum number of joins threshold. If number of joins in a query less than this value - ignore this query. Also, rewrite (and rename) the aqo_drop routine. --- aqo--1.4--1.5.sql | 36 +++++++ aqo.c | 12 +++ aqo.h | 1 + expected/aqo_learn.out | 234 +++++++++++++++++++++++++++++++++++++++++ expected/gucs.out | 52 +++++++++ expected/schema.out | 10 +- postprocessing.c | 2 +- preprocessing.c | 63 +++++++++-- sql/aqo_learn.sql | 95 +++++++++++++++++ sql/gucs.sql | 13 +++ t/001_pgbench.pl | 3 +- 11 files changed, 506 insertions(+), 15 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 907ed610..261d86e1 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -77,6 +77,7 @@ END; $$ LANGUAGE plpgsql; DROP FUNCTION public.top_time_queries; +DROP FUNCTION public.aqo_drop; -- -- Show execution time of queries, for which AQO has statistics. @@ -131,3 +132,38 @@ $$ LANGUAGE plpgsql; COMMENT ON FUNCTION public.show_execution_time(boolean) IS 'Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. 
Another case (AQO not used), return an average value of execution time across all known executions.'; + +CREATE OR REPLACE FUNCTION public.aqo_drop_class(id bigint) +RETURNS integer AS $$ +DECLARE + fs bigint; + num integer; +BEGIN + IF (id = 0) THEN + raise EXCEPTION '[AQO] Cannot remove basic class %.', id; + END IF; + + SELECT fspace_hash FROM public.aqo_queries WHERE (query_hash = id) INTO fs; + + IF (fs IS NULL) THEN + raise WARNING '[AQO] Nothing to remove for the class %.', id; + RETURN 0; + END IF; + + IF (fs <> id) THEN + raise WARNING '[AQO] Removing query class has non-generic feature space value: id = %, fs = %.', id, fs; + END IF; + + SELECT count(*) FROM public.aqo_data WHERE fspace_hash = fs INTO num; + + /* + * Remove the only from aqo_queries table. All other data will be removed by + * CASCADE deletion. + */ + DELETE FROM public.aqo_queries WHERE query_hash = id; + RETURN num; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION public.aqo_drop_class(bigint) IS +'Remove info about an query class from AQO ML knowledge base.'; \ No newline at end of file diff --git a/aqo.c b/aqo.c index 3e0210e8..2a9a680f 100644 --- a/aqo.c +++ b/aqo.c @@ -200,6 +200,18 @@ _PG_init(void) NULL ); + DefineCustomIntVariable("aqo.join_threshold", + "Sets the threshold of number of JOINs in query beyond which AQO is used.", + NULL, + &aqo_join_threshold, + 0, + 0, INT_MAX / 1000, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + prev_shmem_startup_hook = shmem_startup_hook; shmem_startup_hook = aqo_init_shmem; prev_planner_hook = planner_hook; diff --git a/aqo.h b/aqo.h index 92db265b..3891e2d4 100644 --- a/aqo.h +++ b/aqo.h @@ -173,6 +173,7 @@ extern bool aqo_enabled; extern bool force_collect_stat; extern bool aqo_show_hash; extern bool aqo_show_details; +extern int aqo_join_threshold; /* * It is mostly needed for auto tuning of query. 
with auto tuning mode aqo diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 088a5c60..10a0fecb 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -500,6 +500,240 @@ SELECT * FROM check_estimated_rows(' 17 | 17 (1 row) +-- Test limit on number of joins +SET aqo.mode = 'learn'; +SELECT * FROM aqo_drop_class(0); +ERROR: [AQO] Cannot remove basic class 0. +CONTEXT: PL/pgSQL function aqo_drop_class(bigint) line 7 at RAISE +SELECT * FROM aqo_drop_class(42); +WARNING: [AQO] Nothing to remove for the class 42. + aqo_drop_class +---------------- + 0 +(1 row) + +-- Remove all data from ML knowledge base +SELECT count(*) FROM ( +SELECT aqo_drop_class(q1.id::bigint) FROM ( + SELECT query_hash AS id + FROM aqo_queries WHERE query_hash <> 0) AS q1 +) AS q2; + count +------- + 7 +(1 row) + +SELECT count(*) FROM aqo_data; + count +------- + 0 +(1 row) + +SET aqo.join_threshold = 3; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); + estimated | actual +-----------+-------- + 20 | 19 +(1 row) + +SELECT count(*) FROM aqo_data; -- Return 0 - do not learn on the queries above + count +------- + 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT * + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +'); + estimated | actual +-----------+-------- + 20 | 17 +(1 row) + +SELECT count(*) FROM -- Learn on the query + (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 +; + count +------- + 1 +(1 row) + +SELECT query_text FROM aqo_query_texts WHERE query_hash <> 0; -- Check query + query_text +---------------------------------------------------------------------------- + explain analyze + + SELECT * + + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4+ + WHERE 
t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; + + +(1 row) + +SET aqo.join_threshold = 1; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); + estimated | actual +-----------+-------- + 20 | 19 +(1 row) + +SELECT count(*) FROM + (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 +; -- Learn on a query with one join + count +------- + 2 +(1 row) + +SET aqo.join_threshold = 0; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT count(*) FROM + (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 +; -- Learn on the query without any joins now + count +------- + 3 +(1 row) + +SET aqo.join_threshold = 1; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a)'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT count(*) FROM + (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 +; -- See one more query in the AQO knowledge base + count +------- + 4 +(1 row) + +SELECT * FROM check_estimated_rows('WITH selected AS (SELECT * FROM aqo_test1 t1) SELECT count(*) FROM selected'); + estimated | actual +-----------+-------- + 1 | 1 +(1 row) + +SELECT * FROM check_estimated_rows(' + WITH selected AS ( + SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) + ) SELECT count(*) FROM selected') +; + estimated | actual +-----------+-------- + 1 | 1 +(1 row) + +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 + count +------- + 5 +(1 row) + +-- InitPlan +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1 WHERE t1.a IN ( + SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) + )'); + estimated | actual +-----------+-------- + 20 | 19 +(1 row) + 
+SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 + count +------- + 6 +(1 row) + +-- SubPlan +SELECT * FROM check_estimated_rows(' + SELECT ( + SELECT avg(t2.a) FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) AND (t2.a = t1.a) + ) FROM aqo_test1 AS t1; +'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 + count +------- + 7 +(1 row) + +-- Subquery +SET aqo.join_threshold = 3; +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, + (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 + WHERE q1.a*t1.a = t1.a + 15; +'); -- Two JOINs, ignore it + estimated | actual +-----------+-------- + 2 | 4 +(1 row) + +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 + count +------- + 7 +(1 row) + +SET aqo.join_threshold = 2; +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, + (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 + WHERE q1.a*t1.a = t1.a + 15; +'); -- One JOIN from subquery, another one from the query + estimated | actual +-----------+-------- + 2 | 4 +(1 row) + +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 + count +------- + 8 +(1 row) + +SELECT * FROM check_estimated_rows(' + WITH selected AS ( + SELECT t2.a FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) + ) SELECT count(*) FROM aqo_test1 t3, selected WHERE selected.a = t3.a') +; -- One JOIN extracted from CTE, another - from a FROM part of the query + estimated | actual +-----------+-------- + 1 | 1 +(1 row) + +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 + count +------- + 9 +(1 row) + +RESET aqo.join_threshold; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; diff --git a/expected/gucs.out 
b/expected/gucs.out index c56fc91a..53bcd24d 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -1,9 +1,48 @@ CREATE EXTENSION aqo; +-- Check interface variables and their default values. Detect, if default value +-- of a GUC is changed. +SHOW aqo.join_threshold; + aqo.join_threshold +-------------------- + 0 +(1 row) + +SHOW aqo.learn_statement_timeout; + aqo.learn_statement_timeout +----------------------------- + off +(1 row) + +SHOW aqo.show_hash; + aqo.show_hash +--------------- + off +(1 row) + +SHOW aqo.show_details; + aqo.show_details +------------------ + off +(1 row) + +SHOW aqo.force_collect_stat; + aqo.force_collect_stat +------------------------ + off +(1 row) + +SHOW aqo.mode; + aqo.mode +------------ + controlled +(1 row) + SET aqo.mode = 'learn'; SET aqo.show_details = true; CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; +-- Check AQO addons to explain (the only stable data) EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; QUERY PLAN @@ -41,6 +80,12 @@ SELECT obj_description('public.show_execution_time'::regproc::oid); Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions. (1 row) +SELECT obj_description('public.aqo_drop_class'::regproc::oid); + obj_description +-------------------------------------------------------------- + Remove info about an query class from AQO ML knowledge base. 
+(1 row) + \df show_cardinality_errors List of functions Schema | Name | Result data type | Argument data types | Type @@ -55,4 +100,11 @@ SELECT obj_description('public.show_execution_time'::regproc::oid); public | show_execution_time | TABLE(num bigint, id bigint, fshash bigint, exec_time double precision, nexecs bigint) | controlled boolean | func (1 row) +\df aqo_drop_class + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+----------------+------------------+---------------------+------ + public | aqo_drop_class | integer | id bigint | func +(1 row) + DROP EXTENSION aqo; diff --git a/expected/schema.out b/expected/schema.out index 82ab68e8..aa048898 100644 --- a/expected/schema.out +++ b/expected/schema.out @@ -25,20 +25,18 @@ SELECT * FROM test; -- Exclude fields with hash values from the queries. Hash is depend on -- nodefuncs code which is highly PostgreSQL version specific. SELECT query_text FROM public.aqo_query_texts; - query_text --------------------------------------------- + query_text +--------------------------------------- COMMON feature space (do not delete!) 
- INSERT INTO test (data) VALUES ('string'); SELECT * FROM test; -(3 rows) +(2 rows) SELECT learn_aqo, use_aqo, auto_tuning FROM public.aqo_queries; learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f t | f | t - t | f | t -(3 rows) +(2 rows) DROP SCHEMA IF EXISTS test1 CASCADE; NOTICE: drop cascades to 2 other objects diff --git a/postprocessing.c b/postprocessing.c index fcf820bf..55350049 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -873,7 +873,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) automatical_query_tuning(query_context.query_hash, stat); /* Write AQO statistics to the aqo_query_stat table */ - update_aqo_stat(query_context.fspace_hash, stat); + update_aqo_stat(query_context.query_hash, stat); pfree_query_stat(stat); } diff --git a/preprocessing.c b/preprocessing.c index d768104d..2c70584f 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -70,6 +70,8 @@ /* List of feature spaces, that are processing in this backend. */ List *cur_classes = NIL; +int aqo_join_threshold = 0; + static bool isQueryUsingSystemRelation(Query *query); static bool isQueryUsingSystemRelation_walker(Node *node, void *context); @@ -366,6 +368,12 @@ disable_aqo_for_query(void) query_context.planning_time = -1.; } +typedef struct AQOPreWalkerCtx +{ + bool trivQuery; + int njoins; +} AQOPreWalkerCtx; + /* * Examine a fully-parsed query, and return TRUE iff any relation underlying * the query is a system relation or no one relation touched by the query. 
@@ -373,12 +381,14 @@ disable_aqo_for_query(void) static bool isQueryUsingSystemRelation(Query *query) { - bool trivQuery = true; + AQOPreWalkerCtx ctx; bool result; - result = isQueryUsingSystemRelation_walker((Node *) query, &trivQuery); + ctx.trivQuery = true; + ctx.njoins = 0; + result = isQueryUsingSystemRelation_walker((Node *) query, &ctx); - if (result || trivQuery) + if (result || ctx.trivQuery || ctx.njoins < aqo_join_threshold) return true; return false; } @@ -399,16 +409,53 @@ IsAQORelation(Relation rel) return false; } +/* + * Walk through jointree and calculate number of potential joins + */ +static void +jointree_walker(Node *jtnode, void *context) +{ + AQOPreWalkerCtx *ctx = (AQOPreWalkerCtx *) context; + + if (jtnode == NULL || IsA(jtnode, RangeTblRef)) + return; + else if (IsA(jtnode, FromExpr)) + { + FromExpr *f = (FromExpr *) jtnode; + ListCell *l; + + /* Count number of potential joins by number of sources in FROM list */ + ctx->njoins += list_length(f->fromlist) - 1; + + foreach(l, f->fromlist) + jointree_walker(lfirst(l), context); + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + + /* Don't forget about explicit JOIN statement */ + ctx->njoins++; + jointree_walker(j->larg, context); + jointree_walker(j->rarg, context); + } + else + elog(ERROR, "unrecognized node type: %d", (int) nodeTag(jtnode)); + return; +} + static bool isQueryUsingSystemRelation_walker(Node *node, void *context) { + AQOPreWalkerCtx *ctx = (AQOPreWalkerCtx *) context; + if (node == NULL) return false; if (IsA(node, Query)) { - Query *query = (Query *) node; - ListCell *rtable; + Query *query = (Query *) node; + ListCell *rtable; foreach(rtable, query->rtable) { @@ -419,13 +466,12 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) Relation rel = table_open(rte->relid, AccessShareLock); bool is_catalog = IsCatalogRelation(rel); bool is_aqo_rel = IsAQORelation(rel); - bool *trivQuery = (bool *) context; table_close(rel, 
AccessShareLock); if (is_catalog || is_aqo_rel) return true; - *trivQuery = false; + ctx->trivQuery = false; } else if (rte->rtekind == RTE_FUNCTION) { @@ -435,6 +481,9 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) } } + jointree_walker((Node *) query->jointree, context); + + /* Recursively plunge into subqueries and CTEs */ return query_tree_walker(query, isQueryUsingSystemRelation_walker, context, diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index 139daf14..6ff77c43 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -204,6 +204,101 @@ SELECT * FROM check_estimated_rows(' WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; '); +-- Test limit on number of joins +SET aqo.mode = 'learn'; + +SELECT * FROM aqo_drop_class(0); +SELECT * FROM aqo_drop_class(42); + +-- Remove all data from ML knowledge base +SELECT count(*) FROM ( +SELECT aqo_drop_class(q1.id::bigint) FROM ( + SELECT query_hash AS id + FROM aqo_queries WHERE query_hash <> 0) AS q1 +) AS q2; +SELECT count(*) FROM aqo_data; + +SET aqo.join_threshold = 3; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); +SELECT count(*) FROM aqo_data; -- Return 0 - do not learn on the queries above + +SELECT * FROM check_estimated_rows(' + SELECT * + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +'); +SELECT count(*) FROM -- Learn on the query + (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 +; +SELECT query_text FROM aqo_query_texts WHERE query_hash <> 0; -- Check query + +SET aqo.join_threshold = 1; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); +SELECT count(*) FROM + (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 +; -- Learn on a query with 
one join + +SET aqo.join_threshold = 0; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); +SELECT count(*) FROM + (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 +; -- Learn on the query without any joins now + +SET aqo.join_threshold = 1; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a)'); +SELECT count(*) FROM + (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 +; -- See one more query in the AQO knowledge base + +SELECT * FROM check_estimated_rows('WITH selected AS (SELECT * FROM aqo_test1 t1) SELECT count(*) FROM selected'); +SELECT * FROM check_estimated_rows(' + WITH selected AS ( + SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) + ) SELECT count(*) FROM selected') +; +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 + +-- InitPlan +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1 WHERE t1.a IN ( + SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) + )'); +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 + +-- SubPlan +SELECT * FROM check_estimated_rows(' + SELECT ( + SELECT avg(t2.a) FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) AND (t2.a = t1.a) + ) FROM aqo_test1 AS t1; +'); +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 + +-- Subquery +SET aqo.join_threshold = 3; +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, + (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 + WHERE q1.a*t1.a = t1.a + 15; +'); -- Two JOINs, ignore it +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SET aqo.join_threshold = 2; +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, + (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 + WHERE q1.a*t1.a = t1.a + 15; +'); -- 
One JOIN from subquery, another one from the query +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 + +SELECT * FROM check_estimated_rows(' + WITH selected AS ( + SELECT t2.a FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) + ) SELECT count(*) FROM aqo_test1 t3, selected WHERE selected.a = t3.a') +; -- One JOIN extracted from CTE, another - from a FROM part of the query +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 + +RESET aqo.join_threshold; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; diff --git a/sql/gucs.sql b/sql/gucs.sql index 15269b95..6fd8e9ea 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -1,4 +1,14 @@ CREATE EXTENSION aqo; + +-- Check interface variables and their default values. Detect, if default value +-- of a GUC is changed. +SHOW aqo.join_threshold; +SHOW aqo.learn_statement_timeout; +SHOW aqo.show_hash; +SHOW aqo.show_details; +SHOW aqo.force_collect_stat; +SHOW aqo.mode; + SET aqo.mode = 'learn'; SET aqo.show_details = true; @@ -6,6 +16,7 @@ CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; +-- Check AQO addons to explain (the only stable data) EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) @@ -14,8 +25,10 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) -- Check existence of the interface functions. 
SELECT obj_description('public.show_cardinality_errors'::regproc::oid); SELECT obj_description('public.show_execution_time'::regproc::oid); +SELECT obj_description('public.aqo_drop_class'::regproc::oid); \df show_cardinality_errors \df show_execution_time +\df aqo_drop_class DROP EXTENSION aqo; diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index cedc101d..39bd1a9b 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -111,7 +111,8 @@ $stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts"); # This constants looks like magic numbers. But query set of the pgbench test # is fixed for a long time. -is( (($fs_count == 7) and ($fs_samples_count == 6) and ($stat_count == 7)), 1); +note("fs: $fs_count, $fs_samples_count, $stat_count"); +is( (($fs_count == 6) and ($fs_samples_count == 5) and ($stat_count == 6)), 1); my $analytics = File::Temp->new(); append_to_file($analytics, q{ From 3e11fac11572b1f7bbbefc9fc59adb08aa6b3f3c Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 18 May 2022 15:24:03 +0500 Subject: [PATCH 079/203] By default, feature space should be equal to query_hash: minor fix and a set of regression tests. 
--- expected/aqo_disabled.out | 78 ++++++++++++++++++++++++++++++++++++++- preprocessing.c | 11 ++---- sql/aqo_disabled.sql | 25 ++++++++++++- 3 files changed, 104 insertions(+), 10 deletions(-) diff --git a/expected/aqo_disabled.out b/expected/aqo_disabled.out index 3162fa6a..56f46f05 100644 --- a/expected/aqo_disabled.out +++ b/expected/aqo_disabled.out @@ -16,6 +16,59 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; +CREATE EXTENSION aqo; +SET aqo.mode = 'controlled'; +CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 +WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; +SELECT count(*) FROM tmp1; + count +------- + 3 +(1 row) + +DROP TABLE tmp1; +CREATE TABLE tmp1 AS SELECT t1.a, t2.b, t3.c +FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM tmp1; + count +------- + 0 +(1 row) + +DROP TABLE tmp1; +EXPLAIN SELECT * FROM aqo_test0 +WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; + QUERY PLAN +---------------------------------------------------------------------------------- + Index Scan using aqo_test0_idx_a on aqo_test0 (cost=0.28..8.35 rows=1 width=16) + Index Cond: (a < 3) + Filter: ((b < 3) AND (c < 3) AND (d < 3)) +(3 rows) + +EXPLAIN SELECT t1.a, t2.b, t3.c +FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Nested Loop (cost=0.28..50.59 rows=1 width=12) + Join Filter: (t1.b = t3.b) + -> Nested Loop (cost=0.28..9.56 rows=1 width=12) + -> Seq Scan on aqo_test1 t1 (cost=0.00..1.25 rows=1 width=8) + Filter: (a < 1) + -> Index Scan using aqo_test0_idx_a on aqo_test0 t2 (cost=0.28..8.30 rows=1 width=8) + Index Cond: (a = t1.a) + Filter: (c < 1) + -> Seq Scan on aqo_test0 t3 (cost=0.00..41.02 rows=1 
width=8) + Filter: ((b < 1) AND (d < 0)) +(10 rows) + +SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero + count +------- + 0 +(1 row) + SET aqo.mode = 'disabled'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -62,7 +115,12 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -CREATE EXTENSION aqo; +SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero + count +------- + 0 +(1 row) + SET aqo.mode = 'intelligent'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -83,6 +141,12 @@ SELECT count(*) FROM tmp1; (1 row) DROP TABLE tmp1; +SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero + count +------- + 0 +(1 row) + SET aqo.mode = 'controlled'; UPDATE aqo_queries SET learn_aqo = true, use_aqo = true, auto_tuning = false; EXPLAIN SELECT * FROM aqo_test0 @@ -111,6 +175,12 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) +SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero + count +------- + 0 +(1 row) + SET aqo.mode = 'disabled'; EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -138,6 +208,12 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) +SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero + count +------- + 0 +(1 row) + DROP EXTENSION aqo; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; diff --git a/preprocessing.c b/preprocessing.c index 2c70584f..cc438fae 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -172,6 +172,9 @@ aqo_planner(Query *parse, selectivity_cache_clear(); query_context.query_hash = get_query_hash(parse, query_string); + /* By default, they should be 
equal */ + query_context.fspace_hash = query_context.query_hash; + if (query_is_deactivated(query_context.query_hash) || list_member_uint64(cur_classes,query_context.query_hash)) { @@ -211,7 +214,6 @@ aqo_planner(Query *parse, query_context.adding_query = true; query_context.learn_aqo = true; query_context.use_aqo = false; - query_context.fspace_hash = query_context.query_hash; query_context.auto_tuning = true; query_context.collect_stat = true; break; @@ -220,7 +222,7 @@ aqo_planner(Query *parse, query_context.learn_aqo = true; query_context.use_aqo = true; query_context.auto_tuning = false; - query_context.fspace_hash = 0; + query_context.fspace_hash = 0; /* Use common feature space */ query_context.collect_stat = false; break; case AQO_MODE_CONTROLLED: @@ -239,7 +241,6 @@ aqo_planner(Query *parse, query_context.adding_query = true; query_context.learn_aqo = true; query_context.use_aqo = true; - query_context.fspace_hash = query_context.query_hash; query_context.auto_tuning = false; query_context.collect_stat = true; break; @@ -287,7 +288,6 @@ aqo_planner(Query *parse, * suppressed manually) and collect stats. */ query_context.collect_stat = true; - query_context.fspace_hash = query_context.query_hash; break; case AQO_MODE_INTELLIGENT: @@ -331,14 +331,11 @@ aqo_planner(Query *parse, } if (force_collect_stat) - { /* * If this GUC is set, AQO will analyze query results and collect * query execution statistics in any mode. */ query_context.collect_stat = true; - query_context.fspace_hash = query_context.query_hash; - } if (!IsQueryDisabled()) /* It's good place to set timestamp of start of a planning process. 
*/ diff --git a/sql/aqo_disabled.sql b/sql/aqo_disabled.sql index 7d755be9..9c232a56 100644 --- a/sql/aqo_disabled.sql +++ b/sql/aqo_disabled.sql @@ -17,6 +17,25 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; +CREATE EXTENSION aqo; + +SET aqo.mode = 'controlled'; + +CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 +WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; +SELECT count(*) FROM tmp1; +DROP TABLE tmp1; +CREATE TABLE tmp1 AS SELECT t1.a, t2.b, t3.c +FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM tmp1; +DROP TABLE tmp1; +EXPLAIN SELECT * FROM aqo_test0 +WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; +EXPLAIN SELECT t1.a, t2.b, t3.c +FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero SET aqo.mode = 'disabled'; @@ -38,8 +57,7 @@ EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -CREATE EXTENSION aqo; - +SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero SET aqo.mode = 'intelligent'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 @@ -53,6 +71,7 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = SELECT count(*) FROM tmp1; DROP TABLE tmp1; +SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero SET aqo.mode = 'controlled'; UPDATE aqo_queries SET learn_aqo = true, use_aqo = true, auto_tuning = false; @@ -64,6 +83,7 @@ EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM 
aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero SET aqo.mode = 'disabled'; EXPLAIN SELECT * FROM aqo_test0 @@ -72,6 +92,7 @@ WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero DROP EXTENSION aqo; From a7be15f3dd0d51544a6082e9b6b9a6507d998549 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 18 May 2022 16:47:52 +0500 Subject: [PATCH 080/203] Rework the cleanup routine in AQO interface. Allow user to know how many records were removed during this procedure. --- aqo--1.4--1.5.sql | 44 ++++++++++++++++++++++++++++++++++++- expected/aqo_learn.out | 8 +++---- expected/clean_aqo_data.out | 40 ++++++++++++++++----------------- expected/gucs.out | 13 +++++++++++ expected/temp_tables.out | 24 ++++++++++---------- expected/unsupported.out | 8 +++---- sql/aqo_learn.sql | 2 +- sql/clean_aqo_data.sql | 10 ++++----- sql/gucs.sql | 2 ++ sql/temp_tables.sql | 6 ++--- sql/unsupported.sql | 2 +- t/001_pgbench.pl | 2 +- 12 files changed, 109 insertions(+), 52 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 261d86e1..c10ec921 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -78,6 +78,7 @@ $$ LANGUAGE plpgsql; DROP FUNCTION public.top_time_queries; DROP FUNCTION public.aqo_drop; +DROP FUNCTION public.clean_aqo_data; -- -- Show execution time of queries, for which AQO has statistics. @@ -166,4 +167,45 @@ END; $$ LANGUAGE plpgsql; COMMENT ON FUNCTION public.aqo_drop_class(bigint) IS -'Remove info about an query class from AQO ML knowledge base.'; \ No newline at end of file +'Remove info about an query class from AQO ML knowledge base.'; + +-- +-- Remove unneeded rows from the AQO ML storage. +-- For common feature space, remove rows from aqo_data only. 
+-- For custom feature space - remove all rows related to the space from all AQO +-- tables even if only one oid for one feature subspace of the space is illegal. +-- Returns number of deleted rows from aqo_queries and aqo_data tables. +-- +CREATE OR REPLACE FUNCTION public.aqo_cleanup(OUT nfs integer, OUT nfss integer) +AS $$ +DECLARE + fs bigint; + fss integer; +BEGIN + -- Save current number of rows + SELECT count(*) FROM aqo_queries INTO nfs; + SELECT count(*) FROM aqo_data INTO nfss; + + FOR fs,fss IN SELECT q1.fs,q1.fss FROM ( + SELECT fspace_hash fs, fsspace_hash fss, unnest(oids) AS reloid + FROM aqo_data) AS q1 + WHERE q1.reloid NOT IN (SELECT oid FROM pg_class) + GROUP BY (q1.fs,q1.fss) + LOOP + IF (fs = 0) THEN + DELETE FROM aqo_data WHERE fsspace_hash = fss; + continue; + END IF; + + -- Remove ALL feature space if one of oids isn't exists + DELETE FROM aqo_queries WHERE fspace_hash = fs; + END LOOP; + + -- Calculate difference with previous state of knowledge base + nfs := nfs - (SELECT count(*) FROM aqo_queries); + nfss := nfss - (SELECT count(*) FROM aqo_data); +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION public.aqo_cleanup() IS +'Remove unneeded rows from the AQO ML storage'; diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 10a0fecb..672d752d 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -235,10 +235,10 @@ SELECT count(*) FROM tmp1; (1 row) -- Remove data on some unneeded instances of tmp1 table. 
-SELECT public.clean_aqo_data(); - clean_aqo_data ----------------- - +SELECT public.aqo_cleanup(); + aqo_cleanup +------------- + (9,18) (1 row) -- Result of the query below should be empty diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index acee95bd..cf75839a 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -10,10 +10,10 @@ SELECT * FROM a; (0 rows) SELECT 'a'::regclass::oid AS a_oid \gset -SELECT clean_aqo_data(); - clean_aqo_data ----------------- - +SELECT aqo_cleanup(); + aqo_cleanup +------------- + (0,0) (1 row) /* @@ -53,10 +53,10 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) DROP TABLE a; -SELECT clean_aqo_data(); - clean_aqo_data ----------------- - +SELECT aqo_cleanup(); + aqo_cleanup +------------- + (1,1) (1 row) /* @@ -107,10 +107,10 @@ SELECT 'a'::regclass::oid AS a_oid \gset -- add manually line with different fspace_hash and query_hash to aqo_queries INSERT INTO aqo_queries VALUES (:a_oid + 1, 't', 't', :a_oid, 'f'); DROP TABLE a; -SELECT clean_aqo_data(); - clean_aqo_data ----------------- - +SELECT aqo_cleanup(); + aqo_cleanup +------------- + (1,1) (1 row) -- this line should remain @@ -196,10 +196,10 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) DROP TABLE a; -SELECT clean_aqo_data(); - clean_aqo_data ----------------- - +SELECT aqo_cleanup(); + aqo_cleanup +------------- + (2,4) (1 row) /* @@ -274,10 +274,10 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) DROP TABLE b; -SELECT clean_aqo_data(); - clean_aqo_data ----------------- - +SELECT aqo_cleanup(); + aqo_cleanup +------------- + (1,1) (1 row) -- lines corresponding to b_oid in theese tables deleted diff --git a/expected/gucs.out b/expected/gucs.out index 53bcd24d..d76a45c6 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -86,6 +86,12 @@ SELECT obj_description('public.aqo_drop_class'::regproc::oid); Remove info about an query class from AQO ML knowledge base. 
(1 row) +SELECT obj_description('public.aqo_cleanup'::regproc::oid); + obj_description +---------------------------------------------- + Remove unneeded rows from the AQO ML storage +(1 row) + \df show_cardinality_errors List of functions Schema | Name | Result data type | Argument data types | Type @@ -107,4 +113,11 @@ SELECT obj_description('public.aqo_drop_class'::regproc::oid); public | aqo_drop_class | integer | id bigint | func (1 row) +\df aqo_cleanup + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-------------+------------------+-----------------------------------+------ + public | aqo_cleanup | record | OUT nfs integer, OUT nfss integer | func +(1 row) + DROP EXTENSION aqo; diff --git a/expected/temp_tables.out b/expected/temp_tables.out index daf2602f..e71ea09e 100644 --- a/expected/temp_tables.out +++ b/expected/temp_tables.out @@ -46,10 +46,10 @@ SELECT count(*) FROM aqo_data; (1 row) DROP TABLE tt; -SELECT clean_aqo_data(); - clean_aqo_data ----------------- - +SELECT aqo_cleanup(); + aqo_cleanup +------------- + (0,0) (1 row) SELECT count(*) FROM aqo_data; -- Should be the same as above @@ -59,10 +59,10 @@ SELECT count(*) FROM aqo_data; -- Should be the same as above (1 row) DROP TABLE pt; -SELECT clean_aqo_data(); - clean_aqo_data ----------------- - +SELECT aqo_cleanup(); + aqo_cleanup +------------- + (3,10) (1 row) SELECT count(*) FROM aqo_data; -- Should be 0 @@ -133,10 +133,10 @@ SELECT * FROM check_estimated_rows(' SET aqo.mode = 'forced'; -- Now we use all fss records for each query DROP TABLE pt; -SELECT clean_aqo_data(); - clean_aqo_data ----------------- - +SELECT aqo_cleanup(); + aqo_cleanup +------------- + (2,6) (1 row) CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; diff --git a/expected/unsupported.out b/expected/unsupported.out index 01af838a..8710a565 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -579,10 +579,10 @@ ORDER BY 
(md5(query_text),error) DESC; (12 rows) DROP TABLE t,t1 CASCADE; -SELECT public.clean_aqo_data(); - clean_aqo_data ----------------- - +SELECT public.aqo_cleanup(); + aqo_cleanup +------------- + (12,42) (1 row) -- Look for any remaining queries in the ML storage. diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index 6ff77c43..ed5c1ed9 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -123,7 +123,7 @@ WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; SELECT count(*) FROM tmp1; -- Remove data on some unneeded instances of tmp1 table. -SELECT public.clean_aqo_data(); +SELECT public.aqo_cleanup(); -- Result of the query below should be empty SELECT * FROM aqo_query_texts aqt1, aqo_query_texts aqt2 diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index acd64b16..509071a1 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -6,7 +6,7 @@ DROP TABLE IF EXISTS b; CREATE TABLE a(); SELECT * FROM a; SELECT 'a'::regclass::oid AS a_oid \gset -SELECT clean_aqo_data(); +SELECT aqo_cleanup(); /* * lines with a_oid in aqo_data, @@ -26,7 +26,7 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); DROP TABLE a; -SELECT clean_aqo_data(); +SELECT aqo_cleanup(); /* * lines with a_oid in aqo_data, @@ -54,7 +54,7 @@ SELECT 'a'::regclass::oid AS a_oid \gset -- add manually line with different fspace_hash and query_hash to aqo_queries INSERT INTO aqo_queries VALUES (:a_oid + 1, 't', 't', :a_oid, 'f'); DROP TABLE a; -SELECT clean_aqo_data(); +SELECT aqo_cleanup(); -- this line should remain SELECT count(*) FROM aqo_queries WHERE (fspace_hash = :a_oid AND query_hash = :a_oid + 1); @@ -88,7 +88,7 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); DROP TABLE a; -SELECT clean_aqo_data(); +SELECT aqo_cleanup(); /* * lines corresponding to a_oid and both a_oid's fspace_hash 
deleted in aqo_data, @@ -124,7 +124,7 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fspace_hash = aqo_queries.query_hash); DROP TABLE b; -SELECT clean_aqo_data(); +SELECT aqo_cleanup(); -- lines corresponding to b_oid in theese tables deleted SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); diff --git a/sql/gucs.sql b/sql/gucs.sql index 6fd8e9ea..63d18418 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -26,9 +26,11 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT obj_description('public.show_cardinality_errors'::regproc::oid); SELECT obj_description('public.show_execution_time'::regproc::oid); SELECT obj_description('public.aqo_drop_class'::regproc::oid); +SELECT obj_description('public.aqo_cleanup'::regproc::oid); \df show_cardinality_errors \df show_execution_time \df aqo_drop_class +\df aqo_cleanup DROP EXTENSION aqo; diff --git a/sql/temp_tables.sql b/sql/temp_tables.sql index cd24a051..2ca22de0 100644 --- a/sql/temp_tables.sql +++ b/sql/temp_tables.sql @@ -16,10 +16,10 @@ SELECT count(*) FROM pt AS pt1, tt AS tt1, tt AS tt2, pt AS pt2; SELECT count(*) FROM aqo_data; DROP TABLE tt; -SELECT clean_aqo_data(); +SELECT aqo_cleanup(); SELECT count(*) FROM aqo_data; -- Should be the same as above DROP TABLE pt; -SELECT clean_aqo_data(); +SELECT aqo_cleanup(); SELECT count(*) FROM aqo_data; -- Should be 0 SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt ON aq.query_hash = aqt.query_hash @@ -66,7 +66,7 @@ SELECT * FROM check_estimated_rows(' SET aqo.mode = 'forced'; -- Now we use all fss records for each query DROP TABLE pt; -SELECT clean_aqo_data(); +SELECT aqo_cleanup(); CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; CREATE TEMP TABLE ttd1 AS SELECT -(x*3) AS x, (x % 9) AS y1 FROM generate_series(1,100) AS x; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 5168079c..cb986d26 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -163,7 +163,7 @@ ORDER BY 
(md5(query_text),error) DESC; DROP TABLE t,t1 CASCADE; -SELECT public.clean_aqo_data(); +SELECT public.aqo_cleanup(); -- Look for any remaining queries in the ML storage. SELECT to_char(error, '9.99EEEE')::text AS error, query_text diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 39bd1a9b..1cf4bce8 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -272,7 +272,7 @@ pgbench_history CASCADE;"); # Clean unneeded AQO knowledge -$node->safe_psql('postgres', "SELECT clean_aqo_data()"); +$node->safe_psql('postgres', "SELECT public.aqo_cleanup()"); # Calculate total number of rows in AQO-related tables. my $new_fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries;"); From 328cb273c2c2076806f2177ca8f906ce86a6323c Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 19 May 2022 00:04:57 +0500 Subject: [PATCH 081/203] First step of the AQO UI modifying. Remove some dubious functions. After this commit, UI of AQO should be consistent with content of wiki page: https://github.com/postgrespro/aqo/wiki/User-Interface --- aqo--1.4--1.5.sql | 131 +++++++++++++++++++++++++++++++++------ expected/gucs.out | 41 +++++++----- expected/top_queries.out | 12 ++-- expected/unsupported.out | 6 +- sql/gucs.sql | 14 +++-- sql/top_queries.sql | 12 ++-- sql/unsupported.sql | 6 +- t/001_pgbench.pl | 10 +-- 8 files changed, 169 insertions(+), 63 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index c10ec921..7bdf34cd 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -10,13 +10,13 @@ -- DROP TABLE public.aqo_data CASCADE; CREATE TABLE public.aqo_data ( - fspace_hash bigint NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, - fsspace_hash int NOT NULL, - nfeatures int NOT NULL, - features double precision[][], - targets double precision[], - oids oid [] DEFAULT NULL, - reliability double precision [] + fspace_hash bigint NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, + fsspace_hash int NOT NULL, 
+ nfeatures int NOT NULL, + features double precision[][], + targets double precision[], + oids oid [] DEFAULT NULL, + reliability double precision [] ); CREATE UNIQUE INDEX aqo_fss_access_idx ON public.aqo_data (fspace_hash, fsspace_hash); @@ -79,6 +79,11 @@ $$ LANGUAGE plpgsql; DROP FUNCTION public.top_time_queries; DROP FUNCTION public.aqo_drop; DROP FUNCTION public.clean_aqo_data; +DROP FUNCTION public.show_cardinality_errors; +DROP FUNCTION array_mse; +DROP FUNCTION array_avg; +DROP FUNCTION public.aqo_ne_queries; -- Not needed anymore due to changing in the logic +DROP FUNCTION public.aqo_clear_hist; -- Should be renamed and reworked -- -- Show execution time of queries, for which AQO has statistics. @@ -86,7 +91,7 @@ DROP FUNCTION public.clean_aqo_data; -- estimations, or not used (controlled = false). -- Last case is possible in disabled mode with aqo.force_collect_stat = 'on'. -- -CREATE OR REPLACE FUNCTION public.show_execution_time(controlled boolean) +CREATE OR REPLACE FUNCTION public.aqo_execution_time(controlled boolean) RETURNS TABLE(num bigint, id bigint, fshash bigint, exec_time float, nexecs bigint) AS $$ BEGIN @@ -120,7 +125,7 @@ ELSE SELECT aq.query_hash AS queryid, aq.fspace_hash AS fs_hash, - array_avg(execution_time_without_aqo) AS exectime, + (SELECT AVG(t) FROM unnest(execution_time_without_aqo) t) AS exectime, executions_without_aqo AS execs FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs ON aq.query_hash = aqs.query_hash @@ -131,28 +136,31 @@ END IF; END; $$ LANGUAGE plpgsql; -COMMENT ON FUNCTION public.show_execution_time(boolean) IS +COMMENT ON FUNCTION public.aqo_execution_time(boolean) IS 'Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. 
Another case (AQO not used), return an average value of execution time across all known executions.'; -CREATE OR REPLACE FUNCTION public.aqo_drop_class(id bigint) +-- +-- Remove all information about a query class from AQO storage. +-- +CREATE OR REPLACE FUNCTION public.aqo_drop_class(queryid bigint) RETURNS integer AS $$ DECLARE fs bigint; num integer; BEGIN - IF (id = 0) THEN - raise EXCEPTION '[AQO] Cannot remove basic class %.', id; + IF (queryid = 0) THEN + raise EXCEPTION '[AQO] Cannot remove basic class %.', queryid; END IF; - SELECT fspace_hash FROM public.aqo_queries WHERE (query_hash = id) INTO fs; + SELECT fspace_hash FROM public.aqo_queries WHERE (query_hash = queryid) INTO fs; IF (fs IS NULL) THEN - raise WARNING '[AQO] Nothing to remove for the class %.', id; + raise WARNING '[AQO] Nothing to remove for the class %.', queryid; RETURN 0; END IF; - IF (fs <> id) THEN - raise WARNING '[AQO] Removing query class has non-generic feature space value: id = %, fs = %.', id, fs; + IF (fs <> queryid) THEN + raise WARNING '[AQO] Removing query class has non-generic feature space value: id = %, fs = %.', queryid, fs; END IF; SELECT count(*) FROM public.aqo_data WHERE fspace_hash = fs INTO num; @@ -161,7 +169,7 @@ BEGIN * Remove the only from aqo_queries table. All other data will be removed by * CASCADE deletion. 
*/ - DELETE FROM public.aqo_queries WHERE query_hash = id; + DELETE FROM public.aqo_queries WHERE query_hash = queryid; RETURN num; END; $$ LANGUAGE plpgsql; @@ -179,8 +187,8 @@ COMMENT ON FUNCTION public.aqo_drop_class(bigint) IS CREATE OR REPLACE FUNCTION public.aqo_cleanup(OUT nfs integer, OUT nfss integer) AS $$ DECLARE - fs bigint; - fss integer; + fs bigint; + fss integer; BEGIN -- Save current number of rows SELECT count(*) FROM aqo_queries INTO nfs; @@ -209,3 +217,86 @@ $$ LANGUAGE plpgsql; COMMENT ON FUNCTION public.aqo_cleanup() IS 'Remove unneeded rows from the AQO ML storage'; + +-- +-- Get cardinality error of queries the last time they were executed. +-- IN: +-- controlled - show queries executed under a control of AQO (true); +-- executed without an AQO control, but AQO has a stat on the query (false). +-- +-- OUT: +-- num - sequental number. Smaller number corresponds to higher error. +-- id - ID of a query. +-- fshash - feature space. Usually equal to zero or ID. +-- error - AQO error that calculated on plan nodes of the query. +-- nexecs - number of executions of queries associated with this ID. 
+-- +CREATE OR REPLACE FUNCTION public.aqo_cardinality_error(controlled boolean) +RETURNS TABLE(num bigint, id bigint, fshash bigint, error float, nexecs bigint) +AS $$ +BEGIN +IF (controlled) THEN + RETURN QUERY + SELECT + row_number() OVER (ORDER BY (cerror, query_id, fs_hash) DESC) AS nn, + query_id, fs_hash, cerror, execs + FROM ( + SELECT + aq.query_hash AS query_id, + aq.fspace_hash AS fs_hash, + cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror, + executions_with_aqo AS execs + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + ON aq.query_hash = aqs.query_hash + WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) + ) AS q1 + ORDER BY nn ASC; +ELSE + RETURN QUERY + SELECT + row_number() OVER (ORDER BY (cerror, query_id, fs_hash) DESC) AS nn, + query_id, fs_hash, cerror, execs + FROM ( + SELECT + aq.query_hash AS query_id, + aq.fspace_hash AS fs_hash, + (SELECT AVG(t) FROM unnest(cardinality_error_without_aqo) t) AS cerror, + executions_without_aqo AS execs + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + ON aq.query_hash = aqs.query_hash + WHERE TRUE = ANY (SELECT unnest(cardinality_error_without_aqo) IS NOT NULL) + ) AS q1 + ORDER BY (nn) ASC; +END IF; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION public.aqo_cardinality_error(boolean) IS +'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; + +-- +-- Remove all learning data for query with given ID. +-- Can be used in the case when user don't want to drop preferences and +-- accumulated statistics on a query class, but tries to re-learn AQO on this +-- class. +-- Returns a number of deleted rows in the aqo_data table. +-- +CREATE OR REPLACE FUNCTION public.aqo_reset_query(queryid bigint) +RETURNS integer AS $$ +DECLARE + num integer; + fs bigint; +BEGIN + IF (queryid = 0) THEN + raise WARNING '[AQO] Reset common feature space.' 
+ END IF; + + SELECT fspace_hash FROM public.aqo_queries WHERE query_hash = queryid INTO fs; + SELECT count(*) FROM public.aqo_data WHERE fspace_hash = fs INTO num; + DELETE FROM public.aqo_data WHERE fspace_hash = fs; + RETURN num; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION public.aqo_reset_query(bigint) IS +'Remove from AQO storage only learning data for given QueryId.'; diff --git a/expected/gucs.out b/expected/gucs.out index d76a45c6..3c615f4f 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -19,7 +19,7 @@ SHOW aqo.show_hash; off (1 row) -SHOW aqo.show_details; +SHOW aqo.show_details; aqo.show_details ------------------ off @@ -31,7 +31,7 @@ SHOW aqo.force_collect_stat; off (1 row) -SHOW aqo.mode; +SHOW aqo.mode; aqo.mode ------------ controlled @@ -68,13 +68,13 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) (6 rows) -- Check existence of the interface functions. -SELECT obj_description('public.show_cardinality_errors'::regproc::oid); +SELECT obj_description('public.aqo_cardinality_error'::regproc::oid); obj_description --------------------------------------------------------------------------------------------------------------- Get cardinality error of queries the last time they were executed. Order queries according to an error value. (1 row) -SELECT obj_description('public.show_execution_time'::regproc::oid); +SELECT obj_description('public.aqo_execution_time'::regproc::oid); obj_description ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions. 
@@ -92,25 +92,31 @@ SELECT obj_description('public.aqo_cleanup'::regproc::oid); Remove unneeded rows from the AQO ML storage (1 row) -\df show_cardinality_errors - List of functions - Schema | Name | Result data type | Argument data types | Type ---------+-------------------------+------------------------------------------------------------------------------------+---------------------+------ - public | show_cardinality_errors | TABLE(num bigint, id bigint, fshash bigint, error double precision, nexecs bigint) | controlled boolean | func +SELECT obj_description('public.aqo_reset_query'::regproc::oid); + obj_description +--------------------------------------------------------------- + Remove from AQO storage only learning data for given QueryId. +(1 row) + +\df aqo_cardinality_error + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-----------------------+------------------------------------------------------------------------------------+---------------------+------ + public | aqo_cardinality_error | TABLE(num bigint, id bigint, fshash bigint, error double precision, nexecs bigint) | controlled boolean | func (1 row) -\df show_execution_time +\df aqo_execution_time List of functions - Schema | Name | Result data type | Argument data types | Type ---------+---------------------+----------------------------------------------------------------------------------------+---------------------+------ - public | show_execution_time | TABLE(num bigint, id bigint, fshash bigint, exec_time double precision, nexecs bigint) | controlled boolean | func + Schema | Name | Result data type | Argument data types | Type +--------+--------------------+----------------------------------------------------------------------------------------+---------------------+------ + public | aqo_execution_time | TABLE(num bigint, id bigint, fshash bigint, exec_time double precision, nexecs bigint) | controlled boolean | func (1 row) \df aqo_drop_class 
List of functions Schema | Name | Result data type | Argument data types | Type --------+----------------+------------------+---------------------+------ - public | aqo_drop_class | integer | id bigint | func + public | aqo_drop_class | integer | queryid bigint | func (1 row) \df aqo_cleanup @@ -120,4 +126,11 @@ SELECT obj_description('public.aqo_cleanup'::regproc::oid); public | aqo_cleanup | record | OUT nfs integer, OUT nfss integer | func (1 row) +\df aqo_reset_query + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-----------------+------------------+---------------------+------ + public | aqo_reset_query | integer | queryid bigint | func +(1 row) + DROP EXTENSION aqo; diff --git a/expected/top_queries.out b/expected/top_queries.out index dc5ccb95..e3339140 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -21,12 +21,12 @@ SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; 0 (1 row) -SELECT num FROM show_execution_time(true); -- Just for checking, return zero. +SELECT num FROM aqo_execution_time(true); -- Just for checking, return zero. num ----- (0 rows) -SELECT num FROM show_execution_time(false); +SELECT num FROM aqo_execution_time(false); num ----- 1 @@ -35,7 +35,7 @@ SELECT num FROM show_execution_time(false); -- Without the AQO control queries with and without temp tables are logged. 
SELECT query_text,nexecs -FROM show_execution_time(false) ce, aqo_query_texts aqt +FROM aqo_execution_time(false) ce, aqo_query_texts aqt WHERE ce.id = aqt.query_hash ORDER BY (md5(query_text)); query_text | nexecs @@ -64,7 +64,7 @@ SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y 31 (1 row) -SELECT num, to_char(error, '9.99EEEE') FROM show_cardinality_errors(false) AS te +SELECT num, to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te WHERE te.fshash = ( SELECT fspace_hash FROM aqo_queries WHERE aqo_queries.query_hash = ( @@ -78,7 +78,7 @@ WHERE te.fshash = ( (1 row) -- Should return zero -SELECT count(*) FROM show_cardinality_errors(true); +SELECT count(*) FROM aqo_cardinality_error(true); count ------- 0 @@ -86,7 +86,7 @@ SELECT count(*) FROM show_cardinality_errors(true); -- Fix list of logged queries SELECT query_text,nexecs -FROM show_cardinality_errors(false) ce, aqo_query_texts aqt +FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt WHERE ce.id = aqt.query_hash ORDER BY (md5(query_text)); query_text | nexecs diff --git a/expected/unsupported.out b/expected/unsupported.out index 8710a565..3a7fd101 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -459,7 +459,7 @@ SELECT * FROM JOINS: 0 (13 rows) --- AQO need to predict total fetched tuples in a table. +-- AQO needs to predict total fetched tuples in a table. -- -- At a non-leaf node we have prediction about input tuples - is a number of -- predicted output rows in underlying node. But for Scan nodes we don't have @@ -541,7 +541,7 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? -- Live with this variant of the test for some time. 
SELECT to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt +FROM public.aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id ORDER BY (md5(query_text),error) DESC; error | query_text @@ -587,7 +587,7 @@ SELECT public.aqo_cleanup(); -- Look for any remaining queries in the ML storage. SELECT to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt +FROM public.aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id ORDER BY (md5(query_text),error) DESC; error | query_text diff --git a/sql/gucs.sql b/sql/gucs.sql index 63d18418..4013669f 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -5,9 +5,9 @@ CREATE EXTENSION aqo; SHOW aqo.join_threshold; SHOW aqo.learn_statement_timeout; SHOW aqo.show_hash; -SHOW aqo.show_details; +SHOW aqo.show_details; SHOW aqo.force_collect_stat; -SHOW aqo.mode; +SHOW aqo.mode; SET aqo.mode = 'learn'; SET aqo.show_details = true; @@ -23,14 +23,16 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; -- Check existence of the interface functions. 
-SELECT obj_description('public.show_cardinality_errors'::regproc::oid); -SELECT obj_description('public.show_execution_time'::regproc::oid); +SELECT obj_description('public.aqo_cardinality_error'::regproc::oid); +SELECT obj_description('public.aqo_execution_time'::regproc::oid); SELECT obj_description('public.aqo_drop_class'::regproc::oid); SELECT obj_description('public.aqo_cleanup'::regproc::oid); +SELECT obj_description('public.aqo_reset_query'::regproc::oid); -\df show_cardinality_errors -\df show_execution_time +\df aqo_cardinality_error +\df aqo_execution_time \df aqo_drop_class \df aqo_cleanup +\df aqo_reset_query DROP EXTENSION aqo; diff --git a/sql/top_queries.sql b/sql/top_queries.sql index 11bebdc5..2725d087 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -12,12 +12,12 @@ CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; -SELECT num FROM show_execution_time(true); -- Just for checking, return zero. -SELECT num FROM show_execution_time(false); +SELECT num FROM aqo_execution_time(true); -- Just for checking, return zero. +SELECT num FROM aqo_execution_time(false); -- Without the AQO control queries with and without temp tables are logged. 
SELECT query_text,nexecs -FROM show_execution_time(false) ce, aqo_query_texts aqt +FROM aqo_execution_time(false) ce, aqo_query_texts aqt WHERE ce.id = aqt.query_hash ORDER BY (md5(query_text)); @@ -32,7 +32,7 @@ CREATE TABLE t2 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; -SELECT num, to_char(error, '9.99EEEE') FROM show_cardinality_errors(false) AS te +SELECT num, to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te WHERE te.fshash = ( SELECT fspace_hash FROM aqo_queries WHERE aqo_queries.query_hash = ( @@ -42,10 +42,10 @@ WHERE te.fshash = ( ); -- Should return zero -SELECT count(*) FROM show_cardinality_errors(true); +SELECT count(*) FROM aqo_cardinality_error(true); -- Fix list of logged queries SELECT query_text,nexecs -FROM show_cardinality_errors(false) ce, aqo_query_texts aqt +FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt WHERE ce.id = aqt.query_hash ORDER BY (md5(query_text)); diff --git a/sql/unsupported.sql b/sql/unsupported.sql index cb986d26..7698b168 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -126,7 +126,7 @@ SELECT * FROM (SELECT * FROM t WHERE x > 20) AS t1 USING(x); --- AQO need to predict total fetched tuples in a table. +-- AQO needs to predict total fetched tuples in a table. -- -- At a non-leaf node we have prediction about input tuples - is a number of -- predicted output rows in underlying node. But for Scan nodes we don't have @@ -157,7 +157,7 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? -- Live with this variant of the test for some time. 
SELECT to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt +FROM public.aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id ORDER BY (md5(query_text),error) DESC; @@ -167,7 +167,7 @@ SELECT public.aqo_cleanup(); -- Look for any remaining queries in the ML storage. SELECT to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt +FROM public.aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id ORDER BY (md5(query_text),error) DESC; diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 1cf4bce8..ab4b3549 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -134,25 +134,25 @@ 'analytical queries in pgbench (disabled mode)'); $res = $node->safe_psql('postgres', - "SELECT count(*) FROM show_cardinality_errors(false) v + "SELECT count(*) FROM aqo_cardinality_error(false) v JOIN aqo_query_texts t ON (t.query_hash = v.id) WHERE v.error > 0. AND t.query_text LIKE '%pgbench_accounts%'"); is($res, 3); $res = $node->safe_psql('postgres', - "SELECT * FROM show_cardinality_errors(false) v + "SELECT * FROM aqo_cardinality_error(false) v JOIN aqo_query_texts t ON (t.query_hash = v.id) WHERE v.error > 0. 
AND t.query_text LIKE '%pgbench_accounts%'"); note("\n TopN: \n $res \n"); $res = $node->safe_psql('postgres', - "SELECT v.error, t.query_text FROM show_cardinality_errors(false) v + "SELECT v.error, t.query_text FROM aqo_cardinality_error(false) v JOIN aqo_query_texts t ON (t.query_hash = v.id) WHERE v.error > 0."); note("\n Queries: \n $res \n"); -$res = $node->safe_psql('postgres', "SELECT * FROM public.show_execution_time(false) v"); +$res = $node->safe_psql('postgres', "SELECT * FROM public.aqo_execution_time(false) v"); note("\n TIMES: \n $res \n"); $res = $node->safe_psql('postgres', - "SELECT count(*) FROM public.show_execution_time(false) v + "SELECT count(*) FROM public.aqo_execution_time(false) v WHERE v.exec_time > 0."); is($res, 3); From 766443557d17fdd4fadeb36d1d96bc95ce6358df Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 19 May 2022 11:57:08 +0500 Subject: [PATCH 082/203] Bugfix. Choose memory context for a query environment piece in more safe way. Sometimes someone can invent a queryEnv and use it with short-lived plans. So, anyone under the hood should create its queryEnv in the same memory context. --- postprocessing.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/postprocessing.c b/postprocessing.c index 55350049..eeb4c249 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -913,11 +913,20 @@ StoreToQueryEnv(QueryDesc *queryDesc) MemoryContext oldCxt; bool newentry = false; - oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc->plannedstmt)); - - if (queryDesc->queryEnv == NULL) - queryDesc->queryEnv = create_queryEnv(); + /* + * Choose memory context for AQO parameters. Use pre-existed context if + * someone earlier created queryEnv (usually, SPI), or base on the queryDesc + * memory context. 
+ */ + if (queryDesc->queryEnv != NULL) + oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc->queryEnv)); + else + { + oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc)); + queryDesc->queryEnv = create_queryEnv(); + } + Assert(queryDesc->queryEnv); enr = get_ENR(queryDesc->queryEnv, AQOPrivateData); if (enr == NULL) { @@ -965,11 +974,20 @@ StorePlanInternals(QueryDesc *queryDesc) njoins = 0; planstate_tree_walker(queryDesc->planstate, calculateJoinNum, &njoins); - oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc->plannedstmt)); - - if (queryDesc->queryEnv == NULL) - queryDesc->queryEnv = create_queryEnv(); + /* + * Choose memory context for AQO parameters. Use pre-existed context if + * someone earlier created queryEnv (usually, SPI), or base on the queryDesc + * memory context. + */ + if (queryDesc->queryEnv != NULL) + oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc->queryEnv)); + else + { + oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc)); + queryDesc->queryEnv = create_queryEnv(); + } + Assert(queryDesc->queryEnv); enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); if (enr == NULL) { From f22809a68501227a05e01af9e2dcd5dd61143286 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 19 May 2022 19:33:03 +0500 Subject: [PATCH 083/203] Bugfix. Implement deep copy of uint64 list. Each element here is dynamically allocated in some memory context. If we copy the list in another memctx we should allocate memory for new elements too. --- hash.c | 23 +++++++++++++++++++++++ hash.h | 1 + path_utils.c | 2 +- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/hash.c b/hash.c index c1b16404..c7733b1f 100644 --- a/hash.c +++ b/hash.c @@ -98,6 +98,29 @@ list_member_uint64(const List *list, uint64 datum) return false; } +/* + * Deep copy of uint64 list. + * Each element here is dynamically allocated in some memory context. 
+ * If we copy the list in another memctx we should allocate memory for new + * elements too. + */ +List * +list_copy_uint64(List *list) +{ + ListCell *lc; + List *nlist = NIL; + + foreach(lc, list) + { + uint64 *val = palloc(sizeof(uint64)); + + *val = *(uint64 *) lfirst(lc); + nlist = lappend(nlist, (void *) val); + } + + return nlist; +} + List * lappend_uint64(List *list, uint64 datum) { diff --git a/hash.h b/hash.h index a218c9a4..01c90bed 100644 --- a/hash.h +++ b/hash.h @@ -5,6 +5,7 @@ extern uint64 get_query_hash(Query *parse, const char *query_text); extern bool list_member_uint64(const List *list, uint64 datum); +extern List *list_copy_uint64(List *list); extern List *lappend_uint64(List *list, uint64 datum); extern List *ldelete_uint64(List *list, uint64 datum); extern int get_fss_for_object(List *relsigns, List *clauselist, diff --git a/path_utils.c b/path_utils.c index fbd21299..ad3ef628 100644 --- a/path_utils.c +++ b/path_utils.c @@ -578,7 +578,7 @@ AQOnodeCopy(struct ExtensibleNode *enew, const struct ExtensibleNode *eold) /* These lists couldn't contain AQO nodes. Use basic machinery */ new->rels = palloc(sizeof(RelSortOut)); new->rels->hrels = list_copy(old->rels->hrels); - new->rels->signatures = list_copy(old->rels->signatures); + new->rels->signatures = list_copy_uint64(old->rels->signatures); new->clauses = copyObject(old->clauses); new->grouping_exprs = copyObject(old->grouping_exprs); From 84e42f81d721671a35e7f7aab7aba7825949b613 Mon Sep 17 00:00:00 2001 From: "Andrey V. 
Lepikhov" Date: Mon, 23 May 2022 22:37:52 +0500 Subject: [PATCH 084/203] Remove unnecessary declaration of an UI routine --- aqo--1.4--1.5.sql | 56 ----------------------------------------------- 1 file changed, 56 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 7bdf34cd..159f3895 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -20,62 +20,6 @@ CREATE TABLE public.aqo_data ( ); CREATE UNIQUE INDEX aqo_fss_access_idx ON public.aqo_data (fspace_hash, fsspace_hash); - --- --- Remove rows from the AQO ML knowledge base, related to previously dropped --- tables of the database. --- -CREATE OR REPLACE FUNCTION public.clean_aqo_data() RETURNS void AS $$ -DECLARE - aqo_data_row aqo_data%ROWTYPE; - aqo_queries_row aqo_queries%ROWTYPE; - aqo_query_texts_row aqo_query_texts%ROWTYPE; - aqo_query_stat_row aqo_query_stat%ROWTYPE; - oid_var oid; - fspace_hash_var bigint; - delete_row boolean DEFAULT false; -BEGIN - FOR aqo_data_row IN (SELECT * FROM aqo_data) - LOOP - delete_row = false; - SELECT aqo_data_row.fspace_hash INTO fspace_hash_var FROM aqo_data; - - IF (aqo_data_row.oids IS NOT NULL) THEN - FOREACH oid_var IN ARRAY aqo_data_row.oids - LOOP - IF NOT EXISTS (SELECT relname FROM pg_class WHERE oid = oid_var) THEN - delete_row = true; - END IF; - END LOOP; - END IF; - - FOR aqo_queries_row IN (SELECT * FROM public.aqo_queries) - LOOP - IF (delete_row = true AND fspace_hash_var <> 0 AND - fspace_hash_var = aqo_queries_row.fspace_hash AND - aqo_queries_row.fspace_hash = aqo_queries_row.query_hash) THEN - DELETE FROM aqo_data WHERE aqo_data = aqo_data_row; - DELETE FROM aqo_queries WHERE aqo_queries = aqo_queries_row; - - FOR aqo_query_texts_row IN (SELECT * FROM aqo_query_texts) - LOOP - DELETE FROM aqo_query_texts - WHERE aqo_query_texts_row.query_hash = fspace_hash_var AND - aqo_query_texts = aqo_query_texts_row; - END LOOP; - - FOR aqo_query_stat_row IN (SELECT * FROM aqo_query_stat) - LOOP - DELETE FROM aqo_query_stat - WHERE 
aqo_query_stat_row.query_hash = fspace_hash_var AND - aqo_query_stat = aqo_query_stat_row; - END LOOP; - END IF; - END LOOP; - END LOOP; -END; -$$ LANGUAGE plpgsql; - DROP FUNCTION public.top_time_queries; DROP FUNCTION public.aqo_drop; DROP FUNCTION public.clean_aqo_data; From 6e8f0ca3fcc999ebd34e4220484a69c1edccdd01 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 24 May 2022 08:53:45 +0500 Subject: [PATCH 085/203] Set join limit to non-zero value. We should accept the law of life: people establish their attitude to product with first glance, first use, without learning any optimization techniques. So, most of them will use it with default value of GUCS. According to this fact, set default value of joins limit to higher value and set it to zero manually at each test case. --- aqo.c | 2 +- expected/aqo_CVE-2020-14350.out | 1 + expected/aqo_controlled.out | 1 + expected/aqo_disabled.out | 1 + expected/aqo_fdw.out | 1 + expected/aqo_forced.out | 1 + expected/aqo_intelligent.out | 1 + expected/aqo_learn.out | 1 + expected/clean_aqo_data.out | 1 + expected/forced_stat_collection.out | 1 + expected/gucs.out | 39 +---------------------------- expected/plancache.out | 1 + expected/schema.out | 1 + expected/statement_timeout.out | 1 + expected/temp_tables.out | 1 + expected/top_queries.out | 1 + expected/unsupported.out | 1 + sql/aqo_CVE-2020-14350.sql | 1 + sql/aqo_controlled.sql | 1 + sql/aqo_disabled.sql | 1 + sql/aqo_fdw.sql | 1 + sql/aqo_forced.sql | 1 + sql/aqo_intelligent.sql | 1 + sql/aqo_learn.sql | 1 + sql/clean_aqo_data.sql | 1 + sql/forced_stat_collection.sql | 1 + sql/gucs.sql | 10 +------- sql/plancache.sql | 1 + sql/schema.sql | 1 + sql/statement_timeout.sql | 1 + sql/temp_tables.sql | 1 + sql/top_queries.sql | 1 + sql/unsupported.sql | 1 + t/001_pgbench.pl | 1 + t/002_pg_stat_statements_aqo.pl | 1 + 35 files changed, 35 insertions(+), 48 deletions(-) diff --git a/aqo.c b/aqo.c index 2a9a680f..3adc3801 100644 --- a/aqo.c +++ b/aqo.c @@ -204,7 
+204,7 @@ _PG_init(void) "Sets the threshold of number of JOINs in query beyond which AQO is used.", NULL, &aqo_join_threshold, - 0, + 3, 0, INT_MAX / 1000, PGC_USERSET, 0, diff --git a/expected/aqo_CVE-2020-14350.out b/expected/aqo_CVE-2020-14350.out index de90beaa..ccdc4694 100644 --- a/expected/aqo_CVE-2020-14350.out +++ b/expected/aqo_CVE-2020-14350.out @@ -27,6 +27,7 @@ END $$ LANGUAGE plpgsql RETURNS NULL ON NULL INPUT; RESET ROLE; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; -- Test result (must be 'off') SET ROLE regress_hacker; SHOW is_superuser; diff --git a/expected/aqo_controlled.out b/expected/aqo_controlled.out index 316ade00..11a46395 100644 --- a/expected/aqo_controlled.out +++ b/expected/aqo_controlled.out @@ -26,6 +26,7 @@ AS ( CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); ANALYZE aqo_test2; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 diff --git a/expected/aqo_disabled.out b/expected/aqo_disabled.out index 56f46f05..9ec08977 100644 --- a/expected/aqo_disabled.out +++ b/expected/aqo_disabled.out @@ -17,6 +17,7 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index 7956f649..ee4a4ab6 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -8,6 +8,7 @@ CREATE EXTENSION postgres_fdw; SET aqo.mode = 'learn'; SET aqo.show_details = 'true'; -- show AQO info for each node and entire query. SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. 
+SET aqo.join_threshold = 0; DO $d$ BEGIN EXECUTE $$CREATE SERVER loopback FOREIGN DATA WRAPPER postgres_fdw diff --git a/expected/aqo_forced.out b/expected/aqo_forced.out index 6da016f2..11032f2f 100644 --- a/expected/aqo_forced.out +++ b/expected/aqo_forced.out @@ -17,6 +17,7 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 diff --git a/expected/aqo_intelligent.out b/expected/aqo_intelligent.out index 1e984a2c..f3724e2b 100644 --- a/expected/aqo_intelligent.out +++ b/expected/aqo_intelligent.out @@ -17,6 +17,7 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 672d752d..dad8048d 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -37,6 +37,7 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index cf75839a..af9b7ae3 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -1,4 +1,5 @@ CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; DROP TABLE IF EXISTS a; NOTICE: table "a" does not exist, skipping diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index 716517a2..7a1d89c5 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -1,4 +1,5 @@ \set citizens 1000 +SET aqo.join_threshold = 0; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; CREATE TABLE person ( diff --git 
a/expected/gucs.out b/expected/gucs.out index 3c615f4f..2141a058 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -1,42 +1,5 @@ CREATE EXTENSION aqo; --- Check interface variables and their default values. Detect, if default value --- of a GUC is changed. -SHOW aqo.join_threshold; - aqo.join_threshold --------------------- - 0 -(1 row) - -SHOW aqo.learn_statement_timeout; - aqo.learn_statement_timeout ------------------------------ - off -(1 row) - -SHOW aqo.show_hash; - aqo.show_hash ---------------- - off -(1 row) - -SHOW aqo.show_details; - aqo.show_details ------------------- - off -(1 row) - -SHOW aqo.force_collect_stat; - aqo.force_collect_stat ------------------------- - off -(1 row) - -SHOW aqo.mode; - aqo.mode ------------- - controlled -(1 row) - +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = true; CREATE TABLE t(x int); diff --git a/expected/plancache.out b/expected/plancache.out index 0d019334..3a01968c 100644 --- a/expected/plancache.out +++ b/expected/plancache.out @@ -1,5 +1,6 @@ -- Tests on interaction of AQO with cached plans. 
CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; SET aqo.show_details = 'on'; SET aqo.show_hash = 'off'; diff --git a/expected/schema.out b/expected/schema.out index aa048898..221b62c0 100644 --- a/expected/schema.out +++ b/expected/schema.out @@ -12,6 +12,7 @@ ERROR: no schema has been selected to create in CREATE SCHEMA IF NOT EXISTS test1; SET search_path TO test1, public; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; CREATE TABLE test (id SERIAL, data TEXT); INSERT INTO test (data) VALUES ('string'); diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index c8c9f50c..6d1af3a7 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -21,6 +21,7 @@ CREATE TABLE t AS SELECT * FROM generate_series(1,100) AS x; ANALYZE t; DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'off'; SET aqo.learn_statement_timeout = 'on'; diff --git a/expected/temp_tables.out b/expected/temp_tables.out index e71ea09e..0bacb407 100644 --- a/expected/temp_tables.out +++ b/expected/temp_tables.out @@ -1,4 +1,5 @@ CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; CREATE TEMP TABLE tt(); CREATE TABLE pt(); diff --git a/expected/top_queries.out b/expected/top_queries.out index e3339140..9ddaf84a 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -1,4 +1,5 @@ CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; -- diff --git a/expected/unsupported.out b/expected/unsupported.out index 3a7fd101..d9df8159 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -1,4 +1,5 @@ CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; DROP TABLE IF EXISTS t; diff --git 
a/sql/aqo_CVE-2020-14350.sql b/sql/aqo_CVE-2020-14350.sql index f7dd4e23..1b36b50b 100644 --- a/sql/aqo_CVE-2020-14350.sql +++ b/sql/aqo_CVE-2020-14350.sql @@ -27,6 +27,7 @@ $$ LANGUAGE plpgsql RETURNS NULL ON NULL INPUT; RESET ROLE; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; -- Test result (must be 'off') SET ROLE regress_hacker; diff --git a/sql/aqo_controlled.sql b/sql/aqo_controlled.sql index c4d1db08..ed39323b 100644 --- a/sql/aqo_controlled.sql +++ b/sql/aqo_controlled.sql @@ -29,6 +29,7 @@ CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); ANALYZE aqo_test2; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; diff --git a/sql/aqo_disabled.sql b/sql/aqo_disabled.sql index 9c232a56..28c074a9 100644 --- a/sql/aqo_disabled.sql +++ b/sql/aqo_disabled.sql @@ -18,6 +18,7 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index e31923d9..67fddb8f 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -9,6 +9,7 @@ CREATE EXTENSION postgres_fdw; SET aqo.mode = 'learn'; SET aqo.show_details = 'true'; -- show AQO info for each node and entire query. SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. 
+SET aqo.join_threshold = 0; DO $d$ BEGIN diff --git a/sql/aqo_forced.sql b/sql/aqo_forced.sql index 307c85f1..c637beb8 100644 --- a/sql/aqo_forced.sql +++ b/sql/aqo_forced.sql @@ -19,6 +19,7 @@ CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; diff --git a/sql/aqo_intelligent.sql b/sql/aqo_intelligent.sql index bc3351de..8c560e3e 100644 --- a/sql/aqo_intelligent.sql +++ b/sql/aqo_intelligent.sql @@ -19,6 +19,7 @@ CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index ed5c1ed9..f3e44b35 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -40,6 +40,7 @@ CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index 509071a1..a6c41d5a 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -1,4 +1,5 @@ CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; DROP TABLE IF EXISTS a; diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index 9c169a26..df754536 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -1,5 +1,6 @@ \set citizens 1000 +SET aqo.join_threshold = 0; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; diff --git a/sql/gucs.sql b/sql/gucs.sql index 4013669f..69c26a15 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -1,13 +1,5 @@ CREATE EXTENSION aqo; - --- Check interface variables and their default values. Detect, if default value --- of a GUC is changed. 
-SHOW aqo.join_threshold; -SHOW aqo.learn_statement_timeout; -SHOW aqo.show_hash; -SHOW aqo.show_details; -SHOW aqo.force_collect_stat; -SHOW aqo.mode; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = true; diff --git a/sql/plancache.sql b/sql/plancache.sql index 035b8904..ef81de1f 100644 --- a/sql/plancache.sql +++ b/sql/plancache.sql @@ -1,6 +1,7 @@ -- Tests on interaction of AQO with cached plans. CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; SET aqo.show_details = 'on'; SET aqo.show_hash = 'off'; diff --git a/sql/schema.sql b/sql/schema.sql index 8e61dedb..ff45f6d3 100644 --- a/sql/schema.sql +++ b/sql/schema.sql @@ -11,6 +11,7 @@ CREATE EXTENSION aqo; -- fail CREATE SCHEMA IF NOT EXISTS test1; SET search_path TO test1, public; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; CREATE TABLE test (id SERIAL, data TEXT); diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql index 6885ab91..84cdd5d8 100644 --- a/sql/statement_timeout.sql +++ b/sql/statement_timeout.sql @@ -23,6 +23,7 @@ ANALYZE t; DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. 
CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'off'; SET aqo.learn_statement_timeout = 'on'; diff --git a/sql/temp_tables.sql b/sql/temp_tables.sql index 2ca22de0..0bf61c50 100644 --- a/sql/temp_tables.sql +++ b/sql/temp_tables.sql @@ -1,4 +1,5 @@ CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; CREATE TEMP TABLE tt(); diff --git a/sql/top_queries.sql b/sql/top_queries.sql index 2725d087..46d35324 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -1,4 +1,5 @@ CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 7698b168..3c482fe4 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -1,4 +1,5 @@ CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index ab4b3549..699ba169 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -12,6 +12,7 @@ shared_preload_libraries = 'aqo' aqo.mode = 'intelligent' log_statement = 'ddl' + aqo.join_threshold = 0 }); # Test constants. Default values. diff --git a/t/002_pg_stat_statements_aqo.pl b/t/002_pg_stat_statements_aqo.pl index c0bc5127..69c020c9 100644 --- a/t/002_pg_stat_statements_aqo.pl +++ b/t/002_pg_stat_statements_aqo.pl @@ -15,6 +15,7 @@ aqo.profile_enable = 'true' aqo.force_collect_stat = 'false' log_statement = 'ddl' # reduce size of logs. + aqo.join_threshold = 0 }); # Test constants. 
my $TRANSACTIONS = 100; From 51c556aaa6ba9a1fcc9b08ec24da61860e63f48f Mon Sep 17 00:00:00 2001 From: Andrey Kazarinov Date: Sun, 29 May 2022 21:18:11 +0300 Subject: [PATCH 086/203] [PGPRO-6374] relocatable aqo --- Makefile | 3 +- aqo--1.0--1.1.sql | 18 ++++----- aqo--1.0.sql | 28 ++++++------- aqo--1.1--1.2.sql | 36 ++++++++--------- aqo--1.2--1.3.sql | 14 +++---- aqo--1.2.sql | 54 ++++++++++++------------- aqo--1.3--1.4.sql | 12 +++--- aqo--1.4--1.5.sql | 60 ++++++++++++++-------------- aqo.control | 2 +- expected/aqo_learn.out | 2 +- expected/gucs.out | 10 ++--- expected/relocatable.out | 85 ++++++++++++++++++++++++++++++++++++++++ expected/schema.out | 4 +- expected/unsupported.out | 6 +-- sql/aqo_learn.sql | 2 +- sql/gucs.sql | 10 ++--- sql/relocatable.sql | 38 ++++++++++++++++++ sql/schema.sql | 4 +- sql/unsupported.sql | 6 +-- storage.c | 14 +++---- 20 files changed, 266 insertions(+), 142 deletions(-) create mode 100644 expected/relocatable.out create mode 100644 sql/relocatable.sql diff --git a/Makefile b/Makefile index 5beaba9a..05d05bb9 100755 --- a/Makefile +++ b/Makefile @@ -25,7 +25,8 @@ REGRESS = aqo_disabled \ plancache \ statement_timeout \ temp_tables \ - top_queries + top_queries \ + relocatable fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw stat_srcdir = $(top_srcdir)/contrib/pg_stat_statements diff --git a/aqo--1.0--1.1.sql b/aqo--1.0--1.1.sql index 37fdf635..427ddf3d 100644 --- a/aqo--1.0--1.1.sql +++ b/aqo--1.0--1.1.sql @@ -1,13 +1,13 @@ -ALTER TABLE public.aqo_query_texts ALTER COLUMN query_text TYPE text; +ALTER TABLE aqo_query_texts ALTER COLUMN query_text TYPE text; -DROP INDEX public.aqo_queries_query_hash_idx CASCADE; -DROP INDEX public.aqo_query_texts_query_hash_idx CASCADE; -DROP INDEX public.aqo_query_stat_idx CASCADE; -DROP INDEX public.aqo_fss_access_idx CASCADE; +DROP INDEX aqo_queries_query_hash_idx CASCADE; +DROP INDEX aqo_query_texts_query_hash_idx CASCADE; +DROP INDEX aqo_query_stat_idx CASCADE; +DROP INDEX aqo_fss_access_idx 
CASCADE; CREATE UNIQUE INDEX aqo_fss_access_idx - ON public.aqo_data (fspace_hash, fsspace_hash); + ON aqo_data (fspace_hash, fsspace_hash); CREATE OR REPLACE FUNCTION aqo_migrate_to_1_1_get_pk(rel regclass) RETURNS regclass AS $$ @@ -28,15 +28,15 @@ $$ LANGUAGE plpgsql; DO $$ BEGIN EXECUTE pg_catalog.format('ALTER TABLE %s RENAME to %s', - aqo_migrate_to_1_1_get_pk('public.aqo_queries'), + aqo_migrate_to_1_1_get_pk('aqo_queries'), 'aqo_queries_query_hash_idx'); EXECUTE pg_catalog.format('ALTER TABLE %s RENAME to %s', - aqo_migrate_to_1_1_get_pk('public.aqo_query_texts'), + aqo_migrate_to_1_1_get_pk('aqo_query_texts'), 'aqo_query_texts_query_hash_idx'); EXECUTE pg_catalog.format('ALTER TABLE %s RENAME to %s', - aqo_migrate_to_1_1_get_pk('public.aqo_query_stat'), + aqo_migrate_to_1_1_get_pk('aqo_query_stat'), 'aqo_query_stat_idx'); END $$; diff --git a/aqo--1.0.sql b/aqo--1.0.sql index 67395744..0bb02ab8 100644 --- a/aqo--1.0.sql +++ b/aqo--1.0.sql @@ -1,7 +1,7 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION aqo" to load this file. 
\quit -CREATE TABLE public.aqo_queries ( +CREATE TABLE aqo_queries ( query_hash bigint PRIMARY KEY, learn_aqo boolean NOT NULL, use_aqo boolean NOT NULL, @@ -9,13 +9,13 @@ CREATE TABLE public.aqo_queries ( auto_tuning boolean NOT NULL ); -CREATE TABLE public.aqo_query_texts ( - query_hash bigint PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, +CREATE TABLE aqo_query_texts ( + query_hash bigint PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, query_text varchar NOT NULL ); -CREATE TABLE public.aqo_query_stat ( - query_hash bigint PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, +CREATE TABLE aqo_query_stat ( + query_hash bigint PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, execution_time_with_aqo double precision[], execution_time_without_aqo double precision[], planning_time_with_aqo double precision[], @@ -26,8 +26,8 @@ CREATE TABLE public.aqo_query_stat ( executions_without_aqo bigint ); -CREATE TABLE public.aqo_data ( - fspace_hash bigint NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, +CREATE TABLE aqo_data ( + fspace_hash bigint NOT NULL REFERENCES aqo_queries ON DELETE CASCADE, fsspace_hash int NOT NULL, nfeatures int NOT NULL, features double precision[][], @@ -35,18 +35,18 @@ CREATE TABLE public.aqo_data ( UNIQUE (fspace_hash, fsspace_hash) ); -CREATE INDEX aqo_queries_query_hash_idx ON public.aqo_queries (query_hash); -CREATE INDEX aqo_query_texts_query_hash_idx ON public.aqo_query_texts (query_hash); -CREATE INDEX aqo_query_stat_idx ON public.aqo_query_stat (query_hash); -CREATE INDEX aqo_fss_access_idx ON public.aqo_data (fspace_hash, fsspace_hash); +CREATE INDEX aqo_queries_query_hash_idx ON aqo_queries (query_hash); +CREATE INDEX aqo_query_texts_query_hash_idx ON aqo_query_texts (query_hash); +CREATE INDEX aqo_query_stat_idx ON aqo_query_stat (query_hash); +CREATE INDEX aqo_fss_access_idx ON aqo_data (fspace_hash, fsspace_hash); -INSERT INTO public.aqo_queries VALUES (0, false, false, 0, false); -INSERT 
INTO public.aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); +INSERT INTO aqo_queries VALUES (0, false, false, 0, false); +INSERT INTO aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); -- a virtual query for COMMON feature space CREATE FUNCTION invalidate_deactivated_queries_cache() RETURNS trigger AS 'MODULE_PATHNAME' LANGUAGE C; CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE - ON public.aqo_queries FOR EACH STATEMENT + ON aqo_queries FOR EACH STATEMENT EXECUTE PROCEDURE invalidate_deactivated_queries_cache(); diff --git a/aqo--1.1--1.2.sql b/aqo--1.1--1.2.sql index 9291e7b7..27baff66 100644 --- a/aqo--1.1--1.2.sql +++ b/aqo--1.1--1.2.sql @@ -14,8 +14,8 @@ $$ LANGUAGE plpgsql; DO $$ BEGIN EXECUTE pg_catalog.format( - 'ALTER TABLE public.aqo_data DROP CONSTRAINT %s', - aqo_migrate_to_1_2_get_pk('public.aqo_data'::regclass), + 'ALTER TABLE aqo_data DROP CONSTRAINT %s', + aqo_migrate_to_1_2_get_pk('aqo_data'::regclass), 'aqo_queries_query_hash_idx'); END $$; @@ -28,7 +28,7 @@ DROP FUNCTION aqo_migrate_to_1_2_get_pk(regclass); -- -- Show query state at the AQO knowledge base -CREATE OR REPLACE FUNCTION public.aqo_status(hash bigint) +CREATE OR REPLACE FUNCTION aqo_status(hash bigint) RETURNS TABLE ( "learn" BOOL, "use aqo" BOOL, @@ -49,7 +49,7 @@ SELECT learn_aqo,use_aqo,auto_tuning,fspace_hash, to_char(execution_time_with_aqo[n3],'9.99EEEE'), to_char(cardinality_error_with_aqo[n1],'9.99EEEE'), executions_with_aqo -FROM public.aqo_queries aq, public.aqo_query_stat aqs, +FROM aqo_queries aq, aqo_query_stat aqs, (SELECT array_length(n1,1) AS n1, array_length(n2,1) AS n2, array_length(n3,1) AS n3, array_length(n4,1) AS n4 FROM @@ -57,50 +57,50 @@ FROM public.aqo_queries aq, public.aqo_query_stat aqs, cardinality_error_without_aqo AS n2, execution_time_with_aqo AS n3, execution_time_without_aqo AS n4 - FROM public.aqo_query_stat aqs WHERE + FROM aqo_query_stat aqs WHERE aqs.query_hash = $1) AS al) AS q WHERE 
(aqs.query_hash = aq.query_hash) AND aqs.query_hash = $1; $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION public.aqo_enable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) RETURNS VOID AS $func$ -UPDATE public.aqo_queries SET +UPDATE aqo_queries SET learn_aqo = 'true', use_aqo = 'true' WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION public.aqo_disable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) RETURNS VOID AS $func$ -UPDATE public.aqo_queries SET +UPDATE aqo_queries SET learn_aqo = 'false', use_aqo = 'false', auto_tuning = 'false' WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION public.aqo_clear_hist(hash bigint) +CREATE OR REPLACE FUNCTION aqo_clear_hist(hash bigint) RETURNS VOID AS $func$ -DELETE FROM public.aqo_data WHERE fspace_hash=$1; +DELETE FROM aqo_data WHERE fspace_hash=$1; $func$ LANGUAGE SQL; -- Show queries that contains 'Never executed' nodes at the plan. -CREATE OR REPLACE FUNCTION public.aqo_ne_queries() +CREATE OR REPLACE FUNCTION aqo_ne_queries() RETURNS SETOF int AS $func$ -SELECT query_hash FROM public.aqo_query_stat aqs +SELECT query_hash FROM aqo_query_stat aqs WHERE -1 = ANY (cardinality_error_with_aqo::double precision[]); $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION public.aqo_drop(hash bigint) +CREATE OR REPLACE FUNCTION aqo_drop(hash bigint) RETURNS VOID AS $func$ -DELETE FROM public.aqo_queries aq WHERE (aq.query_hash = $1); -DELETE FROM public.aqo_data ad WHERE (ad.fspace_hash = $1); -DELETE FROM public.aqo_query_stat aq WHERE (aq.query_hash = $1); -DELETE FROM public.aqo_query_texts aq WHERE (aq.query_hash = $1); +DELETE FROM aqo_queries aq WHERE (aq.query_hash = $1); +DELETE FROM aqo_data ad WHERE (ad.fspace_hash = $1); +DELETE FROM aqo_query_stat aq WHERE (aq.query_hash = $1); +DELETE FROM aqo_query_texts aq WHERE (aq.query_hash = $1); $func$ LANGUAGE SQL; diff --git a/aqo--1.2--1.3.sql b/aqo--1.2--1.3.sql index 
c29a6f10..b1cfe3a9 100755 --- a/aqo--1.2--1.3.sql +++ b/aqo--1.2--1.3.sql @@ -1,9 +1,9 @@ -ALTER TABLE public.aqo_data ADD COLUMN oids text [] DEFAULT NULL; +ALTER TABLE aqo_data ADD COLUMN oids text [] DEFAULT NULL; -- -- Remove data, related to previously dropped tables, from the AQO tables. -- -CREATE OR REPLACE FUNCTION public.clean_aqo_data() RETURNS void AS $$ +CREATE OR REPLACE FUNCTION clean_aqo_data() RETURNS void AS $$ DECLARE aqo_data_row aqo_data%ROWTYPE; aqo_queries_row aqo_queries%ROWTYPE; @@ -29,7 +29,7 @@ BEGIN END LOOP; END IF; - FOR aqo_queries_row IN (SELECT * FROM public.aqo_queries) + FOR aqo_queries_row IN (SELECT * FROM aqo_queries) LOOP IF (delete_row = true AND fspace_hash_var <> 0 AND fspace_hash_var = aqo_queries_row.fspace_hash AND @@ -87,7 +87,7 @@ $$ LANGUAGE plpgsql; -- -- Top of queries with the highest value of execution time. -- -CREATE OR REPLACE FUNCTION public.top_time_queries(n int) +CREATE OR REPLACE FUNCTION top_time_queries(n int) RETURNS TABLE(num bigint, fspace_hash bigint, query_hash bigint, @@ -103,7 +103,7 @@ BEGIN aqo_queries.query_hash, to_char(array_avg(execution_time_without_aqo), '9.99EEEE')::float, to_char(array_mse(execution_time_without_aqo), '9.99EEEE')::float - FROM public.aqo_queries INNER JOIN aqo_query_stat + FROM aqo_queries INNER JOIN aqo_query_stat ON aqo_queries.query_hash = aqo_query_stat.query_hash GROUP BY (execution_time_without_aqo, aqo_queries.fspace_hash, aqo_queries.query_hash) ORDER BY execution_time DESC LIMIT n; @@ -113,7 +113,7 @@ $$ LANGUAGE plpgsql; -- -- Top of queries with largest value of total cardinality error. 
-- -CREATE OR REPLACE FUNCTION public.top_error_queries(n int) +CREATE OR REPLACE FUNCTION top_error_queries(n int) RETURNS TABLE(num bigint, fspace_hash bigint, query_hash bigint, @@ -129,7 +129,7 @@ BEGIN aqo_queries.query_hash, to_char(array_avg(cardinality_error_without_aqo), '9.99EEEE')::float, to_char(array_mse(cardinality_error_without_aqo), '9.99EEEE')::float - FROM public.aqo_queries INNER JOIN aqo_query_stat + FROM aqo_queries INNER JOIN aqo_query_stat ON aqo_queries.query_hash = aqo_query_stat.query_hash GROUP BY (cardinality_error_without_aqo, aqo_queries.fspace_hash, aqo_queries.query_hash) ORDER BY error DESC LIMIT n; diff --git a/aqo--1.2.sql b/aqo--1.2.sql index 7e3abf4a..1e2943a8 100644 --- a/aqo--1.2.sql +++ b/aqo--1.2.sql @@ -1,7 +1,7 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION aqo" to load this file. \quit -CREATE TABLE public.aqo_queries ( +CREATE TABLE aqo_queries ( query_hash bigint CONSTRAINT aqo_queries_query_hash_idx PRIMARY KEY, learn_aqo boolean NOT NULL, use_aqo boolean NOT NULL, @@ -9,13 +9,13 @@ CREATE TABLE public.aqo_queries ( auto_tuning boolean NOT NULL ); -CREATE TABLE public.aqo_query_texts ( - query_hash bigint CONSTRAINT aqo_query_texts_query_hash_idx PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, +CREATE TABLE aqo_query_texts ( + query_hash bigint CONSTRAINT aqo_query_texts_query_hash_idx PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, query_text text NOT NULL ); -CREATE TABLE public.aqo_query_stat ( - query_hash bigint CONSTRAINT aqo_query_stat_idx PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, +CREATE TABLE aqo_query_stat ( + query_hash bigint CONSTRAINT aqo_query_stat_idx PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, execution_time_with_aqo double precision[], execution_time_without_aqo double precision[], planning_time_with_aqo double precision[], @@ -26,25 +26,25 @@ CREATE TABLE public.aqo_query_stat ( 
executions_without_aqo bigint ); -CREATE TABLE public.aqo_data ( - fspace_hash bigint NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, +CREATE TABLE aqo_data ( + fspace_hash bigint NOT NULL REFERENCES aqo_queries ON DELETE CASCADE, fsspace_hash int NOT NULL, nfeatures int NOT NULL, features double precision[][], targets double precision[] ); -CREATE UNIQUE INDEX aqo_fss_access_idx ON public.aqo_data (fspace_hash, fsspace_hash); +CREATE UNIQUE INDEX aqo_fss_access_idx ON aqo_data (fspace_hash, fsspace_hash); -INSERT INTO public.aqo_queries VALUES (0, false, false, 0, false); -INSERT INTO public.aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); +INSERT INTO aqo_queries VALUES (0, false, false, 0, false); +INSERT INTO aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); -- a virtual query for COMMON feature space CREATE FUNCTION invalidate_deactivated_queries_cache() RETURNS trigger AS 'MODULE_PATHNAME' LANGUAGE C; CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE - ON public.aqo_queries FOR EACH STATEMENT + ON aqo_queries FOR EACH STATEMENT EXECUTE PROCEDURE invalidate_deactivated_queries_cache(); -- @@ -52,7 +52,7 @@ CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE -- -- Show query state at the AQO knowledge base -CREATE FUNCTION public.aqo_status(hash bigint) +CREATE FUNCTION aqo_status(hash bigint) RETURNS TABLE ( "learn" BOOL, "use aqo" BOOL, @@ -73,7 +73,7 @@ SELECT learn_aqo,use_aqo,auto_tuning,fspace_hash, to_char(execution_time_with_aqo[n3],'9.99EEEE'), to_char(cardinality_error_with_aqo[n1],'9.99EEEE'), executions_with_aqo -FROM public.aqo_queries aq, public.aqo_query_stat aqs, +FROM aqo_queries aq, aqo_query_stat aqs, (SELECT array_length(n1,1) AS n1, array_length(n2,1) AS n2, array_length(n3,1) AS n3, array_length(n4,1) AS n4 FROM @@ -81,50 +81,50 @@ FROM public.aqo_queries aq, public.aqo_query_stat aqs, cardinality_error_without_aqo AS n2, execution_time_with_aqo AS 
n3, execution_time_without_aqo AS n4 - FROM public.aqo_query_stat aqs WHERE + FROM aqo_query_stat aqs WHERE aqs.query_hash = $1) AS al) AS q WHERE (aqs.query_hash = aq.query_hash) AND aqs.query_hash = $1; $func$ LANGUAGE SQL; -CREATE FUNCTION public.aqo_enable_query(hash bigint) +CREATE FUNCTION aqo_enable_query(hash bigint) RETURNS VOID AS $func$ -UPDATE public.aqo_queries SET +UPDATE aqo_queries SET learn_aqo = 'true', use_aqo = 'true' WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE FUNCTION public.aqo_disable_query(hash bigint) +CREATE FUNCTION aqo_disable_query(hash bigint) RETURNS VOID AS $func$ -UPDATE public.aqo_queries SET +UPDATE aqo_queries SET learn_aqo = 'false', use_aqo = 'false', auto_tuning = 'false' WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE FUNCTION public.aqo_clear_hist(hash bigint) +CREATE FUNCTION aqo_clear_hist(hash bigint) RETURNS VOID AS $func$ -DELETE FROM public.aqo_data WHERE fspace_hash=$1; +DELETE FROM aqo_data WHERE fspace_hash=$1; $func$ LANGUAGE SQL; -- Show queries that contains 'Never executed' nodes at the plan. 
-CREATE FUNCTION public.aqo_ne_queries() +CREATE FUNCTION aqo_ne_queries() RETURNS SETOF int AS $func$ -SELECT query_hash FROM public.aqo_query_stat aqs +SELECT query_hash FROM aqo_query_stat aqs WHERE -1 = ANY (cardinality_error_with_aqo::double precision[]); $func$ LANGUAGE SQL; -CREATE FUNCTION public.aqo_drop(hash bigint) +CREATE FUNCTION aqo_drop(hash bigint) RETURNS VOID AS $func$ -DELETE FROM public.aqo_queries aq WHERE (aq.query_hash = $1); -DELETE FROM public.aqo_data ad WHERE (ad.fspace_hash = $1); -DELETE FROM public.aqo_query_stat aq WHERE (aq.query_hash = $1); -DELETE FROM public.aqo_query_texts aq WHERE (aq.query_hash = $1); +DELETE FROM aqo_queries aq WHERE (aq.query_hash = $1); +DELETE FROM aqo_data ad WHERE (ad.fspace_hash = $1); +DELETE FROM aqo_query_stat aq WHERE (aq.query_hash = $1); +DELETE FROM aqo_query_texts aq WHERE (aq.query_hash = $1); $func$ LANGUAGE SQL; diff --git a/aqo--1.3--1.4.sql b/aqo--1.3--1.4.sql index f6df0263..002a148a 100755 --- a/aqo--1.3--1.4.sql +++ b/aqo--1.3--1.4.sql @@ -3,9 +3,9 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "ALTER EXTENSION aqo UPDATE TO '1.4'" to load this file. \quit -ALTER TABLE public.aqo_data ADD COLUMN reliability double precision []; +ALTER TABLE aqo_data ADD COLUMN reliability double precision []; -DROP FUNCTION public.top_error_queries(int); +DROP FUNCTION top_error_queries(int); -- -- Get cardinality error of queries the last time they were executed. @@ -20,7 +20,7 @@ DROP FUNCTION public.top_error_queries(int); -- error - AQO error that calculated on plan nodes of the query. -- nexecs - number of executions of queries associated with this ID. 
-- -CREATE OR REPLACE FUNCTION public.show_cardinality_errors(controlled boolean) +CREATE OR REPLACE FUNCTION show_cardinality_errors(controlled boolean) RETURNS TABLE(num bigint, id bigint, fshash bigint, error float, nexecs bigint) AS $$ BEGIN @@ -35,7 +35,7 @@ IF (controlled) THEN aq.fspace_hash AS fs_hash, cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror, executions_with_aqo AS execs - FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + FROM aqo_queries aq JOIN aqo_query_stat aqs ON aq.query_hash = aqs.query_hash WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) ) AS q1 @@ -51,7 +51,7 @@ ELSE aq.fspace_hash AS fs_hash, array_avg(cardinality_error_without_aqo) AS cerror, executions_without_aqo AS execs - FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + FROM aqo_queries aq JOIN aqo_query_stat aqs ON aq.query_hash = aqs.query_hash WHERE TRUE = ANY (SELECT unnest(cardinality_error_without_aqo) IS NOT NULL) ) AS q1 @@ -60,5 +60,5 @@ END IF; END; $$ LANGUAGE plpgsql; -COMMENT ON FUNCTION public.show_cardinality_errors(boolean) IS +COMMENT ON FUNCTION show_cardinality_errors(boolean) IS 'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 159f3895..f833e251 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -8,9 +8,9 @@ -- The oids array contains oids of permanent tables only. It is used for cleanup -- ML knowledge base from queries that refer to removed tables. 
-- -DROP TABLE public.aqo_data CASCADE; -CREATE TABLE public.aqo_data ( - fspace_hash bigint NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, +DROP TABLE aqo_data CASCADE; +CREATE TABLE aqo_data ( + fspace_hash bigint NOT NULL REFERENCES aqo_queries ON DELETE CASCADE, fsspace_hash int NOT NULL, nfeatures int NOT NULL, features double precision[][], @@ -18,16 +18,16 @@ CREATE TABLE public.aqo_data ( oids oid [] DEFAULT NULL, reliability double precision [] ); -CREATE UNIQUE INDEX aqo_fss_access_idx ON public.aqo_data (fspace_hash, fsspace_hash); +CREATE UNIQUE INDEX aqo_fss_access_idx ON aqo_data (fspace_hash, fsspace_hash); -DROP FUNCTION public.top_time_queries; -DROP FUNCTION public.aqo_drop; -DROP FUNCTION public.clean_aqo_data; -DROP FUNCTION public.show_cardinality_errors; +DROP FUNCTION top_time_queries; +DROP FUNCTION aqo_drop; +DROP FUNCTION clean_aqo_data; +DROP FUNCTION show_cardinality_errors; DROP FUNCTION array_mse; DROP FUNCTION array_avg; -DROP FUNCTION public.aqo_ne_queries; -- Not needed anymore due to changing in the logic -DROP FUNCTION public.aqo_clear_hist; -- Should be renamed and reworked +DROP FUNCTION aqo_ne_queries; -- Not needed anymore due to changing in the logic +DROP FUNCTION aqo_clear_hist; -- Should be renamed and reworked -- -- Show execution time of queries, for which AQO has statistics. @@ -35,7 +35,7 @@ DROP FUNCTION public.aqo_clear_hist; -- Should be renamed and reworked -- estimations, or not used (controlled = false). -- Last case is possible in disabled mode with aqo.force_collect_stat = 'on'. 
-- -CREATE OR REPLACE FUNCTION public.aqo_execution_time(controlled boolean) +CREATE OR REPLACE FUNCTION aqo_execution_time(controlled boolean) RETURNS TABLE(num bigint, id bigint, fshash bigint, exec_time float, nexecs bigint) AS $$ BEGIN @@ -52,7 +52,7 @@ IF (controlled) THEN aq.fspace_hash AS fs_hash, execution_time_with_aqo[array_length(execution_time_with_aqo, 1)] AS exectime, executions_with_aqo AS execs - FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + FROM aqo_queries aq JOIN aqo_query_stat aqs ON aq.query_hash = aqs.query_hash WHERE TRUE = ANY (SELECT unnest(execution_time_with_aqo) IS NOT NULL) ) AS q1 @@ -71,7 +71,7 @@ ELSE aq.fspace_hash AS fs_hash, (SELECT AVG(t) FROM unnest(execution_time_without_aqo) t) AS exectime, executions_without_aqo AS execs - FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + FROM aqo_queries aq JOIN aqo_query_stat aqs ON aq.query_hash = aqs.query_hash WHERE TRUE = ANY (SELECT unnest(execution_time_without_aqo) IS NOT NULL) ) AS q1 @@ -80,13 +80,13 @@ END IF; END; $$ LANGUAGE plpgsql; -COMMENT ON FUNCTION public.aqo_execution_time(boolean) IS +COMMENT ON FUNCTION aqo_execution_time(boolean) IS 'Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions.'; -- -- Remove all information about a query class from AQO storage. 
-- -CREATE OR REPLACE FUNCTION public.aqo_drop_class(queryid bigint) +CREATE OR REPLACE FUNCTION aqo_drop_class(queryid bigint) RETURNS integer AS $$ DECLARE fs bigint; @@ -96,7 +96,7 @@ BEGIN raise EXCEPTION '[AQO] Cannot remove basic class %.', queryid; END IF; - SELECT fspace_hash FROM public.aqo_queries WHERE (query_hash = queryid) INTO fs; + SELECT fspace_hash FROM aqo_queries WHERE (query_hash = queryid) INTO fs; IF (fs IS NULL) THEN raise WARNING '[AQO] Nothing to remove for the class %.', queryid; @@ -107,18 +107,18 @@ BEGIN raise WARNING '[AQO] Removing query class has non-generic feature space value: id = %, fs = %.', queryid, fs; END IF; - SELECT count(*) FROM public.aqo_data WHERE fspace_hash = fs INTO num; + SELECT count(*) FROM aqo_data WHERE fspace_hash = fs INTO num; /* * Remove the only from aqo_queries table. All other data will be removed by * CASCADE deletion. */ - DELETE FROM public.aqo_queries WHERE query_hash = queryid; + DELETE FROM aqo_queries WHERE query_hash = queryid; RETURN num; END; $$ LANGUAGE plpgsql; -COMMENT ON FUNCTION public.aqo_drop_class(bigint) IS +COMMENT ON FUNCTION aqo_drop_class(bigint) IS 'Remove info about an query class from AQO ML knowledge base.'; -- @@ -128,7 +128,7 @@ COMMENT ON FUNCTION public.aqo_drop_class(bigint) IS -- tables even if only one oid for one feature subspace of the space is illegal. -- Returns number of deleted rows from aqo_queries and aqo_data tables. -- -CREATE OR REPLACE FUNCTION public.aqo_cleanup(OUT nfs integer, OUT nfss integer) +CREATE OR REPLACE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) AS $$ DECLARE fs bigint; @@ -159,7 +159,7 @@ BEGIN END; $$ LANGUAGE plpgsql; -COMMENT ON FUNCTION public.aqo_cleanup() IS +COMMENT ON FUNCTION aqo_cleanup() IS 'Remove unneeded rows from the AQO ML storage'; -- @@ -175,7 +175,7 @@ COMMENT ON FUNCTION public.aqo_cleanup() IS -- error - AQO error that calculated on plan nodes of the query. 
-- nexecs - number of executions of queries associated with this ID. -- -CREATE OR REPLACE FUNCTION public.aqo_cardinality_error(controlled boolean) +CREATE OR REPLACE FUNCTION aqo_cardinality_error(controlled boolean) RETURNS TABLE(num bigint, id bigint, fshash bigint, error float, nexecs bigint) AS $$ BEGIN @@ -190,7 +190,7 @@ IF (controlled) THEN aq.fspace_hash AS fs_hash, cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror, executions_with_aqo AS execs - FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + FROM aqo_queries aq JOIN aqo_query_stat aqs ON aq.query_hash = aqs.query_hash WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) ) AS q1 @@ -206,7 +206,7 @@ ELSE aq.fspace_hash AS fs_hash, (SELECT AVG(t) FROM unnest(cardinality_error_without_aqo) t) AS cerror, executions_without_aqo AS execs - FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + FROM aqo_queries aq JOIN aqo_query_stat aqs ON aq.query_hash = aqs.query_hash WHERE TRUE = ANY (SELECT unnest(cardinality_error_without_aqo) IS NOT NULL) ) AS q1 @@ -215,7 +215,7 @@ END IF; END; $$ LANGUAGE plpgsql; -COMMENT ON FUNCTION public.aqo_cardinality_error(boolean) IS +COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS 'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; -- @@ -225,7 +225,7 @@ COMMENT ON FUNCTION public.aqo_cardinality_error(boolean) IS -- class. -- Returns a number of deleted rows in the aqo_data table. -- -CREATE OR REPLACE FUNCTION public.aqo_reset_query(queryid bigint) +CREATE OR REPLACE FUNCTION aqo_reset_query(queryid bigint) RETURNS integer AS $$ DECLARE num integer; @@ -235,12 +235,12 @@ BEGIN raise WARNING '[AQO] Reset common feature space.' 
END IF; - SELECT fspace_hash FROM public.aqo_queries WHERE query_hash = queryid INTO fs; - SELECT count(*) FROM public.aqo_data WHERE fspace_hash = fs INTO num; - DELETE FROM public.aqo_data WHERE fspace_hash = fs; + SELECT fspace_hash FROM aqo_queries WHERE query_hash = queryid INTO fs; + SELECT count(*) FROM aqo_data WHERE fspace_hash = fs INTO num; + DELETE FROM aqo_data WHERE fspace_hash = fs; RETURN num; END; $$ LANGUAGE plpgsql; -COMMENT ON FUNCTION public.aqo_reset_query(bigint) IS +COMMENT ON FUNCTION aqo_reset_query(bigint) IS 'Remove from AQO storage only learning data for given QueryId.'; diff --git a/aqo.control b/aqo.control index 9c6c65b3..5507effb 100644 --- a/aqo.control +++ b/aqo.control @@ -2,4 +2,4 @@ comment = 'machine learning for cardinality estimation in optimizer' default_version = '1.5' module_pathname = '$libdir/aqo' -relocatable = false +relocatable = true diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index dad8048d..0153fdce 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -236,7 +236,7 @@ SELECT count(*) FROM tmp1; (1 row) -- Remove data on some unneeded instances of tmp1 table. -SELECT public.aqo_cleanup(); +SELECT aqo_cleanup(); aqo_cleanup ------------- (9,18) diff --git a/expected/gucs.out b/expected/gucs.out index 2141a058..995eca7b 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -31,31 +31,31 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) (6 rows) -- Check existence of the interface functions. -SELECT obj_description('public.aqo_cardinality_error'::regproc::oid); +SELECT obj_description('aqo_cardinality_error'::regproc::oid); obj_description --------------------------------------------------------------------------------------------------------------- Get cardinality error of queries the last time they were executed. Order queries according to an error value. 
(1 row) -SELECT obj_description('public.aqo_execution_time'::regproc::oid); +SELECT obj_description('aqo_execution_time'::regproc::oid); obj_description ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions. (1 row) -SELECT obj_description('public.aqo_drop_class'::regproc::oid); +SELECT obj_description('aqo_drop_class'::regproc::oid); obj_description -------------------------------------------------------------- Remove info about an query class from AQO ML knowledge base. (1 row) -SELECT obj_description('public.aqo_cleanup'::regproc::oid); +SELECT obj_description('aqo_cleanup'::regproc::oid); obj_description ---------------------------------------------- Remove unneeded rows from the AQO ML storage (1 row) -SELECT obj_description('public.aqo_reset_query'::regproc::oid); +SELECT obj_description('aqo_reset_query'::regproc::oid); obj_description --------------------------------------------------------------- Remove from AQO storage only learning data for given QueryId. 
diff --git a/expected/relocatable.out b/expected/relocatable.out new file mode 100644 index 00000000..8e5eca93 --- /dev/null +++ b/expected/relocatable.out @@ -0,0 +1,85 @@ +DROP EXTENSION IF EXISTS aqo CASCADE; +DROP SCHEMA IF EXISTS test CASCADE; +NOTICE: schema "test" does not exist, skipping +CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; +SET aqo.mode = 'intelligent'; +CREATE TABLE test (id SERIAL, data TEXT); +INSERT INTO test (data) VALUES ('string'); +SELECT * FROM test; + id | data +----+-------- + 1 | string +(1 row) + +SELECT query_text FROM aqo_query_texts; + query_text +--------------------------------------- + COMMON feature space (do not delete!) + SELECT * FROM test; +(2 rows) + +SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; + learn_aqo | use_aqo | auto_tuning +-----------+---------+------------- + f | f | f + t | f | t +(2 rows) + +CREATE SCHEMA IF NOT EXISTS test; +ALTER EXTENSION aqo SET SCHEMA test; +SET aqo.mode = 'intelligent'; +CREATE TABLE test1 (id SERIAL, data TEXT); +INSERT INTO test1 (data) VALUES ('string'); +SELECT * FROM test1; + id | data +----+-------- + 1 | string +(1 row) + +SELECT query_text FROM test.aqo_query_texts; + query_text +--------------------------------------- + COMMON feature space (do not delete!) + SELECT * FROM test; +(2 rows) + +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; + learn_aqo | use_aqo | auto_tuning +-----------+---------+------------- + f | f | f + t | f | t +(2 rows) + +SET search_path TO test; +CREATE TABLE test2 (id SERIAL, data TEXT); +INSERT INTO test2 (data) VALUES ('string'); +SELECT * FROM test2; + id | data +----+-------- + 1 | string +(1 row) + +SELECT query_text FROM aqo_query_texts; + query_text +--------------------------------------- + COMMON feature space (do not delete!) 
+ SELECT * FROM test; + SELECT * FROM test2; +(3 rows) + +SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; + learn_aqo | use_aqo | auto_tuning +-----------+---------+------------- + f | f | f + t | f | t + t | f | t +(3 rows) + +DROP SCHEMA IF EXISTS test CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to extension aqo +drop cascades to table test2 +DROP EXTENSION IF EXISTS aqo CASCADE; +NOTICE: extension "aqo" does not exist, skipping +SET search_path TO public; diff --git a/expected/schema.out b/expected/schema.out index 221b62c0..e2004386 100644 --- a/expected/schema.out +++ b/expected/schema.out @@ -25,14 +25,14 @@ SELECT * FROM test; -- Check AQO service relations state after some manipulations -- Exclude fields with hash values from the queries. Hash is depend on -- nodefuncs code which is highly PostgreSQL version specific. -SELECT query_text FROM public.aqo_query_texts; +SELECT query_text FROM aqo_query_texts; query_text --------------------------------------- COMMON feature space (do not delete!) SELECT * FROM test; (2 rows) -SELECT learn_aqo, use_aqo, auto_tuning FROM public.aqo_queries; +SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f diff --git a/expected/unsupported.out b/expected/unsupported.out index d9df8159..2f4f04a5 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -542,7 +542,7 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? -- Live with this variant of the test for some time. 
SELECT to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.aqo_cardinality_error(true) cef, aqo_query_texts aqt +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id ORDER BY (md5(query_text),error) DESC; error | query_text @@ -580,7 +580,7 @@ ORDER BY (md5(query_text),error) DESC; (12 rows) DROP TABLE t,t1 CASCADE; -SELECT public.aqo_cleanup(); +SELECT aqo_cleanup(); aqo_cleanup ------------- (12,42) @@ -588,7 +588,7 @@ SELECT public.aqo_cleanup(); -- Look for any remaining queries in the ML storage. SELECT to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.aqo_cardinality_error(true) cef, aqo_query_texts aqt +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id ORDER BY (md5(query_text),error) DESC; error | query_text diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index f3e44b35..1db42929 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -124,7 +124,7 @@ WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; SELECT count(*) FROM tmp1; -- Remove data on some unneeded instances of tmp1 table. -SELECT public.aqo_cleanup(); +SELECT aqo_cleanup(); -- Result of the query below should be empty SELECT * FROM aqo_query_texts aqt1, aqo_query_texts aqt2 diff --git a/sql/gucs.sql b/sql/gucs.sql index 69c26a15..d87af3c3 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -15,11 +15,11 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; -- Check existence of the interface functions. 
-SELECT obj_description('public.aqo_cardinality_error'::regproc::oid); -SELECT obj_description('public.aqo_execution_time'::regproc::oid); -SELECT obj_description('public.aqo_drop_class'::regproc::oid); -SELECT obj_description('public.aqo_cleanup'::regproc::oid); -SELECT obj_description('public.aqo_reset_query'::regproc::oid); +SELECT obj_description('aqo_cardinality_error'::regproc::oid); +SELECT obj_description('aqo_execution_time'::regproc::oid); +SELECT obj_description('aqo_drop_class'::regproc::oid); +SELECT obj_description('aqo_cleanup'::regproc::oid); +SELECT obj_description('aqo_reset_query'::regproc::oid); \df aqo_cardinality_error \df aqo_execution_time diff --git a/sql/relocatable.sql b/sql/relocatable.sql new file mode 100644 index 00000000..60085816 --- /dev/null +++ b/sql/relocatable.sql @@ -0,0 +1,38 @@ +DROP EXTENSION IF EXISTS aqo CASCADE; +DROP SCHEMA IF EXISTS test CASCADE; + +CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; +SET aqo.mode = 'intelligent'; + +CREATE TABLE test (id SERIAL, data TEXT); +INSERT INTO test (data) VALUES ('string'); +SELECT * FROM test; + +SELECT query_text FROM aqo_query_texts; +SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; + +CREATE SCHEMA IF NOT EXISTS test; +ALTER EXTENSION aqo SET SCHEMA test; + +SET aqo.mode = 'intelligent'; + +CREATE TABLE test1 (id SERIAL, data TEXT); +INSERT INTO test1 (data) VALUES ('string'); +SELECT * FROM test1; + +SELECT query_text FROM test.aqo_query_texts; +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; + +SET search_path TO test; + +CREATE TABLE test2 (id SERIAL, data TEXT); +INSERT INTO test2 (data) VALUES ('string'); +SELECT * FROM test2; + +SELECT query_text FROM aqo_query_texts; +SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; +DROP SCHEMA IF EXISTS test CASCADE; +DROP EXTENSION IF EXISTS aqo CASCADE; + +SET search_path TO public; \ No newline at end of file diff --git a/sql/schema.sql b/sql/schema.sql index ff45f6d3..f6c5c53d 100644 --- 
a/sql/schema.sql +++ b/sql/schema.sql @@ -21,6 +21,6 @@ SELECT * FROM test; -- Check AQO service relations state after some manipulations -- Exclude fields with hash values from the queries. Hash is depend on -- nodefuncs code which is highly PostgreSQL version specific. -SELECT query_text FROM public.aqo_query_texts; -SELECT learn_aqo, use_aqo, auto_tuning FROM public.aqo_queries; +SELECT query_text FROM aqo_query_texts; +SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; DROP SCHEMA IF EXISTS test1 CASCADE; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 3c482fe4..127ae18b 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -158,17 +158,17 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? -- Live with this variant of the test for some time. SELECT to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.aqo_cardinality_error(true) cef, aqo_query_texts aqt +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id ORDER BY (md5(query_text),error) DESC; DROP TABLE t,t1 CASCADE; -SELECT public.aqo_cleanup(); +SELECT aqo_cleanup(); -- Look for any remaining queries in the ML storage. 
SELECT to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.aqo_cardinality_error(true) cef, aqo_query_texts aqt +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id ORDER BY (md5(query_text),error) DESC; diff --git a/storage.c b/storage.c index 9f9fd418..c9f75b3e 100644 --- a/storage.c +++ b/storage.c @@ -118,7 +118,7 @@ find_query(uint64 qhash, QueryContextData *ctx) Datum values[5]; bool nulls[5] = {false, false, false, false, false}; - if (!open_aqo_relation("public", "aqo_queries", "aqo_queries_query_hash_idx", + if (!open_aqo_relation(NULL, "aqo_queries", "aqo_queries_query_hash_idx", AccessShareLock, &hrel, &irel)) return false; @@ -184,7 +184,7 @@ update_query(uint64 qhash, uint64 fhash, if (XactReadOnly) return false; - if (!open_aqo_relation("public", "aqo_queries", "aqo_queries_query_hash_idx", + if (!open_aqo_relation(NULL, "aqo_queries", "aqo_queries_query_hash_idx", RowExclusiveLock, &hrel, &irel)) return false; @@ -289,7 +289,7 @@ add_query_text(uint64 qhash, const char *query_string) if (XactReadOnly) return false; - if (!open_aqo_relation("public", "aqo_query_texts", + if (!open_aqo_relation(NULL, "aqo_query_texts", "aqo_query_texts_query_hash_idx", RowExclusiveLock, &hrel, &irel)) return false; @@ -417,7 +417,7 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids) bool isnull[AQO_DATA_COLUMNS]; bool success = true; - if (!open_aqo_relation("public", "aqo_data", + if (!open_aqo_relation(NULL, "aqo_data", "aqo_fss_access_idx", AccessShareLock, &hrel, &irel)) return false; @@ -526,7 +526,7 @@ update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids) if (XactReadOnly) return false; - if (!open_aqo_relation("public", "aqo_data", + if (!open_aqo_relation(NULL, "aqo_data", "aqo_fss_access_idx", RowExclusiveLock, &hrel, &irel)) return false; @@ -659,7 +659,7 @@ get_aqo_stat(uint64 qhash) bool shouldFree; - if (!open_aqo_relation("public", "aqo_query_stat", + if (!open_aqo_relation(NULL, 
"aqo_query_stat", "aqo_query_stat_idx", AccessShareLock, &hrel, &irel)) return false; @@ -727,7 +727,7 @@ update_aqo_stat(uint64 qhash, QueryStat *stat) if (XactReadOnly) return; - if (!open_aqo_relation("public", "aqo_query_stat", + if (!open_aqo_relation(NULL, "aqo_query_stat", "aqo_query_stat_idx", RowExclusiveLock, &hrel, &irel)) return; From 229cb8a1f2b27f2d16ca42ae71467915d13d71f7 Mon Sep 17 00:00:00 2001 From: Andrey Kazarinov Date: Mon, 30 May 2022 18:34:58 +0300 Subject: [PATCH 087/203] [PGPRO-6374] Review-related fixes --- aqo--1.0--1.1.sql | 18 +++---- aqo--1.0.sql | 28 +++++------ aqo--1.1--1.2.sql | 36 +++++++------- aqo--1.2--1.3.sql | 14 +++--- aqo--1.2.sql | 54 ++++++++++----------- aqo--1.3--1.4.sql | 12 ++--- aqo--1.4--1.5.sql | 117 +++++++++++++++++++++++++++++++++++++++++++--- 7 files changed, 191 insertions(+), 88 deletions(-) diff --git a/aqo--1.0--1.1.sql b/aqo--1.0--1.1.sql index 427ddf3d..37fdf635 100644 --- a/aqo--1.0--1.1.sql +++ b/aqo--1.0--1.1.sql @@ -1,13 +1,13 @@ -ALTER TABLE aqo_query_texts ALTER COLUMN query_text TYPE text; +ALTER TABLE public.aqo_query_texts ALTER COLUMN query_text TYPE text; -DROP INDEX aqo_queries_query_hash_idx CASCADE; -DROP INDEX aqo_query_texts_query_hash_idx CASCADE; -DROP INDEX aqo_query_stat_idx CASCADE; -DROP INDEX aqo_fss_access_idx CASCADE; +DROP INDEX public.aqo_queries_query_hash_idx CASCADE; +DROP INDEX public.aqo_query_texts_query_hash_idx CASCADE; +DROP INDEX public.aqo_query_stat_idx CASCADE; +DROP INDEX public.aqo_fss_access_idx CASCADE; CREATE UNIQUE INDEX aqo_fss_access_idx - ON aqo_data (fspace_hash, fsspace_hash); + ON public.aqo_data (fspace_hash, fsspace_hash); CREATE OR REPLACE FUNCTION aqo_migrate_to_1_1_get_pk(rel regclass) RETURNS regclass AS $$ @@ -28,15 +28,15 @@ $$ LANGUAGE plpgsql; DO $$ BEGIN EXECUTE pg_catalog.format('ALTER TABLE %s RENAME to %s', - aqo_migrate_to_1_1_get_pk('aqo_queries'), + aqo_migrate_to_1_1_get_pk('public.aqo_queries'), 'aqo_queries_query_hash_idx'); EXECUTE 
pg_catalog.format('ALTER TABLE %s RENAME to %s', - aqo_migrate_to_1_1_get_pk('aqo_query_texts'), + aqo_migrate_to_1_1_get_pk('public.aqo_query_texts'), 'aqo_query_texts_query_hash_idx'); EXECUTE pg_catalog.format('ALTER TABLE %s RENAME to %s', - aqo_migrate_to_1_1_get_pk('aqo_query_stat'), + aqo_migrate_to_1_1_get_pk('public.aqo_query_stat'), 'aqo_query_stat_idx'); END $$; diff --git a/aqo--1.0.sql b/aqo--1.0.sql index 0bb02ab8..67395744 100644 --- a/aqo--1.0.sql +++ b/aqo--1.0.sql @@ -1,7 +1,7 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION aqo" to load this file. \quit -CREATE TABLE aqo_queries ( +CREATE TABLE public.aqo_queries ( query_hash bigint PRIMARY KEY, learn_aqo boolean NOT NULL, use_aqo boolean NOT NULL, @@ -9,13 +9,13 @@ CREATE TABLE aqo_queries ( auto_tuning boolean NOT NULL ); -CREATE TABLE aqo_query_texts ( - query_hash bigint PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, +CREATE TABLE public.aqo_query_texts ( + query_hash bigint PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, query_text varchar NOT NULL ); -CREATE TABLE aqo_query_stat ( - query_hash bigint PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, +CREATE TABLE public.aqo_query_stat ( + query_hash bigint PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, execution_time_with_aqo double precision[], execution_time_without_aqo double precision[], planning_time_with_aqo double precision[], @@ -26,8 +26,8 @@ CREATE TABLE aqo_query_stat ( executions_without_aqo bigint ); -CREATE TABLE aqo_data ( - fspace_hash bigint NOT NULL REFERENCES aqo_queries ON DELETE CASCADE, +CREATE TABLE public.aqo_data ( + fspace_hash bigint NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, fsspace_hash int NOT NULL, nfeatures int NOT NULL, features double precision[][], @@ -35,18 +35,18 @@ CREATE TABLE aqo_data ( UNIQUE (fspace_hash, fsspace_hash) ); -CREATE INDEX aqo_queries_query_hash_idx ON aqo_queries (query_hash); 
-CREATE INDEX aqo_query_texts_query_hash_idx ON aqo_query_texts (query_hash); -CREATE INDEX aqo_query_stat_idx ON aqo_query_stat (query_hash); -CREATE INDEX aqo_fss_access_idx ON aqo_data (fspace_hash, fsspace_hash); +CREATE INDEX aqo_queries_query_hash_idx ON public.aqo_queries (query_hash); +CREATE INDEX aqo_query_texts_query_hash_idx ON public.aqo_query_texts (query_hash); +CREATE INDEX aqo_query_stat_idx ON public.aqo_query_stat (query_hash); +CREATE INDEX aqo_fss_access_idx ON public.aqo_data (fspace_hash, fsspace_hash); -INSERT INTO aqo_queries VALUES (0, false, false, 0, false); -INSERT INTO aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); +INSERT INTO public.aqo_queries VALUES (0, false, false, 0, false); +INSERT INTO public.aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); -- a virtual query for COMMON feature space CREATE FUNCTION invalidate_deactivated_queries_cache() RETURNS trigger AS 'MODULE_PATHNAME' LANGUAGE C; CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE - ON aqo_queries FOR EACH STATEMENT + ON public.aqo_queries FOR EACH STATEMENT EXECUTE PROCEDURE invalidate_deactivated_queries_cache(); diff --git a/aqo--1.1--1.2.sql b/aqo--1.1--1.2.sql index 27baff66..9291e7b7 100644 --- a/aqo--1.1--1.2.sql +++ b/aqo--1.1--1.2.sql @@ -14,8 +14,8 @@ $$ LANGUAGE plpgsql; DO $$ BEGIN EXECUTE pg_catalog.format( - 'ALTER TABLE aqo_data DROP CONSTRAINT %s', - aqo_migrate_to_1_2_get_pk('aqo_data'::regclass), + 'ALTER TABLE public.aqo_data DROP CONSTRAINT %s', + aqo_migrate_to_1_2_get_pk('public.aqo_data'::regclass), 'aqo_queries_query_hash_idx'); END $$; @@ -28,7 +28,7 @@ DROP FUNCTION aqo_migrate_to_1_2_get_pk(regclass); -- -- Show query state at the AQO knowledge base -CREATE OR REPLACE FUNCTION aqo_status(hash bigint) +CREATE OR REPLACE FUNCTION public.aqo_status(hash bigint) RETURNS TABLE ( "learn" BOOL, "use aqo" BOOL, @@ -49,7 +49,7 @@ SELECT learn_aqo,use_aqo,auto_tuning,fspace_hash, 
to_char(execution_time_with_aqo[n3],'9.99EEEE'), to_char(cardinality_error_with_aqo[n1],'9.99EEEE'), executions_with_aqo -FROM aqo_queries aq, aqo_query_stat aqs, +FROM public.aqo_queries aq, public.aqo_query_stat aqs, (SELECT array_length(n1,1) AS n1, array_length(n2,1) AS n2, array_length(n3,1) AS n3, array_length(n4,1) AS n4 FROM @@ -57,50 +57,50 @@ FROM aqo_queries aq, aqo_query_stat aqs, cardinality_error_without_aqo AS n2, execution_time_with_aqo AS n3, execution_time_without_aqo AS n4 - FROM aqo_query_stat aqs WHERE + FROM public.aqo_query_stat aqs WHERE aqs.query_hash = $1) AS al) AS q WHERE (aqs.query_hash = aq.query_hash) AND aqs.query_hash = $1; $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) +CREATE OR REPLACE FUNCTION public.aqo_enable_query(hash bigint) RETURNS VOID AS $func$ -UPDATE aqo_queries SET +UPDATE public.aqo_queries SET learn_aqo = 'true', use_aqo = 'true' WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) +CREATE OR REPLACE FUNCTION public.aqo_disable_query(hash bigint) RETURNS VOID AS $func$ -UPDATE aqo_queries SET +UPDATE public.aqo_queries SET learn_aqo = 'false', use_aqo = 'false', auto_tuning = 'false' WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION aqo_clear_hist(hash bigint) +CREATE OR REPLACE FUNCTION public.aqo_clear_hist(hash bigint) RETURNS VOID AS $func$ -DELETE FROM aqo_data WHERE fspace_hash=$1; +DELETE FROM public.aqo_data WHERE fspace_hash=$1; $func$ LANGUAGE SQL; -- Show queries that contains 'Never executed' nodes at the plan. 
-CREATE OR REPLACE FUNCTION aqo_ne_queries() +CREATE OR REPLACE FUNCTION public.aqo_ne_queries() RETURNS SETOF int AS $func$ -SELECT query_hash FROM aqo_query_stat aqs +SELECT query_hash FROM public.aqo_query_stat aqs WHERE -1 = ANY (cardinality_error_with_aqo::double precision[]); $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION aqo_drop(hash bigint) +CREATE OR REPLACE FUNCTION public.aqo_drop(hash bigint) RETURNS VOID AS $func$ -DELETE FROM aqo_queries aq WHERE (aq.query_hash = $1); -DELETE FROM aqo_data ad WHERE (ad.fspace_hash = $1); -DELETE FROM aqo_query_stat aq WHERE (aq.query_hash = $1); -DELETE FROM aqo_query_texts aq WHERE (aq.query_hash = $1); +DELETE FROM public.aqo_queries aq WHERE (aq.query_hash = $1); +DELETE FROM public.aqo_data ad WHERE (ad.fspace_hash = $1); +DELETE FROM public.aqo_query_stat aq WHERE (aq.query_hash = $1); +DELETE FROM public.aqo_query_texts aq WHERE (aq.query_hash = $1); $func$ LANGUAGE SQL; diff --git a/aqo--1.2--1.3.sql b/aqo--1.2--1.3.sql index b1cfe3a9..c29a6f10 100755 --- a/aqo--1.2--1.3.sql +++ b/aqo--1.2--1.3.sql @@ -1,9 +1,9 @@ -ALTER TABLE aqo_data ADD COLUMN oids text [] DEFAULT NULL; +ALTER TABLE public.aqo_data ADD COLUMN oids text [] DEFAULT NULL; -- -- Remove data, related to previously dropped tables, from the AQO tables. -- -CREATE OR REPLACE FUNCTION clean_aqo_data() RETURNS void AS $$ +CREATE OR REPLACE FUNCTION public.clean_aqo_data() RETURNS void AS $$ DECLARE aqo_data_row aqo_data%ROWTYPE; aqo_queries_row aqo_queries%ROWTYPE; @@ -29,7 +29,7 @@ BEGIN END LOOP; END IF; - FOR aqo_queries_row IN (SELECT * FROM aqo_queries) + FOR aqo_queries_row IN (SELECT * FROM public.aqo_queries) LOOP IF (delete_row = true AND fspace_hash_var <> 0 AND fspace_hash_var = aqo_queries_row.fspace_hash AND @@ -87,7 +87,7 @@ $$ LANGUAGE plpgsql; -- -- Top of queries with the highest value of execution time. 
-- -CREATE OR REPLACE FUNCTION top_time_queries(n int) +CREATE OR REPLACE FUNCTION public.top_time_queries(n int) RETURNS TABLE(num bigint, fspace_hash bigint, query_hash bigint, @@ -103,7 +103,7 @@ BEGIN aqo_queries.query_hash, to_char(array_avg(execution_time_without_aqo), '9.99EEEE')::float, to_char(array_mse(execution_time_without_aqo), '9.99EEEE')::float - FROM aqo_queries INNER JOIN aqo_query_stat + FROM public.aqo_queries INNER JOIN aqo_query_stat ON aqo_queries.query_hash = aqo_query_stat.query_hash GROUP BY (execution_time_without_aqo, aqo_queries.fspace_hash, aqo_queries.query_hash) ORDER BY execution_time DESC LIMIT n; @@ -113,7 +113,7 @@ $$ LANGUAGE plpgsql; -- -- Top of queries with largest value of total cardinality error. -- -CREATE OR REPLACE FUNCTION top_error_queries(n int) +CREATE OR REPLACE FUNCTION public.top_error_queries(n int) RETURNS TABLE(num bigint, fspace_hash bigint, query_hash bigint, @@ -129,7 +129,7 @@ BEGIN aqo_queries.query_hash, to_char(array_avg(cardinality_error_without_aqo), '9.99EEEE')::float, to_char(array_mse(cardinality_error_without_aqo), '9.99EEEE')::float - FROM aqo_queries INNER JOIN aqo_query_stat + FROM public.aqo_queries INNER JOIN aqo_query_stat ON aqo_queries.query_hash = aqo_query_stat.query_hash GROUP BY (cardinality_error_without_aqo, aqo_queries.fspace_hash, aqo_queries.query_hash) ORDER BY error DESC LIMIT n; diff --git a/aqo--1.2.sql b/aqo--1.2.sql index 1e2943a8..7e3abf4a 100644 --- a/aqo--1.2.sql +++ b/aqo--1.2.sql @@ -1,7 +1,7 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION aqo" to load this file. 
\quit -CREATE TABLE aqo_queries ( +CREATE TABLE public.aqo_queries ( query_hash bigint CONSTRAINT aqo_queries_query_hash_idx PRIMARY KEY, learn_aqo boolean NOT NULL, use_aqo boolean NOT NULL, @@ -9,13 +9,13 @@ CREATE TABLE aqo_queries ( auto_tuning boolean NOT NULL ); -CREATE TABLE aqo_query_texts ( - query_hash bigint CONSTRAINT aqo_query_texts_query_hash_idx PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, +CREATE TABLE public.aqo_query_texts ( + query_hash bigint CONSTRAINT aqo_query_texts_query_hash_idx PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, query_text text NOT NULL ); -CREATE TABLE aqo_query_stat ( - query_hash bigint CONSTRAINT aqo_query_stat_idx PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, +CREATE TABLE public.aqo_query_stat ( + query_hash bigint CONSTRAINT aqo_query_stat_idx PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, execution_time_with_aqo double precision[], execution_time_without_aqo double precision[], planning_time_with_aqo double precision[], @@ -26,25 +26,25 @@ CREATE TABLE aqo_query_stat ( executions_without_aqo bigint ); -CREATE TABLE aqo_data ( - fspace_hash bigint NOT NULL REFERENCES aqo_queries ON DELETE CASCADE, +CREATE TABLE public.aqo_data ( + fspace_hash bigint NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, fsspace_hash int NOT NULL, nfeatures int NOT NULL, features double precision[][], targets double precision[] ); -CREATE UNIQUE INDEX aqo_fss_access_idx ON aqo_data (fspace_hash, fsspace_hash); +CREATE UNIQUE INDEX aqo_fss_access_idx ON public.aqo_data (fspace_hash, fsspace_hash); -INSERT INTO aqo_queries VALUES (0, false, false, 0, false); -INSERT INTO aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); +INSERT INTO public.aqo_queries VALUES (0, false, false, 0, false); +INSERT INTO public.aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); -- a virtual query for COMMON feature space CREATE FUNCTION invalidate_deactivated_queries_cache() 
RETURNS trigger AS 'MODULE_PATHNAME' LANGUAGE C; CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE - ON aqo_queries FOR EACH STATEMENT + ON public.aqo_queries FOR EACH STATEMENT EXECUTE PROCEDURE invalidate_deactivated_queries_cache(); -- @@ -52,7 +52,7 @@ CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE -- -- Show query state at the AQO knowledge base -CREATE FUNCTION aqo_status(hash bigint) +CREATE FUNCTION public.aqo_status(hash bigint) RETURNS TABLE ( "learn" BOOL, "use aqo" BOOL, @@ -73,7 +73,7 @@ SELECT learn_aqo,use_aqo,auto_tuning,fspace_hash, to_char(execution_time_with_aqo[n3],'9.99EEEE'), to_char(cardinality_error_with_aqo[n1],'9.99EEEE'), executions_with_aqo -FROM aqo_queries aq, aqo_query_stat aqs, +FROM public.aqo_queries aq, public.aqo_query_stat aqs, (SELECT array_length(n1,1) AS n1, array_length(n2,1) AS n2, array_length(n3,1) AS n3, array_length(n4,1) AS n4 FROM @@ -81,50 +81,50 @@ FROM aqo_queries aq, aqo_query_stat aqs, cardinality_error_without_aqo AS n2, execution_time_with_aqo AS n3, execution_time_without_aqo AS n4 - FROM aqo_query_stat aqs WHERE + FROM public.aqo_query_stat aqs WHERE aqs.query_hash = $1) AS al) AS q WHERE (aqs.query_hash = aq.query_hash) AND aqs.query_hash = $1; $func$ LANGUAGE SQL; -CREATE FUNCTION aqo_enable_query(hash bigint) +CREATE FUNCTION public.aqo_enable_query(hash bigint) RETURNS VOID AS $func$ -UPDATE aqo_queries SET +UPDATE public.aqo_queries SET learn_aqo = 'true', use_aqo = 'true' WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE FUNCTION aqo_disable_query(hash bigint) +CREATE FUNCTION public.aqo_disable_query(hash bigint) RETURNS VOID AS $func$ -UPDATE aqo_queries SET +UPDATE public.aqo_queries SET learn_aqo = 'false', use_aqo = 'false', auto_tuning = 'false' WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE FUNCTION aqo_clear_hist(hash bigint) +CREATE FUNCTION public.aqo_clear_hist(hash bigint) RETURNS VOID AS $func$ -DELETE FROM aqo_data WHERE 
fspace_hash=$1; +DELETE FROM public.aqo_data WHERE fspace_hash=$1; $func$ LANGUAGE SQL; -- Show queries that contains 'Never executed' nodes at the plan. -CREATE FUNCTION aqo_ne_queries() +CREATE FUNCTION public.aqo_ne_queries() RETURNS SETOF int AS $func$ -SELECT query_hash FROM aqo_query_stat aqs +SELECT query_hash FROM public.aqo_query_stat aqs WHERE -1 = ANY (cardinality_error_with_aqo::double precision[]); $func$ LANGUAGE SQL; -CREATE FUNCTION aqo_drop(hash bigint) +CREATE FUNCTION public.aqo_drop(hash bigint) RETURNS VOID AS $func$ -DELETE FROM aqo_queries aq WHERE (aq.query_hash = $1); -DELETE FROM aqo_data ad WHERE (ad.fspace_hash = $1); -DELETE FROM aqo_query_stat aq WHERE (aq.query_hash = $1); -DELETE FROM aqo_query_texts aq WHERE (aq.query_hash = $1); +DELETE FROM public.aqo_queries aq WHERE (aq.query_hash = $1); +DELETE FROM public.aqo_data ad WHERE (ad.fspace_hash = $1); +DELETE FROM public.aqo_query_stat aq WHERE (aq.query_hash = $1); +DELETE FROM public.aqo_query_texts aq WHERE (aq.query_hash = $1); $func$ LANGUAGE SQL; diff --git a/aqo--1.3--1.4.sql b/aqo--1.3--1.4.sql index 002a148a..f6df0263 100755 --- a/aqo--1.3--1.4.sql +++ b/aqo--1.3--1.4.sql @@ -3,9 +3,9 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "ALTER EXTENSION aqo UPDATE TO '1.4'" to load this file. \quit -ALTER TABLE aqo_data ADD COLUMN reliability double precision []; +ALTER TABLE public.aqo_data ADD COLUMN reliability double precision []; -DROP FUNCTION top_error_queries(int); +DROP FUNCTION public.top_error_queries(int); -- -- Get cardinality error of queries the last time they were executed. @@ -20,7 +20,7 @@ DROP FUNCTION top_error_queries(int); -- error - AQO error that calculated on plan nodes of the query. -- nexecs - number of executions of queries associated with this ID. 
-- -CREATE OR REPLACE FUNCTION show_cardinality_errors(controlled boolean) +CREATE OR REPLACE FUNCTION public.show_cardinality_errors(controlled boolean) RETURNS TABLE(num bigint, id bigint, fshash bigint, error float, nexecs bigint) AS $$ BEGIN @@ -35,7 +35,7 @@ IF (controlled) THEN aq.fspace_hash AS fs_hash, cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror, executions_with_aqo AS execs - FROM aqo_queries aq JOIN aqo_query_stat aqs + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs ON aq.query_hash = aqs.query_hash WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) ) AS q1 @@ -51,7 +51,7 @@ ELSE aq.fspace_hash AS fs_hash, array_avg(cardinality_error_without_aqo) AS cerror, executions_without_aqo AS execs - FROM aqo_queries aq JOIN aqo_query_stat aqs + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs ON aq.query_hash = aqs.query_hash WHERE TRUE = ANY (SELECT unnest(cardinality_error_without_aqo) IS NOT NULL) ) AS q1 @@ -60,5 +60,5 @@ END IF; END; $$ LANGUAGE plpgsql; -COMMENT ON FUNCTION show_cardinality_errors(boolean) IS +COMMENT ON FUNCTION public.show_cardinality_errors(boolean) IS 'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index f833e251..71da787d 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -3,12 +3,49 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "ALTER EXTENSION aqo UPDATE TO '1.5'" to load this file. \quit +-- +-- Re-create the aqo_queries table. +-- +DROP TABLE public.aqo_queries CASCADE; +CREATE TABLE aqo_queries ( + query_hash bigint CONSTRAINT aqo_queries_query_hash_idx PRIMARY KEY, + learn_aqo boolean NOT NULL, + use_aqo boolean NOT NULL, + fspace_hash bigint NOT NULL, + auto_tuning boolean NOT NULL +); + +-- +-- Re-create the aqo_query_texts table. 
+-- +DROP TABLE public.aqo_query_texts CASCADE; +CREATE TABLE aqo_query_texts ( + query_hash bigint CONSTRAINT aqo_query_texts_query_hash_idx PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, + query_text text NOT NULL +); + +-- +-- Re-create the aqo_query_stat table. +-- +DROP TABLE public.aqo_query_stat CASCADE; +CREATE TABLE aqo_query_stat ( + query_hash bigint CONSTRAINT aqo_query_stat_idx PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, + execution_time_with_aqo double precision[], + execution_time_without_aqo double precision[], + planning_time_with_aqo double precision[], + planning_time_without_aqo double precision[], + cardinality_error_with_aqo double precision[], + cardinality_error_without_aqo double precision[], + executions_with_aqo bigint, + executions_without_aqo bigint +); + -- -- Re-create the aqo_data table. Do so to keep the columns order. -- The oids array contains oids of permanent tables only. It is used for cleanup -- ML knowledge base from queries that refer to removed tables. 
-- -DROP TABLE aqo_data CASCADE; +DROP TABLE public.aqo_data CASCADE; CREATE TABLE aqo_data ( fspace_hash bigint NOT NULL REFERENCES aqo_queries ON DELETE CASCADE, fsspace_hash int NOT NULL, @@ -20,14 +57,22 @@ CREATE TABLE aqo_data ( ); CREATE UNIQUE INDEX aqo_fss_access_idx ON aqo_data (fspace_hash, fsspace_hash); -DROP FUNCTION top_time_queries; -DROP FUNCTION aqo_drop; -DROP FUNCTION clean_aqo_data; -DROP FUNCTION show_cardinality_errors; +INSERT INTO aqo_queries VALUES (0, false, false, 0, false); +INSERT INTO aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); +-- a virtual query for COMMON feature space + +CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE + ON aqo_queries FOR EACH STATEMENT + EXECUTE PROCEDURE invalidate_deactivated_queries_cache(); + +DROP FUNCTION public.top_time_queries; +DROP FUNCTION public.aqo_drop; +DROP FUNCTION public.clean_aqo_data; +DROP FUNCTION public.show_cardinality_errors; DROP FUNCTION array_mse; DROP FUNCTION array_avg; -DROP FUNCTION aqo_ne_queries; -- Not needed anymore due to changing in the logic -DROP FUNCTION aqo_clear_hist; -- Should be renamed and reworked +DROP FUNCTION public.aqo_ne_queries; -- Not needed anymore due to changing in the logic +DROP FUNCTION public.aqo_clear_hist; -- Should be renamed and reworked -- -- Show execution time of queries, for which AQO has statistics. 
@@ -244,3 +289,61 @@ $$ LANGUAGE plpgsql; COMMENT ON FUNCTION aqo_reset_query(bigint) IS 'Remove from AQO storage only learning data for given QueryId.'; + +DROP FUNCTION public.aqo_status; +CREATE FUNCTION aqo_status(hash bigint) +RETURNS TABLE ( + "learn" BOOL, + "use aqo" BOOL, + "auto tune" BOOL, + "fspace hash" bigINT, + "t_naqo" TEXT, + "err_naqo" TEXT, + "iters" BIGINT, + "t_aqo" TEXT, + "err_aqo" TEXT, + "iters_aqo" BIGINT +) +AS $func$ +SELECT learn_aqo,use_aqo,auto_tuning,fspace_hash, + to_char(execution_time_without_aqo[n4],'9.99EEEE'), + to_char(cardinality_error_without_aqo[n2],'9.99EEEE'), + executions_without_aqo, + to_char(execution_time_with_aqo[n3],'9.99EEEE'), + to_char(cardinality_error_with_aqo[n1],'9.99EEEE'), + executions_with_aqo +FROM aqo_queries aq, aqo_query_stat aqs, + (SELECT array_length(n1,1) AS n1, array_length(n2,1) AS n2, + array_length(n3,1) AS n3, array_length(n4,1) AS n4 + FROM + (SELECT cardinality_error_with_aqo AS n1, + cardinality_error_without_aqo AS n2, + execution_time_with_aqo AS n3, + execution_time_without_aqo AS n4 + FROM aqo_query_stat aqs WHERE + aqs.query_hash = $1) AS al) AS q +WHERE (aqs.query_hash = aq.query_hash) AND + aqs.query_hash = $1; +$func$ LANGUAGE SQL; + +DROP FUNCTION public.aqo_enable_query; +CREATE FUNCTION aqo_enable_query(hash bigint) +RETURNS VOID +AS $func$ +UPDATE aqo_queries SET + learn_aqo = 'true', + use_aqo = 'true' + WHERE query_hash = $1; +$func$ LANGUAGE SQL; + +DROP FUNCTION public.aqo_disable_query; +CREATE FUNCTION aqo_disable_query(hash bigint) +RETURNS VOID +AS $func$ +UPDATE aqo_queries SET + learn_aqo = 'false', + use_aqo = 'false', + auto_tuning = 'false' + WHERE query_hash = $1; +$func$ LANGUAGE SQL; + From 58bf24be9c13fe8324659f581706a63635521676 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 31 May 2022 12:07:01 +0300 Subject: [PATCH 088/203] Minor fixes on making AQO relocatable. Plus add a test which type of relocatability we really want. 
--- aqo--1.4--1.5.sql | 62 +++++++--------- expected/relocatable.out | 150 ++++++++++++++++++++++++--------------- sql/relocatable.sql | 58 +++++++++------ 3 files changed, 153 insertions(+), 117 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 71da787d..c5e1117e 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -3,10 +3,23 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "ALTER EXTENSION aqo UPDATE TO '1.5'" to load this file. \quit --- --- Re-create the aqo_queries table. --- +DROP FUNCTION array_mse; +DROP FUNCTION array_avg; +DROP FUNCTION public.aqo_clear_hist; -- Should be renamed and reworked +DROP FUNCTION public.aqo_disable_query; +DROP FUNCTION public.aqo_drop; +DROP FUNCTION public.aqo_enable_query; +DROP FUNCTION public.aqo_ne_queries; -- Not needed anymore due to changing in the logic +DROP FUNCTION public.aqo_status; +DROP FUNCTION public.clean_aqo_data; +DROP FUNCTION public.show_cardinality_errors; +DROP FUNCTION public.top_time_queries; + +DROP TABLE public.aqo_data CASCADE; DROP TABLE public.aqo_queries CASCADE; +DROP TABLE public.aqo_query_texts CASCADE; +DROP TABLE public.aqo_query_stat CASCADE; + CREATE TABLE aqo_queries ( query_hash bigint CONSTRAINT aqo_queries_query_hash_idx PRIMARY KEY, learn_aqo boolean NOT NULL, @@ -15,19 +28,11 @@ CREATE TABLE aqo_queries ( auto_tuning boolean NOT NULL ); --- --- Re-create the aqo_query_texts table. --- -DROP TABLE public.aqo_query_texts CASCADE; CREATE TABLE aqo_query_texts ( query_hash bigint CONSTRAINT aqo_query_texts_query_hash_idx PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, query_text text NOT NULL ); --- --- Re-create the aqo_query_stat table. 
--- -DROP TABLE public.aqo_query_stat CASCADE; CREATE TABLE aqo_query_stat ( query_hash bigint CONSTRAINT aqo_query_stat_idx PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, execution_time_with_aqo double precision[], @@ -41,18 +46,21 @@ CREATE TABLE aqo_query_stat ( ); -- --- Re-create the aqo_data table. Do so to keep the columns order. +-- Re-create the aqo_data table. -- The oids array contains oids of permanent tables only. It is used for cleanup -- ML knowledge base from queries that refer to removed tables. -- -DROP TABLE public.aqo_data CASCADE; CREATE TABLE aqo_data ( fspace_hash bigint NOT NULL REFERENCES aqo_queries ON DELETE CASCADE, fsspace_hash int NOT NULL, nfeatures int NOT NULL, features double precision[][], targets double precision[], + + -- oids of permanent tables only. It is used for cleanup + -- ML knowledge base from queries that refer to removed tables. oids oid [] DEFAULT NULL, + reliability double precision [] ); CREATE UNIQUE INDEX aqo_fss_access_idx ON aqo_data (fspace_hash, fsspace_hash); @@ -65,15 +73,6 @@ CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE ON aqo_queries FOR EACH STATEMENT EXECUTE PROCEDURE invalidate_deactivated_queries_cache(); -DROP FUNCTION public.top_time_queries; -DROP FUNCTION public.aqo_drop; -DROP FUNCTION public.clean_aqo_data; -DROP FUNCTION public.show_cardinality_errors; -DROP FUNCTION array_mse; -DROP FUNCTION array_avg; -DROP FUNCTION public.aqo_ne_queries; -- Not needed anymore due to changing in the logic -DROP FUNCTION public.aqo_clear_hist; -- Should be renamed and reworked - -- -- Show execution time of queries, for which AQO has statistics. 
-- controlled - show stat on executions where AQO was used for cardinality @@ -290,7 +289,6 @@ $$ LANGUAGE plpgsql; COMMENT ON FUNCTION aqo_reset_query(bigint) IS 'Remove from AQO storage only learning data for given QueryId.'; -DROP FUNCTION public.aqo_status; CREATE FUNCTION aqo_status(hash bigint) RETURNS TABLE ( "learn" BOOL, @@ -303,8 +301,7 @@ RETURNS TABLE ( "t_aqo" TEXT, "err_aqo" TEXT, "iters_aqo" BIGINT -) -AS $func$ +) AS $$ SELECT learn_aqo,use_aqo,auto_tuning,fspace_hash, to_char(execution_time_without_aqo[n4],'9.99EEEE'), to_char(cardinality_error_without_aqo[n2],'9.99EEEE'), @@ -324,26 +321,21 @@ FROM aqo_queries aq, aqo_query_stat aqs, aqs.query_hash = $1) AS al) AS q WHERE (aqs.query_hash = aq.query_hash) AND aqs.query_hash = $1; -$func$ LANGUAGE SQL; +$$ LANGUAGE SQL; -DROP FUNCTION public.aqo_enable_query; CREATE FUNCTION aqo_enable_query(hash bigint) -RETURNS VOID -AS $func$ +RETURNS VOID AS $$ UPDATE aqo_queries SET learn_aqo = 'true', use_aqo = 'true' WHERE query_hash = $1; -$func$ LANGUAGE SQL; +$$ LANGUAGE SQL; -DROP FUNCTION public.aqo_disable_query; CREATE FUNCTION aqo_disable_query(hash bigint) -RETURNS VOID -AS $func$ +RETURNS VOID AS $$ UPDATE aqo_queries SET learn_aqo = 'false', use_aqo = 'false', auto_tuning = 'false' WHERE query_hash = $1; -$func$ LANGUAGE SQL; - +$$ LANGUAGE SQL; diff --git a/expected/relocatable.out b/expected/relocatable.out index 8e5eca93..39055fbb 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -1,85 +1,117 @@ -DROP EXTENSION IF EXISTS aqo CASCADE; -DROP SCHEMA IF EXISTS test CASCADE; -NOTICE: schema "test" does not exist, skipping +DROP EXTENSION aqo CASCADE; CREATE EXTENSION aqo; SET aqo.join_threshold = 0; -SET aqo.mode = 'intelligent'; -CREATE TABLE test (id SERIAL, data TEXT); -INSERT INTO test (data) VALUES ('string'); -SELECT * FROM test; - id | data -----+-------- - 1 | string +SET aqo.mode = 'learn'; -- use this mode for unconditional learning +CREATE TABLE test AS (SELECT id, 
'payload' || id FROM generate_series(1,100) id); +ANALYZE test; +-- Learn on a query +SELECT count(*) FROM test; + count +------- + 100 (1 row) -SELECT query_text FROM aqo_query_texts; - query_text ---------------------------------------- - COMMON feature space (do not delete!) - SELECT * FROM test; -(2 rows) - -SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; - learn_aqo | use_aqo | auto_tuning ------------+---------+------------- - f | f | f - t | f | t +SELECT query_text,learn_aqo, use_aqo, auto_tuning +FROM aqo_query_texts JOIN aqo_queries USING (query_hash) +; -- Check result. TODO: use aqo_status() + query_text | learn_aqo | use_aqo | auto_tuning +---------------------------------------+-----------+---------+------------- + COMMON feature space (do not delete!) | f | f | f + SELECT count(*) FROM test; | t | t | f (2 rows) +-- Create a schema and move AQO into it. CREATE SCHEMA IF NOT EXISTS test; ALTER EXTENSION aqo SET SCHEMA test; -SET aqo.mode = 'intelligent'; -CREATE TABLE test1 (id SERIAL, data TEXT); -INSERT INTO test1 (data) VALUES ('string'); -SELECT * FROM test1; - id | data -----+-------- - 1 | string +-- Do something to be confident that AQO works +SELECT count(*) FROM test; + count +------- + 100 +(1 row) + +SELECT count(*) FROM test WHERE id < 10; + count +------- + 9 +(1 row) + +SELECT query_text,learn_aqo, use_aqo, auto_tuning +FROM test.aqo_query_texts JOIN test.aqo_queries USING (query_hash) +; -- Check result. TODO: We want to find here both queries executed above + query_text | learn_aqo | use_aqo | auto_tuning +---------------------------------------+-----------+---------+------------- + COMMON feature space (do not delete!) 
| f | f | f + SELECT count(*) FROM test; | t | t | f +(2 rows) + +-- Add schema which contains AQO to the end of search_path +SELECT set_config('search_path', current_setting('search_path') || ', test', false); + set_config +----------------------- + "$user", public, test +(1 row) + +SELECT count(*) FROM test; + count +------- + 100 +(1 row) + +SELECT count(*) FROM test WHERE id < 10; + count +------- + 9 (1 row) -SELECT query_text FROM test.aqo_query_texts; - query_text ---------------------------------------- - COMMON feature space (do not delete!) - SELECT * FROM test; +SELECT query_text,learn_aqo, use_aqo, auto_tuning +FROM test.aqo_query_texts JOIN test.aqo_queries USING (query_hash) +; -- Check result. + query_text | learn_aqo | use_aqo | auto_tuning +------------------------------------------+-----------+---------+------------- + COMMON feature space (do not delete!) | f | f | f + SELECT count(*) FROM test; | t | t | f + SELECT count(*) FROM test WHERE id < 10; | t | t | f +(3 rows) + +/* + * Below, we should check each UI function + */ +SELECT aqo_disable_query(id) FROM ( + SELECT query_hash AS id FROM aqo_queries WHERE query_hash <> 0) AS q1; + aqo_disable_query +------------------- + + (2 rows) SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f - t | f | t -(2 rows) - -SET search_path TO test; -CREATE TABLE test2 (id SERIAL, data TEXT); -INSERT INTO test2 (data) VALUES ('string'); -SELECT * FROM test2; - id | data -----+-------- - 1 | string -(1 row) - -SELECT query_text FROM aqo_query_texts; - query_text ---------------------------------------- - COMMON feature space (do not delete!) 
- SELECT * FROM test; - SELECT * FROM test2; + f | f | f + f | f | f (3 rows) -SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; +SELECT aqo_enable_query(id) FROM ( + SELECT query_hash AS id FROM aqo_queries WHERE query_hash <> 0) AS q1; + aqo_enable_query +------------------ + + +(2 rows) + +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f - t | f | t - t | f | t + t | t | f + t | t | f (3 rows) +RESET search_path; +DROP TABLE test CASCADE; DROP SCHEMA IF EXISTS test CASCADE; -NOTICE: drop cascades to 2 other objects -DETAIL: drop cascades to extension aqo -drop cascades to table test2 +NOTICE: drop cascades to extension aqo DROP EXTENSION IF EXISTS aqo CASCADE; NOTICE: extension "aqo" does not exist, skipping -SET search_path TO public; diff --git a/sql/relocatable.sql b/sql/relocatable.sql index 60085816..d48de902 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -1,38 +1,50 @@ -DROP EXTENSION IF EXISTS aqo CASCADE; -DROP SCHEMA IF EXISTS test CASCADE; - +DROP EXTENSION aqo CASCADE; CREATE EXTENSION aqo; SET aqo.join_threshold = 0; -SET aqo.mode = 'intelligent'; +SET aqo.mode = 'learn'; -- use this mode for unconditional learning -CREATE TABLE test (id SERIAL, data TEXT); -INSERT INTO test (data) VALUES ('string'); -SELECT * FROM test; +CREATE TABLE test AS (SELECT id, 'payload' || id FROM generate_series(1,100) id); +ANALYZE test; -SELECT query_text FROM aqo_query_texts; -SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; +-- Learn on a query +SELECT count(*) FROM test; +SELECT query_text,learn_aqo, use_aqo, auto_tuning +FROM aqo_query_texts JOIN aqo_queries USING (query_hash) +; -- Check result. TODO: use aqo_status() +-- Create a schema and move AQO into it. 
CREATE SCHEMA IF NOT EXISTS test; ALTER EXTENSION aqo SET SCHEMA test; -SET aqo.mode = 'intelligent'; +-- Do something to be confident that AQO works +SELECT count(*) FROM test; +SELECT count(*) FROM test WHERE id < 10; -CREATE TABLE test1 (id SERIAL, data TEXT); -INSERT INTO test1 (data) VALUES ('string'); -SELECT * FROM test1; +SELECT query_text,learn_aqo, use_aqo, auto_tuning +FROM test.aqo_query_texts JOIN test.aqo_queries USING (query_hash) +; -- Check result. TODO: We want to find here both queries executed above -SELECT query_text FROM test.aqo_query_texts; -SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; +-- Add schema which contains AQO to the end of search_path +SELECT set_config('search_path', current_setting('search_path') || ', test', false); -SET search_path TO test; +SELECT count(*) FROM test; +SELECT count(*) FROM test WHERE id < 10; -CREATE TABLE test2 (id SERIAL, data TEXT); -INSERT INTO test2 (data) VALUES ('string'); -SELECT * FROM test2; +SELECT query_text,learn_aqo, use_aqo, auto_tuning +FROM test.aqo_query_texts JOIN test.aqo_queries USING (query_hash) +; -- Check result. 
-SELECT query_text FROM aqo_query_texts; -SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; +/* + * Below, we should check each UI function + */ +SELECT aqo_disable_query(id) FROM ( + SELECT query_hash AS id FROM aqo_queries WHERE query_hash <> 0) AS q1; +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; +SELECT aqo_enable_query(id) FROM ( + SELECT query_hash AS id FROM aqo_queries WHERE query_hash <> 0) AS q1; +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; + +RESET search_path; +DROP TABLE test CASCADE; DROP SCHEMA IF EXISTS test CASCADE; DROP EXTENSION IF EXISTS aqo CASCADE; - -SET search_path TO public; \ No newline at end of file From 7b2a7318364907faa3d48c7d45f131cee859eb24 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Mon, 9 May 2022 21:09:36 +0300 Subject: [PATCH 089/203] Add search by fss hash in aqo_data table and hash table. If we didn't any neibours with fs and fss hash indexes in aqo_data, we write new object in aqo_data with target value as average value of neirest neibours by fss_hash. I don't consider fs_hash in find neirest neibour for calculating average value of target for new object because I think fs_hash contain a description of a completely different query with a different table that matches or almost matches the indicative descriptions of the current object, but they are not an entity. 
--- Makefile | 3 +- aqo--1.0.sql | 2 + aqo.h | 2 +- cardinality_estimation.c | 5 +- expected/aqo_fdw.out | 22 +--- expected/aqo_learn.out | 30 ++--- expected/look_a_like.out | 240 +++++++++++++++++++++++++++++++++++++++ expected/unsupported.out | 2 +- machine_learning.h | 2 + postprocessing.c | 22 +++- sql/look_a_like.sql | 70 ++++++++++++ storage.c | 46 +++++--- 12 files changed, 395 insertions(+), 51 deletions(-) create mode 100644 expected/look_a_like.out create mode 100644 sql/look_a_like.sql diff --git a/Makefile b/Makefile index 05d05bb9..5a3875cf 100755 --- a/Makefile +++ b/Makefile @@ -26,7 +26,8 @@ REGRESS = aqo_disabled \ statement_timeout \ temp_tables \ top_queries \ - relocatable + relocatable\ + look_a_like fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw stat_srcdir = $(top_srcdir)/contrib/pg_stat_statements diff --git a/aqo--1.0.sql b/aqo--1.0.sql index 67395744..4281bfa7 100644 --- a/aqo--1.0.sql +++ b/aqo--1.0.sql @@ -50,3 +50,5 @@ CREATE FUNCTION invalidate_deactivated_queries_cache() RETURNS trigger CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE ON public.aqo_queries FOR EACH STATEMENT EXECUTE PROCEDURE invalidate_deactivated_queries_cache(); +CREATE INDEX aqo_fss_idx + on public.aqo_data (fsspace_hash); \ No newline at end of file diff --git a/aqo.h b/aqo.h index 3891e2d4..23b0c970 100644 --- a/aqo.h +++ b/aqo.h @@ -284,7 +284,7 @@ extern bool update_query(uint64 qhash, uint64 fhash, extern bool add_query_text(uint64 query_hash, const char *query_string); extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe); -extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids); +extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool use_idx_fss); extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids, bool isTimedOut); extern bool update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids); diff --git a/cardinality_estimation.c 
b/cardinality_estimation.c index 48630754..20f3510c 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -91,7 +91,10 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, * small part of paths was used for AQO learning and fetch into the AQO * knowledge base. */ - result = -1; + if (!load_fss(query_context.fspace_hash, *fss, &data, NULL, false)) + result = -1; + else + result = OkNNr_predict(&data, features); } #ifdef AQO_DEBUG_PRINT predict_debug_output(clauses, selectivities, relsigns, *fss, result); diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index ee4a4ab6..922c76e6 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -88,32 +88,22 @@ SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; ') AS str WHERE str NOT LIKE '%Sort Method%'; - str ------------------------------------------------------------- - Merge Join (actual rows=1 loops=1) + str +------------------------------------------- + Foreign Scan (actual rows=1 loops=1) AQO not used - Merge Cond: (a.x = b.x) - -> Sort (actual rows=1 loops=1) - AQO not used - Sort Key: a.x - -> Foreign Scan on frgn a (actual rows=1 loops=1) - AQO not used - -> Sort (actual rows=1 loops=1) - AQO not used - Sort Key: b.x - -> Foreign Scan on frgn b (actual rows=1 loops=1) - AQO not used + Relations: (frgn a) INNER JOIN (frgn b) Using aqo: true AQO mode: LEARN JOINS: 0 -(16 rows) +(6 rows) EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; QUERY PLAN -------------------------------------------------------------------------------------------------------- Foreign Scan (actual rows=1 loops=1) - AQO: rows=1, error=0% + AQO not used Output: a.x, b.x Relations: (public.frgn a) INNER JOIN (public.frgn b) Remote SQL: SELECT r1.x, r2.x FROM (public.local r1 INNER JOIN public.local r2 ON (((r1.x = r2.x)))) diff --git 
a/expected/aqo_learn.out b/expected/aqo_learn.out index 0153fdce..ad59bd4f 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -265,12 +265,9 @@ ORDER BY (md5(query_text)) | | WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; {1,1,1} | 4 | EXPLAIN SELECT * FROM aqo_test0 + | | WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; - {1} | 0 | SELECT count(*) FROM tmp1; - {1} | 0 | SELECT count(*) FROM tmp1; - {1} | 6 | EXPlAIN SELECT t1.a, t2.b, t3.c + - | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + - | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 1 | EXPlAIN SELECT t1.a, t2.b, t3.c + + {1,1,1,1,1} | 0 | SELECT count(*) FROM tmp1; + {1,1,1,1,1} | 0 | SELECT count(*) FROM tmp1; + {1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; {1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c + @@ -279,34 +276,37 @@ ORDER BY (md5(query_text)) {1} | 3 | EXPlAIN SELECT t1.a, t2.b, t3.c + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c + + {1} | 6 | EXPlAIN SELECT t1.a, t2.b, t3.c + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 2 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + + {1,1} | 1 | EXPlAIN SELECT t1.a, t2.b, t3.c + + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; + {1} | 4 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; {1} | 2 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS 
t2 + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; - {1} | 4 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + + {1} | 2 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; - {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + + {1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + | | WHERE t1.a = t2.b AND t2.a = t3.b; - {1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + + {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + | | WHERE t1.a = t2.b AND t2.a = t3.b; {1} | 0 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + | | WHERE t1.a = t2.b AND t2.a = t3.b; - {1} | 3 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; - {1} | 0 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + {1} | 3 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; {1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; - {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + {1} | 0 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; (21 rows) @@ -587,7 +587,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM 
aqo_test1;'); SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); estimated | actual -----------+-------- - 20 | 19 + 19 | 19 (1 row) SELECT count(*) FROM diff --git a/expected/look_a_like.out b/expected/look_a_like.out new file mode 100644 index 00000000..cf376116 --- /dev/null +++ b/expected/look_a_like.out @@ -0,0 +1,240 @@ +CREATE IF NOT EXISTS EXTENSION aqo; +ERROR: syntax error at or near "IF" +LINE 1: CREATE IF NOT EXISTS EXTENSION aqo; + ^ +SET aqo.join_threshold = 0; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'on'; +DROP TABLE IF EXISTS a,b CASCADE; +NOTICE: table "a" does not exist, skipping +NOTICE: table "b" does not exist, skipping +CREATE TABLE a (x int); +INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival; +CREATE TABLE b (y int); +INSERT INTO b (y) SELECT mod(ival + 1,10) FROM generate_series(1,1000) As ival; +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +-- no one predicted rows. 
we use knowledge cardinalities of the query +-- in the next queries with the same fss_hash +SELECT str AS result +FROM expln(' +SELECT x FROM A where x = 5;') AS str +WHERE str NOT LIKE 'Query Identifier%'; + result +------------------------------------------------ + Seq Scan on public.a (actual rows=100 loops=1) + AQO not used + Output: x + Filter: (a.x = 5) + Rows Removed by Filter: 900 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(8 rows) + +-- cardinality 100 in the first Seq Scan on a +SELECT str AS result +FROM expln(' +SELECT x FROM A,B WHERE x = 5 AND A.x = B.y;') AS str +WHERE str NOT LIKE 'Query Identifier%'; + result +------------------------------------------------------------ + Nested Loop (actual rows=10000 loops=1) + AQO not used + Output: a.x + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: a.x + Filter: (a.x = 5) + Rows Removed by Filter: 900 + -> Materialize (actual rows=100 loops=100) + AQO not used + Output: b.y + -> Seq Scan on public.b (actual rows=100 loops=1) + AQO not used + Output: b.y + Filter: (b.y = 5) + Rows Removed by Filter: 900 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(19 rows) + +-- cardinality 100 in Nesteed Loop in the first Seq Scan on a +SELECT str AS result +FROM expln(' +SELECT x, sum(x) FROM A,B WHERE y = 5 AND A.x = B.y group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%'; + result +------------------------------------------------------------------ + GroupAggregate (actual rows=1 loops=1) + AQO not used + Output: a.x, sum(a.x) + Group Key: a.x + -> Nested Loop (actual rows=10000 loops=1) + AQO not used + Output: a.x + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: a.x + Filter: (a.x = 5) + Rows Removed by Filter: 900 + -> Materialize (actual rows=100 loops=100) + AQO: rows=100, error=0% + Output: b.y + -> Seq Scan on public.b (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: b.y + Filter: (b.y = 5) + Rows Removed by 
Filter: 900 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(23 rows) + +-- cardinality 100 in the first Seq Scan on a +SELECT str AS result +FROM expln(' +SELECT x, sum(x) FROM A WHERE x = 5 group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%'; + result +------------------------------------------------------ + GroupAggregate (actual rows=1 loops=1) + AQO not used + Output: x, sum(x) + Group Key: a.x + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: x + Filter: (a.x = 5) + Rows Removed by Filter: 900 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(12 rows) + +-- no one predicted rows. we use knowledge cardinalities of the query +-- in the next queries with the same fss_hash +SELECT str AS result +FROM expln(' +SELECT x FROM A where x < 10 group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%'; + result +------------------------------------------------------- + HashAggregate (actual rows=10 loops=1) + AQO not used + Output: x + Group Key: a.x + Batches: 1 Memory Usage: 40kB + -> Seq Scan on public.a (actual rows=1000 loops=1) + AQO not used + Output: x + Filter: (a.x < 10) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(12 rows) + +-- cardinality 1000 in Seq Scan on a +SELECT str AS result +FROM expln(' +SELECT x,y FROM A,B WHERE x < 10 AND A.x = B.y;') AS str +WHERE str NOT LIKE 'Query Identifier%'; + result +------------------------------------------------------------- + Merge Join (actual rows=100000 loops=1) + AQO not used + Output: a.x, b.y + Merge Cond: (a.x = b.y) + -> Sort (actual rows=1000 loops=1) + AQO not used + Output: a.x + Sort Key: a.x + Sort Method: quicksort Memory: 79kB + -> Seq Scan on public.a (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: a.x + Filter: (a.x < 10) + -> Sort (actual rows=99901 loops=1) + AQO not used + Output: b.y + Sort Key: b.y + Sort Method: quicksort Memory: 79kB + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO not used + Output: b.y + Using 
aqo: true + AQO mode: LEARN + JOINS: 0 +(24 rows) + +-- cardinality 100 in Seq Scan on a and Seq Scan on b +SELECT str AS result +FROM expln(' +SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%'; + result +---------------------------------------------------------------- + HashAggregate (actual rows=0 loops=1) + AQO not used + Output: a.x + Group Key: a.x + Batches: 1 Memory Usage: 40kB + -> Nested Loop (actual rows=0 loops=1) + AQO not used + Output: a.x + -> Seq Scan on public.a (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: a.x + Filter: (a.x < 10) + -> Materialize (actual rows=0 loops=1000) + AQO not used + -> Seq Scan on public.b (actual rows=0 loops=1) + AQO not used + Filter: (b.y > 10) + Rows Removed by Filter: 1000 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(21 rows) + +-- cardinality 1000 Hash Cond: (a.x = b.y) and 1 Seq Scan on b +-- this cardinality is wrong because we take it from bad neibours (previous query). +-- clause y > 10 give count of rows with the same clauses. 
+SELECT str AS result +FROM expln(' +SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str +WHERE str NOT LIKE 'Query Identifier%'; + result +---------------------------------------------------------- + Hash Join (actual rows=0 loops=1) + AQO not used + Output: a.x, b.y + Hash Cond: (a.x = b.y) + -> Seq Scan on public.a (actual rows=1 loops=1) + AQO: rows=1000, error=100% + Output: a.x + Filter: (a.x < 10) + -> Hash (actual rows=0 loops=1) + AQO not used + Output: b.y + Buckets: 1024 Batches: 1 Memory Usage: 8kB + -> Seq Scan on public.b (actual rows=0 loops=1) + AQO: rows=1, error=100% + Output: b.y + Filter: (b.y > 10) + Rows Removed by Filter: 1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(20 rows) + diff --git a/expected/unsupported.out b/expected/unsupported.out index 2f4f04a5..9596fcc4 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -381,7 +381,7 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) -> Aggregate (actual rows=1 loops=1000) AQO not used -> Seq Scan on t t0 (actual rows=50 loops=1000) - AQO not used + AQO: rows=50, error=0% Filter: (x = t.x) Rows Removed by Filter: 950 SubPlan 2 diff --git a/machine_learning.h b/machine_learning.h index a09b3102..592af1e0 100644 --- a/machine_learning.h +++ b/machine_learning.h @@ -25,5 +25,7 @@ typedef struct OkNNrdata extern double OkNNr_predict(OkNNrdata *data, double *features); extern int OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor); +extern int get_avg_over_neibours(OkNNrdata *data, + double *features); #endif /* MACHINE_LEARNING_H */ diff --git a/postprocessing.c b/postprocessing.c index eeb4c249..ee25a3d1 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -94,14 +94,32 @@ atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, List *reloids, bool isTimedOut) { LOCKTAG tag; + int j; init_lock_tag(&tag, fs, fss); LockAcquire(&tag, ExclusiveLock, false, false); if (!load_fss_ext(fs, fss, data, NULL, !isTimedOut)) - 
data->rows = 0; - data->rows = OkNNr_learn(data, features, target, rfactor); + /* + * Add a new object in aqo_data table with predicted target value + */ + if (load_fss(fs, fss, data, NULL, false)) + { + for (j = 0; j < data->cols; ++j) + data->matrix[data->rows][j] = features[j]; + data->targets[data->rows] = OkNNr_predict(data, features); + data->rfactors[data->rows] = rfactor; + data->rows += 1; + } + else + { + data->rows = 0; + data->rows = OkNNr_learn(data, features, target, rfactor); + } + else + data->rows = OkNNr_learn(data, features, target, rfactor); + update_fss_ext(fs, fss, data, reloids, isTimedOut); LockRelease(&tag, ExclusiveLock, false); diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql new file mode 100644 index 00000000..d8098b15 --- /dev/null +++ b/sql/look_a_like.sql @@ -0,0 +1,70 @@ +CREATE IF NOT EXISTS EXTENSION aqo; +SET aqo.join_threshold = 0; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'on'; + +DROP TABLE IF EXISTS a,b CASCADE; +CREATE TABLE a (x int); +INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival; + +CREATE TABLE b (y int); +INSERT INTO b (y) SELECT mod(ival + 1,10) FROM generate_series(1,1000) As ival; + +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +-- no one predicted rows. 
we use knowledge cardinalities of the query +-- in the next queries with the same fss_hash +SELECT str AS result +FROM expln(' +SELECT x FROM A where x = 5;') AS str +WHERE str NOT LIKE 'Query Identifier%'; +-- cardinality 100 in the first Seq Scan on a +SELECT str AS result +FROM expln(' +SELECT x FROM A,B WHERE x = 5 AND A.x = B.y;') AS str +WHERE str NOT LIKE 'Query Identifier%'; +-- cardinality 100 in Nesteed Loop in the first Seq Scan on a +SELECT str AS result +FROM expln(' +SELECT x, sum(x) FROM A,B WHERE y = 5 AND A.x = B.y group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%'; +-- cardinality 100 in the first Seq Scan on a +SELECT str AS result +FROM expln(' +SELECT x, sum(x) FROM A WHERE x = 5 group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%'; + +-- no one predicted rows. we use knowledge cardinalities of the query +-- in the next queries with the same fss_hash +SELECT str AS result +FROM expln(' +SELECT x FROM A where x < 10 group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%'; +-- cardinality 1000 in Seq Scan on a +SELECT str AS result +FROM expln(' +SELECT x,y FROM A,B WHERE x < 10 AND A.x = B.y;') AS str +WHERE str NOT LIKE 'Query Identifier%'; + +-- cardinality 100 in Seq Scan on a and Seq Scan on b +SELECT str AS result +FROM expln(' +SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%'; +-- cardinality 1000 Hash Cond: (a.x = b.y) and 1 Seq Scan on b +-- this cardinality is wrong because we take it from bad neibours (previous query). +-- clause y > 10 give count of rows with the same clauses. 
+SELECT str AS result +FROM expln(' +SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str +WHERE str NOT LIKE 'Query Identifier%'; diff --git a/storage.c b/storage.c index c9f75b3e..00126de8 100644 --- a/storage.c +++ b/storage.c @@ -380,7 +380,7 @@ bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe) { if (isSafe && (!aqo_learn_statement_timeout || !lc_has_fss(fs, fss))) - return load_fss(fs, fss, data, reloids); + return load_fss(fs, fss, data, reloids, true); else { Assert(aqo_learn_statement_timeout); @@ -403,30 +403,39 @@ load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe) * objects in the given feature space */ bool -load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids) +load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool use_idx_fss) { Relation hrel; Relation irel; HeapTuple tuple; TupleTableSlot *slot; - bool shouldFree; - bool find_ok = false; - IndexScanDesc scan; - ScanKeyData key[2]; - Datum values[AQO_DATA_COLUMNS]; - bool isnull[AQO_DATA_COLUMNS]; - bool success = true; + bool shouldFree; + bool find_ok = false; + IndexScanDesc scan; + Datum values[AQO_DATA_COLUMNS]; + bool isnull[AQO_DATA_COLUMNS]; + bool success = true; + ScanKeyData key[2]; if (!open_aqo_relation(NULL, "aqo_data", "aqo_fss_access_idx", AccessShareLock, &hrel, &irel)) - return false; + return false; - scan = index_beginscan(hrel, irel, SnapshotSelf, 2, 0); - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fs)); - ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss)); - index_rescan(scan, key, 2, NULL, 0); + if (use_idx_fss) + { + scan = index_beginscan(hrel, irel, SnapshotSelf, 2, 0); + ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fs)); + ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss)); + } + else + { + scan = index_beginscan(hrel, irel, SnapshotSelf, 2, 0); + ScanKeyInit(&key[0], 1, 
BTLessEqualStrategyNumber, F_INT8LE, Int64GetDatum(0)); + ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss)); + } + index_rescan(scan, key, 2, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); @@ -480,6 +489,14 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids) return success; } +PG_FUNCTION_INFO_V1(xxx); +Datum xxx(PG_FUNCTION_ARGS) +{ + elog(NOTICE, "xxx called"); + load_fss(5, 2027816329,NULL, NULL, false); + PG_RETURN_VOID(); +} + bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids, bool isTimedOut) @@ -610,6 +627,7 @@ update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids) if (update_indexes) my_index_insert(irel, values, isnull, &(nw_tuple->t_self), hrel, UNIQUE_CHECK_YES); + result = true; } else From 253c7b3e7d25b2d51d65305804f6d81860b415ff Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 1 Jun 2022 17:02:17 +0300 Subject: [PATCH 090/203] Try to search in surrounding feature spaces for the fss data. Side effect: ML knowledge base become smaller because we make right decision more quickly. TODO: unexpectedly found out that we don't learn on postgres_fdw plan nodes. 
--- Makefile | 7 +- aqo--1.0.sql | 2 - aqo.h | 3 +- cardinality_estimation.c | 13 ++- expected/aqo_fdw.out | 7 +- expected/aqo_learn.out | 40 +++---- expected/clean_aqo_data.out | 4 +- expected/look_a_like.out | 6 +- expected/relocatable.out | 1 - expected/temp_tables.out | 12 +- expected/top_queries.out | 1 + machine_learning.h | 2 - postprocessing.c | 22 +--- sql/aqo_fdw.sql | 8 +- sql/aqo_learn.sql | 12 +- sql/look_a_like.sql | 4 +- sql/relocatable.sql | 1 - sql/temp_tables.sql | 4 +- sql/top_queries.sql | 2 + storage.c | 215 ++++++++++++++++++++++++------------ 20 files changed, 216 insertions(+), 150 deletions(-) diff --git a/Makefile b/Makefile index 5a3875cf..e87dc52e 100755 --- a/Makefile +++ b/Makefile @@ -4,9 +4,10 @@ EXTENSION = aqo EXTVERSION = 1.5 PGFILEDESC = "AQO - Adaptive Query Optimization" MODULE_big = aqo -OBJS = aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ -hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ -selectivity_cache.o storage.o utils.o learn_cache.o aqo_shared.o $(WIN32RES) +OBJS = $(WIN32RES) \ + aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ + hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ + selectivity_cache.o storage.o utils.o learn_cache.o aqo_shared.o TAP_TESTS = 1 diff --git a/aqo--1.0.sql b/aqo--1.0.sql index 4281bfa7..67395744 100644 --- a/aqo--1.0.sql +++ b/aqo--1.0.sql @@ -50,5 +50,3 @@ CREATE FUNCTION invalidate_deactivated_queries_cache() RETURNS trigger CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE ON public.aqo_queries FOR EACH STATEMENT EXECUTE PROCEDURE invalidate_deactivated_queries_cache(); -CREATE INDEX aqo_fss_idx - on public.aqo_data (fsspace_hash); \ No newline at end of file diff --git a/aqo.h b/aqo.h index 23b0c970..e970a2c3 100644 --- a/aqo.h +++ b/aqo.h @@ -284,7 +284,8 @@ extern bool update_query(uint64 qhash, uint64 fhash, extern bool add_query_text(uint64 query_hash, const char 
*query_string); extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe); -extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool use_idx_fss); +extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, + bool wideSearch); extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids, bool isTimedOut); extern bool update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids); diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 20f3510c..4ed9f86b 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -87,14 +87,21 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, { /* * Due to planning optimizer tries to build many alternate paths. Many - * of these not used in final query execution path. Consequently, only - * small part of paths was used for AQO learning and fetch into the AQO - * knowledge base. + * of them aren't used in final query execution path. Consequently, only + * small part of paths was used for AQO learning and stored into + * the AQO knowledge base. */ + + /* Try to search in surrounding feature spaces for the same node */ if (!load_fss(query_context.fspace_hash, *fss, &data, NULL, false)) result = -1; else + { + elog(DEBUG5, "[AQO] Make prediction for fss %d by a neighbour " + "includes %d feature(s) and %d fact(s).", + *fss, data.cols, data.rows); result = OkNNr_predict(&data, features); + } } #ifdef AQO_DEBUG_PRINT predict_debug_output(clauses, selectivities, relsigns, *fss, result); diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index 922c76e6..36af3bd6 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -85,8 +85,8 @@ SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants -- Trivial JOIN push-down. 
SELECT str FROM expln(' -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) -SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; ') AS str WHERE str NOT LIKE '%Sort Method%'; str ------------------------------------------- @@ -98,8 +98,9 @@ SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; JOINS: 0 (6 rows) +-- TODO: Should learn on postgres_fdw nodes EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) -SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; QUERY PLAN -------------------------------------------------------------------------------------------------------- Foreign Scan (actual rows=1 loops=1) diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index ad59bd4f..2f0767ce 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -265,48 +265,48 @@ ORDER BY (md5(query_text)) | | WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; {1,1,1} | 4 | EXPLAIN SELECT * FROM aqo_test0 + | | WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; - {1,1,1,1,1} | 0 | SELECT count(*) FROM tmp1; - {1,1,1,1,1} | 0 | SELECT count(*) FROM tmp1; - {1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c + + {1} | 0 | SELECT count(*) FROM tmp1; + {1} | 0 | SELECT count(*) FROM tmp1; + {1} | 6 | EXPlAIN SELECT t1.a, t2.b, t3.c + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c + + {1} | 1 | EXPlAIN SELECT t1.a, t2.b, t3.c + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 3 | EXPlAIN SELECT t1.a, t2.b, t3.c + + {1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + | | WHERE t1.a < 1 AND t3.b < 1 
AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 6 | EXPlAIN SELECT t1.a, t2.b, t3.c + + {1} | 3 | EXPlAIN SELECT t1.a, t2.b, t3.c + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1,1} | 1 | EXPlAIN SELECT t1.a, t2.b, t3.c + + {1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 4 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + - | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; {1} | 2 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; {1} | 2 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; - {1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + + {1} | 4 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; + {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + | | WHERE t1.a = t2.b AND t2.a = t3.b; - {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + + {1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + | | WHERE t1.a = t2.b AND t2.a = t3.b; {1} | 0 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + | | WHERE t1.a = t2.b AND t2.a = t3.b; - {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + {1} | 3 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; - {1} | 3 | EXPLAIN 
SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + {1} | 0 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; {1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; - {1} | 0 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; (21 rows) @@ -537,7 +537,8 @@ SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); 20 | 20 (1 row) -SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); estimated | actual -----------+-------- 20 | 19 @@ -553,13 +554,13 @@ SELECT * FROM check_estimated_rows(' SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -'); +'); -- Learn on the query estimated | actual -----------+-------- 20 | 17 (1 row) -SELECT count(*) FROM -- Learn on the query +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 ; count @@ -584,7 +585,8 @@ SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); 20 | 20 (1 row) -SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); +SELECT * FROM check_estimated_rows( + 'SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); estimated | actual -----------+-------- 19 | 19 @@ -592,7 +594,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 AS t1, aqo_test1 AS SELECT 
count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 -; -- Learn on a query with one join +; -- Learn on a new query with one join (cardinality of this join AQO extracted from previous 3-join query) count ------- 2 diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index af9b7ae3..18f6e3b5 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -141,7 +141,7 @@ SELECT 'b'::regclass::oid AS b_oid \gset SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); count ------- - 3 + 2 (1 row) SELECT count(*) FROM aqo_queries WHERE @@ -200,7 +200,7 @@ DROP TABLE a; SELECT aqo_cleanup(); aqo_cleanup ------------- - (2,4) + (2,3) (1 row) /* diff --git a/expected/look_a_like.out b/expected/look_a_like.out index cf376116..bf966607 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -1,7 +1,4 @@ -CREATE IF NOT EXISTS EXTENSION aqo; -ERROR: syntax error at or near "IF" -LINE 1: CREATE IF NOT EXISTS EXTENSION aqo; - ^ +CREATE EXTENSION aqo; SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; @@ -238,3 +235,4 @@ WHERE str NOT LIKE 'Query Identifier%'; JOINS: 0 (20 rows) +DROP EXTENSION aqo CASCADE; diff --git a/expected/relocatable.out b/expected/relocatable.out index 39055fbb..ec9d88b2 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -1,4 +1,3 @@ -DROP EXTENSION aqo CASCADE; CREATE EXTENSION aqo; SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; -- use this mode for unconditional learning diff --git a/expected/temp_tables.out b/expected/temp_tables.out index 0bacb407..745aabdb 100644 --- a/expected/temp_tables.out +++ b/expected/temp_tables.out @@ -126,18 +126,18 @@ SELECT * FROM check_estimated_rows(' SELECT * FROM check_estimated_rows(' SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); -'); -- TODO: Should use AQO estimation with another temp table of the same structure +'); -- Should use AQO estimation with another temp 
table of the same structure estimated | actual -----------+-------- - 100 | 0 + 1 | 0 (1 row) SET aqo.mode = 'forced'; -- Now we use all fss records for each query DROP TABLE pt; -SELECT aqo_cleanup(); - aqo_cleanup -------------- - (2,6) +SELECT * FROM aqo_cleanup(); + nfs | nfss +-----+------ + 2 | 5 (1 row) CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; diff --git a/expected/top_queries.out b/expected/top_queries.out index 9ddaf84a..250f1cad 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -98,3 +98,4 @@ ORDER BY (md5(query_text)); SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 (4 rows) +DROP EXTENSION aqo CASCADE; diff --git a/machine_learning.h b/machine_learning.h index 592af1e0..a09b3102 100644 --- a/machine_learning.h +++ b/machine_learning.h @@ -25,7 +25,5 @@ typedef struct OkNNrdata extern double OkNNr_predict(OkNNrdata *data, double *features); extern int OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor); -extern int get_avg_over_neibours(OkNNrdata *data, - double *features); #endif /* MACHINE_LEARNING_H */ diff --git a/postprocessing.c b/postprocessing.c index ee25a3d1..eeb4c249 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -94,32 +94,14 @@ atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, List *reloids, bool isTimedOut) { LOCKTAG tag; - int j; init_lock_tag(&tag, fs, fss); LockAcquire(&tag, ExclusiveLock, false, false); if (!load_fss_ext(fs, fss, data, NULL, !isTimedOut)) + data->rows = 0; - /* - * Add a new object in aqo_data table with predicted target value - */ - if (load_fss(fs, fss, data, NULL, false)) - { - for (j = 0; j < data->cols; ++j) - data->matrix[data->rows][j] = features[j]; - data->targets[data->rows] = OkNNr_predict(data, features); - data->rfactors[data->rows] = rfactor; - data->rows += 1; - } - else - { - data->rows = 0; - data->rows = OkNNr_learn(data, features, target, 
rfactor); - } - else - data->rows = OkNNr_learn(data, features, target, rfactor); - + data->rows = OkNNr_learn(data, features, target, rfactor); update_fss_ext(fs, fss, data, reloids, isTimedOut); LockRelease(&tag, ExclusiveLock, false); diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index 67fddb8f..2d71a20d 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -52,11 +52,13 @@ SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants -- Trivial JOIN push-down. SELECT str FROM expln(' -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) -SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; ') AS str WHERE str NOT LIKE '%Sort Method%'; + +-- TODO: Should learn on postgres_fdw nodes EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) -SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; -- TODO: Non-mergejoinable join condition. 
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index 1db42929..ad06fafb 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -221,25 +221,27 @@ SELECT count(*) FROM aqo_data; SET aqo.join_threshold = 3; SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); -SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); SELECT count(*) FROM aqo_data; -- Return 0 - do not learn on the queries above SELECT * FROM check_estimated_rows(' SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -'); -SELECT count(*) FROM -- Learn on the query +'); -- Learn on the query +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 ; SELECT query_text FROM aqo_query_texts WHERE query_hash <> 0; -- Check query SET aqo.join_threshold = 1; SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); -SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); +SELECT * FROM check_estimated_rows( + 'SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 -; -- Learn on a query with one join +; -- Learn on a new query with one join (cardinality of this join AQO extracted from previous 3-join query) SET aqo.join_threshold = 0; SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index d8098b15..35f52706 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -1,4 +1,4 @@ -CREATE IF NOT EXISTS EXTENSION aqo; +CREATE EXTENSION aqo; SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; @@ -68,3 +68,5 @@ SELECT str AS result FROM 
expln(' SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str WHERE str NOT LIKE 'Query Identifier%'; + +DROP EXTENSION aqo CASCADE; \ No newline at end of file diff --git a/sql/relocatable.sql b/sql/relocatable.sql index d48de902..64a29808 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -1,4 +1,3 @@ -DROP EXTENSION aqo CASCADE; CREATE EXTENSION aqo; SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; -- use this mode for unconditional learning diff --git a/sql/temp_tables.sql b/sql/temp_tables.sql index 0bf61c50..ab594e40 100644 --- a/sql/temp_tables.sql +++ b/sql/temp_tables.sql @@ -63,11 +63,11 @@ SELECT * FROM check_estimated_rows(' '); -- Should use AQO estimation SELECT * FROM check_estimated_rows(' SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); -'); -- TODO: Should use AQO estimation with another temp table of the same structure +'); -- Should use AQO estimation with another temp table of the same structure SET aqo.mode = 'forced'; -- Now we use all fss records for each query DROP TABLE pt; -SELECT aqo_cleanup(); +SELECT * FROM aqo_cleanup(); CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; CREATE TEMP TABLE ttd1 AS SELECT -(x*3) AS x, (x % 9) AS y1 FROM generate_series(1,100) AS x; diff --git a/sql/top_queries.sql b/sql/top_queries.sql index 46d35324..f7b4fb59 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -50,3 +50,5 @@ SELECT query_text,nexecs FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt WHERE ce.id = aqt.query_hash ORDER BY (md5(query_text)); + +DROP EXTENSION aqo CASCADE; diff --git a/storage.c b/storage.c index 00126de8..3d538d11 100644 --- a/storage.c +++ b/storage.c @@ -29,12 +29,11 @@ #include "preprocessing.h" #include "learn_cache.h" - #define AQO_DATA_COLUMNS (7) HTAB *deactivated_queries = NULL; static ArrayType *form_matrix(double **matrix, int nrows, int ncols); -static void deform_matrix(Datum datum, double **matrix); +static int 
deform_matrix(Datum datum, double **matrix); static ArrayType *form_vector(double *vector, int nrows); static void deform_vector(Datum datum, double *vector, int *nelems); @@ -389,97 +388,177 @@ load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe) } /* - * Loads feature subspace (fss) from table aqo_data into memory. - * The last column of the returned matrix is for target values of objects. - * Returns false if the operation failed, true otherwise. + * Return list of reloids on which + */ +static void +build_knn_matrix(Datum *values, bool *nulls, OkNNrdata *data) +{ + int nrows; + + Assert(DatumGetInt32(values[2]) == data->cols); + + if (data->rows >= 0) + /* trivial strategy - use first suitable record and ignore others */ + return; + + if (data->cols > 0) + /* + * The case than an object hasn't any filters and selectivities + */ + data->rows = deform_matrix(values[3], data->matrix); + + deform_vector(values[4], data->targets, &nrows); + Assert(data->rows < 0 || data->rows == nrows); + data->rows = nrows; + + deform_vector(values[6], data->rfactors, &nrows); + Assert(data->rows == nrows); +} + +/* + * Loads KNN matrix for the feature subspace (fss) from table aqo_data. + * If wideSearch is true, search row by an unique value of (fs, fss) + * If wideSearch is false - search rows across all fs values and try to build a + * KNN matrix by merging of existed matrixes with some algorithm. + * In the case of successful search, initializes the data variable and list of + * reloids. 
* - * 'fss_hash' is the hash of feature subspace which is supposed to be loaded - * 'ncols' is the number of clauses in the feature subspace - * 'matrix' is an allocated memory for matrix with the size of aqo_K rows - * and nhashes columns - * 'targets' is an allocated memory with size aqo_K for target values - * of the objects - * 'rows' is the pointer in which the function stores actual number of - * objects in the given feature space + * Returns false if any data not found, true otherwise. */ bool -load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool use_idx_fss) +load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool wideSearch) { - Relation hrel; - Relation irel; - HeapTuple tuple; + Relation hrel; + Relation irel; + HeapTuple tuple; TupleTableSlot *slot; bool shouldFree; - bool find_ok = false; IndexScanDesc scan; + ScanKeyData key[2]; Datum values[AQO_DATA_COLUMNS]; bool isnull[AQO_DATA_COLUMNS]; - bool success = true; - ScanKeyData key[2]; + bool success = false; + int keycount = 0; + List *oids = NIL; - if (!open_aqo_relation(NULL, "aqo_data", - "aqo_fss_access_idx", + if (!open_aqo_relation(NULL, "aqo_data", "aqo_fss_access_idx", AccessShareLock, &hrel, &irel)) - return false; + return false; - if (use_idx_fss) + if (wideSearch) { - scan = index_beginscan(hrel, irel, SnapshotSelf, 2, 0); - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fs)); - ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss)); + /* Full scan key. 
Only one row wanted */ + ScanKeyInit(&key[keycount++], 1, BTEqualStrategyNumber, F_INT8EQ, + Int64GetDatum(fs)); + ScanKeyInit(&key[keycount++], 2, BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(fss)); } else - { - scan = index_beginscan(hrel, irel, SnapshotSelf, 2, 0); - ScanKeyInit(&key[0], 1, BTLessEqualStrategyNumber, F_INT8LE, Int64GetDatum(0)); - ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss)); - } + /* Pass along the index and get all tuples with the same fss */ + ScanKeyInit(&key[keycount++], 2, BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(fss)); - index_rescan(scan, key, 2, NULL, 0); + scan = index_beginscan(hrel, irel, SnapshotSelf, keycount, 0); + index_rescan(scan, key, keycount, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); + data->rows = -1; /* Attention! Use as a sign of nonentity */ - if (find_ok) + /* + * Iterate along all tuples found and prepare knn model + */ + while (index_getnext_slot(scan, ForwardScanDirection, slot)) { + ArrayType *array; + Datum *vals; + int nrows; + int i; + bool should_skip = false; + List *temp_oids = NIL; + tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); Assert(shouldFree != true); heap_deform_tuple(tuple, hrel->rd_att, values, isnull); - if (DatumGetInt32(values[2]) == data->cols) + /* Filter obviously unfamiliar tuples */ + + if (DatumGetInt32(values[2]) != data->cols) { - if (data->cols > 0) + if (wideSearch) + { /* - * The case than an object has not any filters and selectivities + * Looks like a hash collision, but it is so unlikely in a single + * fs, that we will LOG this fact and return immediately. 
*/ - deform_matrix(values[3], data->matrix); + elog(LOG, "[AQO] Unexpected number of features for hash (" \ + UINT64_FORMAT", %d):\ + expected %d features, obtained %d", + fs, fss, data->cols, DatumGetInt32(values[2])); + Assert(success == false); + break; + } + else + /* Go to the next tuple */ + continue; + } - deform_vector(values[4], data->targets, &(data->rows)); - deform_vector(values[6], data->rfactors, &(data->rows)); + /* Decompose list of oids which the data depend on */ + array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(values[5])); + deconstruct_array(array, OIDOID, sizeof(Oid), true, + TYPALIGN_INT, &vals, NULL, &nrows); - if (reloids != NULL) + if (data->rows >= 0 && list_length(oids) != nrows) + { + /* Dubious case. So log it and skip these data */ + elog(LOG, + "[AQO] different number depended oids for the same fss %d: " + "%d and %d correspondingly.", + fss, list_length(oids), nrows); + should_skip = true; + } + else + { + for (i = 0; i < nrows; i++) { - ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(values[5])); - Datum *values; - int nrows; - int i; - - deconstruct_array(array, OIDOID, sizeof(Oid), true, - TYPALIGN_INT, &values, NULL, &nrows); - for (i = 0; i < nrows; ++i) - *reloids = lappend_oid(*reloids, DatumGetObjectId(values[i])); - - pfree(values); - pfree(array); + Oid reloid = DatumGetObjectId(vals[i]); + + if (!OidIsValid(reloid)) + elog(ERROR, "[AQO] Impossible OID in the knowledge base."); + + if (data->rows >= 0 && !list_member_oid(oids, reloid)) + { + elog(LOG, + "[AQO] Oid set for two records with equal fss %d don't match.", + fss); + should_skip = true; + break; + } + temp_oids = lappend_oid(temp_oids, reloid); } } - else - elog(ERROR, "[AQO] Unexpected number of features for hash (" \ - UINT64_FORMAT", %d):\ - expected %d features, obtained %d", - fs, fss, data->cols, DatumGetInt32(values[2])); + pfree(vals); + pfree(array); + + if (!should_skip) + { + if (data->rows < 0) + oids = copyObject(temp_oids); + 
build_knn_matrix(values, isnull, data); + } + + if (temp_oids != NIL) + pfree(temp_oids); + + /* + * It's OK, guess, because if something happened during merge of + * matrixes an ERROR will be thrown. + */ + if (data->rows > 0) + success = true; } - else - success = false; + + if (success && reloids != NULL) + /* return list of reloids, if needed */ + *reloids = oids; ExecDropSingleTupleTableSlot(slot); index_endscan(scan); @@ -489,14 +568,6 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool use_idx_fss) return success; } -PG_FUNCTION_INFO_V1(xxx); -Datum xxx(PG_FUNCTION_ARGS) -{ - elog(NOTICE, "xxx called"); - load_fss(5, 2027816329,NULL, NULL, false); - PG_RETURN_VOID(); -} - bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids, bool isTimedOut) @@ -627,7 +698,6 @@ update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids) if (update_indexes) my_index_insert(irel, values, isnull, &(nw_tuple->t_self), hrel, UNIQUE_CHECK_YES); - result = true; } else @@ -823,13 +893,13 @@ update_aqo_stat(uint64 qhash, QueryStat *stat) /* * Expands matrix from storage into simple C-array. */ -void +int deform_matrix(Datum datum, double **matrix) { ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); int nelems; Datum *values; - int rows; + int rows = 0; int cols; int i, j; @@ -847,6 +917,7 @@ deform_matrix(Datum datum, double **matrix) } pfree(values); pfree(array); + return rows; } /* From 309bd280f9244584e66e4ec89c189685639c01e8 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 14 Jun 2022 22:52:41 +0300 Subject: [PATCH 091/203] Fix look-a-like output test. 
Delete lines containing Memory --- expected/look_a_like.out | 21 ++++++++------------- sql/look_a_like.sql | 8 ++++---- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index bf966607..e3fbf4bb 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -124,14 +124,13 @@ WHERE str NOT LIKE 'Query Identifier%'; SELECT str AS result FROM expln(' SELECT x FROM A where x < 10 group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; result ------------------------------------------------------- HashAggregate (actual rows=10 loops=1) AQO not used Output: x Group Key: a.x - Batches: 1 Memory Usage: 40kB -> Seq Scan on public.a (actual rows=1000 loops=1) AQO not used Output: x @@ -139,13 +138,13 @@ WHERE str NOT LIKE 'Query Identifier%'; Using aqo: true AQO mode: LEARN JOINS: 0 -(12 rows) +(11 rows) -- cardinality 1000 in Seq Scan on a SELECT str AS result FROM expln(' SELECT x,y FROM A,B WHERE x < 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; result ------------------------------------------------------------- Merge Join (actual rows=100000 loops=1) @@ -156,7 +155,6 @@ WHERE str NOT LIKE 'Query Identifier%'; AQO not used Output: a.x Sort Key: a.x - Sort Method: quicksort Memory: 79kB -> Seq Scan on public.a (actual rows=1000 loops=1) AQO: rows=1000, error=0% Output: a.x @@ -165,27 +163,25 @@ WHERE str NOT LIKE 'Query Identifier%'; AQO not used Output: b.y Sort Key: b.y - Sort Method: quicksort Memory: 79kB -> Seq Scan on public.b (actual rows=1000 loops=1) AQO not used Output: b.y Using aqo: true AQO mode: LEARN JOINS: 0 -(24 rows) +(22 rows) -- cardinality 100 in Seq Scan on a and Seq Scan on b SELECT str AS result FROM expln(' SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str -WHERE str NOT LIKE 'Query 
Identifier%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; result ---------------------------------------------------------------- HashAggregate (actual rows=0 loops=1) AQO not used Output: a.x Group Key: a.x - Batches: 1 Memory Usage: 40kB -> Nested Loop (actual rows=0 loops=1) AQO not used Output: a.x @@ -202,7 +198,7 @@ WHERE str NOT LIKE 'Query Identifier%'; Using aqo: true AQO mode: LEARN JOINS: 1 -(21 rows) +(20 rows) -- cardinality 1000 Hash Cond: (a.x = b.y) and 1 Seq Scan on b -- this cardinality is wrong because we take it from bad neibours (previous query). @@ -210,7 +206,7 @@ WHERE str NOT LIKE 'Query Identifier%'; SELECT str AS result FROM expln(' SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; result ---------------------------------------------------------- Hash Join (actual rows=0 loops=1) @@ -224,7 +220,6 @@ WHERE str NOT LIKE 'Query Identifier%'; -> Hash (actual rows=0 loops=1) AQO not used Output: b.y - Buckets: 1024 Batches: 1 Memory Usage: 8kB -> Seq Scan on public.b (actual rows=0 loops=1) AQO: rows=1, error=100% Output: b.y @@ -233,6 +228,6 @@ WHERE str NOT LIKE 'Query Identifier%'; Using aqo: true AQO mode: LEARN JOINS: 0 -(20 rows) +(19 rows) DROP EXTENSION aqo CASCADE; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index 35f52706..cf6b05c5 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -49,24 +49,24 @@ WHERE str NOT LIKE 'Query Identifier%'; SELECT str AS result FROM expln(' SELECT x FROM A where x < 10 group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; -- cardinality 1000 in Seq Scan on a SELECT str AS result FROM expln(' SELECT x,y FROM A,B WHERE x < 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; 
-- cardinality 100 in Seq Scan on a and Seq Scan on b SELECT str AS result FROM expln(' SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; -- cardinality 1000 Hash Cond: (a.x = b.y) and 1 Seq Scan on b -- this cardinality is wrong because we take it from bad neibours (previous query). -- clause y > 10 give count of rows with the same clauses. SELECT str AS result FROM expln(' SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; DROP EXTENSION aqo CASCADE; \ No newline at end of file From 5c4c9792d101f3c8d4aea9312899095c7931d099 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Thu, 23 Jun 2022 14:56:45 +0300 Subject: [PATCH 092/203] Fix AQO_DEBUG_PRINT --- cardinality_estimation.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 4ed9f86b..97799016 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -33,7 +33,7 @@ predict_debug_output(List *clauses, List *selectivities, initStringInfo(&debug_str); appendStringInfo(&debug_str, "fss: %d, clausesNum: %d, ", - fss_hash, list_length(clauses)); + fss, list_length(clauses)); appendStringInfoString(&debug_str, ", selectivities: { "); foreach(lc, selectivities) @@ -45,8 +45,8 @@ predict_debug_output(List *clauses, List *selectivities, appendStringInfoString(&debug_str, "}, reloids: { "); foreach(lc, reloids) { - Value *relname = lfirst_node(String, lc); - appendStringInfo(&debug_str, "%s ", valStr(relname)); + Oid relname = lfirst_oid(lc); + appendStringInfo(&debug_str, "%d ", relname); } appendStringInfo(&debug_str, "}, result: %lf", result); From 839186f63a3a656255127f81fcb60e2fc1873ee0 Mon Sep 17 00:00:00 2001 From: "Andrey V. 
Lepikhov" Date: Thu, 2 Jun 2022 08:00:58 +0300 Subject: [PATCH 093/203] Add commit for smooth transition to file-based AQO storage. --- aqo.c | 13 +++++++++++++ postprocessing.c | 13 +++++++++++-- preprocessing.c | 1 + storage.c | 20 ++++++++++++++++++++ storage.h | 10 ++++++++++ 5 files changed, 55 insertions(+), 2 deletions(-) create mode 100644 storage.h diff --git a/aqo.c b/aqo.c index 3adc3801..8172f70d 100644 --- a/aqo.c +++ b/aqo.c @@ -23,6 +23,7 @@ #include "path_utils.h" #include "preprocessing.h" #include "learn_cache.h" +#include "storage.h" PG_MODULE_MAGIC; @@ -200,6 +201,18 @@ _PG_init(void) NULL ); + DefineCustomBoolVariable( + "aqo.use_file_storage", + "Used for smooth transition from table storage", + NULL, + &aqo_use_file_storage, + false, + PGC_USERSET, + 0, + NULL, + NULL, + NULL + ); DefineCustomIntVariable("aqo.join_threshold", "Sets the threshold of number of JOINs in query beyond which AQO is used.", NULL, diff --git a/postprocessing.c b/postprocessing.c index eeb4c249..47366b0d 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -29,6 +29,7 @@ #include "machine_learning.h" #include "preprocessing.h" #include "learn_cache.h" +#include "storage.h" typedef struct @@ -823,7 +824,12 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) } if (query_context.collect_stat) - stat = get_aqo_stat(query_context.query_hash); + { + if (!aqo_use_file_storage) + stat = get_aqo_stat(query_context.query_hash); + else + stat = aqo_load_stat(query_context.query_hash); + } { /* Calculate execution time. 
*/ @@ -873,7 +879,10 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) automatical_query_tuning(query_context.query_hash, stat); /* Write AQO statistics to the aqo_query_stat table */ - update_aqo_stat(query_context.query_hash, stat); + if (!aqo_use_file_storage) + update_aqo_stat(query_context.query_hash, stat); + else + aqo_store_stat(query_context.query_hash, stat); pfree_query_stat(stat); } diff --git a/preprocessing.c b/preprocessing.c index cc438fae..d27db49f 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -65,6 +65,7 @@ #include "aqo.h" #include "hash.h" #include "preprocessing.h" +#include "storage.h" /* List of feature spaces, that are processing in this backend. */ diff --git a/storage.c b/storage.c index 3d538d11..838a1dd6 100644 --- a/storage.c +++ b/storage.c @@ -28,6 +28,7 @@ #include "machine_learning.h" #include "preprocessing.h" #include "learn_cache.h" +#include "storage.h" #define AQO_DATA_COLUMNS (7) HTAB *deactivated_queries = NULL; @@ -1104,3 +1105,22 @@ add_deactivated_query(uint64 query_hash) { hash_search(deactivated_queries, &query_hash, HASH_ENTER, NULL); } + +/* ***************************************************************************** + * + * Implement AQO file storage below + * + **************************************************************************** */ + +bool aqo_use_file_storage; + +void +aqo_store_stat(uint64 queryid, QueryStat * stat) +{ +} + +QueryStat * +aqo_load_stat(uint64 queryid) +{ + return NULL; +} diff --git a/storage.h b/storage.h new file mode 100644 index 00000000..cd8d0d7e --- /dev/null +++ b/storage.h @@ -0,0 +1,10 @@ +#ifndef STORAGE_H +#define STORAGE_H + +#include "aqo.h" + +extern bool aqo_use_file_storage; + +extern void aqo_store_stat(uint64 queryid, QueryStat * stat); +extern QueryStat *aqo_load_stat(uint64 queryid); +#endif /* STORAGE_H */ From a17bac6bb15fdf4679a57ff8a94db4181ae89bd5 Mon Sep 17 00:00:00 2001 From: "Andrey V. 
Lepikhov" Date: Thu, 2 Jun 2022 12:18:06 +0300 Subject: [PATCH 094/203] Move aqo_query_stat table into file storage. Use shmem hash table and file. Replace UI according to this change. --- aqo--1.4--1.5.sql | 52 ++- aqo.c | 4 +- aqo.h | 34 +- aqo_shared.c | 36 ++ aqo_shared.h | 5 + auto_tuning.c | 38 +-- expected/clean_aqo_data.out | 14 +- expected/forced_stat_collection.out | 3 +- expected/gucs.out | 39 +++ expected/plancache.out | 2 +- postprocessing.c | 139 ++------ sql/clean_aqo_data.sql | 14 +- sql/forced_stat_collection.sql | 3 +- sql/gucs.sql | 9 + sql/plancache.sql | 2 +- storage.c | 501 ++++++++++++++++++---------- storage.h | 44 ++- t/001_pgbench.pl | 9 +- utils.c | 43 --- 19 files changed, 573 insertions(+), 418 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index c5e1117e..e46938b0 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -33,17 +33,38 @@ CREATE TABLE aqo_query_texts ( query_text text NOT NULL ); -CREATE TABLE aqo_query_stat ( - query_hash bigint CONSTRAINT aqo_query_stat_idx PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, - execution_time_with_aqo double precision[], - execution_time_without_aqo double precision[], - planning_time_with_aqo double precision[], - planning_time_without_aqo double precision[], - cardinality_error_with_aqo double precision[], - cardinality_error_without_aqo double precision[], - executions_with_aqo bigint, - executions_without_aqo bigint -); +/* Now redefine */ +CREATE FUNCTION aqo_query_stat( + OUT queryid bigint, + OUT execution_time_with_aqo double precision[], + OUT execution_time_without_aqo double precision[], + OUT planning_time_with_aqo double precision[], + OUT planning_time_without_aqo double precision[], + OUT cardinality_error_with_aqo double precision[], + OUT cardinality_error_without_aqo double precision[], + OUT executions_with_aqo bigint, + OUT executions_without_aqo bigint +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_query_stat' +LANGUAGE C STRICT VOLATILE 
PARALLEL SAFE; + +CREATE VIEW aqo_query_stat AS SELECT * FROM aqo_query_stat(); + +-- +-- Remove all records in the AQO statistics. +-- Return number of rows removed. +-- +CREATE FUNCTION aqo_stat_reset() RETURNS bigint +AS 'MODULE_PATHNAME' +LANGUAGE C PARALLEL SAFE; + +COMMENT ON FUNCTION aqo_stat_reset() IS +'Reset query statistics gathered by AQO'; + +CREATE FUNCTION aqo_stat_remove(fs bigint) RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT PARALLEL SAFE; -- -- Re-create the aqo_data table. @@ -97,7 +118,7 @@ IF (controlled) THEN execution_time_with_aqo[array_length(execution_time_with_aqo, 1)] AS exectime, executions_with_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.query_hash = aqs.query_hash + ON aq.query_hash = aqs.queryid WHERE TRUE = ANY (SELECT unnest(execution_time_with_aqo) IS NOT NULL) ) AS q1 ORDER BY nn ASC; @@ -116,7 +137,7 @@ ELSE (SELECT AVG(t) FROM unnest(execution_time_without_aqo) t) AS exectime, executions_without_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.query_hash = aqs.query_hash + ON aq.query_hash = aqs.queryid WHERE TRUE = ANY (SELECT unnest(execution_time_without_aqo) IS NOT NULL) ) AS q1 ORDER BY (nn) ASC; @@ -195,6 +216,7 @@ BEGIN -- Remove ALL feature space if one of oids isn't exists DELETE FROM aqo_queries WHERE fspace_hash = fs; + PERFORM * FROM aqo_stat_remove(fs); END LOOP; -- Calculate difference with previous state of knowledge base @@ -235,7 +257,7 @@ IF (controlled) THEN cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror, executions_with_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.query_hash = aqs.query_hash + ON aq.query_hash = aqs.queryid WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) ) AS q1 ORDER BY nn ASC; @@ -251,7 +273,7 @@ ELSE (SELECT AVG(t) FROM unnest(cardinality_error_without_aqo) t) AS cerror, executions_without_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.query_hash 
= aqs.query_hash + ON aq.query_hash = aqs.queryid WHERE TRUE = ANY (SELECT unnest(cardinality_error_without_aqo) IS NOT NULL) ) AS q1 ORDER BY (nn) ASC; diff --git a/aqo.c b/aqo.c index 8172f70d..55aaac30 100644 --- a/aqo.c +++ b/aqo.c @@ -63,7 +63,7 @@ static const struct config_enum_entry format_options[] = { }; /* Parameters of autotuning */ -int aqo_stat_size = 20; +int aqo_stat_size = STAT_SAMPLE_SIZE; int auto_tuning_window_size = 5; double auto_tuning_exploration = 0.1; int auto_tuning_max_iterations = 50; @@ -206,7 +206,7 @@ _PG_init(void) "Used for smooth transition from table storage", NULL, &aqo_use_file_storage, - false, + true, PGC_USERSET, 0, NULL, diff --git a/aqo.h b/aqo.h index e970a2c3..a13a1c89 100644 --- a/aqo.h +++ b/aqo.h @@ -135,7 +135,6 @@ #include "optimizer/cost.h" #include "parser/analyze.h" #include "parser/parsetree.h" -#include "utils/array.h" #include "utils/builtins.h" #include "utils/guc.h" #include "utils/hsearch.h" @@ -145,6 +144,7 @@ #include "utils/snapmgr.h" #include "machine_learning.h" +#include "storage.h" /* Check PostgreSQL version (9.6.0 contains important changes in planner) */ #if PG_VERSION_NUM < 90600 @@ -175,32 +175,6 @@ extern bool aqo_show_hash; extern bool aqo_show_details; extern int aqo_join_threshold; -/* - * It is mostly needed for auto tuning of query. with auto tuning mode aqo - * checks stability of last executions of the query, bad influence of strong - * cardinality estimation on query execution (planner bug?) and so on. - * It can induce aqo to suppress machine learning for this query. 
- */ -typedef struct -{ - double *execution_time_with_aqo; - double *execution_time_without_aqo; - double *planning_time_with_aqo; - double *planning_time_without_aqo; - double *cardinality_error_with_aqo; - double *cardinality_error_without_aqo; - - int execution_time_with_aqo_size; - int execution_time_without_aqo_size; - int planning_time_with_aqo_size; - int planning_time_without_aqo_size; - int cardinality_error_with_aqo_size; - int cardinality_error_without_aqo_size; - - int64 executions_with_aqo; - int64 executions_without_aqo; -} QueryStat; - /* Parameters for current query */ typedef struct QueryContextData { @@ -289,8 +263,6 @@ extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids, bool isTimedOut); extern bool update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids); -QueryStat *get_aqo_stat(uint64 query_hash); -void update_aqo_stat(uint64 query_hash, QueryStat * stat); extern bool my_index_insert(Relation indexRelation, Datum *values, bool *isnull, ItemPointer heap_t_ctid, Relation heapRelation, IndexUniqueCheck checkUnique); @@ -318,7 +290,7 @@ void aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, void aqo_ExecutorEnd(QueryDesc *queryDesc); /* Automatic query tuning */ -extern void automatical_query_tuning(uint64 query_hash, QueryStat * stat); +extern void automatical_query_tuning(uint64 query_hash, StatEntry *stat); /* Utilities */ extern int int64_compare(const void *a, const void *b); @@ -327,8 +299,6 @@ extern int double_cmp(const void *a, const void *b); extern int *argsort(void *a, int n, size_t es, int (*cmp) (const void *, const void *)); extern int *inverse_permutation(int *a, int n); -extern QueryStat *palloc_query_stat(void); -extern void pfree_query_stat(QueryStat *stat); /* Selectivity cache for parametrized baserels */ extern void cache_selectivity(int clause_hash, int relid, int global_relid, diff --git a/aqo_shared.c 
b/aqo_shared.c index 84e6eadb..260b4cac 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -4,9 +4,12 @@ #include "postgres.h" +#include "lib/dshash.h" +#include "miscadmin.h" #include "storage/shmem.h" #include "aqo_shared.h" +#include "storage.h" typedef struct @@ -23,11 +26,13 @@ shmem_startup_hook_type prev_shmem_startup_hook = NULL; AQOSharedState *aqo_state = NULL; HTAB *fss_htab = NULL; static int aqo_htab_max_items = 1000; +static int fs_max_items = 1000; /* Max number of different feature spaces in ML model */ static uint32 temp_storage_size = 1024 * 1024 * 10; /* Storage size, in bytes */ static dsm_segment *seg = NULL; static void aqo_detach_shmem(int code, Datum arg); +static void on_shmem_shutdown(int code, Datum arg); void * @@ -169,16 +174,23 @@ aqo_init_shmem(void) bool found; HASHCTL info; + if (prev_shmem_startup_hook) + prev_shmem_startup_hook(); + aqo_state = NULL; fss_htab = NULL; + stat_htab = NULL; LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); aqo_state = ShmemInitStruct("aqo", sizeof(AQOSharedState), &found); if (!found) { /* First time through ... */ + LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId()); aqo_state->dsm_handler = DSM_HANDLE_INVALID; + + LWLockInitialize(&aqo_state->stat_lock, LWLockNewTrancheId()); } info.keysize = sizeof(htab_key); @@ -188,8 +200,31 @@ aqo_init_shmem(void) &info, HASH_ELEM | HASH_BLOBS); + info.keysize = sizeof(((StatEntry *) 0)->queryid); + info.entrysize = sizeof(StatEntry); + stat_htab = ShmemInitHash("aqo stat hash", + fs_max_items, fs_max_items, + &info, + HASH_ELEM | HASH_BLOBS); + LWLockRelease(AddinShmemInitLock); LWLockRegisterTranche(aqo_state->lock.tranche, "aqo"); + LWLockRegisterTranche(aqo_state->stat_lock.tranche, "aqo stat storage"); + + if (!IsUnderPostmaster) + { + on_shmem_exit(on_shmem_shutdown, (Datum) 0); + aqo_stat_load(); + } +} + +/* + * Main idea here is to store all ML data in temp files on postmaster shutdown. 
+ */ +static void +on_shmem_shutdown(int code, Datum arg) +{ + aqo_stat_flush(); } Size @@ -199,6 +234,7 @@ aqo_memsize(void) size = MAXALIGN(sizeof(AQOSharedState)); size = add_size(size, hash_estimate_size(aqo_htab_max_items, sizeof(htab_entry))); + size = add_size(size, hash_estimate_size(fs_max_items, sizeof(AQOSharedState))); return size; } diff --git a/aqo_shared.h b/aqo_shared.h index eb5323e0..31f5ec28 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -5,6 +5,8 @@ #include "storage/dsm.h" #include "storage/ipc.h" #include "storage/lwlock.h" +#include "utils/dsa.h" +#include "lib/dshash.h" #define AQO_SHARED_MAGIC 0x053163 @@ -25,6 +27,9 @@ typedef struct AQOSharedState { LWLock lock; /* mutual exclusion */ dsm_handle dsm_handler; + + /* Storage fields */ + LWLock stat_lock; /* lock for access to stat storage */ } AQOSharedState; diff --git a/auto_tuning.c b/auto_tuning.c index 29930db0..c2031932 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -18,6 +18,7 @@ #include "postgres.h" #include "aqo.h" +#include "storage.h" /* * Auto tuning criteria criteria of an query convergence by overall cardinality @@ -35,7 +36,7 @@ static bool is_in_infinite_loop_cq(double *elems, int nelems); /* * Returns mean value of the array of doubles. */ -double +static double get_mean(double *elems, int nelems) { double sum = 0; @@ -52,7 +53,7 @@ get_mean(double *elems, int nelems) * Having a time series it tries to predict its next value. * Now it do simple window averaging. */ -double +static double get_estimation(double *elems, int nelems) { int start; @@ -70,7 +71,7 @@ get_estimation(double *elems, int nelems) /* * Checks whether the series is stable with absolute or relative error. */ -bool +static bool is_stable(double *elems, int nelems) { double est, @@ -91,7 +92,7 @@ is_stable(double *elems, int nelems) * Now it checks whether the cardinality quality stopped decreasing with * absolute or relative error. 
*/ -bool +static bool converged_cq(double *elems, int nelems) { if (nelems < auto_tuning_window_size + 2) @@ -107,7 +108,7 @@ converged_cq(double *elems, int nelems) * Now it checks whether the cardinality quality stopped decreasing with * absolute or relative error 0.1. */ -bool +static bool is_in_infinite_loop_cq(double *elems, int nelems) { if (nelems - auto_tuning_infinite_loop < auto_tuning_window_size + 2) @@ -144,7 +145,7 @@ is_in_infinite_loop_cq(double *elems, int nelems) * this query to false. */ void -automatical_query_tuning(uint64 qhash, QueryStat * stat) +automatical_query_tuning(uint64 qhash, StatEntry *stat) { double unstability = auto_tuning_exploration; double t_aqo, @@ -152,14 +153,13 @@ automatical_query_tuning(uint64 qhash, QueryStat * stat) double p_use = -1; int64 num_iterations; - num_iterations = stat->executions_with_aqo + stat->executions_without_aqo; + num_iterations = stat->execs_with_aqo + stat->execs_without_aqo; query_context.learn_aqo = true; - if (stat->executions_without_aqo < auto_tuning_window_size + 1) + if (stat->execs_without_aqo < auto_tuning_window_size + 1) query_context.use_aqo = false; - else if (!converged_cq(stat->cardinality_error_with_aqo, - stat->cardinality_error_with_aqo_size) && - !is_in_infinite_loop_cq(stat->cardinality_error_with_aqo, - stat->cardinality_error_with_aqo_size)) + else if (!converged_cq(stat->est_error_aqo, stat->cur_stat_slot_aqo) && + !is_in_infinite_loop_cq(stat->est_error_aqo, + stat->cur_stat_slot_aqo)) query_context.use_aqo = true; else { @@ -168,15 +168,11 @@ automatical_query_tuning(uint64 qhash, QueryStat * stat) * by execution time. It is volatile, probabilistic part of code. * XXX: this logic of auto tuning may be reworked later. 
*/ - t_aqo = get_estimation(stat->execution_time_with_aqo, - stat->execution_time_with_aqo_size) + - get_estimation(stat->planning_time_with_aqo, - stat->planning_time_with_aqo_size); - - t_not_aqo = get_estimation(stat->execution_time_without_aqo, - stat->execution_time_without_aqo_size) + - get_estimation(stat->planning_time_without_aqo, - stat->planning_time_without_aqo_size); + t_aqo = get_estimation(stat->exec_time_aqo, stat->cur_stat_slot_aqo) + + get_estimation(stat->plan_time_aqo, stat->cur_stat_slot_aqo); + + t_not_aqo = get_estimation(stat->exec_time, stat->cur_stat_slot) + + get_estimation(stat->plan_time, stat->cur_stat_slot); p_use = t_not_aqo / (t_not_aqo + t_aqo); diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index 18f6e3b5..570e8067 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -46,7 +46,7 @@ SELECT count(*) FROM aqo_query_texts WHERE (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- @@ -91,7 +91,7 @@ SELECT count(*) FROM aqo_query_texts WHERE (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count @@ -160,7 +160,7 @@ SELECT count(*) FROM aqo_query_texts WHERE (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash 
= ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- @@ -189,7 +189,7 @@ SELECT count(*) FROM aqo_query_texts WHERE (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- @@ -233,7 +233,7 @@ SELECT count(*) FROM aqo_query_texts WHERE (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count @@ -266,7 +266,7 @@ SELECT count(*) FROM aqo_query_texts WHERE (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count @@ -306,7 +306,7 @@ SELECT count(*) FROM aqo_query_texts WHERE (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index 7a1d89c5..6abf9a5b 100644 --- a/expected/forced_stat_collection.out +++ 
b/expected/forced_stat_collection.out @@ -38,7 +38,8 @@ SELECT * FROM aqo_data; (0 rows) SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex -FROM aqo_queries JOIN aqo_query_stat USING (query_hash); +FROM aqo_queries AS aq JOIN aqo_query_stat AS aqs +ON aq.query_hash = aqs.queryid; learn_aqo | use_aqo | auto_tuning | ce | nex -----------+---------+-------------+----------------------+----- f | f | f | {0.8637762840285226} | 1 diff --git a/expected/gucs.out b/expected/gucs.out index 995eca7b..b80e9e23 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -5,6 +5,12 @@ SET aqo.show_details = true; CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; +SELECT * FROM aqo_stat_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. + aqo_stat_reset +---------------- + 61 +(1 row) + -- Check AQO addons to explain (the only stable data) EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; @@ -30,6 +36,7 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) JOINS: 0 (6 rows) +SET aqo.mode = 'disabled'; -- Check existence of the interface functions. SELECT obj_description('aqo_cardinality_error'::regproc::oid); obj_description @@ -61,6 +68,12 @@ SELECT obj_description('aqo_reset_query'::regproc::oid); Remove from AQO storage only learning data for given QueryId. 
(1 row) +SELECT obj_description('aqo_stat_reset'::regproc::oid); + obj_description +---------------------------------------- + Reset query statistics gathered by AQO +(1 row) + \df aqo_cardinality_error List of functions Schema | Name | Result data type | Argument data types | Type @@ -96,4 +109,30 @@ SELECT obj_description('aqo_reset_query'::regproc::oid); public | aqo_reset_query | integer | queryid bigint | func (1 row) +\df aqo_stat_reset + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+----------------+------------------+---------------------+------ + public | aqo_stat_reset | bigint | | func +(1 row) + +-- Check stat reset +SELECT count(*) FROM aqo_query_stat; + count +------- + 1 +(1 row) + +SELECT * FROM aqo_stat_reset(); -- Remove one record + aqo_stat_reset +---------------- + 1 +(1 row) + +SELECT count(*) FROM aqo_query_stat; + count +------- + 0 +(1 row) + DROP EXTENSION aqo; diff --git a/expected/plancache.out b/expected/plancache.out index 3a01968c..3808bc6c 100644 --- a/expected/plancache.out +++ b/expected/plancache.out @@ -30,7 +30,7 @@ BEGIN RETURN QUERY SELECT executions_without_aqo nnex, executions_with_aqo nex, planning_time_with_aqo pt - FROM aqo_query_stat WHERE query_hash = qhash; + FROM aqo_query_stat WHERE queryid = qhash; END $$ LANGUAGE 'plpgsql'; -- The function shows 6 executions without an AQO support (nnex) and -- 4 executions with usage of an AQO knowledge base (nex). 
Planning time in the diff --git a/postprocessing.c b/postprocessing.c index 47366b0d..de2c077b 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -72,13 +72,6 @@ static List *restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, bool was_parametrized); -static void update_query_stat_row(double *et, int *et_size, - double *pt, int *pt_size, - double *ce, int *ce_size, - double planning_time, - double execution_time, - double cardinality_error, - int64 *n_exec); static void StoreToQueryEnv(QueryDesc *queryDesc); static void StorePlanInternals(QueryDesc *queryDesc); static bool ExtractFromQueryEnv(QueryDesc *queryDesc); @@ -556,50 +549,6 @@ learnOnPlanState(PlanState *p, void *context) return false; } -/* - * Updates given row of query statistics: - * et - execution time - * pt - planning time - * ce - cardinality error - */ -void -update_query_stat_row(double *et, int *et_size, - double *pt, int *pt_size, - double *ce, int *ce_size, - double planning_time, - double execution_time, - double cardinality_error, - int64 *n_exec) -{ - int i; - - /* - * If plan contains one or more "never visited" nodes, cardinality_error - * have -1 value and will be written to the knowledge base. User can use it - * as a sign that AQO ignores this query. - */ - if (*ce_size >= aqo_stat_size) - for (i = 1; i < aqo_stat_size; ++i) - ce[i - 1] = ce[i]; - *ce_size = (*ce_size >= aqo_stat_size) ? aqo_stat_size : (*ce_size + 1); - ce[*ce_size - 1] = cardinality_error; - - if (*et_size >= aqo_stat_size) - for (i = 1; i < aqo_stat_size; ++i) - et[i - 1] = et[i]; - - *et_size = (*et_size >= aqo_stat_size) ? aqo_stat_size : (*et_size + 1); - et[*et_size - 1] = execution_time; - - if (*pt_size >= aqo_stat_size) - for (i = 1; i < aqo_stat_size; ++i) - pt[i - 1] = pt[i]; - - *pt_size = (*pt_size >= aqo_stat_size) ? aqo_stat_size : (*pt_size + 1); - pt[*pt_size - 1] = planning_time; /* Just remember: planning time can be negative. 
*/ - (*n_exec)++; -} - /***************************************************************************** * * QUERY EXECUTION STATISTICS COLLECTING HOOKS @@ -774,12 +723,12 @@ aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, void aqo_ExecutorEnd(QueryDesc *queryDesc) { - double execution_time; - double cardinality_error; - QueryStat *stat = NULL; - instr_time endtime; - EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); - LOCKTAG tag; + double execution_time; + double cardinality_error; + StatEntry *stat; + instr_time endtime; + EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); + LOCKTAG tag; cardinality_sum_errors = 0.; cardinality_num_objects = 0; @@ -823,73 +772,41 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) list_free(ctx.selectivities); } - if (query_context.collect_stat) - { - if (!aqo_use_file_storage) - stat = get_aqo_stat(query_context.query_hash); - else - stat = aqo_load_stat(query_context.query_hash); - } + /* Calculate execution time. */ + INSTR_TIME_SET_CURRENT(endtime); + INSTR_TIME_SUBTRACT(endtime, query_context.start_execution_time); + execution_time = INSTR_TIME_GET_DOUBLE(endtime); - { - /* Calculate execution time. */ - INSTR_TIME_SET_CURRENT(endtime); - INSTR_TIME_SUBTRACT(endtime, query_context.start_execution_time); - execution_time = INSTR_TIME_GET_DOUBLE(endtime); + if (cardinality_num_objects > 0) + cardinality_error = cardinality_sum_errors / cardinality_num_objects; + else + cardinality_error = -1; - if (cardinality_num_objects > 0) - cardinality_error = cardinality_sum_errors / cardinality_num_objects; - else - cardinality_error = -1; + /* Prevent concurrent updates. */ + init_lock_tag(&tag, query_context.query_hash, query_context.fspace_hash); + LockAcquire(&tag, ExclusiveLock, false, false); - /* Prevent concurrent updates. 
*/ - init_lock_tag(&tag, query_context.query_hash, query_context.fspace_hash); - LockAcquire(&tag, ExclusiveLock, false, false); + if (query_context.collect_stat) + { + /* Write AQO statistics to the aqo_query_stat table */ + stat = aqo_stat_store(query_context.query_hash, + query_context.use_aqo, + query_context.planning_time, execution_time, + cardinality_error); if (stat != NULL) { - /* Calculate AQO statistics. */ - if (query_context.use_aqo) - /* For the case, when query executed with AQO predictions. */ - update_query_stat_row(stat->execution_time_with_aqo, - &stat->execution_time_with_aqo_size, - stat->planning_time_with_aqo, - &stat->planning_time_with_aqo_size, - stat->cardinality_error_with_aqo, - &stat->cardinality_error_with_aqo_size, - query_context.planning_time, - execution_time, - cardinality_error, - &stat->executions_with_aqo); - else - /* For the case, when query executed without AQO predictions. */ - update_query_stat_row(stat->execution_time_without_aqo, - &stat->execution_time_without_aqo_size, - stat->planning_time_without_aqo, - &stat->planning_time_without_aqo_size, - stat->cardinality_error_without_aqo, - &stat->cardinality_error_without_aqo_size, - query_context.planning_time, - execution_time, - cardinality_error, - &stat->executions_without_aqo); - /* Store all learn data into the AQO service relations. */ if (!query_context.adding_query && query_context.auto_tuning) automatical_query_tuning(query_context.query_hash, stat); - /* Write AQO statistics to the aqo_query_stat table */ - if (!aqo_use_file_storage) - update_aqo_stat(query_context.query_hash, stat); - else - aqo_store_stat(query_context.query_hash, stat); - pfree_query_stat(stat); + pfree(stat); } - - /* Allow concurrent queries to update this feature space. */ - LockRelease(&tag, ExclusiveLock, false); } + /* Allow concurrent queries to update this feature space. 
*/ + LockRelease(&tag, ExclusiveLock, false); + selectivity_cache_clear(); cur_classes = ldelete_uint64(cur_classes, query_context.query_hash); diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index a6c41d5a..39f7e170 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -23,7 +23,7 @@ SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); DROP TABLE a; @@ -45,7 +45,7 @@ SELECT count(*) FROM aqo_query_texts WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); @@ -75,7 +75,7 @@ SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); 
SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); @@ -85,7 +85,7 @@ SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); DROP TABLE a; @@ -106,7 +106,7 @@ SELECT count(*) FROM aqo_query_texts WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); @@ -120,7 +120,7 @@ SELECT count(*) FROM aqo_query_texts WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); @@ -137,7 +137,7 @@ SELECT count(*) FROM aqo_query_texts WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); 
SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index df754536..3b4ce55d 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -30,7 +30,8 @@ SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; SELECT * FROM aqo_data; SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex -FROM aqo_queries JOIN aqo_query_stat USING (query_hash); +FROM aqo_queries AS aq JOIN aqo_query_stat AS aqs +ON aq.query_hash = aqs.queryid; SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); diff --git a/sql/gucs.sql b/sql/gucs.sql index d87af3c3..9ce9c1a6 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -8,11 +8,13 @@ CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; +SELECT * FROM aqo_stat_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. -- Check AQO addons to explain (the only stable data) EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; +SET aqo.mode = 'disabled'; -- Check existence of the interface functions. 
SELECT obj_description('aqo_cardinality_error'::regproc::oid); @@ -20,11 +22,18 @@ SELECT obj_description('aqo_execution_time'::regproc::oid); SELECT obj_description('aqo_drop_class'::regproc::oid); SELECT obj_description('aqo_cleanup'::regproc::oid); SELECT obj_description('aqo_reset_query'::regproc::oid); +SELECT obj_description('aqo_stat_reset'::regproc::oid); \df aqo_cardinality_error \df aqo_execution_time \df aqo_drop_class \df aqo_cleanup \df aqo_reset_query +\df aqo_stat_reset + +-- Check stat reset +SELECT count(*) FROM aqo_query_stat; +SELECT * FROM aqo_stat_reset(); -- Remove one record +SELECT count(*) FROM aqo_query_stat; DROP EXTENSION aqo; diff --git a/sql/plancache.sql b/sql/plancache.sql index ef81de1f..529db2be 100644 --- a/sql/plancache.sql +++ b/sql/plancache.sql @@ -33,7 +33,7 @@ BEGIN RETURN QUERY SELECT executions_without_aqo nnex, executions_with_aqo nex, planning_time_with_aqo pt - FROM aqo_query_stat WHERE query_hash = qhash; + FROM aqo_query_stat WHERE queryid = qhash; END $$ LANGUAGE 'plpgsql'; -- The function shows 6 executions without an AQO support (nnex) and diff --git a/storage.c b/storage.c index 838a1dd6..ea6afa70 100644 --- a/storage.c +++ b/storage.c @@ -17,14 +17,16 @@ #include "postgres.h" -#include "nodes/value.h" -#include "postgres.h" +#include #include "access/heapam.h" #include "access/table.h" #include "access/tableam.h" +#include "miscadmin.h" +#include "pgstat.h" #include "aqo.h" +#include "aqo_shared.h" #include "machine_learning.h" #include "preprocessing.h" #include "learn_cache.h" @@ -36,7 +38,6 @@ HTAB *deactivated_queries = NULL; static ArrayType *form_matrix(double **matrix, int nrows, int ncols); static int deform_matrix(Datum datum, double **matrix); -static ArrayType *form_vector(double *vector, int nrows); static void deform_vector(Datum datum, double *vector, int *nelems); #define FormVectorSz(v_name) (form_vector((v_name), (v_name ## _size))) @@ -730,167 +731,6 @@ update_fss(uint64 fs, int fss, OkNNrdata 
*data, List *reloids) return result; } -/* - * Returns QueryStat for the given query_hash. Returns empty QueryStat if - * no statistics is stored for the given query_hash in table aqo_query_stat. - * Returns NULL and executes disable_aqo_for_query if aqo_query_stat - * is not found. - */ -QueryStat * -get_aqo_stat(uint64 qhash) -{ - Relation hrel; - Relation irel; - TupleTableSlot *slot; - IndexScanDesc scan; - ScanKeyData key; - QueryStat *stat = palloc_query_stat(); - bool shouldFree; - - - if (!open_aqo_relation(NULL, "aqo_query_stat", - "aqo_query_stat_idx", - AccessShareLock, &hrel, &irel)) - return false; - - scan = index_beginscan(hrel, irel, SnapshotSelf, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); - index_rescan(scan, &key, 1, NULL, 0); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - - if (index_getnext_slot(scan, ForwardScanDirection, slot)) - { - HeapTuple tuple; - Datum values[9]; - bool nulls[9]; - - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - heap_deform_tuple(tuple, hrel->rd_att, values, nulls); - - DeformVectorSz(values[1], stat->execution_time_with_aqo); - DeformVectorSz(values[2], stat->execution_time_without_aqo); - DeformVectorSz(values[3], stat->planning_time_with_aqo); - DeformVectorSz(values[4], stat->planning_time_without_aqo); - DeformVectorSz(values[5], stat->cardinality_error_with_aqo); - DeformVectorSz(values[6], stat->cardinality_error_without_aqo); - - stat->executions_with_aqo = DatumGetInt64(values[7]); - stat->executions_without_aqo = DatumGetInt64(values[8]); - } - - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, AccessShareLock); - table_close(hrel, AccessShareLock); - return stat; -} - -/* - * Saves given QueryStat for the given query_hash. - * Executes disable_aqo_for_query if aqo_query_stat is not found. 
- */ -void -update_aqo_stat(uint64 qhash, QueryStat *stat) -{ - Relation hrel; - Relation irel; - SnapshotData snap; - TupleTableSlot *slot; - TupleDesc tupDesc; - HeapTuple tuple, - nw_tuple; - Datum values[9]; - bool isnull[9] = { false, false, false, - false, false, false, - false, false, false }; - bool replace[9] = { false, true, true, - true, true, true, - true, true, true }; - bool shouldFree; - bool update_indexes; - IndexScanDesc scan; - ScanKeyData key; - - /* Couldn't allow to write if xact must be read-only. */ - if (XactReadOnly) - return; - - if (!open_aqo_relation(NULL, "aqo_query_stat", - "aqo_query_stat_idx", - RowExclusiveLock, &hrel, &irel)) - return; - - tupDesc = RelationGetDescr(hrel); - - InitDirtySnapshot(snap); - scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); - index_rescan(scan, &key, 1, NULL, 0); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - - /*values[0] will be initialized later */ - values[1] = PointerGetDatum(FormVectorSz(stat->execution_time_with_aqo)); - values[2] = PointerGetDatum(FormVectorSz(stat->execution_time_without_aqo)); - values[3] = PointerGetDatum(FormVectorSz(stat->planning_time_with_aqo)); - values[4] = PointerGetDatum(FormVectorSz(stat->planning_time_without_aqo)); - values[5] = PointerGetDatum(FormVectorSz(stat->cardinality_error_with_aqo)); - values[6] = PointerGetDatum(FormVectorSz(stat->cardinality_error_without_aqo)); - - values[7] = Int64GetDatum(stat->executions_with_aqo); - values[8] = Int64GetDatum(stat->executions_without_aqo); - - if (!index_getnext_slot(scan, ForwardScanDirection, slot)) - { - /* Such signature (hash) doesn't yet exist in the ML knowledge base. 
*/ - values[0] = Int64GetDatum(qhash); - tuple = heap_form_tuple(tupDesc, values, isnull); - simple_heap_insert(hrel, tuple); - my_index_insert(irel, values, isnull, &(tuple->t_self), - hrel, UNIQUE_CHECK_YES); - } - else if (!TransactionIdIsValid(snap.xmin) && !TransactionIdIsValid(snap.xmax)) - { - /* Need to update ML data row and no one backend concurrently doing it. */ - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - values[0] = heap_getattr(tuple, 1, tupDesc, &isnull[0]); - nw_tuple = heap_modify_tuple(tuple, tupDesc, values, isnull, replace); - if (my_simple_heap_update(hrel, &(nw_tuple->t_self), nw_tuple, - &update_indexes)) - { - /* NOTE: insert index tuple iff heap update succeeded! */ - if (update_indexes) - my_index_insert(irel, values, isnull, - &(nw_tuple->t_self), - hrel, UNIQUE_CHECK_YES); - } - else - { - /* - * Ooops, somebody concurrently updated the tuple. It is possible - * only in the case of changes made by third-party code. - */ - elog(ERROR, "AQO statistic data for query signature "UINT64_FORMAT - " concurrently updated by a stranger backend.", - qhash); - } - } - else - { - /* - * Concurrent update was made. To prevent deadlocks refuse to update. - */ - } - - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, RowExclusiveLock); - table_close(hrel, RowExclusiveLock); - - CommandCounterIncrement(); -} - /* * Expands matrix from storage into simple C-array. 
*/ @@ -1108,19 +948,340 @@ add_deactivated_query(uint64 query_hash) /* ***************************************************************************** * - * Implement AQO file storage below + * Implementation of the AQO file storage * **************************************************************************** */ +#define PGAQO_STAT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_statistics.stat" + bool aqo_use_file_storage; -void -aqo_store_stat(uint64 queryid, QueryStat * stat) +HTAB *stat_htab = NULL; +HTAB *queries_htab = NULL; /* TODO */ +HTAB *data_htab = NULL; /* TODO */ + +/* TODO: think about how to keep query texts. */ + +/* + * Update AQO statistics. + * + * Add a record (and replace old, if all stat slots is full) to stat slot for + * a query class. + * Returns a copy of stat entry, allocated in current memory context. Caller is + * in charge to free this struct after usage. + */ +StatEntry * +aqo_stat_store(uint64 queryid, bool use_aqo, + double plan_time, double exec_time, double est_error) +{ + StatEntry *entry; + bool found; + int pos; + + Assert(stat_htab); + + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + entry = (StatEntry *) hash_search(stat_htab, &queryid, HASH_ENTER, &found); + + /* Initialize entry on first usage */ + if (!found) + { + uint64 qid = entry->queryid; + memset(entry, 0, sizeof(StatEntry)); + entry->queryid = qid; + } + + /* Update the entry data */ + + if (use_aqo) + { + Assert(entry->cur_stat_slot_aqo >= 0); + pos = entry->cur_stat_slot_aqo; + if (entry->cur_stat_slot_aqo < STAT_SAMPLE_SIZE - 1) + entry->cur_stat_slot_aqo++; + else + { + size_t sz = (STAT_SAMPLE_SIZE - 1) * sizeof(entry->est_error_aqo[0]); + + Assert(entry->cur_stat_slot_aqo = STAT_SAMPLE_SIZE - 1); + memmove(entry->plan_time_aqo, &entry->plan_time_aqo[1], sz); + memmove(entry->exec_time_aqo, &entry->exec_time_aqo[1], sz); + memmove(entry->est_error_aqo, &entry->est_error_aqo[1], sz); + } + + entry->execs_with_aqo++; + entry->plan_time_aqo[pos] = plan_time; + 
entry->exec_time_aqo[pos] = exec_time; + entry->est_error_aqo[pos] = est_error; + } + else + { + Assert(entry->cur_stat_slot >= 0); + pos = entry->cur_stat_slot; + if (entry->cur_stat_slot < STAT_SAMPLE_SIZE - 1) + entry->cur_stat_slot++; + else + { + size_t sz = (STAT_SAMPLE_SIZE - 1) * sizeof(entry->est_error[0]); + + Assert(entry->cur_stat_slot = STAT_SAMPLE_SIZE - 1); + memmove(entry->plan_time, &entry->plan_time[1], sz); + memmove(entry->exec_time, &entry->exec_time[1], sz); + memmove(entry->est_error, &entry->est_error[1], sz); + } + + entry->execs_without_aqo++; + entry->plan_time[pos] = plan_time; + entry->exec_time[pos] = exec_time; + entry->est_error[pos] = est_error; + } + entry = memcpy(palloc(sizeof(StatEntry)), entry, sizeof(StatEntry)); + LWLockRelease(&aqo_state->stat_lock); + return entry; +} + +#include "funcapi.h" +PG_FUNCTION_INFO_V1(aqo_query_stat); + +typedef enum { + QUERYID = 0, + EXEC_TIME_AQO, + EXEC_TIME, + PLAN_TIME_AQO, + PLAN_TIME, + EST_ERROR_AQO, + EST_ERROR, + NEXECS_AQO, + NEXECS, + TOTAL_NCOLS +} aqo_stat_cols; + +/* + * Returns AQO statistics on controlled query classes. 
+ */ +Datum +aqo_query_stat(PG_FUNCTION_ARGS) { + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[TOTAL_NCOLS + 1]; + bool nulls[TOTAL_NCOLS + 1]; + HASH_SEQ_STATUS hash_seq; + StatEntry *entry; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + Assert(tupDesc->natts == TOTAL_NCOLS); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + memset(nulls, 0, TOTAL_NCOLS + 1); + LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + hash_seq_init(&hash_seq, stat_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + values[QUERYID] = Int64GetDatum(entry->queryid); + values[NEXECS] = Int64GetDatum(entry->execs_without_aqo); + values[NEXECS_AQO] = Int64GetDatum(entry->execs_with_aqo); + values[EXEC_TIME_AQO] = PointerGetDatum(form_vector(entry->exec_time_aqo, entry->cur_stat_slot_aqo)); + values[EXEC_TIME] = PointerGetDatum(form_vector(entry->exec_time, entry->cur_stat_slot)); + values[PLAN_TIME_AQO] = 
PointerGetDatum(form_vector(entry->plan_time_aqo, entry->cur_stat_slot_aqo)); + values[PLAN_TIME] = PointerGetDatum(form_vector(entry->plan_time, entry->cur_stat_slot)); + values[EST_ERROR_AQO] = PointerGetDatum(form_vector(entry->est_error_aqo, entry->cur_stat_slot_aqo)); + values[EST_ERROR] = PointerGetDatum(form_vector(entry->est_error, entry->cur_stat_slot)); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } + + LWLockRelease(&aqo_state->stat_lock); + tuplestore_donestoring(tupstore); + return (Datum) 0; } -QueryStat * -aqo_load_stat(uint64 queryid) +PG_FUNCTION_INFO_V1(aqo_stat_reset); + +Datum +aqo_stat_reset(PG_FUNCTION_ARGS) +{ + HASH_SEQ_STATUS hash_seq; + StatEntry *entry; + long num_remove = 0; + long num_entries; + + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + num_entries = hash_get_num_entries(stat_htab); + hash_seq_init(&hash_seq, stat_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + if (hash_search(stat_htab, &entry->queryid, HASH_REMOVE, NULL) == NULL) + elog(ERROR, "[AQO] hash table corrupted"); + num_remove++; + } + LWLockRelease(&aqo_state->stat_lock); + Assert(num_remove == num_entries); /* Is it really impossible? */ + + /* TODO: clean disk storage */ + + PG_RETURN_INT64(num_remove); +} + +PG_FUNCTION_INFO_V1(aqo_stat_remove); + +Datum +aqo_stat_remove(PG_FUNCTION_ARGS) +{ + uint64 queryid = (uint64) PG_GETARG_INT64(0); + StatEntry *entry; + bool removed; + + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + entry = (StatEntry *) hash_search(stat_htab, &queryid, HASH_REMOVE, NULL); + removed = (entry) ? 
true : false; + LWLockRelease(&aqo_state->stat_lock); + PG_RETURN_BOOL(removed); +} + +static const uint32 PGAQO_FILE_HEADER = 123467589; +static const uint32 PGAQO_PG_MAJOR_VERSION = PG_VERSION_NUM / 100; + +/* Implement data flushing according to pgss_shmem_shutdown() */ +void +aqo_stat_flush(void) { - return NULL; + HASH_SEQ_STATUS hash_seq; + StatEntry *entry; + FILE *file; + size_t entry_len = sizeof(StatEntry); + int32 num; + + file = AllocateFile(PGAQO_STAT_FILE ".tmp", PG_BINARY_W); + if (file == NULL) + goto error; + + LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + if (fwrite(&PGAQO_FILE_HEADER, sizeof(uint32), 1, file) != 1) + goto error; + if (fwrite(&PGAQO_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1) + goto error; + num = hash_get_num_entries(stat_htab); + + if (fwrite(&num, sizeof(int32), 1, file) != 1) + goto error; + + hash_seq_init(&hash_seq, stat_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + if (fwrite(entry, entry_len, 1, file) != 1) + { + hash_seq_term(&hash_seq); + goto error; + } + num--; + } + Assert(num == 0); + + if (FreeFile(file)) + { + file = NULL; + goto error; + } + + unlink(PGAQO_STAT_FILE); + LWLockRelease(&aqo_state->stat_lock); + (void) durable_rename(PGAQO_STAT_FILE ".tmp", PGAQO_STAT_FILE, LOG); + return; + +error: + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not write file \"%s\": %m", + PGAQO_STAT_FILE))); + unlink(PGAQO_STAT_FILE); + + if (file) + FreeFile(file); + LWLockRelease(&aqo_state->stat_lock); } + +void +aqo_stat_load(void) +{ + FILE *file; + int i; + uint32 header; + int32 num; + int32 pgver; + + file = AllocateFile(PGAQO_STAT_FILE, PG_BINARY_R); + if (file == NULL) + { + if (errno != ENOENT) + goto read_error; + return; + } + + if (fread(&header, sizeof(uint32), 1, file) != 1 || + fread(&pgver, sizeof(uint32), 1, file) != 1 || + fread(&num, sizeof(int32), 1, file) != 1) + goto read_error; + + if (header != PGAQO_FILE_HEADER || pgver != PGAQO_PG_MAJOR_VERSION) + goto 
data_error; + + for (i = 0; i < num; i++) + { + bool found; + StatEntry fentry; + StatEntry *entry; + + if (fread(&fentry, sizeof(StatEntry), 1, file) != 1) + goto read_error; + + entry = (StatEntry *) hash_search(stat_htab, &fentry.queryid, + HASH_ENTER, &found); + Assert(!found); + memcpy(entry, &fentry, sizeof(StatEntry)); + } + + FreeFile(file); + unlink(PGAQO_STAT_FILE); + return; + +read_error: + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", + PGAQO_STAT_FILE))); + goto fail; +data_error: + ereport(LOG, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ignoring invalid data in file \"%s\"", + PGAQO_STAT_FILE))); +fail: + if (file) + FreeFile(file); + unlink(PGAQO_STAT_FILE); +} \ No newline at end of file diff --git a/storage.h b/storage.h index cd8d0d7e..fe117859 100644 --- a/storage.h +++ b/storage.h @@ -1,10 +1,48 @@ #ifndef STORAGE_H #define STORAGE_H -#include "aqo.h" +#include "utils/array.h" + +#define STAT_SAMPLE_SIZE (20) + +/* + * Storage struct for AQO statistics + * It is mostly needed for auto tuning feature. With auto tuning mode aqo + * analyzes stability of last executions of the query, negative influence of + * strong cardinality estimation on a query execution (planner bug?) and so on. + * It can motivate aqo to suppress machine learning for this query class. + * Also, it can be used for an analytics. 
+ */ +typedef struct StatEntry +{ + uint64 queryid; /* The key in the hash table, should be the first field ever */ + + int64 execs_with_aqo; + int64 execs_without_aqo; + + int cur_stat_slot; + double exec_time[STAT_SAMPLE_SIZE]; + double plan_time[STAT_SAMPLE_SIZE]; + double est_error[STAT_SAMPLE_SIZE]; + + int cur_stat_slot_aqo; + double exec_time_aqo[STAT_SAMPLE_SIZE]; + double plan_time_aqo[STAT_SAMPLE_SIZE]; + double est_error_aqo[STAT_SAMPLE_SIZE]; +} StatEntry; extern bool aqo_use_file_storage; -extern void aqo_store_stat(uint64 queryid, QueryStat * stat); -extern QueryStat *aqo_load_stat(uint64 queryid); +extern HTAB *stat_htab; +extern HTAB *queries_htab; /* TODO */ +extern HTAB *data_htab; /* TODO */ + +extern StatEntry *aqo_stat_store(uint64 queryid, bool use_aqo, double plan_time, + double exec_time, double est_error); +extern void aqo_stat_flush(void); +extern void aqo_stat_load(void); + +/* Utility routines */ +extern ArrayType *form_vector(double *vector, int nrows); + #endif /* STORAGE_H */ diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 699ba169..3d574351 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -78,6 +78,7 @@ $node->safe_psql('postgres', " ALTER SYSTEM SET aqo.mode = 'disabled'; SELECT pg_reload_conf(); + SELECT * FROM aqo_stat_reset(); -- Remove old data "); $node->command_ok([ 'pgbench', '-t', "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], @@ -89,6 +90,7 @@ $fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries;"); $fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); $stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); +note("counter: $fss_count, $fs_count, $fs_samples_count, $stat_count"); is( (($fss_count == 0) and ($fs_count == 1) and ($fs_samples_count == 1) and ($stat_count == 0)), 1); # Check: no problems with stats collection in highly concurrent environment. 
@@ -127,7 +129,7 @@ }); # Avoid problems with an error fluctuations during the test above. -$node->safe_psql('postgres', "TRUNCATE aqo_query_stat"); +$node->safe_psql('postgres', "SELECT aqo_stat_reset()"); # Look for top of problematic queries. $node->command_ok([ 'pgbench', '-t', "10", '-c', "$CLIENTS", '-j', "$THREADS", @@ -208,7 +210,8 @@ # New queries won't add rows into AQO knowledge base. $node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'disabled'"); -$node->restart(); +$node->safe_psql('postgres', "SELECT pg_reload_conf()"); +$node->restart(); # AQO data storage should survive after a restart $res = $node->safe_psql('postgres', "SHOW aqo.mode"); is($res, 'disabled'); @@ -326,7 +329,7 @@ $node->safe_psql('postgres', " CREATE EXTENSION aqo; ALTER SYSTEM SET aqo.mode = 'intelligent'; - ALTER SYSTEM SET log_statement = 'all'; + ALTER SYSTEM SET log_statement = 'ddl'; SELECT pg_reload_conf(); "); $node->restart(); diff --git a/utils.c b/utils.c index 3fda40d6..029af9ab 100644 --- a/utils.c +++ b/utils.c @@ -114,46 +114,3 @@ inverse_permutation(int *idx, int n) inv[idx[i]] = i; return inv; } - -/* - * Allocates empty QueryStat object. 
- */ -QueryStat * -palloc_query_stat(void) -{ - QueryStat *res; - MemoryContext oldCxt; - - oldCxt = MemoryContextSwitchTo(AQOMemoryContext); - res = palloc0(sizeof(QueryStat)); - res->execution_time_with_aqo = palloc0(aqo_stat_size * - sizeof(res->execution_time_with_aqo[0])); - res->execution_time_without_aqo = palloc0(aqo_stat_size * - sizeof(res->execution_time_without_aqo[0])); - res->planning_time_with_aqo = palloc0(aqo_stat_size * - sizeof(res->planning_time_with_aqo[0])); - res->planning_time_without_aqo = palloc0(aqo_stat_size * - sizeof(res->planning_time_without_aqo[0])); - res->cardinality_error_with_aqo = palloc0(aqo_stat_size * - sizeof(res->cardinality_error_with_aqo[0])); - res->cardinality_error_without_aqo = palloc0(aqo_stat_size * - sizeof(res->cardinality_error_without_aqo[0])); - MemoryContextSwitchTo(oldCxt); - - return res; -} - -/* - * Frees QueryStat object. - */ -void -pfree_query_stat(QueryStat * stat) -{ - pfree(stat->execution_time_with_aqo); - pfree(stat->execution_time_without_aqo); - pfree(stat->planning_time_with_aqo); - pfree(stat->planning_time_without_aqo); - pfree(stat->cardinality_error_with_aqo); - pfree(stat->cardinality_error_without_aqo); - pfree(stat); -} From 16ce543cf4f1feacb5efc9be8182c1ea73637d19 Mon Sep 17 00:00:00 2001 From: "Andrey V. 
Lepikhov" Date: Wed, 8 Jun 2022 13:07:09 +0300 Subject: [PATCH 095/203] File storage for query texts --- aqo--1.4--1.5.sql | 44 +-- aqo.h | 1 - aqo_shared.c | 30 +- aqo_shared.h | 5 + expected/aqo_controlled.out | 7 + expected/aqo_disabled.out | 7 + expected/aqo_forced.out | 7 + expected/aqo_intelligent.out | 7 + expected/aqo_learn.out | 97 ++--- expected/clean_aqo_data.out | 14 +- expected/gucs.out | 34 +- expected/plancache.out | 2 +- expected/relocatable.out | 6 +- expected/temp_tables.out | 6 +- expected/top_queries.out | 6 +- expected/unsupported.out | 4 +- preprocessing.c | 5 +- sql/aqo_controlled.sql | 3 + sql/aqo_disabled.sql | 3 + sql/aqo_forced.sql | 3 + sql/aqo_intelligent.sql | 3 + sql/aqo_learn.sql | 16 +- sql/clean_aqo_data.sql | 14 +- sql/gucs.sql | 8 +- sql/plancache.sql | 2 +- sql/relocatable.sql | 6 +- sql/temp_tables.sql | 4 +- sql/top_queries.sql | 6 +- sql/unsupported.sql | 4 +- storage.c | 615 ++++++++++++++++++++++++-------- storage.h | 18 + t/001_pgbench.pl | 43 +-- t/002_pg_stat_statements_aqo.pl | 10 +- 33 files changed, 709 insertions(+), 331 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index e46938b0..5c73597e 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -28,12 +28,23 @@ CREATE TABLE aqo_queries ( auto_tuning boolean NOT NULL ); -CREATE TABLE aqo_query_texts ( - query_hash bigint CONSTRAINT aqo_query_texts_query_hash_idx PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, - query_text text NOT NULL -); +CREATE FUNCTION aqo_query_texts(OUT queryid bigint, OUT query_text text) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_query_texts' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; +CREATE FUNCTION aqo_qtexts_remove(queryid bigint) RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- Remove all records in the AQO storage. +-- Return number of rows removed. 
+-- +CREATE FUNCTION aqo_reset() RETURNS bigint +AS 'MODULE_PATHNAME' LANGUAGE C PARALLEL SAFE; +COMMENT ON FUNCTION aqo_reset() IS +'Reset all data gathered by AQO'; -/* Now redefine */ CREATE FUNCTION aqo_query_stat( OUT queryid bigint, OUT execution_time_with_aqo double precision[], @@ -50,17 +61,7 @@ AS 'MODULE_PATHNAME', 'aqo_query_stat' LANGUAGE C STRICT VOLATILE PARALLEL SAFE; CREATE VIEW aqo_query_stat AS SELECT * FROM aqo_query_stat(); - --- --- Remove all records in the AQO statistics. --- Return number of rows removed. --- -CREATE FUNCTION aqo_stat_reset() RETURNS bigint -AS 'MODULE_PATHNAME' -LANGUAGE C PARALLEL SAFE; - -COMMENT ON FUNCTION aqo_stat_reset() IS -'Reset query statistics gathered by AQO'; +CREATE VIEW aqo_query_texts AS SELECT * FROM aqo_query_texts(); CREATE FUNCTION aqo_stat_remove(fs bigint) RETURNS bool AS 'MODULE_PATHNAME' @@ -87,7 +88,7 @@ CREATE TABLE aqo_data ( CREATE UNIQUE INDEX aqo_fss_access_idx ON aqo_data (fspace_hash, fsspace_hash); INSERT INTO aqo_queries VALUES (0, false, false, 0, false); -INSERT INTO aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); +-- INSERT INTO aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); -- a virtual query for COMMON feature space CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE @@ -174,11 +175,9 @@ BEGIN SELECT count(*) FROM aqo_data WHERE fspace_hash = fs INTO num; - /* - * Remove the only from aqo_queries table. All other data will be removed by - * CASCADE deletion. 
- */ DELETE FROM aqo_queries WHERE query_hash = queryid; + PERFORM aqo_stat_remove(queryid); + PERFORM aqo_qtexts_remove(queryid); RETURN num; END; $$ LANGUAGE plpgsql; @@ -216,7 +215,8 @@ BEGIN -- Remove ALL feature space if one of oids isn't exists DELETE FROM aqo_queries WHERE fspace_hash = fs; - PERFORM * FROM aqo_stat_remove(fs); + PERFORM * FROM aqo_stat_remove(fs); + PERFORM * FROM aqo_qtexts_remove(fs); END LOOP; -- Calculate difference with previous state of knowledge base diff --git a/aqo.h b/aqo.h index a13a1c89..7ff47a2c 100644 --- a/aqo.h +++ b/aqo.h @@ -255,7 +255,6 @@ int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); extern bool find_query(uint64 qhash, QueryContextData *ctx); extern bool update_query(uint64 qhash, uint64 fhash, bool learn_aqo, bool use_aqo, bool auto_tuning); -extern bool add_query_text(uint64 query_hash, const char *query_string); extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe); extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, diff --git a/aqo_shared.c b/aqo_shared.c index 260b4cac..8cc7dc39 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -180,40 +180,54 @@ aqo_init_shmem(void) aqo_state = NULL; fss_htab = NULL; stat_htab = NULL; + qtexts_htab = NULL; LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); - aqo_state = ShmemInitStruct("aqo", sizeof(AQOSharedState), &found); + aqo_state = ShmemInitStruct("AQO", sizeof(AQOSharedState), &found); if (!found) { /* First time through ... 
*/ LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId()); aqo_state->dsm_handler = DSM_HANDLE_INVALID; + aqo_state->qtexts_dsa_handler = DSM_HANDLE_INVALID; + aqo_state->qtext_trancheid = LWLockNewTrancheId(); + aqo_state->qtexts_changed = false; LWLockInitialize(&aqo_state->stat_lock, LWLockNewTrancheId()); + LWLockInitialize(&aqo_state->qtexts_lock, LWLockNewTrancheId()); } info.keysize = sizeof(htab_key); info.entrysize = sizeof(htab_entry); - fss_htab = ShmemInitHash("aqo hash", + fss_htab = ShmemInitHash("AQO hash", aqo_htab_max_items, aqo_htab_max_items, &info, HASH_ELEM | HASH_BLOBS); info.keysize = sizeof(((StatEntry *) 0)->queryid); info.entrysize = sizeof(StatEntry); - stat_htab = ShmemInitHash("aqo stat hash", + stat_htab = ShmemInitHash("AQO Stat HTAB", fs_max_items, fs_max_items, - &info, - HASH_ELEM | HASH_BLOBS); + &info, HASH_ELEM | HASH_BLOBS); + + /* Init shared memory table for query texts */ + info.keysize = sizeof(((QueryTextEntry *) 0)->queryid); + info.entrysize = sizeof(QueryTextEntry); + qtexts_htab = ShmemInitHash("AQO Query Texts HTAB", + fs_max_items, fs_max_items, + &info, HASH_ELEM | HASH_BLOBS); LWLockRelease(AddinShmemInitLock); - LWLockRegisterTranche(aqo_state->lock.tranche, "aqo"); - LWLockRegisterTranche(aqo_state->stat_lock.tranche, "aqo stat storage"); + LWLockRegisterTranche(aqo_state->lock.tranche, "AQO"); + LWLockRegisterTranche(aqo_state->stat_lock.tranche, "AQO Stat Lock Tranche"); + LWLockRegisterTranche(aqo_state->qtexts_lock.tranche, "AQO QTexts Lock Tranche"); + LWLockRegisterTranche(aqo_state->qtext_trancheid, "AQO Query Texts Tranche"); + if (!IsUnderPostmaster) { - on_shmem_exit(on_shmem_shutdown, (Datum) 0); + before_shmem_exit(on_shmem_shutdown, (Datum) 0); aqo_stat_load(); } } diff --git a/aqo_shared.h b/aqo_shared.h index 31f5ec28..b2daf082 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -30,6 +30,11 @@ typedef struct AQOSharedState /* Storage fields */ LWLock stat_lock; /* lock for access to stat storage */ 
+ + LWLock qtexts_lock; /* Lock for shared fields below */ + dsa_handle qtexts_dsa_handler; /* DSA area for storing of query texts */ + int qtext_trancheid; + bool qtexts_changed; } AQOSharedState; diff --git a/expected/aqo_controlled.out b/expected/aqo_controlled.out index 11a46395..5f019e83 100644 --- a/expected/aqo_controlled.out +++ b/expected/aqo_controlled.out @@ -297,4 +297,11 @@ DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; DROP INDEX aqo_test2_idx_a; DROP TABLE aqo_test2; +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT aqo_reset(); + aqo_reset +----------- + 22 +(1 row) + DROP EXTENSION aqo; diff --git a/expected/aqo_disabled.out b/expected/aqo_disabled.out index 9ec08977..3438d5b8 100644 --- a/expected/aqo_disabled.out +++ b/expected/aqo_disabled.out @@ -215,6 +215,13 @@ SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be z 0 (1 row) +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT aqo_reset(); + aqo_reset +----------- + 8 +(1 row) + DROP EXTENSION aqo; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; diff --git a/expected/aqo_forced.out b/expected/aqo_forced.out index 11032f2f..e3d40bfc 100644 --- a/expected/aqo_forced.out +++ b/expected/aqo_forced.out @@ -82,4 +82,11 @@ DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT aqo_reset(); + aqo_reset +----------- + 0 +(1 row) + DROP EXTENSION aqo; diff --git a/expected/aqo_intelligent.out b/expected/aqo_intelligent.out index f3724e2b..739f1ec5 100644 --- a/expected/aqo_intelligent.out +++ b/expected/aqo_intelligent.out @@ -503,4 +503,11 @@ DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; +-- XXX: extension dropping doesn't clear file storage. Do it manually. 
+SELECT aqo_reset(); + aqo_reset +----------- + 48 +(1 row) + DROP EXTENSION aqo; diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 2f0767ce..7aeecb22 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -244,72 +244,36 @@ SELECT aqo_cleanup(); -- Result of the query below should be empty SELECT * FROM aqo_query_texts aqt1, aqo_query_texts aqt2 -WHERE aqt1.query_text = aqt2.query_text AND aqt1.query_hash <> aqt2.query_hash; - query_hash | query_text | query_hash | query_text -------------+------------+------------+------------ +WHERE aqt1.query_text = aqt2.query_text AND aqt1.queryid <> aqt2.queryid; + queryid | query_text | queryid | query_text +---------+------------+---------+------------ (0 rows) -- Fix the state of the AQO data -SELECT reliability,nfeatures,query_text +SELECT min(reliability),sum(nfeatures),query_text FROM aqo_data ad, aqo_query_texts aqt -WHERE aqt.query_hash = ad.fspace_hash -ORDER BY (md5(query_text)) +WHERE aqt.queryid = ad.fspace_hash +GROUP BY (query_text) ORDER BY (md5(query_text)) ; - reliability | nfeatures | query_text --------------+-----------+---------------------------------------------------------------------------------------- - {1} | 1 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 + - | | WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; - {1} | 5 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 + - | | WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; - {1} | 4 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 + - | | WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; - {1,1,1} | 4 | EXPLAIN SELECT * FROM aqo_test0 + - | | WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; - {1} | 0 | SELECT count(*) FROM tmp1; - {1} | 0 | SELECT count(*) FROM tmp1; - {1} | 6 | EXPlAIN SELECT t1.a, t2.b, t3.c + - | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + - | | WHERE t1.a < 1 AND t3.b < 1 
AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 1 | EXPlAIN SELECT t1.a, t2.b, t3.c + - | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + - | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c + - | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + - | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 3 | EXPlAIN SELECT t1.a, t2.b, t3.c + - | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + - | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c + - | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + - | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 2 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + - | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; - {1} | 2 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + - | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; - {1} | 4 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + - | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; - {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + - | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + - | | WHERE t1.a = t2.b AND t2.a = t3.b; - {1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + - | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + - | | WHERE t1.a = t2.b AND t2.a = t3.b; - {1} | 0 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + - | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + - | | WHERE t1.a = t2.b AND t2.a = t3.b; - {1} | 3 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + - | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + - | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; - {1} | 0 | 
EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + - | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + - | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; - {1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + - | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + - | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; - {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + - | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + - | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -(21 rows) + min | sum | query_text +---------+-----+---------------------------------------------------------------------------------------- + {1} | 10 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; + {1,1,1} | 4 | EXPLAIN SELECT * FROM aqo_test0 + + | | WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; + {1} | 0 | SELECT count(*) FROM tmp1; + {1} | 14 | EXPlAIN SELECT t1.a, t2.b, t3.c + + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; + {1} | 8 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; + {1} | 3 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + + | | WHERE t1.a = t2.b AND t2.a = t3.b; + {1} | 6 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +(7 rows) DROP TABLE tmp1; SET aqo.mode = 'controlled'; @@ -568,7 +532,7 @@ SELECT count(*) FROM 1 (1 row) -SELECT query_text FROM aqo_query_texts WHERE query_hash <> 0; -- Check query +SELECT query_text FROM aqo_query_texts WHERE 
queryid <> 0; -- Check query query_text ---------------------------------------------------------------------------- explain analyze + @@ -736,10 +700,17 @@ SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) A 9 (1 row) +DROP FUNCTION check_estimated_rows; RESET aqo.join_threshold; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT aqo_reset(); + aqo_reset +----------- + 18 +(1 row) + DROP EXTENSION aqo; -DROP FUNCTION check_estimated_rows; diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index 570e8067..43279254 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -38,7 +38,7 @@ SELECT count(*) FROM aqo_queries WHERE (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- @@ -82,7 +82,7 @@ SELECT count(*) FROM aqo_queries WHERE (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count @@ -152,7 +152,7 @@ SELECT count(*) FROM aqo_queries WHERE (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 
@@ -181,7 +181,7 @@ SELECT count(*) FROM aqo_queries WHERE (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- @@ -224,7 +224,7 @@ SELECT count(*) FROM aqo_queries WHERE (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count @@ -257,7 +257,7 @@ SELECT count(*) FROM aqo_queries WHERE (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count @@ -297,7 +297,7 @@ SELECT count(*) FROM aqo_queries WHERE (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count diff --git a/expected/gucs.out b/expected/gucs.out index b80e9e23..b594cbea 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -5,10 +5,10 @@ SET aqo.show_details = true; CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; -SELECT 
* FROM aqo_stat_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. - aqo_stat_reset ----------------- - 61 +SELECT * FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. + aqo_reset +----------- + 12 (1 row) -- Check AQO addons to explain (the only stable data) @@ -68,10 +68,10 @@ SELECT obj_description('aqo_reset_query'::regproc::oid); Remove from AQO storage only learning data for given QueryId. (1 row) -SELECT obj_description('aqo_stat_reset'::regproc::oid); - obj_description ----------------------------------------- - Reset query statistics gathered by AQO +SELECT obj_description('aqo_reset'::regproc::oid); + obj_description +-------------------------------- + Reset all data gathered by AQO (1 row) \df aqo_cardinality_error @@ -109,11 +109,11 @@ SELECT obj_description('aqo_stat_reset'::regproc::oid); public | aqo_reset_query | integer | queryid bigint | func (1 row) -\df aqo_stat_reset - List of functions - Schema | Name | Result data type | Argument data types | Type ---------+----------------+------------------+---------------------+------ - public | aqo_stat_reset | bigint | | func +\df aqo_reset + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-----------+------------------+---------------------+------ + public | aqo_reset | bigint | | func (1 row) -- Check stat reset @@ -123,10 +123,10 @@ SELECT count(*) FROM aqo_query_stat; 1 (1 row) -SELECT * FROM aqo_stat_reset(); -- Remove one record - aqo_stat_reset ----------------- - 1 +SELECT * FROM aqo_reset(); -- Remove one record from texts and one from stat + aqo_reset +----------- + 2 (1 row) SELECT count(*) FROM aqo_query_stat; diff --git a/expected/plancache.out b/expected/plancache.out index 3808bc6c..edcf30e7 100644 --- a/expected/plancache.out +++ b/expected/plancache.out @@ -24,7 +24,7 @@ BEGIN execute 'EXECUTE fooplan(1)'; END LOOP; - SELECT query_hash FROM aqo_query_texts + SELECT queryid FROM 
aqo_query_texts WHERE query_text LIKE '%count(*) FROM test WHERE x%' INTO qhash; RETURN QUERY SELECT executions_without_aqo nnex, diff --git a/expected/relocatable.out b/expected/relocatable.out index ec9d88b2..d869ca3b 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -11,7 +11,7 @@ SELECT count(*) FROM test; (1 row) SELECT query_text,learn_aqo, use_aqo, auto_tuning -FROM aqo_query_texts JOIN aqo_queries USING (query_hash) +FROM aqo_query_texts JOIN aqo_queries ON (queryid = query_hash) ; -- Check result. TODO: use aqo_status() query_text | learn_aqo | use_aqo | auto_tuning ---------------------------------------+-----------+---------+------------- @@ -36,7 +36,7 @@ SELECT count(*) FROM test WHERE id < 10; (1 row) SELECT query_text,learn_aqo, use_aqo, auto_tuning -FROM test.aqo_query_texts JOIN test.aqo_queries USING (query_hash) +FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) ; -- Check result. TODO: We want to find here both queries executed above query_text | learn_aqo | use_aqo | auto_tuning ---------------------------------------+-----------+---------+------------- @@ -64,7 +64,7 @@ SELECT count(*) FROM test WHERE id < 10; (1 row) SELECT query_text,learn_aqo, use_aqo, auto_tuning -FROM test.aqo_query_texts JOIN test.aqo_queries USING (query_hash) +FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) ; -- Check result. 
query_text | learn_aqo | use_aqo | auto_tuning ------------------------------------------+-----------+---------+------------- diff --git a/expected/temp_tables.out b/expected/temp_tables.out index 745aabdb..bd214fd2 100644 --- a/expected/temp_tables.out +++ b/expected/temp_tables.out @@ -73,13 +73,13 @@ SELECT count(*) FROM aqo_data; -- Should be 0 (1 row) SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt -ON aq.query_hash = aqt.query_hash -; -- TODO: should contain just one row +ON aq.query_hash = aqt.queryid +ORDER BY (md5(query_text)); -- TODO: should contain just one row query_text ------------------------------------------ + SELECT count(*) FROM tt AS t1, tt AS t2; COMMON feature space (do not delete!) SELECT count(*) FROM tt; - SELECT count(*) FROM tt AS t1, tt AS t2; (3 rows) -- Test learning on temporary table diff --git a/expected/top_queries.out b/expected/top_queries.out index 250f1cad..cc5592df 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -37,7 +37,7 @@ SELECT num FROM aqo_execution_time(false); -- Without the AQO control queries with and without temp tables are logged. 
SELECT query_text,nexecs FROM aqo_execution_time(false) ce, aqo_query_texts aqt -WHERE ce.id = aqt.query_hash +WHERE ce.id = aqt.queryid ORDER BY (md5(query_text)); query_text | nexecs ------------------------------------------------------+-------- @@ -69,7 +69,7 @@ SELECT num, to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te WHERE te.fshash = ( SELECT fspace_hash FROM aqo_queries WHERE aqo_queries.query_hash = ( - SELECT aqo_query_texts.query_hash FROM aqo_query_texts + SELECT aqo_query_texts.queryid FROM aqo_query_texts WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' ) ); @@ -88,7 +88,7 @@ SELECT count(*) FROM aqo_cardinality_error(true); -- Fix list of logged queries SELECT query_text,nexecs FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt -WHERE ce.id = aqt.query_hash +WHERE ce.id = aqt.queryid ORDER BY (md5(query_text)); query_text | nexecs ------------------------------------------------------------------------------------------------+-------- diff --git a/expected/unsupported.out b/expected/unsupported.out index 9596fcc4..243de4dd 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -543,7 +543,7 @@ EXPLAIN (COSTS OFF) -- Live with this variant of the test for some time. SELECT to_char(error, '9.99EEEE')::text AS error, query_text FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt -WHERE aqt.query_hash = cef.id +WHERE aqt.queryid = cef.id ORDER BY (md5(query_text),error) DESC; error | query_text -----------+------------------------------------------------------------------------------------------------ @@ -589,7 +589,7 @@ SELECT aqo_cleanup(); -- Look for any remaining queries in the ML storage. 
SELECT to_char(error, '9.99EEEE')::text AS error, query_text FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt -WHERE aqt.query_hash = cef.id +WHERE aqt.queryid = cef.id ORDER BY (md5(query_text),error) DESC; error | query_text -------+------------ diff --git a/preprocessing.c b/preprocessing.c index d27db49f..43312e80 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -323,10 +323,9 @@ aqo_planner(Query *parse, /* * Add query text into the ML-knowledge base. Just for further - * analysis. In the case of cached plans we could have NULL query text. + * analysis. In the case of cached plans we may have NULL query text. */ - if (query_string != NULL) - add_query_text(query_context.query_hash, query_string); + aqo_qtext_store(query_context.query_hash, query_string); LockRelease(&tag, ExclusiveLock, false); } diff --git a/sql/aqo_controlled.sql b/sql/aqo_controlled.sql index ed39323b..c337c702 100644 --- a/sql/aqo_controlled.sql +++ b/sql/aqo_controlled.sql @@ -150,4 +150,7 @@ DROP TABLE aqo_test1; DROP INDEX aqo_test2_idx_a; DROP TABLE aqo_test2; +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT aqo_reset(); + DROP EXTENSION aqo; diff --git a/sql/aqo_disabled.sql b/sql/aqo_disabled.sql index 28c074a9..8c8e487c 100644 --- a/sql/aqo_disabled.sql +++ b/sql/aqo_disabled.sql @@ -95,6 +95,9 @@ FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT aqo_reset(); + DROP EXTENSION aqo; DROP INDEX aqo_test0_idx_a; diff --git a/sql/aqo_forced.sql b/sql/aqo_forced.sql index c637beb8..bf64470c 100644 --- a/sql/aqo_forced.sql +++ b/sql/aqo_forced.sql @@ -57,4 +57,7 @@ DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; +-- XXX: extension dropping doesn't clear file storage. 
Do it manually. +SELECT aqo_reset(); + DROP EXTENSION aqo; diff --git a/sql/aqo_intelligent.sql b/sql/aqo_intelligent.sql index 8c560e3e..028ce936 100644 --- a/sql/aqo_intelligent.sql +++ b/sql/aqo_intelligent.sql @@ -209,4 +209,7 @@ DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT aqo_reset(); + DROP EXTENSION aqo; diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index ad06fafb..b7dcfea5 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -128,13 +128,13 @@ SELECT aqo_cleanup(); -- Result of the query below should be empty SELECT * FROM aqo_query_texts aqt1, aqo_query_texts aqt2 -WHERE aqt1.query_text = aqt2.query_text AND aqt1.query_hash <> aqt2.query_hash; +WHERE aqt1.query_text = aqt2.query_text AND aqt1.queryid <> aqt2.queryid; -- Fix the state of the AQO data -SELECT reliability,nfeatures,query_text +SELECT min(reliability),sum(nfeatures),query_text FROM aqo_data ad, aqo_query_texts aqt -WHERE aqt.query_hash = ad.fspace_hash -ORDER BY (md5(query_text)) +WHERE aqt.queryid = ad.fspace_hash +GROUP BY (query_text) ORDER BY (md5(query_text)) ; DROP TABLE tmp1; @@ -233,7 +233,7 @@ SELECT * FROM check_estimated_rows(' SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 ; -SELECT query_text FROM aqo_query_texts WHERE query_hash <> 0; -- Check query +SELECT query_text FROM aqo_query_texts WHERE queryid <> 0; -- Check query SET aqo.join_threshold = 1; SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); @@ -301,10 +301,14 @@ SELECT * FROM check_estimated_rows(' ; -- One JOIN extracted from CTE, another - from a FROM part of the query SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +DROP FUNCTION check_estimated_rows; RESET aqo.join_threshold; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; + +-- XXX: extension dropping 
doesn't clear file storage. Do it manually. +SELECT aqo_reset(); + DROP EXTENSION aqo; -DROP FUNCTION check_estimated_rows; diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index 39f7e170..6ecf92ea 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -20,7 +20,7 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE @@ -41,7 +41,7 @@ SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE @@ -72,7 +72,7 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT 
aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE @@ -82,7 +82,7 @@ SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE @@ -102,7 +102,7 @@ SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE @@ -116,7 +116,7 @@ SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = 
aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE @@ -133,7 +133,7 @@ SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE diff --git a/sql/gucs.sql b/sql/gucs.sql index 9ce9c1a6..fe2c4d17 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -8,7 +8,7 @@ CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; -SELECT * FROM aqo_stat_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. +SELECT * FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. 
-- Check AQO addons to explain (the only stable data) EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; @@ -22,18 +22,18 @@ SELECT obj_description('aqo_execution_time'::regproc::oid); SELECT obj_description('aqo_drop_class'::regproc::oid); SELECT obj_description('aqo_cleanup'::regproc::oid); SELECT obj_description('aqo_reset_query'::regproc::oid); -SELECT obj_description('aqo_stat_reset'::regproc::oid); +SELECT obj_description('aqo_reset'::regproc::oid); \df aqo_cardinality_error \df aqo_execution_time \df aqo_drop_class \df aqo_cleanup \df aqo_reset_query -\df aqo_stat_reset +\df aqo_reset -- Check stat reset SELECT count(*) FROM aqo_query_stat; -SELECT * FROM aqo_stat_reset(); -- Remove one record +SELECT * FROM aqo_reset(); -- Remove one record from texts and one from stat SELECT count(*) FROM aqo_query_stat; DROP EXTENSION aqo; diff --git a/sql/plancache.sql b/sql/plancache.sql index 529db2be..3b074b90 100644 --- a/sql/plancache.sql +++ b/sql/plancache.sql @@ -27,7 +27,7 @@ BEGIN execute 'EXECUTE fooplan(1)'; END LOOP; - SELECT query_hash FROM aqo_query_texts + SELECT queryid FROM aqo_query_texts WHERE query_text LIKE '%count(*) FROM test WHERE x%' INTO qhash; RETURN QUERY SELECT executions_without_aqo nnex, diff --git a/sql/relocatable.sql b/sql/relocatable.sql index 64a29808..cfc76333 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -8,7 +8,7 @@ ANALYZE test; -- Learn on a query SELECT count(*) FROM test; SELECT query_text,learn_aqo, use_aqo, auto_tuning -FROM aqo_query_texts JOIN aqo_queries USING (query_hash) +FROM aqo_query_texts JOIN aqo_queries ON (queryid = query_hash) ; -- Check result. TODO: use aqo_status() -- Create a schema and move AQO into it. 
@@ -20,7 +20,7 @@ SELECT count(*) FROM test; SELECT count(*) FROM test WHERE id < 10; SELECT query_text,learn_aqo, use_aqo, auto_tuning -FROM test.aqo_query_texts JOIN test.aqo_queries USING (query_hash) +FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) ; -- Check result. TODO: We want to find here both queries executed above -- Add schema which contains AQO to the end of search_path @@ -30,7 +30,7 @@ SELECT count(*) FROM test; SELECT count(*) FROM test WHERE id < 10; SELECT query_text,learn_aqo, use_aqo, auto_tuning -FROM test.aqo_query_texts JOIN test.aqo_queries USING (query_hash) +FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) ; -- Check result. /* diff --git a/sql/temp_tables.sql b/sql/temp_tables.sql index ab594e40..04db87a1 100644 --- a/sql/temp_tables.sql +++ b/sql/temp_tables.sql @@ -23,8 +23,8 @@ DROP TABLE pt; SELECT aqo_cleanup(); SELECT count(*) FROM aqo_data; -- Should be 0 SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt -ON aq.query_hash = aqt.query_hash -; -- TODO: should contain just one row +ON aq.query_hash = aqt.queryid +ORDER BY (md5(query_text)); -- TODO: should contain just one row -- Test learning on temporary table CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; diff --git a/sql/top_queries.sql b/sql/top_queries.sql index f7b4fb59..98b27846 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -19,7 +19,7 @@ SELECT num FROM aqo_execution_time(false); -- Without the AQO control queries with and without temp tables are logged. 
SELECT query_text,nexecs FROM aqo_execution_time(false) ce, aqo_query_texts aqt -WHERE ce.id = aqt.query_hash +WHERE ce.id = aqt.queryid ORDER BY (md5(query_text)); -- @@ -37,7 +37,7 @@ SELECT num, to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te WHERE te.fshash = ( SELECT fspace_hash FROM aqo_queries WHERE aqo_queries.query_hash = ( - SELECT aqo_query_texts.query_hash FROM aqo_query_texts + SELECT aqo_query_texts.queryid FROM aqo_query_texts WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' ) ); @@ -48,7 +48,7 @@ SELECT count(*) FROM aqo_cardinality_error(true); -- Fix list of logged queries SELECT query_text,nexecs FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt -WHERE ce.id = aqt.query_hash +WHERE ce.id = aqt.queryid ORDER BY (md5(query_text)); DROP EXTENSION aqo CASCADE; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 127ae18b..04970481 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -159,7 +159,7 @@ EXPLAIN (COSTS OFF) -- Live with this variant of the test for some time. SELECT to_char(error, '9.99EEEE')::text AS error, query_text FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt -WHERE aqt.query_hash = cef.id +WHERE aqt.queryid = cef.id ORDER BY (md5(query_text),error) DESC; DROP TABLE t,t1 CASCADE; @@ -169,7 +169,7 @@ SELECT aqo_cleanup(); -- Look for any remaining queries in the ML storage. 
SELECT to_char(error, '9.99EEEE')::text AS error, query_text FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt -WHERE aqt.query_hash = cef.id +WHERE aqt.queryid = cef.id ORDER BY (md5(query_text),error) DESC; DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index ea6afa70..5c0ad760 100644 --- a/storage.c +++ b/storage.c @@ -23,7 +23,6 @@ #include "access/table.h" #include "access/tableam.h" #include "miscadmin.h" -#include "pgstat.h" #include "aqo.h" #include "aqo_shared.h" @@ -264,68 +263,6 @@ update_query(uint64 qhash, uint64 fhash, return result; } -/* - * Creates entry for new query in aqo_query_texts table with given fields. - * Returns false if the operation failed, true otherwise. - */ -bool -add_query_text(uint64 qhash, const char *query_string) -{ - Relation hrel; - Relation irel; - HeapTuple tuple; - Datum values[2]; - bool isnull[2] = {false, false}; - - /* Variables for checking of concurrent writings. */ - TupleTableSlot *slot; - IndexScanDesc scan; - ScanKeyData key; - SnapshotData snap; - - values[0] = Int64GetDatum(qhash); - values[1] = CStringGetTextDatum(query_string); - - /* Couldn't allow to write if xact must be read-only. */ - if (XactReadOnly) - return false; - - if (!open_aqo_relation(NULL, "aqo_query_texts", - "aqo_query_texts_query_hash_idx", - RowExclusiveLock, &hrel, &irel)) - return false; - - tuple = heap_form_tuple(RelationGetDescr(hrel), values, isnull); - - /* - * Start an index scan. Use dirty snapshot to check concurrent updates that - * can be made before, but still not visible. 
- */ - InitDirtySnapshot(snap); - scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); - - index_rescan(scan, &key, 1, NULL, 0); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - - if (!index_getnext_slot(scan, ForwardScanDirection, slot)) - { - tuple = heap_form_tuple(RelationGetDescr(hrel), values, isnull); - - simple_heap_insert(hrel, tuple); - my_index_insert(irel, values, isnull, &(tuple->t_self), hrel, - UNIQUE_CHECK_YES); - } - - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, RowExclusiveLock); - table_close(hrel, RowExclusiveLock); - - CommandCounterIncrement(); - return true; -} - /* static ArrayType * form_strings_vector(List *reloids) @@ -952,7 +889,41 @@ add_deactivated_query(uint64 query_hash) * **************************************************************************** */ +#include "funcapi.h" +#include "pgstat.h" + #define PGAQO_STAT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_statistics.stat" +#define PGAQO_TEXT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_query_texts.stat" + +PG_FUNCTION_INFO_V1(aqo_query_stat); +//PG_FUNCTION_INFO_V1(aqo_stat_reset); // ? +PG_FUNCTION_INFO_V1(aqo_query_texts); +PG_FUNCTION_INFO_V1(aqo_stat_remove); +PG_FUNCTION_INFO_V1(aqo_qtexts_remove); +//PG_FUNCTION_INFO_V1(aqo_qtexts_reset); // ? 
+PG_FUNCTION_INFO_V1(aqo_reset); + +typedef enum { + QUERYID = 0, + EXEC_TIME_AQO, + EXEC_TIME, + PLAN_TIME_AQO, + PLAN_TIME, + EST_ERROR_AQO, + EST_ERROR, + NEXECS_AQO, + NEXECS, + TOTAL_NCOLS +} aqo_stat_cols; + +typedef enum { + QT_QUERYID = 0, + QT_QUERY_STRING, + QT_TOTAL_NCOLS +} aqo_qtexts_cols; + +typedef void* (*form_record_t) (void *ctx, size_t *size); +typedef void (*deform_record_t) (void *data, size_t size); bool aqo_use_file_storage; @@ -960,8 +931,19 @@ HTAB *stat_htab = NULL; HTAB *queries_htab = NULL; /* TODO */ HTAB *data_htab = NULL; /* TODO */ +HTAB *qtexts_htab = NULL; +dsa_area *qtext_dsa = NULL; /* TODO: think about how to keep query texts. */ +/* Used to check data file consistency */ +static const uint32 PGAQO_FILE_HEADER = 123467589; +static const uint32 PGAQO_PG_MAJOR_VERSION = PG_VERSION_NUM / 100; + +static void dsa_init(void); +static int data_store(const char *filename, form_record_t callback, + long nrecs, void *ctx); +static void data_load(const char *filename, deform_record_t callback, void *ctx); + /* * Update AQO statistics. * @@ -1040,22 +1022,6 @@ aqo_stat_store(uint64 queryid, bool use_aqo, return entry; } -#include "funcapi.h" -PG_FUNCTION_INFO_V1(aqo_query_stat); - -typedef enum { - QUERYID = 0, - EXEC_TIME_AQO, - EXEC_TIME, - PLAN_TIME_AQO, - PLAN_TIME, - EST_ERROR_AQO, - EST_ERROR, - NEXECS_AQO, - NEXECS, - TOTAL_NCOLS -} aqo_stat_cols; - /* * Returns AQO statistics on controlled query classes. */ @@ -1120,13 +1086,11 @@ aqo_query_stat(PG_FUNCTION_ARGS) return (Datum) 0; } -PG_FUNCTION_INFO_V1(aqo_stat_reset); - -Datum -aqo_stat_reset(PG_FUNCTION_ARGS) +static long +aqo_stat_reset(void) { HASH_SEQ_STATUS hash_seq; - StatEntry *entry; + StatEntry *entry; long num_remove = 0; long num_entries; @@ -1142,19 +1106,17 @@ aqo_stat_reset(PG_FUNCTION_ARGS) LWLockRelease(&aqo_state->stat_lock); Assert(num_remove == num_entries); /* Is it really impossible? 
*/ - /* TODO: clean disk storage */ + aqo_stat_flush(); - PG_RETURN_INT64(num_remove); + return num_remove; } -PG_FUNCTION_INFO_V1(aqo_stat_remove); - Datum aqo_stat_remove(PG_FUNCTION_ARGS) { - uint64 queryid = (uint64) PG_GETARG_INT64(0); - StatEntry *entry; - bool removed; + uint64 queryid = (uint64) PG_GETARG_INT64(0); + StatEntry *entry; + bool removed; LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); entry = (StatEntry *) hash_search(stat_htab, &queryid, HASH_REMOVE, NULL); @@ -1163,78 +1125,234 @@ aqo_stat_remove(PG_FUNCTION_ARGS) PG_RETURN_BOOL(removed); } -static const uint32 PGAQO_FILE_HEADER = 123467589; -static const uint32 PGAQO_PG_MAJOR_VERSION = PG_VERSION_NUM / 100; +static void * +_form_stat_record_cb(void *ctx, size_t *size) +{ + HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; + StatEntry *entry; + + *size = sizeof(StatEntry); + entry = hash_seq_search(hash_seq); + if (entry == NULL) + return NULL; + + return memcpy(palloc(*size), entry, *size); +} /* Implement data flushing according to pgss_shmem_shutdown() */ + void aqo_stat_flush(void) { HASH_SEQ_STATUS hash_seq; - StatEntry *entry; - FILE *file; - size_t entry_len = sizeof(StatEntry); - int32 num; + int ret; + long entries; - file = AllocateFile(PGAQO_STAT_FILE ".tmp", PG_BINARY_W); + LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + entries = hash_get_num_entries(stat_htab); + hash_seq_init(&hash_seq, stat_htab); + ret = data_store(PGAQO_STAT_FILE, _form_stat_record_cb, entries, + (void *) &hash_seq); + if (ret != 0) + hash_seq_term(&hash_seq); + + LWLockRelease(&aqo_state->stat_lock); +} + +static void * +_form_qtext_record_cb(void *ctx, size_t *size) +{ + HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; + QueryTextEntry *entry; + void *data; + char *query_string; + char *ptr; + + entry = hash_seq_search(hash_seq); + if (entry == NULL) + return NULL; + + Assert(DsaPointerIsValid(entry->qtext_dp)); + query_string = dsa_get_address(qtext_dsa, entry->qtext_dp); + *size = 
sizeof(entry->queryid) + strlen(query_string) + 1; + data = palloc(*size); + ptr = data; + memcpy(ptr, &entry->queryid, sizeof(entry->queryid)); + ptr += sizeof(entry->queryid); + memcpy(ptr, query_string, strlen(query_string) + 1); + return memcpy(palloc(*size), data, *size); +} + +void +aqo_qtexts_flush(void) +{ + HASH_SEQ_STATUS hash_seq; + int ret; + long entries; + + dsa_init(); + LWLockAcquire(&aqo_state->qtexts_lock, LW_SHARED); + + if (!aqo_state->qtexts_changed) + /* XXX: mull over forced mode. */ + goto end; + + entries = hash_get_num_entries(qtexts_htab); + hash_seq_init(&hash_seq, qtexts_htab); + ret = data_store(PGAQO_TEXT_FILE, _form_qtext_record_cb, entries, + (void *) &hash_seq); + if (ret != 0) + hash_seq_term(&hash_seq); + aqo_state->qtexts_changed = false; + +end: + LWLockRelease(&aqo_state->qtexts_lock); +} + +static int +data_store(const char *filename, form_record_t callback, + long nrecs, void *ctx) +{ + FILE *file; + size_t size; + uint counter = 0; + void *data; + char *tmpfile; + + tmpfile = psprintf("%s.tmp", filename); + file = AllocateFile(tmpfile, PG_BINARY_W); if (file == NULL) goto error; - LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); - if (fwrite(&PGAQO_FILE_HEADER, sizeof(uint32), 1, file) != 1) - goto error; - if (fwrite(&PGAQO_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1) - goto error; - num = hash_get_num_entries(stat_htab); - - if (fwrite(&num, sizeof(int32), 1, file) != 1) + if (fwrite(&PGAQO_FILE_HEADER, sizeof(uint32), 1, file) != 1 || + fwrite(&PGAQO_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1 || + fwrite(&nrecs, sizeof(long), 1, file) != 1) goto error; - hash_seq_init(&hash_seq, stat_htab); - while ((entry = hash_seq_search(&hash_seq)) != NULL) + while ((data = callback(ctx, &size)) != NULL) { - if (fwrite(entry, entry_len, 1, file) != 1) - { - hash_seq_term(&hash_seq); + /* TODO: Add CRC code ? 
*/ + if (fwrite(&size, sizeof(size), 1, file) != 1 || + fwrite(data, size, 1, file) != 1) goto error; - } - num--; + pfree(data); + counter++; } - Assert(num == 0); + Assert(counter == nrecs); if (FreeFile(file)) { file = NULL; goto error; } - unlink(PGAQO_STAT_FILE); - LWLockRelease(&aqo_state->stat_lock); - (void) durable_rename(PGAQO_STAT_FILE ".tmp", PGAQO_STAT_FILE, LOG); - return; + (void) durable_rename(tmpfile, filename, LOG); + pfree(tmpfile); + elog(DEBUG2, "[AQO] %d records stored in file %s.", counter, filename); + return 0; error: ereport(LOG, (errcode_for_file_access(), - errmsg("could not write file \"%s\": %m", - PGAQO_STAT_FILE))); - unlink(PGAQO_STAT_FILE); + errmsg("could not write file \"%s\": %m", tmpfile))); if (file) FreeFile(file); - LWLockRelease(&aqo_state->stat_lock); + unlink(tmpfile); + pfree(tmpfile); + return -1; +} + +static void +_deform_stat_record_cb(void *data, size_t size) +{ + bool found; + StatEntry *entry; + uint64 queryid; + + Assert(LWLockHeldByMeInMode(&aqo_state->stat_lock, LW_EXCLUSIVE)); + Assert(size == sizeof(StatEntry)); + + queryid = ((StatEntry *) data)->queryid; + entry = (StatEntry *) hash_search(stat_htab, &queryid, HASH_ENTER, &found); + Assert(!found); + memcpy(entry, data, sizeof(StatEntry)); } void aqo_stat_load(void) +{ + long entries; + + Assert(!LWLockHeldByMe(&aqo_state->stat_lock)); + + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + entries = hash_get_num_entries(stat_htab); + Assert(entries == 0); + data_load(PGAQO_STAT_FILE, _deform_stat_record_cb, NULL); + + LWLockRelease(&aqo_state->stat_lock); +} + + +static void +_deform_qtexts_record_cb(void *data, size_t size) +{ + bool found; + QueryTextEntry *entry; + uint64 queryid = *(uint64 *) data; + char *query_string = (char *) data + sizeof(queryid); + size_t len = size - sizeof(queryid); + char *strptr; + + Assert(LWLockHeldByMeInMode(&aqo_state->qtexts_lock, LW_EXCLUSIVE)); + Assert(strlen(query_string) + 1 == len); + entry = (QueryTextEntry *) 
hash_search(qtexts_htab, &queryid, + HASH_ENTER, &found); + Assert(!found); + + entry->qtext_dp = dsa_allocate(qtext_dsa, len); + Assert(DsaPointerIsValid(entry->qtext_dp)); + strptr = (char *) dsa_get_address(qtext_dsa, entry->qtext_dp); + strlcpy(strptr, query_string, len); +} + +void +aqo_qtexts_load(void) +{ + uint64 queryid = 0; + bool found; + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + Assert(qtext_dsa != NULL); + + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); + Assert(hash_get_num_entries(qtexts_htab) == 0); + data_load(PGAQO_TEXT_FILE, _deform_qtexts_record_cb, NULL); + + /* Check existence of default feature space */ + (void) hash_search(qtexts_htab, &queryid, HASH_FIND, &found); + + aqo_state->qtexts_changed = false; /* mem data consistent with disk */ + LWLockRelease(&aqo_state->qtexts_lock); + + if (!found) + { + if (!aqo_qtext_store(0, "COMMON feature space (do not delete!)")) + elog(PANIC, "[AQO] DSA Initialization was unsuccessful"); + } +} + +static void +data_load(const char *filename, deform_record_t callback, void *ctx) { FILE *file; - int i; + long i; uint32 header; - int32 num; int32 pgver; + long num; - file = AllocateFile(PGAQO_STAT_FILE, PG_BINARY_R); + file = AllocateFile(filename, PG_BINARY_R); if (file == NULL) { if (errno != ENOENT) @@ -1244,7 +1362,7 @@ aqo_stat_load(void) if (fread(&header, sizeof(uint32), 1, file) != 1 || fread(&pgver, sizeof(uint32), 1, file) != 1 || - fread(&num, sizeof(int32), 1, file) != 1) + fread(&num, sizeof(long), 1, file) != 1) goto read_error; if (header != PGAQO_FILE_HEADER || pgver != PGAQO_PG_MAJOR_VERSION) @@ -1252,36 +1370,249 @@ aqo_stat_load(void) for (i = 0; i < num; i++) { - bool found; - StatEntry fentry; - StatEntry *entry; + void *data; + size_t size; - if (fread(&fentry, sizeof(StatEntry), 1, file) != 1) + if (fread(&size, sizeof(size), 1, file) != 1) goto read_error; - - entry = (StatEntry *) hash_search(stat_htab, &fentry.queryid, - HASH_ENTER, &found); - Assert(!found); - 
memcpy(entry, &fentry, sizeof(StatEntry)); + data = palloc(size); + if (fread(data, size, 1, file) != 1) + goto read_error; + callback(data, size); + pfree(data); } FreeFile(file); - unlink(PGAQO_STAT_FILE); + unlink(filename); + + elog(DEBUG2, "[AQO] %ld records loaded from file %s.", num, filename); return; read_error: ereport(LOG, (errcode_for_file_access(), - errmsg("could not read file \"%s\": %m", - PGAQO_STAT_FILE))); + errmsg("could not read file \"%s\": %m", filename))); goto fail; data_error: ereport(LOG, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("ignoring invalid data in file \"%s\"", - PGAQO_STAT_FILE))); + errmsg("ignoring invalid data in file \"%s\"", filename))); fail: if (file) FreeFile(file); - unlink(PGAQO_STAT_FILE); -} \ No newline at end of file + unlink(filename); +} + +static void +on_shmem_shutdown(int code, Datum arg) +{ + aqo_qtexts_flush(); +} + +/* + * Initialize DSA memory for AQO shared data with variable length. + * On first call, create DSA segments and load data into hash table and DSA + * from disk. + */ +static void +dsa_init() +{ + MemoryContext old_context; + + if (qtext_dsa) + return; + + old_context = MemoryContextSwitchTo(TopMemoryContext); + LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); + + if (aqo_state->qtexts_dsa_handler == DSM_HANDLE_INVALID) + { + qtext_dsa = dsa_create(aqo_state->qtext_trancheid); + dsa_pin(qtext_dsa); + aqo_state->qtexts_dsa_handler = dsa_get_handle(qtext_dsa); + + /* Load and initialize quuery texts hash table */ + aqo_qtexts_load(); + } + else + qtext_dsa = dsa_attach(aqo_state->qtexts_dsa_handler); + + dsa_pin_mapping(qtext_dsa); + MemoryContextSwitchTo(old_context); + LWLockRelease(&aqo_state->lock); + + before_shmem_exit(on_shmem_shutdown, (Datum) 0); +} + +/* ************************************************************************** */ + +/* + * XXX: Maybe merge with aqo_queries ? 
+ */ +bool +aqo_qtext_store(uint64 queryid, const char *query_string) +{ + QueryTextEntry *entry; + bool found; + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + + if (query_string == NULL) + return false; + + dsa_init(); + + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); + entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, HASH_ENTER, + &found); + + /* Initialize entry on first usage */ + if (!found) + { + size_t size = strlen(query_string) + 1; + char *strptr; + + entry->queryid = queryid; + entry->qtext_dp = dsa_allocate(qtext_dsa, size); + Assert(DsaPointerIsValid(entry->qtext_dp)); + strptr = (char *) dsa_get_address(qtext_dsa, entry->qtext_dp); + strlcpy(strptr, query_string, size); + aqo_state->qtexts_changed = true; + } + LWLockRelease(&aqo_state->qtexts_lock); + return !found; +} + +Datum +aqo_query_texts(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[QT_TOTAL_NCOLS]; + bool nulls[QT_TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + QueryTextEntry *entry; + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, 
"return type must be a row type"); + Assert(tupDesc->natts == QT_TOTAL_NCOLS); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + dsa_init(); + memset(nulls, 0, QT_TOTAL_NCOLS); + LWLockAcquire(&aqo_state->qtexts_lock, LW_SHARED); + hash_seq_init(&hash_seq, qtexts_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + Assert(DsaPointerIsValid(entry->qtext_dp)); + char *ptr = dsa_get_address(qtext_dsa, entry->qtext_dp); + values[QT_QUERYID] = Int64GetDatum(entry->queryid); + values[QT_QUERY_STRING] = CStringGetTextDatum(ptr); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } + + LWLockRelease(&aqo_state->qtexts_lock); + tuplestore_donestoring(tupstore); + return (Datum) 0; +} + +Datum +aqo_qtexts_remove(PG_FUNCTION_ARGS) +{ + uint64 queryid = (uint64) PG_GETARG_INT64(0); + bool found = false; + QueryTextEntry *entry; + + dsa_init(); + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); + + /* + * Look for a record with this queryid. DSA fields must be freed before + * deletion of the record. 
+ */ + entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, HASH_FIND, &found); + if (!found) + goto end; + + /* Free DSA memory, allocated foro this record */ + Assert(DsaPointerIsValid(entry->qtext_dp)); + dsa_free(qtext_dsa, entry->qtext_dp); + + (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, &found); + Assert(found); +end: + LWLockRelease(&aqo_state->qtexts_lock); + PG_RETURN_BOOL(found); +} + +static long +aqo_qtexts_reset(void) +{ + HASH_SEQ_STATUS hash_seq; + QueryTextEntry *entry; + long num_remove = 0; + long num_entries; + + dsa_init(); + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); + num_entries = hash_get_num_entries(qtexts_htab); + hash_seq_init(&hash_seq, qtexts_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + if (entry->queryid == 0) + continue; + + Assert(DsaPointerIsValid(entry->qtext_dp)); + dsa_free(qtext_dsa, entry->qtext_dp); + if (hash_search(qtexts_htab, &entry->queryid, HASH_REMOVE, NULL) == NULL) + elog(ERROR, "[AQO] hash table corrupted"); + num_remove++; + } + aqo_state->qtexts_changed = true; + LWLockRelease(&aqo_state->qtexts_lock); + Assert(num_remove == num_entries - 1); /* Is it really impossible? */ + + /* TODO: clean disk storage */ + + return num_remove; +} + +Datum +aqo_reset(PG_FUNCTION_ARGS) +{ + long counter = 0; + + counter += aqo_stat_reset(); + counter += aqo_qtexts_reset(); + PG_RETURN_INT64(counter); +} diff --git a/storage.h b/storage.h index fe117859..34014e70 100644 --- a/storage.h +++ b/storage.h @@ -2,6 +2,7 @@ #define STORAGE_H #include "utils/array.h" +#include "utils/dsa.h" /* Public structs have links to DSA memory blocks */ #define STAT_SAMPLE_SIZE (20) @@ -31,9 +32,23 @@ typedef struct StatEntry double est_error_aqo[STAT_SAMPLE_SIZE]; } StatEntry; +/* + * Storage entry for query texts. + * Query strings may have very different sizes. So, in hash table we store only + * link to DSA-allocated memory. 
+ */ +typedef struct QueryTextEntry +{ + uint64 queryid; + + /* Link to DSA-allocated momory block. Can be shared across backends */ + dsa_pointer qtext_dp; +} QueryTextEntry; + extern bool aqo_use_file_storage; extern HTAB *stat_htab; +extern HTAB *qtexts_htab; extern HTAB *queries_htab; /* TODO */ extern HTAB *data_htab; /* TODO */ @@ -42,6 +57,9 @@ extern StatEntry *aqo_stat_store(uint64 queryid, bool use_aqo, double plan_time, extern void aqo_stat_flush(void); extern void aqo_stat_load(void); +extern bool aqo_qtext_store(uint64 queryid, const char *query_string); +extern void aqo_qtexts_flush(void); +extern void aqo_qtexts_load(void); /* Utility routines */ extern ArrayType *form_vector(double *vector, int nrows); diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 3d574351..afe654e9 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -4,7 +4,7 @@ use Config; use PostgresNode; use TestLib; -use Test::More tests => 21; +use Test::More tests => 22; my $node = get_new_node('aqotest'); $node->init; @@ -78,7 +78,7 @@ $node->safe_psql('postgres', " ALTER SYSTEM SET aqo.mode = 'disabled'; SELECT pg_reload_conf(); - SELECT * FROM aqo_stat_reset(); -- Remove old data + SELECT * FROM aqo_reset(); -- Remove old data "); $node->command_ok([ 'pgbench', '-t', "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], @@ -129,7 +129,7 @@ }); # Avoid problems with an error fluctuations during the test above. -$node->safe_psql('postgres', "SELECT aqo_stat_reset()"); +$node->safe_psql('postgres', "SELECT aqo_reset()"); # Look for top of problematic queries. $node->command_ok([ 'pgbench', '-t', "10", '-c', "$CLIENTS", '-j', "$THREADS", @@ -138,17 +138,17 @@ $res = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_cardinality_error(false) v - JOIN aqo_query_texts t ON (t.query_hash = v.id) + JOIN aqo_query_texts t ON (t.queryid = v.id) WHERE v.error > 0. 
AND t.query_text LIKE '%pgbench_accounts%'"); is($res, 3); $res = $node->safe_psql('postgres', "SELECT * FROM aqo_cardinality_error(false) v - JOIN aqo_query_texts t ON (t.query_hash = v.id) + JOIN aqo_query_texts t ON (t.queryid = v.id) WHERE v.error > 0. AND t.query_text LIKE '%pgbench_accounts%'"); note("\n TopN: \n $res \n"); $res = $node->safe_psql('postgres', "SELECT v.error, t.query_text FROM aqo_cardinality_error(false) v - JOIN aqo_query_texts t ON (t.query_hash = v.id) + JOIN aqo_query_texts t ON (t.queryid = v.id) WHERE v.error > 0."); note("\n Queries: \n $res \n"); $res = $node->safe_psql('postgres', "SELECT * FROM public.aqo_execution_time(false) v"); @@ -242,7 +242,7 @@ # Number of rows in aqo_query_texts: related to pgbench test and total value. my $pgb_fs_samples_count = $node->safe_psql('postgres', " SELECT count(*) FROM aqo_query_texts - WHERE query_hash IN ( + WHERE queryid IN ( SELECT fspace_hash FROM aqo_data WHERE $aoid = ANY(oids) OR @@ -252,11 +252,12 @@ ) "); $fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); +is($pgb_fs_samples_count > 0, 1, "AQO query texts exists"); # Number of rows in aqo_query_stat: related to pgbench test and total value. 
my $pgb_stat_count = $node->safe_psql('postgres', " - SELECT count(*) FROM aqo_query_texts - WHERE query_hash IN ( + SELECT count(*) FROM aqo_query_stat + WHERE queryid IN ( SELECT fspace_hash FROM aqo_data WHERE $aoid = ANY(oids) OR @@ -267,10 +268,6 @@ "); $stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); -note("pgbench-related rows: aqo_data - $pgb_fss_count/$fss_count, - aqo_queries: $pgb_fs_count/$fs_count, aqo_query_texts: $pgb_fs_samples_count/$fs_samples_count, - aqo_query_stat: $pgb_stat_count/$stat_count"); - $node->safe_psql('postgres', " DROP TABLE pgbench_accounts, pgbench_branches, pgbench_tellers, pgbench_history CASCADE;"); @@ -283,16 +280,22 @@ my $new_fss_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_data;"); my $new_fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); my $new_stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); -note("Total AQO rows after dropping pgbench-related tables: - aqo_queries: $new_fs_count, aqo_data: $new_fss_count, - aqo_query_texts: $new_fs_samples_count, aqo_query_stat: $new_stat_count"); +note("Total AQO rows after dropping pgbench-related tables: + aqo_queries: ($new_fs_count, $fs_count, $pgb_fs_count), + aqo_data: ($new_fss_count, $fss_count, $pgb_fss_count), + aqo_query_texts: ($new_fs_samples_count, $fs_samples_count, $pgb_fs_samples_count), + aqo_query_stat: ($new_stat_count, $stat_count, $pgb_stat_count)"); # Check total number of rows in AQO knowledge base after removing of # pgbench-related data. 
-is($new_fs_count == $fs_count - $pgb_fs_count, 1, 'Total number of feature spaces'); -is($new_fss_count == $fss_count - $pgb_fss_count, 1, 'Total number of feature subspaces'); -is($new_fs_samples_count == $fs_samples_count - $pgb_fs_samples_count, 1, 'Total number of samples in aqo_query_texts'); -is($new_stat_count == $stat_count - $pgb_stat_count, 1, 'Total number of samples in aqo_query_texts'); +is($new_fs_count == $fs_count - $pgb_fs_count, 1, + 'Total number of feature spaces'); +is($new_fss_count == $fss_count - $pgb_fss_count, 1, + 'Total number of feature subspaces'); +is($new_fs_samples_count == $fs_samples_count - $pgb_fs_samples_count, 1, + 'Total number of samples in aqo_query_texts'); +is($new_stat_count == $stat_count - $pgb_stat_count, 1, + 'Total number of samples in aqo_query_stat'); $node->safe_psql('postgres', "DROP EXTENSION aqo"); diff --git a/t/002_pg_stat_statements_aqo.pl b/t/002_pg_stat_statements_aqo.pl index 69c020c9..d60dca10 100644 --- a/t/002_pg_stat_statements_aqo.pl +++ b/t/002_pg_stat_statements_aqo.pl @@ -3,7 +3,7 @@ use PostgresNode; use TestLib; -use Test::More tests => 3; +use Test::More tests => 2; my $node = get_new_node('profiling'); $node->init; @@ -57,11 +57,5 @@ $res = $node->safe_psql('postgres', "SELECT count(*) FROM pg_stat_statements where query = 'SELECT * FROM aqo_test0'"); is($res, 1); # The same query add in pg_stat_statements $res = $node->safe_psql('postgres', "SELECT count(*) from aqo_query_texts where query_text = 'SELECT * FROM aqo_test0'"); -is($res, 0); # The same query isn't add in aqo_query_texts -$query_id = $node->safe_psql('postgres', "SELECT queryid FROM pg_stat_statements where query = 'SELECT * FROM aqo_test0'"); -$res = $node->safe_psql('postgres', "insert into aqo_queries values ($query_id,'f','f',$query_id,'f')"); -# Add query in aqo_query_texts -$res = $node->safe_psql('postgres', "insert into aqo_query_texts values ($query_id,'SELECT * FROM aqo_test0')"); -$res = $node->safe_psql('postgres', 
"SELECT count(*) from aqo_query_texts where query_text = 'SELECT * FROM aqo_test0'"); # The same query is in aqo_query_texts -is($res, 1); +is($res, 0); # The same query isn't added into aqo_query_texts $node->stop(); \ No newline at end of file From f0127961f6b12d4d88240f6b3c0b1f930076839b Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 28 Jun 2022 08:52:14 +0500 Subject: [PATCH 096/203] Replace aqo_data table with shmem hash table + DSA + file storage. --- aqo--1.4--1.5.sql | 72 +- aqo.h | 3 - aqo_shared.c | 22 +- aqo_shared.h | 5 + expected/aqo_controlled.out | 2 +- expected/aqo_disabled.out | 2 +- expected/aqo_forced.out | 2 +- expected/aqo_intelligent.out | 2 +- expected/aqo_learn.out | 28 +- expected/clean_aqo_data.out | 42 +- expected/forced_stat_collection.out | 4 +- expected/gucs.out | 4 +- expected/relocatable.out | 10 +- expected/statement_timeout.out | 13 +- expected/temp_tables.out | 4 +- expected/unsupported.out | 6 + learn_cache.c | 2 +- machine_learning.c | 2 + sql/aqo_learn.sql | 28 +- sql/clean_aqo_data.sql | 42 +- sql/relocatable.sql | 6 +- sql/statement_timeout.sql | 3 +- sql/unsupported.sql | 1 + storage.c | 988 ++++++++++++++++------------ storage.h | 33 +- t/001_pgbench.pl | 6 +- 26 files changed, 779 insertions(+), 553 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 5c73597e..2e8f2391 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -60,35 +60,31 @@ RETURNS SETOF record AS 'MODULE_PATHNAME', 'aqo_query_stat' LANGUAGE C STRICT VOLATILE PARALLEL SAFE; +CREATE FUNCTION aqo_data( + OUT fs bigint, + OUT fss integer, + OUT nfeatures integer, + OUT features double precision[][], + OUT targets double precision[], + OUT reliability double precision[], + OUT oids integer[] +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_data' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; +CREATE FUNCTION aqo_data_remove(fs bigint, fss int) RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C PARALLEL SAFE; + CREATE VIEW 
aqo_query_stat AS SELECT * FROM aqo_query_stat(); CREATE VIEW aqo_query_texts AS SELECT * FROM aqo_query_texts(); +CREATE VIEW aqo_data AS SELECT * FROM aqo_data(); CREATE FUNCTION aqo_stat_remove(fs bigint) RETURNS bool AS 'MODULE_PATHNAME' LANGUAGE C STRICT PARALLEL SAFE; --- --- Re-create the aqo_data table. --- The oids array contains oids of permanent tables only. It is used for cleanup --- ML knowledge base from queries that refer to removed tables. --- -CREATE TABLE aqo_data ( - fspace_hash bigint NOT NULL REFERENCES aqo_queries ON DELETE CASCADE, - fsspace_hash int NOT NULL, - nfeatures int NOT NULL, - features double precision[][], - targets double precision[], - - -- oids of permanent tables only. It is used for cleanup - -- ML knowledge base from queries that refer to removed tables. - oids oid [] DEFAULT NULL, - - reliability double precision [] -); -CREATE UNIQUE INDEX aqo_fss_access_idx ON aqo_data (fspace_hash, fsspace_hash); - INSERT INTO aqo_queries VALUES (0, false, false, 0, false); --- INSERT INTO aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); -- a virtual query for COMMON feature space CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE @@ -155,29 +151,30 @@ COMMENT ON FUNCTION aqo_execution_time(boolean) IS CREATE OR REPLACE FUNCTION aqo_drop_class(queryid bigint) RETURNS integer AS $$ DECLARE - fs bigint; + lfs bigint; num integer; BEGIN IF (queryid = 0) THEN raise EXCEPTION '[AQO] Cannot remove basic class %.', queryid; END IF; - SELECT fspace_hash FROM aqo_queries WHERE (query_hash = queryid) INTO fs; + SELECT fspace_hash FROM aqo_queries WHERE (query_hash = queryid) INTO lfs; - IF (fs IS NULL) THEN + IF (lfs IS NULL) THEN raise WARNING '[AQO] Nothing to remove for the class %.', queryid; RETURN 0; END IF; - IF (fs <> queryid) THEN + IF (lfs <> queryid) THEN raise WARNING '[AQO] Removing query class has non-generic feature space value: id = %, fs = %.', queryid, fs; END IF; - SELECT count(*) FROM 
aqo_data WHERE fspace_hash = fs INTO num; + SELECT count(*) FROM aqo_data WHERE fs = lfs INTO num; DELETE FROM aqo_queries WHERE query_hash = queryid; PERFORM aqo_stat_remove(queryid); PERFORM aqo_qtexts_remove(queryid); + PERFORM aqo_data_remove(lfs, NULL); RETURN num; END; $$ LANGUAGE plpgsql; @@ -195,28 +192,29 @@ COMMENT ON FUNCTION aqo_drop_class(bigint) IS CREATE OR REPLACE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) AS $$ DECLARE - fs bigint; - fss integer; + lfs bigint; + lfss integer; BEGIN -- Save current number of rows SELECT count(*) FROM aqo_queries INTO nfs; SELECT count(*) FROM aqo_data INTO nfss; - FOR fs,fss IN SELECT q1.fs,q1.fss FROM ( - SELECT fspace_hash fs, fsspace_hash fss, unnest(oids) AS reloid + FOR lfs,lfss IN SELECT q1.fs,q1.fss FROM ( + SELECT fs, fss, unnest(oids) AS reloid FROM aqo_data) AS q1 WHERE q1.reloid NOT IN (SELECT oid FROM pg_class) GROUP BY (q1.fs,q1.fss) LOOP - IF (fs = 0) THEN - DELETE FROM aqo_data WHERE fsspace_hash = fss; - continue; - END IF; +-- IF (fs = 0) THEN +-- DELETE FROM aqo_data WHERE fsspace_hash = fss; +-- continue; +-- END IF; -- Remove ALL feature space if one of oids isn't exists - DELETE FROM aqo_queries WHERE fspace_hash = fs; - PERFORM * FROM aqo_stat_remove(fs); - PERFORM * FROM aqo_qtexts_remove(fs); + DELETE FROM aqo_queries WHERE fspace_hash = lfs; + PERFORM aqo_stat_remove(lfs); + PERFORM aqo_qtexts_remove(lfs); + PERFORM aqo_data_remove(lfs, NULL); END LOOP; -- Calculate difference with previous state of knowledge base diff --git a/aqo.h b/aqo.h index 7ff47a2c..de7fae64 100644 --- a/aqo.h +++ b/aqo.h @@ -257,11 +257,8 @@ extern bool update_query(uint64 qhash, uint64 fhash, bool learn_aqo, bool use_aqo, bool auto_tuning); extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe); -extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, - bool wideSearch); extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids, 
bool isTimedOut); -extern bool update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids); extern bool my_index_insert(Relation indexRelation, Datum *values, bool *isnull, ItemPointer heap_t_ctid, Relation heapRelation, IndexUniqueCheck checkUnique); diff --git a/aqo_shared.c b/aqo_shared.c index 8cc7dc39..b9a802e1 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -27,6 +27,7 @@ AQOSharedState *aqo_state = NULL; HTAB *fss_htab = NULL; static int aqo_htab_max_items = 1000; static int fs_max_items = 1000; /* Max number of different feature spaces in ML model */ +static int fss_max_items = 10000; static uint32 temp_storage_size = 1024 * 1024 * 10; /* Storage size, in bytes */ static dsm_segment *seg = NULL; @@ -181,6 +182,7 @@ aqo_init_shmem(void) fss_htab = NULL; stat_htab = NULL; qtexts_htab = NULL; + data_htab = NULL; LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); aqo_state = ShmemInitStruct("AQO", sizeof(AQOSharedState), &found); @@ -190,12 +192,17 @@ aqo_init_shmem(void) LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId()); aqo_state->dsm_handler = DSM_HANDLE_INVALID; + aqo_state->qtexts_dsa_handler = DSM_HANDLE_INVALID; aqo_state->qtext_trancheid = LWLockNewTrancheId(); aqo_state->qtexts_changed = false; + aqo_state->data_dsa_handler = DSM_HANDLE_INVALID; + aqo_state->data_trancheid = LWLockNewTrancheId(); + aqo_state->data_changed = false; LWLockInitialize(&aqo_state->stat_lock, LWLockNewTrancheId()); LWLockInitialize(&aqo_state->qtexts_lock, LWLockNewTrancheId()); + LWLockInitialize(&aqo_state->data_lock, LWLockNewTrancheId()); } info.keysize = sizeof(htab_key); @@ -218,17 +225,25 @@ aqo_init_shmem(void) fs_max_items, fs_max_items, &info, HASH_ELEM | HASH_BLOBS); + /* Shared memory hash table for the data */ + info.keysize = sizeof(data_key); + info.entrysize = sizeof(DataEntry); + data_htab = ShmemInitHash("AQO Data HTAB", + fss_max_items, fss_max_items, + &info, HASH_ELEM | HASH_BLOBS); + LWLockRelease(AddinShmemInitLock); 
LWLockRegisterTranche(aqo_state->lock.tranche, "AQO"); LWLockRegisterTranche(aqo_state->stat_lock.tranche, "AQO Stat Lock Tranche"); LWLockRegisterTranche(aqo_state->qtexts_lock.tranche, "AQO QTexts Lock Tranche"); LWLockRegisterTranche(aqo_state->qtext_trancheid, "AQO Query Texts Tranche"); - + LWLockRegisterTranche(aqo_state->data_lock.tranche, "AQO Data Lock Tranche"); + LWLockRegisterTranche(aqo_state->data_trancheid, "AQO Data Tranche"); if (!IsUnderPostmaster) { before_shmem_exit(on_shmem_shutdown, (Datum) 0); - aqo_stat_load(); + aqo_stat_load(); /* Doesn't use DSA, so can be loaded in postmaster */ } } @@ -249,6 +264,9 @@ aqo_memsize(void) size = MAXALIGN(sizeof(AQOSharedState)); size = add_size(size, hash_estimate_size(aqo_htab_max_items, sizeof(htab_entry))); size = add_size(size, hash_estimate_size(fs_max_items, sizeof(AQOSharedState))); + size = add_size(size, hash_estimate_size(fs_max_items, sizeof(StatEntry))); + size = add_size(size, hash_estimate_size(fs_max_items, sizeof(QueryTextEntry))); + size = add_size(size, hash_estimate_size(fss_max_items, sizeof(DataEntry))); return size; } diff --git a/aqo_shared.h b/aqo_shared.h index b2daf082..bf03648b 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -35,6 +35,11 @@ typedef struct AQOSharedState dsa_handle qtexts_dsa_handler; /* DSA area for storing of query texts */ int qtext_trancheid; bool qtexts_changed; + + LWLock data_lock; /* Lock for shared fields below */ + dsa_handle data_dsa_handler; + int data_trancheid; + bool data_changed; } AQOSharedState; diff --git a/expected/aqo_controlled.out b/expected/aqo_controlled.out index 5f019e83..b7b33aa9 100644 --- a/expected/aqo_controlled.out +++ b/expected/aqo_controlled.out @@ -301,7 +301,7 @@ DROP TABLE aqo_test2; SELECT aqo_reset(); aqo_reset ----------- - 22 + 50 (1 row) DROP EXTENSION aqo; diff --git a/expected/aqo_disabled.out b/expected/aqo_disabled.out index 3438d5b8..6fa67fc0 100644 --- a/expected/aqo_disabled.out +++ b/expected/aqo_disabled.out @@ 
-219,7 +219,7 @@ SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be z SELECT aqo_reset(); aqo_reset ----------- - 8 + 18 (1 row) DROP EXTENSION aqo; diff --git a/expected/aqo_forced.out b/expected/aqo_forced.out index e3d40bfc..5e4d53e8 100644 --- a/expected/aqo_forced.out +++ b/expected/aqo_forced.out @@ -86,7 +86,7 @@ DROP TABLE aqo_test1; SELECT aqo_reset(); aqo_reset ----------- - 0 + 3 (1 row) DROP EXTENSION aqo; diff --git a/expected/aqo_intelligent.out b/expected/aqo_intelligent.out index 739f1ec5..aff0d16e 100644 --- a/expected/aqo_intelligent.out +++ b/expected/aqo_intelligent.out @@ -507,7 +507,7 @@ DROP TABLE aqo_test1; SELECT aqo_reset(); aqo_reset ----------- - 48 + 103 (1 row) DROP EXTENSION aqo; diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 7aeecb22..718fbe0a 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -252,7 +252,7 @@ WHERE aqt1.query_text = aqt2.query_text AND aqt1.queryid <> aqt2.queryid; -- Fix the state of the AQO data SELECT min(reliability),sum(nfeatures),query_text FROM aqo_data ad, aqo_query_texts aqt -WHERE aqt.queryid = ad.fspace_hash +WHERE aqt.queryid = ad.fs GROUP BY (query_text) ORDER BY (md5(query_text)) ; min | sum | query_text @@ -524,8 +524,8 @@ SELECT * FROM check_estimated_rows(' 20 | 17 (1 row) -SELECT count(*) FROM - (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 +SELECT count(*) FROM -- Learn on the query + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 ; count ------- @@ -557,8 +557,8 @@ SELECT * FROM check_estimated_rows( (1 row) SELECT count(*) FROM - (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 -; -- Learn on a new query with one join (cardinality of this join AQO extracted from previous 3-join query) + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; -- Learn on a query with one join count ------- 2 @@ -572,7 +572,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); (1 row) SELECT count(*) FROM - 
(SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 ; -- Learn on the query without any joins now count ------- @@ -587,7 +587,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS (1 row) SELECT count(*) FROM - (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 ; -- See one more query in the AQO knowledge base count ------- @@ -610,7 +610,7 @@ SELECT * FROM check_estimated_rows(' 1 | 1 (1 row) -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 count ------- 5 @@ -626,7 +626,7 @@ SELECT * FROM check_estimated_rows(' 20 | 19 (1 row) -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 count ------- 6 @@ -643,7 +643,7 @@ SELECT * FROM check_estimated_rows(' 20 | 20 (1 row) -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 count ------- 7 @@ -661,7 +661,7 @@ SELECT * FROM check_estimated_rows(' 2 | 4 (1 row) -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 count ------- 7 @@ -678,7 +678,7 @@ SELECT * FROM check_estimated_rows(' 2 | 4 (1 row) -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 count ------- 8 @@ -694,7 +694,7 @@ SELECT * FROM check_estimated_rows(' 1 | 1 (1 row) -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 
count ------- 9 @@ -710,7 +710,7 @@ DROP TABLE aqo_test1; SELECT aqo_reset(); aqo_reset ----------- - 18 + 44 (1 row) DROP EXTENSION aqo; diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index 43279254..07ae3854 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -31,7 +31,7 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); count ------- 1 @@ -39,7 +39,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 1 @@ -47,7 +47,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 1 @@ -74,7 +74,7 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -83,7 +83,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - 
aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -92,7 +92,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -145,7 +145,7 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); count ------- 2 @@ -153,7 +153,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 2 @@ -161,7 +161,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 2 @@ -174,7 +174,7 @@ SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash 
FROM aqo_data WHERE :b_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)); count ------- 2 @@ -182,7 +182,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- 2 @@ -190,7 +190,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- 2 @@ -216,7 +216,7 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -225,7 +225,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -234,7 +234,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT 
aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -249,7 +249,7 @@ SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -258,7 +258,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -267,7 +267,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -289,7 +289,7 @@ SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -298,7 +298,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid 
= ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -307,7 +307,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index 6abf9a5b..43030489 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -33,8 +33,8 @@ SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; (1 row) SELECT * FROM aqo_data; - fspace_hash | fsspace_hash | nfeatures | features | targets | oids | reliability --------------+--------------+-----------+----------+---------+------+------------- + fs | fss | nfeatures | features | targets | reliability | oids +----+-----+-----------+----------+---------+-------------+------ (0 rows) SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex diff --git a/expected/gucs.out b/expected/gucs.out index b594cbea..40c177c9 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -8,7 +8,7 @@ ANALYZE t; SELECT * FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. 
aqo_reset ----------- - 12 + 19 (1 row) -- Check AQO addons to explain (the only stable data) @@ -126,7 +126,7 @@ SELECT count(*) FROM aqo_query_stat; SELECT * FROM aqo_reset(); -- Remove one record from texts and one from stat aqo_reset ----------- - 2 + 3 (1 row) SELECT count(*) FROM aqo_query_stat; diff --git a/expected/relocatable.out b/expected/relocatable.out index d869ca3b..4658e75d 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -35,13 +35,14 @@ SELECT count(*) FROM test WHERE id < 10; 9 (1 row) -SELECT query_text,learn_aqo, use_aqo, auto_tuning +SELECT query_text, learn_aqo, use_aqo, auto_tuning FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) +ORDER BY (md5(query_text)) ; -- Check result. TODO: We want to find here both queries executed above query_text | learn_aqo | use_aqo | auto_tuning ---------------------------------------+-----------+---------+------------- - COMMON feature space (do not delete!) | f | f | f SELECT count(*) FROM test; | t | t | f + COMMON feature space (do not delete!) | f | f | f (2 rows) -- Add schema which contains AQO to the end of search_path @@ -63,13 +64,14 @@ SELECT count(*) FROM test WHERE id < 10; 9 (1 row) -SELECT query_text,learn_aqo, use_aqo, auto_tuning +SELECT query_text, learn_aqo, use_aqo, auto_tuning FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) +ORDER BY (md5(query_text)) ; -- Check result. query_text | learn_aqo | use_aqo | auto_tuning ------------------------------------------+-----------+---------+------------- - COMMON feature space (do not delete!) | f | f | f SELECT count(*) FROM test; | t | t | f + COMMON feature space (do not delete!) 
| f | f | f SELECT count(*) FROM test WHERE id < 10; | t | t | f (3 rows) diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index 6d1af3a7..302b9b43 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -68,7 +68,12 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); DELETE FROM t WHERE x > 2; ANALYZE t; INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); -TRUNCATE aqo_data; +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + SET statement_timeout = 800; SELECT *, pg_sleep(1) FROM t; -- Not learned NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. @@ -107,5 +112,11 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); (1 row) DROP TABLE t; +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; diff --git a/expected/temp_tables.out b/expected/temp_tables.out index bd214fd2..6d9d1b73 100644 --- a/expected/temp_tables.out +++ b/expected/temp_tables.out @@ -17,8 +17,8 @@ SELECT count(*) FROM tt AS t1, tt AS t2; (1 row) SELECT * FROM aqo_data; - fspace_hash | fsspace_hash | nfeatures | features | targets | oids | reliability --------------+--------------+-----------+----------+---------+------+------------- + fs | fss | nfeatures | features | targets | reliability | oids +----+-----+-----------+----------+---------+-------------+------ (0 rows) -- Should be stored in the ML base diff --git a/expected/unsupported.out b/expected/unsupported.out index 243de4dd..da9e7d89 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -595,4 +595,10 @@ ORDER BY (md5(query_text),error) DESC; -------+------------ (0 rows) +SELECT 1 FROM aqo_reset(); + ?column? 
+---------- + 1 +(1 row) + DROP EXTENSION aqo; diff --git a/learn_cache.c b/learn_cache.c index 3f75a4a9..306592eb 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -274,7 +274,7 @@ lc_flush_data(void) Assert(delta > 0); ptr += delta; size -= delta; - update_fss(hdr->key.fs, hdr->key.fss, &data, reloids); + aqo_data_store(hdr->key.fs, hdr->key.fss, &data, reloids); if (!hash_search(fss_htab, (void *) &hdr->key, HASH_REMOVE, NULL)) elog(ERROR, "[AQO] Flush: local ML cache is corrupted."); diff --git a/machine_learning.c b/machine_learning.c index 52c1ab40..42dfb6f5 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -125,6 +125,8 @@ OkNNr_predict(OkNNrdata *data, double *features) double w_sum; double result = 0.; + Assert(data != NULL); + for (i = 0; i < data->rows; ++i) distances[i] = fs_distance(data->matrix[i], features, data->cols); diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index b7dcfea5..db461f50 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -133,7 +133,7 @@ WHERE aqt1.query_text = aqt2.query_text AND aqt1.queryid <> aqt2.queryid; -- Fix the state of the AQO data SELECT min(reliability),sum(nfeatures),query_text FROM aqo_data ad, aqo_query_texts aqt -WHERE aqt.queryid = ad.fspace_hash +WHERE aqt.queryid = ad.fs GROUP BY (query_text) ORDER BY (md5(query_text)) ; @@ -229,9 +229,9 @@ SELECT * FROM check_estimated_rows(' SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -'); -- Learn on the query -SELECT count(*) FROM - (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 +'); +SELECT count(*) FROM -- Learn on the query + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 ; SELECT query_text FROM aqo_query_texts WHERE queryid <> 0; -- Check query @@ -240,19 +240,19 @@ SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); SELECT * FROM check_estimated_rows( 'SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); SELECT 
count(*) FROM - (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 -; -- Learn on a new query with one join (cardinality of this join AQO extracted from previous 3-join query) + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; -- Learn on a query with one join SET aqo.join_threshold = 0; SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); SELECT count(*) FROM - (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 ; -- Learn on the query without any joins now SET aqo.join_threshold = 1; SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a)'); SELECT count(*) FROM - (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 ; -- See one more query in the AQO knowledge base SELECT * FROM check_estimated_rows('WITH selected AS (SELECT * FROM aqo_test1 t1) SELECT count(*) FROM selected'); @@ -261,14 +261,14 @@ SELECT * FROM check_estimated_rows(' SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) ) SELECT count(*) FROM selected') ; -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 -- InitPlan SELECT * FROM check_estimated_rows(' SELECT * FROM aqo_test1 AS t1 WHERE t1.a IN ( SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) )'); -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 -- SubPlan SELECT * FROM check_estimated_rows(' @@ -276,7 +276,7 @@ SELECT * FROM check_estimated_rows(' SELECT avg(t2.a) FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) AND (t2.a = t1.a) ) FROM aqo_test1 AS t1; '); -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data 
GROUP BY (fs)) AS q1; -- +1 -- Subquery SET aqo.join_threshold = 3; @@ -285,21 +285,21 @@ SELECT * FROM check_estimated_rows(' (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 WHERE q1.a*t1.a = t1.a + 15; '); -- Two JOINs, ignore it -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 SET aqo.join_threshold = 2; SELECT * FROM check_estimated_rows(' SELECT * FROM aqo_test1 AS t1, (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 WHERE q1.a*t1.a = t1.a + 15; '); -- One JOIN from subquery, another one from the query -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 SELECT * FROM check_estimated_rows(' WITH selected AS ( SELECT t2.a FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) ) SELECT count(*) FROM aqo_test1 t3, selected WHERE selected.a = t3.a') ; -- One JOIN extracted from CTE, another - from a FROM part of the query -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 DROP FUNCTION check_estimated_rows; RESET aqo.join_threshold; diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index 6ecf92ea..e02bf806 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -18,13 +18,13 @@ SELECT aqo_cleanup(); */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT 
aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); DROP TABLE a; SELECT aqo_cleanup(); @@ -38,15 +38,15 @@ SELECT aqo_cleanup(); */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); CREATE TABLE a(); @@ -70,23 +70,23 @@ SELECT 'b'::regclass::oid AS b_oid \gset -- new lines added to aqo_data SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data 
WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); DROP TABLE a; SELECT aqo_cleanup(); @@ -99,29 +99,29 @@ SELECT aqo_cleanup(); */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT 
count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); -- lines corresponding to b_oid in all theese tables should remain SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); DROP TABLE b; @@ -130,15 +130,15 @@ SELECT aqo_cleanup(); -- lines corresponding to b_oid in theese tables deleted 
SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); DROP EXTENSION aqo; \ No newline at end of file diff --git a/sql/relocatable.sql b/sql/relocatable.sql index cfc76333..51facc66 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -19,8 +19,9 @@ ALTER EXTENSION aqo SET SCHEMA test; SELECT count(*) FROM test; SELECT count(*) FROM test WHERE id < 10; -SELECT query_text,learn_aqo, use_aqo, auto_tuning +SELECT query_text, learn_aqo, use_aqo, auto_tuning FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) +ORDER BY (md5(query_text)) ; -- Check result. 
TODO: We want to find here both queries executed above -- Add schema which contains AQO to the end of search_path @@ -29,8 +30,9 @@ SELECT set_config('search_path', current_setting('search_path') || ', test', fal SELECT count(*) FROM test; SELECT count(*) FROM test WHERE id < 10; -SELECT query_text,learn_aqo, use_aqo, auto_tuning +SELECT query_text, learn_aqo, use_aqo, auto_tuning FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) +ORDER BY (md5(query_text)) ; -- Check result. /* diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql index 84cdd5d8..9666c1de 100644 --- a/sql/statement_timeout.sql +++ b/sql/statement_timeout.sql @@ -46,7 +46,7 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); DELETE FROM t WHERE x > 2; ANALYZE t; INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); -TRUNCATE aqo_data; +SELECT 1 FROM aqo_reset(); SET statement_timeout = 800; SELECT *, pg_sleep(1) FROM t; -- Not learned @@ -61,5 +61,6 @@ SELECT *, pg_sleep(1) FROM t; -- Get reliable data SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); DROP TABLE t; +SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 04970481..c0b6102b 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -172,4 +172,5 @@ FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.queryid = cef.id ORDER BY (md5(query_text),error) DESC; +SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index 5c0ad760..f436348f 100644 --- a/storage.c +++ b/storage.c @@ -34,14 +34,9 @@ #define AQO_DATA_COLUMNS (7) HTAB *deactivated_queries = NULL; -static ArrayType *form_matrix(double **matrix, int nrows, int ncols); -static int deform_matrix(Datum datum, double **matrix); - -static void deform_vector(Datum datum, double *vector, int *nelems); +static ArrayType *form_matrix(double *matrix, int nrows, int ncols); #define 
FormVectorSz(v_name) (form_vector((v_name), (v_name ## _size))) -#define DeformVectorSz(datum, v_name) (deform_vector((datum), (v_name), &(v_name ## _size))) - static bool my_simple_heap_update(Relation relation, ItemPointer otid, @@ -318,7 +313,7 @@ bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe) { if (isSafe && (!aqo_learn_statement_timeout || !lc_has_fss(fs, fss))) - return load_fss(fs, fss, data, reloids, true); + return load_aqo_data(fs, fss, data, reloids, false); else { Assert(aqo_learn_statement_timeout); @@ -326,418 +321,34 @@ load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe) } } -/* - * Return list of reloids on which - */ -static void -build_knn_matrix(Datum *values, bool *nulls, OkNNrdata *data) -{ - int nrows; - - Assert(DatumGetInt32(values[2]) == data->cols); - - if (data->rows >= 0) - /* trivial strategy - use first suitable record and ignore others */ - return; - - if (data->cols > 0) - /* - * The case than an object hasn't any filters and selectivities - */ - data->rows = deform_matrix(values[3], data->matrix); - - deform_vector(values[4], data->targets, &nrows); - Assert(data->rows < 0 || data->rows == nrows); - data->rows = nrows; - - deform_vector(values[6], data->rfactors, &nrows); - Assert(data->rows == nrows); -} - -/* - * Loads KNN matrix for the feature subspace (fss) from table aqo_data. - * If wideSearch is true, search row by an unique value of (fs, fss) - * If wideSearch is false - search rows across all fs values and try to build a - * KNN matrix by merging of existed matrixes with some algorithm. - * In the case of successful search, initializes the data variable and list of - * reloids. - * - * Returns false if any data not found, true otherwise. 
- */ -bool -load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool wideSearch) -{ - Relation hrel; - Relation irel; - HeapTuple tuple; - TupleTableSlot *slot; - bool shouldFree; - IndexScanDesc scan; - ScanKeyData key[2]; - Datum values[AQO_DATA_COLUMNS]; - bool isnull[AQO_DATA_COLUMNS]; - bool success = false; - int keycount = 0; - List *oids = NIL; - - if (!open_aqo_relation(NULL, "aqo_data", "aqo_fss_access_idx", - AccessShareLock, &hrel, &irel)) - return false; - - if (wideSearch) - { - /* Full scan key. Only one row wanted */ - ScanKeyInit(&key[keycount++], 1, BTEqualStrategyNumber, F_INT8EQ, - Int64GetDatum(fs)); - ScanKeyInit(&key[keycount++], 2, BTEqualStrategyNumber, F_INT4EQ, - Int32GetDatum(fss)); - } - else - /* Pass along the index and get all tuples with the same fss */ - ScanKeyInit(&key[keycount++], 2, BTEqualStrategyNumber, F_INT4EQ, - Int32GetDatum(fss)); - - scan = index_beginscan(hrel, irel, SnapshotSelf, keycount, 0); - index_rescan(scan, key, keycount, NULL, 0); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - data->rows = -1; /* Attention! Use as a sign of nonentity */ - - /* - * Iterate along all tuples found and prepare knn model - */ - while (index_getnext_slot(scan, ForwardScanDirection, slot)) - { - ArrayType *array; - Datum *vals; - int nrows; - int i; - bool should_skip = false; - List *temp_oids = NIL; - - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - heap_deform_tuple(tuple, hrel->rd_att, values, isnull); - - /* Filter obviously unfamiliar tuples */ - - if (DatumGetInt32(values[2]) != data->cols) - { - if (wideSearch) - { - /* - * Looks like a hash collision, but it is so unlikely in a single - * fs, that we will LOG this fact and return immediately. 
- */ - elog(LOG, "[AQO] Unexpected number of features for hash (" \ - UINT64_FORMAT", %d):\ - expected %d features, obtained %d", - fs, fss, data->cols, DatumGetInt32(values[2])); - Assert(success == false); - break; - } - else - /* Go to the next tuple */ - continue; - } - - /* Decompose list of oids which the data depend on */ - array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(values[5])); - deconstruct_array(array, OIDOID, sizeof(Oid), true, - TYPALIGN_INT, &vals, NULL, &nrows); - - if (data->rows >= 0 && list_length(oids) != nrows) - { - /* Dubious case. So log it and skip these data */ - elog(LOG, - "[AQO] different number depended oids for the same fss %d: " - "%d and %d correspondingly.", - fss, list_length(oids), nrows); - should_skip = true; - } - else - { - for (i = 0; i < nrows; i++) - { - Oid reloid = DatumGetObjectId(vals[i]); - - if (!OidIsValid(reloid)) - elog(ERROR, "[AQO] Impossible OID in the knowledge base."); - - if (data->rows >= 0 && !list_member_oid(oids, reloid)) - { - elog(LOG, - "[AQO] Oid set for two records with equal fss %d don't match.", - fss); - should_skip = true; - break; - } - temp_oids = lappend_oid(temp_oids, reloid); - } - } - pfree(vals); - pfree(array); - - if (!should_skip) - { - if (data->rows < 0) - oids = copyObject(temp_oids); - build_knn_matrix(values, isnull, data); - } - - if (temp_oids != NIL) - pfree(temp_oids); - - /* - * It's OK, guess, because if something happened during merge of - * matrixes an ERROR will be thrown. 
- */ - if (data->rows > 0) - success = true; - } - - if (success && reloids != NULL) - /* return list of reloids, if needed */ - *reloids = oids; - - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, AccessShareLock); - table_close(hrel, AccessShareLock); - - return success; -} - bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids, bool isTimedOut) { if (!isTimedOut) - return update_fss(fs, fss, data, reloids); + return aqo_data_store(fs, fss, data, reloids); else return lc_update_fss(fs, fss, data, reloids); } -/* - * Updates the specified line in the specified feature subspace. - * Returns false if the operation failed, true otherwise. - * - * 'fss_hash' specifies the feature subspace 'nrows' x 'ncols' is the shape - * of 'matrix' 'targets' is vector of size 'nrows' - * - * Necessary to prevent waiting for another transaction to commit in index - * insertion or heap update. - * - * Caller guaranteed that no one AQO process insert or update this data row. - */ -bool -update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids) -{ - Relation hrel; - Relation irel; - SnapshotData snap; - TupleTableSlot *slot; - TupleDesc tupDesc; - HeapTuple tuple, - nw_tuple; - Datum values[AQO_DATA_COLUMNS]; - bool isnull[AQO_DATA_COLUMNS]; - bool replace[AQO_DATA_COLUMNS] = { false, false, false, true, true, false, true }; - bool shouldFree; - bool find_ok = false; - bool update_indexes; - IndexScanDesc scan; - ScanKeyData key[2]; - bool result = true; - - /* Couldn't allow to write if xact must be read-only. 
*/ - if (XactReadOnly) - return false; - - if (!open_aqo_relation(NULL, "aqo_data", - "aqo_fss_access_idx", - RowExclusiveLock, &hrel, &irel)) - return false; - - memset(isnull, 0, sizeof(bool) * AQO_DATA_COLUMNS); - tupDesc = RelationGetDescr(hrel); - InitDirtySnapshot(snap); - scan = index_beginscan(hrel, irel, &snap, 2, 0); - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fs)); - ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss)); - - index_rescan(scan, key, 2, NULL, 0); - - slot = MakeSingleTupleTableSlot(tupDesc, &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); - - if (!find_ok) - { - values[0] = Int64GetDatum(fs); - values[1] = Int32GetDatum(fss); - values[2] = Int32GetDatum(data->cols); - - if (data->cols > 0) - values[3] = PointerGetDatum(form_matrix(data->matrix, data->rows, data->cols)); - else - isnull[3] = true; - - values[4] = PointerGetDatum(form_vector(data->targets, data->rows)); - - /* Serialize list of reloids. Only once. */ - if (reloids != NIL) - { - int nrows = list_length(reloids); - ListCell *lc; - Datum *elems; - ArrayType *array; - int i = 0; - - elems = palloc(sizeof(*elems) * nrows); - foreach (lc, reloids) - elems[i++] = ObjectIdGetDatum(lfirst_oid(lc)); - - array = construct_array(elems, nrows, OIDOID, sizeof(Oid), true, - TYPALIGN_INT); - values[5] = PointerGetDatum(array); - pfree(elems); - } - else - /* XXX: Is it really possible? */ - isnull[5] = true; - - values[6] = PointerGetDatum(form_vector(data->rfactors, data->rows)); - tuple = heap_form_tuple(tupDesc, values, isnull); - - /* - * Don't use PG_TRY() section because of dirty snapshot and caller atomic - * prerequisities guarantees to us that no one concurrent insertion can - * exists. 
- */ - simple_heap_insert(hrel, tuple); - my_index_insert(irel, values, isnull, &(tuple->t_self), - hrel, UNIQUE_CHECK_YES); - } - else if (!TransactionIdIsValid(snap.xmin) && !TransactionIdIsValid(snap.xmax)) - { - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - heap_deform_tuple(tuple, hrel->rd_att, values, isnull); - - if (data->cols > 0) - values[3] = PointerGetDatum(form_matrix(data->matrix, data->rows, data->cols)); - else - isnull[3] = true; - - values[4] = PointerGetDatum(form_vector(data->targets, data->rows)); - values[6] = PointerGetDatum(form_vector(data->rfactors, data->rows)); - nw_tuple = heap_modify_tuple(tuple, tupDesc, values, isnull, replace); - if (my_simple_heap_update(hrel, &(nw_tuple->t_self), nw_tuple, - &update_indexes)) - { - if (update_indexes) - my_index_insert(irel, values, isnull, &(nw_tuple->t_self), - hrel, UNIQUE_CHECK_YES); - result = true; - } - else - { - /* - * Ooops, somebody concurrently updated the tuple. It is possible - * only in the case of changes made by third-party code. - */ - elog(ERROR, "AQO data piece ("UINT64_FORMAT" %d) concurrently" - " updated by a stranger backend.", - fs, fss); - result = false; - } - } - else - { - /* - * Concurrent update was made. To prevent deadlocks refuse to update. - */ - result = false; - } - - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, RowExclusiveLock); - table_close(hrel, RowExclusiveLock); - - CommandCounterIncrement(); - return result; -} - -/* - * Expands matrix from storage into simple C-array. 
- */ -int -deform_matrix(Datum datum, double **matrix) -{ - ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); - int nelems; - Datum *values; - int rows = 0; - int cols; - int i, - j; - - deconstruct_array(array, - FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd', - &values, NULL, &nelems); - if (nelems != 0) - { - rows = ARR_DIMS(array)[0]; - cols = ARR_DIMS(array)[1]; - for (i = 0; i < rows; ++i) - for (j = 0; j < cols; ++j) - matrix[i][j] = DatumGetFloat8(values[i * cols + j]); - } - pfree(values); - pfree(array); - return rows; -} - -/* - * Expands vector from storage into simple C-array. - * Also returns its number of elements. - */ -void -deform_vector(Datum datum, double *vector, int *nelems) -{ - ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); - Datum *values; - int i; - - deconstruct_array(array, - FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd', - &values, NULL, nelems); - for (i = 0; i < *nelems; ++i) - vector[i] = DatumGetFloat8(values[i]); - pfree(values); - pfree(array); -} - /* * Forms ArrayType object for storage from simple C-array matrix. 
*/ ArrayType * -form_matrix(double **matrix, int nrows, int ncols) +form_matrix(double *matrix, int nrows, int ncols) { Datum *elems; ArrayType *array; - int dims[2]; + int dims[2] = {nrows, ncols}; int lbs[2]; int i, j; - dims[0] = nrows; - dims[1] = ncols; lbs[0] = lbs[1] = 1; elems = palloc(sizeof(*elems) * nrows * ncols); for (i = 0; i < nrows; ++i) for (j = 0; j < ncols; ++j) - elems[i * ncols + j] = Float8GetDatum(matrix[i][j]); + elems[i * ncols + j] = Float8GetDatum(matrix[i * ncols + j]); array = construct_md_array(elems, NULL, 2, dims, lbs, FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd'); @@ -894,34 +505,30 @@ add_deactivated_query(uint64 query_hash) #define PGAQO_STAT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_statistics.stat" #define PGAQO_TEXT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_query_texts.stat" +#define PGAQO_DATA_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_data.stat" PG_FUNCTION_INFO_V1(aqo_query_stat); -//PG_FUNCTION_INFO_V1(aqo_stat_reset); // ? PG_FUNCTION_INFO_V1(aqo_query_texts); +PG_FUNCTION_INFO_V1(aqo_data); PG_FUNCTION_INFO_V1(aqo_stat_remove); PG_FUNCTION_INFO_V1(aqo_qtexts_remove); -//PG_FUNCTION_INFO_V1(aqo_qtexts_reset); // ? 
+PG_FUNCTION_INFO_V1(aqo_data_remove); PG_FUNCTION_INFO_V1(aqo_reset); typedef enum { - QUERYID = 0, - EXEC_TIME_AQO, - EXEC_TIME, - PLAN_TIME_AQO, - PLAN_TIME, - EST_ERROR_AQO, - EST_ERROR, - NEXECS_AQO, - NEXECS, - TOTAL_NCOLS + QUERYID = 0, EXEC_TIME_AQO, EXEC_TIME, PLAN_TIME_AQO, PLAN_TIME, + EST_ERROR_AQO, EST_ERROR, NEXECS_AQO, NEXECS, TOTAL_NCOLS } aqo_stat_cols; typedef enum { - QT_QUERYID = 0, - QT_QUERY_STRING, - QT_TOTAL_NCOLS + QT_QUERYID = 0, QT_QUERY_STRING, QT_TOTAL_NCOLS } aqo_qtexts_cols; +typedef enum { + AD_FS = 0, AD_FSS, AD_NFEATURES, AD_FEATURES, AD_TARGETS, AD_RELIABILITY, + AD_OIDS, AD_TOTAL_NCOLS +} aqo_data_cols; + typedef void* (*form_record_t) (void *ctx, size_t *size); typedef void (*deform_record_t) (void *data, size_t size); @@ -929,11 +536,12 @@ bool aqo_use_file_storage; HTAB *stat_htab = NULL; HTAB *queries_htab = NULL; /* TODO */ -HTAB *data_htab = NULL; /* TODO */ HTAB *qtexts_htab = NULL; dsa_area *qtext_dsa = NULL; -/* TODO: think about how to keep query texts. */ + +HTAB *data_htab = NULL; +dsa_area *data_dsa = NULL; /* Used to check data file consistency */ static const uint32 PGAQO_FILE_HEADER = 123467589; @@ -943,7 +551,7 @@ static void dsa_init(void); static int data_store(const char *filename, form_record_t callback, long nrecs, void *ctx); static void data_load(const char *filename, deform_record_t callback, void *ctx); - +static size_t _compute_data_dsa(const DataEntry *entry); /* * Update AQO statistics. * @@ -1180,7 +788,7 @@ _form_qtext_record_cb(void *ctx, size_t *size) memcpy(ptr, &entry->queryid, sizeof(entry->queryid)); ptr += sizeof(entry->queryid); memcpy(ptr, query_string, strlen(query_string) + 1); - return memcpy(palloc(*size), data, *size); + return data; } void @@ -1209,6 +817,72 @@ aqo_qtexts_flush(void) LWLockRelease(&aqo_state->qtexts_lock); } +/* + * Getting a hash table iterator, return a newly allocated memory chunk and its + * size for subsequent writing into storage. 
+ */ +static void * +_form_data_record_cb(void *ctx, size_t *size) +{ + HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; + DataEntry *entry; + char *data; + char *ptr, + *dsa_ptr; + size_t sz; + + entry = hash_seq_search(hash_seq); + if (entry == NULL) + return NULL; + + /* Size of data is DataEntry (without DSA pointer) plus size of DSA chunk */ + sz = offsetof(DataEntry, data_dp) + _compute_data_dsa(entry); + ptr = data = palloc(sz); + + /* Put the data into the chunk */ + + /* Plane copy of all bytes of hash table entry */ + memcpy(ptr, entry, offsetof(DataEntry, data_dp)); + ptr += offsetof(DataEntry, data_dp); + + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert((sz - (ptr - data)) == _compute_data_dsa(entry)); + memcpy(ptr, dsa_ptr, sz - (ptr - data)); + *size = sz; + return data; +} + +void +aqo_data_flush(void) +{ + HASH_SEQ_STATUS hash_seq; + int ret; + long entries; + + dsa_init(); + LWLockAcquire(&aqo_state->data_lock, LW_SHARED); + + if (!aqo_state->data_changed) + /* XXX: mull over forced mode. */ + goto end; + + entries = hash_get_num_entries(data_htab); + hash_seq_init(&hash_seq, data_htab); + ret = data_store(PGAQO_DATA_FILE, _form_data_record_cb, entries, + (void *) &hash_seq); + if (ret != 0) + /* + * Something happened and storing procedure hasn't finished walking + * along all records of the hash table. 
+ */ + hash_seq_term(&hash_seq); + else + aqo_state->data_changed = false; +end: + LWLockRelease(&aqo_state->data_lock); +} + static int data_store(const char *filename, form_record_t callback, long nrecs, void *ctx) @@ -1248,13 +922,13 @@ data_store(const char *filename, form_record_t callback, (void) durable_rename(tmpfile, filename, LOG); pfree(tmpfile); - elog(DEBUG2, "[AQO] %d records stored in file %s.", counter, filename); + elog(LOG, "[AQO] %d records stored in file %s.", counter, filename); return 0; error: ereport(LOG, (errcode_for_file_access(), - errmsg("could not write file \"%s\": %m", tmpfile))); + errmsg("could not write AQO file \"%s\": %m", tmpfile))); if (file) FreeFile(file); @@ -1294,7 +968,6 @@ aqo_stat_load(void) LWLockRelease(&aqo_state->stat_lock); } - static void _deform_qtexts_record_cb(void *data, size_t size) { @@ -1343,6 +1016,51 @@ aqo_qtexts_load(void) } } +/* + * Getting a data chunk from a caller, add a record into the 'ML data' + * shmem hash table. Allocate and fill DSA chunk for variadic part of the data. + */ +static void +_deform_data_record_cb(void *data, size_t size) +{ + bool found; + DataEntry *fentry = (DataEntry *) data; /*Depends on a platform? 
*/ + DataEntry *entry; + size_t sz; + char *ptr = (char *) data, + *dsa_ptr; + + Assert(LWLockHeldByMeInMode(&aqo_state->data_lock, LW_EXCLUSIVE)); + entry = (DataEntry *) hash_search(data_htab, &fentry->key, + HASH_ENTER, &found); + Assert(!found); + + /* Copy fixed-size part of entry byte-by-byte even with caves */ + memcpy(entry, fentry, offsetof(DataEntry, data_dp)); + ptr += offsetof(DataEntry, data_dp); + + sz = _compute_data_dsa(entry); + Assert(sz + offsetof(DataEntry, data_dp) == size); + entry->data_dp = dsa_allocate(data_dsa, sz); + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + memcpy(dsa_ptr, ptr, sz); +} + +void +aqo_data_load(void) +{ + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + Assert(data_dsa != NULL); + + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + Assert(hash_get_num_entries(data_htab) == 0); + data_load(PGAQO_DATA_FILE, _deform_data_record_cb, NULL); + + aqo_state->data_changed = false; /* mem data is consistent with disk */ + LWLockRelease(&aqo_state->data_lock); +} + static void data_load(const char *filename, deform_record_t callback, void *ctx) { @@ -1385,7 +1103,7 @@ data_load(const char *filename, deform_record_t callback, void *ctx) FreeFile(file); unlink(filename); - elog(DEBUG2, "[AQO] %ld records loaded from file %s.", num, filename); + elog(LOG, "[AQO] %ld records loaded from file %s.", num, filename); return; read_error: @@ -1407,6 +1125,7 @@ static void on_shmem_shutdown(int code, Datum arg) { aqo_qtexts_flush(); + aqo_data_flush(); } /* @@ -1422,22 +1141,34 @@ dsa_init() if (qtext_dsa) return; + Assert(data_dsa == NULL && data_dsa == NULL); old_context = MemoryContextSwitchTo(TopMemoryContext); LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); if (aqo_state->qtexts_dsa_handler == DSM_HANDLE_INVALID) { + Assert(aqo_state->data_dsa_handler == DSM_HANDLE_INVALID); + qtext_dsa = dsa_create(aqo_state->qtext_trancheid); dsa_pin(qtext_dsa); 
aqo_state->qtexts_dsa_handler = dsa_get_handle(qtext_dsa); + data_dsa = dsa_create(aqo_state->data_trancheid); + dsa_pin(data_dsa); + aqo_state->data_dsa_handler = dsa_get_handle(data_dsa); + /* Load and initialize quuery texts hash table */ aqo_qtexts_load(); + aqo_data_load(); } else + { qtext_dsa = dsa_attach(aqo_state->qtexts_dsa_handler); + data_dsa = dsa_attach(aqo_state->data_dsa_handler); + } dsa_pin_mapping(qtext_dsa); + dsa_pin_mapping(data_dsa); MemoryContextSwitchTo(old_context); LWLockRelease(&aqo_state->lock); @@ -1607,6 +1338,426 @@ aqo_qtexts_reset(void) return num_remove; } +static size_t +_compute_data_dsa(const DataEntry *entry) +{ + size_t size = sizeof(data_key); /* header's size */ + + size += sizeof(double) * entry->rows * entry->cols; /* matrix */ + size += 2 * sizeof(double) * entry->rows; /* targets, rfactors */ + + /* Calculate memory size needed to store relation names */ + size += entry->nrels * sizeof(Oid); + return size; +} + +/* + * Insert new record or update existed in the AQO data storage. + * Return true if data was changed. 
+ */ +bool +aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) +{ + DataEntry *entry; + bool found; + data_key key = {.fs = fs, .fss = fss}; + int i; + char *ptr; + ListCell *lc; + size_t size; + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + + dsa_init(); + + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + entry = (DataEntry *) hash_search(data_htab, &key, HASH_ENTER, &found); + + /* Initialize entry on first usage */ + if (!found) + { + entry->cols = data->cols; + entry->rows = data->rows; + entry->nrels = list_length(reloids); + + size = _compute_data_dsa(entry); + entry->data_dp = dsa_allocate0(data_dsa, size); + Assert(DsaPointerIsValid(entry->data_dp)); + } + + Assert(DsaPointerIsValid(entry->data_dp)); + Assert(entry->rows <= data->rows); /* Reserved for the future features */ + + if (entry->cols != data->cols || entry->nrels != list_length(reloids)) + { + /* Collision happened? */ + elog(LOG, "[AQO] Does a collision happened? Check it if possible (fs: %lu, fss: %d).", + fs, fss); + goto end; + } + + if (entry->rows < data->rows) + { + entry->rows = data->rows; + size = _compute_data_dsa(entry); + + /* Need to re-allocate DSA chunk */ + dsa_free(data_dsa, entry->data_dp); + entry->data_dp = dsa_allocate0(data_dsa, size); + Assert(DsaPointerIsValid(entry->data_dp)); + } + ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + + /* + * Copy AQO data into allocated DSA segment + */ + + memcpy(ptr, &key, sizeof(data_key)); /* Just for debug */ + ptr += sizeof(data_key); + if (entry->cols > 0) + { + for (i = 0; i < entry->rows; i++) + { + memcpy(ptr, data->matrix[i], sizeof(double) * data->cols); + ptr += sizeof(double) * data->cols; + } + } + /* copy targets into DSM storage */ + memcpy(ptr, data->targets, sizeof(double) * entry->rows); + ptr += sizeof(double) * entry->rows; + /* copy rfactors into DSM storage */ + memcpy(ptr, data->rfactors, sizeof(double) * entry->rows); + ptr += sizeof(double) * entry->rows; + /* store list of 
relations. XXX: optimize ? */ + foreach(lc, reloids) + { + Oid reloid = lfirst_oid(lc); + + memcpy(ptr, &reloid, sizeof(Oid)); + ptr += sizeof(Oid); + } + + aqo_state->data_changed = true; +end: + LWLockRelease(&aqo_state->data_lock); + return aqo_state->data_changed; +} + +static void +build_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data) +{ + Assert(data->cols == temp_data->cols); + + if (data->rows >= 0) + /* trivial strategy - use first suitable record and ignore others */ + return; + + memcpy(data, temp_data, sizeof(OkNNrdata)); + if (data->cols > 0) + { + int i; + + for (i = 0; i < data->rows; i++) + memcpy(data->matrix[i], temp_data->matrix[i], data->cols * sizeof(double)); + } +} + +static OkNNrdata * +_fill_knn_data(const DataEntry *entry, List **reloids) +{ + OkNNrdata *data; + char *ptr; + int i; + size_t offset; + size_t sz = _compute_data_dsa(entry); + + data = OkNNr_allocate(entry->cols); + data->rows = entry->rows; + + ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + + /* Check invariants */ + Assert(entry->rows < aqo_K); + Assert(ptr != NULL); + Assert(entry->key.fs == ((data_key *)ptr)->fs && + entry->key.fss == ((data_key *)ptr)->fss); + + ptr += sizeof(data_key); + + if (entry->cols > 0) + { + for (i = 0; i < entry->rows; i++) + { + memcpy(data->matrix[i], ptr, sizeof(double) * data->cols); + ptr += sizeof(double) * data->cols; + } + } + /* copy targets from DSM storage */ + memcpy(data->targets, ptr, sizeof(double) * entry->rows); + ptr += sizeof(double) * entry->rows; + offset = ptr - (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert(offset < sz); + + /* copy rfactors from DSM storage */ + memcpy(data->rfactors, ptr, sizeof(double) * entry->rows); + ptr += sizeof(double) * entry->rows; + offset = ptr - (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert(offset <= sz); + + if (reloids == NULL) + return data; + + /* store list of relations. XXX: optimize ? 
*/ + for (i = 0; i < entry->nrels; i++) + { + *reloids = lappend_oid(*reloids, ObjectIdGetDatum(*(Oid*)ptr)); + ptr += sizeof(Oid); + } + Assert(ptr - (char *) dsa_get_address(data_dsa, entry->data_dp) == sz); + return data; +} + +/* + * Return on feature subspace, unique defined by its class (fs) and hash value + * (fss). + * If reloids is NULL, skip loading of this list. + * If wideSearch is true - make seqscan on the hash table to see for relevant + * data across neighbours. + */ +bool +load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, + bool wideSearch) +{ + DataEntry *entry; + bool found; + data_key key = {.fs = fs, .fss = fss}; + OkNNrdata *temp_data; + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + + dsa_init(); + + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + entry = (DataEntry *) hash_search(data_htab, &key, HASH_FIND, &found); + + if (!found) + goto end; + + /* One entry with all correctly filled fields is found */ + Assert(entry); + Assert(DsaPointerIsValid(entry->data_dp)); + + if (entry->cols != data->cols) + { + /* Collision happened? */ + elog(LOG, "[AQO] Does a collision happened? 
Check it if possible (fs: %lu, fss: %d).", + fs, fss); + found = false; + goto end; + } + + temp_data = _fill_knn_data(entry, reloids); + build_knn_matrix(data, temp_data); +end: + LWLockRelease(&aqo_state->data_lock); + + return found; +} + +Datum +aqo_data(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[AD_TOTAL_NCOLS]; + bool nulls[AD_TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + DataEntry *entry; + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + Assert(tupDesc->natts == AD_TOTAL_NCOLS); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + dsa_init(); + memset(nulls, 0, AD_TOTAL_NCOLS); + LWLockAcquire(&aqo_state->data_lock, LW_SHARED); + hash_seq_init(&hash_seq, data_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + char *ptr; + + values[AD_FS] = Int64GetDatum(entry->key.fs); + values[AD_FSS] = Int64GetDatum(entry->key.fss); + 
values[AD_NFEATURES] = Int32GetDatum(entry->cols); + + /* Fill values from the DSA data chunk */ + Assert(DsaPointerIsValid(entry->data_dp)); + ptr = dsa_get_address(data_dsa, entry->data_dp); + Assert(entry->key.fs == ((data_key*)ptr)->fs && entry->key.fss == ((data_key*)ptr)->fss); + ptr += sizeof(data_key); + + if (entry->cols > 0) + values[AD_FEATURES] = PointerGetDatum(form_matrix((double *)ptr, entry->rows, entry->cols)); + else + nulls[AD_FEATURES] = true; + + ptr += sizeof(double) * entry->rows * entry->cols; + values[AD_TARGETS] = PointerGetDatum(form_vector((double *)ptr, entry->rows)); + ptr += sizeof(double) * entry->rows; + values[AD_RELIABILITY] = PointerGetDatum(form_vector((double *)ptr, entry->rows)); + ptr += sizeof(double) * entry->rows; + + if (entry->nrels > 0) + { + Datum *elems; + ArrayType *array; + int i; + + elems = palloc(sizeof(*elems) * entry->nrels); + for(i = 0; i < entry->nrels; i++) + elems[i] = ObjectIdGetDatum(*(Oid *)ptr); + + array = construct_array(elems, entry->nrels, OIDOID, + sizeof(Oid), true, TYPALIGN_INT); + values[AD_OIDS] = PointerGetDatum(array); + pfree(elems); + } + else + nulls[AD_OIDS] = true; + + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } + + LWLockRelease(&aqo_state->data_lock); + tuplestore_donestoring(tupstore); + return (Datum) 0; +} + +static long +_aqo_data_clean(uint64 fs) +{ + HASH_SEQ_STATUS hash_seq; + DataEntry *entry; + long removed = 0; + + Assert(LWLockHeldByMe(&aqo_state->data_lock)); + hash_seq_init(&hash_seq, data_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + if (entry->key.fs != fs) + continue; + + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_free(data_dsa, entry->data_dp); + if (hash_search(data_htab, &entry->key, HASH_REMOVE, NULL) == NULL) + elog(ERROR, "[AQO] hash table corrupted"); + removed++; + } + + return removed; +} + +Datum +aqo_data_remove(PG_FUNCTION_ARGS) +{ + data_key key; + bool found; + DataEntry *entry; + + dsa_init(); + + 
Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + + if (PG_ARGISNULL(1)) + { + /* Remove all feature subspaces from the space */ + found = (_aqo_data_clean((uint64) PG_GETARG_INT64(0)) > 0); + goto end; + } + + key.fs = (uint64) PG_GETARG_INT64(0); + key.fss = PG_GETARG_INT32(1); + + /* + * Look for a record with this queryid. DSA fields must be freed before + * deletion of the record. + */ + entry = (DataEntry *) hash_search(qtexts_htab, &key, HASH_FIND, &found); + if (!found) + goto end; + + /* Free DSA memory, allocated foro this record */ + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_free(data_dsa, entry->data_dp); + + (void) hash_search(data_htab, &key, HASH_REMOVE, &found); + Assert(found); +end: + if (found) + aqo_state->data_changed = true; + LWLockRelease(&aqo_state->data_lock); + PG_RETURN_BOOL(found); +} + +static long +aqo_data_reset(void) +{ + HASH_SEQ_STATUS hash_seq; + DataEntry *entry; + long num_remove = 0; + long num_entries; + + dsa_init(); + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + num_entries = hash_get_num_entries(data_htab); + hash_seq_init(&hash_seq, data_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_free(data_dsa, entry->data_dp); + if (hash_search(data_htab, &entry->key, HASH_REMOVE, NULL) == NULL) + elog(ERROR, "[AQO] hash table corrupted"); + num_remove++; + } + aqo_state->data_changed = true; + LWLockRelease(&aqo_state->data_lock); + Assert(num_remove == num_entries); + + /* TODO: clean disk storage */ + + return num_remove; +} + Datum aqo_reset(PG_FUNCTION_ARGS) { @@ -1614,5 +1765,6 @@ aqo_reset(PG_FUNCTION_ARGS) counter += aqo_stat_reset(); counter += aqo_qtexts_reset(); + counter += aqo_data_reset(); PG_RETURN_INT64(counter); } diff --git a/storage.h b/storage.h index 34014e70..80a29ef2 100644 --- a/storage.h +++ b/storage.h @@ -1,9 +1,12 @@ 
#ifndef STORAGE_H #define STORAGE_H +#include "nodes/pg_list.h" #include "utils/array.h" #include "utils/dsa.h" /* Public structs have links to DSA memory blocks */ +#include "machine_learning.h" + #define STAT_SAMPLE_SIZE (20) /* @@ -41,10 +44,33 @@ typedef struct QueryTextEntry { uint64 queryid; - /* Link to DSA-allocated momory block. Can be shared across backends */ + /* Link to DSA-allocated memory block. Can be shared across backends */ dsa_pointer qtext_dp; } QueryTextEntry; +typedef struct data_key +{ + uint64 fs; + int64 fss; /* just for alignment */ +} data_key; + +typedef struct DataEntry +{ + data_key key; + + /* defines a size and data placement in the DSA memory block */ + int cols; /* aka nfeatures */ + int rows; /* aka number of equations */ + int nrels; + + /* + * Link to DSA-allocated memory block. Can be shared across backends. + * Contains: + * matrix[][], targets[], reliability[], oids. + */ + dsa_pointer data_dp; +} DataEntry; + extern bool aqo_use_file_storage; extern HTAB *stat_htab; @@ -60,6 +86,11 @@ extern void aqo_stat_load(void); extern bool aqo_qtext_store(uint64 queryid, const char *query_string); extern void aqo_qtexts_flush(void); extern void aqo_qtexts_load(void); + +extern bool aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids); +extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids); +extern void aqo_data_flush(void); +extern void aqo_data_load(void); /* Utility routines */ extern ArrayType *form_vector(double *vector, int nrows); diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index afe654e9..e73e23ec 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -229,7 +229,7 @@ my $pgb_fs_count = $node->safe_psql('postgres', " SELECT count(*) FROM aqo_queries WHERE fspace_hash IN ( - SELECT fspace_hash FROM aqo_data + SELECT fs FROM aqo_data WHERE $aoid = ANY(oids) OR $boid = ANY(oids) OR @@ -243,7 +243,7 @@ my $pgb_fs_samples_count = $node->safe_psql('postgres', " SELECT count(*) FROM 
aqo_query_texts WHERE queryid IN ( - SELECT fspace_hash FROM aqo_data + SELECT fs FROM aqo_data WHERE $aoid = ANY(oids) OR $boid = ANY(oids) OR @@ -258,7 +258,7 @@ my $pgb_stat_count = $node->safe_psql('postgres', " SELECT count(*) FROM aqo_query_stat WHERE queryid IN ( - SELECT fspace_hash FROM aqo_data + SELECT fs FROM aqo_data WHERE $aoid = ANY(oids) OR $boid = ANY(oids) OR From 346d9f502824f43e04a5c55bbbb1148a7f6dc62f Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 14 Jun 2022 11:43:16 +0300 Subject: [PATCH 097/203] Replace aqo_data table with shmem hash table + DSA + file storage. --- cardinality_estimation.c | 27 ++++++++++----------------- machine_learning.c | 27 +++++++++++++++++++++++++++ machine_learning.h | 3 +++ postprocessing.c | 18 ++++++------------ storage.h | 3 ++- 5 files changed, 48 insertions(+), 30 deletions(-) diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 97799016..523b8e2e 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -64,8 +64,8 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, { double *features; double result; - int i; - OkNNrdata data; + int ncols; + OkNNrdata *data; if (relsigns == NIL) /* @@ -75,14 +75,11 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, return -4.; *fss = get_fss_for_object(relsigns, clauses, selectivities, - &data.cols, &features); + &ncols, &features); + data = OkNNr_allocate(ncols); - if (data.cols > 0) - for (i = 0; i < aqo_K; ++i) - data.matrix[i] = palloc0(sizeof(double) * data.cols); - - if (load_fss_ext(query_context.fspace_hash, *fss, &data, NULL, true)) - result = OkNNr_predict(&data, features); + if (load_fss_ext(query_context.fspace_hash, *fss, data, NULL, true)) + result = OkNNr_predict(data, features); else { /* @@ -93,25 +90,21 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, */ /* Try to search in surrounding feature spaces for the same node */ - if 
(!load_fss(query_context.fspace_hash, *fss, &data, NULL, false)) + if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, true)) result = -1; else { elog(DEBUG5, "[AQO] Make prediction for fss %d by a neighbour " "includes %d feature(s) and %d fact(s).", - *fss, data.cols, data.rows); - result = OkNNr_predict(&data, features); + *fss, data->cols, data->rows); + result = OkNNr_predict(data, features); } } #ifdef AQO_DEBUG_PRINT predict_debug_output(clauses, selectivities, relsigns, *fss, result); #endif pfree(features); - if (data.cols > 0) - { - for (i = 0; i < aqo_K; ++i) - pfree(data.matrix[i]); - } + OkNNr_free(data); if (result < 0) return -1; diff --git a/machine_learning.c b/machine_learning.c index 42dfb6f5..ca7fc6ef 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -41,6 +41,33 @@ static double fs_similarity(double dist); static double compute_weights(double *distances, int nrows, double *w, int *idx); +OkNNrdata* +OkNNr_allocate(int ncols) +{ + OkNNrdata *data = palloc(sizeof(OkNNrdata)); + int i; + + if (ncols > 0) + for (i = 0; i < aqo_K; ++i) + data->matrix[i] = palloc0(sizeof(double) * ncols); + + data->cols = ncols; + return data; +} + +void +OkNNr_free(OkNNrdata *data) +{ + int i; + + if (data->cols > 0) + { + for (i = 0; i < aqo_K; ++i) + pfree(data->matrix[i]); + } + pfree(data); +} + /* * Computes L2-distance between two given vectors. 
*/ diff --git a/machine_learning.h b/machine_learning.h index a09b3102..b114cade 100644 --- a/machine_learning.h +++ b/machine_learning.h @@ -21,6 +21,9 @@ typedef struct OkNNrdata double rfactors[aqo_K]; } OkNNrdata; +extern OkNNrdata* OkNNr_allocate(int ncols); +extern void OkNNr_free(OkNNrdata *data); + /* Machine learning techniques */ extern double OkNNr_predict(OkNNrdata *data, double *features); extern int OkNNr_learn(OkNNrdata *data, diff --git a/postprocessing.c b/postprocessing.c index de2c077b..0a63cf1f 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -146,14 +146,13 @@ learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, uint64 fs = query_context.fspace_hash; double *features; double target; - OkNNrdata data; + OkNNrdata *data; int fss; - int i; + int ncols; - memset(&data, 0, sizeof(OkNNrdata)); target = log(learned); fss = get_fss_for_object(rels->signatures, ctx->clauselist, - ctx->selectivities, &data.cols, &features); + ctx->selectivities, &ncols, &features); /* Only Agg nodes can have non-empty a grouping expressions list. 
*/ Assert(!IsA(plan, Agg) || aqo_node->grouping_exprs != NIL); @@ -165,19 +164,14 @@ learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, if (notExecuted && aqo_node->prediction > 0) return; - if (data.cols > 0) - for (i = 0; i < aqo_K; ++i) - data.matrix[i] = palloc(sizeof(double) * data.cols); + data = OkNNr_allocate(ncols); /* Critical section */ - atomic_fss_learn_step(fs, fss, &data, features, target, rfactor, + atomic_fss_learn_step(fs, fss, data, features, target, rfactor, rels->hrels, ctx->isTimedOut); /* End of critical section */ - if (data.cols > 0) - for (i = 0; i < aqo_K; ++i) - pfree(data.matrix[i]); - + OkNNr_free(data); pfree(features); } diff --git a/storage.h b/storage.h index 80a29ef2..ba2d671d 100644 --- a/storage.h +++ b/storage.h @@ -88,7 +88,8 @@ extern void aqo_qtexts_flush(void); extern void aqo_qtexts_load(void); extern bool aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids); -extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids); +extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, + bool wideSearch); extern void aqo_data_flush(void); extern void aqo_data_load(void); /* Utility routines */ From 723f9d12896d3acec7ea6f6de2465bccfd20f809 Mon Sep 17 00:00:00 2001 From: Andrey Kazarinov Date: Tue, 21 Jun 2022 15:43:30 +0300 Subject: [PATCH 098/203] file storage for aqo_queries [draft] --- aqo--1.4--1.5.sql | 103 +++--- aqo.h | 4 +- aqo_shared.c | 17 +- aqo_shared.h | 2 + auto_tuning.c | 4 +- expected/aqo_controlled.out | 18 +- expected/aqo_disabled.out | 19 +- expected/aqo_intelligent.out | 2 +- expected/aqo_learn.out | 4 +- expected/clean_aqo_data.out | 70 ++--- expected/forced_stat_collection.out | 2 +- expected/gucs.out | 10 +- expected/relocatable.out | 12 +- expected/temp_tables.out | 2 +- expected/top_queries.out | 10 +- preprocessing.c | 4 +- sql/aqo_controlled.sql | 6 +- sql/aqo_disabled.sql | 12 +- sql/aqo_intelligent.sql | 4 +- sql/aqo_learn.sql | 8 +- 
sql/clean_aqo_data.sql | 70 ++--- sql/forced_stat_collection.sql | 2 +- sql/gucs.sql | 2 +- sql/relocatable.sql | 12 +- sql/temp_tables.sql | 2 +- sql/top_queries.sql | 5 +- storage.c | 468 ++++++++++++++++++---------- storage.h | 14 + 28 files changed, 543 insertions(+), 345 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 2e8f2391..c6dc056f 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -20,13 +20,19 @@ DROP TABLE public.aqo_queries CASCADE; DROP TABLE public.aqo_query_texts CASCADE; DROP TABLE public.aqo_query_stat CASCADE; -CREATE TABLE aqo_queries ( - query_hash bigint CONSTRAINT aqo_queries_query_hash_idx PRIMARY KEY, - learn_aqo boolean NOT NULL, - use_aqo boolean NOT NULL, - fspace_hash bigint NOT NULL, - auto_tuning boolean NOT NULL -); +CREATE FUNCTION aqo_queries ( + OUT queryid bigint, + OUT fspace_hash bigint, + OUT learn_aqo boolean, + OUT use_aqo boolean, + OUT auto_tuning boolean +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_queries' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; +CREATE FUNCTION aqo_queries_remove(queryid bigint) RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT PARALLEL SAFE; CREATE FUNCTION aqo_query_texts(OUT queryid bigint, OUT query_text text) RETURNS SETOF record @@ -79,17 +85,18 @@ LANGUAGE C PARALLEL SAFE; CREATE VIEW aqo_query_stat AS SELECT * FROM aqo_query_stat(); CREATE VIEW aqo_query_texts AS SELECT * FROM aqo_query_texts(); CREATE VIEW aqo_data AS SELECT * FROM aqo_data(); +CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); CREATE FUNCTION aqo_stat_remove(fs bigint) RETURNS bool AS 'MODULE_PATHNAME' LANGUAGE C STRICT PARALLEL SAFE; -INSERT INTO aqo_queries VALUES (0, false, false, 0, false); +-- INSERT INTO aqo_queries VALUES (0, false, false, 0, false); -- a virtual query for COMMON feature space -CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE - ON aqo_queries FOR EACH STATEMENT - EXECUTE PROCEDURE invalidate_deactivated_queries_cache(); +--CREATE 
TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE +-- ON aqo_queries FOR EACH STATEMENT +-- EXECUTE PROCEDURE invalidate_deactivated_queries_cache(); -- -- Show execution time of queries, for which AQO has statistics. @@ -110,12 +117,12 @@ IF (controlled) THEN queryid, fs_hash, exectime, execs FROM ( SELECT - aq.query_hash AS queryid, + aq.queryid AS queryid, aq.fspace_hash AS fs_hash, execution_time_with_aqo[array_length(execution_time_with_aqo, 1)] AS exectime, executions_with_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.query_hash = aqs.queryid + ON aq.queryid = aqs.queryid WHERE TRUE = ANY (SELECT unnest(execution_time_with_aqo) IS NOT NULL) ) AS q1 ORDER BY nn ASC; @@ -129,12 +136,12 @@ ELSE queryid, fs_hash, exectime, execs FROM ( SELECT - aq.query_hash AS queryid, + aq.queryid AS queryid, aq.fspace_hash AS fs_hash, (SELECT AVG(t) FROM unnest(execution_time_without_aqo) t) AS exectime, executions_without_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.query_hash = aqs.queryid + ON aq.queryid = aqs.queryid WHERE TRUE = ANY (SELECT unnest(execution_time_without_aqo) IS NOT NULL) ) AS q1 ORDER BY (nn) ASC; @@ -148,32 +155,32 @@ COMMENT ON FUNCTION aqo_execution_time(boolean) IS -- -- Remove all information about a query class from AQO storage. 
-- -CREATE OR REPLACE FUNCTION aqo_drop_class(queryid bigint) +CREATE OR REPLACE FUNCTION aqo_drop_class(queryid_rm bigint) RETURNS integer AS $$ DECLARE lfs bigint; num integer; BEGIN - IF (queryid = 0) THEN - raise EXCEPTION '[AQO] Cannot remove basic class %.', queryid; + IF (queryid_rm = 0) THEN + raise EXCEPTION '[AQO] Cannot remove basic class %.', queryid_rm; END IF; - SELECT fspace_hash FROM aqo_queries WHERE (query_hash = queryid) INTO lfs; + SELECT fspace_hash FROM aqo_queries WHERE (queryid = queryid_rm) INTO lfs; IF (lfs IS NULL) THEN - raise WARNING '[AQO] Nothing to remove for the class %.', queryid; + raise WARNING '[AQO] Nothing to remove for the class %.', queryid_rm; RETURN 0; END IF; - IF (lfs <> queryid) THEN - raise WARNING '[AQO] Removing query class has non-generic feature space value: id = %, fs = %.', queryid, fs; + IF (lfs <> queryid_rm) THEN + raise WARNING '[AQO] Removing query class has non-generic feature space value: id = %, fs = %.', queryid_rm, lfs; END IF; SELECT count(*) FROM aqo_data WHERE fs = lfs INTO num; - DELETE FROM aqo_queries WHERE query_hash = queryid; - PERFORM aqo_stat_remove(queryid); - PERFORM aqo_qtexts_remove(queryid); + PERFORM aqo_queries_remove(queryid_rm); + PERFORM aqo_stat_remove(queryid_rm); + PERFORM aqo_qtexts_remove(queryid_rm); PERFORM aqo_data_remove(lfs, NULL); RETURN num; END; @@ -211,7 +218,7 @@ BEGIN -- END IF; -- Remove ALL feature space if one of oids isn't exists - DELETE FROM aqo_queries WHERE fspace_hash = lfs; + PERFORM aqo_queries_remove(lfs); PERFORM aqo_stat_remove(lfs); PERFORM aqo_qtexts_remove(lfs); PERFORM aqo_data_remove(lfs, NULL); @@ -250,12 +257,12 @@ IF (controlled) THEN query_id, fs_hash, cerror, execs FROM ( SELECT - aq.query_hash AS query_id, + aq.queryid AS query_id, aq.fspace_hash AS fs_hash, cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror, executions_with_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs
+ ON aq.queryid = aqs.queryid WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) ) AS q1 ORDER BY nn ASC; @@ -266,12 +273,12 @@ ELSE query_id, fs_hash, cerror, execs FROM ( SELECT - aq.query_hash AS query_id, + aq.queryid AS query_id, aq.fspace_hash AS fs_hash, (SELECT AVG(t) FROM unnest(cardinality_error_without_aqo) t) AS cerror, executions_without_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.query_hash = aqs.queryid + ON aq.queryid = aqs.queryid WHERE TRUE = ANY (SELECT unnest(cardinality_error_without_aqo) IS NOT NULL) ) AS q1 ORDER BY (nn) ASC; @@ -289,17 +296,17 @@ COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS -- class. -- Returns a number of deleted rows in the aqo_data table. -- -CREATE OR REPLACE FUNCTION aqo_reset_query(queryid bigint) +CREATE OR REPLACE FUNCTION aqo_reset_query(queryid_res bigint) RETURNS integer AS $$ DECLARE num integer; fs bigint; BEGIN - IF (queryid = 0) THEN + IF (queryid_res = 0) THEN raise WARNING '[AQO] Reset common feature space.' 
END IF; - SELECT fspace_hash FROM aqo_queries WHERE query_hash = queryid INTO fs; + SELECT fspace_hash FROM aqo_queries WHERE queryid = queryid_res INTO fs; SELECT count(*) FROM aqo_data WHERE fspace_hash = fs INTO num; DELETE FROM aqo_data WHERE fspace_hash = fs; RETURN num; @@ -338,18 +345,18 @@ FROM aqo_queries aq, aqo_query_stat aqs, execution_time_with_aqo AS n3, execution_time_without_aqo AS n4 FROM aqo_query_stat aqs WHERE - aqs.query_hash = $1) AS al) AS q -WHERE (aqs.query_hash = aq.query_hash) AND - aqs.query_hash = $1; + aqs.queryid = $1) AS al) AS q +WHERE (aqs.queryid = aq.queryid) AND + aqs.queryid = $1; $$ LANGUAGE SQL; -CREATE FUNCTION aqo_enable_query(hash bigint) +/* CREATE FUNCTION aqo_enable_query(hash bigint) RETURNS VOID AS $$ UPDATE aqo_queries SET learn_aqo = 'true', use_aqo = 'true' - WHERE query_hash = $1; -$$ LANGUAGE SQL; + WHERE queryid = $1; +$$ LANGUAGE SQL; CREATE FUNCTION aqo_disable_query(hash bigint) RETURNS VOID AS $$ @@ -357,5 +364,21 @@ UPDATE aqo_queries SET learn_aqo = 'false', use_aqo = 'false', auto_tuning = 'false' - WHERE query_hash = $1; + WHERE queryid = $1; $$ LANGUAGE SQL; +*/ + +CREATE FUNCTION aqo_enable_query(hash bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_enable_query' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION aqo_disable_query(hash bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_disable_query' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION aqo_queries_update(learn_aqo int, use_aqo int, auto_tuning int) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_queries_update' +LANGUAGE C STRICT VOLATILE; \ No newline at end of file diff --git a/aqo.h b/aqo.h index de7fae64..ece63736 100644 --- a/aqo.h +++ b/aqo.h @@ -252,9 +252,7 @@ int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); /* Storage interaction */ -extern bool 
file_find_query(uint64 queryid); extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe); extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, diff --git a/aqo_shared.c b/aqo_shared.c index b9a802e1..819b585b 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -183,6 +183,7 @@ aqo_init_shmem(void) stat_htab = NULL; qtexts_htab = NULL; data_htab = NULL; + queries_htab = NULL; LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); aqo_state = ShmemInitStruct("AQO", sizeof(AQOSharedState), &found); @@ -203,6 +204,7 @@ aqo_init_shmem(void) LWLockInitialize(&aqo_state->stat_lock, LWLockNewTrancheId()); LWLockInitialize(&aqo_state->qtexts_lock, LWLockNewTrancheId()); LWLockInitialize(&aqo_state->data_lock, LWLockNewTrancheId()); + LWLockInitialize(&aqo_state->queries_lock, LWLockNewTrancheId()); } info.keysize = sizeof(htab_key); @@ -232,6 +234,13 @@ aqo_init_shmem(void) fss_max_items, fss_max_items, &info, HASH_ELEM | HASH_BLOBS); + /* Shared memory hash table for queries */ + info.keysize = sizeof(((QueriesEntry *) 0)->queryid); + info.entrysize = sizeof(QueriesEntry); + queries_htab = ShmemInitHash("AQO Queries HTAB", + fs_max_items, fs_max_items, + &info, HASH_ELEM | HASH_BLOBS); + LWLockRelease(AddinShmemInitLock); LWLockRegisterTranche(aqo_state->lock.tranche, "AQO"); LWLockRegisterTranche(aqo_state->stat_lock.tranche, "AQO Stat Lock Tranche"); @@ -239,11 +248,15 @@ aqo_init_shmem(void) LWLockRegisterTranche(aqo_state->qtext_trancheid, "AQO Query Texts Tranche"); LWLockRegisterTranche(aqo_state->data_lock.tranche, "AQO Data Lock Tranche"); LWLockRegisterTranche(aqo_state->data_trancheid, "AQO Data Tranche"); + LWLockRegisterTranche(aqo_state->queries_lock.tranche, "AQO Queries Lock Tranche"); if (!IsUnderPostmaster) { before_shmem_exit(on_shmem_shutdown, (Datum) 0); - aqo_stat_load(); /* Doesn't use DSA, so can be loaded in postmaster */ + + /* Doesn't use DSA, so can be loaded in postmaster */ + aqo_stat_load(); + 
aqo_queries_load(); } } @@ -254,6 +267,7 @@ static void on_shmem_shutdown(int code, Datum arg) { aqo_stat_flush(); + aqo_queries_flush(); } Size @@ -267,6 +281,7 @@ aqo_memsize(void) size = add_size(size, hash_estimate_size(fs_max_items, sizeof(StatEntry))); size = add_size(size, hash_estimate_size(fs_max_items, sizeof(QueryTextEntry))); size = add_size(size, hash_estimate_size(fss_max_items, sizeof(DataEntry))); + size = add_size(size, hash_estimate_size(fs_max_items, sizeof(QueriesEntry))); return size; } diff --git a/aqo_shared.h b/aqo_shared.h index bf03648b..242322ab 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -40,6 +40,8 @@ typedef struct AQOSharedState dsa_handle data_dsa_handler; int data_trancheid; bool data_changed; + + LWLock queries_lock; /* lock for access to queries storage */ } AQOSharedState; diff --git a/auto_tuning.c b/auto_tuning.c index c2031932..2a713bfc 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -200,11 +200,11 @@ automatical_query_tuning(uint64 qhash, StatEntry *stat) } if (num_iterations <= auto_tuning_max_iterations || p_use > 0.5) - update_query(qhash, + aqo_queries_store(qhash, query_context.fspace_hash, query_context.learn_aqo, query_context.use_aqo, true); else - update_query(qhash, query_context.fspace_hash, false, false, false); + aqo_queries_store(qhash, query_context.fspace_hash, false, false, false); } diff --git a/expected/aqo_controlled.out b/expected/aqo_controlled.out index b7b33aa9..956a5441 100644 --- a/expected/aqo_controlled.out +++ b/expected/aqo_controlled.out @@ -107,9 +107,12 @@ SELECT count(*) FROM tmp1; DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET auto_tuning=false; -UPDATE aqo_queries SET learn_aqo=true; -UPDATE aqo_queries SET use_aqo=false; +SELECT aqo_queries_update(1, 0, 0); + aqo_queries_update +-------------------- + +(1 row) + EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -191,7 +194,12 @@ WHERE t1.a = t2.b AND t2.a = t3.b; -> 
Seq Scan on aqo_test1 t3 (9 rows) -UPDATE aqo_queries SET use_aqo=true; +SELECT aqo_queries_update(2, 1, 2); + aqo_queries_update +-------------------- + +(1 row) + EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -301,7 +309,7 @@ DROP TABLE aqo_test2; SELECT aqo_reset(); aqo_reset ----------- - 50 + 61 (1 row) DROP EXTENSION aqo; diff --git a/expected/aqo_disabled.out b/expected/aqo_disabled.out index 6fa67fc0..4b8a43fa 100644 --- a/expected/aqo_disabled.out +++ b/expected/aqo_disabled.out @@ -64,7 +64,7 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero count ------- 0 @@ -116,7 +116,7 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero count ------- 0 @@ -142,14 +142,19 @@ SELECT count(*) FROM tmp1; (1 row) DROP TABLE tmp1; -SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero count ------- 0 (1 row) SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = true, use_aqo = true, auto_tuning = false; +SELECT aqo_queries_update(1, 1, 0); + aqo_queries_update +-------------------- + +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN @@ -176,7 +181,7 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries 
WHERE queryid <> fspace_hash; -- Should be zero count ------- 0 @@ -209,7 +214,7 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero count ------- 0 @@ -219,7 +224,7 @@ SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be z SELECT aqo_reset(); aqo_reset ----------- - 18 + 23 (1 row) DROP EXTENSION aqo; diff --git a/expected/aqo_intelligent.out b/expected/aqo_intelligent.out index aff0d16e..1870ca01 100644 --- a/expected/aqo_intelligent.out +++ b/expected/aqo_intelligent.out @@ -507,7 +507,7 @@ DROP TABLE aqo_test1; SELECT aqo_reset(); aqo_reset ----------- - 103 + 127 (1 row) DROP EXTENSION aqo; diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 718fbe0a..07ee6a1e 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -480,8 +480,8 @@ WARNING: [AQO] Nothing to remove for the class 42. 
-- Remove all data from ML knowledge base SELECT count(*) FROM ( SELECT aqo_drop_class(q1.id::bigint) FROM ( - SELECT query_hash AS id - FROM aqo_queries WHERE query_hash <> 0) AS q1 + SELECT queryid AS id + FROM aqo_queries WHERE queryid <> 0) AS q1 ) AS q2; count ------- diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index 07ae3854..aa2eaa7e 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -20,8 +20,8 @@ SELECT aqo_cleanup(); /* * lines with a_oid in aqo_data, * lines with fspace_hash corresponding to a_oid in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat + * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_texts, + * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_stat * should remain */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); @@ -38,7 +38,7 @@ SELECT count(*) FROM aqo_queries WHERE (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- @@ -46,7 +46,7 @@ SELECT count(*) FROM aqo_query_texts WHERE (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- @@ -62,9 +62,9 @@ SELECT aqo_cleanup(); /* * lines with a_oid in aqo_data, - * lines with a_oid's fspace_hash EQUAL TO query_hash in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with query_hash 
corresponding to a_oid's fspace_hash in aqo_query_stat, + * lines with a_oid's fspace_hash EQUAL TO queryid in aqo_queries, + * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_texts, + * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_stat, * should be deleted */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); @@ -75,25 +75,25 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fspace_hash = aqo_queries.queryid; count ------- 0 (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); count ------- 0 (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); count ------- 0 @@ -105,7 +105,7 @@ SELECT * FROM a; (0 rows) SELECT 'a'::regclass::oid AS a_oid \gset --- add manually line with different fspace_hash and query_hash to aqo_queries +-- add manually line with different fspace_hash and queryid to aqo_queries INSERT INTO aqo_queries VALUES (:a_oid + 1, 't', 't', :a_oid, 'f'); DROP TABLE a; SELECT aqo_cleanup(); @@ -115,7 +115,7 @@ SELECT aqo_cleanup(); (1 row) -- this line should remain -SELECT count(*) FROM aqo_queries WHERE 
(fspace_hash = :a_oid AND query_hash = :a_oid + 1); +SELECT count(*) FROM aqo_queries WHERE (fspace_hash = :a_oid AND queryid = :a_oid + 1); count ------- 1 @@ -152,7 +152,7 @@ SELECT count(*) FROM aqo_queries WHERE (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- @@ -160,7 +160,7 @@ SELECT count(*) FROM aqo_query_texts WHERE (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- @@ -181,7 +181,7 @@ SELECT count(*) FROM aqo_queries WHERE (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- @@ -189,7 +189,7 @@ SELECT count(*) FROM aqo_query_texts WHERE (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- @@ -206,8 +206,8 @@ SELECT aqo_cleanup(); /* * lines corresponding to a_oid and both a_oid's fspace_hash deleted in aqo_data, * lines with fspace_hash corresponding to a_oid deleted in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_texts, - * lines with query_hash 
corresponding to a_oid's fspace_hash deleted in aqo_query_stat, + * lines with queryid corresponding to a_oid's fspace_hash deleted in aqo_query_texts, + * lines with queryid corresponding to a_oid's fspace_hash deleted in aqo_query_stat, */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); count @@ -217,25 +217,25 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fspace_hash = aqo_queries.queryid; count ------- 0 (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); count ------- 0 (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); count ------- 0 @@ -250,25 +250,25 @@ SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fspace_hash = aqo_queries.queryid; count ------- 1 (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid 
FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); count ------- 1 (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); count ------- 1 @@ -290,25 +290,25 @@ SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fspace_hash = aqo_queries.queryid; count ------- 0 (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); count ------- 0 (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); count ------- 0 diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index 43030489..ec5ba020 100644 --- a/expected/forced_stat_collection.out +++ 
b/expected/forced_stat_collection.out @@ -39,7 +39,7 @@ SELECT * FROM aqo_data; SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex FROM aqo_queries AS aq JOIN aqo_query_stat AS aqs -ON aq.query_hash = aqs.queryid; +ON aq.queryid = aqs.queryid; learn_aqo | use_aqo | auto_tuning | ce | nex -----------+---------+-------------+----------------------+----- f | f | f | {0.8637762840285226} | 1 diff --git a/expected/gucs.out b/expected/gucs.out index 40c177c9..7f74f527 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -8,7 +8,7 @@ ANALYZE t; SELECT * FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. aqo_reset ----------- - 19 + 25 (1 row) -- Check AQO addons to explain (the only stable data) @@ -92,7 +92,7 @@ SELECT obj_description('aqo_reset'::regproc::oid); List of functions Schema | Name | Result data type | Argument data types | Type --------+----------------+------------------+---------------------+------ - public | aqo_drop_class | integer | queryid bigint | func + public | aqo_drop_class | integer | queryid_rm bigint | func (1 row) \df aqo_cleanup @@ -106,7 +106,7 @@ SELECT obj_description('aqo_reset'::regproc::oid); List of functions Schema | Name | Result data type | Argument data types | Type --------+-----------------+------------------+---------------------+------ - public | aqo_reset_query | integer | queryid bigint | func + public | aqo_reset_query | integer | queryid_res bigint | func (1 row) \df aqo_reset @@ -123,10 +123,10 @@ SELECT count(*) FROM aqo_query_stat; 1 (1 row) -SELECT * FROM aqo_reset(); -- Remove one record from texts and one from stat +SELECT * FROM aqo_reset(); -- Remove one record from all tables aqo_reset ----------- - 3 + 4 (1 row) SELECT count(*) FROM aqo_query_stat; diff --git a/expected/relocatable.out b/expected/relocatable.out index 4658e75d..98b53217 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -10,8 +10,8 @@ SELECT 
count(*) FROM test; 100 (1 row) -SELECT query_text,learn_aqo, use_aqo, auto_tuning -FROM aqo_query_texts JOIN aqo_queries ON (queryid = query_hash) +SELECT query_text, learn_aqo, use_aqo, auto_tuning +FROM aqo_query_texts aqt JOIN aqo_queries aq ON (aqt.queryid = aq.queryid) ; -- Check result. TODO: use aqo_status() query_text | learn_aqo | use_aqo | auto_tuning ---------------------------------------+-----------+---------+------------- @@ -36,7 +36,7 @@ SELECT count(*) FROM test WHERE id < 10; (1 row) SELECT query_text, learn_aqo, use_aqo, auto_tuning -FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) +FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) ORDER BY (md5(query_text)) ; -- Check result. TODO: We want to find here both queries executed above query_text | learn_aqo | use_aqo | auto_tuning @@ -65,7 +65,7 @@ SELECT count(*) FROM test WHERE id < 10; (1 row) SELECT query_text, learn_aqo, use_aqo, auto_tuning -FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) +FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) ORDER BY (md5(query_text)) ; -- Check result. 
query_text | learn_aqo | use_aqo | auto_tuning @@ -79,7 +79,7 @@ ORDER BY (md5(query_text)) * Below, we should check each UI function */ SELECT aqo_disable_query(id) FROM ( - SELECT query_hash AS id FROM aqo_queries WHERE query_hash <> 0) AS q1; + SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; aqo_disable_query ------------------- @@ -95,7 +95,7 @@ SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; (3 rows) SELECT aqo_enable_query(id) FROM ( - SELECT query_hash AS id FROM aqo_queries WHERE query_hash <> 0) AS q1; + SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; aqo_enable_query ------------------ diff --git a/expected/temp_tables.out b/expected/temp_tables.out index 6d9d1b73..79de6284 100644 --- a/expected/temp_tables.out +++ b/expected/temp_tables.out @@ -73,7 +73,7 @@ SELECT count(*) FROM aqo_data; -- Should be 0 (1 row) SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt -ON aq.query_hash = aqt.queryid +ON aq.queryid = aqt.queryid ORDER BY (md5(query_text)); -- TODO: should contain just one row query_text ------------------------------------------ diff --git a/expected/top_queries.out b/expected/top_queries.out index cc5592df..13e9cfa2 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -68,7 +68,7 @@ SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y SELECT num, to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te WHERE te.fshash = ( SELECT fspace_hash FROM aqo_queries - WHERE aqo_queries.query_hash = ( + WHERE aqo_queries.queryid = ( SELECT aqo_query_texts.queryid FROM aqo_query_texts WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' ) @@ -98,4 +98,10 @@ ORDER BY (md5(query_text)); SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 (4 rows) -DROP EXTENSION aqo CASCADE; +SELECT aqo_reset(); + aqo_reset +----------- + 23 +(1 
row) + +DROP EXTENSION aqo; diff --git a/preprocessing.c b/preprocessing.c index 43312e80..37913712 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -205,7 +205,7 @@ aqo_planner(Query *parse, goto ignore_query_settings; } - query_is_stored = find_query(query_context.query_hash, &query_context); + query_is_stored = file_find_query(query_context.query_hash); if (!query_is_stored) { @@ -317,7 +317,7 @@ aqo_planner(Query *parse, * concurrent addition from another backend we will try to restart * preprocessing routine. */ - update_query(query_context.query_hash, query_context.fspace_hash, + aqo_queries_store(query_context.query_hash, query_context.fspace_hash, query_context.learn_aqo, query_context.use_aqo, query_context.auto_tuning); diff --git a/sql/aqo_controlled.sql b/sql/aqo_controlled.sql index c337c702..959dd82a 100644 --- a/sql/aqo_controlled.sql +++ b/sql/aqo_controlled.sql @@ -77,10 +77,8 @@ SELECT count(*) FROM tmp1; DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET auto_tuning=false; -UPDATE aqo_queries SET learn_aqo=true; -UPDATE aqo_queries SET use_aqo=false; +SELECT aqo_queries_update(1, 0, 0); EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 @@ -108,7 +106,7 @@ SELECT t1.a AS a, t2.a AS b, t3.a AS c FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 WHERE t1.a = t2.b AND t2.a = t3.b; -UPDATE aqo_queries SET use_aqo=true; +SELECT aqo_queries_update(2, 1, 2); EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 diff --git a/sql/aqo_disabled.sql b/sql/aqo_disabled.sql index 8c8e487c..3bf7a47b 100644 --- a/sql/aqo_disabled.sql +++ b/sql/aqo_disabled.sql @@ -36,7 +36,7 @@ WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should 
be zero SET aqo.mode = 'disabled'; @@ -58,7 +58,7 @@ EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero SET aqo.mode = 'intelligent'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 @@ -72,10 +72,10 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = SELECT count(*) FROM tmp1; DROP TABLE tmp1; -SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = true, use_aqo = true, auto_tuning = false; +SELECT aqo_queries_update(1, 1, 0); EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -84,7 +84,7 @@ EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero SET aqo.mode = 'disabled'; EXPLAIN SELECT * FROM aqo_test0 @@ -93,7 +93,7 @@ WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero -- XXX: extension dropping doesn't clear file storage. Do it manually. 
SELECT aqo_reset(); diff --git a/sql/aqo_intelligent.sql b/sql/aqo_intelligent.sql index 028ce936..a3bce4f2 100644 --- a/sql/aqo_intelligent.sql +++ b/sql/aqo_intelligent.sql @@ -145,7 +145,7 @@ DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = false, auto_tuning = false; +SELECT aqo_queries_update(0, 0, 0); EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -174,7 +174,7 @@ EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = true, auto_tuning = false; +SELECT aqo_queries_update(0, 1, 0); EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index db461f50..6256d2d7 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -141,7 +141,7 @@ DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = false, auto_tuning = false; +SELECT aqo_queries_update(0, 0, 0); EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -172,7 +172,7 @@ EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = true, auto_tuning = false; +SELECT aqo_queries_update(0, 1, 0); EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -214,8 +214,8 @@ SELECT * FROM aqo_drop_class(42); -- Remove all data from ML knowledge base SELECT count(*) FROM ( SELECT aqo_drop_class(q1.id::bigint) FROM ( - SELECT query_hash AS id - FROM aqo_queries WHERE query_hash <> 0) AS q1 + SELECT queryid AS id + FROM aqo_queries WHERE queryid <> 0) AS q1 ) AS q2; SELECT count(*) FROM aqo_data; diff --git a/sql/clean_aqo_data.sql 
b/sql/clean_aqo_data.sql index e02bf806..1fc4374e 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -12,18 +12,18 @@ SELECT aqo_cleanup(); /* * lines with a_oid in aqo_data, * lines with fspace_hash corresponding to a_oid in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat + * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_texts, + * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_stat * should remain */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); DROP TABLE a; @@ -31,33 +31,33 @@ SELECT aqo_cleanup(); /* * lines with a_oid in aqo_data, - * lines with a_oid's fspace_hash EQUAL TO query_hash in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat, + * lines with a_oid's fspace_hash EQUAL TO queryid in aqo_queries, + * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_texts, + * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_stat, * should be deleted */ SELECT count(*) FROM aqo_data WHERE 
:a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fspace_hash = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); CREATE TABLE a(); SELECT * FROM a; SELECT 'a'::regclass::oid AS a_oid \gset --- add manually line with different fspace_hash and query_hash to aqo_queries +-- add manually line with different fspace_hash and queryid to aqo_queries INSERT INTO aqo_queries VALUES (:a_oid + 1, 't', 't', :a_oid, 'f'); DROP TABLE a; SELECT aqo_cleanup(); -- this line should remain -SELECT count(*) FROM aqo_queries WHERE (fspace_hash = :a_oid AND query_hash = :a_oid + 1); +SELECT count(*) FROM aqo_queries WHERE (fspace_hash = :a_oid AND queryid = :a_oid + 1); CREATE TABLE a(); CREATE TABLE b(); @@ -72,20 +72,20 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = 
ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); DROP TABLE a; @@ -94,35 +94,35 @@ SELECT aqo_cleanup(); /* * lines corresponding to a_oid and both a_oid's fspace_hash deleted in aqo_data, * lines with fspace_hash corresponding to a_oid deleted in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_texts, - * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_stat, + * lines with queryid corresponding to a_oid's fspace_hash deleted in aqo_query_texts, + * lines with queryid corresponding to a_oid's fspace_hash deleted in aqo_query_stat, */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = 
aqo_queries.query_hash; + aqo_queries.fspace_hash = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); -- lines corresponding to b_oid in all theese tables should remain SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fspace_hash = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + 
aqo_queries.fspace_hash = aqo_queries.queryid); DROP TABLE b; SELECT aqo_cleanup(); @@ -131,14 +131,14 @@ SELECT aqo_cleanup(); SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fspace_hash = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); DROP EXTENSION aqo; \ No newline at end of file diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index 3b4ce55d..a3a63685 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -31,7 +31,7 @@ SELECT * FROM aqo_data; SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex FROM aqo_queries AS aq JOIN aqo_query_stat AS aqs -ON aq.query_hash = aqs.queryid; +ON aq.queryid = aqs.queryid; SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); diff --git a/sql/gucs.sql b/sql/gucs.sql index fe2c4d17..1dba9c6c 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -33,7 +33,7 @@ SELECT obj_description('aqo_reset'::regproc::oid); -- Check stat reset SELECT count(*) FROM 
aqo_query_stat; -SELECT * FROM aqo_reset(); -- Remove one record from texts and one from stat +SELECT * FROM aqo_reset(); -- Remove one record from all tables SELECT count(*) FROM aqo_query_stat; DROP EXTENSION aqo; diff --git a/sql/relocatable.sql b/sql/relocatable.sql index 51facc66..18a31643 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -7,8 +7,8 @@ ANALYZE test; -- Learn on a query SELECT count(*) FROM test; -SELECT query_text,learn_aqo, use_aqo, auto_tuning -FROM aqo_query_texts JOIN aqo_queries ON (queryid = query_hash) +SELECT query_text, learn_aqo, use_aqo, auto_tuning +FROM aqo_query_texts aqt JOIN aqo_queries aq ON (aqt.queryid = aq.queryid) ; -- Check result. TODO: use aqo_status() -- Create a schema and move AQO into it. @@ -20,7 +20,7 @@ SELECT count(*) FROM test; SELECT count(*) FROM test WHERE id < 10; SELECT query_text, learn_aqo, use_aqo, auto_tuning -FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) +FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) ORDER BY (md5(query_text)) ; -- Check result. TODO: We want to find here both queries executed above @@ -31,7 +31,7 @@ SELECT count(*) FROM test; SELECT count(*) FROM test WHERE id < 10; SELECT query_text, learn_aqo, use_aqo, auto_tuning -FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) +FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) ORDER BY (md5(query_text)) ; -- Check result. 
@@ -39,10 +39,10 @@ ORDER BY (md5(query_text)) * Below, we should check each UI function */ SELECT aqo_disable_query(id) FROM ( - SELECT query_hash AS id FROM aqo_queries WHERE query_hash <> 0) AS q1; + SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; SELECT aqo_enable_query(id) FROM ( - SELECT query_hash AS id FROM aqo_queries WHERE query_hash <> 0) AS q1; + SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; RESET search_path; diff --git a/sql/temp_tables.sql b/sql/temp_tables.sql index 04db87a1..97b1e628 100644 --- a/sql/temp_tables.sql +++ b/sql/temp_tables.sql @@ -23,7 +23,7 @@ DROP TABLE pt; SELECT aqo_cleanup(); SELECT count(*) FROM aqo_data; -- Should be 0 SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt -ON aq.query_hash = aqt.queryid +ON aq.queryid = aqt.queryid ORDER BY (md5(query_text)); -- TODO: should contain just one row -- Test learning on temporary table diff --git a/sql/top_queries.sql b/sql/top_queries.sql index 98b27846..bf3c9f60 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -36,7 +36,7 @@ SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y SELECT num, to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te WHERE te.fshash = ( SELECT fspace_hash FROM aqo_queries - WHERE aqo_queries.query_hash = ( + WHERE aqo_queries.queryid = ( SELECT aqo_query_texts.queryid FROM aqo_query_texts WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' ) @@ -51,4 +51,5 @@ FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt WHERE ce.id = aqt.queryid ORDER BY (md5(query_text)); -DROP EXTENSION aqo CASCADE; +SELECT aqo_reset(); +DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index f436348f..642b22c3 100644 --- a/storage.c +++ b/storage.c @@ -90,174 +90,6 @@ 
open_aqo_relation(char *heaprelnspname, char *heaprelname, } -/* - * Returns whether the query with given hash is in aqo_queries. - * If yes, returns the content of the first line with given hash. - * - * Use dirty snapshot to see all (include in-progess) data. We want to prevent - * wait in the XactLockTableWait routine. - * If query is found in the knowledge base, fill the query context struct. - */ -bool -find_query(uint64 qhash, QueryContextData *ctx) -{ - Relation hrel; - Relation irel; - HeapTuple tuple; - TupleTableSlot *slot; - bool shouldFree = true; - IndexScanDesc scan; - ScanKeyData key; - SnapshotData snap; - bool find_ok = false; - Datum values[5]; - bool nulls[5] = {false, false, false, false, false}; - - if (!open_aqo_relation(NULL, "aqo_queries", "aqo_queries_query_hash_idx", - AccessShareLock, &hrel, &irel)) - return false; - - InitDirtySnapshot(snap); - scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); - - index_rescan(scan, &key, 1, NULL, 0); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); - - if (find_ok) - { - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - heap_deform_tuple(tuple, hrel->rd_att, values, nulls); - - /* Fill query context data */ - ctx->learn_aqo = DatumGetBool(values[1]); - ctx->use_aqo = DatumGetBool(values[2]); - ctx->fspace_hash = DatumGetInt64(values[3]); - ctx->auto_tuning = DatumGetBool(values[4]); - ctx->collect_stat = query_context.auto_tuning; - } - - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, AccessShareLock); - table_close(hrel, AccessShareLock); - return find_ok; -} - -/* - * Update query status in intelligent mode. - * - * Do it gently: to prevent possible deadlocks, revert this update if any - * concurrent transaction is doing it. 
- * - * Such logic is possible, because this update is performed by AQO itself. It is - * not break any learning logic besides possible additional learning iterations. - * Pass NIL as a value of the relations field to avoid updating it. - */ -bool -update_query(uint64 qhash, uint64 fhash, - bool learn_aqo, bool use_aqo, bool auto_tuning) -{ - Relation hrel; - Relation irel; - TupleTableSlot *slot; - HeapTuple tuple, - nw_tuple; - Datum values[5]; - bool isnull[5] = { false, false, false, false, false }; - bool replace[5] = { false, true, true, true, true }; - bool shouldFree; - bool result = true; - bool update_indexes; - IndexScanDesc scan; - ScanKeyData key; - SnapshotData snap; - - /* Couldn't allow to write if xact must be read-only. */ - if (XactReadOnly) - return false; - - if (!open_aqo_relation(NULL, "aqo_queries", "aqo_queries_query_hash_idx", - RowExclusiveLock, &hrel, &irel)) - return false; - - /* - * Start an index scan. Use dirty snapshot to check concurrent updates that - * can be made before, but still not visible. - */ - InitDirtySnapshot(snap); - scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); - - index_rescan(scan, &key, 1, NULL, 0); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - - values[0] = Int64GetDatum(qhash); - values[1] = BoolGetDatum(learn_aqo); - values[2] = BoolGetDatum(use_aqo); - values[3] = Int64GetDatum(fhash); - values[4] = BoolGetDatum(auto_tuning); - - if (!index_getnext_slot(scan, ForwardScanDirection, slot)) - { - /* New tuple for the ML knowledge base */ - tuple = heap_form_tuple(RelationGetDescr(hrel), values, isnull); - simple_heap_insert(hrel, tuple); - my_index_insert(irel, values, isnull, &(tuple->t_self), - hrel, UNIQUE_CHECK_YES); - } - else if (!TransactionIdIsValid(snap.xmin) && - !TransactionIdIsValid(snap.xmax)) - { - /* - * Update existed data. No one concurrent transaction doesn't update this - * right now. 
- */ - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - nw_tuple = heap_modify_tuple(tuple, hrel->rd_att, values, isnull, replace); - - if (my_simple_heap_update(hrel, &(nw_tuple->t_self), nw_tuple, - &update_indexes)) - { - if (update_indexes) - my_index_insert(irel, values, isnull, - &(nw_tuple->t_self), - hrel, UNIQUE_CHECK_YES); - result = true; - } - else - { - /* - * Ooops, somebody concurrently updated the tuple. It is possible - * only in the case of changes made by third-party code. - */ - elog(ERROR, "AQO feature space data for signature ("UINT64_FORMAT \ - ", "UINT64_FORMAT") concurrently" - " updated by a stranger backend.", - qhash, fhash); - result = false; - } - } - else - { - /* - * Concurrent update was made. To prevent deadlocks refuse to update. - */ - result = false; - } - - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, RowExclusiveLock); - table_close(hrel, RowExclusiveLock); - - CommandCounterIncrement(); - return result; -} - /* static ArrayType * form_strings_vector(List *reloids) @@ -506,13 +338,19 @@ add_deactivated_query(uint64 query_hash) #define PGAQO_STAT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_statistics.stat" #define PGAQO_TEXT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_query_texts.stat" #define PGAQO_DATA_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_data.stat" +#define PGAQO_QUERIES_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_queries.stat" PG_FUNCTION_INFO_V1(aqo_query_stat); PG_FUNCTION_INFO_V1(aqo_query_texts); PG_FUNCTION_INFO_V1(aqo_data); +PG_FUNCTION_INFO_V1(aqo_queries); PG_FUNCTION_INFO_V1(aqo_stat_remove); PG_FUNCTION_INFO_V1(aqo_qtexts_remove); PG_FUNCTION_INFO_V1(aqo_data_remove); +PG_FUNCTION_INFO_V1(aqo_queries_remove); +PG_FUNCTION_INFO_V1(aqo_enable_query); +PG_FUNCTION_INFO_V1(aqo_disable_query); +PG_FUNCTION_INFO_V1(aqo_queries_update); PG_FUNCTION_INFO_V1(aqo_reset); typedef enum { @@ -529,13 +367,18 @@ typedef enum { AD_OIDS, 
AD_TOTAL_NCOLS } aqo_data_cols; +typedef enum { + AQ_QUERYID = 0, AQ_FSPACE_HASH, AQ_LEARN_AQO, AQ_USE_AQO, AQ_AUTO_TUNING, + AQ_TOTAL_NCOLS +} aqo_queries_cols; + typedef void* (*form_record_t) (void *ctx, size_t *size); typedef void (*deform_record_t) (void *data, size_t size); bool aqo_use_file_storage; HTAB *stat_htab = NULL; -HTAB *queries_htab = NULL; /* TODO */ +HTAB *queries_htab = NULL; HTAB *qtexts_htab = NULL; dsa_area *qtext_dsa = NULL; @@ -883,6 +726,38 @@ aqo_data_flush(void) LWLockRelease(&aqo_state->data_lock); } +static void * +_form_queries_record_cb(void *ctx, size_t *size) +{ + HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; + QueriesEntry *entry; + + *size = sizeof(QueriesEntry); + entry = hash_seq_search(hash_seq); + if (entry == NULL) + return NULL; + + return memcpy(palloc(*size), entry, *size); +} + +void +aqo_queries_flush(void) +{ + HASH_SEQ_STATUS hash_seq; + int ret; + long entries; + + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + entries = hash_get_num_entries(queries_htab); + hash_seq_init(&hash_seq, queries_htab); + ret = data_store(PGAQO_QUERIES_FILE, _form_queries_record_cb, entries, + (void *) &hash_seq); + if (ret != 0) + hash_seq_term(&hash_seq); + + LWLockRelease(&aqo_state->queries_lock); +} + static int data_store(const char *filename, form_record_t callback, long nrecs, void *ctx) @@ -1061,6 +936,45 @@ aqo_data_load(void) LWLockRelease(&aqo_state->data_lock); } +static void +_deform_queries_record_cb(void *data, size_t size) +{ + bool found; + QueriesEntry *entry; + uint64 queryid; + + Assert(LWLockHeldByMeInMode(&aqo_state->queries_lock, LW_EXCLUSIVE)); + Assert(size == sizeof(QueriesEntry)); + + queryid = ((QueriesEntry *) data)->queryid; + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_ENTER, &found); + Assert(!found); + memcpy(entry, data, sizeof(QueriesEntry)); +} + +void +aqo_queries_load(void) +{ + long entries; + bool found; + uint64 queryid = 0; + + 
Assert(!LWLockHeldByMe(&aqo_state->queries_lock)); + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + entries = hash_get_num_entries(queries_htab); + Assert(entries == 0); + data_load(PGAQO_QUERIES_FILE, _deform_queries_record_cb, NULL); + (void) hash_search(queries_htab, &queryid, HASH_FIND, &found); + + LWLockRelease(&aqo_state->queries_lock); + if (!found) + { + if (!aqo_queries_store(0, 0, 0, 0, 0)) + elog(PANIC, "[AQO] aqo_queries initialization was unsuccessful"); + } +} + static void data_load(const char *filename, deform_record_t callback, void *ctx) { @@ -1157,7 +1071,7 @@ dsa_init() dsa_pin(data_dsa); aqo_state->data_dsa_handler = dsa_get_handle(data_dsa); - /* Load and initialize quuery texts hash table */ + /* Load and initialize query texts hash table */ aqo_qtexts_load(); aqo_data_load(); } @@ -1758,6 +1672,219 @@ aqo_data_reset(void) return num_remove; } +Datum +aqo_queries(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[AQ_TOTAL_NCOLS + 1]; + bool nulls[AQ_TOTAL_NCOLS + 1]; + HASH_SEQ_STATUS hash_seq; + QueriesEntry *entry; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + 
elog(ERROR, "return type must be a row type"); + Assert(tupDesc->natts == AQ_TOTAL_NCOLS); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + memset(nulls, 0, AQ_TOTAL_NCOLS + 1); + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + hash_seq_init(&hash_seq, queries_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + values[AQ_QUERYID] = Int64GetDatum(entry->queryid); + values[AQ_FSPACE_HASH] = Int64GetDatum(entry->fspace_hash); + values[AQ_LEARN_AQO] = BoolGetDatum(entry->learn_aqo); + values[AQ_USE_AQO] = BoolGetDatum(entry->use_aqo); + values[AQ_AUTO_TUNING] = BoolGetDatum(entry->auto_tuning); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } + + LWLockRelease(&aqo_state->queries_lock); + tuplestore_donestoring(tupstore); + return (Datum) 0; +} + +Datum +aqo_queries_remove(PG_FUNCTION_ARGS) +{ + uint64 queryid = (uint64) PG_GETARG_INT64(0); + QueriesEntry *entry; + bool removed; + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_REMOVE, NULL); + removed = (entry) ? 
true : false; + LWLockRelease(&aqo_state->queries_lock); + PG_RETURN_BOOL(removed); +} + +QueriesEntry * +aqo_queries_store(uint64 queryid, uint64 fspace_hash, bool learn_aqo, + bool use_aqo, bool auto_tuning) +{ + QueriesEntry *entry; + bool found; + + Assert(queries_htab); + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_ENTER, &found); + + /* Initialize entry on first usage */ + if (!found) + { + uint64 qid = entry->queryid; + memset(entry, 0, sizeof(QueriesEntry)); + entry->queryid = qid; + entry->fspace_hash = fspace_hash; + } + entry->learn_aqo = learn_aqo; + entry->use_aqo = use_aqo; + entry->auto_tuning = auto_tuning; + + entry = memcpy(palloc(sizeof(QueriesEntry)), entry, sizeof(QueriesEntry)); + LWLockRelease(&aqo_state->queries_lock); + return entry; +} + +static long +aqo_queries_reset(void) +{ + HASH_SEQ_STATUS hash_seq; + QueriesEntry *entry; + long num_remove = 0; + long num_entries; + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + num_entries = hash_get_num_entries(queries_htab); + hash_seq_init(&hash_seq, queries_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + if (hash_search(queries_htab, &entry->queryid, HASH_REMOVE, NULL) == NULL) + elog(ERROR, "[AQO] hash table corrupted"); + num_remove++; + } + LWLockRelease(&aqo_state->queries_lock); + Assert(num_remove == num_entries); + + aqo_queries_flush(); + + return num_remove; +} + +Datum +aqo_enable_query(PG_FUNCTION_ARGS) +{ + uint64 queryid = (uint64) PG_GETARG_INT64(0); + QueriesEntry *entry; + bool found; + + Assert(queries_htab); + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, &found); + + if(found) + { + entry->learn_aqo = 1; + entry->use_aqo = 1; + } + else + { + elog(ERROR, "[AQO] Entry with queryid %ld not contained in table", queryid); + } + LWLockRelease(&aqo_state->queries_lock); + 
PG_RETURN_VOID(); +} + +Datum +aqo_disable_query(PG_FUNCTION_ARGS) +{ + uint64 queryid = (uint64) PG_GETARG_INT64(0); + QueriesEntry *entry; + bool found; + + Assert(queries_htab); + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, &found); + + if(found) + { + entry->learn_aqo = 0; + entry->use_aqo = 0; + entry->auto_tuning = 0; + } + else + { + elog(ERROR, "[AQO] Entry with %ld not contained in table", queryid); + } + LWLockRelease(&aqo_state->queries_lock); + PG_RETURN_VOID(); +} + +bool +file_find_query(uint64 queryid) +{ + bool found; + + Assert(queries_htab); + + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + hash_search(queries_htab, &queryid, HASH_FIND, &found); + LWLockRelease(&aqo_state->queries_lock); + return found; +} + +Datum +aqo_queries_update(PG_FUNCTION_ARGS) +{ + HASH_SEQ_STATUS hash_seq; + QueriesEntry *entry; + int learn_aqo = (int) PG_GETARG_INT32(0); + int use_aqo = (int) PG_GETARG_INT32(1); + int auto_tuning = (int) PG_GETARG_INT32(2); + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + hash_seq_init(&hash_seq, queries_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + if (learn_aqo != 2) + entry->learn_aqo = learn_aqo; + if (use_aqo != 2) + entry->use_aqo = use_aqo; + if (auto_tuning != 2) + entry->auto_tuning = auto_tuning; + } + LWLockRelease(&aqo_state->queries_lock); + PG_RETURN_VOID(); +} + Datum aqo_reset(PG_FUNCTION_ARGS) { @@ -1766,5 +1893,6 @@ aqo_reset(PG_FUNCTION_ARGS) counter += aqo_stat_reset(); counter += aqo_qtexts_reset(); counter += aqo_data_reset(); + counter += aqo_queries_reset(); PG_RETURN_INT64(counter); } diff --git a/storage.h b/storage.h index ba2d671d..cf90caf6 100644 --- a/storage.h +++ b/storage.h @@ -71,6 +71,15 @@ typedef struct DataEntry dsa_pointer data_dp; } DataEntry; +typedef struct QueriesEntry +{ + uint64 queryid; + uint64 fspace_hash; + bool learn_aqo; + bool use_aqo; + bool auto_tuning; +} 
QueriesEntry; + extern bool aqo_use_file_storage; extern HTAB *stat_htab; @@ -92,6 +101,11 @@ extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool wideSearch); extern void aqo_data_flush(void); extern void aqo_data_load(void); + +extern QueriesEntry *aqo_queries_store(uint64 queryid, uint64 fspace_hash, bool learn_aqo, + bool use_aqo, bool auto_tuning); +extern void aqo_queries_flush(void); +extern void aqo_queries_load(void); /* Utility routines */ extern ArrayType *form_vector(double *vector, int nrows); From 05f19b2ff50511c6b13ba62999c9c19f835b1f0d Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Mon, 27 Jun 2022 10:18:07 +0500 Subject: [PATCH 099/203] Replace aqo_queries table with a file storage. --- README.md | 4 +- aqo--1.4--1.5.sql | 66 +++----- aqo.c | 26 ++- aqo.h | 14 +- aqo_shared.c | 1 + aqo_shared.h | 1 + auto_tuning.c | 25 ++- expected/aqo_controlled.out | 30 ++-- expected/aqo_disabled.out | 29 ++-- expected/aqo_forced.out | 8 +- expected/aqo_intelligent.out | 28 +++- expected/aqo_learn.out | 28 +++- expected/clean_aqo_data.out | 108 +++++------- expected/relocatable.out | 20 ++- expected/temp_tables.out | 6 + expected/top_queries.out | 10 +- learn_cache.c | 1 + postprocessing.c | 7 +- preprocessing.c | 11 +- sql/aqo_controlled.sql | 12 +- sql/aqo_disabled.sql | 17 +- sql/aqo_forced.sql | 2 +- sql/aqo_intelligent.sql | 12 +- sql/aqo_learn.sql | 12 +- sql/clean_aqo_data.sql | 96 +++++------ sql/relocatable.sql | 3 +- sql/temp_tables.sql | 1 + sql/top_queries.sql | 4 +- storage.c | 313 +++++++++-------------------------- storage.h | 19 ++- t/001_pgbench.pl | 2 +- 31 files changed, 390 insertions(+), 526 deletions(-) diff --git a/README.md b/README.md index aa7946cf..e28ac89c 100644 --- a/README.md +++ b/README.md @@ -212,7 +212,7 @@ execution of such query type. Disabling of AQO usage is reasonable for that cases in which query execution time increases after applying AQO. 
It happens sometimes because of cost models incompleteness. -`Fspace_hash` setting is for extra advanced AQO tuning. It may be changed manually +`fs` setting is for extra advanced AQO tuning. It may be changed manually to optimize a number of queries using the same model. It may decrease the amount of memory for models and even the query execution time, but also it may cause the bad AQO's behavior, so please use it only if you know exactly @@ -230,7 +230,7 @@ ignored. If `aqo.mode` is `'learn'`, then the normalized query hash appends to aqo_queries with the default settings `learn_aqo=true`, `use_aqo=true`, `auto_tuning=false`, and -`fspace_hash = query_hash` which means that AQO uses separate machine learning +`fs = queryid` which means that AQO uses separate machine learning model for this query type optimization. After that the query is processed as if it already was in aqo_queries. diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index c6dc056f..903423e3 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -21,11 +21,11 @@ DROP TABLE public.aqo_query_texts CASCADE; DROP TABLE public.aqo_query_stat CASCADE; CREATE FUNCTION aqo_queries ( - OUT queryid bigint, - OUT fspace_hash bigint, - OUT learn_aqo boolean, - OUT use_aqo boolean, - OUT auto_tuning boolean + OUT queryid bigint, + OUT fs bigint, + OUT learn_aqo boolean, + OUT use_aqo boolean, + OUT auto_tuning boolean ) RETURNS SETOF record AS 'MODULE_PATHNAME', 'aqo_queries' @@ -91,13 +91,6 @@ CREATE FUNCTION aqo_stat_remove(fs bigint) RETURNS bool AS 'MODULE_PATHNAME' LANGUAGE C STRICT PARALLEL SAFE; --- INSERT INTO aqo_queries VALUES (0, false, false, 0, false); --- a virtual query for COMMON feature space - ---CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE --- ON aqo_queries FOR EACH STATEMENT --- EXECUTE PROCEDURE invalidate_deactivated_queries_cache(); - -- -- Show execution time of queries, for which AQO has statistics. 
-- controlled - show stat on executions where AQO was used for cardinality @@ -118,7 +111,7 @@ IF (controlled) THEN FROM ( SELECT aq.queryid AS queryid, - aq.fspace_hash AS fs_hash, + aq.fs AS fs_hash, execution_time_with_aqo[array_length(execution_time_with_aqo, 1)] AS exectime, executions_with_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs @@ -137,7 +130,7 @@ ELSE FROM ( SELECT aq.queryid AS queryid, - aq.fspace_hash AS fs_hash, + aq.fs AS fs_hash, (SELECT AVG(t) FROM unnest(execution_time_without_aqo) t) AS exectime, executions_without_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs @@ -165,7 +158,7 @@ BEGIN raise EXCEPTION '[AQO] Cannot remove basic class %.', queryid_rm; END IF; - SELECT fspace_hash FROM aqo_queries WHERE (queryid = queryid_rm) INTO lfs; + SELECT fs FROM aqo_queries WHERE (queryid = queryid_rm) INTO lfs; IF (lfs IS NULL) THEN raise WARNING '[AQO] Nothing to remove for the class %.', queryid_rm; @@ -258,7 +251,7 @@ IF (controlled) THEN FROM ( SELECT aq.queryid AS query_id, - aq.fspace_hash AS fs_hash, + aq.fs AS fs_hash, cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror, executions_with_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs @@ -274,7 +267,7 @@ ELSE FROM ( SELECT aq.queryid AS query_id, - aq.fspace_hash AS fs_hash, + aq.fs AS fs_hash, (SELECT AVG(t) FROM unnest(cardinality_error_without_aqo) t) AS cerror, executions_without_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs @@ -300,15 +293,15 @@ CREATE OR REPLACE FUNCTION aqo_reset_query(queryid_res bigint) RETURNS integer AS $$ DECLARE num integer; - fs bigint; + lfs bigint; BEGIN IF (queryid_res = 0) THEN raise WARNING '[AQO] Reset common feature space.' 
END IF; - SELECT fspace_hash FROM aqo_queries WHERE queryid = queryid_res INTO fs; - SELECT count(*) FROM aqo_data WHERE fspace_hash = fs INTO num; - DELETE FROM aqo_data WHERE fspace_hash = fs; + SELECT fs FROM aqo_queries WHERE queryid = queryid_res INTO lfs; + SELECT count(*) FROM aqo_data WHERE fs = lfs INTO num; + DELETE FROM aqo_data WHERE fs = lfs; RETURN num; END; $$ LANGUAGE plpgsql; @@ -329,7 +322,7 @@ RETURNS TABLE ( "err_aqo" TEXT, "iters_aqo" BIGINT ) AS $$ -SELECT learn_aqo,use_aqo,auto_tuning,fspace_hash, +SELECT learn_aqo,use_aqo,auto_tuning,fs, to_char(execution_time_without_aqo[n4],'9.99EEEE'), to_char(cardinality_error_without_aqo[n2],'9.99EEEE'), executions_without_aqo, @@ -350,35 +343,18 @@ WHERE (aqs.queryid = aq.queryid) AND aqs.queryid = $1; $$ LANGUAGE SQL; -/* CREATE FUNCTION aqo_enable_query(hash bigint) -RETURNS VOID AS $$ -UPDATE aqo_queries SET - learn_aqo = 'true', - use_aqo = 'true' - WHERE queryid = $1; -$$ LANGUAGE SQL; - -CREATE FUNCTION aqo_disable_query(hash bigint) -RETURNS VOID AS $$ -UPDATE aqo_queries SET - learn_aqo = 'false', - use_aqo = 'false', - auto_tuning = 'false' - WHERE queryid = $1; -$$ LANGUAGE SQL; -*/ - -CREATE FUNCTION aqo_enable_query(hash bigint) +CREATE FUNCTION aqo_enable_query(queryid bigint) RETURNS void AS 'MODULE_PATHNAME', 'aqo_enable_query' LANGUAGE C STRICT VOLATILE; -CREATE FUNCTION aqo_disable_query(hash bigint) +CREATE FUNCTION aqo_disable_query(queryid bigint) RETURNS void AS 'MODULE_PATHNAME', 'aqo_enable_query' LANGUAGE C STRICT VOLATILE; -CREATE FUNCTION aqo_queries_update(learn_aqo int, use_aqo int, auto_tuning int) -RETURNS void +CREATE FUNCTION aqo_queries_update(queryid bigint, fs bigint, learn_aqo bool, + use_aqo bool, auto_tuning bool) +RETURNS bool AS 'MODULE_PATHNAME', 'aqo_queries_update' -LANGUAGE C STRICT VOLATILE; \ No newline at end of file +LANGUAGE C VOLATILE; \ No newline at end of file diff --git a/aqo.c b/aqo.c index 55aaac30..d078b5ca 100644 --- a/aqo.c +++ b/aqo.c @@ 
-276,20 +276,6 @@ _PG_init(void) RequestAddinShmemSpace(aqo_memsize()); } -PG_FUNCTION_INFO_V1(invalidate_deactivated_queries_cache); - -/* - * Clears the cache of deactivated queries if the user changed aqo_queries - * manually. - */ -Datum -invalidate_deactivated_queries_cache(PG_FUNCTION_ARGS) -{ - fini_deactivated_queries_storage(); - init_deactivated_queries_storage(); - PG_RETURN_POINTER(NULL); -} - /* * Return AQO schema's Oid or InvalidOid if that's not possible. */ @@ -367,3 +353,15 @@ IsQueryDisabled(void) return false; } + +PG_FUNCTION_INFO_V1(invalidate_deactivated_queries_cache); + +/* + * Clears the cache of deactivated queries if the user changed aqo_queries + * manually. + */ +Datum +invalidate_deactivated_queries_cache(PG_FUNCTION_ARGS) +{ + PG_RETURN_POINTER(NULL); +} diff --git a/aqo.h b/aqo.h index ece63736..db40b82e 100644 --- a/aqo.h +++ b/aqo.h @@ -144,7 +144,7 @@ #include "utils/snapmgr.h" #include "machine_learning.h" -#include "storage.h" +//#include "storage.h" /* Check PostgreSQL version (9.6.0 contains important changes in planner) */ #if PG_VERSION_NUM < 90600 @@ -200,6 +200,8 @@ typedef struct QueryContextData double planning_time; } QueryContextData; +struct StatEntry; + extern double predicted_ppi_rows; extern double fss_ppi_hash; @@ -252,18 +254,10 @@ int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); /* Storage interaction */ -extern bool file_find_query(uint64 queryid); extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe); extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids, bool isTimedOut); -extern bool my_index_insert(Relation indexRelation, Datum *values, bool *isnull, - ItemPointer heap_t_ctid, Relation heapRelation, - IndexUniqueCheck checkUnique); -void init_deactivated_queries_storage(void); -void fini_deactivated_queries_storage(void); -extern bool query_is_deactivated(uint64 query_hash); -extern void add_deactivated_query(uint64 
query_hash); /* Query preprocessing hooks */ extern void print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, @@ -284,7 +278,7 @@ void aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, void aqo_ExecutorEnd(QueryDesc *queryDesc); /* Automatic query tuning */ -extern void automatical_query_tuning(uint64 query_hash, StatEntry *stat); +extern void automatical_query_tuning(uint64 query_hash, struct StatEntry *stat); /* Utilities */ extern int int64_compare(const void *a, const void *b); diff --git a/aqo_shared.c b/aqo_shared.c index 819b585b..dd9686c9 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -200,6 +200,7 @@ aqo_init_shmem(void) aqo_state->data_dsa_handler = DSM_HANDLE_INVALID; aqo_state->data_trancheid = LWLockNewTrancheId(); aqo_state->data_changed = false; + aqo_state->queries_changed = false; LWLockInitialize(&aqo_state->stat_lock, LWLockNewTrancheId()); LWLockInitialize(&aqo_state->qtexts_lock, LWLockNewTrancheId()); diff --git a/aqo_shared.h b/aqo_shared.h index 242322ab..b1b79387 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -42,6 +42,7 @@ typedef struct AQOSharedState bool data_changed; LWLock queries_lock; /* lock for access to queries storage */ + bool queries_changed; } AQOSharedState; diff --git a/auto_tuning.c b/auto_tuning.c index 2a713bfc..7a15e516 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -145,13 +145,13 @@ is_in_infinite_loop_cq(double *elems, int nelems) * this query to false. 
*/ void -automatical_query_tuning(uint64 qhash, StatEntry *stat) +automatical_query_tuning(uint64 queryid, StatEntry *stat) { - double unstability = auto_tuning_exploration; - double t_aqo, - t_not_aqo; - double p_use = -1; - int64 num_iterations; + double unstability = auto_tuning_exploration; + double t_aqo, + t_not_aqo; + double p_use = -1; + int64 num_iterations; num_iterations = stat->execs_with_aqo + stat->execs_without_aqo; query_context.learn_aqo = true; @@ -195,16 +195,15 @@ automatical_query_tuning(uint64 qhash, StatEntry *stat) * If our decision is using AQO for this query class, then learn on new * queries of this type. Otherwise, turn off. */ - query_context.use_aqo = (random() / ((double) MAX_RANDOM_VALUE + 1)) < p_use; + query_context.use_aqo = + (random() / ((double) MAX_RANDOM_VALUE + 1)) < p_use; query_context.learn_aqo = query_context.use_aqo; } if (num_iterations <= auto_tuning_max_iterations || p_use > 0.5) - aqo_queries_store(qhash, - query_context.fspace_hash, - query_context.learn_aqo, - query_context.use_aqo, - true); + aqo_queries_store(queryid, query_context.fspace_hash, + query_context.learn_aqo, query_context.use_aqo, true); else - aqo_queries_store(qhash, query_context.fspace_hash, false, false, false); + aqo_queries_store(queryid, + query_context.fspace_hash, false, false, false); } diff --git a/expected/aqo_controlled.out b/expected/aqo_controlled.out index 956a5441..cf88bf42 100644 --- a/expected/aqo_controlled.out +++ b/expected/aqo_controlled.out @@ -107,10 +107,13 @@ SELECT count(*) FROM tmp1; DROP TABLE tmp1; SET aqo.mode = 'controlled'; -SELECT aqo_queries_update(1, 0, 0); - aqo_queries_update --------------------- - +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, true, false, false) +; -- learn = true, use = false, tuning = false + count +------- + 12 (1 row) EXPLAIN (COSTS FALSE) @@ -194,10 +197,13 @@ WHERE t1.a = t2.b AND t2.a = t3.b; -> Seq Scan on aqo_test1 
t3 (9 rows) -SELECT aqo_queries_update(2, 1, 2); - aqo_queries_update --------------------- - +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, NULL, true, NULL) +; -- set use = true + count +------- + 12 (1 row) EXPLAIN (COSTS FALSE) @@ -306,10 +312,10 @@ DROP TABLE aqo_test1; DROP INDEX aqo_test2_idx_a; DROP TABLE aqo_test2; -- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT aqo_reset(); - aqo_reset ------------ - 61 +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 (1 row) DROP EXTENSION aqo; diff --git a/expected/aqo_disabled.out b/expected/aqo_disabled.out index 4b8a43fa..606d258e 100644 --- a/expected/aqo_disabled.out +++ b/expected/aqo_disabled.out @@ -64,7 +64,7 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero count ------- 0 @@ -116,7 +116,7 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero count ------- 0 @@ -142,17 +142,20 @@ SELECT count(*) FROM tmp1; (1 row) DROP TABLE tmp1; -SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero count ------- 0 (1 row) SET aqo.mode = 'controlled'; -SELECT aqo_queries_update(1, 1, 0); - aqo_queries_update --------------------- - +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, true, true, false) +; -- Enable all disabled query classes + count +------- + 5 (1 row) EXPLAIN SELECT * FROM aqo_test0 @@ -181,7 +184,7 @@ 
WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero count ------- 0 @@ -214,17 +217,17 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero count ------- 0 (1 row) -- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT aqo_reset(); - aqo_reset ------------ - 23 +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 (1 row) DROP EXTENSION aqo; diff --git a/expected/aqo_forced.out b/expected/aqo_forced.out index 5e4d53e8..091ead32 100644 --- a/expected/aqo_forced.out +++ b/expected/aqo_forced.out @@ -83,10 +83,10 @@ DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; -- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT aqo_reset(); - aqo_reset ------------ - 3 +SELECT 1 FROM aqo_reset(); + ?column? 
+---------- + 1 (1 row) DROP EXTENSION aqo; diff --git a/expected/aqo_intelligent.out b/expected/aqo_intelligent.out index 1870ca01..7ec943f5 100644 --- a/expected/aqo_intelligent.out +++ b/expected/aqo_intelligent.out @@ -289,7 +289,15 @@ SELECT count(*) FROM tmp1; DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = false, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, false, false) +; -- Disable all query classes + count +------- + 25 +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN @@ -394,7 +402,15 @@ WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -> Seq Scan on aqo_test1 t4 (cost=0.00..1.20 rows=20 width=8) (13 rows) -UPDATE aqo_queries SET learn_aqo = false, use_aqo = true, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, true, false) +; -- set learn = false, use = true, tuning = false + count +------- + 25 +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN @@ -504,10 +520,10 @@ DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; -- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT aqo_reset(); - aqo_reset ------------ - 127 +SELECT 1 FROM aqo_reset(); + ?column? 
+---------- + 1 (1 row) DROP EXTENSION aqo; diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 07ee6a1e..1abb9b04 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -277,7 +277,15 @@ GROUP BY (query_text) ORDER BY (md5(query_text)) DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = false, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, false, false) +; -- Disable all AQO query classes + count +------- + 8 +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN @@ -376,7 +384,15 @@ WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -> Seq Scan on aqo_test1 t4 (cost=0.00..1.20 rows=20 width=8) (13 rows) -UPDATE aqo_queries SET learn_aqo = false, use_aqo = true, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, true, false) +; -- learn = false, use = true, tuning = false + count +------- + 8 +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN @@ -707,10 +723,10 @@ DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; -- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT aqo_reset(); - aqo_reset ------------ - 44 +SELECT 1 FROM aqo_reset(); + ?column? 
+---------- + 1 (1 row) DROP EXTENSION aqo; diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index aa2eaa7e..f731b3dc 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -19,9 +19,9 @@ SELECT aqo_cleanup(); /* * lines with a_oid in aqo_data, - * lines with fspace_hash corresponding to a_oid in aqo_queries, - * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_stat + * lines with fs corresponding to a_oid in aqo_queries, + * lines with queryid corresponding to a_oid's fs in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs in aqo_query_stat * should remain */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); @@ -31,7 +31,7 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); count ------- 1 @@ -39,7 +39,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 1 @@ -47,7 +47,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 1 @@ -62,9 +62,9 @@ SELECT aqo_cleanup(); /* * lines with a_oid in aqo_data, - * lines with a_oid's fspace_hash EQUAL TO queryid in aqo_queries, - * lines with queryid 
corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_stat, + * lines with a_oid's fs EQUAL TO queryid in aqo_queries, + * lines with queryid corresponding to a_oid's fs in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs in aqo_query_stat, * should be deleted */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); @@ -74,8 +74,8 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; count ------- 0 @@ -83,8 +83,8 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 @@ -92,35 +92,13 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 (1 row) -CREATE TABLE a(); -SELECT * FROM a; --- -(0 rows) - -SELECT 'a'::regclass::oid AS a_oid \gset --- add manually line with different fspace_hash and queryid to aqo_queries -INSERT INTO aqo_queries VALUES (:a_oid + 1, 't', 't', :a_oid, 'f'); -DROP 
TABLE a; -SELECT aqo_cleanup(); - aqo_cleanup -------------- - (1,1) -(1 row) - --- this line should remain -SELECT count(*) FROM aqo_queries WHERE (fspace_hash = :a_oid AND queryid = :a_oid + 1); - count -------- - 1 -(1 row) - CREATE TABLE a(); CREATE TABLE b(); SELECT * FROM a; @@ -145,7 +123,7 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); count ------- 2 @@ -153,7 +131,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 2 @@ -161,7 +139,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 2 @@ -174,7 +152,7 @@ SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)); count ------- 2 @@ -182,7 +160,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); 
count ------- 2 @@ -190,7 +168,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- 2 @@ -204,10 +182,10 @@ SELECT aqo_cleanup(); (1 row) /* - * lines corresponding to a_oid and both a_oid's fspace_hash deleted in aqo_data, - * lines with fspace_hash corresponding to a_oid deleted in aqo_queries, - * lines with queryid corresponding to a_oid's fspace_hash deleted in aqo_query_texts, - * lines with queryid corresponding to a_oid's fspace_hash deleted in aqo_query_stat, + * lines corresponding to a_oid and both a_oid's fs deleted in aqo_data, + * lines with fs corresponding to a_oid deleted in aqo_queries, + * lines with queryid corresponding to a_oid's fs deleted in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs deleted in aqo_query_stat, */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); count @@ -216,8 +194,8 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; count ------- 0 @@ -225,8 +203,8 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count 
------- 0 @@ -234,8 +212,8 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 @@ -249,8 +227,8 @@ SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; count ------- 1 @@ -258,8 +236,8 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 1 @@ -267,8 +245,8 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 1 @@ -289,8 +267,8 @@ SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM 
aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; count ------- 0 @@ -298,8 +276,8 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 @@ -307,8 +285,8 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 diff --git a/expected/relocatable.out b/expected/relocatable.out index 98b53217..f24add25 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -12,11 +12,12 @@ SELECT count(*) FROM test; SELECT query_text, learn_aqo, use_aqo, auto_tuning FROM aqo_query_texts aqt JOIN aqo_queries aq ON (aqt.queryid = aq.queryid) +ORDER BY (md5(query_text)) ; -- Check result. TODO: use aqo_status() query_text | learn_aqo | use_aqo | auto_tuning ---------------------------------------+-----------+---------+------------- - COMMON feature space (do not delete!) | f | f | f SELECT count(*) FROM test; | t | t | f + COMMON feature space (do not delete!) | f | f | f (2 rows) -- Create a schema and move AQO into it. 
@@ -38,12 +39,13 @@ SELECT count(*) FROM test WHERE id < 10; SELECT query_text, learn_aqo, use_aqo, auto_tuning FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) ORDER BY (md5(query_text)) -; -- Check result. TODO: We want to find here both queries executed above - query_text | learn_aqo | use_aqo | auto_tuning ----------------------------------------+-----------+---------+------------- - SELECT count(*) FROM test; | t | t | f - COMMON feature space (do not delete!) | f | f | f -(2 rows) +; -- Find out both queries executed above + query_text | learn_aqo | use_aqo | auto_tuning +------------------------------------------+-----------+---------+------------- + SELECT count(*) FROM test; | t | t | f + COMMON feature space (do not delete!) | f | f | f + SELECT count(*) FROM test WHERE id < 10; | t | t | f +(3 rows) -- Add schema which contains AQO to the end of search_path SELECT set_config('search_path', current_setting('search_path') || ', test', false); @@ -90,8 +92,8 @@ SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f - f | f | f - f | f | f + t | t | f + t | t | f (3 rows) SELECT aqo_enable_query(id) FROM ( diff --git a/expected/temp_tables.out b/expected/temp_tables.out index 79de6284..b40790f0 100644 --- a/expected/temp_tables.out +++ b/expected/temp_tables.out @@ -186,5 +186,11 @@ SELECT * FROM check_estimated_rows(' (1 row) DROP TABLE pt CASCADE; +SELECT 1 FROM aqo_reset(); + ?column? 
+---------- + 1 +(1 row) + DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; diff --git a/expected/top_queries.out b/expected/top_queries.out index 13e9cfa2..99e114dc 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -67,7 +67,7 @@ SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y SELECT num, to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te WHERE te.fshash = ( - SELECT fspace_hash FROM aqo_queries + SELECT fs FROM aqo_queries WHERE aqo_queries.queryid = ( SELECT aqo_query_texts.queryid FROM aqo_query_texts WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' @@ -98,10 +98,10 @@ ORDER BY (md5(query_text)); SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 (4 rows) -SELECT aqo_reset(); - aqo_reset ------------ - 23 +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 (1 row) DROP EXTENSION aqo; diff --git a/learn_cache.c b/learn_cache.c index 306592eb..e0951fbe 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -18,6 +18,7 @@ #include "aqo.h" #include "aqo_shared.h" #include "learn_cache.h" +#include "storage.h" typedef struct diff --git a/postprocessing.c b/postprocessing.c index 0a63cf1f..f9e8db46 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -106,7 +106,7 @@ learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, double learned, double rfactor, Plan *plan, bool notExecuted) { AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); - uint64 fhash = query_context.fspace_hash; + uint64 fs = query_context.fspace_hash; int child_fss; double target; OkNNrdata data; @@ -121,7 +121,8 @@ learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, return; target = log(learned); - child_fss = get_fss_for_object(rels->signatures, ctx->clauselist, NIL, NULL, NULL); + child_fss = get_fss_for_object(rels->signatures, ctx->clauselist, + NIL, NULL,NULL); fss = 
get_grouped_exprs_hash(child_fss, aqo_node->grouping_exprs); memset(&data, 0, sizeof(OkNNrdata)); @@ -129,7 +130,7 @@ learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, data.matrix[i] = NULL; /* Critical section */ - atomic_fss_learn_step(fhash, fss, &data, NULL, + atomic_fss_learn_step(fs, fss, &data, NULL, target, rfactor, rels->hrels, ctx->isTimedOut); /* End of critical section */ } diff --git a/preprocessing.c b/preprocessing.c index 37913712..62999512 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -11,7 +11,7 @@ * 'use_aqo': whether to use AQO estimations in query optimization * 'learn_aqo': whether to update AQO data based on query execution * statistics - * 'fspace_hash': hash of feature space to use with given query + * 'fs': hash of feature space to use with given query * 'auto_tuning': whether AQO may change use_aqo and learn_aqo values * for the next execution of such type of query using * its self-tuning algorithm @@ -185,6 +185,7 @@ aqo_planner(Query *parse, * recursion, as an example). */ disable_aqo_for_query(); + return call_default_planner(parse, query_string, cursorOptions, @@ -205,7 +206,7 @@ aqo_planner(Query *parse, goto ignore_query_settings; } - query_is_stored = file_find_query(query_context.query_hash); + query_is_stored = aqo_queries_find(query_context.query_hash, &query_context); if (!query_is_stored) { @@ -312,14 +313,15 @@ aqo_planner(Query *parse, */ init_lock_tag(&tag, query_context.query_hash, 0); LockAcquire(&tag, ExclusiveLock, false, false); + /* * Add query into the AQO knowledge base. To process an error with * concurrent addition from another backend we will try to restart * preprocessing routine. */ aqo_queries_store(query_context.query_hash, query_context.fspace_hash, - query_context.learn_aqo, query_context.use_aqo, - query_context.auto_tuning); + query_context.learn_aqo, query_context.use_aqo, + query_context.auto_tuning); /* * Add query text into the ML-knowledge base. 
Just for further @@ -353,7 +355,6 @@ aqo_planner(Query *parse, void disable_aqo_for_query(void) { - query_context.learn_aqo = false; query_context.use_aqo = false; query_context.auto_tuning = false; diff --git a/sql/aqo_controlled.sql b/sql/aqo_controlled.sql index 959dd82a..0ba88e56 100644 --- a/sql/aqo_controlled.sql +++ b/sql/aqo_controlled.sql @@ -78,7 +78,10 @@ DROP TABLE tmp1; SET aqo.mode = 'controlled'; -SELECT aqo_queries_update(1, 0, 0); +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, true, false, false) +; -- learn = true, use = false, tuning = false EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 @@ -106,7 +109,10 @@ SELECT t1.a AS a, t2.a AS b, t3.a AS c FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 WHERE t1.a = t2.b AND t2.a = t3.b; -SELECT aqo_queries_update(2, 1, 2); +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, NULL, true, NULL) +; -- set use = true EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 @@ -149,6 +155,6 @@ DROP INDEX aqo_test2_idx_a; DROP TABLE aqo_test2; -- XXX: extension dropping doesn't clear file storage. Do it manually. 
-SELECT aqo_reset(); +SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; diff --git a/sql/aqo_disabled.sql b/sql/aqo_disabled.sql index 3bf7a47b..fd709cf3 100644 --- a/sql/aqo_disabled.sql +++ b/sql/aqo_disabled.sql @@ -36,7 +36,7 @@ WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero SET aqo.mode = 'disabled'; @@ -58,7 +58,7 @@ EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero SET aqo.mode = 'intelligent'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 @@ -72,10 +72,13 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = SELECT count(*) FROM tmp1; DROP TABLE tmp1; -SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero SET aqo.mode = 'controlled'; -SELECT aqo_queries_update(1, 1, 0); +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, true, true, false) +; -- Enable all disabled query classes EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -84,7 +87,7 @@ EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- 
Should be zero SET aqo.mode = 'disabled'; EXPLAIN SELECT * FROM aqo_test0 @@ -93,10 +96,10 @@ WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero -- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT aqo_reset(); +SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; diff --git a/sql/aqo_forced.sql b/sql/aqo_forced.sql index bf64470c..92a26564 100644 --- a/sql/aqo_forced.sql +++ b/sql/aqo_forced.sql @@ -58,6 +58,6 @@ DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; -- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT aqo_reset(); +SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; diff --git a/sql/aqo_intelligent.sql b/sql/aqo_intelligent.sql index a3bce4f2..545325c1 100644 --- a/sql/aqo_intelligent.sql +++ b/sql/aqo_intelligent.sql @@ -145,7 +145,10 @@ DROP TABLE tmp1; SET aqo.mode = 'controlled'; -SELECT aqo_queries_update(0, 0, 0); +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, false, false) +; -- Disable all query classes EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -174,7 +177,10 @@ EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -SELECT aqo_queries_update(0, 1, 0); +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, true, false) +; -- set learn = false, use = true, tuning = false EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -210,6 +216,6 @@ DROP INDEX aqo_test1_idx_a; 
DROP TABLE aqo_test1; -- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT aqo_reset(); +SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index 6256d2d7..676f5b55 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -141,7 +141,10 @@ DROP TABLE tmp1; SET aqo.mode = 'controlled'; -SELECT aqo_queries_update(0, 0, 0); +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, false, false) +; -- Disable all AQO query classes EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -172,7 +175,10 @@ EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -SELECT aqo_queries_update(0, 1, 0); +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, true, false) +; -- learn = false, use = true, tuning = false EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -309,6 +315,6 @@ DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; -- XXX: extension dropping doesn't clear file storage. Do it manually. 
-SELECT aqo_reset(); +SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index 1fc4374e..a5ce4e26 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -11,53 +11,43 @@ SELECT aqo_cleanup(); /* * lines with a_oid in aqo_data, - * lines with fspace_hash corresponding to a_oid in aqo_queries, - * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_stat + * lines with fs corresponding to a_oid in aqo_queries, + * lines with queryid corresponding to a_oid's fs in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs in aqo_query_stat * should remain */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); DROP TABLE a; SELECT aqo_cleanup(); /* * lines with a_oid in aqo_data, - * lines with a_oid's fspace_hash EQUAL TO queryid in aqo_queries, - * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_stat, + * lines with a_oid's fs EQUAL TO queryid in aqo_queries, + * lines with queryid corresponding to a_oid's fs 
in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs in aqo_query_stat, * should be deleted */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); - -CREATE TABLE a(); -SELECT * FROM a; -SELECT 'a'::regclass::oid AS a_oid \gset --- add manually line with different fspace_hash and queryid to aqo_queries -INSERT INTO aqo_queries VALUES (:a_oid + 1, 't', 't', :a_oid, 'f'); -DROP TABLE a; -SELECT aqo_cleanup(); --- this line should remain -SELECT count(*) FROM aqo_queries WHERE (fspace_hash = :a_oid AND queryid = :a_oid + 1); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); CREATE TABLE a(); CREATE TABLE b(); @@ -70,59 +60,59 @@ SELECT 'b'::regclass::oid AS b_oid \gset -- new lines added to aqo_data SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data 
WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); DROP TABLE a; SELECT aqo_cleanup(); /* - * lines corresponding to a_oid and both a_oid's fspace_hash deleted in aqo_data, - * lines with fspace_hash corresponding to a_oid deleted in aqo_queries, - * lines with queryid corresponding to a_oid's fspace_hash deleted in aqo_query_texts, - * lines with queryid corresponding to a_oid's fspace_hash deleted in aqo_query_stat, + * lines corresponding to a_oid and both a_oid's fs deleted in aqo_data, + * lines with fs corresponding to a_oid deleted in aqo_queries, + * lines with 
queryid corresponding to a_oid's fs deleted in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs deleted in aqo_query_stat, */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); -- lines corresponding to b_oid in all theese tables should remain SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = 
ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); DROP TABLE b; SELECT aqo_cleanup(); @@ -130,15 +120,15 @@ SELECT aqo_cleanup(); -- lines corresponding to b_oid in theese tables deleted SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); DROP EXTENSION aqo; \ No newline at end of file diff --git a/sql/relocatable.sql b/sql/relocatable.sql index 18a31643..2d8af862 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -9,6 +9,7 @@ ANALYZE test; SELECT 
count(*) FROM test; SELECT query_text, learn_aqo, use_aqo, auto_tuning FROM aqo_query_texts aqt JOIN aqo_queries aq ON (aqt.queryid = aq.queryid) +ORDER BY (md5(query_text)) ; -- Check result. TODO: use aqo_status() -- Create a schema and move AQO into it. @@ -22,7 +23,7 @@ SELECT count(*) FROM test WHERE id < 10; SELECT query_text, learn_aqo, use_aqo, auto_tuning FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) ORDER BY (md5(query_text)) -; -- Check result. TODO: We want to find here both queries executed above +; -- Find out both queries executed above -- Add schema which contains AQO to the end of search_path SELECT set_config('search_path', current_setting('search_path') || ', test', false); diff --git a/sql/temp_tables.sql b/sql/temp_tables.sql index 97b1e628..070721ce 100644 --- a/sql/temp_tables.sql +++ b/sql/temp_tables.sql @@ -92,5 +92,6 @@ SELECT * FROM check_estimated_rows(' '); -- Don't use AQO for temp table because of different attname DROP TABLE pt CASCADE; +SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; diff --git a/sql/top_queries.sql b/sql/top_queries.sql index bf3c9f60..62626d4f 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -35,7 +35,7 @@ SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y SELECT num, to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te WHERE te.fshash = ( - SELECT fspace_hash FROM aqo_queries + SELECT fs FROM aqo_queries WHERE aqo_queries.queryid = ( SELECT aqo_query_texts.queryid FROM aqo_query_texts WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' @@ -51,5 +51,5 @@ FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt WHERE ce.id = aqt.queryid ORDER BY (md5(query_text)); -SELECT aqo_reset(); +SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index 642b22c3..ee25dcd4 100644 --- a/storage.c +++ 
b/storage.c @@ -38,109 +38,6 @@ static ArrayType *form_matrix(double *matrix, int nrows, int ncols); #define FormVectorSz(v_name) (form_vector((v_name), (v_name ## _size))) -static bool my_simple_heap_update(Relation relation, - ItemPointer otid, - HeapTuple tup, - bool *update_indexes); - -/* - * Open an AQO-related relation. - * It should be done carefully because of a possible concurrent DROP EXTENSION - * command. In such case AQO must be disabled in this backend. - */ -static bool -open_aqo_relation(char *heaprelnspname, char *heaprelname, - char *indrelname, LOCKMODE lockmode, - Relation *hrel, Relation *irel) -{ - Oid reloid; - RangeVar *rv; - - reloid = RelnameGetRelid(indrelname); - if (!OidIsValid(reloid)) - goto cleanup; - - rv = makeRangeVar(heaprelnspname, heaprelname, -1); - *hrel = table_openrv_extended(rv, lockmode, true); - if (*hrel == NULL) - goto cleanup; - - /* Try to open index relation carefully. */ - *irel = try_relation_open(reloid, lockmode); - if (*irel == NULL) - { - relation_close(*hrel, lockmode); - goto cleanup; - } - - return true; - -cleanup: - /* - * Absence of any AQO-related table tell us that someone executed - * a 'DROP EXTENSION aqo' command. We disable AQO for all future queries - * in this backend. For performance reasons we do it locally. - * Clear profiling hash table. - * Also, we gently disable AQO for the rest of the current query - * execution process. 
- */ - aqo_enabled = false; - disable_aqo_for_query(); - return false; - -} - -/* -static ArrayType * -form_strings_vector(List *reloids) -{ - Datum *rels; - ArrayType *array; - ListCell *lc; - int i = 0; - - if (reloids == NIL) - return NULL; - - rels = (Datum *) palloc(list_length(reloids) * sizeof(Datum)); - - foreach(lc, reloids) - { - char *relname = strVal(lfirst(lc)); - - rels[i++] = CStringGetTextDatum(relname); - } - - array = construct_array(rels, i, TEXTOID, -1, false, TYPALIGN_INT); - pfree(rels); - return array; -} - -static List * -deform_strings_vector(Datum datum) -{ - ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); - Datum *values; - int i; - int nelems = 0; - List *reloids = NIL; - - deconstruct_array(array, TEXTOID, -1, false, TYPALIGN_INT, - &values, NULL, &nelems); - for (i = 0; i < nelems; ++i) - { - Value *s; - - s = makeString(pstrdup(TextDatumGetCString(values[i]))); - reloids = lappend(reloids, s); - } - - pfree(values); - pfree(array); - return reloids; -} -*/ - bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe) { @@ -191,7 +88,7 @@ form_matrix(double *matrix, int nrows, int ncols) /* * Forms ArrayType object for storage from simple C-array vector. */ -ArrayType * +static ArrayType * form_vector(double *vector, int nrows) { Datum *elems; @@ -211,80 +108,6 @@ form_vector(double *vector, int nrows) return array; } -/* - * Returns true if updated successfully, false if updated concurrently by - * another session, error otherwise. - */ -static bool -my_simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup, - bool *update_indexes) -{ - TM_Result result; - TM_FailureData hufd; - LockTupleMode lockmode; - - Assert(update_indexes != NULL); - result = heap_update(relation, otid, tup, - GetCurrentCommandId(true), InvalidSnapshot, - true /* wait for commit */ , - &hufd, &lockmode); - switch (result) - { - case TM_SelfModified: - /* Tuple was already updated in current command? 
*/ - elog(ERROR, "tuple already updated by self"); - break; - - case TM_Ok: - /* done successfully */ - if (!HeapTupleIsHeapOnly(tup)) - *update_indexes = true; - else - *update_indexes = false; - return true; - - case TM_Updated: - return false; - break; - - case TM_BeingModified: - return false; - break; - - default: - elog(ERROR, "unrecognized heap_update status: %u", result); - break; - } - return false; -} - - -/* Provides correct insert in both PostgreQL 9.6.X and 10.X.X */ -bool -my_index_insert(Relation indexRelation, - Datum *values, bool *isnull, - ItemPointer heap_t_ctid, - Relation heapRelation, - IndexUniqueCheck checkUnique) -{ - /* Index must be UNIQUE to support uniqueness checks */ - Assert(checkUnique == UNIQUE_CHECK_NO || - indexRelation->rd_index->indisunique); - -#if PG_VERSION_NUM < 100000 - return index_insert(indexRelation, values, isnull, heap_t_ctid, - heapRelation, checkUnique); -#elif PG_VERSION_NUM < 140000 - return index_insert(indexRelation, values, isnull, heap_t_ctid, - heapRelation, checkUnique, - BuildIndexInfo(indexRelation)); -#else - return index_insert(indexRelation, values, isnull, heap_t_ctid, - heapRelation, checkUnique, false, - BuildIndexInfo(indexRelation)); -#endif -} - /* Creates a storage for hashes of deactivated queries */ void init_deactivated_queries_storage(void) @@ -301,29 +124,21 @@ init_deactivated_queries_storage(void) HASH_ELEM | HASH_BLOBS); } -/* Destroys the storage for hash of deactivated queries */ -void -fini_deactivated_queries_storage(void) -{ - hash_destroy(deactivated_queries); - deactivated_queries = NULL; -} - /* Checks whether the query with given hash is deactivated */ bool -query_is_deactivated(uint64 query_hash) +query_is_deactivated(uint64 queryid) { bool found; - hash_search(deactivated_queries, &query_hash, HASH_FIND, &found); + hash_search(deactivated_queries, &queryid, HASH_FIND, &found); return found; } -/* Adds given query hash into the set of hashes of deactivated queries*/ +/* Adds 
given query hash into the set of hashes of deactivated queries */ void -add_deactivated_query(uint64 query_hash) +add_deactivated_query(uint64 queryid) { - hash_search(deactivated_queries, &query_hash, HASH_ENTER, NULL); + hash_search(deactivated_queries, &queryid, HASH_ENTER, NULL); } /* ***************************************************************************** @@ -368,7 +183,7 @@ typedef enum { } aqo_data_cols; typedef enum { - AQ_QUERYID = 0, AQ_FSPACE_HASH, AQ_LEARN_AQO, AQ_USE_AQO, AQ_AUTO_TUNING, + AQ_QUERYID = 0, AQ_FS, AQ_LEARN_AQO, AQ_USE_AQO, AQ_AUTO_TUNING, AQ_TOTAL_NCOLS } aqo_queries_cols; @@ -965,6 +780,8 @@ aqo_queries_load(void) entries = hash_get_num_entries(queries_htab); Assert(entries == 0); data_load(PGAQO_QUERIES_FILE, _deform_queries_record_cb, NULL); + + /* Check existence of default feature space */ (void) hash_search(queries_htab, &queryid, HASH_FIND, &found); LWLockRelease(&aqo_state->queries_lock); @@ -1717,7 +1534,7 @@ aqo_queries(PG_FUNCTION_ARGS) while ((entry = hash_seq_search(&hash_seq)) != NULL) { values[AQ_QUERYID] = Int64GetDatum(entry->queryid); - values[AQ_FSPACE_HASH] = Int64GetDatum(entry->fspace_hash); + values[AQ_FS] = Int64GetDatum(entry->fs); values[AQ_LEARN_AQO] = BoolGetDatum(entry->learn_aqo); values[AQ_USE_AQO] = BoolGetDatum(entry->use_aqo); values[AQ_AUTO_TUNING] = BoolGetDatum(entry->auto_tuning); @@ -1743,8 +1560,8 @@ aqo_queries_remove(PG_FUNCTION_ARGS) PG_RETURN_BOOL(removed); } -QueriesEntry * -aqo_queries_store(uint64 queryid, uint64 fspace_hash, bool learn_aqo, +bool +aqo_queries_store(uint64 queryid, uint64 fs, bool learn_aqo, bool use_aqo, bool auto_tuning) { QueriesEntry *entry; @@ -1752,24 +1569,20 @@ aqo_queries_store(uint64 queryid, uint64 fspace_hash, bool learn_aqo, Assert(queries_htab); - LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); - entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_ENTER, &found); + /* Guard for default feature space */ + Assert(queryid != 0 || (fs == 0 
&& learn_aqo == false && + use_aqo == false && auto_tuning == false)); - /* Initialize entry on first usage */ - if (!found) - { - uint64 qid = entry->queryid; - memset(entry, 0, sizeof(QueriesEntry)); - entry->queryid = qid; - entry->fspace_hash = fspace_hash; - } + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_ENTER, + &found); + entry->fs = fs; entry->learn_aqo = learn_aqo; entry->use_aqo = use_aqo; entry->auto_tuning = auto_tuning; - entry = memcpy(palloc(sizeof(QueriesEntry)), entry, sizeof(QueriesEntry)); LWLockRelease(&aqo_state->queries_lock); - return entry; + return true; } static long @@ -1785,12 +1598,16 @@ aqo_queries_reset(void) hash_seq_init(&hash_seq, queries_htab); while ((entry = hash_seq_search(&hash_seq)) != NULL) { + if (entry->queryid == 0) + /* Don't remove default feature space */ + continue; + if (hash_search(queries_htab, &entry->queryid, HASH_REMOVE, NULL) == NULL) elog(ERROR, "[AQO] hash table corrupted"); num_remove++; } LWLockRelease(&aqo_state->queries_lock); - Assert(num_remove == num_entries); + Assert(num_remove == num_entries - 1); aqo_queries_flush(); @@ -1806,18 +1623,23 @@ aqo_enable_query(PG_FUNCTION_ARGS) Assert(queries_htab); + if (queryid == 0) + elog(ERROR, "[AQO] Default class can't be updated."); + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, &found); - if(found) + if (found) { - entry->learn_aqo = 1; - entry->use_aqo = 1; + entry->learn_aqo = true; + entry->use_aqo = true; + if (aqo_mode == AQO_MODE_INTELLIGENT) + entry->auto_tuning = true; } else - { elog(ERROR, "[AQO] Entry with queryid %ld not contained in table", queryid); - } + + hash_search(deactivated_queries, &queryid, HASH_REMOVE, NULL); LWLockRelease(&aqo_state->queries_lock); PG_RETURN_VOID(); } @@ -1836,9 +1658,9 @@ aqo_disable_query(PG_FUNCTION_ARGS) if(found) { - entry->learn_aqo = 0; - 
entry->use_aqo = 0; - entry->auto_tuning = 0; + entry->learn_aqo = false; + entry->use_aqo = false; + entry->auto_tuning = false; } else { @@ -1849,40 +1671,61 @@ aqo_disable_query(PG_FUNCTION_ARGS) } bool -file_find_query(uint64 queryid) +aqo_queries_find(uint64 queryid, QueryContextData *ctx) { bool found; + QueriesEntry *entry; Assert(queries_htab); LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); - hash_search(queries_htab, &queryid, HASH_FIND, &found); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, &found); + if (found) + { + ctx->query_hash = entry->queryid; + ctx->learn_aqo = entry->learn_aqo; + ctx->use_aqo = entry->use_aqo; + ctx->auto_tuning = entry->auto_tuning; + } LWLockRelease(&aqo_state->queries_lock); return found; } -Datum +/* + * Update AQO preferences for a given queryid value. + * if incoming param is null - leave it unchanged. + * if forced is false, do nothing if query with such ID isn't exists yet. + * Return true if operation have done some changes. 
+ */ +Datum aqo_queries_update(PG_FUNCTION_ARGS) { - HASH_SEQ_STATUS hash_seq; - QueriesEntry *entry; - int learn_aqo = (int) PG_GETARG_INT32(0); - int use_aqo = (int) PG_GETARG_INT32(1); - int auto_tuning = (int) PG_GETARG_INT32(2); + QueriesEntry *entry; + uint64 queryid = PG_GETARG_INT64(AQ_QUERYID); + bool found; + + if (queryid == 0) + /* Do nothing for default feature space */ + PG_RETURN_BOOL(false); LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); - hash_seq_init(&hash_seq, queries_htab); - while ((entry = hash_seq_search(&hash_seq)) != NULL) - { - if (learn_aqo != 2) - entry->learn_aqo = learn_aqo; - if (use_aqo != 2) - entry->use_aqo = use_aqo; - if (auto_tuning != 2) - entry->auto_tuning = auto_tuning; - } + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, + &found); + + if (!PG_ARGISNULL(AQ_FS)) + entry->fs = PG_GETARG_INT64(AQ_FS); + if (!PG_ARGISNULL(AQ_LEARN_AQO)) + entry->learn_aqo = PG_GETARG_INT64(AQ_LEARN_AQO); + if (!PG_ARGISNULL(AQ_USE_AQO)) + entry->use_aqo = PG_GETARG_INT64(AQ_USE_AQO); + if (!PG_ARGISNULL(AQ_AUTO_TUNING)) + entry->auto_tuning = PG_GETARG_INT64(AQ_AUTO_TUNING); + + /* Remove the class from cache of deactivated queries */ + hash_search(deactivated_queries, &queryid, HASH_REMOVE, NULL); + LWLockRelease(&aqo_state->queries_lock); - PG_RETURN_VOID(); + PG_RETURN_BOOL(true); } Datum diff --git a/storage.h b/storage.h index cf90caf6..1024840f 100644 --- a/storage.h +++ b/storage.h @@ -5,6 +5,7 @@ #include "utils/array.h" #include "utils/dsa.h" /* Public structs have links to DSA memory blocks */ +#include "aqo.h" #include "machine_learning.h" #define STAT_SAMPLE_SIZE (20) @@ -74,7 +75,8 @@ typedef struct DataEntry typedef struct QueriesEntry { uint64 queryid; - uint64 fspace_hash; + + uint64 fs; bool learn_aqo; bool use_aqo; bool auto_tuning; @@ -102,11 +104,18 @@ extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, extern void aqo_data_flush(void); extern void 
aqo_data_load(void); -extern QueriesEntry *aqo_queries_store(uint64 queryid, uint64 fspace_hash, bool learn_aqo, - bool use_aqo, bool auto_tuning); +extern bool aqo_queries_find(uint64 queryid, QueryContextData *ctx); +extern bool aqo_queries_store(uint64 queryid, uint64 fs, bool learn_aqo, + bool use_aqo, bool auto_tuning); extern void aqo_queries_flush(void); extern void aqo_queries_load(void); -/* Utility routines */ -extern ArrayType *form_vector(double *vector, int nrows); + +/* + * Machinery for deactivated queries cache. + * TODO: Should live in a custom memory context + */ +extern void init_deactivated_queries_storage(void); +extern bool query_is_deactivated(uint64 query_hash); +extern void add_deactivated_query(uint64 query_hash); #endif /* STORAGE_H */ diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index e73e23ec..83b374f1 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -228,7 +228,7 @@ # Number of rows in aqo_queries: related to pgbench test and total value. my $pgb_fs_count = $node->safe_psql('postgres', " SELECT count(*) FROM aqo_queries - WHERE fspace_hash IN ( + WHERE fs IN ( SELECT fs FROM aqo_data WHERE $aoid = ANY(oids) OR From 0efeef786504e74e11019674bac29eb89f827e4d Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Mon, 27 Jun 2022 16:04:58 +0500 Subject: [PATCH 100/203] Minor performance optimizations and code improvements --- aqo.c | 14 --- aqo.h | 1 - cardinality_estimation.c | 1 + expected/forced_stat_collection.out | 3 +- preprocessing.c | 32 ++----- sql/forced_stat_collection.sql | 3 +- storage.c | 139 +++++++++++++--------------- storage.h | 2 - 8 files changed, 78 insertions(+), 117 deletions(-) diff --git a/aqo.c b/aqo.c index d078b5ca..790f27e5 100644 --- a/aqo.c +++ b/aqo.c @@ -34,8 +34,6 @@ void _PG_init(void); /* Strategy of determining feature space for new queries. */ int aqo_mode; -bool aqo_enabled = false; /* Signals that CREATE EXTENSION have executed and - all extension tables is ready for use. 
*/ bool force_collect_stat; /* @@ -201,18 +199,6 @@ _PG_init(void) NULL ); - DefineCustomBoolVariable( - "aqo.use_file_storage", - "Used for smooth transition from table storage", - NULL, - &aqo_use_file_storage, - true, - PGC_USERSET, - 0, - NULL, - NULL, - NULL - ); DefineCustomIntVariable("aqo.join_threshold", "Sets the threshold of number of JOINs in query beyond which AQO is used.", NULL, diff --git a/aqo.h b/aqo.h index db40b82e..345b748b 100644 --- a/aqo.h +++ b/aqo.h @@ -169,7 +169,6 @@ typedef enum } AQO_MODE; extern int aqo_mode; -extern bool aqo_enabled; extern bool force_collect_stat; extern bool aqo_show_hash; extern bool aqo_show_details; diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 523b8e2e..cb8997f6 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -22,6 +22,7 @@ #include "aqo.h" #include "hash.h" #include "machine_learning.h" +#include "storage.h" #ifdef AQO_DEBUG_PRINT static void diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index ec5ba020..e514e386 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -1,7 +1,7 @@ \set citizens 1000 SET aqo.join_threshold = 0; SET aqo.mode = 'disabled'; -SET aqo.force_collect_stat = 'on'; +SET aqo.force_collect_stat = 'off'; CREATE TABLE person ( id serial PRIMARY KEY, age integer, @@ -20,6 +20,7 @@ INSERT INTO person (id,age,gender,passport) FROM (SELECT *, 14+(id % 60) AS age FROM generate_series(1, :citizens) id) AS q1 ); CREATE EXTENSION aqo; +SET aqo.force_collect_stat = 'on'; SELECT count(*) FROM person WHERE age<18; count ------- diff --git a/preprocessing.c b/preprocessing.c index 62999512..f3f89218 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -98,30 +98,18 @@ call_default_planner(Query *parse, } /* - * Check, that a 'CREATE EXTENSION aqo' command has been executed. - * This function allows us to execute the get_extension_oid routine only once - * at each backend. 
- * If any AQO-related table is missed we will set aqo_enabled to false (see - * a storage implementation module). + * Can AQO be used for the query? */ static bool -aqoIsEnabled(void) +aqoIsEnabled(Query *parse) { - if (creating_extension) - /* Nothing to tell in this mode. */ + if (creating_extension || + (aqo_mode == AQO_MODE_DISABLED && !force_collect_stat) || + (parse->commandType != CMD_SELECT && parse->commandType != CMD_INSERT && + parse->commandType != CMD_UPDATE && parse->commandType != CMD_DELETE)) return false; - if (aqo_enabled) - /* - * Fast path. Dropping should be detected by absence of any AQO-related - * table. - */ - return true; - - if (get_extension_oid("aqo", true) != InvalidOid) - aqo_enabled = true; - - return aqo_enabled; + return true; } /* @@ -147,12 +135,8 @@ aqo_planner(Query *parse, * the heap during planning. Transactions are synchronized between parallel * sections. See GetCurrentCommandId() comments also. */ - if (!aqoIsEnabled() || - (parse->commandType != CMD_SELECT && parse->commandType != CMD_INSERT && - parse->commandType != CMD_UPDATE && parse->commandType != CMD_DELETE) || - creating_extension || + if (!aqoIsEnabled(parse) || IsInParallelMode() || IsParallelWorker() || - (aqo_mode == AQO_MODE_DISABLED && !force_collect_stat) || strstr(application_name, "postgres_fdw") != NULL || /* Prevent distributed deadlocks */ strstr(application_name, "pgfdw:") != NULL || /* caused by fdw */ isQueryUsingSystemRelation(parse) || diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index a3a63685..ad234655 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -2,7 +2,7 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'disabled'; -SET aqo.force_collect_stat = 'on'; +SET aqo.force_collect_stat = 'off'; CREATE TABLE person ( id serial PRIMARY KEY, @@ -24,6 +24,7 @@ INSERT INTO person (id,age,gender,passport) ); CREATE EXTENSION aqo; +SET aqo.force_collect_stat = 'on'; SELECT count(*) FROM 
person WHERE age<18; SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; diff --git a/storage.c b/storage.c index ee25dcd4..5f2dfcb7 100644 --- a/storage.c +++ b/storage.c @@ -19,10 +19,9 @@ #include -#include "access/heapam.h" -#include "access/table.h" -#include "access/tableam.h" +#include "funcapi.h" #include "miscadmin.h" +#include "pgstat.h" #include "aqo.h" #include "aqo_shared.h" @@ -31,12 +30,73 @@ #include "learn_cache.h" #include "storage.h" -#define AQO_DATA_COLUMNS (7) + +/* AQO storage file names */ +#define PGAQO_STAT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_statistics.stat" +#define PGAQO_TEXT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_query_texts.stat" +#define PGAQO_DATA_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_data.stat" +#define PGAQO_QUERIES_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_queries.stat" + +#define AQO_DATA_COLUMNS (7) +#define FormVectorSz(v_name) (form_vector((v_name), (v_name ## _size))) + + +typedef enum { + QUERYID = 0, EXEC_TIME_AQO, EXEC_TIME, PLAN_TIME_AQO, PLAN_TIME, + EST_ERROR_AQO, EST_ERROR, NEXECS_AQO, NEXECS, TOTAL_NCOLS +} aqo_stat_cols; + +typedef enum { + QT_QUERYID = 0, QT_QUERY_STRING, QT_TOTAL_NCOLS +} aqo_qtexts_cols; + +typedef enum { + AD_FS = 0, AD_FSS, AD_NFEATURES, AD_FEATURES, AD_TARGETS, AD_RELIABILITY, + AD_OIDS, AD_TOTAL_NCOLS +} aqo_data_cols; + +typedef enum { + AQ_QUERYID = 0, AQ_FS, AQ_LEARN_AQO, AQ_USE_AQO, AQ_AUTO_TUNING, + AQ_TOTAL_NCOLS +} aqo_queries_cols; + +typedef void* (*form_record_t) (void *ctx, size_t *size); +typedef void (*deform_record_t) (void *data, size_t size); + + +HTAB *stat_htab = NULL; +HTAB *queries_htab = NULL; +HTAB *qtexts_htab = NULL; +dsa_area *qtext_dsa = NULL; +HTAB *data_htab = NULL; +dsa_area *data_dsa = NULL; HTAB *deactivated_queries = NULL; +/* Used to check data file consistency */ +static const uint32 PGAQO_FILE_HEADER = 123467589; +static const uint32 PGAQO_PG_MAJOR_VERSION = PG_VERSION_NUM / 100; + + static ArrayType 
*form_matrix(double *matrix, int nrows, int ncols); +static void dsa_init(void); +static int data_store(const char *filename, form_record_t callback, + long nrecs, void *ctx); +static void data_load(const char *filename, deform_record_t callback, void *ctx); +static size_t _compute_data_dsa(const DataEntry *entry); + +PG_FUNCTION_INFO_V1(aqo_query_stat); +PG_FUNCTION_INFO_V1(aqo_query_texts); +PG_FUNCTION_INFO_V1(aqo_data); +PG_FUNCTION_INFO_V1(aqo_queries); +PG_FUNCTION_INFO_V1(aqo_stat_remove); +PG_FUNCTION_INFO_V1(aqo_qtexts_remove); +PG_FUNCTION_INFO_V1(aqo_data_remove); +PG_FUNCTION_INFO_V1(aqo_queries_remove); +PG_FUNCTION_INFO_V1(aqo_enable_query); +PG_FUNCTION_INFO_V1(aqo_disable_query); +PG_FUNCTION_INFO_V1(aqo_queries_update); +PG_FUNCTION_INFO_V1(aqo_reset); -#define FormVectorSz(v_name) (form_vector((v_name), (v_name ## _size))) bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe) @@ -141,75 +201,6 @@ add_deactivated_query(uint64 queryid) hash_search(deactivated_queries, &queryid, HASH_ENTER, NULL); } -/* ***************************************************************************** - * - * Implementation of the AQO file storage - * - **************************************************************************** */ - -#include "funcapi.h" -#include "pgstat.h" - -#define PGAQO_STAT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_statistics.stat" -#define PGAQO_TEXT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_query_texts.stat" -#define PGAQO_DATA_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_data.stat" -#define PGAQO_QUERIES_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_queries.stat" - -PG_FUNCTION_INFO_V1(aqo_query_stat); -PG_FUNCTION_INFO_V1(aqo_query_texts); -PG_FUNCTION_INFO_V1(aqo_data); -PG_FUNCTION_INFO_V1(aqo_queries); -PG_FUNCTION_INFO_V1(aqo_stat_remove); -PG_FUNCTION_INFO_V1(aqo_qtexts_remove); -PG_FUNCTION_INFO_V1(aqo_data_remove); -PG_FUNCTION_INFO_V1(aqo_queries_remove); -PG_FUNCTION_INFO_V1(aqo_enable_query); 
-PG_FUNCTION_INFO_V1(aqo_disable_query); -PG_FUNCTION_INFO_V1(aqo_queries_update); -PG_FUNCTION_INFO_V1(aqo_reset); - -typedef enum { - QUERYID = 0, EXEC_TIME_AQO, EXEC_TIME, PLAN_TIME_AQO, PLAN_TIME, - EST_ERROR_AQO, EST_ERROR, NEXECS_AQO, NEXECS, TOTAL_NCOLS -} aqo_stat_cols; - -typedef enum { - QT_QUERYID = 0, QT_QUERY_STRING, QT_TOTAL_NCOLS -} aqo_qtexts_cols; - -typedef enum { - AD_FS = 0, AD_FSS, AD_NFEATURES, AD_FEATURES, AD_TARGETS, AD_RELIABILITY, - AD_OIDS, AD_TOTAL_NCOLS -} aqo_data_cols; - -typedef enum { - AQ_QUERYID = 0, AQ_FS, AQ_LEARN_AQO, AQ_USE_AQO, AQ_AUTO_TUNING, - AQ_TOTAL_NCOLS -} aqo_queries_cols; - -typedef void* (*form_record_t) (void *ctx, size_t *size); -typedef void (*deform_record_t) (void *data, size_t size); - -bool aqo_use_file_storage; - -HTAB *stat_htab = NULL; -HTAB *queries_htab = NULL; - -HTAB *qtexts_htab = NULL; -dsa_area *qtext_dsa = NULL; - -HTAB *data_htab = NULL; -dsa_area *data_dsa = NULL; - -/* Used to check data file consistency */ -static const uint32 PGAQO_FILE_HEADER = 123467589; -static const uint32 PGAQO_PG_MAJOR_VERSION = PG_VERSION_NUM / 100; - -static void dsa_init(void); -static int data_store(const char *filename, form_record_t callback, - long nrecs, void *ctx); -static void data_load(const char *filename, deform_record_t callback, void *ctx); -static size_t _compute_data_dsa(const DataEntry *entry); /* * Update AQO statistics. 
* diff --git a/storage.h b/storage.h index 1024840f..27c97256 100644 --- a/storage.h +++ b/storage.h @@ -82,8 +82,6 @@ typedef struct QueriesEntry bool auto_tuning; } QueriesEntry; -extern bool aqo_use_file_storage; - extern HTAB *stat_htab; extern HTAB *qtexts_htab; extern HTAB *queries_htab; /* TODO */ From 7d649fc09700977b47a25b7ba82a5b81303917a7 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 28 Jun 2022 11:21:06 +0500 Subject: [PATCH 101/203] Merge file storage feature and look-a-like --- expected/aqo_learn.out | 2 +- expected/clean_aqo_data.out | 32 +++++++------- expected/gucs.out | 16 +++---- machine_learning.c | 8 +++- postprocessing.c | 10 ++--- sql/aqo_learn.sql | 4 +- sql/clean_aqo_data.sql | 8 ++-- sql/gucs.sql | 4 +- storage.c | 85 +++++++++++++++++++++++++++++-------- 9 files changed, 110 insertions(+), 59 deletions(-) diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 1abb9b04..e08f089b 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -540,7 +540,7 @@ SELECT * FROM check_estimated_rows(' 20 | 17 (1 row) -SELECT count(*) FROM -- Learn on the query +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 ; count diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index f731b3dc..e66f274b 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -11,10 +11,10 @@ SELECT * FROM a; (0 rows) SELECT 'a'::regclass::oid AS a_oid \gset -SELECT aqo_cleanup(); - aqo_cleanup -------------- - (0,0) +SELECT true FROM aqo_cleanup(); + bool +------ + t (1 row) /* @@ -54,10 +54,10 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) DROP TABLE a; -SELECT aqo_cleanup(); - aqo_cleanup -------------- - (1,1) +SELECT true FROM aqo_cleanup(); + bool +------ + t (1 row) /* @@ -175,10 +175,10 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) DROP TABLE a; -SELECT aqo_cleanup(); - aqo_cleanup -------------- - (2,3) +SELECT true FROM aqo_cleanup(); + bool +------ + t (1 row) /* @@ 
-253,10 +253,10 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) DROP TABLE b; -SELECT aqo_cleanup(); - aqo_cleanup -------------- - (1,1) +SELECT true FROM aqo_cleanup(); + bool +------ + t (1 row) -- lines corresponding to b_oid in theese tables deleted diff --git a/expected/gucs.out b/expected/gucs.out index 7f74f527..e238bc61 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -5,10 +5,10 @@ SET aqo.show_details = true; CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; -SELECT * FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. - aqo_reset ------------ - 25 +SELECT true FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. + bool +------ + t (1 row) -- Check AQO addons to explain (the only stable data) @@ -123,10 +123,10 @@ SELECT count(*) FROM aqo_query_stat; 1 (1 row) -SELECT * FROM aqo_reset(); -- Remove one record from all tables - aqo_reset ------------ - 4 +SELECT true FROM aqo_reset(); -- Remove one record from all tables + bool +------ + t (1 row) SELECT count(*) FROM aqo_query_stat; diff --git a/machine_learning.c b/machine_learning.c index ca7fc6ef..3077983d 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -48,10 +48,14 @@ OkNNr_allocate(int ncols) int i; if (ncols > 0) - for (i = 0; i < aqo_K; ++i) - data->matrix[i] = palloc0(sizeof(double) * ncols); + for (i = 0; i < aqo_K; i++) + data->matrix[i] = palloc0(ncols * sizeof(double)); + else + for (i = 0; i < aqo_K; i++) + data->matrix[i] = NULL; data->cols = ncols; + data->rows = -1; return data; } diff --git a/postprocessing.c b/postprocessing.c index f9e8db46..437430ce 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -109,9 +109,8 @@ learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, uint64 fs = query_context.fspace_hash; int child_fss; double target; - OkNNrdata data; + OkNNrdata *data = OkNNr_allocate(0); int fss; - int i; /* * Learn 'not executed' nodes 
only once, if no one another knowledge exists @@ -125,13 +124,10 @@ learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, NIL, NULL,NULL); fss = get_grouped_exprs_hash(child_fss, aqo_node->grouping_exprs); - memset(&data, 0, sizeof(OkNNrdata)); - for (i = 0; i < aqo_K; i++) - data.matrix[i] = NULL; - /* Critical section */ - atomic_fss_learn_step(fs, fss, &data, NULL, + atomic_fss_learn_step(fs, fss, data, NULL, target, rfactor, rels->hrels, ctx->isTimedOut); + OkNNr_free(data); /* End of critical section */ } diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index 676f5b55..cb0122bb 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -235,8 +235,8 @@ SELECT * FROM check_estimated_rows(' SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -'); -SELECT count(*) FROM -- Learn on the query +'); -- Learn on the query +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 ; SELECT query_text FROM aqo_query_texts WHERE queryid <> 0; -- Check query diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index a5ce4e26..d2abeb93 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -7,7 +7,7 @@ DROP TABLE IF EXISTS b; CREATE TABLE a(); SELECT * FROM a; SELECT 'a'::regclass::oid AS a_oid \gset -SELECT aqo_cleanup(); +SELECT true FROM aqo_cleanup(); /* * lines with a_oid in aqo_data, @@ -27,7 +27,7 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); DROP TABLE a; -SELECT aqo_cleanup(); +SELECT true FROM aqo_cleanup(); /* * lines with a_oid in aqo_data, @@ -79,7 +79,7 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); DROP TABLE a; -SELECT aqo_cleanup(); +SELECT true FROM aqo_cleanup(); /* * lines corresponding to a_oid and both a_oid's fs deleted in aqo_data, @@ -115,7 +115,7 @@ SELECT count(*) FROM aqo_query_stat 
WHERE aqo_queries.fs = aqo_queries.queryid); DROP TABLE b; -SELECT aqo_cleanup(); +SELECT true FROM aqo_cleanup(); -- lines corresponding to b_oid in theese tables deleted SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); diff --git a/sql/gucs.sql b/sql/gucs.sql index 1dba9c6c..2d113792 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -8,7 +8,7 @@ CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; -SELECT * FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. +SELECT true FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. -- Check AQO addons to explain (the only stable data) EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; @@ -33,7 +33,7 @@ SELECT obj_description('aqo_reset'::regproc::oid); -- Check stat reset SELECT count(*) FROM aqo_query_stat; -SELECT * FROM aqo_reset(); -- Remove one record from all tables +SELECT true FROM aqo_reset(); -- Remove one record from all tables SELECT count(*) FROM aqo_query_stat; DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index 5f2dfcb7..6a71a541 100644 --- a/storage.c +++ b/storage.c @@ -983,8 +983,10 @@ aqo_query_texts(PG_FUNCTION_ARGS) hash_seq_init(&hash_seq, qtexts_htab); while ((entry = hash_seq_search(&hash_seq)) != NULL) { + char *ptr; + Assert(DsaPointerIsValid(entry->qtext_dp)); - char *ptr = dsa_get_address(qtext_dsa, entry->qtext_dp); + ptr = dsa_get_address(qtext_dsa, entry->qtext_dp); values[QT_QUERYID] = Int64GetDatum(entry->queryid); values[QT_QUERY_STRING] = CStringGetTextDatum(ptr); tuplestore_putvalues(tupstore, tupDesc, values, nulls); @@ -1170,7 +1172,7 @@ build_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data) { Assert(data->cols == temp_data->cols); - if (data->rows >= 0) + if (data->rows > 0) /* trivial strategy - use first suitable record and ignore others */ return; @@ -1201,8 +1203,7 @@ _fill_knn_data(const DataEntry *entry, List **reloids) /* 
Check invariants */ Assert(entry->rows < aqo_K); Assert(ptr != NULL); - Assert(entry->key.fs == ((data_key *)ptr)->fs && - entry->key.fss == ((data_key *)ptr)->fss); + Assert(entry->key.fss == ((data_key *)ptr)->fss); ptr += sizeof(data_key); @@ -1227,6 +1228,7 @@ _fill_knn_data(const DataEntry *entry, List **reloids) Assert(offset <= sz); if (reloids == NULL) + /* Isn't needed to load reloids list */ return data; /* store list of relations. XXX: optimize ? */ @@ -1260,26 +1262,72 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, dsa_init(); LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); - entry = (DataEntry *) hash_search(data_htab, &key, HASH_FIND, &found); - if (!found) - goto end; + if (!wideSearch) + { + entry = (DataEntry *) hash_search(data_htab, &key, HASH_FIND, &found); - /* One entry with all correctly filled fields is found */ - Assert(entry); - Assert(DsaPointerIsValid(entry->data_dp)); + if (!found) + goto end; + + /* One entry with all correctly filled fields is found */ + Assert(entry); + Assert(DsaPointerIsValid(entry->data_dp)); + + if (entry->cols != data->cols) + { + /* Collision happened? */ + elog(LOG, "[AQO] Does a collision happened? Check it if possible (fs: %lu, fss: %d).", + fs, fss); + found = false; + goto end; + } - if (entry->cols != data->cols) + temp_data = _fill_knn_data(entry, reloids); + build_knn_matrix(data, temp_data); + } + else + /* Iterate across all elements of the table. XXX: Maybe slow. */ { - /* Collision happened? */ - elog(LOG, "[AQO] Does a collision happened? 
Check it if possible (fs: %lu, fss: %d).", - fs, fss); + HASH_SEQ_STATUS hash_seq; + int noids = -1; + found = false; - goto end; + hash_seq_init(&hash_seq, data_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + List *tmp_oids = NIL; + + if (entry->key.fss != fss || entry->cols != data->cols) + continue; + + temp_data = _fill_knn_data(entry, &tmp_oids); + + if (data->rows > 0 && list_length(tmp_oids) != noids) + { + /* Dubious case. So log it and skip these data */ + elog(LOG, + "[AQO] different number depended oids for the same fss %d: " + "%d and %d correspondingly.", + fss, list_length(tmp_oids), noids); + Assert(noids >= 0); + list_free(tmp_oids); + continue; + } + + noids = list_length(tmp_oids); + + if (reloids != NULL && *reloids == NIL) + *reloids = tmp_oids; + else + list_free(tmp_oids); + + build_knn_matrix(data, temp_data); + found = true; + } } - temp_data = _fill_knn_data(entry, reloids); - build_knn_matrix(data, temp_data); + Assert(!found || (data->rows > 0 && data->rows <= aqo_K)); end: LWLockRelease(&aqo_state->data_lock); @@ -1364,7 +1412,10 @@ aqo_data(PG_FUNCTION_ARGS) elems = palloc(sizeof(*elems) * entry->nrels); for(i = 0; i < entry->nrels; i++) + { elems[i] = ObjectIdGetDatum(*(Oid *)ptr); + ptr += sizeof(Oid); + } array = construct_array(elems, entry->nrels, OIDOID, sizeof(Oid), true, TYPALIGN_INT); From 307b4e565a9965ea008e2bb24837fde8d58d8017 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 29 Jun 2022 07:39:01 +0500 Subject: [PATCH 102/203] Remove custom lock tags from the AQO storage. 
--- aqo.c | 16 ---------------- aqo.h | 1 - postprocessing.c | 15 --------------- preprocessing.c | 10 ---------- 4 files changed, 42 deletions(-) diff --git a/aqo.c b/aqo.c index 790f27e5..5ac756aa 100644 --- a/aqo.c +++ b/aqo.c @@ -308,22 +308,6 @@ get_aqo_schema(void) return result; } -/* - * Init userlock - */ -void -init_lock_tag(LOCKTAG *tag, uint64 key1, int32 key2) -{ - uint32 key = key1 % UINT32_MAX; - - tag->locktag_field1 = AQO_MODULE_MAGIC; - tag->locktag_field2 = key; - tag->locktag_field3 = (uint32) key2; - tag->locktag_field4 = 0; - tag->locktag_type = LOCKTAG_USERLOCK; - tag->locktag_lockmethodid = USER_LOCKMETHOD; -} - /* * AQO is really needed for any activity? */ diff --git a/aqo.h b/aqo.h index 345b748b..135ae24d 100644 --- a/aqo.h +++ b/aqo.h @@ -295,7 +295,6 @@ extern double *selectivity_cache_find_global_relid(int clause_hash, extern void selectivity_cache_clear(void); extern Oid get_aqo_schema(void); -extern void init_lock_tag(LOCKTAG *tag, uint64 key1, int32 key2); extern bool IsQueryDisabled(void); extern List *cur_classes; diff --git a/postprocessing.c b/postprocessing.c index 437430ce..db3510ff 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -87,18 +87,11 @@ atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, double *features, double target, double rfactor, List *reloids, bool isTimedOut) { - LOCKTAG tag; - - init_lock_tag(&tag, fs, fss); - LockAcquire(&tag, ExclusiveLock, false, false); - if (!load_fss_ext(fs, fss, data, NULL, !isTimedOut)) data->rows = 0; data->rows = OkNNr_learn(data, features, target, rfactor); update_fss_ext(fs, fss, data, reloids, isTimedOut); - - LockRelease(&tag, ExclusiveLock, false); } static void @@ -719,7 +712,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) StatEntry *stat; instr_time endtime; EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); - LOCKTAG tag; cardinality_sum_errors = 0.; cardinality_num_objects = 0; @@ -773,10 +765,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) 
else cardinality_error = -1; - /* Prevent concurrent updates. */ - init_lock_tag(&tag, query_context.query_hash, query_context.fspace_hash); - LockAcquire(&tag, ExclusiveLock, false, false); - if (query_context.collect_stat) { /* Write AQO statistics to the aqo_query_stat table */ @@ -795,9 +783,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) } } - /* Allow concurrent queries to update this feature space. */ - LockRelease(&tag, ExclusiveLock, false); - selectivity_cache_clear(); cur_classes = ldelete_uint64(cur_classes, query_context.query_hash); diff --git a/preprocessing.c b/preprocessing.c index f3f89218..6d89035a 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -127,7 +127,6 @@ aqo_planner(Query *parse, ParamListInfo boundParams) { bool query_is_stored = false; - LOCKTAG tag; MemoryContext oldCxt; /* @@ -291,13 +290,6 @@ aqo_planner(Query *parse, ignore_query_settings: if (!query_is_stored && (query_context.adding_query || force_collect_stat)) { - /* - * find-add query and query text must be atomic operation to prevent - * concurrent insertions. - */ - init_lock_tag(&tag, query_context.query_hash, 0); - LockAcquire(&tag, ExclusiveLock, false, false); - /* * Add query into the AQO knowledge base. To process an error with * concurrent addition from another backend we will try to restart @@ -312,8 +304,6 @@ aqo_planner(Query *parse, * analysis. In the case of cached plans we may have NULL query text. 
 */ aqo_qtext_store(query_context.query_hash, query_string); - - LockRelease(&tag, ExclusiveLock, false); } if (force_collect_stat) From 9025b0e3f0855ab3694e93de9d6948fca7fa385b Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 29 Jun 2022 08:54:27 +0500 Subject: [PATCH 103/203] Avoid memory leak on deletion from uint64 list --- hash.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hash.c b/hash.c index c7733b1f..8e12f2ff 100644 --- a/hash.c +++ b/hash.c @@ -131,6 +131,11 @@ lappend_uint64(List *list, uint64 datum) return list; } +/* + * Remove an element from the list and free the memory which was allocated to it. + * Looks unconventional, but we unconventionally allocate memory on append, so + * it may be ok. + */ List * ldelete_uint64(List *list, uint64 datum) { @@ -140,6 +145,7 @@ ldelete_uint64(List *list, uint64 datum) { if (*((uint64 *)lfirst(cell)) == datum) { + pfree(lfirst(cell)); list = list_delete_ptr(list, lfirst(cell)); return list; } From ca6eefdff2bf3d8e230ecb4f0d7147b24179fc40 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 29 Jun 2022 10:05:41 +0500 Subject: [PATCH 104/203] Add handling of situation when AQO shmem storage is overflowed. Our tactics here: log a problem, switch backend into CONTROLLED mode and go further. TODO: 1) change aqo.mode for all backends; 2) switch to FROZEN mode if data storage is full. 3) How to process overflow of DSM? 
--- aqo.c | 29 ++++++++++++- aqo_shared.c | 18 ++++----- aqo_shared.h | 2 + preprocessing.c | 33 +++++++++++---- storage.c | 105 +++++++++++++++++++++++++++++++++++++++++++----- 5 files changed, 158 insertions(+), 29 deletions(-) diff --git a/aqo.c b/aqo.c index 5ac756aa..11f5a235 100644 --- a/aqo.c +++ b/aqo.c @@ -209,7 +209,34 @@ _PG_init(void) 0, NULL, NULL, - NULL); + NULL + ); + + DefineCustomIntVariable("aqo.fs_max_items", + "Max number of feature spaces that AQO can operate with.", + NULL, + &fs_max_items, + 1000, + 1, INT_MAX, + PGC_SUSET, + 0, + NULL, + NULL, + NULL + ); + + DefineCustomIntVariable("aqo.fss_max_items", + "Max number of feature subspaces that AQO can operate with.", + NULL, + &fss_max_items, + 1000, + 0, INT_MAX, + PGC_SUSET, + 0, + NULL, + NULL, + NULL + ); prev_shmem_startup_hook = shmem_startup_hook; shmem_startup_hook = aqo_init_shmem; diff --git a/aqo_shared.c b/aqo_shared.c index dd9686c9..e838d02e 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -26,8 +26,8 @@ shmem_startup_hook_type prev_shmem_startup_hook = NULL; AQOSharedState *aqo_state = NULL; HTAB *fss_htab = NULL; static int aqo_htab_max_items = 1000; -static int fs_max_items = 1000; /* Max number of different feature spaces in ML model */ -static int fss_max_items = 10000; +int fs_max_items = 1; /* Max number of different feature spaces in ML model */ +int fss_max_items = 1; /* Max number of different feature subspaces in ML model */ static uint32 temp_storage_size = 1024 * 1024 * 10; /* Storage size, in bytes */ static dsm_segment *seg = NULL; @@ -217,30 +217,26 @@ aqo_init_shmem(void) info.keysize = sizeof(((StatEntry *) 0)->queryid); info.entrysize = sizeof(StatEntry); - stat_htab = ShmemInitHash("AQO Stat HTAB", - fs_max_items, fs_max_items, + stat_htab = ShmemInitHash("AQO Stat HTAB", 64, fs_max_items, &info, HASH_ELEM | HASH_BLOBS); /* Init shared memory table for query texts */ info.keysize = sizeof(((QueryTextEntry *) 0)->queryid); info.entrysize = 
sizeof(QueryTextEntry); - qtexts_htab = ShmemInitHash("AQO Query Texts HTAB", - fs_max_items, fs_max_items, + qtexts_htab = ShmemInitHash("AQO Query Texts HTAB", 64, fs_max_items, &info, HASH_ELEM | HASH_BLOBS); /* Shared memory hash table for the data */ info.keysize = sizeof(data_key); info.entrysize = sizeof(DataEntry); - data_htab = ShmemInitHash("AQO Data HTAB", - fss_max_items, fss_max_items, + data_htab = ShmemInitHash("AQO Data HTAB", 64, fss_max_items, &info, HASH_ELEM | HASH_BLOBS); /* Shared memory hash table for queries */ info.keysize = sizeof(((QueriesEntry *) 0)->queryid); info.entrysize = sizeof(QueriesEntry); - queries_htab = ShmemInitHash("AQO Queries HTAB", - fs_max_items, fs_max_items, - &info, HASH_ELEM | HASH_BLOBS); + queries_htab = ShmemInitHash("AQO Queries HTAB", 64, fs_max_items, + &info, HASH_ELEM | HASH_BLOBS); LWLockRelease(AddinShmemInitLock); LWLockRegisterTranche(aqo_state->lock.tranche, "AQO"); diff --git a/aqo_shared.h b/aqo_shared.h index b1b79387..87232882 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -50,6 +50,8 @@ extern shmem_startup_hook_type prev_shmem_startup_hook; extern AQOSharedState *aqo_state; extern HTAB *fss_htab; +extern int fs_max_items; /* Max number of feature spaces that AQO can operate */ +extern int fss_max_items; extern Size aqo_memsize(void); extern void reset_dsm_cache(void); diff --git a/preprocessing.c b/preprocessing.c index 6d89035a..d464329a 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -295,15 +295,34 @@ aqo_planner(Query *parse, * concurrent addition from another backend we will try to restart * preprocessing routine. */ - aqo_queries_store(query_context.query_hash, query_context.fspace_hash, + if (aqo_queries_store(query_context.query_hash, query_context.fspace_hash, query_context.learn_aqo, query_context.use_aqo, - query_context.auto_tuning); + query_context.auto_tuning)) + { + /* + * Add query text into the ML-knowledge base. Just for further + * analysis. 
In the case of cached plans we may have NULL query text. + */ + if (!aqo_qtext_store(query_context.query_hash, query_string)) + { + Assert(0); /* panic only on debug installation */ + elog(ERROR, "[AQO] Impossible situation was detected. Maybe not enough of shared memory?"); + } + } + else + { + /* + * In the case of problems (shmem overflow, as a typical issue) - + * disable AQO for the query class. + */ + disable_aqo_for_query(); - /* - * Add query text into the ML-knowledge base. Just for further - * analysis. In the case of cached plans we may have NULL query text. - */ - aqo_qtext_store(query_context.query_hash, query_string); + /* + * Switch AQO to controlled mode. In this mode we wouldn't add new + * query classes, just use and learn on existed set. + */ + aqo_mode = AQO_MODE_CONTROLLED; + } } if (force_collect_stat) diff --git a/storage.c b/storage.c index 6a71a541..ef2a2d2e 100644 --- a/storage.c +++ b/storage.c @@ -204,10 +204,10 @@ add_deactivated_query(uint64 queryid) /* * Update AQO statistics. * - * Add a record (and replace old, if all stat slots is full) to stat slot for - * a query class. + * Add a record (or update an existed) to stat storage for the query class. * Returns a copy of stat entry, allocated in current memory context. Caller is * in charge to free this struct after usage. + * If stat hash table is full, return NULL and log this fact. */ StatEntry * aqo_stat_store(uint64 queryid, bool use_aqo, @@ -216,16 +216,36 @@ aqo_stat_store(uint64 queryid, bool use_aqo, StatEntry *entry; bool found; int pos; + bool tblOverflow; + HASHACTION action; Assert(stat_htab); LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); - entry = (StatEntry *) hash_search(stat_htab, &queryid, HASH_ENTER, &found); + tblOverflow = hash_get_num_entries(stat_htab) < fs_max_items ? false : true; + action = tblOverflow ? 
HASH_FIND : HASH_ENTER; + entry = (StatEntry *) hash_search(stat_htab, &queryid, action, &found); /* Initialize entry on first usage */ if (!found) { - uint64 qid = entry->queryid; + uint64 qid; + + if (action == HASH_FIND) + { + /* + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit + */ + LWLockRelease(&aqo_state->stat_lock); + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[AQO] Stat storage is full. No more feature spaces can be added."), + errhint("Increase value of aqo.fs_max_items on restart of the instance"))); + return NULL; + } + + qid = entry->queryid; memset(entry, 0, sizeof(StatEntry)); entry->queryid = qid; } @@ -907,6 +927,8 @@ aqo_qtext_store(uint64 queryid, const char *query_string) { QueryTextEntry *entry; bool found; + bool tblOverflow; + HASHACTION action; Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); @@ -916,7 +938,12 @@ aqo_qtext_store(uint64 queryid, const char *query_string) dsa_init(); LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); - entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, HASH_ENTER, + + /* Check hash table overflow */ + tblOverflow = hash_get_num_entries(qtexts_htab) < fs_max_items ? false : true; + action = tblOverflow ? HASH_FIND : HASH_ENTER; + + entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, action, &found); /* Initialize entry on first usage */ @@ -925,6 +952,20 @@ aqo_qtext_store(uint64 queryid, const char *query_string) size_t size = strlen(query_string) + 1; char *strptr; + if (action == HASH_FIND) + { + /* + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit + */ + LWLockRelease(&aqo_state->qtexts_lock); + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[AQO] Query texts storage is full. 
No more feature spaces can be added."), + errhint("Increase value of aqo.fs_max_items on restart of the instance"))); + return false; + } + entry->queryid = queryid; entry->qtext_dp = dsa_allocate(qtext_dsa, size); Assert(DsaPointerIsValid(entry->qtext_dp)); @@ -933,7 +974,7 @@ aqo_qtext_store(uint64 queryid, const char *query_string) aqo_state->qtexts_changed = true; } LWLockRelease(&aqo_state->qtexts_lock); - return !found; + return true; } Datum @@ -1089,17 +1130,38 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) char *ptr; ListCell *lc; size_t size; + bool tblOverflow; + HASHACTION action; Assert(!LWLockHeldByMe(&aqo_state->data_lock)); dsa_init(); LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); - entry = (DataEntry *) hash_search(data_htab, &key, HASH_ENTER, &found); + + /* Check hash table overflow */ + tblOverflow = hash_get_num_entries(data_htab) < fss_max_items ? false : true; + action = tblOverflow ? HASH_FIND : HASH_ENTER; + + entry = (DataEntry *) hash_search(data_htab, &key, action, &found); /* Initialize entry on first usage */ if (!found) { + if (action == HASH_FIND) + { + /* + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit + */ + LWLockRelease(&aqo_state->data_lock); + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[AQO] Data storage is full. 
No more data can be added."), + errhint("Increase value of aqo.fss_max_items on restart of the instance"))); + return false; + } + entry->cols = data->cols; entry->rows = data->rows; entry->nrels = list_length(reloids); @@ -1603,11 +1665,13 @@ aqo_queries_remove(PG_FUNCTION_ARGS) } bool -aqo_queries_store(uint64 queryid, uint64 fs, bool learn_aqo, - bool use_aqo, bool auto_tuning) +aqo_queries_store(uint64 queryid, + uint64 fs, bool learn_aqo, bool use_aqo, bool auto_tuning) { QueriesEntry *entry; bool found; + bool tblOverflow; + HASHACTION action; Assert(queries_htab); @@ -1616,8 +1680,29 @@ aqo_queries_store(uint64 queryid, uint64 fs, bool learn_aqo, use_aqo == false && auto_tuning == false)); LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); - entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_ENTER, + + /* Check hash table overflow */ + tblOverflow = hash_get_num_entries(queries_htab) < fs_max_items ? false : true; + action = tblOverflow ? HASH_FIND : HASH_ENTER; + + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, action, &found); + + /* Initialize entry on first usage */ + if (!found && action == HASH_FIND) + { + /* + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit + */ + LWLockRelease(&aqo_state->queries_lock); + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[AQO] Queries storage is full. No more feature spaces can be added."), + errhint("Increase value of aqo.fs_max_items on restart of the instance"))); + return false; + } + entry->fs = fs; entry->learn_aqo = learn_aqo; entry->use_aqo = use_aqo; From 7a03aa5e3f3ac6f3568618bb87417d9eef1e05d7 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 8 Jul 2022 16:50:54 +0300 Subject: [PATCH 105/203] Fix bugs of output features in view aqo_data, aqo_queries and aqo_query_stat and in checking invariants. 
--- storage.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/storage.c b/storage.c index ef2a2d2e..4983c73e 100644 --- a/storage.c +++ b/storage.c @@ -346,6 +346,8 @@ aqo_query_stat(PG_FUNCTION_ARGS) hash_seq_init(&hash_seq, stat_htab); while ((entry = hash_seq_search(&hash_seq)) != NULL) { + memset(nulls, 0, TOTAL_NCOLS + 1); + values[QUERYID] = Int64GetDatum(entry->queryid); values[NEXECS] = Int64GetDatum(entry->execs_without_aqo); values[NEXECS_AQO] = Int64GetDatum(entry->execs_with_aqo); @@ -1263,7 +1265,7 @@ _fill_knn_data(const DataEntry *entry, List **reloids) ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); /* Check invariants */ - Assert(entry->rows < aqo_K); + Assert(entry->rows <= aqo_K); Assert(ptr != NULL); Assert(entry->key.fss == ((data_key *)ptr)->fss); @@ -1438,13 +1440,14 @@ aqo_data(PG_FUNCTION_ARGS) MemoryContextSwitchTo(oldcontext); dsa_init(); - memset(nulls, 0, AD_TOTAL_NCOLS); LWLockAcquire(&aqo_state->data_lock, LW_SHARED); hash_seq_init(&hash_seq, data_htab); while ((entry = hash_seq_search(&hash_seq)) != NULL) { char *ptr; + memset(nulls, 0, AD_TOTAL_NCOLS); + values[AD_FS] = Int64GetDatum(entry->key.fs); values[AD_FSS] = Int64GetDatum(entry->key.fss); values[AD_NFEATURES] = Int32GetDatum(entry->cols); @@ -1632,11 +1635,12 @@ aqo_queries(PG_FUNCTION_ARGS) MemoryContextSwitchTo(oldcontext); - memset(nulls, 0, AQ_TOTAL_NCOLS + 1); LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); hash_seq_init(&hash_seq, queries_htab); while ((entry = hash_seq_search(&hash_seq)) != NULL) { + memset(nulls, 0, AQ_TOTAL_NCOLS + 1); + values[AQ_QUERYID] = Int64GetDatum(entry->queryid); values[AQ_FS] = Int64GetDatum(entry->fs); values[AQ_LEARN_AQO] = BoolGetDatum(entry->learn_aqo); From 4d3a687125f3b1292bfe4082300d4ecdf6c79b05 Mon Sep 17 00:00:00 2001 From: Andrey Kazarinov Date: Mon, 11 Jul 2022 16:45:02 +0300 Subject: [PATCH 106/203] set max size 1000 --- aqo.c | 13 +++++++++++++ aqo_shared.c | 1 + aqo_shared.h | 1 + 
storage.c | 1 + 4 files changed, 16 insertions(+) diff --git a/aqo.c b/aqo.c index 11f5a235..0d52e376 100644 --- a/aqo.c +++ b/aqo.c @@ -238,6 +238,19 @@ _PG_init(void) NULL ); + DefineCustomIntVariable("aqo.max_size", + "Query max size in aqo_query_texts.", + NULL, + &max_size, + 1000, + 0, INT_MAX, + PGC_SUSET, + 0, + NULL, + NULL, + NULL + ); + prev_shmem_startup_hook = shmem_startup_hook; shmem_startup_hook = aqo_init_shmem; prev_planner_hook = planner_hook; diff --git a/aqo_shared.c b/aqo_shared.c index e838d02e..723113a7 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -28,6 +28,7 @@ HTAB *fss_htab = NULL; static int aqo_htab_max_items = 1000; int fs_max_items = 1; /* Max number of different feature spaces in ML model */ int fss_max_items = 1; /* Max number of different feature subspaces in ML model */ +int max_size = 1000; static uint32 temp_storage_size = 1024 * 1024 * 10; /* Storage size, in bytes */ static dsm_segment *seg = NULL; diff --git a/aqo_shared.h b/aqo_shared.h index 87232882..a13d2c88 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -52,6 +52,7 @@ extern HTAB *fss_htab; extern int fs_max_items; /* Max number of feature spaces that AQO can operate */ extern int fss_max_items; +extern int max_size; extern Size aqo_memsize(void); extern void reset_dsm_cache(void); diff --git a/storage.c b/storage.c index 4983c73e..f170c344 100644 --- a/storage.c +++ b/storage.c @@ -969,6 +969,7 @@ aqo_qtext_store(uint64 queryid, const char *query_string) } entry->queryid = queryid; + size = size > max_size ? max_size : size; entry->qtext_dp = dsa_allocate(qtext_dsa, size); Assert(DsaPointerIsValid(entry->qtext_dp)); strptr = (char *) dsa_get_address(qtext_dsa, entry->qtext_dp); From ff8b8274e254f22edb757931e80577496ec13010 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 12 Jul 2022 08:49:51 +0500 Subject: [PATCH 107/203] Slightly refactor patch on query max size. 
--- aqo.c | 4 ++-- aqo_shared.c | 1 - aqo_shared.h | 2 +- storage.c | 8 +++++--- storage.h | 2 ++ 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/aqo.c b/aqo.c index 0d52e376..547c0d1c 100644 --- a/aqo.c +++ b/aqo.c @@ -238,10 +238,10 @@ _PG_init(void) NULL ); - DefineCustomIntVariable("aqo.max_size", + DefineCustomIntVariable("aqo.querytext_max_size", "Query max size in aqo_query_texts.", NULL, - &max_size, + &querytext_max_size, 1000, 0, INT_MAX, PGC_SUSET, diff --git a/aqo_shared.c b/aqo_shared.c index 723113a7..e838d02e 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -28,7 +28,6 @@ HTAB *fss_htab = NULL; static int aqo_htab_max_items = 1000; int fs_max_items = 1; /* Max number of different feature spaces in ML model */ int fss_max_items = 1; /* Max number of different feature subspaces in ML model */ -int max_size = 1000; static uint32 temp_storage_size = 1024 * 1024 * 10; /* Storage size, in bytes */ static dsm_segment *seg = NULL; diff --git a/aqo_shared.h b/aqo_shared.h index a13d2c88..1317349e 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -52,7 +52,7 @@ extern HTAB *fss_htab; extern int fs_max_items; /* Max number of feature spaces that AQO can operate */ extern int fss_max_items; -extern int max_size; +extern int querytext_max_size; extern Size aqo_memsize(void); extern void reset_dsm_cache(void); diff --git a/storage.c b/storage.c index f170c344..db7ce660 100644 --- a/storage.c +++ b/storage.c @@ -64,6 +64,8 @@ typedef void* (*form_record_t) (void *ctx, size_t *size); typedef void (*deform_record_t) (void *data, size_t size); +int querytext_max_size = 1000; + HTAB *stat_htab = NULL; HTAB *queries_htab = NULL; HTAB *qtexts_htab = NULL; @@ -934,7 +936,7 @@ aqo_qtext_store(uint64 queryid, const char *query_string) Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); - if (query_string == NULL) + if (query_string == NULL || querytext_max_size == 0) return false; dsa_init(); @@ -969,7 +971,7 @@ aqo_qtext_store(uint64 queryid, const char 
*query_string) } entry->queryid = queryid; - size = size > max_size ? max_size : size; + size = size > querytext_max_size ? querytext_max_size : size; entry->qtext_dp = dsa_allocate(qtext_dsa, size); Assert(DsaPointerIsValid(entry->qtext_dp)); strptr = (char *) dsa_get_address(qtext_dsa, entry->qtext_dp); @@ -1641,7 +1643,7 @@ aqo_queries(PG_FUNCTION_ARGS) while ((entry = hash_seq_search(&hash_seq)) != NULL) { memset(nulls, 0, AQ_TOTAL_NCOLS + 1); - + values[AQ_QUERYID] = Int64GetDatum(entry->queryid); values[AQ_FS] = Int64GetDatum(entry->fs); values[AQ_LEARN_AQO] = BoolGetDatum(entry->learn_aqo); diff --git a/storage.h b/storage.h index 27c97256..460ca7c4 100644 --- a/storage.h +++ b/storage.h @@ -82,6 +82,8 @@ typedef struct QueriesEntry bool auto_tuning; } QueriesEntry; +extern int querytext_max_size; + extern HTAB *stat_htab; extern HTAB *qtexts_htab; extern HTAB *queries_htab; /* TODO */ From 878cd559bc993879405cd103f352ba85f2c86cc5 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 12 Jul 2022 10:27:23 +0500 Subject: [PATCH 108/203] Add general limit on DSM memory which can be allocated by the AQO extension to store learning data. Also, use common DSA area to place data and query texts. Default limit on DSM memory is 100 MB. TODO: remove meaningless dsa variables. 
--- aqo.c | 17 ++++++- aqo_shared.c | 2 - aqo_shared.h | 2 - storage.c | 141 ++++++++++++++++++++++++++++++++++++++++++--------- storage.h | 1 + 5 files changed, 132 insertions(+), 31 deletions(-) diff --git a/aqo.c b/aqo.c index 547c0d1c..e24b7aac 100644 --- a/aqo.c +++ b/aqo.c @@ -216,7 +216,7 @@ _PG_init(void) "Max number of feature spaces that AQO can operate with.", NULL, &fs_max_items, - 1000, + 10000, 1, INT_MAX, PGC_SUSET, 0, @@ -229,7 +229,7 @@ _PG_init(void) "Max number of feature subspaces that AQO can operate with.", NULL, &fss_max_items, - 1000, + 100000, 0, INT_MAX, PGC_SUSET, 0, @@ -251,6 +251,19 @@ _PG_init(void) NULL ); + DefineCustomIntVariable("aqo.dsm_size_max", + "Maximum size of dynamic shared memory which AQO could allocate to store learning data.", + NULL, + &dsm_size_max, + 100, + 0, INT_MAX, + PGC_SUSET, + 0, + NULL, + NULL, + NULL + ); + prev_shmem_startup_hook = shmem_startup_hook; shmem_startup_hook = aqo_init_shmem; prev_planner_hook = planner_hook; diff --git a/aqo_shared.c b/aqo_shared.c index e838d02e..1ce73a0d 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -198,7 +198,6 @@ aqo_init_shmem(void) aqo_state->qtext_trancheid = LWLockNewTrancheId(); aqo_state->qtexts_changed = false; aqo_state->data_dsa_handler = DSM_HANDLE_INVALID; - aqo_state->data_trancheid = LWLockNewTrancheId(); aqo_state->data_changed = false; aqo_state->queries_changed = false; @@ -244,7 +243,6 @@ aqo_init_shmem(void) LWLockRegisterTranche(aqo_state->qtexts_lock.tranche, "AQO QTexts Lock Tranche"); LWLockRegisterTranche(aqo_state->qtext_trancheid, "AQO Query Texts Tranche"); LWLockRegisterTranche(aqo_state->data_lock.tranche, "AQO Data Lock Tranche"); - LWLockRegisterTranche(aqo_state->data_trancheid, "AQO Data Tranche"); LWLockRegisterTranche(aqo_state->queries_lock.tranche, "AQO Queries Lock Tranche"); if (!IsUnderPostmaster) diff --git a/aqo_shared.h b/aqo_shared.h index 1317349e..b3d7a6cb 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -38,7 +38,6 @@ typedef 
struct AQOSharedState LWLock data_lock; /* Lock for shared fields below */ dsa_handle data_dsa_handler; - int data_trancheid; bool data_changed; LWLock queries_lock; /* lock for access to queries storage */ @@ -52,7 +51,6 @@ extern HTAB *fss_htab; extern int fs_max_items; /* Max number of feature spaces that AQO can operate */ extern int fss_max_items; -extern int querytext_max_size; extern Size aqo_memsize(void); extern void reset_dsm_cache(void); diff --git a/storage.c b/storage.c index db7ce660..80b83125 100644 --- a/storage.c +++ b/storage.c @@ -61,10 +61,11 @@ typedef enum { } aqo_queries_cols; typedef void* (*form_record_t) (void *ctx, size_t *size); -typedef void (*deform_record_t) (void *data, size_t size); +typedef bool (*deform_record_t) (void *data, size_t size); int querytext_max_size = 1000; +int dsm_size_max = 100; /* in MB */ HTAB *stat_htab = NULL; HTAB *queries_htab = NULL; @@ -642,7 +643,7 @@ data_store(const char *filename, form_record_t callback, return -1; } -static void +static bool _deform_stat_record_cb(void *data, size_t size) { bool found; @@ -656,24 +657,35 @@ _deform_stat_record_cb(void *data, size_t size) entry = (StatEntry *) hash_search(stat_htab, &queryid, HASH_ENTER, &found); Assert(!found); memcpy(entry, data, sizeof(StatEntry)); + return true; } void aqo_stat_load(void) { - long entries; - Assert(!LWLockHeldByMe(&aqo_state->stat_lock)); LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); - entries = hash_get_num_entries(stat_htab); - Assert(entries == 0); + + /* Load on postmaster startup, so no concurrent actions are possible here. */ + Assert(hash_get_num_entries(stat_htab) == 0); + data_load(PGAQO_STAT_FILE, _deform_stat_record_cb, NULL); LWLockRelease(&aqo_state->stat_lock); } -static void +static bool +_check_dsa_validity(dsa_pointer ptr) +{ + if (DsaPointerIsValid(ptr)) + return true; + + elog(LOG, "[AQO] DSA Pointer isn't valid. 
Is the memory limit exceeded?"); + return false; +} + +static bool _deform_qtexts_record_cb(void *data, size_t size) { bool found; @@ -690,9 +702,19 @@ _deform_qtexts_record_cb(void *data, size_t size) Assert(!found); entry->qtext_dp = dsa_allocate(qtext_dsa, len); - Assert(DsaPointerIsValid(entry->qtext_dp)); + if (!_check_dsa_validity(entry->qtext_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. + */ + (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, NULL); + return false; + } + strptr = (char *) dsa_get_address(qtext_dsa, entry->qtext_dp); strlcpy(strptr, query_string, len); + return true; } void @@ -705,7 +727,15 @@ aqo_qtexts_load(void) Assert(qtext_dsa != NULL); LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); - Assert(hash_get_num_entries(qtexts_htab) == 0); + + if (hash_get_num_entries(qtexts_htab) != 0) + { + /* Someone have done it concurrently. */ + elog(LOG, "[AQO] Another backend have loaded query texts concurrently."); + LWLockRelease(&aqo_state->qtexts_lock); + return; + } + data_load(PGAQO_TEXT_FILE, _deform_qtexts_record_cb, NULL); /* Check existence of default feature space */ @@ -725,7 +755,7 @@ aqo_qtexts_load(void) * Getting a data chunk from a caller, add a record into the 'ML data' * shmem hash table. Allocate and fill DSA chunk for variadic part of the data. 
*/ -static void +static bool _deform_data_record_cb(void *data, size_t size) { bool found; @@ -737,7 +767,7 @@ _deform_data_record_cb(void *data, size_t size) Assert(LWLockHeldByMeInMode(&aqo_state->data_lock, LW_EXCLUSIVE)); entry = (DataEntry *) hash_search(data_htab, &fentry->key, - HASH_ENTER, &found); + HASH_ENTER, &found); Assert(!found); /* Copy fixed-size part of entry byte-by-byte even with caves */ @@ -747,9 +777,20 @@ _deform_data_record_cb(void *data, size_t size) sz = _compute_data_dsa(entry); Assert(sz + offsetof(DataEntry, data_dp) == size); entry->data_dp = dsa_allocate(data_dsa, sz); - Assert(DsaPointerIsValid(entry->data_dp)); + + if (!_check_dsa_validity(entry->data_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. + */ + (void) hash_search(data_htab, &fentry->key, HASH_REMOVE, NULL); + return false; + } + dsa_ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); memcpy(dsa_ptr, ptr, sz); + return true; } void @@ -759,14 +800,22 @@ aqo_data_load(void) Assert(data_dsa != NULL); LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); - Assert(hash_get_num_entries(data_htab) == 0); + + if (hash_get_num_entries(data_htab) != 0) + { + /* Someone have done it concurrently. 
 */ + elog(LOG, "[AQO] Another backend have loaded query data concurrently."); + LWLockRelease(&aqo_state->data_lock); + return; + } + + data_load(PGAQO_DATA_FILE, _deform_data_record_cb, NULL); aqo_state->data_changed = false; /* mem data is consistent with disk */ LWLockRelease(&aqo_state->data_lock); } -static void +static bool _deform_queries_record_cb(void *data, size_t size) { bool found; @@ -780,20 +829,22 @@ _deform_queries_record_cb(void *data, size_t size) entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_ENTER, &found); Assert(!found); memcpy(entry, data, sizeof(QueriesEntry)); + return true; } void aqo_queries_load(void) { - long entries; bool found; uint64 queryid = 0; Assert(!LWLockHeldByMe(&aqo_state->queries_lock)); LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); - entries = hash_get_num_entries(queries_htab); - Assert(entries == 0); + + /* Load on postmaster startup, so no concurrent actions are possible here. */ + Assert(hash_get_num_entries(queries_htab) == 0); + data_load(PGAQO_QUERIES_FILE, _deform_queries_record_cb, NULL); /* Check existence of default feature space */ @@ -836,14 +887,23 @@ data_load(const char *filename, deform_record_t callback, void *ctx) { void *data; size_t size; + bool res; if (fread(&size, sizeof(size), 1, file) != 1) goto read_error; data = palloc(size); if (fread(data, size, 1, file) != 1) goto read_error; - callback(data, size); + res = callback(data, size); pfree(data); + + if (!res) + { + /* Error detected. Do not try to read tails of the storage. 
*/ + elog(LOG, "[AQO] Because of an error skip %ld storage records.", + num - i); + break; + } } FreeFile(file); @@ -896,11 +956,15 @@ dsa_init() Assert(aqo_state->data_dsa_handler == DSM_HANDLE_INVALID); qtext_dsa = dsa_create(aqo_state->qtext_trancheid); + Assert(qtext_dsa != NULL); + + if (dsm_size_max > 0) + dsa_set_size_limit(qtext_dsa, dsm_size_max * 1024 * 1024); + dsa_pin(qtext_dsa); aqo_state->qtexts_dsa_handler = dsa_get_handle(qtext_dsa); - data_dsa = dsa_create(aqo_state->data_trancheid); - dsa_pin(data_dsa); + data_dsa = qtext_dsa; aqo_state->data_dsa_handler = dsa_get_handle(data_dsa); /* Load and initialize query texts hash table */ @@ -910,11 +974,10 @@ dsa_init() else { qtext_dsa = dsa_attach(aqo_state->qtexts_dsa_handler); - data_dsa = dsa_attach(aqo_state->data_dsa_handler); + data_dsa = qtext_dsa; } dsa_pin_mapping(qtext_dsa); - dsa_pin_mapping(data_dsa); MemoryContextSwitchTo(old_context); LWLockRelease(&aqo_state->lock); @@ -973,7 +1036,17 @@ aqo_qtext_store(uint64 queryid, const char *query_string) entry->queryid = queryid; size = size > querytext_max_size ? querytext_max_size : size; entry->qtext_dp = dsa_allocate(qtext_dsa, size); - Assert(DsaPointerIsValid(entry->qtext_dp)); + + if (!_check_dsa_validity(entry->qtext_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. + */ + (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, NULL); + return false; + } + strptr = (char *) dsa_get_address(qtext_dsa, entry->qtext_dp); strlcpy(strptr, query_string, size); aqo_state->qtexts_changed = true; @@ -1173,7 +1246,16 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) size = _compute_data_dsa(entry); entry->data_dp = dsa_allocate0(data_dsa, size); - Assert(DsaPointerIsValid(entry->data_dp)); + + if (!_check_dsa_validity(entry->data_dp)) + { + /* + * DSA stuck into problems. Rollback changes. 
Return false in belief + * that caller recognize it and don't try to call us more. + */ + (void) hash_search(data_htab, &key, HASH_REMOVE, NULL); + return false; + } } Assert(DsaPointerIsValid(entry->data_dp)); @@ -1195,7 +1277,16 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) /* Need to re-allocate DSA chunk */ dsa_free(data_dsa, entry->data_dp); entry->data_dp = dsa_allocate0(data_dsa, size); - Assert(DsaPointerIsValid(entry->data_dp)); + + if (!_check_dsa_validity(entry->data_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. + */ + (void) hash_search(data_htab, &key, HASH_REMOVE, NULL); + return false; + } } ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); diff --git a/storage.h b/storage.h index 460ca7c4..373cace0 100644 --- a/storage.h +++ b/storage.h @@ -83,6 +83,7 @@ typedef struct QueriesEntry } QueriesEntry; extern int querytext_max_size; +extern int dsm_size_max; extern HTAB *stat_htab; extern HTAB *qtexts_htab; From 9a1244fe13de6e01a61d9c81ca0e525af12317d5 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 13 Jul 2022 15:02:07 +0500 Subject: [PATCH 109/203] Bugfix. Copy of clauses, have got by aqo_get_clauses() has a specific structure and shouldn't be touched by any postgres machinery except node hash generator. 
--- cardinality_hooks.c | 11 +++++++++-- path_utils.c | 8 +++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 64d0fe14..2c837bd9 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -233,10 +233,17 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, { MemoryContext old_ctx_m; + selectivities = list_concat( + get_selectivities(root, param_clauses, rel->relid, + JOIN_INNER, NULL), + get_selectivities(root, rel->baserestrictinfo, + rel->relid, + JOIN_INNER, NULL)); + + /* Make specific copy of clauses with mutated subplans */ allclauses = list_concat(aqo_get_clauses(root, param_clauses), aqo_get_clauses(root, rel->baserestrictinfo)); - selectivities = get_selectivities(root, allclauses, rel->relid, - JOIN_INNER, NULL); + rte = planner_rt_fetch(rel->relid, root); get_eclasses(allclauses, &nargs, &args_hash, &eclass_hash); diff --git a/path_utils.c b/path_utils.c index ad3ef628..24f43002 100644 --- a/path_utils.c +++ b/path_utils.c @@ -443,18 +443,16 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) case T_ForeignPath: /* The same as in the default case */ default: - cur = list_concat(aqo_get_clauses(root, - path->parent->baserestrictinfo), + cur = list_concat(list_copy(path->parent->baserestrictinfo), path->param_info ? - aqo_get_clauses(root, - path->param_info->ppi_clauses) : - NIL); + path->param_info->ppi_clauses : NIL); if (path->param_info) cur_sel = get_selectivities(root, cur, path->parent->relid, JOIN_INNER, NULL); else cur_sel = get_selectivities(root, cur, 0, JOIN_INNER, NULL); *selectivities = cur_sel; + cur = aqo_get_clauses(root, cur); return cur; break; } From 2f4e6cd714651317bec48c181894780f9a658b69 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 15 Jul 2022 16:34:09 +0500 Subject: [PATCH 110/203] One more step to make AQO relocatable. Replace stored procedure aqo_cleanup() with the one, implemented in C. 
BTW, fix the issue when AQO takes control on queries, involving only a set of TEMP tables. Now AQO learns on queries with at least one plain table, permanently stored in a database. Fix regression and TAP tests: somewhere because of changed behaviour, somewhere it was caused by mistakes. --- aqo--1.4--1.5.sql | 36 +-- aqo_shared.c | 8 +- aqo_shared.h | 1 + expected/aqo_learn.out | 8 +- expected/forced_stat_collection.out | 7 + expected/temp_tables.out | 39 ++-- expected/top_queries.out | 11 +- expected/unsupported.out | 22 +- preprocessing.c | 13 +- sql/aqo_learn.sql | 2 +- sql/forced_stat_collection.sql | 2 + sql/temp_tables.sql | 12 +- sql/top_queries.sql | 2 +- sql/unsupported.sql | 6 +- storage.c | 350 +++++++++++++++++++++++----- t/001_pgbench.pl | 5 +- t/002_pg_stat_statements_aqo.pl | 2 +- 17 files changed, 382 insertions(+), 144 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 903423e3..0546bf42 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -59,7 +59,7 @@ CREATE FUNCTION aqo_query_stat( OUT planning_time_without_aqo double precision[], OUT cardinality_error_with_aqo double precision[], OUT cardinality_error_without_aqo double precision[], - OUT executions_with_aqo bigint, + OUT executions_with_aqo bigint, OUT executions_without_aqo bigint ) RETURNS SETOF record @@ -190,38 +190,8 @@ COMMENT ON FUNCTION aqo_drop_class(bigint) IS -- Returns number of deleted rows from aqo_queries and aqo_data tables.
-- CREATE OR REPLACE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) -AS $$ -DECLARE - lfs bigint; - lfss integer; -BEGIN - -- Save current number of rows - SELECT count(*) FROM aqo_queries INTO nfs; - SELECT count(*) FROM aqo_data INTO nfss; - - FOR lfs,lfss IN SELECT q1.fs,q1.fss FROM ( - SELECT fs, fss, unnest(oids) AS reloid - FROM aqo_data) AS q1 - WHERE q1.reloid NOT IN (SELECT oid FROM pg_class) - GROUP BY (q1.fs,q1.fss) - LOOP --- IF (fs = 0) THEN --- DELETE FROM aqo_data WHERE fsspace_hash = fss; --- continue; --- END IF; - - -- Remove ALL feature space if one of oids isn't exists - PERFORM aqo_queries_remove(lfs); - PERFORM aqo_stat_remove(lfs); - PERFORM aqo_qtexts_remove(lfs); - PERFORM aqo_data_remove(lfs, NULL); - END LOOP; - - -- Calculate difference with previous state of knowledge base - nfs := nfs - (SELECT count(*) FROM aqo_queries); - nfss := nfss - (SELECT count(*) FROM aqo_data); -END; -$$ LANGUAGE plpgsql; +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_cleanup() IS 'Remove unneeded rows from the AQO ML storage'; diff --git a/aqo_shared.c b/aqo_shared.c index 1ce73a0d..2ec063e7 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -216,25 +216,25 @@ aqo_init_shmem(void) info.keysize = sizeof(((StatEntry *) 0)->queryid); info.entrysize = sizeof(StatEntry); - stat_htab = ShmemInitHash("AQO Stat HTAB", 64, fs_max_items, + stat_htab = ShmemInitHash("AQO Stat HTAB", fs_max_items, fs_max_items, &info, HASH_ELEM | HASH_BLOBS); /* Init shared memory table for query texts */ info.keysize = sizeof(((QueryTextEntry *) 0)->queryid); info.entrysize = sizeof(QueryTextEntry); - qtexts_htab = ShmemInitHash("AQO Query Texts HTAB", 64, fs_max_items, + qtexts_htab = ShmemInitHash("AQO Query Texts HTAB", fs_max_items, fs_max_items, &info, HASH_ELEM | HASH_BLOBS); /* Shared memory hash table for the data */ info.keysize = sizeof(data_key); info.entrysize = sizeof(DataEntry); - data_htab = ShmemInitHash("AQO Data HTAB", 64, 
fss_max_items, + data_htab = ShmemInitHash("AQO Data HTAB", fss_max_items, fss_max_items, &info, HASH_ELEM | HASH_BLOBS); /* Shared memory hash table for queries */ info.keysize = sizeof(((QueriesEntry *) 0)->queryid); info.entrysize = sizeof(QueriesEntry); - queries_htab = ShmemInitHash("AQO Queries HTAB", 64, fs_max_items, + queries_htab = ShmemInitHash("AQO Queries HTAB", fs_max_items, fs_max_items, &info, HASH_ELEM | HASH_BLOBS); LWLockRelease(AddinShmemInitLock); diff --git a/aqo_shared.h b/aqo_shared.h index b3d7a6cb..61c0d3d0 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -30,6 +30,7 @@ typedef struct AQOSharedState /* Storage fields */ LWLock stat_lock; /* lock for access to stat storage */ + bool stat_changed; LWLock qtexts_lock; /* Lock for shared fields below */ dsa_handle qtexts_dsa_handler; /* DSA area for storing of query texts */ diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index e08f089b..3ccdb4e8 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -236,10 +236,10 @@ SELECT count(*) FROM tmp1; (1 row) -- Remove data on some unneeded instances of tmp1 table. -SELECT aqo_cleanup(); - aqo_cleanup -------------- - (9,18) +SELECT * FROM aqo_cleanup(); + nfs | nfss +-----+------ + 9 | 18 (1 row) -- Result of the query below should be empty diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index e514e386..10e14b4f 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -55,4 +55,11 @@ SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; (3 rows) +DROP TABLE person; +SELECT 1 FROM aqo_reset(); -- Full remove of ML data before the end + ?column? 
+---------- + 1 +(1 row) + DROP EXTENSION aqo; diff --git a/expected/temp_tables.out b/expected/temp_tables.out index b40790f0..cb1da23f 100644 --- a/expected/temp_tables.out +++ b/expected/temp_tables.out @@ -16,10 +16,11 @@ SELECT count(*) FROM tt AS t1, tt AS t2; 0 (1 row) -SELECT * FROM aqo_data; - fs | fss | nfeatures | features | targets | reliability | oids -----+-----+-----------+----------+---------+-------------+------ -(0 rows) +SELECT query_text FROM aqo_query_texts; -- Default row should be returned + query_text +--------------------------------------- + COMMON feature space (do not delete!) +(1 row) -- Should be stored in the ML base SELECT count(*) FROM pt; @@ -40,30 +41,30 @@ SELECT count(*) FROM pt AS pt1, tt AS tt1, tt AS tt2, pt AS pt2; 0 (1 row) -SELECT count(*) FROM aqo_data; +SELECT count(*) FROM aqo_data; -- Don't bother about false negatives because of trivial query plans count ------- 10 (1 row) DROP TABLE tt; -SELECT aqo_cleanup(); - aqo_cleanup -------------- - (0,0) +SELECT * FROM aqo_cleanup(); + nfs | nfss +-----+------ + 0 | 0 (1 row) -SELECT count(*) FROM aqo_data; -- Should be the same as above +SELECT count(*) FROM aqo_data; -- Should return the same as previous call above count ------- 10 (1 row) DROP TABLE pt; -SELECT aqo_cleanup(); - aqo_cleanup -------------- - (3,10) +SELECT * FROM aqo_cleanup(); + nfs | nfss +-----+------ + 3 | 10 (1 row) SELECT count(*) FROM aqo_data; -- Should be 0 @@ -74,13 +75,11 @@ SELECT count(*) FROM aqo_data; -- Should be 0 SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt ON aq.queryid = aqt.queryid -ORDER BY (md5(query_text)); -- TODO: should contain just one row - query_text ------------------------------------------- - SELECT count(*) FROM tt AS t1, tt AS t2; +ORDER BY (md5(query_text)); -- The only the common class is returned + query_text +--------------------------------------- COMMON feature space (do not delete!) 
- SELECT count(*) FROM tt; -(3 rows) +(1 row) -- Test learning on temporary table CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; diff --git a/expected/top_queries.out b/expected/top_queries.out index 99e114dc..728405aa 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -10,7 +10,7 @@ SET aqo.force_collect_stat = 'on'; -- CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); -SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; +SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; -- Ignore it cnt ----- 0 @@ -31,8 +31,7 @@ SELECT num FROM aqo_execution_time(false); num ----- 1 - 2 -(2 rows) +(1 row) -- Without the AQO control queries with and without temp tables are logged. SELECT query_text,nexecs @@ -41,9 +40,8 @@ WHERE ce.id = aqt.queryid ORDER BY (md5(query_text)); query_text | nexecs ------------------------------------------------------+-------- - SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; | 1 SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; | 1 -(2 rows) +(1 row) -- -- num of query which uses the table t2 should be bigger than num of query which @@ -93,10 +91,9 @@ ORDER BY (md5(query_text)); query_text | nexecs ------------------------------------------------------------------------------------------------+-------- SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 - SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; | 1 SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; | 1 SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 -(4 rows) +(3 rows) SELECT 1 FROM aqo_reset(); ?column? 
diff --git a/expected/unsupported.out b/expected/unsupported.out index da9e7d89..4b95c201 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -579,11 +579,23 @@ ORDER BY (md5(query_text),error) DESC; | ON q1.x = q2.x+1; (12 rows) -DROP TABLE t,t1 CASCADE; -SELECT aqo_cleanup(); - aqo_cleanup -------------- - (12,42) +DROP TABLE t,t1 CASCADE; -- delete all tables used in the test +SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May some false positives really bother us here? + count +------- + 42 +(1 row) + +SELECT * FROM aqo_cleanup(); + nfs | nfss +-----+------ + 12 | 42 +(1 row) + +SELECT count(*) FROM aqo_data; -- No one row should be returned + count +------- + 0 (1 row) -- Look for any remaining queries in the ML storage. diff --git a/preprocessing.c b/preprocessing.c index d464329a..748c85e4 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -367,7 +367,8 @@ typedef struct AQOPreWalkerCtx /* * Examine a fully-parsed query, and return TRUE iff any relation underlying - * the query is a system relation or no one relation touched by the query. + * the query is a system relation or no one permanent (non-temporary) relation + * touched by the query. 
*/ static bool isQueryUsingSystemRelation(Query *query) @@ -458,11 +459,17 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) bool is_catalog = IsCatalogRelation(rel); bool is_aqo_rel = IsAQORelation(rel); - table_close(rel, AccessShareLock); if (is_catalog || is_aqo_rel) + { + table_close(rel, AccessShareLock); return true; + } + + if (rel->rd_rel->relpersistence != RELPERSISTENCE_TEMP) + /* Plane non TEMP table */ + ctx->trivQuery = false; - ctx->trivQuery = false; + table_close(rel, AccessShareLock); } else if (rte->rtekind == RTE_FUNCTION) { diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index cb0122bb..8b57972e 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -124,7 +124,7 @@ WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; SELECT count(*) FROM tmp1; -- Remove data on some unneeded instances of tmp1 table. -SELECT aqo_cleanup(); +SELECT * FROM aqo_cleanup(); -- Result of the query below should be empty SELECT * FROM aqo_query_texts aqt1, aqo_query_texts aqt2 diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index ad234655..231938ca 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -36,4 +36,6 @@ ON aq.queryid = aqs.queryid; SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); +DROP TABLE person; +SELECT 1 FROM aqo_reset(); -- Full remove of ML data before the end DROP EXTENSION aqo; diff --git a/sql/temp_tables.sql b/sql/temp_tables.sql index 070721ce..aba78aba 100644 --- a/sql/temp_tables.sql +++ b/sql/temp_tables.sql @@ -8,23 +8,23 @@ CREATE TABLE pt(); -- Ignore queries with the only temp tables SELECT count(*) FROM tt; SELECT count(*) FROM tt AS t1, tt AS t2; -SELECT * FROM aqo_data; +SELECT query_text FROM aqo_query_texts; -- Default row should be returned -- Should be stored in the ML base SELECT count(*) FROM pt; SELECT count(*) FROM pt, tt; SELECT count(*) FROM pt AS pt1, tt AS tt1, tt AS tt2, pt AS pt2; -SELECT count(*) FROM aqo_data; +SELECT 
count(*) FROM aqo_data; -- Don't bother about false negatives because of trivial query plans DROP TABLE tt; -SELECT aqo_cleanup(); -SELECT count(*) FROM aqo_data; -- Should be the same as above +SELECT * FROM aqo_cleanup(); +SELECT count(*) FROM aqo_data; -- Should return the same as previous call above DROP TABLE pt; -SELECT aqo_cleanup(); +SELECT * FROM aqo_cleanup(); SELECT count(*) FROM aqo_data; -- Should be 0 SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt ON aq.queryid = aqt.queryid -ORDER BY (md5(query_text)); -- TODO: should contain just one row +ORDER BY (md5(query_text)); -- The only the common class is returned -- Test learning on temporary table CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; diff --git a/sql/top_queries.sql b/sql/top_queries.sql index 62626d4f..98a0c8ed 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -11,7 +11,7 @@ SET aqo.force_collect_stat = 'on'; -- CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); -SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; +SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; -- Ignore it SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; SELECT num FROM aqo_execution_time(true); -- Just for checking, return zero. SELECT num FROM aqo_execution_time(false); diff --git a/sql/unsupported.sql b/sql/unsupported.sql index c0b6102b..d5209af6 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -162,9 +162,11 @@ FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.queryid = cef.id ORDER BY (md5(query_text),error) DESC; -DROP TABLE t,t1 CASCADE; +DROP TABLE t,t1 CASCADE; -- delete all tables used in the test -SELECT aqo_cleanup(); +SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May some false positives really bother us here? 
+SELECT * FROM aqo_cleanup(); +SELECT count(*) FROM aqo_data; -- No one row should be returned -- Look for any remaining queries in the ML storage. SELECT to_char(error, '9.99EEEE')::text AS error, query_text diff --git a/storage.c b/storage.c index 80b83125..9d79553a 100644 --- a/storage.c +++ b/storage.c @@ -87,6 +87,11 @@ static int data_store(const char *filename, form_record_t callback, static void data_load(const char *filename, deform_record_t callback, void *ctx); static size_t _compute_data_dsa(const DataEntry *entry); +static bool _aqo_stat_remove(uint64 queryid); +static bool _aqo_queries_remove(uint64 queryid); +static bool _aqo_qtexts_remove(uint64 queryid); +static bool _aqo_data_remove(data_key *key); + PG_FUNCTION_INFO_V1(aqo_query_stat); PG_FUNCTION_INFO_V1(aqo_query_texts); PG_FUNCTION_INFO_V1(aqo_data); @@ -99,6 +104,7 @@ PG_FUNCTION_INFO_V1(aqo_enable_query); PG_FUNCTION_INFO_V1(aqo_disable_query); PG_FUNCTION_INFO_V1(aqo_queries_update); PG_FUNCTION_INFO_V1(aqo_reset); +PG_FUNCTION_INFO_V1(aqo_cleanup); bool @@ -393,18 +399,13 @@ aqo_stat_reset(void) return num_remove; } + Datum aqo_stat_remove(PG_FUNCTION_ARGS) { - uint64 queryid = (uint64) PG_GETARG_INT64(0); - StatEntry *entry; - bool removed; + uint64 queryid = (uint64) PG_GETARG_INT64(0); - LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); - entry = (StatEntry *) hash_search(stat_htab, &queryid, HASH_REMOVE, NULL); - removed = (entry) ? 
true : false; - LWLockRelease(&aqo_state->stat_lock); - PG_RETURN_BOOL(removed); + PG_RETURN_BOOL(_aqo_stat_remove(queryid)); } static void * @@ -1116,10 +1117,47 @@ aqo_query_texts(PG_FUNCTION_ARGS) return (Datum) 0; } -Datum -aqo_qtexts_remove(PG_FUNCTION_ARGS) +static bool +_aqo_stat_remove(uint64 queryid) +{ + bool found; + + Assert(!LWLockHeldByMe(&aqo_state->stat_lock)); + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + (void) hash_search(stat_htab, &queryid, HASH_FIND, &found); + + if (found) + { + (void) hash_search(stat_htab, &queryid, HASH_REMOVE, NULL); + aqo_state->stat_changed = true; + } + + LWLockRelease(&aqo_state->stat_lock); + return found; +} + +static bool +_aqo_queries_remove(uint64 queryid) +{ + bool found; + + Assert(!LWLockHeldByMe(&aqo_state->queries_lock)); + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + (void) hash_search(queries_htab, &queryid, HASH_FIND, &found); + + if (found) + { + (void) hash_search(queries_htab, &queryid, HASH_REMOVE, NULL); + aqo_state->queries_changed = true; + } + + LWLockRelease(&aqo_state->queries_lock); + return found; +} + +static bool +_aqo_qtexts_remove(uint64 queryid) { - uint64 queryid = (uint64) PG_GETARG_INT64(0); bool found = false; QueryTextEntry *entry; @@ -1132,19 +1170,54 @@ aqo_qtexts_remove(PG_FUNCTION_ARGS) * Look for a record with this queryid. DSA fields must be freed before * deletion of the record. 
*/ - entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, HASH_FIND, &found); - if (!found) - goto end; + entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, HASH_FIND, + &found); + if (found) + { + /* Free DSA memory, allocated for this record */ + Assert(DsaPointerIsValid(entry->qtext_dp)); + dsa_free(qtext_dsa, entry->qtext_dp); - /* Free DSA memory, allocated foro this record */ - Assert(DsaPointerIsValid(entry->qtext_dp)); - dsa_free(qtext_dsa, entry->qtext_dp); + (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, NULL); + aqo_state->qtexts_changed = true; + } - (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, &found); - Assert(found); -end: LWLockRelease(&aqo_state->qtexts_lock); - PG_RETURN_BOOL(found); + return found; +} + +static bool +_aqo_data_remove(data_key *key) +{ + DataEntry *entry; + bool found; + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + + entry = (DataEntry *) hash_search(data_htab, key, HASH_FIND, &found); + if (found) + { + /* Free DSA memory, allocated for this record */ + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_free(data_dsa, entry->data_dp); + entry->data_dp = InvalidDsaPointer; + + if (hash_search(data_htab, key, HASH_REMOVE, NULL) == NULL) + elog(PANIC, "[AQO] Inconsistent data hash table"); + aqo_state->data_changed = true; + } + + LWLockRelease(&aqo_state->data_lock); + return found; +} + +Datum +aqo_qtexts_remove(PG_FUNCTION_ARGS) +{ + uint64 queryid = (uint64) PG_GETARG_INT64(0); + + PG_RETURN_BOOL(_aqo_qtexts_remove(queryid)); } static long @@ -1599,7 +1672,9 @@ _aqo_data_clean(uint64 fs) DataEntry *entry; long removed = 0; - Assert(LWLockHeldByMe(&aqo_state->data_lock)); + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + hash_seq_init(&hash_seq, data_htab); while ((entry = hash_seq_search(&hash_seq)) != NULL) { @@ -1608,11 +1683,13 @@ _aqo_data_clean(uint64 fs) 
Assert(DsaPointerIsValid(entry->data_dp)); dsa_free(data_dsa, entry->data_dp); + entry->data_dp = InvalidDsaPointer; if (hash_search(data_htab, &entry->key, HASH_REMOVE, NULL) == NULL) elog(ERROR, "[AQO] hash table corrupted"); removed++; } + LWLockRelease(&aqo_state->data_lock); return removed; } @@ -1621,42 +1698,19 @@ aqo_data_remove(PG_FUNCTION_ARGS) { data_key key; bool found; - DataEntry *entry; dsa_init(); - Assert(!LWLockHeldByMe(&aqo_state->data_lock)); - LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); - if (PG_ARGISNULL(1)) { /* Remove all feature subspaces from the space */ found = (_aqo_data_clean((uint64) PG_GETARG_INT64(0)) > 0); - goto end; + return found; } key.fs = (uint64) PG_GETARG_INT64(0); key.fss = PG_GETARG_INT32(1); - - /* - * Look for a record with this queryid. DSA fields must be freed before - * deletion of the record. - */ - entry = (DataEntry *) hash_search(qtexts_htab, &key, HASH_FIND, &found); - if (!found) - goto end; - - /* Free DSA memory, allocated foro this record */ - Assert(DsaPointerIsValid(entry->data_dp)); - dsa_free(data_dsa, entry->data_dp); - - (void) hash_search(data_htab, &key, HASH_REMOVE, &found); - Assert(found); -end: - if (found) - aqo_state->data_changed = true; - LWLockRelease(&aqo_state->data_lock); - PG_RETURN_BOOL(found); + PG_RETURN_BOOL(_aqo_data_remove(&key)); } static long @@ -1751,15 +1805,9 @@ aqo_queries(PG_FUNCTION_ARGS) Datum aqo_queries_remove(PG_FUNCTION_ARGS) { - uint64 queryid = (uint64) PG_GETARG_INT64(0); - QueriesEntry *entry; - bool removed; + uint64 queryid = (uint64) PG_GETARG_INT64(0); - LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); - entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_REMOVE, NULL); - removed = (entry) ? 
true : false; - LWLockRelease(&aqo_state->queries_lock); - PG_RETURN_BOOL(removed); + PG_RETURN_BOOL(_aqo_queries_remove(queryid)); } bool @@ -1964,3 +2012,195 @@ aqo_reset(PG_FUNCTION_ARGS) counter += aqo_queries_reset(); PG_RETURN_INT64(counter); } + +#include "utils/syscache.h" + +/* + * Scan aqo_queries. For each FS lookup aqo_data records: detect a record, where + * list of oids links to deleted tables. + * If + * + * Scan aqo_data hash table. Detect a record, where list of oids links to + * deleted tables. If gentle is TRUE, remove this record only. Another case, + * remove all records with the same (not default) fs from aqo_data. + * Scan aqo_queries. If no one record in aqo_data exists for this fs - remove + * the record from aqo_queries, aqo_query_stat and aqo_query_texts. + */ +static void +cleanup_aqo_database(bool gentle, int *fs_num, int *fss_num) +{ + HASH_SEQ_STATUS hash_seq; + QueriesEntry *entry; + + /* Call it because we might touch DSA segments during the cleanup */ + dsa_init(); + + *fs_num = 0; + *fss_num = 0; + + /* + * It's a long haul. So, make seq scan without any lock. It is possible + * because only this operation can delete data from hash table. 
+ */ + hash_seq_init(&hash_seq, queries_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + HASH_SEQ_STATUS hash_seq2; + DataEntry *dentry; + List *junk_fss = NIL; + List *actual_fss = NIL; + ListCell *lc; + + /* Scan aqo_data for any junk records related to this FS */ + hash_seq_init(&hash_seq2, data_htab); + while ((dentry = hash_seq_search(&hash_seq2)) != NULL) + { + char *ptr; + + if (entry->fs != dentry->key.fs) + /* Another FS */ + continue; + + LWLockAcquire(&aqo_state->data_lock, LW_SHARED); + + Assert(DsaPointerIsValid(dentry->data_dp)); + ptr = dsa_get_address(data_dsa, dentry->data_dp); + + ptr += sizeof(data_key); + ptr += sizeof(double) * dentry->rows * dentry->cols; + ptr += sizeof(double) * 2 * dentry->rows; + + if (dentry->nrels > 0) + { + int i; + + /* Check each OID to be existed. */ + for(i = 0; i < dentry->nrels; i++) + { + Oid reloid = ObjectIdGetDatum(*(Oid *)ptr); + + if (!SearchSysCacheExists1(RELOID, reloid)) + /* Remember this value */ + junk_fss = list_append_unique_int(junk_fss, + dentry->key.fss); + else + actual_fss = list_append_unique_int(actual_fss, + dentry->key.fss); + + ptr += sizeof(Oid); + } + } + else + { + /* + * Impossible case. We don't use AQO for so simple or synthetic + * data. Just detect errors in this logic. + */ + ereport(PANIC, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("AQO detected incorrect behaviour: fs=%lu fss=%ld", + dentry->key.fs, dentry->key.fss))); + } + + LWLockRelease(&aqo_state->data_lock); + } + + /* + * In forced mode remove all child FSSes even some of them are still + * link to existed tables. + */ + if (junk_fss != NIL && !gentle) + junk_fss = list_concat(junk_fss, actual_fss); + + /* Remove junk records from aqo_data */ + foreach(lc, junk_fss) + { + data_key key = {.fs = entry->fs, .fss = lfirst_int(lc)}; + (*fss_num) += (int) _aqo_data_remove(&key); + } + + /* + * If no one live FSS exists, remove the class totally. Don't touch + * default query class. 
+ */ + if (entry->fs != 0 && (actual_fss == NIL || (junk_fss != NIL && !gentle))) + { + /* Query Stat */ + _aqo_stat_remove(entry->queryid); + + /* Query text */ + _aqo_qtexts_remove(entry->queryid); + + /* Query class preferences */ + (*fs_num) += (int) _aqo_queries_remove(entry->queryid); + } + + list_free(junk_fss); + list_free(actual_fss); + } + + /* + * The best place to flush updated AQO storage: calling the routine, user + * realizes how heavy it is. + */ + aqo_stat_flush(); + aqo_data_flush(); + aqo_qtexts_flush(); + aqo_queries_flush(); +} + +Datum +aqo_cleanup(PG_FUNCTION_ARGS) +{ + int fs_num; + int fss_num; + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[2]; + bool nulls[2] = {0, 0}; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + Assert(tupDesc->natts == 2); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + /* + * Make forced cleanup: if at least one fss isn't actual, remove parent FS + * and all its 
FSSes. + * Main idea of such behaviour here is, if a table was deleted, we have + * little chance to use this class in future. Only one use case here can be + * a reason: to use it as a base for search data in a set of neighbours. + * But, invent another UI function for such logic. + */ + cleanup_aqo_database(false, &fs_num, &fss_num); + + values[0] = Int32GetDatum(fs_num); + values[1] = Int32GetDatum(fss_num); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + tuplestore_donestoring(tupstore); + return (Datum) 0; +} diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 83b374f1..1f096203 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -165,6 +165,7 @@ # # ############################################################################## +$node->safe_psql('postgres', "SELECT aqo_reset()"); $node->safe_psql('postgres', "DROP EXTENSION aqo"); $node->safe_psql('postgres', "CREATE EXTENSION aqo"); @@ -272,8 +273,8 @@ DROP TABLE pgbench_accounts, pgbench_branches, pgbench_tellers, pgbench_history CASCADE;"); -# Clean unneeded AQO knowledge -$node->safe_psql('postgres', "SELECT public.aqo_cleanup()"); +# Remove unnecessary AQO knowledge +$node->safe_psql('postgres', "SELECT * FROM aqo_cleanup()"); # Calculate total number of rows in AQO-related tables. 
my $new_fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries;"); diff --git a/t/002_pg_stat_statements_aqo.pl b/t/002_pg_stat_statements_aqo.pl index d60dca10..dfa84b3a 100644 --- a/t/002_pg_stat_statements_aqo.pl +++ b/t/002_pg_stat_statements_aqo.pl @@ -28,7 +28,7 @@ my $total_classes; $node->start(); # ERROR: AQO allow to load library only on startup -print "create extantion aqo"; +print "Create extension aqo"; $node->psql('postgres', "CREATE EXTENSION aqo"); $node->psql('postgres', "CREATE EXTENSION pg_stat_statements"); print "create preload libraries"; From 0241215e7c15435f7bf8f50e8386b7eafec46c1d Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 20 Jul 2022 17:15:22 +0500 Subject: [PATCH 111/203] Rewrite aqo_drop_class and remove some unnecessary functions from the UI. --- aqo--1.4--1.5.sql | 86 +++++++++++-------------------------- expected/aqo_learn.out | 8 +--- expected/gucs.out | 2 +- storage.c | 97 +++++++++++++++++++++--------------------- 4 files changed, 76 insertions(+), 117 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 0546bf42..2af0f6ca 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -3,6 +3,7 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "ALTER EXTENSION aqo UPDATE TO '1.5'" to load this file. \quit +/* Remove old interface of the extension */ DROP FUNCTION array_mse; DROP FUNCTION array_avg; DROP FUNCTION public.aqo_clear_hist; -- Should be renamed and reworked @@ -14,12 +15,15 @@ DROP FUNCTION public.aqo_status; DROP FUNCTION public.clean_aqo_data; DROP FUNCTION public.show_cardinality_errors; DROP FUNCTION public.top_time_queries; - DROP TABLE public.aqo_data CASCADE; DROP TABLE public.aqo_queries CASCADE; DROP TABLE public.aqo_query_texts CASCADE; DROP TABLE public.aqo_query_stat CASCADE; + +/* + * VIEWs to discover AQO data. 
+ */ CREATE FUNCTION aqo_queries ( OUT queryid bigint, OUT fs bigint, @@ -30,28 +34,13 @@ CREATE FUNCTION aqo_queries ( RETURNS SETOF record AS 'MODULE_PATHNAME', 'aqo_queries' LANGUAGE C STRICT VOLATILE PARALLEL SAFE; -CREATE FUNCTION aqo_queries_remove(queryid bigint) RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT PARALLEL SAFE; CREATE FUNCTION aqo_query_texts(OUT queryid bigint, OUT query_text text) RETURNS SETOF record AS 'MODULE_PATHNAME', 'aqo_query_texts' LANGUAGE C STRICT VOLATILE PARALLEL SAFE; -CREATE FUNCTION aqo_qtexts_remove(queryid bigint) RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT PARALLEL SAFE; - --- --- Remove all records in the AQO storage. --- Return number of rows removed. --- -CREATE FUNCTION aqo_reset() RETURNS bigint -AS 'MODULE_PATHNAME' LANGUAGE C PARALLEL SAFE; -COMMENT ON FUNCTION aqo_reset() IS -'Reset all data gathered by AQO'; -CREATE FUNCTION aqo_query_stat( +CREATE FUNCTION aqo_query_stat ( OUT queryid bigint, OUT execution_time_with_aqo double precision[], OUT execution_time_without_aqo double precision[], @@ -66,7 +55,7 @@ RETURNS SETOF record AS 'MODULE_PATHNAME', 'aqo_query_stat' LANGUAGE C STRICT VOLATILE PARALLEL SAFE; -CREATE FUNCTION aqo_data( +CREATE FUNCTION aqo_data ( OUT fs bigint, OUT fss integer, OUT nfeatures integer, @@ -78,18 +67,13 @@ CREATE FUNCTION aqo_data( RETURNS SETOF record AS 'MODULE_PATHNAME', 'aqo_data' LANGUAGE C STRICT VOLATILE PARALLEL SAFE; -CREATE FUNCTION aqo_data_remove(fs bigint, fss int) RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C PARALLEL SAFE; CREATE VIEW aqo_query_stat AS SELECT * FROM aqo_query_stat(); CREATE VIEW aqo_query_texts AS SELECT * FROM aqo_query_texts(); CREATE VIEW aqo_data AS SELECT * FROM aqo_data(); CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); -CREATE FUNCTION aqo_stat_remove(fs bigint) RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT PARALLEL SAFE; +/* UI functions */ -- -- Show execution time of queries, for which AQO has statistics. 
@@ -141,44 +125,17 @@ ELSE END IF; END; $$ LANGUAGE plpgsql; - COMMENT ON FUNCTION aqo_execution_time(boolean) IS 'Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions.'; -- --- Remove all information about a query class from AQO storage. +-- Remove query class settings, text, statistics and ML data from AQO storage. +-- Return number of FSS records, removed from the storage. -- -CREATE OR REPLACE FUNCTION aqo_drop_class(queryid_rm bigint) -RETURNS integer AS $$ -DECLARE - lfs bigint; - num integer; -BEGIN - IF (queryid_rm = 0) THEN - raise EXCEPTION '[AQO] Cannot remove basic class %.', queryid_rm; - END IF; - - SELECT fs FROM aqo_queries WHERE (queryid = queryid_rm) INTO lfs; - - IF (lfs IS NULL) THEN - raise WARNING '[AQO] Nothing to remove for the class %.', queryid_rm; - RETURN 0; - END IF; - - IF (lfs <> queryid_rm) THEN - raise WARNING '[AQO] Removing query class has non-generic feature space value: id = %, fs = %.', queryid_rm, fs; - END IF; - - SELECT count(*) FROM aqo_data WHERE fs = lfs INTO num; - - PERFORM aqo_queries_remove(queryid_rm); - PERFORM aqo_stat_remove(queryid_rm); - PERFORM aqo_qtexts_remove(queryid_rm); - PERFORM aqo_data_remove(lfs, NULL); - RETURN num; -END; -$$ LANGUAGE plpgsql; - +CREATE OR REPLACE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer +AS 'MODULE_PATHNAME', 'aqo_drop_class' +LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_drop_class(bigint) IS 'Remove info about an query class from AQO ML knowledge base.'; @@ -190,9 +147,8 @@ COMMENT ON FUNCTION aqo_drop_class(bigint) IS -- Returns number of deleted rows from aqo_queries and aqo_data tables. 
-- CREATE OR REPLACE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) -AS 'MODULE_PATHNAME' +AS 'MODULE_PATHNAME', 'aqo_cleanup' LANGUAGE C STRICT VOLATILE; - COMMENT ON FUNCTION aqo_cleanup() IS 'Remove unneeded rows from the AQO ML storage'; @@ -327,4 +283,14 @@ CREATE FUNCTION aqo_queries_update(queryid bigint, fs bigint, learn_aqo bool, use_aqo bool, auto_tuning bool) RETURNS bool AS 'MODULE_PATHNAME', 'aqo_queries_update' -LANGUAGE C VOLATILE; \ No newline at end of file +LANGUAGE C VOLATILE; + +-- +-- Remove all records in the AQO storage. +-- Return number of rows removed. +-- +CREATE FUNCTION aqo_reset() RETURNS bigint +AS 'MODULE_PATHNAME', 'aqo_reset' +LANGUAGE C PARALLEL SAFE; +COMMENT ON FUNCTION aqo_reset() IS +'Reset all data gathered by AQO'; diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 3ccdb4e8..db117a0c 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -485,14 +485,8 @@ SELECT * FROM check_estimated_rows(' SET aqo.mode = 'learn'; SELECT * FROM aqo_drop_class(0); ERROR: [AQO] Cannot remove basic class 0. -CONTEXT: PL/pgSQL function aqo_drop_class(bigint) line 7 at RAISE SELECT * FROM aqo_drop_class(42); -WARNING: [AQO] Nothing to remove for the class 42. - aqo_drop_class ----------------- - 0 -(1 row) - +ERROR: [AQO] Nothing to remove for the class 42. 
-- Remove all data from ML knowledge base SELECT count(*) FROM ( SELECT aqo_drop_class(q1.id::bigint) FROM ( diff --git a/expected/gucs.out b/expected/gucs.out index e238bc61..08cf0fbd 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -92,7 +92,7 @@ SELECT obj_description('aqo_reset'::regproc::oid); List of functions Schema | Name | Result data type | Argument data types | Type --------+----------------+------------------+---------------------+------ - public | aqo_drop_class | integer | queryid_rm bigint | func + public | aqo_drop_class | integer | queryid bigint | func (1 row) \df aqo_cleanup diff --git a/storage.c b/storage.c index 9d79553a..caceb007 100644 --- a/storage.c +++ b/storage.c @@ -96,15 +96,12 @@ PG_FUNCTION_INFO_V1(aqo_query_stat); PG_FUNCTION_INFO_V1(aqo_query_texts); PG_FUNCTION_INFO_V1(aqo_data); PG_FUNCTION_INFO_V1(aqo_queries); -PG_FUNCTION_INFO_V1(aqo_stat_remove); -PG_FUNCTION_INFO_V1(aqo_qtexts_remove); -PG_FUNCTION_INFO_V1(aqo_data_remove); -PG_FUNCTION_INFO_V1(aqo_queries_remove); PG_FUNCTION_INFO_V1(aqo_enable_query); PG_FUNCTION_INFO_V1(aqo_disable_query); PG_FUNCTION_INFO_V1(aqo_queries_update); PG_FUNCTION_INFO_V1(aqo_reset); PG_FUNCTION_INFO_V1(aqo_cleanup); +PG_FUNCTION_INFO_V1(aqo_drop_class); bool @@ -399,15 +396,6 @@ aqo_stat_reset(void) return num_remove; } - -Datum -aqo_stat_remove(PG_FUNCTION_ARGS) -{ - uint64 queryid = (uint64) PG_GETARG_INT64(0); - - PG_RETURN_BOOL(_aqo_stat_remove(queryid)); -} - static void * _form_stat_record_cb(void *ctx, size_t *size) { @@ -1212,14 +1200,6 @@ _aqo_data_remove(data_key *key) return found; } -Datum -aqo_qtexts_remove(PG_FUNCTION_ARGS) -{ - uint64 queryid = (uint64) PG_GETARG_INT64(0); - - PG_RETURN_BOOL(_aqo_qtexts_remove(queryid)); -} - static long aqo_qtexts_reset(void) { @@ -1693,26 +1673,6 @@ _aqo_data_clean(uint64 fs) return removed; } -Datum -aqo_data_remove(PG_FUNCTION_ARGS) -{ - data_key key; - bool found; - - dsa_init(); - - if (PG_ARGISNULL(1)) - { - /* Remove all feature 
subspaces from the space */ - found = (_aqo_data_clean((uint64) PG_GETARG_INT64(0)) > 0); - return found; - } - - key.fs = (uint64) PG_GETARG_INT64(0); - key.fss = PG_GETARG_INT32(1); - PG_RETURN_BOOL(_aqo_data_remove(&key)); -} - static long aqo_data_reset(void) { @@ -1802,14 +1762,6 @@ aqo_queries(PG_FUNCTION_ARGS) return (Datum) 0; } -Datum -aqo_queries_remove(PG_FUNCTION_ARGS) -{ - uint64 queryid = (uint64) PG_GETARG_INT64(0); - - PG_RETURN_BOOL(_aqo_queries_remove(queryid)); -} - bool aqo_queries_store(uint64 queryid, uint64 fs, bool learn_aqo, bool use_aqo, bool auto_tuning) @@ -2204,3 +2156,50 @@ aqo_cleanup(PG_FUNCTION_ARGS) tuplestore_donestoring(tupstore); return (Datum) 0; } + +/* + * XXX: Maybe to allow usage of NULL value to make a reset? + */ +Datum +aqo_drop_class(PG_FUNCTION_ARGS) +{ + uint64 queryid = PG_GETARG_INT64(0); + bool found; + QueriesEntry *entry; + uint64 fs; + long cnt; + + if (queryid == 0) + elog(ERROR, "[AQO] Cannot remove basic class %lu.", queryid); + + /* Extract FS value for the queryid */ + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, + &found); + if (!found) + elog(ERROR, "[AQO] Nothing to remove for the class %lu.", queryid); + + fs = entry->fs; + LWLockRelease(&aqo_state->queries_lock); + + if (fs == 0) + elog(ERROR, "[AQO] Cannot remove class %lu with default FS.", queryid); + if (fs != queryid) + elog(WARNING, + "[AQO] Removing query class has non-generic feature space value: id = %lu, fs = %lu.", + queryid, fs); + + /* Now, remove all data related to the class */ + _aqo_queries_remove(queryid); + _aqo_stat_remove(queryid); + _aqo_qtexts_remove(queryid); + cnt = _aqo_data_clean(fs); + + /* Immediately save changes to permanent storage. 
*/ + aqo_stat_flush(); + aqo_data_flush(); + aqo_qtexts_flush(); + aqo_queries_flush(); + + PG_RETURN_INT32(cnt); +} From 1f2144e7f1398cc1a9b5ddd6fb54cc6332df0980 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 21 Jul 2022 08:42:32 +0500 Subject: [PATCH 112/203] Remove aqo_reset_query until we realize it is necessary. Now we are on a way to relocatability and this function must be rewritten or removed. So far we haven't used it - maybe it has a bad design? Also fix regression test instability - rows reordering issue. --- aqo--1.4--1.5.sql | 27 --------------------------- expected/forced_stat_collection.out | 3 ++- expected/gucs.out | 13 ------------- expected/relocatable.out | 6 ++++-- sql/forced_stat_collection.sql | 3 ++- sql/gucs.sql | 2 -- sql/relocatable.sql | 6 ++++-- 7 files changed, 12 insertions(+), 48 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 2af0f6ca..a05bc05e 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -208,33 +208,6 @@ $$ LANGUAGE plpgsql; COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS 'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; --- --- Remove all learning data for query with given ID. --- Can be used in the case when user don't want to drop preferences and --- accumulated statistics on a query class, but tries to re-learn AQO on this --- class. --- Returns a number of deleted rows in the aqo_data table. --- -CREATE OR REPLACE FUNCTION aqo_reset_query(queryid_res bigint) -RETURNS integer AS $$ -DECLARE - num integer; - lfs bigint; -BEGIN - IF (queryid_res = 0) THEN - raise WARNING '[AQO] Reset common feature space.' 
- END IF; - - SELECT fs FROM aqo_queries WHERE queryid = queryid_res INTO lfs; - SELECT count(*) FROM aqo_data WHERE fs = lfs INTO num; - DELETE FROM aqo_data WHERE fs = lfs; - RETURN num; -END; -$$ LANGUAGE plpgsql; - -COMMENT ON FUNCTION aqo_reset_query(bigint) IS -'Remove from AQO storage only learning data for given QueryId.'; - CREATE FUNCTION aqo_status(hash bigint) RETURNS TABLE ( "learn" BOOL, diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index 10e14b4f..a0a44e6a 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -40,7 +40,8 @@ SELECT * FROM aqo_data; SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex FROM aqo_queries AS aq JOIN aqo_query_stat AS aqs -ON aq.queryid = aqs.queryid; +ON aq.queryid = aqs.queryid +ORDER BY (cardinality_error_without_aqo); learn_aqo | use_aqo | auto_tuning | ce | nex -----------+---------+-------------+----------------------+----- f | f | f | {0.8637762840285226} | 1 diff --git a/expected/gucs.out b/expected/gucs.out index 08cf0fbd..1255a82a 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -62,12 +62,6 @@ SELECT obj_description('aqo_cleanup'::regproc::oid); Remove unneeded rows from the AQO ML storage (1 row) -SELECT obj_description('aqo_reset_query'::regproc::oid); - obj_description ---------------------------------------------------------------- - Remove from AQO storage only learning data for given QueryId. 
-(1 row) - SELECT obj_description('aqo_reset'::regproc::oid); obj_description -------------------------------- @@ -102,13 +96,6 @@ SELECT obj_description('aqo_reset'::regproc::oid); public | aqo_cleanup | record | OUT nfs integer, OUT nfss integer | func (1 row) -\df aqo_reset_query - List of functions - Schema | Name | Result data type | Argument data types | Type ---------+-----------------+------------------+---------------------+------ - public | aqo_reset_query | integer | queryid_res bigint | func -(1 row) - \df aqo_reset List of functions Schema | Name | Result data type | Argument data types | Type diff --git a/expected/relocatable.out b/expected/relocatable.out index f24add25..5fcf06e6 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -88,7 +88,8 @@ SELECT aqo_disable_query(id) FROM ( (2 rows) -SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f @@ -104,7 +105,8 @@ SELECT aqo_enable_query(id) FROM ( (2 rows) -SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index 231938ca..71c4ffc1 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -32,7 +32,8 @@ SELECT * FROM aqo_data; SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex FROM aqo_queries AS aq JOIN aqo_query_stat AS aqs -ON aq.queryid = aqs.queryid; +ON aq.queryid = aqs.queryid +ORDER BY (cardinality_error_without_aqo); SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); diff --git a/sql/gucs.sql b/sql/gucs.sql index 
2d113792..9cb13e00 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -21,14 +21,12 @@ SELECT obj_description('aqo_cardinality_error'::regproc::oid); SELECT obj_description('aqo_execution_time'::regproc::oid); SELECT obj_description('aqo_drop_class'::regproc::oid); SELECT obj_description('aqo_cleanup'::regproc::oid); -SELECT obj_description('aqo_reset_query'::regproc::oid); SELECT obj_description('aqo_reset'::regproc::oid); \df aqo_cardinality_error \df aqo_execution_time \df aqo_drop_class \df aqo_cleanup -\df aqo_reset_query \df aqo_reset -- Check stat reset diff --git a/sql/relocatable.sql b/sql/relocatable.sql index 2d8af862..e8cc57c3 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -41,10 +41,12 @@ ORDER BY (md5(query_text)) */ SELECT aqo_disable_query(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; -SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); SELECT aqo_enable_query(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; -SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); RESET search_path; DROP TABLE test CASCADE; From 3286434c9ecb031f6a68e8776e275bc2b519b46b Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 21 Jul 2022 10:55:33 +0500 Subject: [PATCH 113/203] Rewrite aqo_cardinality_error in C. One more step towards true relocatability. 
--- aqo--1.4--1.5.sql | 40 +------------------- storage.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 38 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index a05bc05e..18d27861 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -167,44 +167,8 @@ COMMENT ON FUNCTION aqo_cleanup() IS -- CREATE OR REPLACE FUNCTION aqo_cardinality_error(controlled boolean) RETURNS TABLE(num bigint, id bigint, fshash bigint, error float, nexecs bigint) -AS $$ -BEGIN -IF (controlled) THEN - RETURN QUERY - SELECT - row_number() OVER (ORDER BY (cerror, query_id, fs_hash) DESC) AS nn, - query_id, fs_hash, cerror, execs - FROM ( - SELECT - aq.queryid AS query_id, - aq.fs AS fs_hash, - cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror, - executions_with_aqo AS execs - FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.queryid = aqs.queryid - WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) - ) AS q1 - ORDER BY nn ASC; -ELSE - RETURN QUERY - SELECT - row_number() OVER (ORDER BY (cerror, query_id, fs_hash) DESC) AS nn, - query_id, fs_hash, cerror, execs - FROM ( - SELECT - aq.queryid AS query_id, - aq.fs AS fs_hash, - (SELECT AVG(t) FROM unnest(cardinality_error_without_aqo) t) AS cerror, - executions_without_aqo AS execs - FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.queryid = aqs.queryid - WHERE TRUE = ANY (SELECT unnest(cardinality_error_without_aqo) IS NOT NULL) - ) AS q1 - ORDER BY (nn) ASC; -END IF; -END; -$$ LANGUAGE plpgsql; - +AS 'MODULE_PATHNAME', 'aqo_cardinality_error' +LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS 'Get cardinality error of queries the last time they were executed. 
Order queries according to an error value.'; diff --git a/storage.c b/storage.c index caceb007..a2d112d7 100644 --- a/storage.c +++ b/storage.c @@ -102,6 +102,7 @@ PG_FUNCTION_INFO_V1(aqo_queries_update); PG_FUNCTION_INFO_V1(aqo_reset); PG_FUNCTION_INFO_V1(aqo_cleanup); PG_FUNCTION_INFO_V1(aqo_drop_class); +PG_FUNCTION_INFO_V1(aqo_cardinality_error); bool @@ -2203,3 +2204,95 @@ aqo_drop_class(PG_FUNCTION_ARGS) PG_RETURN_INT32(cnt); } + +typedef enum { + AQE_NN = 0, AQE_QUERYID, AQE_FS, AQE_CERROR, AQE_NEXECS, AQE_TOTAL_NCOLS +} ce_output_order; + +/* + * Show cardinality error gathered on last execution. + * Skip entries with empty stat slots. XXX: is it possible? + */ +Datum +aqo_cardinality_error(PG_FUNCTION_ARGS) +{ + bool controlled = PG_GETARG_BOOL(0); + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[AQE_TOTAL_NCOLS]; + bool nulls[AQE_TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + QueriesEntry *qentry; + StatEntry *sentry; + int counter = 0; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + Assert(tupDesc->natts == AQE_TOTAL_NCOLS); + + tupstore = 
tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + + hash_seq_init(&hash_seq, queries_htab); + while ((qentry = hash_seq_search(&hash_seq)) != NULL) + { + bool found; + double *ce; + int64 nexecs; + int nvals; + + memset(nulls, 0, AQE_TOTAL_NCOLS * sizeof(nulls[0])); + + sentry = (StatEntry *) hash_search(stat_htab, &qentry->queryid, + HASH_FIND, &found); + if (!found) + /* Statistics not found by some reason. Just go further */ + continue; + + nvals = controlled ? sentry->cur_stat_slot_aqo : sentry->cur_stat_slot; + if (nvals == 0) + /* No one stat slot filled */ + continue; + + nexecs = controlled ? sentry->execs_with_aqo : sentry->execs_without_aqo; + ce = controlled ? sentry->est_error_aqo : sentry->est_error; + + values[AQE_NN] = Int32GetDatum(counter++); + values[AQE_QUERYID] = Int64GetDatum(qentry->queryid); + values[AQE_FS] = Int64GetDatum(qentry->fs); + values[AQE_NEXECS] = Int64GetDatum(nexecs); + values[AQE_CERROR] = Float8GetDatum(ce[nvals - 1]); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } + + LWLockRelease(&aqo_state->stat_lock); + LWLockRelease(&aqo_state->queries_lock); + + tuplestore_donestoring(tupstore); + return (Datum) 0; +} From f5bed2c381389655bcc4a2e4a82a7b47aca941c5 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 21 Jul 2022 11:37:09 +0500 Subject: [PATCH 114/203] Rewrite aqo_execution_time in C. Now, I guess, the extension is truly relocatable. We should check it by some tests on extension moving. 
--- aqo--1.4--1.5.sql | 151 ++++++++++----------------------------- expected/schema.out | 6 +- expected/top_queries.out | 8 +-- sql/schema.sql | 6 +- sql/top_queries.sql | 2 +- storage.c | 107 ++++++++++++++++++++++++++- 6 files changed, 158 insertions(+), 122 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 18d27861..23102d6e 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -75,6 +75,43 @@ CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); /* UI functions */ + +CREATE FUNCTION aqo_enable_query(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_enable_query' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION aqo_disable_query(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_enable_query' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION aqo_queries_update( + queryid bigint, fs bigint, learn_aqo bool, use_aqo bool, auto_tuning bool) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_queries_update' +LANGUAGE C VOLATILE; + +-- +-- Get cardinality error of queries the last time they were executed. +-- IN: +-- controlled - show queries executed under a control of AQO (true); +-- executed without an AQO control, but AQO has a stat on the query (false). +-- +-- OUT: +-- num - sequental number. Smaller number corresponds to higher error. +-- id - ID of a query. +-- fshash - feature space. Usually equal to zero or ID. +-- error - AQO error that calculated on plan nodes of the query. +-- nexecs - number of executions of queries associated with this ID. +-- +CREATE OR REPLACE FUNCTION aqo_cardinality_error(controlled boolean) +RETURNS TABLE(num bigint, id bigint, fshash bigint, error float, nexecs bigint) +AS 'MODULE_PATHNAME', 'aqo_cardinality_error' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS +'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; + -- -- Show execution time of queries, for which AQO has statistics. 
-- controlled - show stat on executions where AQO was used for cardinality @@ -83,48 +120,8 @@ CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); -- CREATE OR REPLACE FUNCTION aqo_execution_time(controlled boolean) RETURNS TABLE(num bigint, id bigint, fshash bigint, exec_time float, nexecs bigint) -AS $$ -BEGIN -IF (controlled) THEN - -- Show a query execution time made with AQO support for the planner - -- cardinality estimations. Here we return result of last execution. - RETURN QUERY - SELECT - row_number() OVER (ORDER BY (exectime, queryid, fs_hash) DESC) AS nn, - queryid, fs_hash, exectime, execs - FROM ( - SELECT - aq.queryid AS queryid, - aq.fs AS fs_hash, - execution_time_with_aqo[array_length(execution_time_with_aqo, 1)] AS exectime, - executions_with_aqo AS execs - FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.queryid = aqs.queryid - WHERE TRUE = ANY (SELECT unnest(execution_time_with_aqo) IS NOT NULL) - ) AS q1 - ORDER BY nn ASC; - -ELSE - -- Show a query execution time made without any AQO advise. - -- Return an average value across all executions. - RETURN QUERY - SELECT - row_number() OVER (ORDER BY (exectime, queryid, fs_hash) DESC) AS nn, - queryid, fs_hash, exectime, execs - FROM ( - SELECT - aq.queryid AS queryid, - aq.fs AS fs_hash, - (SELECT AVG(t) FROM unnest(execution_time_without_aqo) t) AS exectime, - executions_without_aqo AS execs - FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.queryid = aqs.queryid - WHERE TRUE = ANY (SELECT unnest(execution_time_without_aqo) IS NOT NULL) - ) AS q1 - ORDER BY (nn) ASC; -END IF; -END; -$$ LANGUAGE plpgsql; +AS 'MODULE_PATHNAME', 'aqo_execution_time' +LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_execution_time(boolean) IS 'Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. 
Another case (AQO not used), return an average value of execution time across all known executions.'; @@ -152,76 +149,6 @@ LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_cleanup() IS 'Remove unneeded rows from the AQO ML storage'; --- --- Get cardinality error of queries the last time they were executed. --- IN: --- controlled - show queries executed under a control of AQO (true); --- executed without an AQO control, but AQO has a stat on the query (false). --- --- OUT: --- num - sequental number. Smaller number corresponds to higher error. --- id - ID of a query. --- fshash - feature space. Usually equal to zero or ID. --- error - AQO error that calculated on plan nodes of the query. --- nexecs - number of executions of queries associated with this ID. --- -CREATE OR REPLACE FUNCTION aqo_cardinality_error(controlled boolean) -RETURNS TABLE(num bigint, id bigint, fshash bigint, error float, nexecs bigint) -AS 'MODULE_PATHNAME', 'aqo_cardinality_error' -LANGUAGE C STRICT VOLATILE; -COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS -'Get cardinality error of queries the last time they were executed. 
Order queries according to an error value.'; - -CREATE FUNCTION aqo_status(hash bigint) -RETURNS TABLE ( - "learn" BOOL, - "use aqo" BOOL, - "auto tune" BOOL, - "fspace hash" bigINT, - "t_naqo" TEXT, - "err_naqo" TEXT, - "iters" BIGINT, - "t_aqo" TEXT, - "err_aqo" TEXT, - "iters_aqo" BIGINT -) AS $$ -SELECT learn_aqo,use_aqo,auto_tuning,fs, - to_char(execution_time_without_aqo[n4],'9.99EEEE'), - to_char(cardinality_error_without_aqo[n2],'9.99EEEE'), - executions_without_aqo, - to_char(execution_time_with_aqo[n3],'9.99EEEE'), - to_char(cardinality_error_with_aqo[n1],'9.99EEEE'), - executions_with_aqo -FROM aqo_queries aq, aqo_query_stat aqs, - (SELECT array_length(n1,1) AS n1, array_length(n2,1) AS n2, - array_length(n3,1) AS n3, array_length(n4,1) AS n4 - FROM - (SELECT cardinality_error_with_aqo AS n1, - cardinality_error_without_aqo AS n2, - execution_time_with_aqo AS n3, - execution_time_without_aqo AS n4 - FROM aqo_query_stat aqs WHERE - aqs.queryid = $1) AS al) AS q -WHERE (aqs.queryid = aq.queryid) AND - aqs.queryid = $1; -$$ LANGUAGE SQL; - -CREATE FUNCTION aqo_enable_query(queryid bigint) -RETURNS void -AS 'MODULE_PATHNAME', 'aqo_enable_query' -LANGUAGE C STRICT VOLATILE; - -CREATE FUNCTION aqo_disable_query(queryid bigint) -RETURNS void -AS 'MODULE_PATHNAME', 'aqo_enable_query' -LANGUAGE C STRICT VOLATILE; - -CREATE FUNCTION aqo_queries_update(queryid bigint, fs bigint, learn_aqo bool, - use_aqo bool, auto_tuning bool) -RETURNS bool -AS 'MODULE_PATHNAME', 'aqo_queries_update' -LANGUAGE C VOLATILE; - -- -- Remove all records in the AQO storage. -- Return number of rows removed. diff --git a/expected/schema.out b/expected/schema.out index e2004386..0b5a5c07 100644 --- a/expected/schema.out +++ b/expected/schema.out @@ -25,14 +25,16 @@ SELECT * FROM test; -- Check AQO service relations state after some manipulations -- Exclude fields with hash values from the queries. Hash is depend on -- nodefuncs code which is highly PostgreSQL version specific. 
-SELECT query_text FROM aqo_query_texts; +SELECT query_text FROM aqo_query_texts +ORDER BY (md5(query_text)) DESC; query_text --------------------------------------- COMMON feature space (do not delete!) SELECT * FROM test; (2 rows) -SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; +SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f diff --git a/expected/top_queries.out b/expected/top_queries.out index 728405aa..ba72d7c8 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -63,7 +63,7 @@ SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y 31 (1 row) -SELECT num, to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te +SELECT to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te WHERE te.fshash = ( SELECT fs FROM aqo_queries WHERE aqo_queries.queryid = ( @@ -71,9 +71,9 @@ WHERE te.fshash = ( WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' ) ); - num | to_char ------+----------- - 1 | 1.94e+00 + to_char +----------- + 1.94e+00 (1 row) -- Should return zero diff --git a/sql/schema.sql b/sql/schema.sql index f6c5c53d..6f5f4454 100644 --- a/sql/schema.sql +++ b/sql/schema.sql @@ -21,6 +21,8 @@ SELECT * FROM test; -- Check AQO service relations state after some manipulations -- Exclude fields with hash values from the queries. Hash is depend on -- nodefuncs code which is highly PostgreSQL version specific. 
-SELECT query_text FROM aqo_query_texts; -SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; +SELECT query_text FROM aqo_query_texts +ORDER BY (md5(query_text)) DESC; +SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); DROP SCHEMA IF EXISTS test1 CASCADE; diff --git a/sql/top_queries.sql b/sql/top_queries.sql index 98a0c8ed..da3817a0 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -33,7 +33,7 @@ CREATE TABLE t2 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; -SELECT num, to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te +SELECT to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te WHERE te.fshash = ( SELECT fs FROM aqo_queries WHERE aqo_queries.queryid = ( diff --git a/storage.c b/storage.c index a2d112d7..26d8ec38 100644 --- a/storage.c +++ b/storage.c @@ -103,6 +103,7 @@ PG_FUNCTION_INFO_V1(aqo_reset); PG_FUNCTION_INFO_V1(aqo_cleanup); PG_FUNCTION_INFO_V1(aqo_drop_class); PG_FUNCTION_INFO_V1(aqo_cardinality_error); +PG_FUNCTION_INFO_V1(aqo_execution_time); bool @@ -2282,7 +2283,7 @@ aqo_cardinality_error(PG_FUNCTION_ARGS) nexecs = controlled ? sentry->execs_with_aqo : sentry->execs_without_aqo; ce = controlled ? sentry->est_error_aqo : sentry->est_error; - values[AQE_NN] = Int32GetDatum(counter++); + values[AQE_NN] = Int32GetDatum(++counter); values[AQE_QUERYID] = Int64GetDatum(qentry->queryid); values[AQE_FS] = Int64GetDatum(qentry->fs); values[AQE_NEXECS] = Int64GetDatum(nexecs); @@ -2296,3 +2297,107 @@ aqo_cardinality_error(PG_FUNCTION_ARGS) tuplestore_donestoring(tupstore); return (Datum) 0; } + +typedef enum { + ET_NN = 0, ET_QUERYID, ET_FS, ET_EXECTIME, ET_NEXECS, ET_TOTAL_NCOLS +} et_output_order; + +/* + * XXX: maybe to merge with aqo_cardinality_error ? 
+ * XXX: Do we really want sequental number ? + */ +Datum +aqo_execution_time(PG_FUNCTION_ARGS) +{ + bool controlled = PG_GETARG_BOOL(0); + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[AQE_TOTAL_NCOLS]; + bool nulls[AQE_TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + QueriesEntry *qentry; + StatEntry *sentry; + int counter = 0; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + Assert(tupDesc->natts == ET_TOTAL_NCOLS); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + + hash_seq_init(&hash_seq, queries_htab); + while ((qentry = hash_seq_search(&hash_seq)) != NULL) + { + bool found; + double *et; + int64 nexecs; + int nvals; + double tm = 0; + + memset(nulls, 0, ET_TOTAL_NCOLS * sizeof(nulls[0])); + + sentry = (StatEntry *) hash_search(stat_htab, &qentry->queryid, + HASH_FIND, &found); + if (!found) + /* Statistics not 
found by some reason. Just go further */ + continue; + + nvals = controlled ? sentry->cur_stat_slot_aqo : sentry->cur_stat_slot; + if (nvals == 0) + /* No one stat slot filled */ + continue; + + nexecs = controlled ? sentry->execs_with_aqo : sentry->execs_without_aqo; + et = controlled ? sentry->exec_time_aqo : sentry->exec_time; + + if (!controlled) + { + int i; + /* Calculate average execution time */ + for (i = 0; i < nvals; i++) + tm += et[i]; + tm /= nvals; + } + else + tm = et[nvals - 1]; + + values[ET_NN] = Int32GetDatum(++counter); + values[ET_QUERYID] = Int64GetDatum(qentry->queryid); + values[ET_FS] = Int64GetDatum(qentry->fs); + values[ET_NEXECS] = Int64GetDatum(nexecs); + values[ET_EXECTIME] = Float8GetDatum(tm); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } + + LWLockRelease(&aqo_state->stat_lock); + LWLockRelease(&aqo_state->queries_lock); + + tuplestore_donestoring(tupstore); + return (Datum) 0; +} From 96a7c2d2605580c920ea81b99838cad1ba146bbb Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 21 Jul 2022 14:12:08 +0500 Subject: [PATCH 115/203] Add TAP test on AQO working with pgbench after moving to another schema. 
--- t/001_pgbench.pl | 57 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 1f096203..eae0c829 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -4,7 +4,7 @@ use Config; use PostgresNode; use TestLib; -use Test::More tests => 22; +use Test::More tests => 27; my $node = get_new_node('aqotest'); $node->init; @@ -298,6 +298,59 @@ is($new_stat_count == $stat_count - $pgb_stat_count, 1, 'Total number of samples in aqo_query_stat'); +# ############################################################################## +# +# AQO works after moving to another schema +# +# ############################################################################## + +# Move the extension to not-in-search-path schema +# use LEARN mode to guarantee that AQO will be triggered on each query. +$node->safe_psql('postgres', "CREATE SCHEMA test; ALTER EXTENSION aqo SET SCHEMA test"); +$node->safe_psql('postgres', "SELECT * FROM test.aqo_reset()"); # Clear data + +$res = $node->safe_psql('postgres', "SELECT count(*) FROM test.aqo_queries"); +is($res, 1, 'The extension data was reset'); + +$node->command_ok([ 'pgbench', '-i', '-s', '1' ], 'init pgbench tables'); +$node->safe_psql('postgres', " + ALTER SYSTEM SET aqo.mode = 'learn'; + ALTER SYSTEM SET log_statement = 'ddl'; + SELECT pg_reload_conf(); +"); +$node->restart(); + +$node->command_ok([ 'pgbench', '-t', "25", '-c', "$CLIENTS", '-j', "$THREADS" ], + 'pgbench should work with moved AQO.'); + +# DEBUG +$res = $node->safe_psql('postgres', " + SELECT executions_with_aqo, query_text + FROM test.aqo_query_stat a, test.aqo_query_texts b + WHERE a.queryid = b.queryid +"); +note("executions:\n$res\n"); + +$res = $node->safe_psql('postgres', + "SELECT sum(executions_with_aqo) FROM test.aqo_query_stat"); + +# 25 trans * 10 clients * 4 query classes = 1000 + unique SELECT to pgbench_branches +is($res, 1001, 'Each query should be logged in LEARN 
mode'); +$res = $node->safe_psql('postgres', + "SELECT sum(executions_without_aqo) FROM test.aqo_query_stat"); +is($res, 0, 'AQO has learned on the queries - 2'); + +# Try to call UI functions. Break the test on an error +$res = $node->safe_psql('postgres', " + SELECT * FROM test.aqo_cardinality_error(true); + SELECT * FROM test.aqo_execution_time(true); + SELECT * FROM + (SELECT queryid FROM test.aqo_queries WHERE queryid<>0 LIMIT 1) q, + LATERAL test.aqo_drop_class(queryid); + SELECT * FROM test.aqo_cleanup(); +"); +note("OUTPUT:\n$res\n"); + $node->safe_psql('postgres', "DROP EXTENSION aqo"); # ############################################################################## @@ -333,7 +386,7 @@ $node->safe_psql('postgres', " CREATE EXTENSION aqo; ALTER SYSTEM SET aqo.mode = 'intelligent'; - ALTER SYSTEM SET log_statement = 'ddl'; + ALTER SYSTEM SET log_statement = 'none'; SELECT pg_reload_conf(); "); $node->restart(); From 2785297104b8556dfa9f4d8a13b83fe690c12a91 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Fri, 29 Jul 2022 15:05:49 +0500 Subject: [PATCH 116/203] Introduce the feature_subspace regression test. Search in neighbour classes disclosed some issues which we have been ignoring for a long time. But now we should fix them to get a practically usable tool. These problems mostly related to a subspace encoding algorithm. 
--- Makefile | 3 +- expected/feature_subspace.out | 72 +++++++++++++++++++++++++++++++++++ expected/look_a_like.out | 7 ++++ sql/feature_subspace.sql | 30 +++++++++++++++ sql/look_a_like.sql | 2 + 5 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 expected/feature_subspace.out create mode 100644 sql/feature_subspace.sql diff --git a/Makefile b/Makefile index e87dc52e..1ef23b54 100755 --- a/Makefile +++ b/Makefile @@ -28,7 +28,8 @@ REGRESS = aqo_disabled \ temp_tables \ top_queries \ relocatable\ - look_a_like + look_a_like \ + feature_subspace fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw stat_srcdir = $(top_srcdir)/contrib/pg_stat_statements diff --git a/expected/feature_subspace.out b/expected/feature_subspace.out new file mode 100644 index 00000000..931d4394 --- /dev/null +++ b/expected/feature_subspace.out @@ -0,0 +1,72 @@ +-- This test related to some issues on feature subspace calculation +CREATE EXTENSION aqo; +SET aqo.mode = 'learn'; +SET aqo.join_threshold = 0; +SET aqo.show_details = 'on'; +CREATE TABLE a AS (SELECT gs AS x FROM generate_series(1,10) AS gs); +CREATE TABLE b AS (SELECT gs AS x FROM generate_series(1,100) AS gs); +-- +-- A LEFT JOIN B isn't equal B LEFT JOIN A. +-- +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT * FROM a LEFT JOIN b USING (x); + QUERY PLAN +----------------------------------------------------- + Merge Left Join (actual rows=10 loops=1) + AQO not used + Merge Cond: (a.x = b.x) + -> Sort (actual rows=10 loops=1) + AQO not used + Sort Key: a.x + Sort Method: quicksort Memory: 25kB + -> Seq Scan on a (actual rows=10 loops=1) + AQO not used + -> Sort (actual rows=11 loops=1) + AQO not used + Sort Key: b.x + Sort Method: quicksort Memory: 30kB + -> Seq Scan on b (actual rows=100 loops=1) + AQO not used + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(18 rows) + +-- TODO: Using method of other classes neighbours we get a bad estimation. 
+EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT * FROM b LEFT JOIN a USING (x); + QUERY PLAN +------------------------------------------------------ + Hash Left Join (actual rows=100 loops=1) + AQO: rows=10, error=-900% + Hash Cond: (b.x = a.x) + -> Seq Scan on b (actual rows=100 loops=1) + AQO: rows=100, error=0% + -> Hash (actual rows=10 loops=1) + AQO not used + Buckets: 1024 Batches: 1 Memory Usage: 9kB + -> Seq Scan on a (actual rows=10 loops=1) + AQO: rows=10, error=0% + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(13 rows) + +-- Look into the reason: two JOINs from different classes have the same FSS. +SELECT to_char(d1.targets[1], 'FM999.00') AS target FROM aqo_data d1 +JOIN aqo_data d2 ON (d1.fs <> d2.fs AND d1.fss = d2.fss) +WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids); + target +-------- + 2.30 + 4.61 +(2 rows) + +DROP TABLE a,b CASCADE; +SELECT true FROM aqo_reset(); + bool +------ + t +(1 row) + +DROP EXTENSION aqo; diff --git a/expected/look_a_like.out b/expected/look_a_like.out index e3fbf4bb..a867f10a 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -230,4 +230,11 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; JOINS: 0 (19 rows) +DROP TABLE a,b CASCADE; +SELECT true FROM aqo_reset(); + bool +------ + t +(1 row) + DROP EXTENSION aqo CASCADE; diff --git a/sql/feature_subspace.sql b/sql/feature_subspace.sql new file mode 100644 index 00000000..93434d14 --- /dev/null +++ b/sql/feature_subspace.sql @@ -0,0 +1,30 @@ +-- This test related to some issues on feature subspace calculation + +CREATE EXTENSION aqo; + +SET aqo.mode = 'learn'; +SET aqo.join_threshold = 0; +SET aqo.show_details = 'on'; + +CREATE TABLE a AS (SELECT gs AS x FROM generate_series(1,10) AS gs); +CREATE TABLE b AS (SELECT gs AS x FROM generate_series(1,100) AS gs); + +-- +-- A LEFT JOIN B isn't equal B LEFT JOIN A. 
+-- + +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT * FROM a LEFT JOIN b USING (x); + +-- TODO: Using method of other classes neighbours we get a bad estimation. +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT * FROM b LEFT JOIN a USING (x); + +-- Look into the reason: two JOINs from different classes have the same FSS. +SELECT to_char(d1.targets[1], 'FM999.00') AS target FROM aqo_data d1 +JOIN aqo_data d2 ON (d1.fs <> d2.fs AND d1.fss = d2.fss) +WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids); + +DROP TABLE a,b CASCADE; +SELECT true FROM aqo_reset(); +DROP EXTENSION aqo; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index cf6b05c5..a179f8f4 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -69,4 +69,6 @@ FROM expln(' SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +DROP TABLE a,b CASCADE; +SELECT true FROM aqo_reset(); DROP EXTENSION aqo CASCADE; \ No newline at end of file From 3e99fdafefb669c9b33c31dd27da073229d8695b Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Wed, 3 Aug 2022 12:18:16 +0300 Subject: [PATCH 117/203] Change uint on uint32 --- storage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage.c b/storage.c index 26d8ec38..f56171c8 100644 --- a/storage.c +++ b/storage.c @@ -586,7 +586,7 @@ data_store(const char *filename, form_record_t callback, { FILE *file; size_t size; - uint counter = 0; + uint32 counter = 0; void *data; char *tmpfile; From 41cf245d6ad1015fd642148be6668039760ec3ef Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 5 Aug 2022 10:19:45 +0300 Subject: [PATCH 118/203] Avoid memory leak on deletion from uint64 list --- hash.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/hash.c b/hash.c index 8e12f2ff..8981ad1a 100644 --- a/hash.c +++ b/hash.c @@ -176,6 +176,9 @@ get_grouped_exprs_hash(int child_fss, List 
*group_exprs) final_hashes[0] = child_fss; final_hashes[1] = get_int_array_hash(hashes, i); + + pfree(hashes); + return get_int_array_hash(final_hashes, 2); } @@ -475,6 +478,7 @@ get_relations_hash(List *relsigns) int nhashes = 0; int64 *hashes = palloc(list_length(relsigns) * sizeof(uint64)); ListCell *lc; + int64 result; foreach(lc, relsigns) { @@ -485,8 +489,12 @@ get_relations_hash(List *relsigns) qsort(hashes, nhashes, sizeof(int64), int64_compare); /* Make a final hash value */ - return DatumGetInt64(hash_any_extended((const unsigned char *) hashes, + + result = DatumGetInt64(hash_any_extended((const unsigned char *) hashes, nhashes * sizeof(int64), 0)); + + pfree(hashes); + return result; } /* From ef76161da42bbdff051d26a1a490a33df58fb369 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Wed, 3 Aug 2022 20:14:22 +0300 Subject: [PATCH 119/203] Fix feature_subspace output test. Delete platform dependent lines containing Memory and add order by command in feature_subspace test for statical result. --- expected/feature_subspace.out | 38 +++++++++++++++++++++++------------ sql/feature_subspace.sql | 27 +++++++++++++++++++------ 2 files changed, 46 insertions(+), 19 deletions(-) diff --git a/expected/feature_subspace.out b/expected/feature_subspace.out index 931d4394..185bede0 100644 --- a/expected/feature_subspace.out +++ b/expected/feature_subspace.out @@ -6,11 +6,24 @@ SET aqo.show_details = 'on'; CREATE TABLE a AS (SELECT gs AS x FROM generate_series(1,10) AS gs); CREATE TABLE b AS (SELECT gs AS x FROM generate_series(1,100) AS gs); -- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +-- -- A LEFT JOIN B isn't equal B LEFT JOIN A. 
-- -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) -SELECT * FROM a LEFT JOIN b USING (x); - QUERY PLAN +SELECT str AS result +FROM expln(' +SELECT * FROM a LEFT JOIN b USING (x);') AS str +WHERE str NOT LIKE '%Memory%'; + result ----------------------------------------------------- Merge Left Join (actual rows=10 loops=1) AQO not used @@ -18,25 +31,25 @@ SELECT * FROM a LEFT JOIN b USING (x); -> Sort (actual rows=10 loops=1) AQO not used Sort Key: a.x - Sort Method: quicksort Memory: 25kB -> Seq Scan on a (actual rows=10 loops=1) AQO not used -> Sort (actual rows=11 loops=1) AQO not used Sort Key: b.x - Sort Method: quicksort Memory: 30kB -> Seq Scan on b (actual rows=100 loops=1) AQO not used Using aqo: true AQO mode: LEARN JOINS: 0 -(18 rows) +(16 rows) -- TODO: Using method of other classes neighbours we get a bad estimation. -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) -SELECT * FROM b LEFT JOIN a USING (x); - QUERY PLAN ------------------------------------------------------- +SELECT str AS result +FROM expln(' +SELECT * FROM b LEFT JOIN a USING (x);') AS str +WHERE str NOT LIKE '%Memory%'; + result +---------------------------------------------------- Hash Left Join (actual rows=100 loops=1) AQO: rows=10, error=-900% Hash Cond: (b.x = a.x) @@ -44,18 +57,17 @@ SELECT * FROM b LEFT JOIN a USING (x); AQO: rows=100, error=0% -> Hash (actual rows=10 loops=1) AQO not used - Buckets: 1024 Batches: 1 Memory Usage: 9kB -> Seq Scan on a (actual rows=10 loops=1) AQO: rows=10, error=0% Using aqo: true AQO mode: LEARN JOINS: 0 -(13 rows) +(12 rows) -- Look into the reason: two JOINs from different classes have the same FSS. 
SELECT to_char(d1.targets[1], 'FM999.00') AS target FROM aqo_data d1 JOIN aqo_data d2 ON (d1.fs <> d2.fs AND d1.fss = d2.fss) -WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids); +WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids) order by target; target -------- 2.30 diff --git a/sql/feature_subspace.sql b/sql/feature_subspace.sql index 93434d14..0176a700 100644 --- a/sql/feature_subspace.sql +++ b/sql/feature_subspace.sql @@ -10,20 +10,35 @@ CREATE TABLE a AS (SELECT gs AS x FROM generate_series(1,10) AS gs); CREATE TABLE b AS (SELECT gs AS x FROM generate_series(1,100) AS gs); -- --- A LEFT JOIN B isn't equal B LEFT JOIN A. +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. -- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) -SELECT * FROM a LEFT JOIN b USING (x); +-- +-- A LEFT JOIN B isn't equal B LEFT JOIN A. +-- +SELECT str AS result +FROM expln(' +SELECT * FROM a LEFT JOIN b USING (x);') AS str +WHERE str NOT LIKE '%Memory%'; -- TODO: Using method of other classes neighbours we get a bad estimation. -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) -SELECT * FROM b LEFT JOIN a USING (x); +SELECT str AS result +FROM expln(' +SELECT * FROM b LEFT JOIN a USING (x);') AS str +WHERE str NOT LIKE '%Memory%'; -- Look into the reason: two JOINs from different classes have the same FSS. 
SELECT to_char(d1.targets[1], 'FM999.00') AS target FROM aqo_data d1
JOIN aqo_data d2 ON (d1.fs <> d2.fs AND d1.fss = d2.fss)
-WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids);
+WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids) order by target;

DROP TABLE a,b CASCADE;
SELECT true FROM aqo_reset();
DROP EXTENSION aqo;
From 34147afb21093814d3f1e2cf6905e08d456e2067 Mon Sep 17 00:00:00 2001
From: Alena Rybakina
Date: Tue, 30 Aug 2022 10:01:02 +0300
Subject: [PATCH 120/203] Add hierarchical memory contexts so that AQO data is
 not kept in generic PostgreSQL memory contexts, except for predictions that
 are passed on to the optimizer.

We add additional memory contexts for managing memory. AQOMemoryContext is
renamed to AQOCacheMemCtx and, as before, contains environment data. During
prediction for plan nodes, all allocations are made in the AQO Predict Memory
Context, which is cleaned up at the execution stage of the query. After
executing a query we collect some long-lived information until it is written
into the AQO knowledge table; such data is saved in the AQO Learn Memory
Context. During these stages we also calculate hashes from the gathered
clauses, selectivity arrays and relid lists. This tactical information is
short-lived, so we save it in the AQO Utility Memory Context, which is cleaned
up inside the calculating function or immediately after it completes.
--- aqo.c | 58 ++++++++++++++++++++++++++++++----- aqo.h | 9 ++++-- cardinality_estimation.c | 3 -- cardinality_hooks.c | 66 +++++++++++++++++++--------------------- hash.c | 38 ++++++++++------------- machine_learning.c | 13 -------- path_utils.c | 3 +- postprocessing.c | 57 +++++++++++----------------------- preprocessing.c | 35 ++++++++++++++++----- selectivity_cache.c | 21 +++++++++++-- storage.c | 6 ---- 11 files changed, 171 insertions(+), 138 deletions(-) diff --git a/aqo.c b/aqo.c index e24b7aac..ce7f90cb 100644 --- a/aqo.c +++ b/aqo.c @@ -79,9 +79,23 @@ double log_selectivity_lower_bound = -30; * Currently we use it only to store query_text string which is initialized * after a query parsing and is used during the query planning. */ -MemoryContext AQOMemoryContext; -MemoryContext AQO_cache_mem_ctx; + QueryContextData query_context; + +MemoryContext AQOTopMemCtx = NULL; + +/* Is released at the end of transaction */ +MemoryContext AQOCacheMemCtx = NULL; + +/* Should be released in-place, just after a huge calculation */ +MemoryContext AQOUtilityMemCtx = NULL; + +/* Is released at the end of planning */ +MemoryContext AQOPredictMemCtx = NULL; + +/* Is released at the end of learning */ +MemoryContext AQOLearnMemCtx = NULL; + /* Additional plan info */ int njoins; @@ -116,7 +130,7 @@ aqo_free_callback(ResourceReleasePhase phase, if (isTopLevel) { - list_free_deep(cur_classes); + MemoryContextReset(AQOCacheMemCtx); cur_classes = NIL; } } @@ -302,12 +316,42 @@ _PG_init(void) create_upper_paths_hook = aqo_store_upper_signature_hook; init_deactivated_queries_storage(); - AQOMemoryContext = AllocSetContextCreate(TopMemoryContext, - "AQOMemoryContext", + + /* + * Create own Top memory Context for reporting AQO memory in the future. 
+	 */
+	AQOTopMemCtx = AllocSetContextCreate(TopMemoryContext,
-										 "AQOMemoryContext",
+										 "AQOTopMemoryContext",
 										 ALLOCSET_DEFAULT_SIZES);
-	AQO_cache_mem_ctx = AllocSetContextCreate(TopMemoryContext,
-										 "AQO_cache_mem_ctx",
+	/*
+	 * The AQO Cache Memory Context contains environment data.
+	 */
+	AQOCacheMemCtx = AllocSetContextCreate(AQOTopMemCtx,
+										 "AQOCacheMemCtx",
 										 ALLOCSET_DEFAULT_SIZES);
+	/*
+	 * AQOUtilityMemoryContext contains short-lived information which
+	 * is derived from the gathered clauses, selectivity arrays and relid lists
+	 * while calculating hashes. It is cleaned up inside the calculating
+	 * function or immediately after it has completed.
+	 */
+	AQOUtilityMemCtx = AllocSetContextCreate(AQOTopMemCtx,
+											 "AQOUtilityMemoryContext",
+											 ALLOCSET_DEFAULT_SIZES);
+	/*
+	 * AQOPredictMemoryContext saves information necessary for making predictions for plan nodes
+	 * and is cleaned up at the execution stage of the query.
+	 */
+	AQOPredictMemCtx = AllocSetContextCreate(AQOTopMemCtx,
+											 "AQOPredictMemoryContext",
+											 ALLOCSET_DEFAULT_SIZES);
+	/*
+	 * AQOLearnMemoryContext saves information necessary for writing down to the AQO knowledge table
+	 * and is cleaned up after doing this operation.
+ */ + AQOLearnMemCtx = AllocSetContextCreate(AQOTopMemCtx, + "AQOLearnMemoryContext", + ALLOCSET_DEFAULT_SIZES); RegisterResourceReleaseCallback(aqo_free_callback, NULL); RegisterAQOPlanNodeMethods(); diff --git a/aqo.h b/aqo.h index 135ae24d..092480df 100644 --- a/aqo.h +++ b/aqo.h @@ -221,9 +221,12 @@ extern double log_selectivity_lower_bound; extern QueryContextData query_context; extern int njoins; -/* Memory context for long-live data */ -extern MemoryContext AQOMemoryContext; -extern MemoryContext AQO_cache_mem_ctx; +/* AQO Memory contexts */ +extern MemoryContext AQOTopMemCtx; +extern MemoryContext AQOCacheMemCtx; +extern MemoryContext AQOUtilityMemCtx; +extern MemoryContext AQOPredictMemCtx; +extern MemoryContext AQOLearnMemCtx; /* Saved hook values in case of unload */ extern post_parse_analyze_hook_type prev_post_parse_analyze_hook; diff --git a/cardinality_estimation.c b/cardinality_estimation.c index cb8997f6..4baba286 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -52,7 +52,6 @@ predict_debug_output(List *clauses, List *selectivities, appendStringInfo(&debug_str, "}, result: %lf", result); elog(DEBUG1, "Prediction: %s", debug_str.data); - pfree(debug_str.data); } #endif @@ -104,8 +103,6 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, #ifdef AQO_DEBUG_PRINT predict_debug_output(clauses, selectivities, relsigns, *fss, result); #endif - pfree(features); - OkNNr_free(data); if (result < 0) return -1; diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 2c837bd9..a3e8e331 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -144,20 +144,21 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) List *selectivities = NULL; List *clauses; int fss = 0; + MemoryContext old_ctx_m; if (IsQueryDisabled()) /* Fast path. 
*/ goto default_estimator; + old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); + if (query_context.use_aqo || query_context.learn_aqo) selectivities = get_selectivities(root, rel->baserestrictinfo, 0, JOIN_INNER, NULL); if (!query_context.use_aqo) { - if (query_context.learn_aqo) - list_free_deep(selectivities); - + MemoryContextSwitchTo(old_ctx_m); goto default_estimator; } @@ -174,10 +175,8 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) &fss); rel->fss_hash = fss; - list_free(rels.hrels); - list_free(rels.signatures); - list_free_deep(selectivities); - list_free(clauses); + /* Return to the caller's memory context. */ + MemoryContextSwitchTo(old_ctx_m); if (predicted >= 0) { @@ -224,14 +223,16 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, int *eclass_hash; int current_hash; int fss = 0; + MemoryContext oldctx; if (IsQueryDisabled()) /* Fast path */ goto default_estimator; + oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); + if (query_context.use_aqo || query_context.learn_aqo) { - MemoryContext old_ctx_m; selectivities = list_concat( get_selectivities(root, param_clauses, rel->relid, @@ -247,8 +248,6 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, rte = planner_rt_fetch(rel->relid, root); get_eclasses(allclauses, &nargs, &args_hash, &eclass_hash); - old_ctx_m = MemoryContextSwitchTo(AQO_cache_mem_ctx); - forboth(l, allclauses, l2, selectivities) { current_hash = get_clause_hash( @@ -257,19 +256,11 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, cache_selectivity(current_hash, rel->relid, rte->relid, *((double *) lfirst(l2))); } - - MemoryContextSwitchTo(old_ctx_m); - pfree(args_hash); - pfree(eclass_hash); } if (!query_context.use_aqo) { - if (query_context.learn_aqo) - { - list_free_deep(selectivities); - list_free(allclauses); - } + MemoryContextSwitchTo(oldctx); goto default_estimator; } @@ -282,8 +273,9 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, } predicted = predict_for_relation(allclauses, 
selectivities, rels.signatures, &fss); - list_free(rels.hrels); - list_free(rels.signatures); + + /* Return to the caller's memory context */ + MemoryContextSwitchTo(oldctx); predicted_ppi_rows = predicted; fss_ppi_hash = fss; @@ -317,20 +309,20 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, List *outer_selectivities; List *current_selectivities = NULL; int fss = 0; + MemoryContext old_ctx_m; if (IsQueryDisabled()) /* Fast path */ goto default_estimator; + old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); + if (query_context.use_aqo || query_context.learn_aqo) current_selectivities = get_selectivities(root, restrictlist, 0, sjinfo->jointype, sjinfo); - if (!query_context.use_aqo) { - if (query_context.learn_aqo) - list_free_deep(current_selectivities); - + MemoryContextSwitchTo(old_ctx_m); goto default_estimator; } @@ -347,8 +339,9 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, predicted = predict_for_relation(allclauses, selectivities, rels.signatures, &fss); - list_free(rels.hrels); - list_free(rels.signatures); + + /* Return to the caller's memory context */ + MemoryContextSwitchTo(old_ctx_m); rel->fss_hash = fss; @@ -389,20 +382,21 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, List *outer_selectivities; List *current_selectivities = NULL; int fss = 0; + MemoryContext old_ctx_m; if (IsQueryDisabled()) /* Fast path */ goto default_estimator; + old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); + if (query_context.use_aqo || query_context.learn_aqo) current_selectivities = get_selectivities(root, clauses, 0, sjinfo->jointype, sjinfo); if (!query_context.use_aqo) { - if (query_context.learn_aqo) - list_free_deep(current_selectivities); - + MemoryContextSwitchTo(old_ctx_m); goto default_estimator; } @@ -417,8 +411,8 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, predicted = predict_for_relation(allclauses, selectivities, rels.signatures, &fss); - list_free(rels.hrels); - 
list_free(rels.signatures); + /* Return to the caller's memory context */ + MemoryContextSwitchTo(old_ctx_m); predicted_ppi_rows = predicted; fss_ppi_hash = fss; @@ -453,8 +447,6 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, clauses = get_path_clauses(subpath, root, &selectivities); (void) predict_for_relation(clauses, selectivities, rels.signatures, &child_fss); - list_free(rels.hrels); - list_free(rels.signatures); } *fss = get_grouped_exprs_hash(child_fss, group_exprs); @@ -475,6 +467,7 @@ aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, { int fss; double predicted; + MemoryContext old_ctx_m; if (!query_context.use_aqo) goto default_estimator; @@ -489,12 +482,15 @@ aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, if (groupExprs == NIL) return 1.0; + old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); + predicted = predict_num_groups(root, subpath, groupExprs, &fss); if (predicted > 0.) { grouped_rel->predicted_cardinality = predicted; grouped_rel->rows = predicted; grouped_rel->fss_hash = fss; + MemoryContextSwitchTo(old_ctx_m); return predicted; } else @@ -504,6 +500,8 @@ aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, */ grouped_rel->predicted_cardinality = -1; + MemoryContextSwitchTo(old_ctx_m); + default_estimator: return default_estimate_num_groups(root, groupExprs, subpath, grouped_rel, pgset); diff --git a/hash.c b/hash.c index 8981ad1a..7a7b9b8e 100644 --- a/hash.c +++ b/hash.c @@ -71,7 +71,6 @@ get_query_hash(Query *parse, const char *query_text) /* XXX: remove_locations and remove_consts are heavy routines. 
*/ str_repr = remove_locations(remove_consts(nodeToString(parse))); hash = DatumGetUInt64(hash_any_extended((void *) str_repr, strlen(str_repr),0)); - pfree(str_repr); return hash; } @@ -145,7 +144,6 @@ ldelete_uint64(List *list, uint64 datum) { if (*((uint64 *)lfirst(cell)) == datum) { - pfree(lfirst(cell)); list = list_delete_ptr(list, lfirst(cell)); return list; } @@ -177,8 +175,6 @@ get_grouped_exprs_hash(int child_fss, List *group_exprs) final_hashes[0] = child_fss; final_hashes[1] = get_int_array_hash(hashes, i); - pfree(hashes); - return get_int_array_hash(final_hashes, 2); } @@ -216,6 +212,7 @@ get_fss_for_object(List *relsigns, List *clauselist, int sh = 0, old_sh; int fss_hash; + MemoryContext old_ctx_m; n = list_length(clauselist); @@ -224,14 +221,15 @@ get_fss_for_object(List *relsigns, List *clauselist, (nfeatures == NULL && features == NULL)); get_eclasses(clauselist, &nargs, &args_hash, &eclass_hash); + if (nfeatures != NULL) + *features = palloc0(sizeof(**features) * n); + + old_ctx_m = MemoryContextSwitchTo(AQOUtilityMemCtx); clause_hashes = palloc(sizeof(*clause_hashes) * n); clause_has_consts = palloc(sizeof(*clause_has_consts) * n); sorted_clauses = palloc(sizeof(*sorted_clauses) * n); - if (nfeatures != NULL) - *features = palloc0(sizeof(**features) * n); - i = 0; foreach(lc, clauselist) { @@ -294,18 +292,14 @@ get_fss_for_object(List *relsigns, List *clauselist, /* * Generate feature subspace hash. 
*/ + clauses_hash = get_int_array_hash(sorted_clauses, n - sh); eclasses_hash = get_int_array_hash(eclass_hash, nargs); relations_hash = (int) get_relations_hash(relsigns); fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relations_hash); - pfree(clause_hashes); - pfree(sorted_clauses); - pfree(idx); - pfree(inverse_idx); - pfree(clause_has_consts); - pfree(args_hash); - pfree(eclass_hash); + MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOUtilityMemCtx); if (nfeatures != NULL) { @@ -493,7 +487,6 @@ get_relations_hash(List *relsigns) result = DatumGetInt64(hash_any_extended((const unsigned char *) hashes, nhashes * sizeof(int64), 0)); - pfree(hashes); return result; } @@ -688,13 +681,19 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) int i, v; int *e_hashes; + MemoryContext old_ctx_m; get_clauselist_args(clauselist, nargs, args_hash); + *eclass_hash = palloc((*nargs) * sizeof(**eclass_hash)); - p = perform_eclasses_join(clauselist, *nargs, *args_hash); + old_ctx_m = MemoryContextSwitchTo(AQOUtilityMemCtx); + p = perform_eclasses_join(clauselist, *nargs, *args_hash); lsts = palloc((*nargs) * sizeof(*lsts)); e_hashes = palloc((*nargs) * sizeof(*e_hashes)); + + MemoryContextSwitchTo(old_ctx_m); + for (i = 0; i < *nargs; ++i) lsts[i] = NIL; @@ -706,15 +705,10 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) for (i = 0; i < *nargs; ++i) e_hashes[i] = get_unordered_int_list_hash(lsts[i]); - *eclass_hash = palloc((*nargs) * sizeof(**eclass_hash)); for (i = 0; i < *nargs; ++i) (*eclass_hash)[i] = e_hashes[disjoint_set_get_parent(p, i)]; - for (i = 0; i < *nargs; ++i) - list_free(lsts[i]); - pfree(lsts); - pfree(p); - pfree(e_hashes); + MemoryContextReset(AQOUtilityMemCtx); } /* diff --git a/machine_learning.c b/machine_learning.c index 3077983d..7514bc86 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -59,19 +59,6 @@ OkNNr_allocate(int ncols) return data; } -void 
-OkNNr_free(OkNNrdata *data) -{ - int i; - - if (data->cols > 0) - { - for (i = 0; i < aqo_K; ++i) - pfree(data->matrix[i]); - } - pfree(data); -} - /* * Computes L2-distance between two given vectors. */ diff --git a/path_utils.c b/path_utils.c index 24f43002..d8dba208 100644 --- a/path_utils.c +++ b/path_utils.c @@ -212,7 +212,6 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) strlen(relname), 0))); hrels = lappend_oid(hrels, entry->relid); - pfree(relname); } ReleaseSysCache(htup); @@ -486,6 +485,8 @@ is_appropriate_path(Path *path) /* * Converts path info into plan node for collecting it after query execution. + * Don't switch here to any AQO-specific memory contexts, because we should + * store AQO prediction in the same context, as the plan. */ void aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) diff --git a/postprocessing.c b/postprocessing.c index db3510ff..011e1aee 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -120,7 +120,6 @@ learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, /* Critical section */ atomic_fss_learn_step(fs, fss, data, NULL, target, rfactor, rels->hrels, ctx->isTimedOut); - OkNNr_free(data); /* End of critical section */ } @@ -160,9 +159,6 @@ learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, atomic_fss_learn_step(fs, fss, data, features, target, rfactor, rels->hrels, ctx->isTimedOut); /* End of critical section */ - - OkNNr_free(data); - pfree(features); } /* @@ -183,12 +179,16 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, double *cur_sel; int cur_hash; int cur_relid; + MemoryContext old_ctx_m; parametrized_sel = was_parametrized && (list_length(relidslist) == 1); if (parametrized_sel) { cur_relid = linitial_int(relidslist); + + old_ctx_m = MemoryContextSwitchTo(AQOUtilityMemCtx); get_eclasses(clauselist, &nargs, &args_hash, &eclass_hash); + MemoryContextSwitchTo(old_ctx_m); } foreach(l, clauselist) @@ -219,10 +219,9 @@ restore_selectivities(List 
*clauselist, List *relidslist, JoinType join_type, } if (parametrized_sel) - { - pfree(args_hash); - pfree(eclass_hash); - } + { + MemoryContextReset(AQOUtilityMemCtx); + } return lst; } @@ -712,6 +711,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) StatEntry *stat; instr_time endtime; EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); + MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); cardinality_sum_errors = 0.; cardinality_num_objects = 0; @@ -750,9 +750,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) * Analyze plan if AQO need to learn or need to collect statistics only. */ learnOnPlanState(queryDesc->planstate, (void *) &ctx); - list_free(ctx.clauselist); - list_free(ctx.relidslist); - list_free(ctx.selectivities); } /* Calculate execution time. */ @@ -778,8 +775,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) /* Store all learn data into the AQO service relations. */ if (!query_context.adding_query && query_context.auto_tuning) automatical_query_tuning(query_context.query_hash, stat); - - pfree(stat); } } @@ -787,6 +782,10 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) cur_classes = ldelete_uint64(cur_classes, query_context.query_hash); end: + /* Release all AQO-specific memory, allocated during learning procedure */ + MemoryContextSwitchTo(oldctx); + MemoryContextReset(AQOLearnMemCtx); + if (prev_ExecutorEnd_hook) prev_ExecutorEnd_hook(queryDesc); else @@ -812,21 +811,11 @@ StoreToQueryEnv(QueryDesc *queryDesc) { EphemeralNamedRelation enr; int qcsize = sizeof(QueryContextData); - MemoryContext oldCxt; bool newentry = false; + MemoryContext oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); - /* - * Choose memory context for AQO parameters. Use pre-existed context if - * someone earlier created queryEnv (usually, SPI), or base on the queryDesc - * memory context. 
- */ - if (queryDesc->queryEnv != NULL) - oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc->queryEnv)); - else - { - oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc)); + if (queryDesc->queryEnv == NULL) queryDesc->queryEnv = create_queryEnv(); - } Assert(queryDesc->queryEnv); enr = get_ENR(queryDesc->queryEnv, AQOPrivateData); @@ -848,7 +837,7 @@ StoreToQueryEnv(QueryDesc *queryDesc) if (newentry) register_ENR(queryDesc->queryEnv, enr); - MemoryContextSwitchTo(oldCxt); + MemoryContextSwitchTo(oldctx); } static bool @@ -870,24 +859,14 @@ static void StorePlanInternals(QueryDesc *queryDesc) { EphemeralNamedRelation enr; - MemoryContext oldCxt; bool newentry = false; + MemoryContext oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); njoins = 0; planstate_tree_walker(queryDesc->planstate, calculateJoinNum, &njoins); - /* - * Choose memory context for AQO parameters. Use pre-existed context if - * someone earlier created queryEnv (usually, SPI), or base on the queryDesc - * memory context. 
- */ - if (queryDesc->queryEnv != NULL) - oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc->queryEnv)); - else - { - oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc)); + if (queryDesc->queryEnv == NULL) queryDesc->queryEnv = create_queryEnv(); - } Assert(queryDesc->queryEnv); enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); @@ -909,7 +888,7 @@ StorePlanInternals(QueryDesc *queryDesc) if (newentry) register_ENR(queryDesc->queryEnv, enr); - MemoryContextSwitchTo(oldCxt); + MemoryContextSwitchTo(oldctx); } /* diff --git a/preprocessing.c b/preprocessing.c index 748c85e4..ce5a624f 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -127,7 +127,8 @@ aqo_planner(Query *parse, ParamListInfo boundParams) { bool query_is_stored = false; - MemoryContext oldCxt; + MemoryContext oldctx; + oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); /* * We do not work inside an parallel worker now by reason of insert into @@ -145,6 +146,7 @@ aqo_planner(Query *parse, * We should disable AQO for this query to remember this decision along * all execution stages. */ + MemoryContextSwitchTo(oldctx); disable_aqo_for_query(); return call_default_planner(parse, @@ -154,7 +156,15 @@ aqo_planner(Query *parse, } selectivity_cache_clear(); + MemoryContextSwitchTo(oldctx); + + oldctx = MemoryContextSwitchTo(AQOUtilityMemCtx); query_context.query_hash = get_query_hash(parse, query_string); + MemoryContextSwitchTo(oldctx); + + MemoryContextReset(AQOUtilityMemCtx); + + oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); /* By default, they should be equal */ query_context.fspace_hash = query_context.query_hash; @@ -167,6 +177,7 @@ aqo_planner(Query *parse, * feature space, that is processing yet (disallow invalidation * recursion, as an example). 
*/ + MemoryContextSwitchTo(oldctx); disable_aqo_for_query(); return call_default_planner(parse, @@ -174,13 +185,16 @@ aqo_planner(Query *parse, cursorOptions, boundParams); } + MemoryContextSwitchTo(oldctx); elog(DEBUG1, "AQO will be used for query '%s', class "UINT64_FORMAT, query_string ? query_string : "null string", query_context.query_hash); - oldCxt = MemoryContextSwitchTo(AQOMemoryContext); + oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); cur_classes = lappend_uint64(cur_classes, query_context.query_hash); - MemoryContextSwitchTo(oldCxt); + MemoryContextSwitchTo(oldctx); + + oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); if (aqo_mode == AQO_MODE_DISABLED) { @@ -335,11 +349,16 @@ aqo_planner(Query *parse, if (!IsQueryDisabled()) /* It's good place to set timestamp of start of a planning process. */ INSTR_TIME_SET_CURRENT(query_context.start_planning_time); - - return call_default_planner(parse, - query_string, - cursorOptions, - boundParams); + { + PlannedStmt *stmt; + MemoryContextSwitchTo(oldctx); + stmt = call_default_planner(parse, query_string, + cursorOptions, boundParams); + + /* Release the memory, allocated for AQO predictions */ + MemoryContextReset(AQOPredictMemCtx); + return stmt; + } } /* diff --git a/selectivity_cache.c b/selectivity_cache.c index 0b354ba0..cb7a5fd1 100644 --- a/selectivity_cache.c +++ b/selectivity_cache.c @@ -30,6 +30,9 @@ typedef struct List *objects = NIL; +/* Specific memory context for selectivity objects */ +MemoryContext AQOCacheSelectivity = NULL; + /* * Stores the given selectivity for clause_hash, relid and global_relid * of the clause. 
@@ -42,6 +45,13 @@ cache_selectivity(int clause_hash, { ListCell *l; Entry *cur_element; + MemoryContext old_ctx; + + if (!AQOCacheSelectivity) + AQOCacheSelectivity = AllocSetContextCreate(AQOTopMemCtx, + "AQOCacheSelectivity", + ALLOCSET_DEFAULT_SIZES); + foreach(l, objects) { @@ -53,13 +63,14 @@ cache_selectivity(int clause_hash, return; } } - + old_ctx = MemoryContextSwitchTo(AQOCacheSelectivity); cur_element = palloc(sizeof(*cur_element)); cur_element->clause_hash = clause_hash; cur_element->relid = relid; cur_element->global_relid = global_relid; cur_element->selectivity = selectivity; objects = lappend(objects, cur_element); + MemoryContextSwitchTo(old_ctx); } /* @@ -89,6 +100,12 @@ selectivity_cache_find_global_relid(int clause_hash, int global_relid) void selectivity_cache_clear(void) { - MemoryContextReset(AQO_cache_mem_ctx); + if (!AQOCacheSelectivity) + { + Assert(objects == NIL); + return; + } + + MemoryContextReset(AQOCacheSelectivity); objects = NIL; } diff --git a/storage.c b/storage.c index f56171c8..2c9ca15a 100644 --- a/storage.c +++ b/storage.c @@ -149,7 +149,6 @@ form_matrix(double *matrix, int nrows, int ncols) array = construct_md_array(elems, NULL, 2, dims, lbs, FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd'); - pfree(elems); return array; } @@ -172,7 +171,6 @@ form_vector(double *vector, int nrows) elems[i] = Float8GetDatum(vector[i]); array = construct_md_array(elems, NULL, 1, dims, lbs, FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd'); - pfree(elems); return array; } @@ -606,7 +604,6 @@ data_store(const char *filename, form_record_t callback, if (fwrite(&size, sizeof(size), 1, file) != 1 || fwrite(data, size, 1, file) != 1) goto error; - pfree(data); counter++; } @@ -618,7 +615,6 @@ data_store(const char *filename, form_record_t callback, } (void) durable_rename(tmpfile, filename, LOG); - pfree(tmpfile); elog(LOG, "[AQO] %d records stored in file %s.", counter, filename); return 0; @@ -886,7 +882,6 @@ data_load(const char *filename, deform_record_t callback, void 
*ctx) if (fread(data, size, 1, file) != 1) goto read_error; res = callback(data, size); - pfree(data); if (!res) { @@ -1634,7 +1629,6 @@ aqo_data(PG_FUNCTION_ARGS) array = construct_array(elems, entry->nrels, OIDOID, sizeof(Oid), true, TYPALIGN_INT); values[AD_OIDS] = PointerGetDatum(array); - pfree(elems); } else nulls[AD_OIDS] = true; From c297fec3439c5bed55942c56222c0a5f2292b87c Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 2 Sep 2022 11:42:59 +0300 Subject: [PATCH 121/203] Add memory context to cover memory space when applying aqo_timeout handler and applying isQueryUsingSystemRelation_walker and add UtilityMemCtx for allocation space for junk_fss and actual_fss list and reset it after cleaning aqo database process. --- aqo.c | 2 +- postprocessing.c | 2 ++ preprocessing.c | 4 +++- selectivity_cache.c | 2 +- storage.c | 5 +++-- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/aqo.c b/aqo.c index ce7f90cb..8552e5e6 100644 --- a/aqo.c +++ b/aqo.c @@ -316,7 +316,7 @@ _PG_init(void) create_upper_paths_hook = aqo_store_upper_signature_hook; init_deactivated_queries_storage(); - + /* * Create own Top memory Context for reporting AQO memory in the future. */ diff --git a/postprocessing.c b/postprocessing.c index 011e1aee..04500b8b 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -615,6 +615,7 @@ static int exec_nested_level = 0; static void aqo_timeout_handler(void) { + MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); aqo_obj_stat ctx = {NIL, NIL, NIL, false, false}; if (!timeoutCtl.queryDesc || !ExtractFromQueryEnv(timeoutCtl.queryDesc)) @@ -627,6 +628,7 @@ aqo_timeout_handler(void) elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. 
AQO tried to learn on partial data."); learnOnPlanState(timeoutCtl.queryDesc->planstate, (void *) &ctx); + MemoryContextSwitchTo(oldctx); } static bool diff --git a/preprocessing.c b/preprocessing.c index ce5a624f..55000e79 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -157,7 +157,7 @@ aqo_planner(Query *parse, selectivity_cache_clear(); MemoryContextSwitchTo(oldctx); - + oldctx = MemoryContextSwitchTo(AQOUtilityMemCtx); query_context.query_hash = get_query_hash(parse, query_string); MemoryContextSwitchTo(oldctx); @@ -458,6 +458,7 @@ jointree_walker(Node *jtnode, void *context) static bool isQueryUsingSystemRelation_walker(Node *node, void *context) { + MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); AQOPreWalkerCtx *ctx = (AQOPreWalkerCtx *) context; if (node == NULL) @@ -499,6 +500,7 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) } jointree_walker((Node *) query->jointree, context); + MemoryContextSwitchTo(oldctx); /* Recursively plunge into subqueries and CTEs */ return query_tree_walker(query, diff --git a/selectivity_cache.c b/selectivity_cache.c index cb7a5fd1..fbaa8829 100644 --- a/selectivity_cache.c +++ b/selectivity_cache.c @@ -51,7 +51,7 @@ cache_selectivity(int clause_hash, AQOCacheSelectivity = AllocSetContextCreate(AQOTopMemCtx, "AQOCacheSelectivity", ALLOCSET_DEFAULT_SIZES); - + foreach(l, objects) { diff --git a/storage.c b/storage.c index 2c9ca15a..391f2784 100644 --- a/storage.c +++ b/storage.c @@ -2026,6 +2026,7 @@ cleanup_aqo_database(bool gentle, int *fs_num, int *fss_num) for(i = 0; i < dentry->nrels; i++) { Oid reloid = ObjectIdGetDatum(*(Oid *)ptr); + MemoryContext oldctx = MemoryContextSwitchTo(AQOUtilityMemCtx); if (!SearchSysCacheExists1(RELOID, reloid)) /* Remember this value */ @@ -2034,6 +2035,7 @@ cleanup_aqo_database(bool gentle, int *fs_num, int *fss_num) else actual_fss = list_append_unique_int(actual_fss, dentry->key.fss); + MemoryContextSwitchTo(oldctx); ptr += sizeof(Oid); } @@ -2083,8 
+2085,7 @@ cleanup_aqo_database(bool gentle, int *fs_num, int *fss_num) (*fs_num) += (int) _aqo_queries_remove(entry->queryid); } - list_free(junk_fss); - list_free(actual_fss); + MemoryContextReset(AQOUtilityMemCtx); } /* From eb87e74a327d07a20de6b456792826c389bf595d Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 6 Sep 2022 10:51:24 +0300 Subject: [PATCH 122/203] Add processing cases with selectivities are equal as -1. Add assert check on NaN values. --- hash.c | 3 ++- machine_learning.c | 3 +++ postprocessing.c | 5 +++++ storage.c | 3 +++ 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/hash.c b/hash.c index 7a7b9b8e..d4866448 100644 --- a/hash.c +++ b/hash.c @@ -260,6 +260,7 @@ get_fss_for_object(List *relsigns, List *clauselist, if (nfeatures != NULL) { (*features)[inverse_idx[i]] = log(*s); + Assert(!isnan(log(*s))); if ((*features)[inverse_idx[i]] < log_selectivity_lower_bound) (*features)[inverse_idx[i]] = log_selectivity_lower_bound; } @@ -693,7 +694,7 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) e_hashes = palloc((*nargs) * sizeof(*e_hashes)); MemoryContextSwitchTo(old_ctx_m); - + for (i = 0; i < *nargs; ++i) lsts[i] = NIL; diff --git a/machine_learning.c b/machine_learning.c index 7514bc86..7138db38 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -69,7 +69,10 @@ fs_distance(double *a, double *b, int len) int i; for (i = 0; i < len; ++i) + { + Assert(!isnan(a[i])); res += (a[i] - b[i]) * (a[i] - b[i]); + } if (len != 0) res = sqrt(res / len); return res; diff --git a/postprocessing.c b/postprocessing.c index 04500b8b..d0e11e67 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -214,6 +214,11 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, else cur_sel = &rinfo->outer_selec; + if (*cur_sel < 0) + *cur_sel = 0; + + Assert(cur_sel > 0); + lst = lappend(lst, cur_sel); i++; } diff --git a/storage.c b/storage.c index 391f2784..a93c4281 100644 --- 
a/storage.c +++ b/storage.c @@ -145,7 +145,10 @@ form_matrix(double *matrix, int nrows, int ncols) elems = palloc(sizeof(*elems) * nrows * ncols); for (i = 0; i < nrows; ++i) for (j = 0; j < ncols; ++j) + { elems[i * ncols + j] = Float8GetDatum(matrix[i * ncols + j]); + Assert(!isnan(matrix[i * ncols + j])); + } array = construct_md_array(elems, NULL, 2, dims, lbs, FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd'); From d2a98e4bab98ff60bebfffb9ffe3db56f4709943 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 7 Sep 2022 11:03:25 +0500 Subject: [PATCH 123/203] Some mistakes, revealed by regression tests, passed at Raspberry PI4: 1. Fix uint64 format in some output messages. 2. Input parameters conversion mistake in aqo_queries_update. 3. Unneeded routine 'get_aqo_schema'. 4. Fix type of the first parameter (counter) in aqo_cardinality_error() and aqo_execution_time() routines. 5. Fix aqo_data() routine. --- aqo--1.4--1.5.sql | 6 ++--- aqo.c | 46 -------------------------------------- aqo.h | 1 - expected/gucs.out | 14 ++++++------ learn_cache.c | 19 +++++++++++----- path_utils.c | 3 ++- postprocessing.c | 7 ++++-- storage.c | 56 ++++++++++++++++++++++++++++++++--------------- 8 files changed, 69 insertions(+), 83 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 23102d6e..3244a721 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -62,7 +62,7 @@ CREATE FUNCTION aqo_data ( OUT features double precision[][], OUT targets double precision[], OUT reliability double precision[], - OUT oids integer[] + OUT oids Oid[] ) RETURNS SETOF record AS 'MODULE_PATHNAME', 'aqo_data' @@ -106,7 +106,7 @@ LANGUAGE C VOLATILE; -- nexecs - number of executions of queries associated with this ID. 
-- CREATE OR REPLACE FUNCTION aqo_cardinality_error(controlled boolean) -RETURNS TABLE(num bigint, id bigint, fshash bigint, error float, nexecs bigint) +RETURNS TABLE(num integer, id bigint, fshash bigint, error double precision, nexecs bigint) AS 'MODULE_PATHNAME', 'aqo_cardinality_error' LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS @@ -119,7 +119,7 @@ COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS -- Last case is possible in disabled mode with aqo.force_collect_stat = 'on'. -- CREATE OR REPLACE FUNCTION aqo_execution_time(controlled boolean) -RETURNS TABLE(num bigint, id bigint, fshash bigint, exec_time float, nexecs bigint) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) AS 'MODULE_PATHNAME', 'aqo_execution_time' LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_execution_time(boolean) IS diff --git a/aqo.c b/aqo.c index 8552e5e6..0612663d 100644 --- a/aqo.c +++ b/aqo.c @@ -359,52 +359,6 @@ _PG_init(void) RequestAddinShmemSpace(aqo_memsize()); } -/* - * Return AQO schema's Oid or InvalidOid if that's not possible. 
- */ -Oid -get_aqo_schema(void) -{ - Oid result; - Relation rel; - SysScanDesc scandesc; - HeapTuple tuple; - ScanKeyData entry[1]; - Oid ext_oid; - - /* It's impossible to fetch pg_aqo's schema now */ - if (!IsTransactionState()) - return InvalidOid; - - ext_oid = get_extension_oid("aqo", true); - if (ext_oid == InvalidOid) - return InvalidOid; /* exit if pg_aqo does not exist */ - - ScanKeyInit(&entry[0], -#if PG_VERSION_NUM >= 120000 - Anum_pg_extension_oid, -#else - ObjectIdAttributeNumber, -#endif - BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(ext_oid)); - - rel = relation_open(ExtensionRelationId, AccessShareLock); - scandesc = systable_beginscan(rel, ExtensionOidIndexId, true, - NULL, 1, entry); - tuple = systable_getnext(scandesc); - - /* We assume that there can be at most one matching tuple */ - if (HeapTupleIsValid(tuple)) - result = ((Form_pg_extension) GETSTRUCT(tuple))->extnamespace; - else - result = InvalidOid; - - systable_endscan(scandesc); - relation_close(rel, AccessShareLock); - return result; -} - /* * AQO is really needed for any activity? 
*/ diff --git a/aqo.h b/aqo.h index 092480df..64092b94 100644 --- a/aqo.h +++ b/aqo.h @@ -297,7 +297,6 @@ extern double *selectivity_cache_find_global_relid(int clause_hash, int global_relid); extern void selectivity_cache_clear(void); -extern Oid get_aqo_schema(void); extern bool IsQueryDisabled(void); extern List *cur_classes; diff --git a/expected/gucs.out b/expected/gucs.out index 1255a82a..bbfd8001 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -69,17 +69,17 @@ SELECT obj_description('aqo_reset'::regproc::oid); (1 row) \df aqo_cardinality_error - List of functions - Schema | Name | Result data type | Argument data types | Type ---------+-----------------------+------------------------------------------------------------------------------------+---------------------+------ - public | aqo_cardinality_error | TABLE(num bigint, id bigint, fshash bigint, error double precision, nexecs bigint) | controlled boolean | func + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-----------------------+-------------------------------------------------------------------------------------+---------------------+------ + public | aqo_cardinality_error | TABLE(num integer, id bigint, fshash bigint, error double precision, nexecs bigint) | controlled boolean | func (1 row) \df aqo_execution_time List of functions - Schema | Name | Result data type | Argument data types | Type ---------+--------------------+----------------------------------------------------------------------------------------+---------------------+------ - public | aqo_execution_time | TABLE(num bigint, id bigint, fshash bigint, exec_time double precision, nexecs bigint) | controlled boolean | func + Schema | Name | Result data type | Argument data types | Type +--------+--------------------+-----------------------------------------------------------------------------------------+---------------------+------ + public | aqo_execution_time | TABLE(num integer, 
id bigint, fshash bigint, exec_time double precision, nexecs bigint) | controlled boolean | func (1 row) \df aqo_drop_class diff --git a/learn_cache.c b/learn_cache.c index e0951fbe..74b72249 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -112,11 +112,19 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids) ptr = (char *) hdr + sizeof(dsm_block_hdr); /* start point of variable data */ /* copy the matrix into DSM storage */ - for (i = 0; i < aqo_K; ++i) + + if (hdr->cols > 0) { - if (i < hdr->rows) + for (i = 0; i < aqo_K; ++i) + { + if (i >= hdr->rows) + break; + + if (!ptr || !data->matrix[i]) + elog(PANIC, "Something disruptive have happened! %d, %d (%d %d)", i, hdr->rows, found, hdr->cols); memcpy(ptr, data->matrix[i], sizeof(double) * hdr->cols); - ptr += sizeof(double) * data->cols; + ptr += sizeof(double) * data->cols; + } } /* copy targets into DSM storage */ @@ -177,7 +185,7 @@ lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids) Assert(fss_htab && aqo_learn_statement_timeout); if (aqo_show_details) - elog(NOTICE, "[AQO] Load ML data for fs %lu, fss %d from the cache", + elog(NOTICE, "[AQO] Load ML data for fs "UINT64_FORMAT", fss %d from the cache", fs, fss); LWLockAcquire(&aqo_state->lock, LW_SHARED); @@ -213,6 +221,7 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **reloids) Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); Assert(hdr->magic == AQO_SHARED_MAGIC); + Assert(hdr && ptr); data->rows = hdr->rows; data->cols = hdr->cols; @@ -264,7 +273,7 @@ lc_flush_data(void) ptr = get_dsm_all(&size); /* Iterate through records and store them into the aqo_data table */ - while(size > 0) + while (size > 0) { dsm_block_hdr *hdr = (dsm_block_hdr *) ptr; OkNNrdata data; diff --git a/path_utils.c b/path_utils.c index d8dba208..57cced69 100644 --- a/path_utils.c +++ b/path_utils.c @@ -53,7 +53,7 @@ create_aqo_plan_node() { AQOPlanNode *node = (AQOPlanNode *) 
newNode(sizeof(AQOPlanNode), T_ExtensibleNode); - + Assert(node != NULL); memcpy(node, &DefaultAQOPlanNode, sizeof(AQOPlanNode)); node->rels = palloc(sizeof(RelSortOut)); node->rels->hrels = NIL; @@ -570,6 +570,7 @@ AQOnodeCopy(struct ExtensibleNode *enew, const struct ExtensibleNode *eold) Assert(IsA(old, ExtensibleNode)); Assert(strcmp(old->node.extnodename, AQO_PLAN_NODE) == 0); + Assert(new && old); /* Copy static fields in one command */ memcpy(new, old, sizeof(AQOPlanNode)); diff --git a/postprocessing.c b/postprocessing.c index d0e11e67..9436f518 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -320,7 +320,7 @@ should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, /* This node s*/ if (aqo_show_details) elog(NOTICE, - "[AQO] Learn on a plan node (%lu, %d), " + "[AQO] Learn on a plan node ("UINT64_FORMAT", %d), " "predicted rows: %.0lf, updated prediction: %.0lf", query_context.query_hash, node->fss, predicted, *nrows); @@ -336,7 +336,7 @@ should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, if (ctx->learn && aqo_show_details && fabs(*nrows - predicted) / predicted > 0.2) elog(NOTICE, - "[AQO] Learn on a finished plan node (%lu, %d), " + "[AQO] Learn on a finished plan node ("UINT64_FORMAT", %d), " "predicted rows: %.0lf, updated prediction: %.0lf", query_context.query_hash, node->fss, predicted, *nrows); @@ -839,6 +839,7 @@ StoreToQueryEnv(QueryDesc *queryDesc) enr->md.reliddesc = InvalidOid; enr->md.tupdesc = NULL; enr->reldata = palloc0(qcsize); + Assert(enr->reldata != NULL); memcpy(enr->reldata, &query_context, qcsize); if (newentry) @@ -890,6 +891,7 @@ StorePlanInternals(QueryDesc *queryDesc) enr->md.reliddesc = InvalidOid; enr->md.tupdesc = NULL; enr->reldata = palloc0(sizeof(int)); + Assert(enr->reldata != NULL); memcpy(enr->reldata, &njoins, sizeof(int)); if (newentry) @@ -919,6 +921,7 @@ ExtractFromQueryEnv(QueryDesc *queryDesc) if (enr == NULL) return false; + Assert(enr->reldata != NULL); memcpy(&query_context, 
enr->reldata, sizeof(QueryContextData)); return true; diff --git a/storage.c b/storage.c index a93c4281..8c7467a3 100644 --- a/storage.c +++ b/storage.c @@ -448,9 +448,10 @@ _form_qtext_record_cb(void *ctx, size_t *size) Assert(DsaPointerIsValid(entry->qtext_dp)); query_string = dsa_get_address(qtext_dsa, entry->qtext_dp); + Assert(query_string != NULL); *size = sizeof(entry->queryid) + strlen(query_string) + 1; - data = palloc(*size); - ptr = data; + ptr = data = palloc(*size); + Assert(ptr != NULL); memcpy(ptr, &entry->queryid, sizeof(entry->queryid)); ptr += sizeof(entry->queryid); memcpy(ptr, query_string, strlen(query_string) + 1); @@ -645,7 +646,7 @@ _deform_stat_record_cb(void *data, size_t size) queryid = ((StatEntry *) data)->queryid; entry = (StatEntry *) hash_search(stat_htab, &queryid, HASH_ENTER, &found); - Assert(!found); + Assert(!found && entry); memcpy(entry, data, sizeof(StatEntry)); return true; } @@ -755,7 +756,9 @@ _deform_data_record_cb(void *data, size_t size) char *ptr = (char *) data, *dsa_ptr; + Assert(ptr != NULL); Assert(LWLockHeldByMeInMode(&aqo_state->data_lock, LW_EXCLUSIVE)); + entry = (DataEntry *) hash_search(data_htab, &fentry->key, HASH_ENTER, &found); Assert(!found); @@ -779,6 +782,7 @@ _deform_data_record_cb(void *data, size_t size) } dsa_ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert(dsa_ptr != NULL); memcpy(dsa_ptr, ptr, sz); return true; } @@ -1317,7 +1321,8 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) if (entry->cols != data->cols || entry->nrels != list_length(reloids)) { /* Collision happened? */ - elog(LOG, "[AQO] Does a collision happened? Check it if possible (fs: %lu, fss: %d).", + elog(LOG, "[AQO] Does a collision happened? 
Check it if possible (fs: " + UINT64_FORMAT", fss: %d).", fs, fss); goto end; } @@ -1342,6 +1347,7 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) } } ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert(ptr != NULL); /* * Copy AQO data into allocated DSA segment @@ -1353,6 +1359,7 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) { for (i = 0; i < entry->rows; i++) { + Assert(data->matrix[i]); memcpy(ptr, data->matrix[i], sizeof(double) * data->cols); ptr += sizeof(double) * data->cols; } @@ -1382,6 +1389,7 @@ static void build_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data) { Assert(data->cols == temp_data->cols); + Assert(data->matrix); if (data->rows > 0) /* trivial strategy - use first suitable record and ignore others */ @@ -1393,7 +1401,10 @@ build_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data) int i; for (i = 0; i < data->rows; i++) + { + Assert(data->matrix[i]); memcpy(data->matrix[i], temp_data->matrix[i], data->cols * sizeof(double)); + } } } @@ -1415,6 +1426,7 @@ _fill_knn_data(const DataEntry *entry, List **reloids) Assert(entry->rows <= aqo_K); Assert(ptr != NULL); Assert(entry->key.fss == ((data_key *)ptr)->fss); + Assert(data->matrix); ptr += sizeof(data_key); @@ -1422,6 +1434,7 @@ _fill_knn_data(const DataEntry *entry, List **reloids) { for (i = 0; i < entry->rows; i++) { + Assert(data->matrix[i]); memcpy(data->matrix[i], ptr, sizeof(double) * data->cols); ptr += sizeof(double) * data->cols; } @@ -1488,7 +1501,8 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, if (entry->cols != data->cols) { /* Collision happened? */ - elog(LOG, "[AQO] Does a collision happened? Check it if possible (fs: %lu, fss: %d).", + elog(LOG, "[AQO] Does a collision happened? 
Check it if possible " + "(fs: "UINT64_FORMAT", fss: %d).", fs, fss); found = false; goto end; @@ -1596,7 +1610,7 @@ aqo_data(PG_FUNCTION_ARGS) memset(nulls, 0, AD_TOTAL_NCOLS); values[AD_FS] = Int64GetDatum(entry->key.fs); - values[AD_FSS] = Int64GetDatum(entry->key.fss); + values[AD_FSS] = Int32GetDatum((int) entry->key.fss); values[AD_NFEATURES] = Int32GetDatum(entry->cols); /* Fill values from the DSA data chunk */ @@ -1861,7 +1875,8 @@ aqo_enable_query(PG_FUNCTION_ARGS) entry->auto_tuning = true; } else - elog(ERROR, "[AQO] Entry with queryid %ld not contained in table", queryid); + elog(ERROR, "[AQO] Entry with queryid "INT64_FORMAT + " not contained in table", (int64) queryid); hash_search(deactivated_queries, &queryid, HASH_REMOVE, NULL); LWLockRelease(&aqo_state->queries_lock); @@ -1888,7 +1903,8 @@ aqo_disable_query(PG_FUNCTION_ARGS) } else { - elog(ERROR, "[AQO] Entry with %ld not contained in table", queryid); + elog(ERROR, "[AQO] Entry with "INT64_FORMAT" not contained in table", + (int64) queryid); } LWLockRelease(&aqo_state->queries_lock); PG_RETURN_VOID(); @@ -1939,11 +1955,11 @@ aqo_queries_update(PG_FUNCTION_ARGS) if (!PG_ARGISNULL(AQ_FS)) entry->fs = PG_GETARG_INT64(AQ_FS); if (!PG_ARGISNULL(AQ_LEARN_AQO)) - entry->learn_aqo = PG_GETARG_INT64(AQ_LEARN_AQO); + entry->learn_aqo = PG_GETARG_BOOL(AQ_LEARN_AQO); if (!PG_ARGISNULL(AQ_USE_AQO)) - entry->use_aqo = PG_GETARG_INT64(AQ_USE_AQO); + entry->use_aqo = PG_GETARG_BOOL(AQ_USE_AQO); if (!PG_ARGISNULL(AQ_AUTO_TUNING)) - entry->auto_tuning = PG_GETARG_INT64(AQ_AUTO_TUNING); + entry->auto_tuning = PG_GETARG_BOOL(AQ_AUTO_TUNING); /* Remove the class from cache of deactivated queries */ hash_search(deactivated_queries, &queryid, HASH_REMOVE, NULL); @@ -2051,8 +2067,9 @@ cleanup_aqo_database(bool gentle, int *fs_num, int *fss_num) */ ereport(PANIC, (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("AQO detected incorrect behaviour: fs=%lu fss=%ld", - dentry->key.fs, dentry->key.fss))); + errmsg("AQO detected 
incorrect behaviour: fs=" + UINT64_FORMAT" fss=%d", + dentry->key.fs, (int32) dentry->key.fss))); } LWLockRelease(&aqo_state->data_lock); @@ -2170,24 +2187,27 @@ aqo_drop_class(PG_FUNCTION_ARGS) long cnt; if (queryid == 0) - elog(ERROR, "[AQO] Cannot remove basic class %lu.", queryid); + elog(ERROR, "[AQO] Cannot remove basic class "INT64_FORMAT".", + (int64) queryid); /* Extract FS value for the queryid */ LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, &found); if (!found) - elog(ERROR, "[AQO] Nothing to remove for the class %lu.", queryid); + elog(ERROR, "[AQO] Nothing to remove for the class "INT64_FORMAT".", + (int64) queryid); fs = entry->fs; LWLockRelease(&aqo_state->queries_lock); if (fs == 0) - elog(ERROR, "[AQO] Cannot remove class %lu with default FS.", queryid); + elog(ERROR, "[AQO] Cannot remove class "INT64_FORMAT" with default FS.", + (int64) queryid); if (fs != queryid) elog(WARNING, - "[AQO] Removing query class has non-generic feature space value: id = %lu, fs = %lu.", - queryid, fs); + "[AQO] Removing query class has non-generic feature space value: " + "id = "INT64_FORMAT", fs = "UINT64_FORMAT".", (int64) queryid, fs); /* Now, remove all data related to the class */ _aqo_queries_remove(queryid); From 253c2aafc29ac42d0786400ee4e46a0b566bd0ae Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 9 Sep 2022 08:06:38 +0500 Subject: [PATCH 124/203] Add error messages instead of (and in addition to) assertions to handle errors on production instance in more predictable way. Also, make minor additions in storage reset functions: clean a disk storage after cleaning the memory storage. 
--- storage.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/storage.c b/storage.c index 8c7467a3..1c6ceb57 100644 --- a/storage.c +++ b/storage.c @@ -391,8 +391,11 @@ aqo_stat_reset(void) elog(ERROR, "[AQO] hash table corrupted"); num_remove++; } + aqo_state->stat_changed = true; LWLockRelease(&aqo_state->stat_lock); - Assert(num_remove == num_entries); /* Is it really impossible? */ + + if (num_remove != num_entries) + elog(ERROR, "[AQO] Stat memory storage is corrupted or parallel access without a lock was detected."); aqo_stat_flush(); @@ -1231,9 +1234,10 @@ aqo_qtexts_reset(void) } aqo_state->qtexts_changed = true; LWLockRelease(&aqo_state->qtexts_lock); - Assert(num_remove == num_entries - 1); /* Is it really impossible? */ + if (num_remove != num_entries - 1) + elog(ERROR, "[AQO] Query texts memory storage is corrupted or parallel access without a lock was detected."); - /* TODO: clean disk storage */ + aqo_qtexts_flush(); return num_remove; } @@ -1439,6 +1443,7 @@ _fill_knn_data(const DataEntry *entry, List **reloids) ptr += sizeof(double) * data->cols; } } + /* copy targets from DSM storage */ memcpy(data->targets, ptr, sizeof(double) * entry->rows); ptr += sizeof(double) * entry->rows; @@ -1461,7 +1466,11 @@ _fill_knn_data(const DataEntry *entry, List **reloids) *reloids = lappend_oid(*reloids, ObjectIdGetDatum(*(Oid*)ptr)); ptr += sizeof(Oid); } - Assert(ptr - (char *) dsa_get_address(data_dsa, entry->data_dp) == sz); + + offset = ptr - (char *) dsa_get_address(data_dsa, entry->data_dp); + if (offset != sz) + elog(PANIC, "[AQO] Shared memory ML storage is corrupted."); + return data; } @@ -1710,9 +1719,10 @@ aqo_data_reset(void) } aqo_state->data_changed = true; LWLockRelease(&aqo_state->data_lock); - Assert(num_remove == num_entries); + if (num_remove != num_entries) + elog(ERROR, "[AQO] Query ML memory storage is corrupted or parallel access without a lock has detected."); - /* TODO: clean disk storage */ 
+ aqo_data_flush(); return num_remove; } @@ -1844,8 +1854,11 @@ aqo_queries_reset(void) elog(ERROR, "[AQO] hash table corrupted"); num_remove++; } + aqo_state->queries_changed = true; LWLockRelease(&aqo_state->queries_lock); - Assert(num_remove == num_entries - 1); + + if (num_remove != num_entries - 1) + elog(ERROR, "[AQO] Queries memory storage is corrupted or parallel access without a lock has detected."); aqo_queries_flush(); From 0576bb084b6f32bc8c428fd9d24e42d9fedadac0 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 9 Sep 2022 10:23:24 +0500 Subject: [PATCH 125/203] Disable materializing strategy of the planner in look-a-like tests. It is just because of difference in behaviour of different versions of PGPro executor. In some versions it can disable unnecessary repeatable scans of a materialize node. XXX: Could we solve a problem by improvement of AQO logic? --- expected/look_a_like.out | 80 ++++++++++++++++++++-------------------- sql/look_a_like.sql | 28 +++++++++----- 2 files changed, 58 insertions(+), 50 deletions(-) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index a867f10a..9cba2c48 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -2,6 +2,7 @@ CREATE EXTENSION aqo; SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; +SET enable_material = 'off'; DROP TABLE IF EXISTS a,b CASCADE; NOTICE: table "a" does not exist, skipping NOTICE: table "b" does not exist, skipping @@ -38,65 +39,60 @@ WHERE str NOT LIKE 'Query Identifier%'; JOINS: 0 (8 rows) --- cardinality 100 in the first Seq Scan on a SELECT str AS result FROM expln(' SELECT x FROM A,B WHERE x = 5 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%'; - result ------------------------------------------------------------- +WHERE str NOT LIKE 'Query Identifier%' +; -- Find cardinality for SCAN A(x=5) from a neighbour class, created by the + result +-------------------------------------------------------- Nested 
Loop (actual rows=10000 loops=1) AQO not used Output: a.x - -> Seq Scan on public.a (actual rows=100 loops=1) + -> Seq Scan on public.b (actual rows=100 loops=1) + AQO not used + Output: b.y + Filter: (b.y = 5) + Rows Removed by Filter: 900 + -> Seq Scan on public.a (actual rows=100 loops=100) AQO: rows=100, error=0% Output: a.x Filter: (a.x = 5) Rows Removed by Filter: 900 - -> Materialize (actual rows=100 loops=100) - AQO not used - Output: b.y - -> Seq Scan on public.b (actual rows=100 loops=1) - AQO not used - Output: b.y - Filter: (b.y = 5) - Rows Removed by Filter: 900 Using aqo: true AQO mode: LEARN JOINS: 0 -(19 rows) +(16 rows) --- cardinality 100 in Nesteed Loop in the first Seq Scan on a +-- query, executed above. SELECT str AS result FROM expln(' SELECT x, sum(x) FROM A,B WHERE y = 5 AND A.x = B.y group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%'; - result ------------------------------------------------------------------- +WHERE str NOT LIKE 'Query Identifier%' +; -- Find the JOIN cardinality from a neighbour class. 
+ result +-------------------------------------------------------------- GroupAggregate (actual rows=1 loops=1) AQO not used Output: a.x, sum(a.x) Group Key: a.x -> Nested Loop (actual rows=10000 loops=1) - AQO not used + AQO: rows=10000, error=0% Output: a.x -> Seq Scan on public.a (actual rows=100 loops=1) AQO: rows=100, error=0% Output: a.x Filter: (a.x = 5) Rows Removed by Filter: 900 - -> Materialize (actual rows=100 loops=100) + -> Seq Scan on public.b (actual rows=100 loops=100) AQO: rows=100, error=0% Output: b.y - -> Seq Scan on public.b (actual rows=100 loops=1) - AQO: rows=100, error=0% - Output: b.y - Filter: (b.y = 5) - Rows Removed by Filter: 900 + Filter: (b.y = 5) + Rows Removed by Filter: 900 Using aqo: true AQO mode: LEARN JOINS: 1 -(23 rows) +(20 rows) -- cardinality 100 in the first Seq Scan on a SELECT str AS result @@ -176,8 +172,8 @@ SELECT str AS result FROM expln(' SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ----------------------------------------------------------------- + result +---------------------------------------------------------- HashAggregate (actual rows=0 loops=1) AQO not used Output: a.x @@ -185,28 +181,29 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; -> Nested Loop (actual rows=0 loops=1) AQO not used Output: a.x - -> Seq Scan on public.a (actual rows=1000 loops=1) - AQO: rows=1000, error=0% + -> Seq Scan on public.b (actual rows=0 loops=1) + AQO not used + Output: b.y + Filter: (b.y > 10) + Rows Removed by Filter: 1000 + -> Seq Scan on public.a (never executed) + AQO: rows=1000 Output: a.x Filter: (a.x < 10) - -> Materialize (actual rows=0 loops=1000) - AQO not used - -> Seq Scan on public.b (actual rows=0 loops=1) - AQO not used - Filter: (b.y > 10) - Rows Removed by Filter: 1000 Using aqo: true AQO mode: LEARN JOINS: 1 -(20 rows) +(19 rows) --- cardinality 1000 Hash Cond: (a.x = b.y) and 1 Seq Scan on 
b --- this cardinality is wrong because we take it from bad neibours (previous query). --- clause y > 10 give count of rows with the same clauses. +-- +-- TODO: +-- Not executed case. What could we do better here? +-- SELECT str AS result FROM expln(' SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' +; result ---------------------------------------------------------- Hash Join (actual rows=0 loops=1) @@ -230,6 +227,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; JOINS: 0 (19 rows) +RESET enable_material; DROP TABLE a,b CASCADE; SELECT true FROM aqo_reset(); bool diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index a179f8f4..07aff8a7 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -3,6 +3,8 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; +SET enable_material = 'off'; + DROP TABLE IF EXISTS a,b CASCADE; CREATE TABLE a (x int); INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival; @@ -28,16 +30,20 @@ SELECT str AS result FROM expln(' SELECT x FROM A where x = 5;') AS str WHERE str NOT LIKE 'Query Identifier%'; --- cardinality 100 in the first Seq Scan on a + SELECT str AS result FROM expln(' SELECT x FROM A,B WHERE x = 5 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%'; --- cardinality 100 in Nesteed Loop in the first Seq Scan on a +WHERE str NOT LIKE 'Query Identifier%' +; -- Find cardinality for SCAN A(x=5) from a neighbour class, created by the +-- query, executed above. + SELECT str AS result FROM expln(' SELECT x, sum(x) FROM A,B WHERE y = 5 AND A.x = B.y group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%'; +WHERE str NOT LIKE 'Query Identifier%' +; -- Find the JOIN cardinality from a neighbour class. 
+ -- cardinality 100 in the first Seq Scan on a SELECT str AS result FROM expln(' @@ -61,14 +67,18 @@ SELECT str AS result FROM expln(' SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; --- cardinality 1000 Hash Cond: (a.x = b.y) and 1 Seq Scan on b --- this cardinality is wrong because we take it from bad neibours (previous query). --- clause y > 10 give count of rows with the same clauses. + +-- +-- TODO: +-- Not executed case. What could we do better here? +-- SELECT str AS result FROM expln(' SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' +; +RESET enable_material; DROP TABLE a,b CASCADE; SELECT true FROM aqo_reset(); -DROP EXTENSION aqo CASCADE; \ No newline at end of file +DROP EXTENSION aqo CASCADE; From 8c6c1ca31da9a71bf15fe10389f18df5aa135331 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Wed, 3 Aug 2022 13:06:53 +0300 Subject: [PATCH 126/203] Fix build aqo for PGPRO version. Add macros expression_tree_mutator for flexible calling expression_tree_mutator containing addition forth parameter only in pgpro version. --- path_utils.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/path_utils.c b/path_utils.c index 57cced69..4465f9d4 100644 --- a/path_utils.c +++ b/path_utils.c @@ -23,6 +23,10 @@ #include "aqo.h" #include "hash.h" +#ifdef PGPRO_STD +# define expression_tree_mutator(node, mutator, context) \ + expression_tree_mutator(node, mutator, context, 0) +#endif /* * Hook on creation of a plan node. 
We need to store AQO-specific data to From 0cfcab8156bde92c32b68650a7375a9b2c43da2b Mon Sep 17 00:00:00 2001 From: "a.pervushina" Date: Tue, 26 Jul 2022 19:13:06 +0300 Subject: [PATCH 127/203] [PGPRO-6755] Refactor machine dependent tests Tags: aqo --- expected/forced_stat_collection.out | 17 ++++++++++++----- expected/unsupported.out | 28 ++++++++++++++++++++-------- sql/forced_stat_collection.sql | 10 +++++++++- sql/unsupported.sql | 17 +++++++++++++++-- 4 files changed, 56 insertions(+), 16 deletions(-) diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index a0a44e6a..f635fbcc 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -38,14 +38,21 @@ SELECT * FROM aqo_data; ----+-----+-----------+----------+---------+-------------+------ (0 rows) -SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$; +SELECT learn_aqo,use_aqo,auto_tuning,round_array(cardinality_error_without_aqo) ce,executions_without_aqo nex FROM aqo_queries AS aq JOIN aqo_query_stat AS aqs ON aq.queryid = aqs.queryid ORDER BY (cardinality_error_without_aqo); - learn_aqo | use_aqo | auto_tuning | ce | nex ------------+---------+-------------+----------------------+----- - f | f | f | {0.8637762840285226} | 1 - f | f | f | {2.9634630129852053} | 1 + learn_aqo | use_aqo | auto_tuning | ce | nex +-----------+---------+-------------+---------+----- + f | f | f | {0.864} | 1 + f | f | f | {2.963} | 1 (2 rows) SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); diff --git a/expected/unsupported.out b/expected/unsupported.out index 4b95c201..8e29b597 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -467,6 +467,17 @@ SELECT * FROM -- any prediction on number of 
fetched tuples. -- So, if selectivity was wrong we could make bad choice of Scan operation. -- For example, we could choose suboptimal index. +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; -- Turn off statistics gathering for simple demonstration of filtering problem. ALTER TABLE t SET (autovacuum_enabled = 'false'); CREATE INDEX ind1 ON t(x); @@ -502,21 +513,22 @@ SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; 50 (1 row) -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) - SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; - QUERY PLAN +SELECT str AS result +FROM expln('SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str +WHERE str NOT LIKE '%Heap Blocks%'; + result ----------------------------------------------------------------- Aggregate (actual rows=1 loops=1) AQO not used - -> Bitmap Heap Scan on t (actual rows=50 loops=1) + Output: count(*) + -> Bitmap Heap Scan on public.t (actual rows=50 loops=1) AQO: rows=50, error=0% - Recheck Cond: (mod(x, 3) = 1) - Filter: (x < 3) + Recheck Cond: (mod(t.x, 3) = 1) + Filter: (t.x < 3) Rows Removed by Filter: 300 - Heap Blocks: exact=5 -> Bitmap Index Scan on ind2 (actual rows=350 loops=1) AQO not used - Index Cond: (mod(x, 3) = 1) + Index Cond: (mod(t.x, 3) = 1) Using aqo: true AQO mode: LEARN JOINS: 0 diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index 71c4ffc1..d9fac51a 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -30,7 +30,15 @@ SELECT count(*) FROM person WHERE age<18; SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; SELECT * FROM aqo_data; -SELECT 
learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$; + +SELECT learn_aqo,use_aqo,auto_tuning,round_array(cardinality_error_without_aqo) ce,executions_without_aqo nex FROM aqo_queries AS aq JOIN aqo_query_stat AS aqs ON aq.queryid = aqs.queryid ORDER BY (cardinality_error_without_aqo); diff --git a/sql/unsupported.sql b/sql/unsupported.sql index d5209af6..6446b741 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -135,6 +135,18 @@ SELECT * FROM -- So, if selectivity was wrong we could make bad choice of Scan operation. -- For example, we could choose suboptimal index. +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + -- Turn off statistics gathering for simple demonstration of filtering problem. ALTER TABLE t SET (autovacuum_enabled = 'false'); CREATE INDEX ind1 ON t(x); @@ -147,8 +159,9 @@ EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) -- Here we filter more tuples than with the ind1 index. CREATE INDEX ind2 ON t(mod(x,3)); SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) - SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; +SELECT str AS result +FROM expln('SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str +WHERE str NOT LIKE '%Heap Blocks%'; -- Best choice is ... 
ANALYZE t; From 355199e6ae2ad78802b71fc87b4ad7f82566eaf2 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 13 Sep 2022 08:59:22 +0300 Subject: [PATCH 128/203] Append miss release locks. --- storage.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/storage.c b/storage.c index 1c6ceb57..c9f7dd18 100644 --- a/storage.c +++ b/storage.c @@ -1315,6 +1315,7 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) * that caller recognize it and don't try to call us more. */ (void) hash_search(data_htab, &key, HASH_REMOVE, NULL); + LWLockRelease(&aqo_state->data_lock); return false; } } @@ -1347,6 +1348,7 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) * that caller recognize it and don't try to call us more. */ (void) hash_search(data_htab, &key, HASH_REMOVE, NULL); + LWLockRelease(&aqo_state->data_lock); return false; } } From a21a228f5dd48f93674fa1328933c428674c6d79 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Thu, 15 Sep 2022 11:49:21 +0300 Subject: [PATCH 129/203] Add release lock in qtext_lock. --- storage.c | 1 + 1 file changed, 1 insertion(+) diff --git a/storage.c b/storage.c index c9f7dd18..47369c20 100644 --- a/storage.c +++ b/storage.c @@ -1040,6 +1040,7 @@ aqo_qtext_store(uint64 queryid, const char *query_string) * that caller recognize it and don't try to call us more. */ (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, NULL); + LWLockRelease(&aqo_state->qtexts_lock); return false; } From 4688cf654312d7f612dcabf7f5a2567fc5293398 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 21 Sep 2022 10:22:57 +0500 Subject: [PATCH 130/203] Be more careful with locks of relations and syscaches in get_list_of_relids() routine Switch on feature 'search on neighbour feature spaces' by a GUC (disabled by default). Some mistakes fixed. 
--- aqo.c | 13 ++++++++ aqo.h | 1 + aqo_shared.c | 18 ++++++++--- cardinality_estimation.c | 5 ++- conf.add | 1 + path_utils.c | 27 ++++++++++------ postprocessing.c | 2 -- storage.c | 66 +++++++++++++++++++++++++++++++--------- 8 files changed, 102 insertions(+), 31 deletions(-) diff --git a/aqo.c b/aqo.c index 0612663d..630ef1cd 100644 --- a/aqo.c +++ b/aqo.c @@ -213,6 +213,19 @@ _PG_init(void) NULL ); + DefineCustomBoolVariable( + "aqo.wide_search", + "Search ML data in neighbour feature spaces.", + NULL, + &use_wide_search, + false, + PGC_USERSET, + 0, + NULL, + NULL, + NULL + ); + DefineCustomIntVariable("aqo.join_threshold", "Sets the threshold of number of JOINs in query beyond which AQO is used.", NULL, diff --git a/aqo.h b/aqo.h index 64092b94..8cad51c2 100644 --- a/aqo.h +++ b/aqo.h @@ -173,6 +173,7 @@ extern bool force_collect_stat; extern bool aqo_show_hash; extern bool aqo_show_details; extern int aqo_join_threshold; +extern bool use_wide_search; /* Parameters for current query */ typedef struct QueryContextData diff --git a/aqo_shared.c b/aqo_shared.c index 2ec063e7..ac5c5aea 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -191,16 +191,18 @@ aqo_init_shmem(void) { /* First time through ... 
*/ - LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId()); aqo_state->dsm_handler = DSM_HANDLE_INVALID; - aqo_state->qtexts_dsa_handler = DSM_HANDLE_INVALID; + aqo_state->data_dsa_handler = DSM_HANDLE_INVALID; + aqo_state->qtext_trancheid = LWLockNewTrancheId(); + aqo_state->qtexts_changed = false; - aqo_state->data_dsa_handler = DSM_HANDLE_INVALID; + aqo_state->stat_changed = false; aqo_state->data_changed = false; aqo_state->queries_changed = false; + LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId()); LWLockInitialize(&aqo_state->stat_lock, LWLockNewTrancheId()); LWLockInitialize(&aqo_state->qtexts_lock, LWLockNewTrancheId()); LWLockInitialize(&aqo_state->data_lock, LWLockNewTrancheId()); @@ -245,7 +247,7 @@ aqo_init_shmem(void) LWLockRegisterTranche(aqo_state->data_lock.tranche, "AQO Data Lock Tranche"); LWLockRegisterTranche(aqo_state->queries_lock.tranche, "AQO Queries Lock Tranche"); - if (!IsUnderPostmaster) + if (!IsUnderPostmaster && !found) { before_shmem_exit(on_shmem_shutdown, (Datum) 0); @@ -261,8 +263,16 @@ aqo_init_shmem(void) static void on_shmem_shutdown(int code, Datum arg) { + Assert(!IsUnderPostmaster); + + /* + * Save ML data to a permanent storage. Do it on postmaster shutdown only + * to save time. We can't do so for query_texts and aqo_data because of DSM + * limits. 
+ */ aqo_stat_flush(); aqo_queries_flush(); + return; } Size diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 4baba286..96cd2c70 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -24,6 +24,9 @@ #include "machine_learning.h" #include "storage.h" + +bool use_wide_search = false; + #ifdef AQO_DEBUG_PRINT static void predict_debug_output(List *clauses, List *selectivities, @@ -90,7 +93,7 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, */ /* Try to search in surrounding feature spaces for the same node */ - if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, true)) + if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, use_wide_search)) result = -1; else { diff --git a/conf.add b/conf.add index ed455870..9e9d2336 100644 --- a/conf.add +++ b/conf.add @@ -1,3 +1,4 @@ autovacuum = off shared_preload_libraries = 'postgres_fdw, aqo' max_parallel_workers_per_gather = 1 # switch off parallel workers because of unsteadiness +aqo.wide_search = 'on' \ No newline at end of file diff --git a/path_utils.c b/path_utils.c index 4465f9d4..a84d38fe 100644 --- a/path_utils.c +++ b/path_utils.c @@ -155,6 +155,8 @@ hashTempTupleDesc(TupleDesc desc) return s; } +#include "storage/lmgr.h" + /* * Get list of relation indexes and prepare list of permanent table reloids, * list of temporary table reloids (can be changed between query launches) and @@ -177,6 +179,8 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) HeapTuple htup; Form_pg_class classForm; char *relname = NULL; + Oid relrewrite; + char relpersistence; entry = planner_rt_fetch(index, root); @@ -191,15 +195,23 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) if (!HeapTupleIsValid(htup)) elog(PANIC, "cache lookup failed for reloid %u", entry->relid); + /* Copy the fields from syscache and release the slot as quickly as possible. 
*/ classForm = (Form_pg_class) GETSTRUCT(htup); + relpersistence = classForm->relpersistence; + relrewrite = classForm->relrewrite; + relname = pstrdup(NameStr(classForm->relname)); + ReleaseSysCache(htup); - if (classForm->relpersistence == RELPERSISTENCE_TEMP) + if (relpersistence == RELPERSISTENCE_TEMP) { /* The case of temporary table */ - Relation trel = relation_open(entry->relid, NoLock); - TupleDesc tdesc = RelationGetDescr(trel); + Relation trel; + TupleDesc tdesc; + trel = relation_open(entry->relid, NoLock); + tdesc = RelationGetDescr(trel); + Assert(CheckRelationLockedByMe(trel, AccessShareLock, true)); hashes = lappend_uint64(hashes, hashTempTupleDesc(tdesc)); relation_close(trel, NoLock); } @@ -207,18 +219,15 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) { /* The case of regular table */ relname = quote_qualified_identifier( - get_namespace_name(get_rel_namespace(entry->relid)), - classForm->relrewrite ? - get_rel_name(classForm->relrewrite) : - NameStr(classForm->relname)); + get_namespace_name(get_rel_namespace(entry->relid)), + relrewrite ? 
get_rel_name(relrewrite) : relname); + hashes = lappend_uint64(hashes, DatumGetInt64(hash_any_extended( (unsigned char *) relname, strlen(relname), 0))); hrels = lappend_oid(hrels, entry->relid); } - - ReleaseSysCache(htup); } rels->hrels = list_concat(rels->hrels, hrels); diff --git a/postprocessing.c b/postprocessing.c index 9436f518..ef4bdaee 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -171,7 +171,6 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, { List *lst = NIL; ListCell *l; - int i = 0; bool parametrized_sel; int nargs; int *args_hash; @@ -220,7 +219,6 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, Assert(cur_sel > 0); lst = lappend(lst, cur_sel); - i++; } if (parametrized_sel) diff --git a/storage.c b/storage.c index 47369c20..f72d6aca 100644 --- a/storage.c +++ b/storage.c @@ -303,7 +303,9 @@ aqo_stat_store(uint64 queryid, bool use_aqo, entry->exec_time[pos] = exec_time; entry->est_error[pos] = est_error; } + entry = memcpy(palloc(sizeof(StatEntry)), entry, sizeof(StatEntry)); + aqo_state->stat_changed = true; LWLockRelease(&aqo_state->stat_lock); return entry; } @@ -425,14 +427,24 @@ aqo_stat_flush(void) int ret; long entries; - LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + /* Use exclusive lock to prevent concurrent flushing in different backends. 
*/ + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + + if (!aqo_state->stat_changed) + /* Hash table wasn't changed, meaningless to store it in permanent storage */ + goto end; + entries = hash_get_num_entries(stat_htab); hash_seq_init(&hash_seq, stat_htab); ret = data_store(PGAQO_STAT_FILE, _form_stat_record_cb, entries, (void *) &hash_seq); if (ret != 0) hash_seq_term(&hash_seq); + else + /* Hash table and disk storage are now consistent */ + aqo_state->stat_changed = false; +end: LWLockRelease(&aqo_state->stat_lock); } @@ -469,7 +481,7 @@ aqo_qtexts_flush(void) long entries; dsa_init(); - LWLockAcquire(&aqo_state->qtexts_lock, LW_SHARED); + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); if (!aqo_state->qtexts_changed) /* XXX: mull over forced mode. */ @@ -481,7 +493,9 @@ aqo_qtexts_flush(void) (void *) &hash_seq); if (ret != 0) hash_seq_term(&hash_seq); - aqo_state->qtexts_changed = false; + else + /* Hash table and disk storage are now consistent */ + aqo_state->qtexts_changed = false; end: LWLockRelease(&aqo_state->qtexts_lock); @@ -531,7 +545,7 @@ aqo_data_flush(void) long entries; dsa_init(); - LWLockAcquire(&aqo_state->data_lock, LW_SHARED); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); if (!aqo_state->data_changed) /* XXX: mull over forced mode. 
*/ @@ -548,6 +562,7 @@ aqo_data_flush(void) */ hash_seq_term(&hash_seq); else + /* Hash table and disk storage are now consistent */ aqo_state->data_changed = false; end: LWLockRelease(&aqo_state->data_lock); @@ -574,14 +589,22 @@ aqo_queries_flush(void) int ret; long entries; - LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + + if (!aqo_state->queries_changed) + goto end; + entries = hash_get_num_entries(queries_htab); hash_seq_init(&hash_seq, queries_htab); ret = data_store(PGAQO_QUERIES_FILE, _form_queries_record_cb, entries, (void *) &hash_seq); if (ret != 0) hash_seq_term(&hash_seq); + else + /* Hash table and disk storage are now consistent */ + aqo_state->queries_changed = false; +end: LWLockRelease(&aqo_state->queries_lock); } @@ -621,7 +644,8 @@ data_store(const char *filename, form_record_t callback, goto error; } - (void) durable_rename(tmpfile, filename, LOG); + /* Parallel (re)writing into a file haven't happen. */ + (void) durable_rename(tmpfile, filename, PANIC); elog(LOG, "[AQO] %d records stored in file %s.", counter, filename); return 0; @@ -839,7 +863,7 @@ aqo_queries_load(void) LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); - /* Load on postmaster sturtup. So no any concurrent actions possible here. */ + /* Load on postmaster startup. So no any concurrent actions possible here. */ Assert(hash_get_num_entries(queries_htab) == 0); data_load(PGAQO_QUERIES_FILE, _deform_queries_record_cb, NULL); @@ -926,6 +950,9 @@ data_load(const char *filename, deform_record_t callback, void *ctx) static void on_shmem_shutdown(int code, Datum arg) { + /* + * XXX: It can be expensive to rewrite a file on each shutdown of a backend. 
+ */ aqo_qtexts_flush(); aqo_data_flush(); } @@ -1201,6 +1228,7 @@ _aqo_data_remove(data_key *key) if (hash_search(data_htab, key, HASH_REMOVE, NULL) == NULL) elog(PANIC, "[AQO] Inconsistent data hash table"); + aqo_state->data_changed = true; } @@ -1270,8 +1298,9 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) char *ptr; ListCell *lc; size_t size; - bool tblOverflow; - HASHACTION action; + bool tblOverflow; + HASHACTION action; + bool result; Assert(!LWLockHeldByMe(&aqo_state->data_lock)); @@ -1322,7 +1351,6 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) } Assert(DsaPointerIsValid(entry->data_dp)); - Assert(entry->rows <= data->rows); /* Reserved for the future features */ if (entry->cols != data->cols || entry->nrels != list_length(reloids)) { @@ -1388,8 +1416,9 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) aqo_state->data_changed = true; end: + result = aqo_state->data_changed; LWLockRelease(&aqo_state->data_lock); - return aqo_state->data_changed; + return result; } static void @@ -1497,7 +1526,7 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, dsa_init(); - LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + LWLockAcquire(&aqo_state->data_lock, LW_SHARED); if (!wideSearch) { @@ -1632,7 +1661,8 @@ aqo_data(PG_FUNCTION_ARGS) ptr += sizeof(data_key); if (entry->cols > 0) - values[AD_FEATURES] = PointerGetDatum(form_matrix((double *)ptr, entry->rows, entry->cols)); + values[AD_FEATURES] = PointerGetDatum(form_matrix((double *) ptr, + entry->rows, entry->cols)); else nulls[AD_FEATURES] = true; @@ -1720,7 +1750,9 @@ aqo_data_reset(void) elog(ERROR, "[AQO] hash table corrupted"); num_remove++; } - aqo_state->data_changed = true; + + if (num_remove > 0) + aqo_state->data_changed = true; LWLockRelease(&aqo_state->data_lock); if (num_remove != num_entries) elog(ERROR, "[AQO] Query ML memory storage is corrupted or parallel access without a lock has detected."); @@ -1832,6 +1864,7 
@@ aqo_queries_store(uint64 queryid, entry->use_aqo = use_aqo; entry->auto_tuning = auto_tuning; + aqo_state->queries_changed = true; LWLockRelease(&aqo_state->queries_lock); return true; } @@ -1857,7 +1890,10 @@ aqo_queries_reset(void) elog(ERROR, "[AQO] hash table corrupted"); num_remove++; } - aqo_state->queries_changed = true; + + if (num_remove > 0) + aqo_state->queries_changed = true; + LWLockRelease(&aqo_state->queries_lock); if (num_remove != num_entries - 1) From a8d85d5ac02df6e8698cad45616d4e50521f5794 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Mon, 3 Oct 2022 11:22:00 +0300 Subject: [PATCH 131/203] Refactor machine dependent tests. Tags: aqo --- expected/unsupported.out | 66 ++++++++++++++++++++-------------------- sql/unsupported.sql | 2 +- 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/expected/unsupported.out b/expected/unsupported.out index 8e29b597..dbdc1f7b 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -553,42 +553,42 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? -- Live with this variant of the test for some time. 
-SELECT to_char(error, '9.99EEEE')::text AS error, query_text +SELECT round(error::numeric, 3) AS error, query_text FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.queryid = cef.id ORDER BY (md5(query_text),error) DESC; - error | query_text ------------+------------------------------------------------------------------------------------------------ - 7.68e-01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; - 7.04e-02 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; - 1.42e+00 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; - 0.00e+00 | SELECT * FROM + - | (SELECT * FROM t WHERE x < 0) AS t0 + - | JOIN + - | (SELECT * FROM t WHERE x > 20) AS t1 + - | USING(x); - 0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); - 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + - | SELECT count(*) FROM t WHERE + - | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + - | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); - 4.54e-01 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; - 0.00e+00 | SELECT count(*) FROM ( + - | SELECT count(*) AS x FROM ( + - | SELECT count(*) FROM t1 GROUP BY (x,y) + - | ) AS q1 + - | ) AS q2 + - | WHERE q2.x > 1; - 7.68e-01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; - 0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); - 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + - | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + - | (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); - 0.00e+00 | SELECT count(*) FROM + - | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + - | JOIN + - | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + - | ON q1.x = q2.x+1; + error | query_text 
+-------+------------------------------------------------------------------------------------------------ + 0.768 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + 0.070 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + 1.416 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + 0.000 | SELECT * FROM + + | (SELECT * FROM t WHERE x < 0) AS t0 + + | JOIN + + | (SELECT * FROM t WHERE x > 20) AS t1 + + | USING(x); + 0.000 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 0.000 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT count(*) FROM t WHERE + + | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + + | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + 0.454 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + 0.000 | SELECT count(*) FROM ( + + | SELECT count(*) AS x FROM ( + + | SELECT count(*) FROM t1 GROUP BY (x,y) + + | ) AS q1 + + | ) AS q2 + + | WHERE q2.x > 1; + 0.768 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + 0.000 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); + 0.000 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + + | (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 0.000 | SELECT count(*) FROM + + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + + | JOIN + + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + + | ON q1.x = q2.x+1; (12 rows) DROP TABLE t,t1 CASCADE; -- delete all tables used in the test diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 6446b741..9f26b9a6 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -170,7 +170,7 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? 
-- Live with this variant of the test for some time. -SELECT to_char(error, '9.99EEEE')::text AS error, query_text +SELECT round(error::numeric, 3) AS error, query_text FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.queryid = cef.id ORDER BY (md5(query_text),error) DESC; From 714a817cb897bbf1fa6ad86ceb170d018e16cf19 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Mon, 17 Oct 2022 13:52:23 +0500 Subject: [PATCH 132/203] Bugfix. Do not delete AQO ML data file after loading into memory. --- storage.c | 1 - 1 file changed, 1 deletion(-) diff --git a/storage.c b/storage.c index f72d6aca..215a87b6 100644 --- a/storage.c +++ b/storage.c @@ -927,7 +927,6 @@ data_load(const char *filename, deform_record_t callback, void *ctx) } FreeFile(file); - unlink(filename); elog(LOG, "[AQO] %ld records loaded from file %s.", num, filename); return; From af377b4b56cb926158b81f9a53fcb854cd2908d4 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 18 Oct 2022 10:35:00 +0300 Subject: [PATCH 133/203] Reset aqo data before droping test table. --- expected/statement_timeout.out | 2 +- sql/statement_timeout.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index 302b9b43..0b26b430 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -111,12 +111,12 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); 5 (1 row) -DROP TABLE t; SELECT 1 FROM aqo_reset(); ?column? 
---------- 1 (1 row) +DROP TABLE t; DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql index 9666c1de..36afc370 100644 --- a/sql/statement_timeout.sql +++ b/sql/statement_timeout.sql @@ -60,7 +60,7 @@ SET statement_timeout = 5500; SELECT *, pg_sleep(1) FROM t; -- Get reliable data SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -DROP TABLE t; SELECT 1 FROM aqo_reset(); +DROP TABLE t; DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; From a9222489f78991ba4c72a4ad54ce43786098c6c5 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 20 Oct 2022 09:36:21 +0500 Subject: [PATCH 134/203] Add schedule for regression tests instead of list of tests in the REGRESS variable. The real reason is to successfully pass the statement_timeout test in very slow environments. We must inialize REGRESS. So, add an empty dummy test just to define the variable. regress_schedule contains the full list of real tests. So all changes for real tests will be made in a general way in regress_schedule. Authors: a.lepikhov, m.polyakova. 
--- Makefile | 23 ++++------------------- expected/aqo_dummy_test.out | 0 regress_schedule | 21 +++++++++++++++++++++ sql/aqo_dummy_test.sql | 0 4 files changed, 25 insertions(+), 19 deletions(-) create mode 100644 expected/aqo_dummy_test.out create mode 100644 regress_schedule create mode 100644 sql/aqo_dummy_test.sql diff --git a/Makefile b/Makefile index 1ef23b54..8866ab7c 100755 --- a/Makefile +++ b/Makefile @@ -11,25 +11,10 @@ OBJS = $(WIN32RES) \ TAP_TESTS = 1 -REGRESS = aqo_disabled \ - aqo_controlled \ - aqo_intelligent \ - aqo_forced \ - aqo_learn \ - schema \ - aqo_fdw \ - aqo_CVE-2020-14350 \ - gucs \ - forced_stat_collection \ - unsupported \ - clean_aqo_data \ - plancache \ - statement_timeout \ - temp_tables \ - top_queries \ - relocatable\ - look_a_like \ - feature_subspace +# Use an empty dummy test to define the variable REGRESS and therefore run all +# regression tests. regress_schedule contains the full list of real tests. +REGRESS = aqo_dummy_test +REGRESS_OPTS = --schedule=$(srcdir)/regress_schedule fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw stat_srcdir = $(top_srcdir)/contrib/pg_stat_statements diff --git a/expected/aqo_dummy_test.out b/expected/aqo_dummy_test.out new file mode 100644 index 00000000..e69de29b diff --git a/regress_schedule b/regress_schedule new file mode 100644 index 00000000..b67bc207 --- /dev/null +++ b/regress_schedule @@ -0,0 +1,21 @@ +test: aqo_disabled +test: aqo_controlled +test: aqo_intelligent +test: aqo_forced +test: aqo_learn +test: schema +test: aqo_fdw +test: aqo_CVE-2020-14350 +test: gucs +test: forced_stat_collection +test: unsupported +test: clean_aqo_data +test: plancache +# Performance-dependent test. 
Can be ignored if executes in containers or on slow machines +ignore: statement_timeout +test: statement_timeout +test: temp_tables +test: top_queries +test: relocatable +test: look_a_like +test: feature_subspace diff --git a/sql/aqo_dummy_test.sql b/sql/aqo_dummy_test.sql new file mode 100644 index 00000000..e69de29b From 2db27e110bf29b42933ec19f14348440251c111a Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 20 Oct 2022 11:35:21 +0500 Subject: [PATCH 135/203] Update github actions file (c-cpp.yml): 1. Enable TAP-tests 2. Add some useful options for configure and build stages. 3. Parameterize github CI, just to reduce code duplication. Authors: m.polyakova (mostly), and a.lepikhov. --- .github/workflows/c-cpp.yml | 22 +++++++++++++++++++--- t/001_pgbench.pl | 2 +- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 3c987855..71989628 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -4,8 +4,12 @@ on: push: branches: [ stable13 ] pull_request: + branches: [ stable13 ] +env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + jobs: build: @@ -14,14 +18,26 @@ jobs: steps: - name: pg run: | - echo "Deploying to production server on branch $GITHUB_REF" + sudo apt install libipc-run-perl + echo "Deploying to production server on branch" $BRANCH_NAME git config --global user.email "ci@postgrespro.ru" git config --global user.name "CI PgPro admin" + export COPT=-Werror + export CONFIGURE_OPTS="--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" git clone https://github.com/postgres/postgres.git pg cd pg + git checkout REL_13_STABLE - ./configure --prefix=`pwd`/tmp_install - git subtree add --prefix=contrib/aqo https://github.com/postgrespro/aqo.git $GITHUB_REF + ./configure $CONFIGURE_OPTS CFLAGS="-O3" + git clone https://github.com/postgrespro/aqo.git contrib/aqo + git -C 
contrib/aqo checkout $BRANCH_NAME patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg13.patch make -j4 > /dev/null && make -j4 -C contrib > /dev/null env CLIENTS=50 THREADS=50 make -C contrib/aqo check + + echo "Use AQO with debug code included" + git clean -fdx + git -C contrib/aqo clean -fdx + ./configure $CONFIGURE_OPTS CFLAGS="-DAQO_DEBUG_PRINT -O0" + make -j4 > /dev/null && make -j4 -C contrib > /dev/null + env CLIENTS=50 THREADS=50 make -C contrib/aqo check diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index eae0c829..3aa3b7b5 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -335,7 +335,7 @@ "SELECT sum(executions_with_aqo) FROM test.aqo_query_stat"); # 25 trans * 10 clients * 4 query classes = 1000 + unique SELECT to pgbench_branches -is($res, 1001, 'Each query should be logged in LEARN mode'); +is($res, $CLIENTS*100+1, 'Each query should be logged in LEARN mode'); $res = $node->safe_psql('postgres', "SELECT sum(executions_without_aqo) FROM test.aqo_query_stat"); is($res, 0, 'AQO has learned on the queries - 2'); From 2b13279e57b249877f3f637fa213b96a496f9e13 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 8 Nov 2022 10:25:54 +0300 Subject: [PATCH 136/203] Fix aqo.fs_max_items, add.fss_max_items. Set GucContext as PGC_POSTMASTER to allow values to be changed only before the instance is started. 
--- aqo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aqo.c b/aqo.c index 630ef1cd..03a9dce7 100644 --- a/aqo.c +++ b/aqo.c @@ -245,7 +245,7 @@ _PG_init(void) &fs_max_items, 10000, 1, INT_MAX, - PGC_SUSET, + PGC_POSTMASTER, 0, NULL, NULL, @@ -258,7 +258,7 @@ _PG_init(void) &fss_max_items, 100000, 0, INT_MAX, - PGC_SUSET, + PGC_POSTMASTER, 0, NULL, NULL, From 743524b3cd66c744288cbc141c4ee94142e4f415 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 6 Oct 2022 08:48:12 +0500 Subject: [PATCH 137/203] Change names of interface functions for better usage --- aqo--1.4--1.5.sql | 9 +++++---- expected/aqo_CVE-2020-14350.out | 24 ++++++++++++------------ expected/relocatable.out | 12 ++++++------ sql/aqo_CVE-2020-14350.sql | 16 ++++++++-------- sql/relocatable.sql | 4 ++-- storage.c | 8 ++++---- 6 files changed, 37 insertions(+), 36 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 3244a721..d6e8be38 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -19,6 +19,7 @@ DROP TABLE public.aqo_data CASCADE; DROP TABLE public.aqo_queries CASCADE; DROP TABLE public.aqo_query_texts CASCADE; DROP TABLE public.aqo_query_stat CASCADE; +DROP FUNCTION invalidate_deactivated_queries_cache; /* @@ -76,14 +77,14 @@ CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); /* UI functions */ -CREATE FUNCTION aqo_enable_query(queryid bigint) +CREATE FUNCTION aqo_enable_class(queryid bigint) RETURNS void -AS 'MODULE_PATHNAME', 'aqo_enable_query' +AS 'MODULE_PATHNAME', 'aqo_enable_class' LANGUAGE C STRICT VOLATILE; -CREATE FUNCTION aqo_disable_query(queryid bigint) +CREATE FUNCTION aqo_disable_class(queryid bigint) RETURNS void -AS 'MODULE_PATHNAME', 'aqo_enable_query' +AS 'MODULE_PATHNAME', 'aqo_disable_class' LANGUAGE C STRICT VOLATILE; CREATE FUNCTION aqo_queries_update( diff --git a/expected/aqo_CVE-2020-14350.out b/expected/aqo_CVE-2020-14350.out index ccdc4694..8685b935 100644 --- a/expected/aqo_CVE-2020-14350.out +++ 
b/expected/aqo_CVE-2020-14350.out @@ -116,7 +116,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_enable_query(hash bigint) +CREATE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -125,9 +125,9 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_enable_query" already exists with same argument types +ERROR: function "aqo_enable_class" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -135,8 +135,8 @@ BEGIN END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_enable_query(42); - aqo_enable_query +SELECT aqo_enable_class(42); + aqo_enable_class ------------------ (1 row) @@ -149,7 +149,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_enable_query(bigint); +DROP FUNCTION aqo_enable_class(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 4 @@ -162,7 +162,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_disable_query(hash bigint) +CREATE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -171,9 +171,9 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_disable_query" already exists with same argument types +ERROR: function "aqo_disable_class" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -181,8 +181,8 @@ BEGIN END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_disable_query(42); - aqo_disable_query +SELECT aqo_disable_class(42); + aqo_disable_class ------------------- (1 row) @@ -195,7 +195,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_disable_query(bigint); +DROP FUNCTION aqo_disable_class(bigint); DROP EXTENSION 
IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 5 diff --git a/expected/relocatable.out b/expected/relocatable.out index 5fcf06e6..949896f6 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -80,9 +80,9 @@ ORDER BY (md5(query_text)) /* * Below, we should check each UI function */ -SELECT aqo_disable_query(id) FROM ( +SELECT aqo_disable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; - aqo_disable_query + aqo_disable_class ------------------- @@ -93,13 +93,13 @@ ORDER BY (learn_aqo, use_aqo, auto_tuning); learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f - t | t | f - t | t | f + f | f | f + f | f | f (3 rows) -SELECT aqo_enable_query(id) FROM ( +SELECT aqo_enable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; - aqo_enable_query + aqo_enable_class ------------------ diff --git a/sql/aqo_CVE-2020-14350.sql b/sql/aqo_CVE-2020-14350.sql index 1b36b50b..75833223 100644 --- a/sql/aqo_CVE-2020-14350.sql +++ b/sql/aqo_CVE-2020-14350.sql @@ -103,7 +103,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_enable_query(hash bigint) +CREATE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -115,7 +115,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -124,13 +124,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_enable_query(42); +SELECT aqo_enable_class(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_enable_query(bigint); +DROP FUNCTION aqo_enable_class(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 4 @@ -140,7 +140,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_disable_query(hash bigint) +CREATE FUNCTION 
aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -152,7 +152,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -161,13 +161,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_disable_query(42); +SELECT aqo_disable_class(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_disable_query(bigint); +DROP FUNCTION aqo_disable_class(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 5 diff --git a/sql/relocatable.sql b/sql/relocatable.sql index e8cc57c3..780c385e 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -39,11 +39,11 @@ ORDER BY (md5(query_text)) /* * Below, we should check each UI function */ -SELECT aqo_disable_query(id) FROM ( +SELECT aqo_disable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries ORDER BY (learn_aqo, use_aqo, auto_tuning); -SELECT aqo_enable_query(id) FROM ( +SELECT aqo_enable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries ORDER BY (learn_aqo, use_aqo, auto_tuning); diff --git a/storage.c b/storage.c index 215a87b6..7dcc8fce 100644 --- a/storage.c +++ b/storage.c @@ -96,8 +96,8 @@ PG_FUNCTION_INFO_V1(aqo_query_stat); PG_FUNCTION_INFO_V1(aqo_query_texts); PG_FUNCTION_INFO_V1(aqo_data); PG_FUNCTION_INFO_V1(aqo_queries); -PG_FUNCTION_INFO_V1(aqo_enable_query); -PG_FUNCTION_INFO_V1(aqo_disable_query); +PG_FUNCTION_INFO_V1(aqo_enable_class); +PG_FUNCTION_INFO_V1(aqo_disable_class); PG_FUNCTION_INFO_V1(aqo_queries_update); PG_FUNCTION_INFO_V1(aqo_reset); PG_FUNCTION_INFO_V1(aqo_cleanup); @@ -1904,7 +1904,7 @@ aqo_queries_reset(void) } Datum -aqo_enable_query(PG_FUNCTION_ARGS) +aqo_enable_class(PG_FUNCTION_ARGS) { uint64 queryid = (uint64) PG_GETARG_INT64(0); 
QueriesEntry *entry; @@ -1935,7 +1935,7 @@ aqo_enable_query(PG_FUNCTION_ARGS) } Datum -aqo_disable_query(PG_FUNCTION_ARGS) +aqo_disable_class(PG_FUNCTION_ARGS) { uint64 queryid = (uint64) PG_GETARG_INT64(0); QueriesEntry *entry; From b6dd1c611b504c89b2c8857170663a57d7438876 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 12 Oct 2022 15:08:03 +0500 Subject: [PATCH 138/203] Bugfix. AQOUtilityMemCtx is reset although some allocated data still in use. Remove the AQOUtilityMemCtx memory context at all. It is used for too small operations. I don't buy that such operations can allocate so much memory that backend must free memory right after the end of operation to avoid OOM. I guess, prediction, planning and execution memory context set is good enough. --- aqo.c | 13 +------------ aqo.h | 1 - hash.c | 19 +++++-------------- postprocessing.c | 8 -------- preprocessing.c | 13 +++---------- storage.c | 4 ---- 6 files changed, 9 insertions(+), 49 deletions(-) diff --git a/aqo.c b/aqo.c index 03a9dce7..53f1fbd4 100644 --- a/aqo.c +++ b/aqo.c @@ -87,9 +87,6 @@ MemoryContext AQOTopMemCtx = NULL; /* Is released at the end of transaction */ MemoryContext AQOCacheMemCtx = NULL; -/* Should be released in-place, just after a huge calculation */ -MemoryContext AQOUtilityMemCtx = NULL; - /* Is released at the end of planning */ MemoryContext AQOPredictMemCtx = NULL; @@ -342,15 +339,7 @@ _PG_init(void) AQOCacheMemCtx = AllocSetContextCreate(AQOTopMemCtx, "AQOCacheMemCtx", ALLOCSET_DEFAULT_SIZES); - /* - * AQOUtilityMemoryContext containe short-lived information which - * is appeared from having got clause, selectivity arrays and relid lists - * while calculating hashes. It clean up inside calculated - * function or immediately after her having completed. 
- */ - AQOUtilityMemCtx = AllocSetContextCreate(AQOTopMemCtx, - "AQOUtilityMemoryContext", - ALLOCSET_DEFAULT_SIZES); + /* * AQOPredictMemoryContext save necessary information for making predict of plan nodes * and clean up in the execution stage of query. diff --git a/aqo.h b/aqo.h index 8cad51c2..aa1e3964 100644 --- a/aqo.h +++ b/aqo.h @@ -225,7 +225,6 @@ extern int njoins; /* AQO Memory contexts */ extern MemoryContext AQOTopMemCtx; extern MemoryContext AQOCacheMemCtx; -extern MemoryContext AQOUtilityMemCtx; extern MemoryContext AQOPredictMemCtx; extern MemoryContext AQOLearnMemCtx; diff --git a/hash.c b/hash.c index d4866448..96d402a1 100644 --- a/hash.c +++ b/hash.c @@ -212,7 +212,6 @@ get_fss_for_object(List *relsigns, List *clauselist, int sh = 0, old_sh; int fss_hash; - MemoryContext old_ctx_m; n = list_length(clauselist); @@ -220,12 +219,14 @@ get_fss_for_object(List *relsigns, List *clauselist, Assert(n == list_length(selectivities) || (nfeatures == NULL && features == NULL)); - get_eclasses(clauselist, &nargs, &args_hash, &eclass_hash); + /* + * It should be allocated in a caller memory context, because it will be + * returned. 
+ */ if (nfeatures != NULL) *features = palloc0(sizeof(**features) * n); - old_ctx_m = MemoryContextSwitchTo(AQOUtilityMemCtx); - + get_eclasses(clauselist, &nargs, &args_hash, &eclass_hash); clause_hashes = palloc(sizeof(*clause_hashes) * n); clause_has_consts = palloc(sizeof(*clause_has_consts) * n); sorted_clauses = palloc(sizeof(*sorted_clauses) * n); @@ -299,9 +300,6 @@ get_fss_for_object(List *relsigns, List *clauselist, relations_hash = (int) get_relations_hash(relsigns); fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relations_hash); - MemoryContextSwitchTo(old_ctx_m); - MemoryContextReset(AQOUtilityMemCtx); - if (nfeatures != NULL) { *nfeatures = n - sh; @@ -682,19 +680,14 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) int i, v; int *e_hashes; - MemoryContext old_ctx_m; get_clauselist_args(clauselist, nargs, args_hash); *eclass_hash = palloc((*nargs) * sizeof(**eclass_hash)); - old_ctx_m = MemoryContextSwitchTo(AQOUtilityMemCtx); - p = perform_eclasses_join(clauselist, *nargs, *args_hash); lsts = palloc((*nargs) * sizeof(*lsts)); e_hashes = palloc((*nargs) * sizeof(*e_hashes)); - MemoryContextSwitchTo(old_ctx_m); - for (i = 0; i < *nargs; ++i) lsts[i] = NIL; @@ -708,8 +701,6 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) for (i = 0; i < *nargs; ++i) (*eclass_hash)[i] = e_hashes[disjoint_set_get_parent(p, i)]; - - MemoryContextReset(AQOUtilityMemCtx); } /* diff --git a/postprocessing.c b/postprocessing.c index ef4bdaee..619d1c40 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -178,16 +178,13 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, double *cur_sel; int cur_hash; int cur_relid; - MemoryContext old_ctx_m; parametrized_sel = was_parametrized && (list_length(relidslist) == 1); if (parametrized_sel) { cur_relid = linitial_int(relidslist); - old_ctx_m = MemoryContextSwitchTo(AQOUtilityMemCtx); get_eclasses(clauselist, &nargs, &args_hash, 
&eclass_hash); - MemoryContextSwitchTo(old_ctx_m); } foreach(l, clauselist) @@ -221,11 +218,6 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, lst = lappend(lst, cur_sel); } - if (parametrized_sel) - { - MemoryContextReset(AQOUtilityMemCtx); - } - return lst; } diff --git a/preprocessing.c b/preprocessing.c index 55000e79..453cdc55 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -127,7 +127,8 @@ aqo_planner(Query *parse, ParamListInfo boundParams) { bool query_is_stored = false; - MemoryContext oldctx; + MemoryContext oldctx; + oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); /* @@ -156,15 +157,8 @@ aqo_planner(Query *parse, } selectivity_cache_clear(); - MemoryContextSwitchTo(oldctx); - oldctx = MemoryContextSwitchTo(AQOUtilityMemCtx); query_context.query_hash = get_query_hash(parse, query_string); - MemoryContextSwitchTo(oldctx); - - MemoryContextReset(AQOUtilityMemCtx); - - oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); /* By default, they should be equal */ query_context.fspace_hash = query_context.query_hash; @@ -185,15 +179,14 @@ aqo_planner(Query *parse, cursorOptions, boundParams); } - MemoryContextSwitchTo(oldctx); elog(DEBUG1, "AQO will be used for query '%s', class "UINT64_FORMAT, query_string ? 
query_string : "null string", query_context.query_hash); + MemoryContextSwitchTo(oldctx); oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); cur_classes = lappend_uint64(cur_classes, query_context.query_hash); MemoryContextSwitchTo(oldctx); - oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); if (aqo_mode == AQO_MODE_DISABLED) diff --git a/storage.c b/storage.c index 7dcc8fce..a42b0bee 100644 --- a/storage.c +++ b/storage.c @@ -2096,7 +2096,6 @@ cleanup_aqo_database(bool gentle, int *fs_num, int *fss_num) for(i = 0; i < dentry->nrels; i++) { Oid reloid = ObjectIdGetDatum(*(Oid *)ptr); - MemoryContext oldctx = MemoryContextSwitchTo(AQOUtilityMemCtx); if (!SearchSysCacheExists1(RELOID, reloid)) /* Remember this value */ @@ -2105,7 +2104,6 @@ cleanup_aqo_database(bool gentle, int *fs_num, int *fss_num) else actual_fss = list_append_unique_int(actual_fss, dentry->key.fss); - MemoryContextSwitchTo(oldctx); ptr += sizeof(Oid); } @@ -2155,8 +2153,6 @@ cleanup_aqo_database(bool gentle, int *fs_num, int *fss_num) /* Query class preferences */ (*fs_num) += (int) _aqo_queries_remove(entry->queryid); } - - MemoryContextReset(AQOUtilityMemCtx); } /* From db3403b80dbde7bba344bb5d301ec09691f91ba7 Mon Sep 17 00:00:00 2001 From: Marina Polyakova Date: Wed, 16 Nov 2022 17:26:26 +0300 Subject: [PATCH 139/203] Partial revert "Change names of interface functions for better usage" This reverts commit f097d8b3c428d909a1f7da7977a5bef8dfaa2f7b except for changes to the function invalidate_deactivated_queries_cache. 
--- aqo--1.4--1.5.sql | 9 +++++---- expected/aqo_CVE-2020-14350.out | 24 ++++++++++++------------ expected/gucs.out | 2 +- expected/relocatable.out | 12 ++++++------ sql/aqo_CVE-2020-14350.sql | 16 ++++++++-------- sql/relocatable.sql | 4 ++-- storage.c | 8 ++++---- 7 files changed, 38 insertions(+), 37 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index d6e8be38..622bb7fa 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -77,14 +77,14 @@ CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); /* UI functions */ -CREATE FUNCTION aqo_enable_class(queryid bigint) +CREATE FUNCTION aqo_enable_query(queryid bigint) RETURNS void -AS 'MODULE_PATHNAME', 'aqo_enable_class' +AS 'MODULE_PATHNAME', 'aqo_enable_query' LANGUAGE C STRICT VOLATILE; -CREATE FUNCTION aqo_disable_class(queryid bigint) +CREATE FUNCTION aqo_disable_query(queryid bigint) RETURNS void -AS 'MODULE_PATHNAME', 'aqo_disable_class' +AS 'MODULE_PATHNAME', 'aqo_enable_query' LANGUAGE C STRICT VOLATILE; CREATE FUNCTION aqo_queries_update( @@ -145,6 +145,7 @@ COMMENT ON FUNCTION aqo_drop_class(bigint) IS -- Returns number of deleted rows from aqo_queries and aqo_data tables. 
-- CREATE OR REPLACE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) +RETURNS SETOF record AS 'MODULE_PATHNAME', 'aqo_cleanup' LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_cleanup() IS diff --git a/expected/aqo_CVE-2020-14350.out b/expected/aqo_CVE-2020-14350.out index 8685b935..ccdc4694 100644 --- a/expected/aqo_CVE-2020-14350.out +++ b/expected/aqo_CVE-2020-14350.out @@ -116,7 +116,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_enable_class(hash bigint) +CREATE FUNCTION aqo_enable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -125,9 +125,9 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_enable_class" already exists with same argument types +ERROR: function "aqo_enable_query" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_class(hash bigint) +CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -135,8 +135,8 @@ BEGIN END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_enable_class(42); - aqo_enable_class +SELECT aqo_enable_query(42); + aqo_enable_query ------------------ (1 row) @@ -149,7 +149,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_enable_class(bigint); +DROP FUNCTION aqo_enable_query(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 4 @@ -162,7 +162,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_disable_class(hash bigint) +CREATE FUNCTION aqo_disable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -171,9 +171,9 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_disable_class" already exists with same argument types +ERROR: function "aqo_disable_query" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_class(hash bigint) +CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) RETURNS VOID AS $$ 
BEGIN @@ -181,8 +181,8 @@ BEGIN END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_disable_class(42); - aqo_disable_class +SELECT aqo_disable_query(42); + aqo_disable_query ------------------- (1 row) @@ -195,7 +195,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_disable_class(bigint); +DROP FUNCTION aqo_disable_query(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 5 diff --git a/expected/gucs.out b/expected/gucs.out index bbfd8001..7528c67b 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -93,7 +93,7 @@ SELECT obj_description('aqo_reset'::regproc::oid); List of functions Schema | Name | Result data type | Argument data types | Type --------+-------------+------------------+-----------------------------------+------ - public | aqo_cleanup | record | OUT nfs integer, OUT nfss integer | func + public | aqo_cleanup | SETOF record | OUT nfs integer, OUT nfss integer | func (1 row) \df aqo_reset diff --git a/expected/relocatable.out b/expected/relocatable.out index 949896f6..5fcf06e6 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -80,9 +80,9 @@ ORDER BY (md5(query_text)) /* * Below, we should check each UI function */ -SELECT aqo_disable_class(id) FROM ( +SELECT aqo_disable_query(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; - aqo_disable_class + aqo_disable_query ------------------- @@ -93,13 +93,13 @@ ORDER BY (learn_aqo, use_aqo, auto_tuning); learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f - f | f | f - f | f | f + t | t | f + t | t | f (3 rows) -SELECT aqo_enable_class(id) FROM ( +SELECT aqo_enable_query(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; - aqo_enable_class + aqo_enable_query ------------------ diff --git a/sql/aqo_CVE-2020-14350.sql b/sql/aqo_CVE-2020-14350.sql index 75833223..1b36b50b 100644 --- a/sql/aqo_CVE-2020-14350.sql +++ b/sql/aqo_CVE-2020-14350.sql @@ -103,7 
+103,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_enable_class(hash bigint) +CREATE FUNCTION aqo_enable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -115,7 +115,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_class(hash bigint) +CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -124,13 +124,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_enable_class(42); +SELECT aqo_enable_query(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_enable_class(bigint); +DROP FUNCTION aqo_enable_query(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 4 @@ -140,7 +140,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_disable_class(hash bigint) +CREATE FUNCTION aqo_disable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -152,7 +152,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_class(hash bigint) +CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -161,13 +161,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_disable_class(42); +SELECT aqo_disable_query(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_disable_class(bigint); +DROP FUNCTION aqo_disable_query(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 5 diff --git a/sql/relocatable.sql b/sql/relocatable.sql index 780c385e..e8cc57c3 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -39,11 +39,11 @@ ORDER BY (md5(query_text)) /* * Below, we should check each UI function */ -SELECT aqo_disable_class(id) FROM ( +SELECT aqo_disable_query(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries ORDER BY (learn_aqo, use_aqo, auto_tuning); -SELECT aqo_enable_class(id) FROM ( +SELECT 
aqo_enable_query(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries ORDER BY (learn_aqo, use_aqo, auto_tuning); diff --git a/storage.c b/storage.c index a42b0bee..d81197ac 100644 --- a/storage.c +++ b/storage.c @@ -96,8 +96,8 @@ PG_FUNCTION_INFO_V1(aqo_query_stat); PG_FUNCTION_INFO_V1(aqo_query_texts); PG_FUNCTION_INFO_V1(aqo_data); PG_FUNCTION_INFO_V1(aqo_queries); -PG_FUNCTION_INFO_V1(aqo_enable_class); -PG_FUNCTION_INFO_V1(aqo_disable_class); +PG_FUNCTION_INFO_V1(aqo_enable_query); +PG_FUNCTION_INFO_V1(aqo_disable_query); PG_FUNCTION_INFO_V1(aqo_queries_update); PG_FUNCTION_INFO_V1(aqo_reset); PG_FUNCTION_INFO_V1(aqo_cleanup); @@ -1904,7 +1904,7 @@ aqo_queries_reset(void) } Datum -aqo_enable_class(PG_FUNCTION_ARGS) +aqo_enable_query(PG_FUNCTION_ARGS) { uint64 queryid = (uint64) PG_GETARG_INT64(0); QueriesEntry *entry; @@ -1935,7 +1935,7 @@ aqo_enable_class(PG_FUNCTION_ARGS) } Datum -aqo_disable_class(PG_FUNCTION_ARGS) +aqo_disable_query(PG_FUNCTION_ARGS) { uint64 queryid = (uint64) PG_GETARG_INT64(0); QueriesEntry *entry; From c9fb67faa589f4a42efe8e1a95f1870777b6e8d6 Mon Sep 17 00:00:00 2001 From: Marina Polyakova Date: Thu, 24 Nov 2022 06:48:22 +0300 Subject: [PATCH 140/203] Revert last changes in aqo--1.4--1.5.sql They can only be made in the next version of aqo. Because the current one is already released. 
--- aqo--1.4--1.5.sql | 2 -- expected/gucs.out | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 622bb7fa..3244a721 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -19,7 +19,6 @@ DROP TABLE public.aqo_data CASCADE; DROP TABLE public.aqo_queries CASCADE; DROP TABLE public.aqo_query_texts CASCADE; DROP TABLE public.aqo_query_stat CASCADE; -DROP FUNCTION invalidate_deactivated_queries_cache; /* @@ -145,7 +144,6 @@ COMMENT ON FUNCTION aqo_drop_class(bigint) IS -- Returns number of deleted rows from aqo_queries and aqo_data tables. -- CREATE OR REPLACE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) -RETURNS SETOF record AS 'MODULE_PATHNAME', 'aqo_cleanup' LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_cleanup() IS diff --git a/expected/gucs.out b/expected/gucs.out index 7528c67b..bbfd8001 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -93,7 +93,7 @@ SELECT obj_description('aqo_reset'::regproc::oid); List of functions Schema | Name | Result data type | Argument data types | Type --------+-------------+------------------+-----------------------------------+------ - public | aqo_cleanup | SETOF record | OUT nfs integer, OUT nfss integer | func + public | aqo_cleanup | record | OUT nfs integer, OUT nfss integer | func (1 row) \df aqo_reset From 1ff3190af1c94dfbcfe4eb6042918baa4c88de7d Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 7 Oct 2022 15:53:05 +0500 Subject: [PATCH 141/203] [PGPRO-7183] bring in line stable 13, 14, 15 Cherry-pick commit: 690776aad838c3318b8b1f6800a3367abc1c2fe1 Bugfix. 
AQO plan node must have reasonable set of serialization routines --- aqo.h | 1 - cardinality_estimation.c | 1 + hash.c | 18 ++++++------ path_utils.c | 63 ++++++++++++++++++++++------------------ utils.c | 12 -------- 5 files changed, 45 insertions(+), 50 deletions(-) diff --git a/aqo.h b/aqo.h index aa1e3964..4471d2b8 100644 --- a/aqo.h +++ b/aqo.h @@ -283,7 +283,6 @@ void aqo_ExecutorEnd(QueryDesc *queryDesc); extern void automatical_query_tuning(uint64 query_hash, struct StatEntry *stat); /* Utilities */ -extern int int64_compare(const void *a, const void *b); extern int int_cmp(const void *a, const void *b); extern int double_cmp(const void *a, const void *b); extern int *argsort(void *a, int n, size_t es, diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 96cd2c70..9db202a1 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -103,6 +103,7 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, result = OkNNr_predict(data, features); } } + #ifdef AQO_DEBUG_PRINT predict_debug_output(clauses, selectivities, relsigns, *fss, result); #endif diff --git a/hash.c b/hash.c index 96d402a1..a7f7f9c1 100644 --- a/hash.c +++ b/hash.c @@ -33,7 +33,7 @@ static int get_node_hash(Node *node); static int get_unsorted_unsafe_int_array_hash(int *arr, int len); static int get_unordered_int_list_hash(List *lst); -static int64 get_relations_hash(List *relsigns); +static int get_relations_hash(List *relsigns); static int get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash); @@ -297,7 +297,7 @@ get_fss_for_object(List *relsigns, List *clauselist, clauses_hash = get_int_array_hash(sorted_clauses, n - sh); eclasses_hash = get_int_array_hash(eclass_hash, nargs); - relations_hash = (int) get_relations_hash(relsigns); + relations_hash = get_relations_hash(relsigns); fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relations_hash); if (nfeatures != NULL) @@ -465,26 +465,26 @@ get_fss_hash(int clauses_hash, 
int eclasses_hash, int relidslist_hash) * Hash is supposed to be relations-order-insensitive. * Each element of a list must have a String type, */ -static int64 +static int get_relations_hash(List *relsigns) { int nhashes = 0; - int64 *hashes = palloc(list_length(relsigns) * sizeof(uint64)); + uint32 *hashes = palloc(list_length(relsigns) * sizeof(uint32)); ListCell *lc; - int64 result; + int result; foreach(lc, relsigns) { - hashes[nhashes++] = *(int64 *) lfirst(lc); + hashes[nhashes++] = (uint32) lfirst_int(lc); } /* Sort the array to make query insensitive to input order of relations. */ - qsort(hashes, nhashes, sizeof(int64), int64_compare); + qsort(hashes, nhashes, sizeof(uint32), int_cmp); /* Make a final hash value */ - result = DatumGetInt64(hash_any_extended((const unsigned char *) hashes, - nhashes * sizeof(int64), 0)); + result = DatumGetInt32(hash_any((const unsigned char *) hashes, + nhashes * sizeof(uint32))); return result; } diff --git a/path_utils.c b/path_utils.c index a84d38fe..7e849df7 100644 --- a/path_utils.c +++ b/path_utils.c @@ -135,10 +135,10 @@ get_selectivities(PlannerInfo *root, /* * Based on the hashTupleDesc() routine */ -static uint64 +static uint32 hashTempTupleDesc(TupleDesc desc) { - uint64 s; + uint32 s; int i; s = hash_combine(0, hash_uint32(desc->natts)); @@ -146,11 +146,11 @@ hashTempTupleDesc(TupleDesc desc) for (i = 0; i < desc->natts; ++i) { const char *attname = NameStr(TupleDescAttr(desc, i)->attname); - uint64 s1; + uint32 s1; - s = hash_combine64(s, hash_uint32(TupleDescAttr(desc, i)->atttypid)); - s1 = hash_bytes_extended((const unsigned char *) attname, strlen(attname), 0); - s = hash_combine64(s, s1); + s = hash_combine(s, hash_uint32(TupleDescAttr(desc, i)->atttypid)); + s1 = hash_bytes((const unsigned char *) attname, strlen(attname)); + s = hash_combine(s, s1); } return s; } @@ -186,8 +186,8 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) if (!OidIsValid(entry->relid)) { - /* Invalid oid 
*/ - hashes = lappend_uint64(hashes, (UINT64_MAX / 7)); + /* TODO: Explain this logic. */ + hashes = lappend_int(hashes, INT32_MAX / 3); continue; } @@ -212,7 +212,7 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) trel = relation_open(entry->relid, NoLock); tdesc = RelationGetDescr(trel); Assert(CheckRelationLockedByMe(trel, AccessShareLock, true)); - hashes = lappend_uint64(hashes, hashTempTupleDesc(tdesc)); + hashes = lappend_int(hashes, hashTempTupleDesc(tdesc)); relation_close(trel, NoLock); } else @@ -222,9 +222,9 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) get_namespace_name(get_rel_namespace(entry->relid)), relrewrite ? get_rel_name(relrewrite) : relname); - hashes = lappend_uint64(hashes, DatumGetInt64(hash_any_extended( + hashes = lappend_int(hashes, DatumGetInt32(hash_any( (unsigned char *) relname, - strlen(relname), 0))); + strlen(relname)))); hrels = lappend_oid(hrels, entry->relid); } @@ -591,7 +591,7 @@ AQOnodeCopy(struct ExtensibleNode *enew, const struct ExtensibleNode *eold) /* These lists couldn't contain AQO nodes. Use basic machinery */ new->rels = palloc(sizeof(RelSortOut)); new->rels->hrels = list_copy(old->rels->hrels); - new->rels->signatures = list_copy_uint64(old->rels->signatures); + new->rels->signatures = list_copy(old->rels->signatures); new->clauses = copyObject(old->clauses); new->grouping_exprs = copyObject(old->grouping_exprs); @@ -626,21 +626,24 @@ AQOnodeEqual(const struct ExtensibleNode *a, const struct ExtensibleNode *b) #define WRITE_FLOAT_FIELD(fldname,format) \ appendStringInfo(str, " :" CppAsString(fldname) " " format, node->fldname) +/* + * Serialize AQO plan node to a string. + * + * Right now we can't correctly serialize all fields of the node. Taking into + * account that this action needed when a plan moves into parallel workers or + * just during debugging, we serialize it only partially, just for debug + * purposes. 
+ * Some extensions may manipulate by parts of serialized plan too. + */ static void AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) { AQOPlanNode *node = (AQOPlanNode *) enode; - Assert(0); - WRITE_BOOL_FIELD(had_path); - WRITE_NODE_FIELD(rels); - WRITE_NODE_FIELD(clauses); - WRITE_NODE_FIELD(selectivities); - WRITE_NODE_FIELD(grouping_exprs); - - WRITE_ENUM_FIELD(jointype, JoinType); - WRITE_FLOAT_FIELD(parallel_divisor, "%.5f"); - WRITE_BOOL_FIELD(was_parametrized); + node->had_path = false; + node->jointype = 0; + node->parallel_divisor = 1.0; + node->was_parametrized = false; /* For Adaptive optimization DEBUG purposes */ WRITE_INT_FIELD(fss); @@ -677,6 +680,11 @@ AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) (void) token; /* in case not used elsewhere */ \ local_node->fldname = nodeRead(NULL, 0) +/* + * Deserialize AQO plan node from a string to internal representation. + * + * Should work in coherence with AQOnodeOut(). + */ static void AQOnodeRead(struct ExtensibleNode *enode) { @@ -684,17 +692,16 @@ AQOnodeRead(struct ExtensibleNode *enode) const char *token; int length; - Assert(0); READ_BOOL_FIELD(had_path); - READ_NODE_FIELD(rels); - READ_NODE_FIELD(clauses); - READ_NODE_FIELD(selectivities); - READ_NODE_FIELD(grouping_exprs); - READ_ENUM_FIELD(jointype, JoinType); READ_FLOAT_FIELD(parallel_divisor); READ_BOOL_FIELD(was_parametrized); + local_node->rels = palloc0(sizeof(RelSortOut)); + local_node->clauses = NIL; + local_node->selectivities = NIL; + local_node->grouping_exprs = NIL; + /* For Adaptive optimization DEBUG purposes */ READ_INT_FIELD(fss); READ_FLOAT_FIELD(prediction); diff --git a/utils.c b/utils.c index 029af9ab..c44b3a64 100644 --- a/utils.c +++ b/utils.c @@ -28,18 +28,6 @@ static int argsort_cmp(const void *a, const void *b); * qsort comparator functions */ -/* int64 comparator for pg_qsort. 
*/ -int -int64_compare(const void *va, const void *vb) -{ - int64 a = *((const int64 *) va); - int64 b = *((const int64 *) vb); - - if (a == b) - return 0; - return (a > b) ? 1 : -1; -} - /* * Function for qsorting an integer arrays */ From 5654c202e7da456f861c6552b330b68dc0134f16 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 7 Oct 2022 19:07:23 +0500 Subject: [PATCH 142/203] [PGPRO-7183] bring in line stable 13, 14, 15 Cherry-pick commit: 9595a940041ee2c3821e929dff3ef2ba8eae6b6a Fix the bug with serialization machinery. --- expected/feature_subspace.out | 7 +- expected/look_a_like.out | 7 +- expected/parallel_workers.out | 125 ++++++++++++++++++++++++++++++++++ expected/unsupported.out | 8 +-- path_utils.c | 36 +++++----- postprocessing.c | 34 +++++---- sql/parallel_workers.sql | 61 +++++++++++++++++ 7 files changed, 234 insertions(+), 44 deletions(-) create mode 100644 expected/parallel_workers.out create mode 100644 sql/parallel_workers.sql diff --git a/expected/feature_subspace.out b/expected/feature_subspace.out index 185bede0..a49be254 100644 --- a/expected/feature_subspace.out +++ b/expected/feature_subspace.out @@ -29,19 +29,17 @@ WHERE str NOT LIKE '%Memory%'; AQO not used Merge Cond: (a.x = b.x) -> Sort (actual rows=10 loops=1) - AQO not used Sort Key: a.x -> Seq Scan on a (actual rows=10 loops=1) AQO not used -> Sort (actual rows=11 loops=1) - AQO not used Sort Key: b.x -> Seq Scan on b (actual rows=100 loops=1) AQO not used Using aqo: true AQO mode: LEARN JOINS: 0 -(16 rows) +(14 rows) -- TODO: Using method of other classes neighbours we get a bad estimation. 
SELECT str AS result @@ -56,13 +54,12 @@ WHERE str NOT LIKE '%Memory%'; -> Seq Scan on b (actual rows=100 loops=1) AQO: rows=100, error=0% -> Hash (actual rows=10 loops=1) - AQO not used -> Seq Scan on a (actual rows=10 loops=1) AQO: rows=10, error=0% Using aqo: true AQO mode: LEARN JOINS: 0 -(12 rows) +(11 rows) -- Look into the reason: two JOINs from different classes have the same FSS. SELECT to_char(d1.targets[1], 'FM999.00') AS target FROM aqo_data d1 diff --git a/expected/look_a_like.out b/expected/look_a_like.out index 9cba2c48..f3918dbf 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -148,7 +148,6 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; Output: a.x, b.y Merge Cond: (a.x = b.y) -> Sort (actual rows=1000 loops=1) - AQO not used Output: a.x Sort Key: a.x -> Seq Scan on public.a (actual rows=1000 loops=1) @@ -156,7 +155,6 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; Output: a.x Filter: (a.x < 10) -> Sort (actual rows=99901 loops=1) - AQO not used Output: b.y Sort Key: b.y -> Seq Scan on public.b (actual rows=1000 loops=1) @@ -165,7 +163,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; Using aqo: true AQO mode: LEARN JOINS: 0 -(22 rows) +(20 rows) -- cardinality 100 in Seq Scan on a and Seq Scan on b SELECT str AS result @@ -215,7 +213,6 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' Output: a.x Filter: (a.x < 10) -> Hash (actual rows=0 loops=1) - AQO not used Output: b.y -> Seq Scan on public.b (actual rows=0 loops=1) AQO: rows=1, error=100% @@ -225,7 +222,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' Using aqo: true AQO mode: LEARN JOINS: 0 -(19 rows) +(18 rows) RESET enable_material; DROP TABLE a,b CASCADE; diff --git a/expected/parallel_workers.out b/expected/parallel_workers.out new file mode 100644 index 00000000..14e086c8 --- /dev/null +++ b/expected/parallel_workers.out @@ -0,0 +1,125 @@ +-- 
Specifically test AQO machinery for queries uses partial paths and executed +-- with parallel workers. +CREATE EXTENSION aqo; +-- Utility tool. Allow to filter system-dependent strings from explain output. +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +SET aqo.join_threshold = 0; +SET aqo.mode = 'learn'; +SET aqo.show_details = true; +-- Be generous with a number parallel workers to test the machinery +SET max_parallel_workers = 64; +SET max_parallel_workers_per_gather = 64; +-- Enforce usage of parallel workers +SET parallel_setup_cost = 0.1; +SET parallel_tuple_cost = 0.0001; +CREATE TABLE t AS ( + SELECT x AS id, repeat('a', 512) AS payload FROM generate_series(1, 1E5) AS x +); +ANALYZE t; +-- Simple test. Check serialization machinery mostly. +SELECT count(*) FROM t WHERE id % 100 = 0; -- Learning stage + count +------- + 1000 +(1 row) + +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT count(*) FROM t WHERE id % 100 = 0;') AS str +WHERE str NOT LIKE '%Worker%'; + str +-------------------------------------------------------------------- + Finalize Aggregate (actual rows=1 loops=1) + AQO not used + -> Gather (actual rows=3 loops=1) + AQO not used + -> Partial Aggregate (actual rows=1 loops=3) + AQO not used + -> Parallel Seq Scan on t (actual rows=333 loops=3) + AQO: rows=1000, error=0% + Filter: ((id % '100'::numeric) = '0'::numeric) + Rows Removed by Filter: 33000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(13 rows) + +-- More complex query just to provoke errors +SELECT count(*) FROM + (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, + (SELECT max(id) AS id, payload FROM t + WHERE id % 101 = 0 GROUP BY (payload)) AS q2 +WHERE q1.id = q2.id; -- Learning stage + count +------- + 0 +(1 row) + +-- XXX: Why grouping prediction isn't working here? 
+SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) FROM + (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, + (SELECT max(id) AS id, payload FROM t + WHERE id % 101 = 0 GROUP BY (payload)) AS q2 +WHERE q1.id = q2.id;') AS str +WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%'; + str +-------------------------------------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Merge Join (actual rows=0 loops=1) + AQO not used + Merge Cond: (q2.id = t_1.id) + -> Sort (actual rows=1 loops=1) + Sort Key: q2.id + -> Subquery Scan on q2 (actual rows=1 loops=1) + AQO not used + -> Finalize GroupAggregate (actual rows=1 loops=1) + AQO not used + Group Key: t.payload + -> Gather Merge (actual rows=3 loops=1) + AQO not used + -> Partial GroupAggregate (actual rows=1 loops=3) + AQO not used + Group Key: t.payload + -> Sort (actual rows=330 loops=3) + AQO not used + Sort Key: t.payload + -> Parallel Seq Scan on t (actual rows=330 loops=3) + AQO: rows=991, error=0% + Filter: ((id % '101'::numeric) = '0'::numeric) + Rows Removed by Filter: 33003 + -> Group (actual rows=1000 loops=1) + AQO not used + Group Key: t_1.id + -> Gather Merge (actual rows=1000 loops=1) + AQO not used + -> Group (actual rows=333 loops=3) + AQO not used + Group Key: t_1.id + -> Sort (actual rows=333 loops=3) + AQO not used + Sort Key: t_1.id + -> Parallel Seq Scan on t t_1 (actual rows=333 loops=3) + AQO: rows=991, error=-1% + Filter: ((id % '100'::numeric) = '0'::numeric) + Rows Removed by Filter: 33000 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(42 rows) + +RESET parallel_tuple_cost; +RESET parallel_setup_cost; +RESET max_parallel_workers; +RESET max_parallel_workers_per_gather; +DROP TABLE t; +DROP FUNCTION expln; +DROP EXTENSION aqo; diff --git a/expected/unsupported.out b/expected/unsupported.out index dbdc1f7b..5cdcdc23 100644 --- a/expected/unsupported.out +++ 
b/expected/unsupported.out @@ -349,7 +349,6 @@ SELECT count(*) FROM Filter: (x <> t_1.x) Rows Removed by Filter: 50 -> Hash (actual rows=851 loops=1) - AQO not used -> Seq Scan on t (actual rows=851 loops=1) AQO: rows=851, error=0% Filter: (((x % 3))::numeric < (SubPlan 1)) @@ -364,7 +363,7 @@ SELECT count(*) FROM Using aqo: true AQO mode: LEARN JOINS: 1 -(31 rows) +(30 rows) -- Two identical subplans in a clause EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) @@ -527,12 +526,11 @@ WHERE str NOT LIKE '%Heap Blocks%'; Filter: (t.x < 3) Rows Removed by Filter: 300 -> Bitmap Index Scan on ind2 (actual rows=350 loops=1) - AQO not used Index Cond: (mod(t.x, 3) = 1) Using aqo: true AQO mode: LEARN JOINS: 0 -(14 rows) +(13 rows) -- Best choice is ... ANALYZE t; @@ -561,7 +559,7 @@ ORDER BY (md5(query_text),error) DESC; -------+------------------------------------------------------------------------------------------------ 0.768 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; 0.070 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; - 1.416 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + 0.000 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; 0.000 | SELECT * FROM + | (SELECT * FROM t WHERE x < 0) AS t0 + | JOIN + diff --git a/path_utils.c b/path_utils.c index 7e849df7..f601241b 100644 --- a/path_utils.c +++ b/path_utils.c @@ -67,9 +67,7 @@ create_aqo_plan_node() /* * Extract an AQO node from the plan private field. - * If no one node was found, return pointer to the default value or allocate new - * node (with default value) according to 'create' field. - * Can't return NULL value at all. + * If no one node was found, return pointer to the default value or return NULL. 
*/ AQOPlanNode * get_aqo_plan_node(Plan *plan, bool create) @@ -94,7 +92,7 @@ get_aqo_plan_node(Plan *plan, bool create) if (node == NULL) { if (!create) - return &DefaultAQOPlanNode; + return NULL; node = create_aqo_plan_node(); plan->ext_nodes = lappend(plan->ext_nodes, node); @@ -497,9 +495,14 @@ is_appropriate_path(Path *path) } /* - * Converts path info into plan node for collecting it after query execution. + * Add AQO data into the plan node, if necessary. + * + * The necesssary case is when AQO is learning on this query, used for a + * prediction (and we will need the data to show prediction error at the end) or + * just to gather a plan statistics. * Don't switch here to any AQO-specific memory contexts, because we should - * store AQO prediction in the same context, as the plan. + * store AQO prediction in the same context, as the plan. So, explicitly free + * all unneeded data. */ void aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) @@ -511,7 +514,8 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) if (prev_create_plan_hook) prev_create_plan_hook(root, src, dest); - if (!query_context.use_aqo && !query_context.learn_aqo) + if (!query_context.use_aqo && !query_context.learn_aqo && + !query_context.collect_stat) return; is_join_path = (src->type == T_NestPath || src->type == T_MergePath || @@ -568,6 +572,11 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) } else { + /* + * In the case of forced stat gathering AQO must store fss as well as + * parallel divisor. Negative predicted cardinality field will be a sign + * that it is not a prediction, just statistics. 
+ */ node->prediction = src->parent->predicted_cardinality; node->fss = src->parent->fss_hash; } @@ -640,11 +649,6 @@ AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) { AQOPlanNode *node = (AQOPlanNode *) enode; - node->had_path = false; - node->jointype = 0; - node->parallel_divisor = 1.0; - node->was_parametrized = false; - /* For Adaptive optimization DEBUG purposes */ WRITE_INT_FIELD(fss); WRITE_FLOAT_FIELD(prediction, "%.0f"); @@ -692,10 +696,10 @@ AQOnodeRead(struct ExtensibleNode *enode) const char *token; int length; - READ_BOOL_FIELD(had_path); - READ_ENUM_FIELD(jointype, JoinType); - READ_FLOAT_FIELD(parallel_divisor); - READ_BOOL_FIELD(was_parametrized); + local_node->had_path = false; + local_node->jointype = 0; + local_node->parallel_divisor = 1.0; + local_node->was_parametrized = false; local_node->rels = palloc0(sizeof(RelSortOut)); local_node->clauses = NIL; diff --git a/postprocessing.c b/postprocessing.c index 619d1c40..abbdcffd 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -109,13 +109,14 @@ learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, * Learn 'not executed' nodes only once, if no one another knowledge exists * for current feature subspace. */ - if (notExecuted && aqo_node->prediction > 0.) + if (notExecuted && aqo_node && aqo_node->prediction > 0.) return; target = log(learned); child_fss = get_fss_for_object(rels->signatures, ctx->clauselist, NIL, NULL,NULL); - fss = get_grouped_exprs_hash(child_fss, aqo_node->grouping_exprs); + fss = get_grouped_exprs_hash(child_fss, + aqo_node ? aqo_node->grouping_exprs : NIL); /* Critical section */ atomic_fss_learn_step(fs, fss, data, NULL, @@ -144,13 +145,13 @@ learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, ctx->selectivities, &ncols, &features); /* Only Agg nodes can have non-empty a grouping expressions list. 
*/ - Assert(!IsA(plan, Agg) || aqo_node->grouping_exprs != NIL); + Assert(!IsA(plan, Agg) || !aqo_node || aqo_node->grouping_exprs != NIL); /* * Learn 'not executed' nodes only once, if no one another knowledge exists * for current feature subspace. */ - if (notExecuted && aqo_node->prediction > 0) + if (notExecuted && aqo_node && aqo_node->prediction > 0) return; data = OkNNr_allocate(ncols); @@ -301,18 +302,18 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) static bool should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, - double predicted, double *nrows, double *rfactor) + double predicted, double nrows, double *rfactor) { if (ctx->isTimedOut) { - if (ctx->learn && *nrows > predicted * 1.2) + if (ctx->learn && nrows > predicted * 1.2) { /* This node s*/ if (aqo_show_details) elog(NOTICE, "[AQO] Learn on a plan node ("UINT64_FORMAT", %d), " "predicted rows: %.0lf, updated prediction: %.0lf", - query_context.query_hash, node->fss, predicted, *nrows); + query_context.query_hash, node->fss, predicted, nrows); *rfactor = RELIABILITY_MIN; return true; @@ -324,11 +325,11 @@ should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, { /* This is much more reliable data. So we can correct our prediction. */ if (ctx->learn && aqo_show_details && - fabs(*nrows - predicted) / predicted > 0.2) + fabs(nrows - predicted) / predicted > 0.2) elog(NOTICE, "[AQO] Learn on a finished plan node ("UINT64_FORMAT", %d), " "predicted rows: %.0lf, updated prediction: %.0lf", - query_context.query_hash, node->fss, predicted, *nrows); + query_context.query_hash, node->fss, predicted, nrows); *rfactor = 0.9 * (RELIABILITY_MAX - RELIABILITY_MIN); return true; @@ -369,7 +370,12 @@ learnOnPlanState(PlanState *p, void *context) /* If something goes wrong, return quickly. */ return true; - aqo_node = get_aqo_plan_node(p->plan, false); + if ((aqo_node = get_aqo_plan_node(p->plan, false)) == NULL) + /* + * Skip the node even for error calculation. 
It can be incorrect in the + * case of parallel workers (parallel_divisor not known). + */ + goto end; /* * Compute real value of rows, passed through this node. Summarize rows @@ -475,7 +481,7 @@ learnOnPlanState(PlanState *p, void *context) /* * Some nodes inserts after planning step (See T_Hash node type). - * In this case we have'nt AQO prediction and fss record. + * In this case we haven't AQO prediction and fss record. */ if (aqo_node->had_path) { @@ -505,7 +511,7 @@ learnOnPlanState(PlanState *p, void *context) Assert(predicted >= 1. && learn_rows >= 1.); - if (should_learn(p, aqo_node, ctx, predicted, &learn_rows, &rfactor)) + if (should_learn(p, aqo_node, ctx, predicted, learn_rows, &rfactor)) { if (IsA(p, AggState)) learn_agg_sample(&SubplanCtx, @@ -521,6 +527,7 @@ learnOnPlanState(PlanState *p, void *context) } } +end: ctx->clauselist = list_concat(ctx->clauselist, SubplanCtx.clauselist); ctx->selectivities = list_concat(ctx->selectivities, SubplanCtx.selectivities); @@ -931,7 +938,8 @@ print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) if (IsQueryDisabled() || !plan || es->format != EXPLAIN_FORMAT_TEXT) return; - aqo_node = get_aqo_plan_node(plan, false); + if ((aqo_node = get_aqo_plan_node(plan, false)) == NULL) + return; if (!aqo_show_details || !ps) goto explain_end; diff --git a/sql/parallel_workers.sql b/sql/parallel_workers.sql new file mode 100644 index 00000000..3fbccb48 --- /dev/null +++ b/sql/parallel_workers.sql @@ -0,0 +1,61 @@ +-- Specifically test AQO machinery for queries uses partial paths and executed +-- with parallel workers. + +CREATE EXTENSION aqo; + +-- Utility tool. Allow to filter system-dependent strings from explain output. 
+CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +SET aqo.join_threshold = 0; +SET aqo.mode = 'learn'; +SET aqo.show_details = true; + +-- Be generous with a number parallel workers to test the machinery +SET max_parallel_workers = 64; +SET max_parallel_workers_per_gather = 64; +-- Enforce usage of parallel workers +SET parallel_setup_cost = 0.1; +SET parallel_tuple_cost = 0.0001; + +CREATE TABLE t AS ( + SELECT x AS id, repeat('a', 512) AS payload FROM generate_series(1, 1E5) AS x +); +ANALYZE t; + +-- Simple test. Check serialization machinery mostly. +SELECT count(*) FROM t WHERE id % 100 = 0; -- Learning stage +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT count(*) FROM t WHERE id % 100 = 0;') AS str +WHERE str NOT LIKE '%Worker%'; + +-- More complex query just to provoke errors +SELECT count(*) FROM + (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, + (SELECT max(id) AS id, payload FROM t + WHERE id % 101 = 0 GROUP BY (payload)) AS q2 +WHERE q1.id = q2.id; -- Learning stage +-- XXX: Why grouping prediction isn't working here? 
+SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) FROM + (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, + (SELECT max(id) AS id, payload FROM t + WHERE id % 101 = 0 GROUP BY (payload)) AS q2 +WHERE q1.id = q2.id;') AS str +WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%'; + + +RESET parallel_tuple_cost; +RESET parallel_setup_cost; +RESET max_parallel_workers; +RESET max_parallel_workers_per_gather; +DROP TABLE t; +DROP FUNCTION expln; +DROP EXTENSION aqo; From 7b5c7bccf8e153bdbaa2d8d20d566d6423bb8466 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Mon, 10 Oct 2022 16:41:39 +0500 Subject: [PATCH 143/203] [PGPRO-7183] bring in line stable 13, 14, 15 Cherry-pick commit: e49f2fd29d075f8742d1103d49b0f94ef8ad55b8 Bugfix. Incorrect pointer shift during reading from learn_cache. --- learn_cache.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/learn_cache.c b/learn_cache.c index 74b72249..67590e5d 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -127,6 +127,12 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids) } } + /* + * Kludge code. But we should rewrite this code because now all knowledge + * base lives in non-transactional shared memory. + */ + ptr = (char *) hdr + sizeof(dsm_block_hdr) + (sizeof(double) * data->cols * aqo_K); + /* copy targets into DSM storage */ memcpy(ptr, data->targets, sizeof(double) * hdr->rows); ptr += sizeof(double) * aqo_K; From dcf7c0e666d1ef9ad7d11ea4436d019da3267ba9 Mon Sep 17 00:00:00 2001 From: Alexander Pyhalov Date: Fri, 7 Oct 2022 07:58:59 +0300 Subject: [PATCH 144/203] [PGPRO-7183] bring in line stable 13, 14, 15 Cherry-pick commit: 58ea474bd00265602c03b2b0051d3d2893fd675f Extract info from a Foreign Join plan node. 
--- expected/aqo_fdw.out | 43 +++++++++++++++++++++++++++++--- path_utils.c | 58 +++++++++++++++++++++++++++++++++++++++++--- sql/aqo_fdw.sql | 19 +++++++++++++++ 3 files changed, 114 insertions(+), 6 deletions(-) diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index 36af3bd6..dabda707 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -104,7 +104,7 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) QUERY PLAN -------------------------------------------------------------------------------------------------------- Foreign Scan (actual rows=1 loops=1) - AQO not used + AQO: rows=1, error=0% Output: a.x, b.x Relations: (public.frgn a) INNER JOIN (public.frgn b) Remote SQL: SELECT r1.x, r2.x FROM (public.local r1 INNER JOIN public.local r2 ON (((r1.x = r2.x)))) @@ -113,6 +113,39 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) JOINS: 0 (8 rows) +CREATE TABLE local_a(aid int primary key, aval text); +CREATE TABLE local_b(bid int primary key, aid int references local_a(aid), bval text); +INSERT INTO local_a SELECT i, 'val_' || i FROM generate_series(1,100) i; +INSERT INTO local_b SELECT i, mod((i+random()*10)::numeric, 10) + 1, 'val_' || i FROM generate_series(1,1000) i; +ANALYZE local_a, local_b; +CREATE FOREIGN TABLE frgn_a(aid int, aval text) SERVER loopback OPTIONS (table_name 'local_a'); +CREATE FOREIGN TABLE frgn_b(bid int, aid int, bval text) SERVER loopback OPTIONS (table_name 'local_b'); +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from frgn_a AS a, frgn_b AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + QUERY PLAN +----------------------------------------------- + Foreign Scan (actual rows=1000 loops=1) + AQO not used + Relations: (frgn_a a) INNER JOIN (frgn_b b) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(6 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from frgn_a AS a, frgn_b AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + QUERY PLAN 
+----------------------------------------------- + Foreign Scan (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Relations: (frgn_a a) INNER JOIN (frgn_b b) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(6 rows) + -- TODO: Non-mergejoinable join condition. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM frgn AS a, frgn AS b WHERE a.xfdwid, FDW_MISSING_OK); + if (!fdw || !fdw->fdwname) + return false; + + if (strcmp(fdw->fdwname, "postgres_fdw") != 0) + return false; + + return true; +} + /* * Extract an AQO node from the plan private field. * If no one node was found, return pointer to the default value or return NULL. @@ -519,7 +546,8 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) return; is_join_path = (src->type == T_NestPath || src->type == T_MergePath || - src->type == T_HashPath); + src->type == T_HashPath || + (src->type == T_ForeignPath && IS_JOIN_REL(src->parent))); node = get_aqo_plan_node(plan, true); @@ -535,8 +563,32 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) if (is_join_path) { - node->clauses = aqo_get_clauses(root, ((JoinPath *) src)->joinrestrictinfo); - node->jointype = ((JoinPath *) src)->jointype; + if (IsA(src, ForeignPath)) + { + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) src->parent->fdw_private; + List *restrictclauses = NIL; + + if (!fpinfo) + return; + + /* We have to ensure that this is postgres_fdw ForeignPath */ + if (!is_postgres_fdw_server(src->parent->serverid)) + return; + + restrictclauses = list_concat(restrictclauses, fpinfo->joinclauses); + restrictclauses = list_concat(restrictclauses, fpinfo->remote_conds); + restrictclauses = list_concat(restrictclauses, fpinfo->local_conds); + + node->clauses = aqo_get_clauses(root, restrictclauses); + node->jointype = fpinfo->jointype; + + list_free(restrictclauses); + } + else + { + node->clauses = aqo_get_clauses(root, ((JoinPath *) src)->joinrestrictinfo); + node->jointype = ((JoinPath *) src)->jointype; + } 
} else if (IsA(src, AggPath)) /* Aggregation node must store grouping clauses. */ diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index 2d71a20d..7ede8b03 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -60,6 +60,23 @@ SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; +CREATE TABLE local_a(aid int primary key, aval text); +CREATE TABLE local_b(bid int primary key, aid int references local_a(aid), bval text); +INSERT INTO local_a SELECT i, 'val_' || i FROM generate_series(1,100) i; +INSERT INTO local_b SELECT i, mod((i+random()*10)::numeric, 10) + 1, 'val_' || i FROM generate_series(1,1000) i; +ANALYZE local_a, local_b; + +CREATE FOREIGN TABLE frgn_a(aid int, aval text) SERVER loopback OPTIONS (table_name 'local_a'); +CREATE FOREIGN TABLE frgn_b(bid int, aid int, bval text) SERVER loopback OPTIONS (table_name 'local_b'); + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from frgn_a AS a, frgn_b AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from frgn_a AS a, frgn_b AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + -- TODO: Non-mergejoinable join condition. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM frgn AS a, frgn AS b WHERE a.x Date: Thu, 13 Oct 2022 16:25:01 +0300 Subject: [PATCH 145/203] [PGPRO-7183] bring in line stable 13, 14, 15 Cherry-pick commit: b3bb11f84f45e80896f24e696da1f5a748d25948 Add tests on partitioned tables with foreign partitions. 
--- expected/aqo_fdw.out | 83 ++++++++++++++++++++++++++++++++++++++++++++ sql/aqo_fdw.sql | 44 ++++++++++++++++++++++- 2 files changed, 126 insertions(+), 1 deletion(-) diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index dabda707..b3e21186 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -146,6 +146,89 @@ WHERE a.aid = b.aid AND b.bval like 'val%'; JOINS: 0 (6 rows) +-- Partitioned join over foreign tables +set enable_partitionwise_join = on; +ALTER SERVER loopback OPTIONS (ADD fdw_tuple_cost '1.0'); +CREATE TABLE local_main_p0(aid int, aval text); +CREATE TABLE local_main_p1(aid int, aval text); +CREATE TABLE main (aid int, aval text) PARTITION BY HASH(aid); +CREATE FOREIGN TABLE main_p0 PARTITION OF main FOR VALUES WITH (MODULUS 3, REMAINDER 0) + SERVER loopback OPTIONS (table_name 'local_main_p0'); +CREATE FOREIGN TABLE main_p1 PARTITION OF main FOR VALUES WITH (MODULUS 3, REMAINDER 1) + SERVER loopback OPTIONS (table_name 'local_main_p1'); +CREATE TABLE main_p2 PARTITION OF main FOR VALUES WITH (MODULUS 3, REMAINDER 2); +CREATE TABLE local_ref_p0(bid int, aid int, bval text); +CREATE TABLE local_ref_p1(bid int, aid int, bval text); +CREATE TABLE ref (bid int, aid int, bval text) PARTITION BY HASH(aid); +CREATE FOREIGN TABLE ref_p0 PARTITION OF ref FOR VALUES WITH (MODULUS 3, REMAINDER 0) + SERVER loopback OPTIONS (table_name 'local_ref_p0'); +CREATE FOREIGN TABLE ref_p1 PARTITION OF ref FOR VALUES WITH (MODULUS 3, REMAINDER 1) + SERVER loopback OPTIONS (table_name 'local_ref_p1'); +CREATE TABLE ref_p2 PARTITION OF ref FOR VALUES WITH (MODULUS 3, REMAINDER 2); +INSERT INTO main SELECT i, 'val_' || i FROM generate_series(1,100) i; +INSERT INTO ref SELECT i, mod(i, 10) + 1, 'val_' || i FROM generate_series(1,1000) i; +ANALYZE local_main_p0, local_main_p1, main_p2; +ANALYZE local_ref_p0, local_ref_p1, ref_p2; +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from main AS a, ref AS b +WHERE a.aid = b.aid AND b.bval like 
'val%'; + QUERY PLAN +-------------------------------------------------------------------- + Append (actual rows=1000 loops=1) + AQO not used + -> Foreign Scan (actual rows=400 loops=1) + AQO not used + Relations: (main_p0 a_1) INNER JOIN (ref_p0 b_1) + -> Foreign Scan (actual rows=300 loops=1) + AQO not used + Relations: (main_p1 a_2) INNER JOIN (ref_p1 b_2) + -> Hash Join (actual rows=300 loops=1) + AQO not used + Hash Cond: (b_3.aid = a_3.aid) + -> Seq Scan on ref_p2 b_3 (actual rows=300 loops=1) + AQO not used + Filter: (bval ~~ 'val%'::text) + -> Hash (actual rows=38 loops=1) + Buckets: 1024 Batches: 1 Memory Usage: 10kB + -> Seq Scan on main_p2 a_3 (actual rows=38 loops=1) + AQO not used + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(21 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from main AS a, ref AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + QUERY PLAN +-------------------------------------------------------------------- + Append (actual rows=1000 loops=1) + AQO not used + -> Foreign Scan (actual rows=400 loops=1) + AQO: rows=400, error=0% + Relations: (main_p0 a_1) INNER JOIN (ref_p0 b_1) + -> Foreign Scan (actual rows=300 loops=1) + AQO: rows=300, error=0% + Relations: (main_p1 a_2) INNER JOIN (ref_p1 b_2) + -> Hash Join (actual rows=300 loops=1) + AQO: rows=300, error=0% + Hash Cond: (b_3.aid = a_3.aid) + -> Seq Scan on ref_p2 b_3 (actual rows=300 loops=1) + AQO: rows=300, error=0% + Filter: (bval ~~ 'val%'::text) + -> Hash (actual rows=38 loops=1) + Buckets: 1024 Batches: 1 Memory Usage: 10kB + -> Seq Scan on main_p2 a_3 (actual rows=38 loops=1) + AQO: rows=38, error=0% + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(21 rows) + +DROP TABLE main, local_main_p0, local_main_p1; +DROP TABLE ref, local_ref_p0, local_ref_p1; +ALTER SERVER loopback OPTIONS (DROP fdw_tuple_cost); +reset enable_partitionwise_join; -- TODO: Non-mergejoinable join condition. 
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM frgn AS a, frgn AS b WHERE a.x Date: Fri, 14 Oct 2022 14:32:22 +0500 Subject: [PATCH 146/203] [PGPRO-7183] bring in line stable 13, 14, 15 Cherry-pick commit: 62b39450e5fa62d704d5e5b77e3e0e5e4292f587 restore_selectivities: avoid links to restrictinfo selectivity field: it can be freed or changed externally --- postprocessing.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/postprocessing.c b/postprocessing.c index abbdcffd..3bccce7c 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -176,7 +176,6 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, int nargs; int *args_hash; int *eclass_hash; - double *cur_sel; int cur_hash; int cur_relid; @@ -191,30 +190,29 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, foreach(l, clauselist) { RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + Selectivity *cur_sel = NULL; - cur_sel = NULL; if (parametrized_sel) { cur_hash = get_clause_hash(rinfo->clause, nargs, args_hash, eclass_hash); cur_sel = selectivity_cache_find_global_relid(cur_hash, cur_relid); - if (cur_sel == NULL) - { - if (join_type == JOIN_INNER) - cur_sel = &rinfo->norm_selec; - else - cur_sel = &rinfo->outer_selec; - } } - else if (join_type == JOIN_INNER) - cur_sel = &rinfo->norm_selec; - else - cur_sel = &rinfo->outer_selec; - if (*cur_sel < 0) - *cur_sel = 0; + if (cur_sel == NULL) + { + cur_sel = palloc(sizeof(double)); + + if (join_type == JOIN_INNER) + *cur_sel = rinfo->norm_selec; + else + *cur_sel = rinfo->outer_selec; + + if (*cur_sel < 0) + *cur_sel = 0; + } - Assert(cur_sel > 0); + Assert(*cur_sel >= 0); lst = lappend(lst, cur_sel); } From aea4aec9ad4894bb58bda7f85a6bfb5d4c227baf Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 25 Oct 2022 22:08:03 +0300 Subject: [PATCH 147/203] [PGPRO-7183] bring in line stable 13, 14, 15 Cherry-pick commit: 
961bdcf44d4e3d3394f4915bae73a61bcf3bfbe1 Fix aqo_fdw output test. --- expected/aqo_fdw.out | 20 ++++++++++++-------- sql/aqo_fdw.sql | 10 ++++++++-- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index b3e21186..70219b58 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -169,10 +169,13 @@ INSERT INTO main SELECT i, 'val_' || i FROM generate_series(1,100) i; INSERT INTO ref SELECT i, mod(i, 10) + 1, 'val_' || i FROM generate_series(1,1000) i; ANALYZE local_main_p0, local_main_p1, main_p2; ANALYZE local_ref_p0, local_ref_p1, ref_p2; +SELECT str AS result +FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * from main AS a, ref AS b -WHERE a.aid = b.aid AND b.bval like 'val%'; - QUERY PLAN +WHERE a.aid = b.aid AND b.bval like ''val%''') AS str +WHERE str NOT LIKE '%Memory%'; + result -------------------------------------------------------------------- Append (actual rows=1000 loops=1) AQO not used @@ -189,18 +192,20 @@ WHERE a.aid = b.aid AND b.bval like 'val%'; AQO not used Filter: (bval ~~ 'val%'::text) -> Hash (actual rows=38 loops=1) - Buckets: 1024 Batches: 1 Memory Usage: 10kB -> Seq Scan on main_p2 a_3 (actual rows=38 loops=1) AQO not used Using aqo: true AQO mode: LEARN JOINS: 1 -(21 rows) +(20 rows) +SELECT str AS result +FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * from main AS a, ref AS b -WHERE a.aid = b.aid AND b.bval like 'val%'; - QUERY PLAN +WHERE a.aid = b.aid AND b.bval like ''val%''') AS str +WHERE str NOT LIKE '%Memory%'; + result -------------------------------------------------------------------- Append (actual rows=1000 loops=1) AQO not used @@ -217,13 +222,12 @@ WHERE a.aid = b.aid AND b.bval like 'val%'; AQO: rows=300, error=0% Filter: (bval ~~ 'val%'::text) -> Hash (actual rows=38 loops=1) - Buckets: 1024 Batches: 1 Memory Usage: 10kB -> Seq Scan on main_p2 a_3 (actual rows=38 loops=1) AQO: rows=38, error=0% Using 
aqo: true AQO mode: LEARN JOINS: 1 -(21 rows) +(20 rows) DROP TABLE main, local_main_p0, local_main_p1; DROP TABLE ref, local_ref_p0, local_ref_p1; diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index 7a921d17..efa275f4 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -107,13 +107,19 @@ INSERT INTO ref SELECT i, mod(i, 10) + 1, 'val_' || i FROM generate_series(1,100 ANALYZE local_main_p0, local_main_p1, main_p2; ANALYZE local_ref_p0, local_ref_p1, ref_p2; +SELECT str AS result +FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * from main AS a, ref AS b -WHERE a.aid = b.aid AND b.bval like 'val%'; +WHERE a.aid = b.aid AND b.bval like ''val%''') AS str +WHERE str NOT LIKE '%Memory%'; +SELECT str AS result +FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * from main AS a, ref AS b -WHERE a.aid = b.aid AND b.bval like 'val%'; +WHERE a.aid = b.aid AND b.bval like ''val%''') AS str +WHERE str NOT LIKE '%Memory%'; DROP TABLE main, local_main_p0, local_main_p1; DROP TABLE ref, local_ref_p0, local_ref_p1; From 235a8d6ea4b1ca0e354bf405a454a24a216a5a9e Mon Sep 17 00:00:00 2001 From: "Andrey V. 
Lepikhov" Date: Thu, 17 Nov 2022 18:05:22 +1000 Subject: [PATCH 148/203] [PGPRO-7183] bring in line stable 13, 14, 15 Cherry-pick commit: 763b45b3d3ecfb78977947eb53a57485b6046eaa Suppress a line of EXPLAIN in parallel_workers test which contains substring --- expected/parallel_workers.out | 7 +++---- sql/parallel_workers.sql | 3 ++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/expected/parallel_workers.out b/expected/parallel_workers.out index 14e086c8..fca67006 100644 --- a/expected/parallel_workers.out +++ b/expected/parallel_workers.out @@ -69,7 +69,8 @@ SELECT count(*) FROM (SELECT max(id) AS id, payload FROM t WHERE id % 101 = 0 GROUP BY (payload)) AS q2 WHERE q1.id = q2.id;') AS str -WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%'; +WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%' + AND str NOT LIKE '%Gather Merge%'; str -------------------------------------------------------------------------------------------------- Aggregate (actual rows=1 loops=1) @@ -84,7 +85,6 @@ WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%'; -> Finalize GroupAggregate (actual rows=1 loops=1) AQO not used Group Key: t.payload - -> Gather Merge (actual rows=3 loops=1) AQO not used -> Partial GroupAggregate (actual rows=1 loops=3) AQO not used @@ -99,7 +99,6 @@ WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%'; -> Group (actual rows=1000 loops=1) AQO not used Group Key: t_1.id - -> Gather Merge (actual rows=1000 loops=1) AQO not used -> Group (actual rows=333 loops=3) AQO not used @@ -114,7 +113,7 @@ WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%'; Using aqo: true AQO mode: LEARN JOINS: 1 -(42 rows) +(40 rows) RESET parallel_tuple_cost; RESET parallel_setup_cost; diff --git a/sql/parallel_workers.sql b/sql/parallel_workers.sql index 3fbccb48..b544cf19 100644 --- a/sql/parallel_workers.sql +++ b/sql/parallel_workers.sql @@ -49,7 +49,8 @@ SELECT count(*) FROM (SELECT max(id) AS id, payload FROM t 
WHERE id % 101 = 0 GROUP BY (payload)) AS q2 WHERE q1.id = q2.id;') AS str -WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%'; +WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%' + AND str NOT LIKE '%Gather Merge%'; RESET parallel_tuple_cost; From 2f6f36f4c09e4b678af129d223e4c25008750f30 Mon Sep 17 00:00:00 2001 From: Andrey Kazarinov Date: Thu, 8 Dec 2022 18:04:35 +0300 Subject: [PATCH 149/203] [PGPRO-7183] bring in line stable 13, 14, 15 Minor changes --- Makefile | 2 +- README.md | 5 ++++- aqo--1.4--1.5.sql | 2 ++ aqo.c | 2 +- aqo.conf | 4 ++++ conf.add | 4 ---- expected/gucs.out | 2 +- expected/look_a_like.out | 16 ++++++---------- expected/unsupported.out | 30 ++++++++++++++++++++++++++---- learn_cache.c | 14 ++++++++++---- postprocessing.c | 7 +++++++ regress_schedule | 1 + sql/aqo_fdw.sql | 1 + sql/look_a_like.sql | 16 ++++++---------- sql/unsupported.sql | 7 ++++++- storage.c | 24 ++++++++++++------------ storage.h | 2 +- 17 files changed, 89 insertions(+), 50 deletions(-) create mode 100644 aqo.conf delete mode 100644 conf.add diff --git a/Makefile b/Makefile index 8866ab7c..b07d7f86 100755 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ REGRESS_OPTS = --schedule=$(srcdir)/regress_schedule fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw stat_srcdir = $(top_srcdir)/contrib/pg_stat_statements PG_CPPFLAGS += -I$(libpq_srcdir) -I$(fdw_srcdir) -I$(stat_srcdir) -EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/conf.add +EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/aqo.conf EXTRA_INSTALL = contrib/postgres_fdw contrib/pg_stat_statements DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql \ diff --git a/README.md b/README.md index e28ac89c..252c74ad 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,10 @@ To avoid compatibility issues, the following branches in the git-repository are * `stable9_6`. * `stable11` - for PG v10 and v11. * `stable12` - for PG v12. 
-* the `master` branch of the AQO repository correctly works with PGv13 and the PostgreSQL `master` branch. +* `stable13` - for PG v13. +* `stable14` - for PG v14. +* `stable15` - for PG v15. +* the `master` branch of the AQO repository correctly works with PGv15 and the PostgreSQL `master` branch. The module contains a patch and an extension. Patch has to be applied to the sources of PostgresSQL. Patch affects header files, that is why PostgreSQL diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 3244a721..622bb7fa 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -19,6 +19,7 @@ DROP TABLE public.aqo_data CASCADE; DROP TABLE public.aqo_queries CASCADE; DROP TABLE public.aqo_query_texts CASCADE; DROP TABLE public.aqo_query_stat CASCADE; +DROP FUNCTION invalidate_deactivated_queries_cache; /* @@ -144,6 +145,7 @@ COMMENT ON FUNCTION aqo_drop_class(bigint) IS -- Returns number of deleted rows from aqo_queries and aqo_data tables. -- CREATE OR REPLACE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) +RETURNS SETOF record AS 'MODULE_PATHNAME', 'aqo_cleanup' LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_cleanup() IS diff --git a/aqo.c b/aqo.c index 53f1fbd4..1778f74c 100644 --- a/aqo.c +++ b/aqo.c @@ -353,7 +353,7 @@ _PG_init(void) */ AQOLearnMemCtx = AllocSetContextCreate(AQOTopMemCtx, "AQOLearnMemoryContext", - ALLOCSET_DEFAULT_SIZES); + ALLOCSET_DEFAULT_SIZES); RegisterResourceReleaseCallback(aqo_free_callback, NULL); RegisterAQOPlanNodeMethods(); diff --git a/aqo.conf b/aqo.conf new file mode 100644 index 00000000..b53b5a5d --- /dev/null +++ b/aqo.conf @@ -0,0 +1,4 @@ +autovacuum = off +shared_preload_libraries = 'postgres_fdw, aqo' +max_parallel_maintenance_workers = 1 # switch off parallel workers because of unsteadiness +aqo.wide_search = 'on' \ No newline at end of file diff --git a/conf.add b/conf.add deleted file mode 100644 index 9e9d2336..00000000 --- a/conf.add +++ /dev/null @@ -1,4 +0,0 @@ -autovacuum = off 
-shared_preload_libraries = 'postgres_fdw, aqo' -max_parallel_workers_per_gather = 1 # switch off parallel workers because of unsteadiness -aqo.wide_search = 'on' \ No newline at end of file diff --git a/expected/gucs.out b/expected/gucs.out index bbfd8001..7528c67b 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -93,7 +93,7 @@ SELECT obj_description('aqo_reset'::regproc::oid); List of functions Schema | Name | Result data type | Argument data types | Type --------+-------------+------------------+-----------------------------------+------ - public | aqo_cleanup | record | OUT nfs integer, OUT nfss integer | func + public | aqo_cleanup | SETOF record | OUT nfs integer, OUT nfss integer | func (1 row) \df aqo_reset diff --git a/expected/look_a_like.out b/expected/look_a_like.out index f3918dbf..ecd73fb4 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -25,8 +25,7 @@ $$ LANGUAGE PLPGSQL; -- in the next queries with the same fss_hash SELECT str AS result FROM expln(' -SELECT x FROM A where x = 5;') AS str -WHERE str NOT LIKE 'Query Identifier%'; +SELECT x FROM A where x = 5;') AS str; result ------------------------------------------------ Seq Scan on public.a (actual rows=100 loops=1) @@ -42,7 +41,6 @@ WHERE str NOT LIKE 'Query Identifier%'; SELECT str AS result FROM expln(' SELECT x FROM A,B WHERE x = 5 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%' ; -- Find cardinality for SCAN A(x=5) from a neighbour class, created by the result -------------------------------------------------------- @@ -68,7 +66,6 @@ WHERE str NOT LIKE 'Query Identifier%' SELECT str AS result FROM expln(' SELECT x, sum(x) FROM A,B WHERE y = 5 AND A.x = B.y group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%' ; -- Find the JOIN cardinality from a neighbour class. 
result -------------------------------------------------------------- @@ -97,8 +94,7 @@ WHERE str NOT LIKE 'Query Identifier%' -- cardinality 100 in the first Seq Scan on a SELECT str AS result FROM expln(' -SELECT x, sum(x) FROM A WHERE x = 5 group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%'; +SELECT x, sum(x) FROM A WHERE x = 5 group by(x);') AS str; result ------------------------------------------------------ GroupAggregate (actual rows=1 loops=1) @@ -120,7 +116,7 @@ WHERE str NOT LIKE 'Query Identifier%'; SELECT str AS result FROM expln(' SELECT x FROM A where x < 10 group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE '%Memory%'; result ------------------------------------------------------- HashAggregate (actual rows=10 loops=1) @@ -140,7 +136,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x,y FROM A,B WHERE x < 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE '%Memory%'; result ------------------------------------------------------------- Merge Join (actual rows=100000 loops=1) @@ -169,7 +165,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE '%Memory%'; result ---------------------------------------------------------- HashAggregate (actual rows=0 loops=1) @@ -200,7 +196,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' +WHERE str NOT LIKE '%Memory%' ; result ---------------------------------------------------------- diff --git a/expected/unsupported.out 
b/expected/unsupported.out index 5cdcdc23..f4b17d99 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -9,7 +9,7 @@ CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y FROM generate_series(1,1000) AS gs; ANALYZE t, t1; -- --- Do not support HAVING clause for now. +-- Do not support HAVING clauses for now. -- SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; count @@ -34,6 +34,24 @@ EXPLAIN (COSTS OFF) JOINS: 0 (11 rows) +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t GROUP BY (x) HAVING x > 3; +') AS str WHERE str NOT LIKE '%Memory Usage%'; + str +----------------------------------------------- + HashAggregate (actual rows=17 loops=1) + AQO not used + Group Key: x + -> Seq Scan on t (actual rows=801 loops=1) + AQO: rows=801, error=0% + Filter: (x > 3) + Rows Removed by Filter: 199 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(10 rows) + -- -- Doesn't estimates GROUP BY clause -- @@ -582,24 +600,28 @@ ORDER BY (md5(query_text),error) DESC; 0.000 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + | (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 0.106 | + + | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT * FROM t GROUP BY (x) HAVING x > 3; + + | 0.000 | SELECT count(*) FROM + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + | JOIN + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + | ON q1.x = q2.x+1; -(12 rows) +(13 rows) DROP TABLE t,t1 CASCADE; -- delete all tables used in the test SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May some false positives really bother us here? 
count ------- - 42 + 44 (1 row) SELECT * FROM aqo_cleanup(); nfs | nfss -----+------ - 12 | 42 + 13 | 44 (1 row) SELECT count(*) FROM aqo_data; -- No one row should be returned diff --git a/learn_cache.c b/learn_cache.c index 67590e5d..2fc6644a 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -227,7 +227,7 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **reloids) Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); Assert(hdr->magic == AQO_SHARED_MAGIC); - Assert(hdr && ptr); + Assert(hdr && ptr && hdr->rows > 0); data->rows = hdr->rows; data->cols = hdr->cols; @@ -245,6 +245,12 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **reloids) } } + /* + * Kludge code. But we should rewrite this code because now all knowledge + * base lives in non-transactional shared memory. + */ + ptr = (char *) hdr + sizeof(dsm_block_hdr) + (sizeof(double) * data->cols * aqo_K); + memcpy(data->targets, ptr, sizeof(double) * hdr->rows); ptr += sizeof(double) * aqo_K; memcpy(data->rfactors, ptr, sizeof(double) * hdr->rows); @@ -261,7 +267,7 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **reloids) return calculate_size(hdr->cols, *reloids); } - /* It is just read operation. No any interest in size calculation. */ + /* It is just a read operation. No any interest in size calculation. 
*/ return 0; } @@ -293,7 +299,7 @@ lc_flush_data(void) aqo_data_store(hdr->key.fs, hdr->key.fss, &data, reloids); if (!hash_search(fss_htab, (void *) &hdr->key, HASH_REMOVE, NULL)) - elog(ERROR, "[AQO] Flush: local ML cache is corrupted."); + elog(PANIC, "[AQO] Flush: local ML cache is corrupted."); } reset_dsm_cache(); @@ -323,7 +329,7 @@ lc_assign_hook(bool newval, void *extra) while ((entry = (htab_entry *) hash_seq_search(&status)) != NULL) { if (!hash_search(fss_htab, (void *) &entry->key, HASH_REMOVE, NULL)) - elog(ERROR, "[AQO] The local ML cache is corrupted."); + elog(PANIC, "[AQO] The local ML cache is corrupted."); } LWLockRelease(&aqo_state->lock); } diff --git a/postprocessing.c b/postprocessing.c index 3bccce7c..75a61707 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -636,6 +636,13 @@ set_timeout_if_need(QueryDesc *queryDesc) { TimestampTz fin_time; + if (IsParallelWorker()) + /* + * AQO timeout should stop only main worker. Other workers would be + * terminated by a regular ERROR machinery. + */ + return false; + if (!get_timeout_active(STATEMENT_TIMEOUT) || !aqo_learn_statement_timeout) return false; diff --git a/regress_schedule b/regress_schedule index b67bc207..418e14ec 100644 --- a/regress_schedule +++ b/regress_schedule @@ -10,6 +10,7 @@ test: gucs test: forced_stat_collection test: unsupported test: clean_aqo_data +test: parallel_workers test: plancache # Performance-dependent test. Can be ignored if executes in containers or on slow machines ignore: statement_timeout diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index efa275f4..4fc2782f 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -6,6 +6,7 @@ CREATE EXTENSION aqo; CREATE EXTENSION postgres_fdw; + SET aqo.mode = 'learn'; SET aqo.show_details = 'true'; -- show AQO info for each node and entire query. SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. 
diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index 07aff8a7..be71feff 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -28,45 +28,41 @@ $$ LANGUAGE PLPGSQL; -- in the next queries with the same fss_hash SELECT str AS result FROM expln(' -SELECT x FROM A where x = 5;') AS str -WHERE str NOT LIKE 'Query Identifier%'; +SELECT x FROM A where x = 5;') AS str; SELECT str AS result FROM expln(' SELECT x FROM A,B WHERE x = 5 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%' ; -- Find cardinality for SCAN A(x=5) from a neighbour class, created by the -- query, executed above. SELECT str AS result FROM expln(' SELECT x, sum(x) FROM A,B WHERE y = 5 AND A.x = B.y group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%' ; -- Find the JOIN cardinality from a neighbour class. -- cardinality 100 in the first Seq Scan on a SELECT str AS result FROM expln(' -SELECT x, sum(x) FROM A WHERE x = 5 group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%'; +SELECT x, sum(x) FROM A WHERE x = 5 group by(x);') AS str; -- no one predicted rows. 
we use knowledge cardinalities of the query -- in the next queries with the same fss_hash SELECT str AS result FROM expln(' SELECT x FROM A where x < 10 group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE '%Memory%'; -- cardinality 1000 in Seq Scan on a SELECT str AS result FROM expln(' SELECT x,y FROM A,B WHERE x < 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE '%Memory%'; -- cardinality 100 in Seq Scan on a and Seq Scan on b SELECT str AS result FROM expln(' SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE '%Memory%'; -- -- TODO: @@ -75,7 +71,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' +WHERE str NOT LIKE '%Memory%' ; RESET enable_material; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 9f26b9a6..b9f6e075 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -12,12 +12,17 @@ CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y ANALYZE t, t1; -- --- Do not support HAVING clause for now. +-- Do not support HAVING clauses for now. 
-- SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; EXPLAIN (COSTS OFF) SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t GROUP BY (x) HAVING x > 3; +') AS str WHERE str NOT LIKE '%Memory Usage%'; + -- -- Doesn't estimates GROUP BY clause -- diff --git a/storage.c b/storage.c index d81197ac..bcbcfac4 100644 --- a/storage.c +++ b/storage.c @@ -389,8 +389,8 @@ aqo_stat_reset(void) hash_seq_init(&hash_seq, stat_htab); while ((entry = hash_seq_search(&hash_seq)) != NULL) { - if (hash_search(stat_htab, &entry->queryid, HASH_REMOVE, NULL) == NULL) - elog(ERROR, "[AQO] hash table corrupted"); + if (!hash_search(stat_htab, &entry->queryid, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); num_remove++; } aqo_state->stat_changed = true; @@ -1225,7 +1225,7 @@ _aqo_data_remove(data_key *key) dsa_free(data_dsa, entry->data_dp); entry->data_dp = InvalidDsaPointer; - if (hash_search(data_htab, key, HASH_REMOVE, NULL) == NULL) + if (!hash_search(data_htab, key, HASH_REMOVE, NULL)) elog(PANIC, "[AQO] Inconsistent data hash table"); aqo_state->data_changed = true; @@ -1256,8 +1256,8 @@ aqo_qtexts_reset(void) Assert(DsaPointerIsValid(entry->qtext_dp)); dsa_free(qtext_dsa, entry->qtext_dp); - if (hash_search(qtexts_htab, &entry->queryid, HASH_REMOVE, NULL) == NULL) - elog(ERROR, "[AQO] hash table corrupted"); + if (!hash_search(qtexts_htab, &entry->queryid, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); num_remove++; } aqo_state->qtexts_changed = true; @@ -1718,8 +1718,8 @@ _aqo_data_clean(uint64 fs) Assert(DsaPointerIsValid(entry->data_dp)); dsa_free(data_dsa, entry->data_dp); entry->data_dp = InvalidDsaPointer; - if (hash_search(data_htab, &entry->key, HASH_REMOVE, NULL) == NULL) - elog(ERROR, "[AQO] hash table corrupted"); + if (!hash_search(data_htab, &entry->key, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] 
hash table corrupted"); removed++; } @@ -1745,8 +1745,8 @@ aqo_data_reset(void) { Assert(DsaPointerIsValid(entry->data_dp)); dsa_free(data_dsa, entry->data_dp); - if (hash_search(data_htab, &entry->key, HASH_REMOVE, NULL) == NULL) - elog(ERROR, "[AQO] hash table corrupted"); + if (!hash_search(data_htab, &entry->key, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); num_remove++; } @@ -1885,8 +1885,8 @@ aqo_queries_reset(void) /* Don't remove default feature space */ continue; - if (hash_search(queries_htab, &entry->queryid, HASH_REMOVE, NULL) == NULL) - elog(ERROR, "[AQO] hash table corrupted"); + if (!hash_search(queries_htab, &entry->queryid, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); num_remove++; } @@ -2218,7 +2218,7 @@ aqo_cleanup(PG_FUNCTION_ARGS) values[1] = Int32GetDatum(fss_num); tuplestore_putvalues(tupstore, tupDesc, values, nulls); tuplestore_donestoring(tupstore); - return (Datum) 0; + PG_RETURN_VOID(); } /* diff --git a/storage.h b/storage.h index 373cace0..94891c5d 100644 --- a/storage.h +++ b/storage.h @@ -67,7 +67,7 @@ typedef struct DataEntry /* * Link to DSA-allocated memory block. Can be shared across backends. * Contains: - * matrix[][], targets[], reliability[], oids. + * matrix[][], targets[], reliability[], oids. 
*/ dsa_pointer data_dp; } DataEntry; From bdbc32736d53ff329adfb50bd5116aa5664ddf6a Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 21 Dec 2022 10:04:43 +0500 Subject: [PATCH 150/203] Update c-cpp.yml Arrange with stable15 and use -O2 optimization because pg with O3 can't compile --- .github/workflows/c-cpp.yml | 6 +++--- aqo.conf | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 71989628..8ee5bbf0 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -4,11 +4,10 @@ on: push: branches: [ stable13 ] pull_request: - branches: [ stable13 ] env: - BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} jobs: build: @@ -19,6 +18,7 @@ jobs: - name: pg run: | sudo apt install libipc-run-perl + echo "Deploying to production server on branch" $BRANCH_NAME git config --global user.email "ci@postgrespro.ru" git config --global user.name "CI PgPro admin" @@ -28,10 +28,10 @@ jobs: cd pg git checkout REL_13_STABLE - ./configure $CONFIGURE_OPTS CFLAGS="-O3" git clone https://github.com/postgrespro/aqo.git contrib/aqo git -C contrib/aqo checkout $BRANCH_NAME patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg13.patch + ./configure $CONFIGURE_OPTS CFLAGS="-O2" make -j4 > /dev/null && make -j4 -C contrib > /dev/null env CLIENTS=50 THREADS=50 make -C contrib/aqo check diff --git a/aqo.conf b/aqo.conf index b53b5a5d..0574a0a4 100644 --- a/aqo.conf +++ b/aqo.conf @@ -1,4 +1,4 @@ autovacuum = off shared_preload_libraries = 'postgres_fdw, aqo' max_parallel_maintenance_workers = 1 # switch off parallel workers because of unsteadiness -aqo.wide_search = 'on' \ No newline at end of file +aqo.wide_search = 'on' From 58fe21c64806234e80168478979bd52cd07cb17c Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 18 Oct 2022 11:45:26 +0300 Subject: [PATCH 151/203] Add compute_query_id parameter in 
aqo configure with value as regress. It is necessary to avoid outputting the Query Identifier while the vanilla tests are running. (See explain.c:612 for details: the failure condition can occur only if the query identifier is not null.) The WHERE clause 'NOT LIKE '%Query Identifier%'' is thrown away because it is no longer necessary. This additional parameter appears if we set the compute_query_id parameter to 'auto'. Appearance of the parameter is checked only in the gucs test. a.lepikhov: cherry-picked this commit down to stable13 just to have as synchronized as possible versions of the branches. --- aqo.conf | 1 + aqo_pg13.patch | 31 ++++++++++++--------------- expected/aqo_fdw.out | 46 +++++++++++++++++++++++++++------------- expected/gucs.out | 26 +++++++++++++++++------ expected/unsupported.out | 26 +++++++++++------------ preprocessing.c | 1 - sql/aqo_fdw.sql | 26 +++++++++++++++-------- sql/gucs.sql | 25 +++++++++++++++++----- sql/unsupported.sql | 27 ++++++++++++----------- 9 files changed, 128 insertions(+), 81 deletions(-) diff --git a/aqo.conf b/aqo.conf index 0574a0a4..06f3bf9c 100644 --- a/aqo.conf +++ b/aqo.conf @@ -2,3 +2,4 @@ autovacuum = off shared_preload_libraries = 'postgres_fdw, aqo' max_parallel_maintenance_workers = 1 # switch off parallel workers because of unsteadiness aqo.wide_search = 'on' + diff --git a/aqo_pg13.patch b/aqo_pg13.patch index 3755bbf5..406e3e0e 100644 --- a/aqo_pg13.patch +++ b/aqo_pg13.patch @@ -57,7 +57,7 @@ index bc05c96b4c..b6a3abe0d2 100644 if (es->format == EXPLAIN_FORMAT_TEXT) appendStringInfoChar(es->str, '\n'); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index 682b28ed72..3a5c615deb 100644 +index 692b6c1559..580d04d784 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -132,6 +132,7 @@ CopyPlanFields(const Plan *from, Plan *newnode) @@ -69,7 +69,7 @@ index 682b28ed72..3a5c615deb 100644 /* diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c -index 
7237b52e96..5e2ee2732a 100644 +index 21ececf0c2..a0e7a7ebca 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -342,6 +342,7 @@ _outPlanInfo(StringInfo str, const Plan *node) @@ -81,7 +81,7 @@ index 7237b52e96..5e2ee2732a 100644 /* diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c -index 62c945b6c5..a39046ca56 100644 +index 7976b369ba..604314e0b3 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1580,6 +1580,11 @@ ReadCommonPlan(Plan *local_node) @@ -394,7 +394,7 @@ index 917713c163..5b7bf1cec6 100644 /* diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c -index 60e7fda6a9..5732c7a685 100644 +index 27c665ac12..f599fba755 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -145,7 +145,8 @@ static List *extract_rollup_sets(List *groupingSets); @@ -407,7 +407,7 @@ index 60e7fda6a9..5732c7a685 100644 grouping_sets_data *gd, List *target_list); static RelOptInfo *create_grouping_paths(PlannerInfo *root, -@@ -3682,7 +3683,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) +@@ -3686,7 +3687,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) */ static double get_number_of_groups(PlannerInfo *root, @@ -417,7 +417,7 @@ index 60e7fda6a9..5732c7a685 100644 grouping_sets_data *gd, List *target_list) { -@@ -3719,7 +3721,7 @@ get_number_of_groups(PlannerInfo *root, +@@ -3723,7 +3725,7 @@ get_number_of_groups(PlannerInfo *root, GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); double numGroups = estimate_num_groups(root, groupExprs, @@ -426,7 +426,7 @@ index 60e7fda6a9..5732c7a685 100644 &gset); gs->numGroups = numGroups; -@@ -3744,7 +3746,7 @@ get_number_of_groups(PlannerInfo *root, +@@ -3748,7 +3750,7 @@ get_number_of_groups(PlannerInfo *root, GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); double numGroups = estimate_num_groups(root, groupExprs, @@ -435,7 +435,7 @@ index 
60e7fda6a9..5732c7a685 100644 &gset); gs->numGroups = numGroups; -@@ -3760,8 +3762,8 @@ get_number_of_groups(PlannerInfo *root, +@@ -3764,8 +3766,8 @@ get_number_of_groups(PlannerInfo *root, groupExprs = get_sortgrouplist_exprs(parse->groupClause, target_list); @@ -446,7 +446,7 @@ index 60e7fda6a9..5732c7a685 100644 } } else if (parse->groupingSets) -@@ -4147,7 +4149,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, +@@ -4151,7 +4153,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, * Estimate number of groups. */ dNumGroups = get_number_of_groups(root, @@ -456,7 +456,7 @@ index 60e7fda6a9..5732c7a685 100644 gd, extra->targetList); -@@ -6931,13 +6934,15 @@ create_partial_grouping_paths(PlannerInfo *root, +@@ -6935,13 +6938,15 @@ create_partial_grouping_paths(PlannerInfo *root, if (cheapest_total_path != NULL) dNumPartialGroups = get_number_of_groups(root, @@ -541,7 +541,7 @@ index a203e6f1ff..d31bf5bae6 100644 return ppi; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c -index 821844ada3..85b2482114 100644 +index 37458da096..248a1875a1 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -147,6 +147,7 @@ @@ -635,18 +635,15 @@ index 5ebf070979..5b2acd7de2 100644 diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 90f02ce6fd..f3e2138ee2 100644 +index 90f02ce6fd..88c332164d 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h -@@ -159,6 +159,12 @@ typedef struct Plan +@@ -159,6 +159,9 @@ typedef struct Plan */ Bitmapset *extParam; Bitmapset *allParam; + -+ /* -+ * Additional fields for an extension purposes. -+ * TODO: allow to serialize/deserialize this list. -+ */ ++ /* Additional field for an extension purposes. 
*/ + List *ext_nodes; } Plan; diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index 70219b58..e568e993 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -54,14 +54,11 @@ SELECT x FROM frgn; (5 rows) -- Push down base filters. Use verbose mode to see filters. -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE)) -SELECT x FROM frgn WHERE x < 10; -ERROR: syntax error at or near ")" -LINE 1: ...LAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE)) - ^ -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) -SELECT x FROM frgn WHERE x < 10; - QUERY PLAN +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT x FROM frgn WHERE x < 10; +') AS str; + str ----------------------------------------------------------- Foreign Scan on public.frgn (actual rows=1 loops=1) AQO not used @@ -72,6 +69,21 @@ SELECT x FROM frgn WHERE x < 10; JOINS: 0 (7 rows) +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT x FROM frgn WHERE x < 10; +') AS str; + str +----------------------------------------------------------- + Foreign Scan on public.frgn (actual rows=1 loops=1) + AQO: rows=1, error=0% + Output: x + Remote SQL: SELECT x FROM public.local WHERE ((x < 10)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(7 rows) + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants QUERY PLAN @@ -98,10 +110,12 @@ SELECT str FROM expln(' JOINS: 0 (6 rows) --- TODO: Should learn on postgres_fdw nodes -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) - SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; - QUERY PLAN +-- Should learn on postgres_fdw nodes +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; +') AS str; + str 
-------------------------------------------------------------------------------------------------------- Foreign Scan (actual rows=1 loops=1) AQO: rows=1, error=0% @@ -246,9 +260,11 @@ SELECT * FROM frgn AS a, frgn AS b WHERE a.x Date: Fri, 14 Oct 2022 09:43:07 +0500 Subject: [PATCH 152/203] Several bugfixes here: 1. don't enable statement timeout in parallel worker and 2. minor DSM cache fix. 3. don't clear learn_cache in a parallel worker. --- aqo_shared.c | 4 +--- learn_cache.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/aqo_shared.c b/aqo_shared.c index ac5c5aea..5715a76e 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -77,12 +77,10 @@ reset_dsm_cache(void) Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE)); - if (aqo_state->dsm_handler == DSM_HANDLE_INVALID) + if (aqo_state->dsm_handler == DSM_HANDLE_INVALID || !seg) /* Fast path. No any cached data exists. */ return; - Assert(seg); - hdr = (dsm_seg_hdr *) dsm_segment_address(seg); start = (char *) hdr + sizeof(dsm_seg_hdr); diff --git a/learn_cache.c b/learn_cache.c index 2fc6644a..c7f6ef87 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -13,6 +13,7 @@ */ #include "postgres.h" +#include "access/parallel.h" /* Just for IsParallelWorker() */ #include "miscadmin.h" #include "aqo.h" @@ -316,14 +317,15 @@ lc_assign_hook(bool newval, void *extra) HASH_SEQ_STATUS status; htab_entry *entry; - if (!fss_htab || !IsUnderPostmaster) + if (!fss_htab || !IsUnderPostmaster || IsParallelWorker()) + /* Clean this shared cache only in main backend process. */ return; /* Remove all entries, reset memory context. */ elog(DEBUG5, "[AQO] Cleanup local cache of ML data."); - /* Remove all frozen plans from a plancache. */ + /* Remove all entries in the shared hash table. 
*/ LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); hash_seq_init(&status, fss_htab); while ((entry = (htab_entry *) hash_seq_search(&status)) != NULL) @@ -331,5 +333,9 @@ lc_assign_hook(bool newval, void *extra) if (!hash_search(fss_htab, (void *) &entry->key, HASH_REMOVE, NULL)) elog(PANIC, "[AQO] The local ML cache is corrupted."); } + + /* Now, clean additional DSM block */ + reset_dsm_cache(); + LWLockRelease(&aqo_state->lock); } From 8bd3378806bf1e75962a5912be6e18fe81a079d6 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 22 Dec 2022 12:03:44 +0500 Subject: [PATCH 153/203] Second stage of branches arrangement. Now: from master to the stable --- aqo.c | 2 +- aqo_shared.c | 4 ++-- aqo_shared.h | 3 +-- auto_tuning.c | 6 +++--- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/aqo.c b/aqo.c index 1778f74c..ab29e516 100644 --- a/aqo.c +++ b/aqo.c @@ -33,7 +33,7 @@ void _PG_init(void); #define AQO_MODULE_MAGIC (1234) /* Strategy of determining feature space for new queries. 
*/ -int aqo_mode; +int aqo_mode = AQO_MODE_CONTROLLED; bool force_collect_stat; /* diff --git a/aqo_shared.c b/aqo_shared.c index 5715a76e..86908880 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -26,8 +26,8 @@ shmem_startup_hook_type prev_shmem_startup_hook = NULL; AQOSharedState *aqo_state = NULL; HTAB *fss_htab = NULL; static int aqo_htab_max_items = 1000; -int fs_max_items = 1; /* Max number of different feature spaces in ML model */ -int fss_max_items = 1; /* Max number of different feature subspaces in ML model */ +int fs_max_items = 10000; /* Max number of different feature spaces in ML model */ +int fss_max_items = 100000; /* Max number of different feature subspaces in ML model */ static uint32 temp_storage_size = 1024 * 1024 * 10; /* Storage size, in bytes */ static dsm_segment *seg = NULL; diff --git a/aqo_shared.h b/aqo_shared.h index 61c0d3d0..926a2723 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -1,12 +1,11 @@ #ifndef AQO_SHARED_H #define AQO_SHARED_H - +#include "lib/dshash.h" #include "storage/dsm.h" #include "storage/ipc.h" #include "storage/lwlock.h" #include "utils/dsa.h" -#include "lib/dshash.h" #define AQO_SHARED_MAGIC 0x053163 diff --git a/auto_tuning.c b/auto_tuning.c index 7a15e516..fad245ed 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -42,7 +42,7 @@ get_mean(double *elems, int nelems) double sum = 0; int i; - AssertArg(nelems > 0); + Assert(nelems > 0); for (i = 0; i < nelems; ++i) sum += elems[i]; @@ -58,7 +58,7 @@ get_estimation(double *elems, int nelems) { int start; - AssertArg(nelems > 0); + Assert(nelems > 0); if (nelems > auto_tuning_window_size) start = nelems - auto_tuning_window_size; @@ -77,7 +77,7 @@ is_stable(double *elems, int nelems) double est, last; - AssertArg(nelems > 1); + Assert(nelems > 1); est = get_mean(elems, nelems - 1); last = elems[nelems - 1]; From 770e9f7a6c717e4f495176fe08fb97802f6da6ce Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 22 Dec 2022 13:41:43 +0500 Subject: [PATCH 154/203] Raise 
AQO version to v1.6. Rename a couple of UI functions: 1. aqo_enable_query -> aqo_enable_class 2. aqo_disable_query -> aqo_disable_class Fix the bug of 1.5 with execution of "enable" routine from "disable" UI function. Correct aqo_cleanup() return type: It returns single set of values. So, we don't really needed all of the materialization machinery. Just to form and return a tuple. --- Makefile | 5 +++-- aqo--1.5--1.6.sql | 32 +++++++++++++++++++++++++++++ aqo.control | 2 +- expected/aqo_CVE-2020-14350.out | 24 +++++++++++----------- expected/gucs.out | 2 +- expected/relocatable.out | 12 +++++------ sql/aqo_CVE-2020-14350.sql | 16 +++++++-------- sql/relocatable.sql | 4 ++-- storage.c | 36 +++++++-------------------------- 9 files changed, 72 insertions(+), 61 deletions(-) create mode 100644 aqo--1.5--1.6.sql diff --git a/Makefile b/Makefile index b07d7f86..7370647f 100755 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # contrib/aqo/Makefile EXTENSION = aqo -EXTVERSION = 1.5 +EXTVERSION = 1.6 PGFILEDESC = "AQO - Adaptive Query Optimization" MODULE_big = aqo OBJS = $(WIN32RES) \ @@ -23,7 +23,8 @@ EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/aqo.conf EXTRA_INSTALL = contrib/postgres_fdw contrib/pg_stat_statements DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql \ - aqo--1.2--1.3.sql aqo--1.3--1.4.sql aqo--1.4--1.5.sql + aqo--1.2--1.3.sql aqo--1.3--1.4.sql aqo--1.4--1.5.sql \ + aqo--1.5--1.6.sql ifdef USE_PGXS PG_CONFIG ?= pg_config diff --git a/aqo--1.5--1.6.sql b/aqo--1.5--1.6.sql new file mode 100644 index 00000000..4101d33d --- /dev/null +++ b/aqo--1.5--1.6.sql @@ -0,0 +1,32 @@ +/* contrib/aqo/aqo--1.5--1.6.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION aqo UPDATE TO '1.6'" to load this file. 
\quit + +DROP FUNCTION aqo_enable_query; +DROP FUNCTION aqo_disable_query; +DROP FUNCTION aqo_cleanup; + +CREATE FUNCTION aqo_enable_class(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_enable_query' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION aqo_disable_class(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_disable_query' +LANGUAGE C STRICT VOLATILE; + +-- +-- Remove unneeded rows from the AQO ML storage. +-- For common feature space, remove rows from aqo_data only. +-- For custom feature space - remove all rows related to the space from all AQO +-- tables even if only one oid for one feature subspace of the space is illegal. +-- Returns number of deleted rows from aqo_queries and aqo_data tables. +-- +CREATE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) +RETURNS record +AS 'MODULE_PATHNAME', 'aqo_cleanup' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cleanup() IS +'Remove unneeded rows from the AQO ML storage'; diff --git a/aqo.control b/aqo.control index 5507effb..4ca0ecb6 100644 --- a/aqo.control +++ b/aqo.control @@ -1,5 +1,5 @@ # AQO extension comment = 'machine learning for cardinality estimation in optimizer' -default_version = '1.5' +default_version = '1.6' module_pathname = '$libdir/aqo' relocatable = true diff --git a/expected/aqo_CVE-2020-14350.out b/expected/aqo_CVE-2020-14350.out index ccdc4694..8685b935 100644 --- a/expected/aqo_CVE-2020-14350.out +++ b/expected/aqo_CVE-2020-14350.out @@ -116,7 +116,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_enable_query(hash bigint) +CREATE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -125,9 +125,9 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_enable_query" already exists with same argument types +ERROR: function "aqo_enable_class" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) +CREATE OR REPLACE 
FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -135,8 +135,8 @@ BEGIN END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_enable_query(42); - aqo_enable_query +SELECT aqo_enable_class(42); + aqo_enable_class ------------------ (1 row) @@ -149,7 +149,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_enable_query(bigint); +DROP FUNCTION aqo_enable_class(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 4 @@ -162,7 +162,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_disable_query(hash bigint) +CREATE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -171,9 +171,9 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_disable_query" already exists with same argument types +ERROR: function "aqo_disable_class" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -181,8 +181,8 @@ BEGIN END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_disable_query(42); - aqo_disable_query +SELECT aqo_disable_class(42); + aqo_disable_class ------------------- (1 row) @@ -195,7 +195,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_disable_query(bigint); +DROP FUNCTION aqo_disable_class(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 5 diff --git a/expected/gucs.out b/expected/gucs.out index 3d63f978..6809df64 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -107,7 +107,7 @@ SELECT obj_description('aqo_reset'::regproc::oid); List of functions Schema | Name | Result data type | Argument data types | Type --------+-------------+------------------+-----------------------------------+------ - public | aqo_cleanup | SETOF record | OUT nfs integer, OUT nfss integer | func + public | aqo_cleanup | record | OUT nfs 
integer, OUT nfss integer | func (1 row) \df aqo_reset diff --git a/expected/relocatable.out b/expected/relocatable.out index 5fcf06e6..949896f6 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -80,9 +80,9 @@ ORDER BY (md5(query_text)) /* * Below, we should check each UI function */ -SELECT aqo_disable_query(id) FROM ( +SELECT aqo_disable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; - aqo_disable_query + aqo_disable_class ------------------- @@ -93,13 +93,13 @@ ORDER BY (learn_aqo, use_aqo, auto_tuning); learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f - t | t | f - t | t | f + f | f | f + f | f | f (3 rows) -SELECT aqo_enable_query(id) FROM ( +SELECT aqo_enable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; - aqo_enable_query + aqo_enable_class ------------------ diff --git a/sql/aqo_CVE-2020-14350.sql b/sql/aqo_CVE-2020-14350.sql index 1b36b50b..75833223 100644 --- a/sql/aqo_CVE-2020-14350.sql +++ b/sql/aqo_CVE-2020-14350.sql @@ -103,7 +103,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_enable_query(hash bigint) +CREATE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -115,7 +115,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -124,13 +124,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_enable_query(42); +SELECT aqo_enable_class(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_enable_query(bigint); +DROP FUNCTION aqo_enable_class(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 4 @@ -140,7 +140,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_disable_query(hash bigint) +CREATE FUNCTION aqo_disable_class(hash 
bigint) RETURNS VOID AS $$ BEGIN @@ -152,7 +152,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -161,13 +161,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_disable_query(42); +SELECT aqo_disable_class(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_disable_query(bigint); +DROP FUNCTION aqo_disable_class(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 5 diff --git a/sql/relocatable.sql b/sql/relocatable.sql index e8cc57c3..780c385e 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -39,11 +39,11 @@ ORDER BY (md5(query_text)) /* * Below, we should check each UI function */ -SELECT aqo_disable_query(id) FROM ( +SELECT aqo_disable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries ORDER BY (learn_aqo, use_aqo, auto_tuning); -SELECT aqo_enable_query(id) FROM ( +SELECT aqo_enable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries ORDER BY (learn_aqo, use_aqo, auto_tuning); diff --git a/storage.c b/storage.c index bcbcfac4..8a21892c 100644 --- a/storage.c +++ b/storage.c @@ -2170,39 +2170,16 @@ aqo_cleanup(PG_FUNCTION_ARGS) { int fs_num; int fss_num; - ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; TupleDesc tupDesc; - MemoryContext per_query_ctx; - MemoryContext oldcontext; - Tuplestorestate *tupstore; + HeapTuple tuple; + Datum result; Datum values[2]; bool nulls[2] = {0, 0}; - /* check to see if caller supports us returning a tuplestore */ - if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("set-valued function called in context that cannot accept a set"))); - if (!(rsinfo->allowedModes & 
SFRM_Materialize)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("materialize mode required, but it is not allowed in this context"))); - - /* Switch into long-lived context to construct returned data structures */ - per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; - oldcontext = MemoryContextSwitchTo(per_query_ctx); - - /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == 2); - tupstore = tuplestore_begin_heap(true, false, work_mem); - rsinfo->returnMode = SFRM_Materialize; - rsinfo->setResult = tupstore; - rsinfo->setDesc = tupDesc; - - MemoryContextSwitchTo(oldcontext); + Assert(tupDesc->natts == 2); /* * Make forced cleanup: if at least one fss isn't actual, remove parent FS @@ -2216,9 +2193,10 @@ aqo_cleanup(PG_FUNCTION_ARGS) values[0] = Int32GetDatum(fs_num); values[1] = Int32GetDatum(fss_num); - tuplestore_putvalues(tupstore, tupDesc, values, nulls); - tuplestore_donestoring(tupstore); - PG_RETURN_VOID(); + tuple = heap_form_tuple(tupDesc, values, nulls); + result = HeapTupleGetDatum(tuple); + + PG_RETURN_DATUM(result); } /* From 0fdbb07c1c15eb71c903db67dea0a0312472c6c7 Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Tue, 10 Jan 2023 10:32:39 +0700 Subject: [PATCH 155/203] Removed the learn_cache routine. Now it is not needed, because non-transactional storage is used. 
--- Makefile | 2 +- aqo.c | 3 +- aqo.h | 7 +- aqo_shared.c | 158 --------------- aqo_shared.h | 19 -- cardinality_estimation.c | 2 +- cardinality_hooks.c | 2 +- expected/statement_timeout.out | 23 +++ learn_cache.c | 341 --------------------------------- learn_cache.h | 17 -- postprocessing.c | 24 +-- sql/statement_timeout.sql | 7 + storage.c | 19 +- 13 files changed, 50 insertions(+), 574 deletions(-) delete mode 100644 learn_cache.c delete mode 100644 learn_cache.h diff --git a/Makefile b/Makefile index 7370647f..d3aec440 100755 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ MODULE_big = aqo OBJS = $(WIN32RES) \ aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ - selectivity_cache.o storage.o utils.o learn_cache.o aqo_shared.o + selectivity_cache.o storage.o utils.o aqo_shared.o TAP_TESTS = 1 diff --git a/aqo.c b/aqo.c index ab29e516..8280ccbf 100644 --- a/aqo.c +++ b/aqo.c @@ -22,7 +22,6 @@ #include "cardinality_hooks.h" #include "path_utils.h" #include "preprocessing.h" -#include "learn_cache.h" #include "storage.h" @@ -206,7 +205,7 @@ _PG_init(void) PGC_USERSET, 0, NULL, - lc_assign_hook, + NULL, NULL ); diff --git a/aqo.h b/aqo.h index 4471d2b8..0a373147 100644 --- a/aqo.h +++ b/aqo.h @@ -174,6 +174,7 @@ extern bool aqo_show_hash; extern bool aqo_show_details; extern int aqo_join_threshold; extern bool use_wide_search; +extern bool aqo_learn_statement_timeout; /* Parameters for current query */ typedef struct QueryContextData @@ -256,10 +257,8 @@ int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); /* Storage interaction */ -extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, - bool isSafe); -extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, - List *reloids, bool isTimedOut); +extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids); +extern bool update_fss_ext(uint64 fs, int fss, 
OkNNrdata *data, List *reloids); /* Query preprocessing hooks */ extern void print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, diff --git a/aqo_shared.c b/aqo_shared.c index 86908880..0a6a8db6 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -12,161 +12,13 @@ #include "storage.h" -typedef struct -{ - int magic; - uint32 total_size; - uint32 delta; -} dsm_seg_hdr; - -#define free_space(hdr) (uint32) (temp_storage_size - sizeof(dsm_seg_hdr) - hdr->delta) -#define addr(delta) ((char *) dsm_segment_address(seg) + sizeof(dsm_seg_hdr) + delta) - shmem_startup_hook_type prev_shmem_startup_hook = NULL; AQOSharedState *aqo_state = NULL; -HTAB *fss_htab = NULL; -static int aqo_htab_max_items = 1000; int fs_max_items = 10000; /* Max number of different feature spaces in ML model */ int fss_max_items = 100000; /* Max number of different feature subspaces in ML model */ -static uint32 temp_storage_size = 1024 * 1024 * 10; /* Storage size, in bytes */ -static dsm_segment *seg = NULL; - -static void aqo_detach_shmem(int code, Datum arg); static void on_shmem_shutdown(int code, Datum arg); - -void * -get_dsm_all(uint32 *size) -{ - dsm_seg_hdr *hdr; - - Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE)); - - if (aqo_state->dsm_handler == DSM_HANDLE_INVALID) - { - /* Fast path. No any cached data exists. */ - *size = 0; - return NULL; - } - - if (!seg) - { - /* if segment exists we should connect to */ - seg = dsm_attach(aqo_state->dsm_handler); - Assert(seg); - dsm_pin_mapping(seg); - before_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); - } - - hdr = (dsm_seg_hdr *) dsm_segment_address(seg); - *size = hdr->delta; - return (char *) hdr + sizeof(dsm_seg_hdr); -} - -/* - * Cleanup of DSM cache: set header into default state and zero the memory block. - * This operation can be coupled with the cache dump, so we do it under an external - * hold of the lock. 
- */ -void -reset_dsm_cache(void) -{ - dsm_seg_hdr *hdr; - char *start; - - Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE)); - - if (aqo_state->dsm_handler == DSM_HANDLE_INVALID || !seg) - /* Fast path. No any cached data exists. */ - return; - - hdr = (dsm_seg_hdr *) dsm_segment_address(seg); - start = (char *) hdr + sizeof(dsm_seg_hdr); - - /* Reset the cache */ - memset(start, 0, hdr->delta); - - hdr->delta = 0; - hdr->total_size = temp_storage_size - sizeof(dsm_seg_hdr); -} - -char * -get_cache_address(void) -{ - dsm_seg_hdr *hdr; - - Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || - LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); - - if (aqo_state->dsm_handler != DSM_HANDLE_INVALID) - { - if (!seg) - { - /* Another process created the segment yet. Just attach to. */ - seg = dsm_attach(aqo_state->dsm_handler); - dsm_pin_mapping(seg); - before_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); - } - - hdr = (dsm_seg_hdr *) dsm_segment_address(seg); - } - else - { - /* - * First request for DSM cache in this instance. - * Create the DSM segment. Pin it to live up to instance shutdown. - * Don't forget to detach DSM segment before an exit. 
- */ - seg = dsm_create(temp_storage_size, 0); - dsm_pin_mapping(seg); - dsm_pin_segment(seg); - aqo_state->dsm_handler = dsm_segment_handle(seg); - before_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); - - hdr = (dsm_seg_hdr *) dsm_segment_address(seg); - hdr->magic = AQO_SHARED_MAGIC; - hdr->delta = 0; - hdr->total_size = temp_storage_size - sizeof(dsm_seg_hdr); - } - - Assert(seg); - Assert(hdr->magic == AQO_SHARED_MAGIC && hdr->total_size > 0); - - return (char *) hdr + sizeof(dsm_seg_hdr); -} - -uint32 -get_dsm_cache_pos(uint32 size) -{ - dsm_seg_hdr *hdr; - uint32 pos; - - Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || - LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); - - (void) get_cache_address(); - hdr = (dsm_seg_hdr *) dsm_segment_address(seg); - - if (free_space(hdr) < size || size == 0) - elog(ERROR, - "DSM cache can't allcoate a mem block. Required: %u, free: %u", - size, free_space(hdr)); - - pos = hdr->delta; - hdr->delta += size; - Assert(free_space(hdr) >= 0); - return pos; -} - -static void -aqo_detach_shmem(int code, Datum arg) -{ - if (seg != NULL) - dsm_detach(seg); - seg = NULL; -} - void aqo_init_shmem(void) { @@ -177,7 +29,6 @@ aqo_init_shmem(void) prev_shmem_startup_hook(); aqo_state = NULL; - fss_htab = NULL; stat_htab = NULL; qtexts_htab = NULL; data_htab = NULL; @@ -189,7 +40,6 @@ aqo_init_shmem(void) { /* First time through ... 
*/ - aqo_state->dsm_handler = DSM_HANDLE_INVALID; aqo_state->qtexts_dsa_handler = DSM_HANDLE_INVALID; aqo_state->data_dsa_handler = DSM_HANDLE_INVALID; @@ -207,13 +57,6 @@ aqo_init_shmem(void) LWLockInitialize(&aqo_state->queries_lock, LWLockNewTrancheId()); } - info.keysize = sizeof(htab_key); - info.entrysize = sizeof(htab_entry); - fss_htab = ShmemInitHash("AQO hash", - aqo_htab_max_items, aqo_htab_max_items, - &info, - HASH_ELEM | HASH_BLOBS); - info.keysize = sizeof(((StatEntry *) 0)->queryid); info.entrysize = sizeof(StatEntry); stat_htab = ShmemInitHash("AQO Stat HTAB", fs_max_items, fs_max_items, @@ -279,7 +122,6 @@ aqo_memsize(void) Size size; size = MAXALIGN(sizeof(AQOSharedState)); - size = add_size(size, hash_estimate_size(aqo_htab_max_items, sizeof(htab_entry))); size = add_size(size, hash_estimate_size(fs_max_items, sizeof(AQOSharedState))); size = add_size(size, hash_estimate_size(fs_max_items, sizeof(StatEntry))); size = add_size(size, hash_estimate_size(fs_max_items, sizeof(QueryTextEntry))); diff --git a/aqo_shared.h b/aqo_shared.h index 926a2723..e922fb1c 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -9,23 +9,9 @@ #define AQO_SHARED_MAGIC 0x053163 -typedef struct -{ - /* XXX we assume this struct contains no padding bytes */ - uint64 fs; - int64 fss; -} htab_key; - -typedef struct -{ - htab_key key; - uint32 hdr_off; /* offset of data in DSM cache */ -} htab_entry; - typedef struct AQOSharedState { LWLock lock; /* mutual exclusion */ - dsm_handle dsm_handler; /* Storage fields */ LWLock stat_lock; /* lock for access to stat storage */ @@ -47,16 +33,11 @@ typedef struct AQOSharedState extern shmem_startup_hook_type prev_shmem_startup_hook; extern AQOSharedState *aqo_state; -extern HTAB *fss_htab; extern int fs_max_items; /* Max number of feature spaces that AQO can operate */ extern int fss_max_items; extern Size aqo_memsize(void); -extern void reset_dsm_cache(void); -extern void *get_dsm_all(uint32 *size); -extern char *get_cache_address(void); 
-extern uint32 get_dsm_cache_pos(uint32 size); extern void aqo_init_shmem(void); #endif /* AQO_SHARED_H */ diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 9db202a1..aca17f1e 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -81,7 +81,7 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, &ncols, &features); data = OkNNr_allocate(ncols); - if (load_fss_ext(query_context.fspace_hash, *fss, data, NULL, true)) + if (load_fss_ext(query_context.fspace_hash, *fss, data, NULL)) result = OkNNr_predict(data, features); else { diff --git a/cardinality_hooks.c b/cardinality_hooks.c index a3e8e331..049f674f 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -452,7 +452,7 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, *fss = get_grouped_exprs_hash(child_fss, group_exprs); memset(&data, 0, sizeof(OkNNrdata)); - if (!load_fss_ext(query_context.fspace_hash, *fss, &data, NULL, true)) + if (!load_fss_ext(query_context.fspace_hash, *fss, &data, NULL)) return -1; Assert(data.rows == 1); diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index 0b26b430..77a9a641 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -111,6 +111,29 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); 5 (1 row) +-- Interrupted query should immediately appear in aqo_data +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + +SET statement_timeout = 500; +SELECT count(*) FROM aqo_data; -- Must be zero + count +------- + 0 +(1 row) + +SELECT x, pg_sleep(0.1) FROM t WHERE x > 0; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. +ERROR: canceling statement due to statement timeout +SELECT count(*) FROM aqo_data; -- Must be one + count +------- + 1 +(1 row) + SELECT 1 FROM aqo_reset(); ?column? 
---------- diff --git a/learn_cache.c b/learn_cache.c deleted file mode 100644 index c7f6ef87..00000000 --- a/learn_cache.c +++ /dev/null @@ -1,341 +0,0 @@ -/* - ******************************************************************************* - * - * - * - ******************************************************************************* - * - * Copyright (c) 2016-2022, Postgres Professional - * - * IDENTIFICATION - * aqo/learn_cache.c - * - */ - -#include "postgres.h" -#include "access/parallel.h" /* Just for IsParallelWorker() */ -#include "miscadmin.h" - -#include "aqo.h" -#include "aqo_shared.h" -#include "learn_cache.h" -#include "storage.h" - - -typedef struct -{ - int magic; - htab_key key; - int rows; - int cols; - int nrelids; - - /* - * Links to variable data: - * double *matrix[aqo_K]; - * double *targets; - * double *rfactors; - * int *relids; - */ -} dsm_block_hdr; - - -bool aqo_learn_statement_timeout = false; - -static uint32 init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relids); - - -/* Calculate, how many data we need to store an ML record. 
*/ -static uint32 -calculate_size(int cols, List *reloids) -{ - uint32 size = sizeof(dsm_block_hdr); /* header's size */ - - size += sizeof(double) * cols * aqo_K; /* matrix */ - size += 2 * sizeof(double) * aqo_K; /* targets, rfactors */ - - /* Calculate memory size needed to store relation names */ - size += list_length(reloids) * sizeof(Oid); - return size; -} - -bool -lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids) -{ - htab_key key = {fs, fss}; - htab_entry *entry; - dsm_block_hdr *hdr; - char *ptr; - bool found; - int i; - ListCell *lc; - uint32 size; - - Assert(fss_htab && aqo_learn_statement_timeout); - - size = calculate_size(data->cols, reloids); - LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); - - entry = (htab_entry *) hash_search(fss_htab, &key, HASH_ENTER, &found); - if (found) - { - hdr = (dsm_block_hdr *) (get_cache_address() + entry->hdr_off); - - Assert(hdr->magic == AQO_SHARED_MAGIC); - Assert(hdr->key.fs == fs && hdr->key.fss == fss); - - if (data->cols != hdr->cols || list_length(reloids) != hdr->nrelids) - { - /* - * Collision found: the same {fs,fss}, but something different. - * For simplicity - just don't update. - */ - elog(DEBUG5, "[AQO]: A collision found in the temporary storage."); - LWLockRelease(&aqo_state->lock); - return false; - } - } - else - { - /* Get new block of DSM */ - entry->hdr_off = get_dsm_cache_pos(size); - hdr = (dsm_block_hdr *) (get_cache_address() + entry->hdr_off); - - /* These fields shouldn't change */ - hdr->magic = AQO_SHARED_MAGIC; - hdr->key.fs = fs; - hdr->key.fss = fss; - hdr->cols = data->cols; - hdr->nrelids = list_length(reloids); - } - - hdr->rows = data->rows; - ptr = (char *) hdr + sizeof(dsm_block_hdr); /* start point of variable data */ - - /* copy the matrix into DSM storage */ - - if (hdr->cols > 0) - { - for (i = 0; i < aqo_K; ++i) - { - if (i >= hdr->rows) - break; - - if (!ptr || !data->matrix[i]) - elog(PANIC, "Something disruptive have happened! 
%d, %d (%d %d)", i, hdr->rows, found, hdr->cols); - memcpy(ptr, data->matrix[i], sizeof(double) * hdr->cols); - ptr += sizeof(double) * data->cols; - } - } - - /* - * Kludge code. But we should rewrite this code because now all knowledge - * base lives in non-transactional shared memory. - */ - ptr = (char *) hdr + sizeof(dsm_block_hdr) + (sizeof(double) * data->cols * aqo_K); - - /* copy targets into DSM storage */ - memcpy(ptr, data->targets, sizeof(double) * hdr->rows); - ptr += sizeof(double) * aqo_K; - - /* copy rfactors into DSM storage */ - memcpy(ptr, data->rfactors, sizeof(double) * hdr->rows); - ptr += sizeof(double) * aqo_K; - - /* store list of relations */ - foreach(lc, reloids) - { - Oid reloid = lfirst_oid(lc); - - memcpy(ptr, &reloid, sizeof(Oid)); - ptr += sizeof(Oid); - } - - /* Check the invariant */ - Assert((uint32)(ptr - (char *) hdr) == size); - - elog(DEBUG5, "DSM entry: %s, targets: %d.", - found ? "Reused" : "New entry", hdr->rows); - LWLockRelease(&aqo_state->lock); - return true; -} - -bool -lc_has_fss(uint64 fs, int fss) -{ - htab_key key = {fs, fss}; - bool found; - - if (!aqo_learn_statement_timeout) - return false; - - Assert(fss_htab); - - LWLockAcquire(&aqo_state->lock, LW_SHARED); - (void) hash_search(fss_htab, &key, HASH_FIND, &found); - LWLockRelease(&aqo_state->lock); - - return found; -} - -/* - * Load ML data from a memory cache, not from a table. 
- */ -bool -lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids) -{ - htab_key key = {fs, fss}; - htab_entry *entry; - bool found; - dsm_block_hdr *hdr; - - Assert(fss_htab && aqo_learn_statement_timeout); - - if (aqo_show_details) - elog(NOTICE, "[AQO] Load ML data for fs "UINT64_FORMAT", fss %d from the cache", - fs, fss); - - LWLockAcquire(&aqo_state->lock, LW_SHARED); - entry = (htab_entry *) hash_search(fss_htab, &key, HASH_FIND, &found); - if (!found) - { - LWLockRelease(&aqo_state->lock); - return false; - } - - hdr = (dsm_block_hdr *) (get_cache_address() + entry->hdr_off); - Assert(hdr->magic == AQO_SHARED_MAGIC); - Assert(hdr->key.fs == fs && hdr->key.fss == fss); - - /* XXX */ - if (hdr->cols != data->cols) - { - LWLockRelease(&aqo_state->lock); - return false; - } - - init_with_dsm(data, hdr, reloids); - LWLockRelease(&aqo_state->lock); - return true; -} - -static uint32 -init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **reloids) -{ - int i; - char *ptr = (char *) hdr + sizeof(dsm_block_hdr); - - Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || - LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); - Assert(hdr->magic == AQO_SHARED_MAGIC); - Assert(hdr && ptr && hdr->rows > 0); - - data->rows = hdr->rows; - data->cols = hdr->cols; - - if (data->cols > 0) - { - for (i = 0; i < aqo_K; ++i) - { - if (i < data->rows) - { - data->matrix[i] = palloc(sizeof(double) * data->cols); - memcpy(data->matrix[i], ptr, sizeof(double) * data->cols); - } - ptr += sizeof(double) * data->cols; - } - } - - /* - * Kludge code. But we should rewrite this code because now all knowledge - * base lives in non-transactional shared memory. 
- */ - ptr = (char *) hdr + sizeof(dsm_block_hdr) + (sizeof(double) * data->cols * aqo_K); - - memcpy(data->targets, ptr, sizeof(double) * hdr->rows); - ptr += sizeof(double) * aqo_K; - memcpy(data->rfactors, ptr, sizeof(double) * hdr->rows); - ptr += sizeof(double) * aqo_K; - - if (reloids) - { - *reloids = NIL; - for (i = 0; i < hdr->nrelids; i++) - { - *reloids = lappend_oid(*reloids, *(Oid *)(ptr)); - ptr += sizeof(Oid); - } - return calculate_size(hdr->cols, *reloids); - } - - /* It is just a read operation. No any interest in size calculation. */ - return 0; -} - -void -lc_flush_data(void) -{ - char *ptr; - uint32 size; - - if (aqo_state->dsm_handler == DSM_HANDLE_INVALID) - /* Fast path. No any cached data exists. */ - return; - - LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); - ptr = get_dsm_all(&size); - - /* Iterate through records and store them into the aqo_data table */ - while (size > 0) - { - dsm_block_hdr *hdr = (dsm_block_hdr *) ptr; - OkNNrdata data; - List *reloids = NIL; - uint32 delta = 0; - - delta = init_with_dsm(&data, hdr, &reloids); - Assert(delta > 0); - ptr += delta; - size -= delta; - aqo_data_store(hdr->key.fs, hdr->key.fss, &data, reloids); - - if (!hash_search(fss_htab, (void *) &hdr->key, HASH_REMOVE, NULL)) - elog(PANIC, "[AQO] Flush: local ML cache is corrupted."); - } - - reset_dsm_cache(); - LWLockRelease(&aqo_state->lock); -} - -/* - * Main purpose of this hook is to cleanup a backend cache in some way to prevent - * memory leaks - in large queries we could have many unused fss nodes. - */ -void -lc_assign_hook(bool newval, void *extra) -{ - HASH_SEQ_STATUS status; - htab_entry *entry; - - if (!fss_htab || !IsUnderPostmaster || IsParallelWorker()) - /* Clean this shared cache only in main backend process. */ - return; - - /* Remove all entries, reset memory context. */ - - elog(DEBUG5, "[AQO] Cleanup local cache of ML data."); - - /* Remove all entries in the shared hash table. 
*/ - LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); - hash_seq_init(&status, fss_htab); - while ((entry = (htab_entry *) hash_seq_search(&status)) != NULL) - { - if (!hash_search(fss_htab, (void *) &entry->key, HASH_REMOVE, NULL)) - elog(PANIC, "[AQO] The local ML cache is corrupted."); - } - - /* Now, clean additional DSM block */ - reset_dsm_cache(); - - LWLockRelease(&aqo_state->lock); -} diff --git a/learn_cache.h b/learn_cache.h deleted file mode 100644 index df61700e..00000000 --- a/learn_cache.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef LEARN_CACHE_H -#define LEARN_CACHE_H - -#include "nodes/pg_list.h" - -#include "machine_learning.h" - -extern bool aqo_learn_statement_timeout; - -extern bool lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids); -extern bool lc_has_fss(uint64 fs, int fss); -extern bool lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids); -extern void lc_remove_fss(uint64 fs, int fss); -extern void lc_flush_data(void); -extern void lc_assign_hook(bool newval, void *extra); - -#endif /* LEARN_CACHE_H */ diff --git a/postprocessing.c b/postprocessing.c index 75a61707..165391dd 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -28,10 +28,11 @@ #include "path_utils.h" #include "machine_learning.h" #include "preprocessing.h" -#include "learn_cache.h" #include "storage.h" +bool aqo_learn_statement_timeout = false; + typedef struct { List *clauselist; @@ -58,9 +59,8 @@ static char *PlanStateInfo = "PlanStateInfo"; /* Query execution statistics collecting utilities */ static void atomic_fss_learn_step(uint64 fhash, int fss, OkNNrdata *data, - double *features, - double target, double rfactor, - List *reloids, bool isTimedOut); + double *features, double target, + double rfactor, List *reloids); static bool learnOnPlanState(PlanState *p, void *context); static void learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, double learned, double rfactor, Plan *plan, @@ -85,13 +85,13 @@ static bool ExtractFromQueryEnv(QueryDesc 
*queryDesc); static void atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, double *features, double target, double rfactor, - List *reloids, bool isTimedOut) + List *reloids) { - if (!load_fss_ext(fs, fss, data, NULL, !isTimedOut)) + if (!load_fss_ext(fs, fss, data, NULL)) data->rows = 0; data->rows = OkNNr_learn(data, features, target, rfactor); - update_fss_ext(fs, fss, data, reloids, isTimedOut); + update_fss_ext(fs, fss, data, reloids); } static void @@ -120,7 +120,7 @@ learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, /* Critical section */ atomic_fss_learn_step(fs, fss, data, NULL, - target, rfactor, rels->hrels, ctx->isTimedOut); + target, rfactor, rels->hrels); /* End of critical section */ } @@ -157,8 +157,7 @@ learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, data = OkNNr_allocate(ncols); /* Critical section */ - atomic_fss_learn_step(fs, fss, data, features, target, rfactor, - rels->hrels, ctx->isTimedOut); + atomic_fss_learn_step(fs, fss, data, features, target, rfactor, rels->hrels); /* End of critical section */ } @@ -750,11 +749,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) { aqo_obj_stat ctx = {NIL, NIL, NIL, query_context.learn_aqo, false}; - /* - * Before learn phase, flush all cached data down to ML base. - */ - lc_flush_data(); - /* * Analyze plan if AQO need to learn or need to collect statistics only. 
*/ diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql index 36afc370..60ae7a14 100644 --- a/sql/statement_timeout.sql +++ b/sql/statement_timeout.sql @@ -60,6 +60,13 @@ SET statement_timeout = 5500; SELECT *, pg_sleep(1) FROM t; -- Get reliable data SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +-- Interrupted query should immediately appear in aqo_data +SELECT 1 FROM aqo_reset(); +SET statement_timeout = 500; +SELECT count(*) FROM aqo_data; -- Must be zero +SELECT x, pg_sleep(0.1) FROM t WHERE x > 0; +SELECT count(*) FROM aqo_data; -- Must be one + SELECT 1 FROM aqo_reset(); DROP TABLE t; DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index 8a21892c..fcbe5569 100644 --- a/storage.c +++ b/storage.c @@ -27,7 +27,6 @@ #include "aqo_shared.h" #include "machine_learning.h" #include "preprocessing.h" -#include "learn_cache.h" #include "storage.h" @@ -107,25 +106,15 @@ PG_FUNCTION_INFO_V1(aqo_execution_time); bool -load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe) +load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids) { - if (isSafe && (!aqo_learn_statement_timeout || !lc_has_fss(fs, fss))) - return load_aqo_data(fs, fss, data, reloids, false); - else - { - Assert(aqo_learn_statement_timeout); - return lc_load_fss(fs, fss, data, reloids); - } + return load_aqo_data(fs, fss, data, reloids, false); } bool -update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids, - bool isTimedOut) +update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids) { - if (!isTimedOut) - return aqo_data_store(fs, fss, data, reloids); - else - return lc_update_fss(fs, fss, data, reloids); + return aqo_data_store(fs, fss, data, reloids); } /* From d3f7f3bfa28f5bab9adbf6267a2b323ce2a9eb98 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 28 Jun 2022 12:28:23 +0300 Subject: [PATCH 156/203] Load neighbours with the fss hash except dublicated neighours. Rewrite test for look-a-like functional. 
Current tests contain correlation columns and queries have more nodes and description features. Add aqo_k as custom parameter to define few number of features for prediction. Its default value is 3. Queries can contain a larger number of features than 3 especially generic queries. Also add predict_a_few_neibours parameter for switch avalable to predict a few neibors than 3. It is done for not to change the previous logic of the code --- aqo.c | 26 +- aqo.h | 1 + cardinality_estimation.c | 2 +- expected/look_a_like.out | 517 ++++++++++++++++++++++++++++----------- machine_learning.c | 5 +- sql/look_a_like.sql | 110 ++++++--- storage.c | 78 ++++-- storage.h | 2 +- 8 files changed, 543 insertions(+), 198 deletions(-) diff --git a/aqo.c b/aqo.c index 8280ccbf..c36b7526 100644 --- a/aqo.c +++ b/aqo.c @@ -34,6 +34,7 @@ void _PG_init(void); /* Strategy of determining feature space for new queries. */ int aqo_mode = AQO_MODE_CONTROLLED; bool force_collect_stat; +bool aqo_predict_with_few_neighbors; /* * Show special info in EXPLAIN mode. 
@@ -71,7 +72,7 @@ int auto_tuning_infinite_loop = 8; /* Machine learning parameters */ /* The number of nearest neighbors which will be chosen for ML-operations */ -int aqo_k = 3; +int aqo_k; double log_selectivity_lower_bound = -30; /* @@ -287,6 +288,29 @@ _PG_init(void) NULL ); + DefineCustomIntVariable("aqo.k_neighbors_threshold", + "Set the threshold of number of neighbors for predicting.", + NULL, + &aqo_k, + 3, + 1, INT_MAX / 1000, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomBoolVariable("aqo.predict_with_few_neighbors", + "Make prediction with less neighbors than we should have.", + NULL, + &aqo_predict_with_few_neighbors, + true, + PGC_USERSET, + 0, + NULL, + lc_assign_hook, + NULL); + prev_shmem_startup_hook = shmem_startup_hook; shmem_startup_hook = aqo_init_shmem; prev_planner_hook = planner_hook; diff --git a/aqo.h b/aqo.h index 0a373147..9418646c 100644 --- a/aqo.h +++ b/aqo.h @@ -217,6 +217,7 @@ extern double auto_tuning_convergence_error; /* Machine learning parameters */ extern int aqo_k; +extern bool aqo_predict_with_few_neighbors; extern double log_selectivity_lower_bound; /* Parameters for current query */ diff --git a/cardinality_estimation.c b/cardinality_estimation.c index aca17f1e..f93e0905 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -93,7 +93,7 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, */ /* Try to search in surrounding feature spaces for the same node */ - if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, use_wide_search)) + if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, use_wide_search, features)) result = -1; else { diff --git a/expected/look_a_like.out b/expected/look_a_like.out index ecd73fb4..b0d3047c 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -2,14 +2,17 @@ CREATE EXTENSION aqo; SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; +set aqo.show_hash = 'off'; +SET 
aqo.k_neighbors_threshold_for_predict = 1; SET enable_material = 'off'; DROP TABLE IF EXISTS a,b CASCADE; NOTICE: table "a" does not exist, skipping NOTICE: table "b" does not exist, skipping -CREATE TABLE a (x int); -INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival; -CREATE TABLE b (y int); -INSERT INTO b (y) SELECT mod(ival + 1,10) FROM generate_series(1,1000) As ival; +-- Create tables with correlated datas in columns +CREATE TABLE a (x1 int, x2 int, x3 int); +INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; +CREATE TABLE b (y1 int, y2 int, y3 int); +INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; -- -- Returns string-by-string explain of a query. Made for removing some strings -- from the explain output. @@ -25,207 +28,425 @@ $$ LANGUAGE PLPGSQL; -- in the next queries with the same fss_hash SELECT str AS result FROM expln(' -SELECT x FROM A where x = 5;') AS str; - result ------------------------------------------------- - Seq Scan on public.a (actual rows=100 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +-------------------------------------------------------- + Nested Loop (actual rows=10000 loops=1) AQO not used - Output: x - Filter: (a.x = 5) - Rows Removed by Filter: 900 + Output: a.x1, b.y1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 = 5) AND (a.x2 = 5)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=100 loops=100) + AQO not used + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 = 5) + Rows Removed by Filter: 900 Using aqo: true AQO mode: LEARN JOINS: 0 -(8 rows) +(16 rows) SELECT str AS result FROM expln(' -SELECT x FROM A,B WHERE x = 5 AND A.x = B.y;') AS str -; -- Find cardinality for SCAN 
A(x=5) from a neighbour class, created by the - result --------------------------------------------------------- - Nested Loop (actual rows=10000 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------ + Hash Join (actual rows=50000 loops=1) AQO not used - Output: a.x - -> Seq Scan on public.b (actual rows=100 loops=1) + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) AQO not used - Output: b.y - Filter: (b.y = 5) - Rows Removed by Filter: 900 - -> Seq Scan on public.a (actual rows=100 loops=100) - AQO: rows=100, error=0% - Output: a.x - Filter: (a.x = 5) - Rows Removed by Filter: 900 + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=500 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=500 loops=1) + AQO not used + Output: a.x1 + Filter: ((a.x1 < 5) AND (a.x2 < 5)) + Rows Removed by Filter: 500 Using aqo: true AQO mode: LEARN JOINS: 0 -(16 rows) +(17 rows) --- query, executed above. SELECT str AS result FROM expln(' -SELECT x, sum(x) FROM A,B WHERE y = 5 AND A.x = B.y group by(x);') AS str -; -- Find the JOIN cardinality from a neighbour class. 
- result --------------------------------------------------------------- - GroupAggregate (actual rows=1 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 = 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +---------------------------------------------------------- + Hash Join (actual rows=0 loops=1) AQO not used - Output: a.x, sum(a.x) - Group Key: a.x - -> Nested Loop (actual rows=10000 loops=1) - AQO: rows=10000, error=0% - Output: a.x - -> Seq Scan on public.a (actual rows=100 loops=1) - AQO: rows=100, error=0% - Output: a.x - Filter: (a.x = 5) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=100 loops=100) - AQO: rows=100, error=0% - Output: b.y - Filter: (b.y = 5) - Rows Removed by Filter: 900 + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1 loops=1) + AQO: rows=1000, error=100% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=0 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=0 loops=1) + AQO not used + Output: a.x1 + Filter: ((a.x1 < 5) AND (a.x2 = 5)) + Rows Removed by Filter: 1000 Using aqo: true AQO mode: LEARN - JOINS: 1 -(20 rows) + JOINS: 0 +(17 rows) --- cardinality 100 in the first Seq Scan on a +--query contains nodes that have already been predicted SELECT str AS result FROM expln(' -SELECT x, sum(x) FROM A WHERE x = 5 group by(x);') AS str; - result ------------------------------------------------------- - GroupAggregate (actual rows=1 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------ + Hash Join (actual rows=50000 loops=1) + AQO: rows=50000, error=0% + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=500 loops=1) + 
Output: a.x1 + -> Seq Scan on public.a (actual rows=500 loops=1) + AQO: rows=500, error=0% + Output: a.x1 + Filter: ((a.x1 < 10) AND (a.x2 < 5)) + Rows Removed by Filter: 500 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(17 rows) + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------ + Hash Join (actual rows=70000 loops=1) AQO not used - Output: x, sum(x) - Group Key: a.x - -> Seq Scan on public.a (actual rows=100 loops=1) - AQO: rows=100, error=0% - Output: x - Filter: (a.x = 5) - Rows Removed by Filter: 900 + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=700 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=700 loops=1) + AQO not used + Output: a.x1 + Filter: ((a.x1 > 2) AND (a.x2 > 2)) + Rows Removed by Filter: 300 Using aqo: true AQO mode: LEARN JOINS: 0 -(12 rows) +(17 rows) --- no one predicted rows. 
we use knowledge cardinalities of the query --- in the next queries with the same fss_hash SELECT str AS result FROM expln(' -SELECT x FROM A where x < 10 group by(x);') AS str -WHERE str NOT LIKE '%Memory%'; - result -------------------------------------------------------- - HashAggregate (actual rows=10 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------- + Hash Join (actual rows=40000 loops=1) AQO not used - Output: x - Group Key: a.x - -> Seq Scan on public.a (actual rows=1000 loops=1) - AQO not used - Output: x - Filter: (a.x < 10) + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=400 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=400 loops=1) + AQO not used + Output: a.x1 + Filter: ((a.x1 > 5) AND (a.x2 > 5) AND (a.x3 < 10)) + Rows Removed by Filter: 600 Using aqo: true AQO mode: LEARN JOINS: 0 -(11 rows) +(17 rows) --- cardinality 1000 in Seq Scan on a SELECT str AS result FROM expln(' -SELECT x,y FROM A,B WHERE x < 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE '%Memory%'; - result -------------------------------------------------------------- - Merge Join (actual rows=100000 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------- + Hash Join (actual rows=50000 loops=1) AQO not used - Output: a.x, b.y - Merge Cond: (a.x = b.y) - -> Sort (actual rows=1000 loops=1) - Output: a.x - Sort Key: a.x - -> Seq Scan on public.a (actual rows=1000 loops=1) - AQO: rows=1000, error=0% - Output: a.x - Filter: (a.x < 10) - -> Sort (actual rows=99901 loops=1) - 
Output: b.y - Sort Key: b.y - -> Seq Scan on public.b (actual rows=1000 loops=1) + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=500 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=500 loops=1) AQO not used - Output: b.y + Output: a.x1 + Filter: ((a.x1 < 5) AND (a.x2 < 5) AND (a.x3 < 10)) + Rows Removed by Filter: 500 Using aqo: true AQO mode: LEARN JOINS: 0 -(20 rows) +(17 rows) --- cardinality 100 in Seq Scan on a and Seq Scan on b +--query contains nodes that have already been predicted SELECT str AS result FROM expln(' -SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str -WHERE str NOT LIKE '%Memory%'; - result ----------------------------------------------------------- - HashAggregate (actual rows=0 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=40000 loops=1) + AQO: rows=50000, error=20% + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=400 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=400 loops=1) + AQO: rows=500, error=20% + Output: a.x1 + Filter: ((a.x1 < 5) AND (a.x2 < 4) AND (a.x3 < 5)) + Rows Removed by Filter: 600 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(17 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +--------------------------------------------------------------- + HashAggregate (actual rows=2 loops=1) AQO not used - Output: a.x - Group Key: a.x - -> Nested Loop (actual rows=0 loops=1) + 
Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) AQO not used - Output: a.x - -> Seq Scan on public.b (actual rows=0 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) AQO not used - Output: b.y - Filter: (b.y > 10) - Rows Removed by Filter: 1000 - -> Seq Scan on public.a (never executed) - AQO: rows=1000 - Output: a.x - Filter: (a.x < 10) + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(19 rows) +(18 rows) --- --- TODO: --- Not executed case. What could we do better here? --- +--query contains nodes that have already been predicted SELECT str AS result FROM expln(' -SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE '%Memory%' -; - result ----------------------------------------------------------- - Hash Join (actual rows=0 loops=1) +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +--------------------------------------------------------------- + HashAggregate (actual rows=2 loops=1) + AQO: rows=2, error=0% + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) + AQO: rows=200000, error=0% + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO: rows=200, error=0% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(18 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result 
+--------------------------------------------------------------- + HashAggregate (actual rows=1 loops=1) + AQO: rows=2, error=50% + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=100000 loops=1) + AQO: rows=200000, error=50% + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=200, error=50% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 2)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=1000 loops=100) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(18 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 5 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + HashAggregate (actual rows=1 loops=1) AQO not used - Output: a.x, b.y - Hash Cond: (a.x = b.y) - -> Seq Scan on public.a (actual rows=1 loops=1) - AQO: rows=1000, error=100% - Output: a.x - Filter: (a.x < 10) - -> Hash (actual rows=0 loops=1) - Output: b.y - -> Seq Scan on public.b (actual rows=0 loops=1) - AQO: rows=1, error=100% - Output: b.y - Filter: (b.y > 10) - Rows Removed by Filter: 1000 + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=100000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 3) AND (a.x2 < 5) AND (a.x3 > 1)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=1000 loops=100) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 +(18 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + HashAggregate 
(actual rows=2 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(18 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 < 5 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + HashAggregate (actual rows=2 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 < 5)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 (18 rows) -RESET enable_material; -DROP TABLE a,b CASCADE; -SELECT true FROM aqo_reset(); - bool ------- - t +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x2 < 5 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + HashAggregate (actual rows=2 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=140000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO: rows=100, error=-100% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x2 < 5) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan 
on public.b (actual rows=700 loops=200) + AQO not used + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 > 2) + Rows Removed by Filter: 300 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(20 rows) + +--query contains nodes that have already been predicted +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 4 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + HashAggregate (actual rows=1 loops=1) + AQO: rows=2, error=50% + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=70000 loops=1) + AQO: rows=140000, error=50% + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=200, error=50% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 3) AND (a.x2 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=700 loops=100) + AQO: rows=700, error=0% + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 > 2) + Rows Removed by Filter: 300 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(20 rows) + +SELECT 1 FROM aqo_reset(); + ?column? 
+---------- + 1 (1 row) +DROP TABLE a; +DROP TABLE b; DROP EXTENSION aqo CASCADE; diff --git a/machine_learning.c b/machine_learning.c index 7138db38..d4f5cbee 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -74,7 +74,7 @@ fs_distance(double *a, double *b, int len) res += (a[i] - b[i]) * (a[i] - b[i]); } if (len != 0) - res = sqrt(res / len); + res = sqrt(res); return res; } @@ -148,6 +148,9 @@ OkNNr_predict(OkNNrdata *data, double *features) Assert(data != NULL); + if (!aqo_predict_with_few_neighbors && data->rows < aqo_k) + return -1.; + for (i = 0; i < data->rows; ++i) distances[i] = fs_distance(data->matrix[i], features, data->cols); diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index be71feff..5a348cd5 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -2,15 +2,20 @@ CREATE EXTENSION aqo; SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; +set aqo.show_hash = 'off'; +SET aqo.k_neighbors_threshold_for_predict = 1; SET enable_material = 'off'; DROP TABLE IF EXISTS a,b CASCADE; -CREATE TABLE a (x int); -INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival; -CREATE TABLE b (y int); -INSERT INTO b (y) SELECT mod(ival + 1,10) FROM generate_series(1,1000) As ival; +-- Create tables with correlated datas in columns +CREATE TABLE a (x1 int, x2 int, x3 int); +INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; + +CREATE TABLE b (y1 int, y2 int, y3 int); +INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; + -- -- Returns string-by-string explain of a query. Made for removing some strings @@ -26,55 +31,96 @@ $$ LANGUAGE PLPGSQL; -- no one predicted rows. 
we use knowledge cardinalities of the query -- in the next queries with the same fss_hash + SELECT str AS result FROM expln(' -SELECT x FROM A where x = 5;') AS str; +SELECT x1,y1 FROM A,B WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' -SELECT x FROM A,B WHERE x = 5 AND A.x = B.y;') AS str -; -- Find cardinality for SCAN A(x=5) from a neighbour class, created by the --- query, executed above. +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' -SELECT x, sum(x) FROM A,B WHERE y = 5 AND A.x = B.y group by(x);') AS str -; -- Find the JOIN cardinality from a neighbour class. +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 = 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +--query contains nodes that have already been predicted --- cardinality 100 in the first Seq Scan on a SELECT str AS result FROM expln(' -SELECT x, sum(x) FROM A WHERE x = 5 group by(x);') AS str; +SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; --- no one predicted rows. 
we use knowledge cardinalities of the query --- in the next queries with the same fss_hash SELECT str AS result FROM expln(' -SELECT x FROM A where x < 10 group by(x);') AS str -WHERE str NOT LIKE '%Memory%'; --- cardinality 1000 in Seq Scan on a +SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + SELECT str AS result FROM expln(' -SELECT x,y FROM A,B WHERE x < 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE '%Memory%'; +SELECT x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; --- cardinality 100 in Seq Scan on a and Seq Scan on b SELECT str AS result FROM expln(' -SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str -WHERE str NOT LIKE '%Memory%'; +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +--query contains nodes that have already been predicted + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +--query contains nodes that have already been predicted + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 5 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and 
str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 < 5 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x2 < 5 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +--query contains nodes that have already been predicted --- --- TODO: --- Not executed case. What could we do better here? --- SELECT str AS result FROM expln(' -SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE '%Memory%' -; +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 4 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; -RESET enable_material; -DROP TABLE a,b CASCADE; -SELECT true FROM aqo_reset(); +SELECT 1 FROM aqo_reset(); +DROP TABLE a; +DROP TABLE b; DROP EXTENSION aqo CASCADE; diff --git a/storage.c b/storage.c index fcbe5569..9b92088e 100644 --- a/storage.c +++ b/storage.c @@ -90,6 +90,8 @@ static bool _aqo_stat_remove(uint64 queryid); static bool _aqo_queries_remove(uint64 queryid); static bool _aqo_qtexts_remove(uint64 queryid); static bool _aqo_data_remove(data_key *key); +static bool neirest_neighbor(double **matrix, int old_rows, double *neighbor, int cols); +static double fs_distance(double *a, double *b, int len); PG_FUNCTION_INFO_V1(aqo_query_stat); PG_FUNCTION_INFO_V1(aqo_query_texts); @@ -1409,25 +1411,73 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) return result; } +static double +fs_distance(double *a, double *b, int len) +{ + double res = 0; + int i; + + for (i = 0; i < len; ++i) + res += (a[i] - b[i]) * (a[i] - b[i]); + if (len != 0) + res 
= sqrt(res); + return res; +} + +bool +neirest_neighbor(double **matrix, int old_rows, double *neibour, int cols) +{ + int i; + for (i=0; icols == temp_data->cols); Assert(data->matrix); - if (data->rows > 0) - /* trivial strategy - use first suitable record and ignore others */ - return; - - memcpy(data, temp_data, sizeof(OkNNrdata)); - if (data->cols > 0) + if (features != NULL) { - int i; + int old_rows = data->rows; + int k = old_rows; - for (i = 0; i < data->rows; i++) + if (data->cols > 0) { - Assert(data->matrix[i]); - memcpy(data->matrix[i], temp_data->matrix[i], data->cols * sizeof(double)); + int i; + + for (i = 0; i < data->rows; i++) + { + if (k < aqo_K && !neirest_neighbor(data->matrix, old_rows, data->matrix[i], data->cols)) + { + memcpy(data->matrix[k], temp_data->matrix[i], data->cols * sizeof(double)); + data->rfactors[k] = temp_data->rfactors[i]; + data->targets[k] = temp_data->targets[i]; + k++; + } + } + } + } + else + { + if (data->rows > 0) + /* trivial strategy - use first suitable record and ignore others */ + return; + memcpy(data, temp_data, sizeof(OkNNrdata)); + if (data->cols > 0) + { + int i; + + for (i = 0; i < data->rows; i++) + { + Assert(data->matrix[i]); + memcpy(data->matrix[i], temp_data->matrix[i], data->cols * sizeof(double)); + } } } } @@ -1503,7 +1553,7 @@ _fill_knn_data(const DataEntry *entry, List **reloids) */ bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, - bool wideSearch) + bool wideSearch, double *features) { DataEntry *entry; bool found; @@ -1538,7 +1588,7 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, } temp_data = _fill_knn_data(entry, reloids); - build_knn_matrix(data, temp_data); + build_knn_matrix(data, temp_data, features); } else /* Iterate across all elements of the table. XXX: Maybe slow. 
*/ @@ -1576,7 +1626,7 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, else list_free(tmp_oids); - build_knn_matrix(data, temp_data); + build_knn_matrix(data, temp_data, NULL); found = true; } } diff --git a/storage.h b/storage.h index 94891c5d..0e7745e1 100644 --- a/storage.h +++ b/storage.h @@ -101,7 +101,7 @@ extern void aqo_qtexts_load(void); extern bool aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids); extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, - bool wideSearch); + bool wideSearch, double *features); extern void aqo_data_flush(void); extern void aqo_data_load(void); From 1a1d12a3bae9767d91d92609ca1f75fb89216cc6 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 20 Jan 2023 17:01:58 +0300 Subject: [PATCH 157/203] Add disabled nestloop and mergejoin parameters to stabilize look-a-like test, besides add two additional cases where look-a-like should not be applied. --- aqo.c | 2 +- expected/look_a_like.out | 400 ++++++++++++++++++++++++--------------- sql/look_a_like.sql | 56 ++++-- storage.c | 2 +- 4 files changed, 290 insertions(+), 170 deletions(-) diff --git a/aqo.c b/aqo.c index c36b7526..b3415c0f 100644 --- a/aqo.c +++ b/aqo.c @@ -308,7 +308,7 @@ _PG_init(void) PGC_USERSET, 0, NULL, - lc_assign_hook, + NULL, NULL); prev_shmem_startup_hook = shmem_startup_hook; diff --git a/expected/look_a_like.out b/expected/look_a_like.out index b0d3047c..5910c8ac 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -3,7 +3,9 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; set aqo.show_hash = 'off'; -SET aqo.k_neighbors_threshold_for_predict = 1; +SET aqo.k_neighbors_threshold = 1; +SET enable_nestloop = 'off'; +SET enable_mergejoin = 'off'; SET enable_material = 'off'; DROP TABLE IF EXISTS a,b CASCADE; NOTICE: table "a" does not exist, skipping @@ -29,7 +31,7 @@ $$ LANGUAGE PLPGSQL; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B 
WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result -------------------------------------------------------- Nested Loop (actual rows=10000 loops=1) @@ -52,49 +54,51 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' -SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +SELECT x1,y1 FROM A LEFT JOIN b ON A.x1 = B.y1 WHERE x1 = 5 AND x2 = 5;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------ - Hash Join (actual rows=50000 loops=1) + Hash Left Join (actual rows=10000 loops=1) AQO not used Output: a.x1, b.y1 - Hash Cond: (b.y1 = a.x1) - -> Seq Scan on public.b (actual rows=1000 loops=1) - AQO not used - Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=500 loops=1) - Output: a.x1 - -> Seq Scan on public.a (actual rows=500 loops=1) - AQO not used - Output: a.x1 - Filter: ((a.x1 < 5) AND (a.x2 < 5)) - Rows Removed by Filter: 500 + Hash Cond: (a.x1 = b.y1) + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 = 5) AND (a.x2 = 5)) + Rows Removed by Filter: 900 + -> Hash (actual rows=100 loops=1) + Output: b.y1 + -> Seq Scan on public.b (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: b.y1 + Filter: (b.y1 = 5) + Rows Removed by Filter: 900 Using aqo: true AQO mode: LEARN JOINS: 0 -(17 rows) +(19 rows) SELECT str AS result FROM expln(' -SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 = 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ----------------------------------------------------------- - Hash Join 
(actual rows=0 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------ + Hash Join (actual rows=50000 loops=1) AQO not used Output: a.x1, b.y1 Hash Cond: (b.y1 = a.x1) - -> Seq Scan on public.b (actual rows=1 loops=1) - AQO: rows=1000, error=100% + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO not used Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=0 loops=1) + -> Hash (actual rows=500 loops=1) Output: a.x1 - -> Seq Scan on public.a (actual rows=0 loops=1) + -> Seq Scan on public.a (actual rows=500 loops=1) AQO not used Output: a.x1 - Filter: ((a.x1 < 5) AND (a.x2 = 5)) - Rows Removed by Filter: 1000 + Filter: ((a.x1 < 5) AND (a.x2 < 5)) + Rows Removed by Filter: 500 Using aqo: true AQO mode: LEARN JOINS: 0 @@ -104,7 +108,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------ Hash Join (actual rows=50000 loops=1) @@ -129,7 +133,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------ Hash Join (actual rows=70000 loops=1) @@ -154,7 +158,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT 
x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------------- Hash Join (actual rows=40000 loops=1) @@ -179,7 +183,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------------- Hash Join (actual rows=50000 loops=1) @@ -205,7 +209,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------------ Hash Join (actual rows=40000 loops=1) @@ -230,216 +234,315 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ---------------------------------------------------------------- - HashAggregate (actual rows=2 loops=1) +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +--------------------------------------------------------------------- + Group (actual rows=2 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual 
rows=200000 loops=1) + -> Sort (actual rows=200000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) + Sort Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) AQO not used - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ---------------------------------------------------------------- - HashAggregate (actual rows=2 loops=1) - AQO: rows=2, error=0% +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +--------------------------------------------------------------------- + Group (actual rows=2 loops=1) + AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) - AQO: rows=200000, error=0% + -> Sort (actual rows=200000 loops=1) + AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) - AQO: rows=200, error=0% - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Sort Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) + AQO: rows=200000, error=0% + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO: rows=200, error=0% + 
Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ---------------------------------------------------------------- - HashAggregate (actual rows=1 loops=1) - AQO: rows=2, error=50% +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +--------------------------------------------------------------------- + Group (actual rows=1 loops=1) + AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=100000 loops=1) - AQO: rows=200000, error=50% + -> Sort (actual rows=100000 loops=1) + AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=100 loops=1) - AQO: rows=200, error=50% - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 4) AND (a.x3 > 2)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=1000 loops=100) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Sort Key: a.x1 + -> Nested Loop (actual rows=100000 loops=1) + AQO: rows=200000, error=50% + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=200, error=50% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 2)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=1000 loops=100) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 5 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ------------------------------------------------------------------- - HashAggregate (actual rows=1 
loops=1) +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=1 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=100000 loops=1) + -> Sort (actual rows=100000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=100 loops=1) + Sort Key: a.x1 + -> Nested Loop (actual rows=100000 loops=1) AQO not used - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 3) AND (a.x2 < 5) AND (a.x3 > 1)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=1000 loops=100) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 3) AND (a.x2 < 5) AND (a.x3 > 1)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=1000 loops=100) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ------------------------------------------------------------------- - HashAggregate (actual rows=2 loops=1) +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=2 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) + -> Sort (actual rows=200000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) + Sort Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) AQO not used - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> 
Seq Scan on public.b (actual rows=1000 loops=200) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 < 5 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ------------------------------------------------------------------- - HashAggregate (actual rows=2 loops=1) +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=2 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) + -> Sort (actual rows=200000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) + Sort Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) AQO not used - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 < 5)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 < 5)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x2 < 5 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query 
Identifier%' and str NOT LIKE '%Memory%'; - result ------------------------------------------------------------------- - HashAggregate (actual rows=2 loops=1) +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=2 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=140000 loops=1) + -> Sort (actual rows=140000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) - AQO: rows=100, error=-100% - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 4) AND (a.x2 < 5) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=700 loops=200) + Sort Key: a.x1 + -> Nested Loop (actual rows=140000 loops=1) AQO not used - Output: b.y1, b.y2, b.y3 - Filter: (b.y1 > 2) - Rows Removed by Filter: 300 + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO: rows=100, error=-100% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x2 < 5) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=700 loops=200) + AQO not used + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 > 2) + Rows Removed by Filter: 300 Using aqo: true AQO mode: LEARN JOINS: 1 -(20 rows) +(24 rows) --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 4 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ------------------------------------------------------------------- - HashAggregate (actual rows=1 loops=1) - AQO: rows=2, error=50% +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=1 loops=1) + AQO not used Output: a.x1 Group Key: a.x1 - -> 
Nested Loop (actual rows=70000 loops=1) - AQO: rows=140000, error=50% + -> Sort (actual rows=70000 loops=1) + AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=100 loops=1) - AQO: rows=200, error=50% + Sort Key: a.x1 + -> Nested Loop (actual rows=70000 loops=1) + AQO: rows=140000, error=50% + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=200, error=50% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 3) AND (a.x2 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=700 loops=100) + AQO: rows=700, error=0% + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 > 2) + Rows Removed by Filter: 300 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(24 rows) + +CREATE TABLE c (z1 int, z2 int, z3 int); +INSERT INTO c (z1, z2, z3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; +SELECT str AS result +FROM expln(' +SELECT * FROM (a LEFT JOIN b ON a.x1 = b.y1) sc WHERE +not exists (SELECT z1 FROM c WHERE sc.x1=c.z1 );') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------- + Hash Left Join (actual rows=0 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3 + Hash Cond: (a.x1 = b.y1) + -> Hash Anti Join (actual rows=0 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Hash Cond: (a.x1 = c.z1) + -> Seq Scan on public.a (actual rows=1000 loops=1) + AQO not used Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 3) AND (a.x2 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=700 loops=100) - AQO: rows=700, error=0% + -> Hash (actual rows=1000 loops=1) + Output: c.z1 + -> Seq Scan on public.c (actual rows=1000 loops=1) + AQO not used + Output: c.z1 + -> Hash (never executed) + Output: b.y1, b.y2, b.y3 + -> Seq Scan on public.b (never executed) + AQO: rows=1000 Output: b.y1, b.y2, b.y3 - 
Filter: (b.y1 > 2) - Rows Removed by Filter: 300 Using aqo: true AQO mode: LEARN JOINS: 1 -(20 rows) +(24 rows) + +SELECT str AS result +FROM expln(' +SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------- + Hash Right Join (actual rows=10000000 loops=1) + AQO: rows=1, error=-999999900% + Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3, c.z1, c.z2, c.z3 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=100000 loops=1) + Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 + -> Hash Left Join (actual rows=100000 loops=1) + AQO: rows=1, error=-9999900% + Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 + Hash Cond: (a.x1 = c.z1) + -> Seq Scan on public.a (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: a.x1, a.x2, a.x3 + -> Hash (actual rows=1000 loops=1) + Output: c.z1, c.z2, c.z3 + -> Seq Scan on public.c (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: c.z1, c.z2, c.z3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(24 rows) SELECT 1 FROM aqo_reset(); ?column? 
@@ -449,4 +552,5 @@ SELECT 1 FROM aqo_reset(); DROP TABLE a; DROP TABLE b; +DROP FUNCTION expln; DROP EXTENSION aqo CASCADE; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index 5a348cd5..5dc85b7b 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -3,8 +3,9 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; set aqo.show_hash = 'off'; -SET aqo.k_neighbors_threshold_for_predict = 1; - +SET aqo.k_neighbors_threshold = 1; +SET enable_nestloop = 'off'; +SET enable_mergejoin = 'off'; SET enable_material = 'off'; DROP TABLE IF EXISTS a,b CASCADE; @@ -35,92 +36,107 @@ $$ LANGUAGE PLPGSQL; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' -SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +SELECT x1,y1 FROM A LEFT JOIN b ON A.x1 = B.y1 WHERE x1 = 5 AND x2 = 5;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' -SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 = 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' 
SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' 
and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 5 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 < 5 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x2 < 5 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 4 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +CREATE TABLE c (z1 int, z2 int, z3 int); +INSERT INTO c (z1, z2, z3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; + +SELECT str AS result +FROM expln(' +SELECT * FROM (a LEFT JOIN b ON a.x1 = b.y1) sc WHERE +not exists (SELECT z1 FROM c WHERE sc.x1=c.z1 );') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT * FROM 
(A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT 1 FROM aqo_reset(); DROP TABLE a; DROP TABLE b; +DROP FUNCTION expln; DROP EXTENSION aqo CASCADE; diff --git a/storage.c b/storage.c index 9b92088e..32446d6c 100644 --- a/storage.c +++ b/storage.c @@ -110,7 +110,7 @@ PG_FUNCTION_INFO_V1(aqo_execution_time); bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids) { - return load_aqo_data(fs, fss, data, reloids, false); + return load_aqo_data(fs, fss, data, reloids, false, NULL); } bool From 23264e795cbbcfcc85e2cc928952774162e2d6db Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 20 Jan 2023 18:00:54 +0300 Subject: [PATCH 158/203] Add delete table c after finished look-a-like test. --- expected/look_a_like.out | 1 + sql/look_a_like.sql | 1 + 2 files changed, 2 insertions(+) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index 5910c8ac..8b2e315c 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -552,5 +552,6 @@ SELECT 1 FROM aqo_reset(); DROP TABLE a; DROP TABLE b; +DROP TABLE c; DROP FUNCTION expln; DROP EXTENSION aqo CASCADE; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index 5dc85b7b..5a41c24a 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -138,5 +138,6 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L SELECT 1 FROM aqo_reset(); DROP TABLE a; DROP TABLE b; +DROP TABLE c; DROP FUNCTION expln; DROP EXTENSION aqo CASCADE; From 302739437a7ce5cc44e62079fd1d3be2708acf18 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 20 Dec 2022 12:10:14 +0300 Subject: [PATCH 159/203] Rewriting the statement_timeout test to spend less time on its execution. 
unfortunately, this does not completely solve the problem of the imbalance between the cost of resources expended (namely, the duration of the test) and its usefulness, since its results are ignored. We cannot completely exclude the test from the test suite, since it is necessary to know about cases of test failure during the further development of the extension. --- expected/statement_timeout.out | 32 ++++++++++++++++---------------- sql/statement_timeout.sql | 26 +++++++++++++------------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index 77a9a641..14b2f0dc 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -17,7 +17,7 @@ BEGIN END IF; END LOOP; END; $$; -CREATE TABLE t AS SELECT * FROM generate_series(1,100) AS x; +CREATE TABLE t AS SELECT * FROM generate_series(1,50) AS x; ANALYZE t; DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. CREATE EXTENSION IF NOT EXISTS aqo; @@ -25,30 +25,30 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'off'; SET aqo.learn_statement_timeout = 'on'; -SET statement_timeout = 800; -- [0.8s] -SELECT *, pg_sleep(1) FROM t; +SET statement_timeout = 100; -- [0.1s] +SELECT *, pg_sleep(0.1) FROM t; NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- haven't any partial data check_estimated_rows ---------------------- - 100 + 50 (1 row) -- Don't learn because running node has smaller cardinality than an optimizer prediction -SET statement_timeout = 3500; -SELECT *, pg_sleep(1) FROM t; +SET statement_timeout = 400; +SELECT *, pg_sleep(0.1) FROM t; NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data.
ERROR: canceling statement due to statement timeout SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); check_estimated_rows ---------------------- - 100 + 50 (1 row) -- We have a real learning data. -SET statement_timeout = 10000; -SELECT *, pg_sleep(1) FROM t; +SET statement_timeout = 8000; +SELECT *, pg_sleep(0.1) FROM t; x | pg_sleep ---+---------- 1 | @@ -74,8 +74,8 @@ SELECT 1 FROM aqo_reset(); 1 (1 row) -SET statement_timeout = 800; -SELECT *, pg_sleep(1) FROM t; -- Not learned +SET statement_timeout = 100; +SELECT *, pg_sleep(0.1) FROM t; -- Not learned NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); @@ -84,18 +84,18 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); 2 (1 row) -SET statement_timeout = 3500; -SELECT *, pg_sleep(1) FROM t; -- Learn! +SET statement_timeout = 500; +SELECT *, pg_sleep(0.1) FROM t; -- Learn! NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); check_estimated_rows ---------------------- - 3 + 2 (1 row) -SET statement_timeout = 5500; -SELECT *, pg_sleep(1) FROM t; -- Get reliable data +SET statement_timeout = 800; +SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data x | pg_sleep ---+---------- 1 | diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql index 60ae7a14..b0ebb6ba 100644 --- a/sql/statement_timeout.sql +++ b/sql/statement_timeout.sql @@ -18,7 +18,7 @@ BEGIN END LOOP; END; $$; -CREATE TABLE t AS SELECT * FROM generate_series(1,100) AS x; +CREATE TABLE t AS SELECT * FROM generate_series(1,50) AS x; ANALYZE t; DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. 
@@ -28,18 +28,18 @@ SET aqo.mode = 'learn'; SET aqo.show_details = 'off'; SET aqo.learn_statement_timeout = 'on'; -SET statement_timeout = 800; -- [0.8s] -SELECT *, pg_sleep(1) FROM t; +SET statement_timeout = 100; -- [0.1s] +SELECT *, pg_sleep(0.1) FROM t; SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- haven't any partial data -- Don't learn because running node has smaller cardinality than an optimizer prediction -SET statement_timeout = 3500; -SELECT *, pg_sleep(1) FROM t; +SET statement_timeout = 400; +SELECT *, pg_sleep(0.1) FROM t; SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- We have a real learning data. -SET statement_timeout = 10000; -SELECT *, pg_sleep(1) FROM t; +SET statement_timeout = 8000; +SELECT *, pg_sleep(0.1) FROM t; SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- Force to make an underestimated prediction @@ -48,16 +48,16 @@ ANALYZE t; INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); SELECT 1 FROM aqo_reset(); -SET statement_timeout = 800; -SELECT *, pg_sleep(1) FROM t; -- Not learned +SET statement_timeout = 100; +SELECT *, pg_sleep(0.1) FROM t; -- Not learned SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -SET statement_timeout = 3500; -SELECT *, pg_sleep(1) FROM t; -- Learn! +SET statement_timeout = 500; +SELECT *, pg_sleep(0.1) FROM t; -- Learn! 
SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -SET statement_timeout = 5500; -SELECT *, pg_sleep(1) FROM t; -- Get reliable data +SET statement_timeout = 800; +SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- Interrupted query should immediately appear in aqo_data From c30ca2bd4ddeb8412403ef8d27e0b720f3e696af Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 24 Jan 2023 20:39:04 +0300 Subject: [PATCH 160/203] Rename guc, which connected with setting minimum number of neighbours for predicting and add more understandable explanations of guc. --- aqo.c | 6 +++--- expected/look_a_like.out | 2 +- sql/look_a_like.sql | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aqo.c b/aqo.c index b3415c0f..f5424e6a 100644 --- a/aqo.c +++ b/aqo.c @@ -288,8 +288,8 @@ _PG_init(void) NULL ); - DefineCustomIntVariable("aqo.k_neighbors_threshold", - "Set the threshold of number of neighbors for predicting.", + DefineCustomIntVariable("aqo.min_neighbors_for_predicting", + "Set how many neighbors the cardinality prediction will be calculated", NULL, &aqo_k, 3, @@ -301,7 +301,7 @@ _PG_init(void) NULL); DefineCustomBoolVariable("aqo.predict_with_few_neighbors", - "Make prediction with less neighbors than we should have.", + "Establish the ability to make predictions with fewer neighbors than were found.", NULL, &aqo_predict_with_few_neighbors, true, diff --git a/expected/look_a_like.out b/expected/look_a_like.out index 8b2e315c..faa9b0fd 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -3,7 +3,7 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; set aqo.show_hash = 'off'; -SET aqo.k_neighbors_threshold = 1; +SET aqo.min_neighbors_for_predicting = 1; SET enable_nestloop = 'off'; SET enable_mergejoin = 'off'; SET enable_material = 'off'; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index 5a41c24a..9705bf1a 100644 --- 
a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -3,7 +3,7 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; set aqo.show_hash = 'off'; -SET aqo.k_neighbors_threshold = 1; +SET aqo.min_neighbors_for_predicting = 1; SET enable_nestloop = 'off'; SET enable_mergejoin = 'off'; SET enable_material = 'off'; From 98bc4a71adf9c3efa5d2fe67bca5589fbec3eb1f Mon Sep 17 00:00:00 2001 From: Sergei Glukhov Date: Thu, 24 Nov 2022 10:32:07 +0400 Subject: [PATCH 161/203] Added functions: aqo_query_texts_update(), aqo_query_stat_update(), aqo_data_update(). Changed function to be able to insert a record: aqo_queries_update(). --- aqo--1.5--1.6.sql | 47 ++++ auto_tuning.c | 6 +- expected/plancache.out | 6 + expected/update_functions.out | 476 ++++++++++++++++++++++++++++++++++ machine_learning.h | 16 ++ postprocessing.c | 14 +- preprocessing.c | 2 +- regress_schedule | 1 + sql/plancache.sql | 3 +- sql/update_functions.sql | 205 +++++++++++++++ storage.c | 358 +++++++++++++++++++++---- storage.h | 48 +++- 12 files changed, 1127 insertions(+), 55 deletions(-) create mode 100644 expected/update_functions.out create mode 100644 sql/update_functions.sql diff --git a/aqo--1.5--1.6.sql b/aqo--1.5--1.6.sql index 4101d33d..077f11b1 100644 --- a/aqo--1.5--1.6.sql +++ b/aqo--1.5--1.6.sql @@ -30,3 +30,50 @@ AS 'MODULE_PATHNAME', 'aqo_cleanup' LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_cleanup() IS 'Remove unneeded rows from the AQO ML storage'; + +-- +-- Update or insert an aqo_query_texts +-- table record for given 'queryid'. +-- + +CREATE FUNCTION aqo_query_texts_update( + queryid bigint, query_text text) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_query_texts_update' +LANGUAGE C VOLATILE; + +-- +-- Update or insert an aqo_query_stat +-- table record for given 'queryid'. 
+-- + +CREATE FUNCTION aqo_query_stat_update( + queryid bigint, + execution_time_with_aqo double precision[], + execution_time_without_aqo double precision[], + planning_time_with_aqo double precision[], + planning_time_without_aqo double precision[], + cardinality_error_with_aqo double precision[], + cardinality_error_without_aqo double precision[], + executions_with_aqo bigint, + executions_without_aqo bigint) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_query_stat_update' +LANGUAGE C VOLATILE; + +-- +-- Update or insert an aqo_data +-- table record for given 'fs' & 'fss'. +-- + +CREATE FUNCTION aqo_data_update( + fs bigint, + fss integer, + nfeatures integer, + features double precision[][], + targets double precision[], + reliability double precision[], + oids Oid[]) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_data_update' +LANGUAGE C VOLATILE; diff --git a/auto_tuning.c b/auto_tuning.c index fad245ed..cf96a2cf 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -202,8 +202,10 @@ automatical_query_tuning(uint64 queryid, StatEntry *stat) if (num_iterations <= auto_tuning_max_iterations || p_use > 0.5) aqo_queries_store(queryid, query_context.fspace_hash, - query_context.learn_aqo, query_context.use_aqo, true); + query_context.learn_aqo, query_context.use_aqo, true, + &aqo_queries_nulls); else aqo_queries_store(queryid, - query_context.fspace_hash, false, false, false); + query_context.fspace_hash, false, false, false, + &aqo_queries_nulls); } diff --git a/expected/plancache.out b/expected/plancache.out index edcf30e7..6874468a 100644 --- a/expected/plancache.out +++ b/expected/plancache.out @@ -44,4 +44,10 @@ SELECT * FROM f1(); DROP FUNCTION f1; DROP TABLE test CASCADE; +SELECT true FROM aqo_reset(); + bool +------ + t +(1 row) + DROP EXTENSION aqo; diff --git a/expected/update_functions.out b/expected/update_functions.out new file mode 100644 index 00000000..6a6198e5 --- /dev/null +++ b/expected/update_functions.out @@ -0,0 +1,476 @@ +CREATE TABLE aqo_test1(a int, b 
int); +WITH RECURSIVE t(a, b) +AS ( + VALUES (1, 2) + UNION ALL + SELECT t.a + 1, t.b + 1 FROM t WHERE t.a < 20 +) INSERT INTO aqo_test1 (SELECT * FROM t); +CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); +ANALYZE aqo_test1; +CREATE TABLE aqo_test2(a int); +WITH RECURSIVE t(a) +AS ( + VALUES (0) + UNION ALL + SELECT t.a + 1 FROM t WHERE t.a < 100000 +) INSERT INTO aqo_test2 (SELECT * FROM t); +CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); +ANALYZE aqo_test2; +CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; +SET aqo.mode='intelligent'; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + count +------- + 20 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b, aqo_test2 c WHERE a.a = b.a AND b.a = c.a; + count +------- + 20 +(1 row) + +SET aqo.mode='learn'; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + count +------- + 20 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + count +------- + 20 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + count +------- + 20 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 10; + count +------- + 10 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 10 and b.a > 200; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 11 and b.a > 200; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 12 and b.a > 200; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 14 and b.a > 200; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and 
a.a > 15 and +b.a > 300 and b.a < 500; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); + count +------- + 0 +(1 row) + +SET aqo.mode='controlled'; +CREATE TABLE aqo_query_texts_dump AS SELECT * FROM aqo_query_texts; +CREATE TABLE aqo_queries_dump AS SELECT * FROM aqo_queries; +CREATE TABLE aqo_query_stat_dump AS SELECT * FROM aqo_query_stat; +CREATE TABLE aqo_data_dump AS SELECT * FROM aqo_data; +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + +-- +-- aqo_query_texts_update() testing. +-- +-- Populate aqo_query_texts with dump data. +SELECT aqo_query_texts_update(queryid, query_text) AS res +FROM aqo_query_texts_dump +ORDER BY res; + res +----- + f + t + t + t + t + t + t +(7 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_texts EXCEPT TABLE aqo_query_texts_dump) +UNION ALL +(TABLE aqo_query_texts_dump EXCEPT TABLE aqo_query_texts); + queryid | query_text +---------+------------ +(0 rows) + +-- Update aqo_query_texts with dump data. +SELECT aqo_query_texts_update(queryid, query_text) AS res +FROM aqo_query_texts_dump +ORDER BY res; + res +----- + f + t + t + t + t + t + t +(7 rows) + +-- Check if data is the same as in source, no result rows expected. 
+(TABLE aqo_query_texts EXCEPT TABLE aqo_query_texts_dump) +UNION ALL +(TABLE aqo_query_texts_dump EXCEPT TABLE aqo_query_texts); + queryid | query_text +---------+------------ +(0 rows) + +-- +-- aqo_queries_update testing. +-- +-- Populate aqo_queries with dump data. +SELECT aqo_queries_update(queryid, fs, learn_aqo, use_aqo, auto_tuning) AS res +FROM aqo_queries_dump +ORDER BY res; + res +----- + f + t + t + t + t + t + t +(7 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) +UNION ALL +(TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); + queryid | fs | learn_aqo | use_aqo | auto_tuning +---------+----+-----------+---------+------------- +(0 rows) + +-- Update aqo_queries with dump data. +SELECT aqo_queries_update(queryid, fs, learn_aqo, use_aqo, auto_tuning) AS res +FROM aqo_queries_dump +ORDER BY res; + res +----- + f + t + t + t + t + t + t +(7 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) +UNION ALL +(TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); + queryid | fs | learn_aqo | use_aqo | auto_tuning +---------+----+-----------+---------+------------- +(0 rows) + +-- +-- aqo_query_stat_update() testing. +-- +-- Populate aqo_query_stat with dump data. +SELECT aqo_query_stat_update(queryid, execution_time_with_aqo, +execution_time_without_aqo, planning_time_with_aqo, planning_time_without_aqo, +cardinality_error_with_aqo, cardinality_error_without_aqo, executions_with_aqo, +executions_without_aqo) AS res +FROM aqo_query_stat_dump +ORDER BY res; + res +----- + t + t + t + t + t + t +(6 rows) + +-- Check if data is the same as in source, no result rows expected. 
+(TABLE aqo_query_stat_dump EXCEPT TABLE aqo_query_stat) +UNION ALL +(TABLE aqo_query_stat EXCEPT TABLE aqo_query_stat_dump); + queryid | execution_time_with_aqo | execution_time_without_aqo | planning_time_with_aqo | planning_time_without_aqo | cardinality_error_with_aqo | cardinality_error_without_aqo | executions_with_aqo | executions_without_aqo +---------+-------------------------+----------------------------+------------------------+---------------------------+----------------------------+-------------------------------+---------------------+------------------------ +(0 rows) + +-- Update aqo_query_stat with dump data. +SELECT aqo_query_stat_update(queryid, execution_time_with_aqo, +execution_time_without_aqo, planning_time_with_aqo, planning_time_without_aqo, +cardinality_error_with_aqo, cardinality_error_without_aqo, executions_with_aqo, +executions_without_aqo) AS res +FROM aqo_query_stat_dump +ORDER BY res; + res +----- + t + t + t + t + t + t +(6 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_stat_dump EXCEPT TABLE aqo_query_stat) +UNION ALL +(TABLE aqo_query_stat EXCEPT TABLE aqo_query_stat_dump); + queryid | execution_time_with_aqo | execution_time_without_aqo | planning_time_with_aqo | planning_time_without_aqo | cardinality_error_with_aqo | cardinality_error_without_aqo | executions_with_aqo | executions_without_aqo +---------+-------------------------+----------------------------+------------------------+---------------------------+----------------------------+-------------------------------+---------------------+------------------------ +(0 rows) + +-- +-- aqo_data_update() testing. +-- +-- Populate aqo_data with dump data. 
+SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res +FROM aqo_data_dump +ORDER BY res; + res +----- + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t +(27 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_data_dump EXCEPT TABLE aqo_data) +UNION ALL +(TABLE aqo_data EXCEPT TABLE aqo_data_dump); + fs | fss | nfeatures | features | targets | reliability | oids +----+-----+-----------+----------+---------+-------------+------ +(0 rows) + +-- Update aqo_data with dump data. +SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res +FROM aqo_data_dump +ORDER BY res; + res +----- + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t +(27 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_data_dump EXCEPT TABLE aqo_data) +UNION ALL +(TABLE aqo_data EXCEPT TABLE aqo_data_dump); + fs | fss | nfeatures | features | targets | reliability | oids +----+-----+-----------+----------+---------+-------------+------ +(0 rows) + +-- Reject aqo_query_stat_update if there is NULL elements in array arg. +SELECT aqo_query_stat_update(1, '{NULL, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, 1); + aqo_query_stat_update +----------------------- + f +(1 row) + +-- Reject aqo_query_stat_update if arrays don't have the same size. +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, 1); + aqo_query_stat_update +----------------------- + f +(1 row) + +-- Reject aqo_query_stat_update if there are negative executions. 
+SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', -1, 1); + aqo_query_stat_update +----------------------- + f +(1 row) + +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, -1); + aqo_query_stat_update +----------------------- + f +(1 row) + +-- Reject aqo_query_data_update if number of matrix columns and nfeatures +-- are different. +SELECT aqo_data_update(1, 1, 0, '{{1}}', '{1, 1}', '{1, 1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +-- Reject aqo_query_data_update if there is NULL elements in array arg. +SELECT aqo_data_update(1, 1, 1, '{{NULL}}', '{1}', '{1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{NULL}', '{1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{NULL}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +-- Reject aqo_query_data_update if Oids is NULL. +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1}', NULL); + aqo_data_update +----------------- + f +(1 row) + +-- Reject aqo_query_data_update if arrays don't have the same number of rows. +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1, 1}', '{1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1, 1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SELECT aqo_data_update(1, 1, 1, '{{1}, {2}}', '{1}', '{1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SET aqo.mode='disabled'; +SELECT 1 FROM aqo_reset(); + ?column? 
+---------- + 1 +(1 row) + +DROP EXTENSION aqo; +DROP TABLE aqo_test1, aqo_test2; +DROP TABLE aqo_query_texts_dump, aqo_queries_dump, aqo_query_stat_dump, aqo_data_dump; diff --git a/machine_learning.h b/machine_learning.h index b114cade..1d6d8303 100644 --- a/machine_learning.h +++ b/machine_learning.h @@ -21,6 +21,22 @@ typedef struct OkNNrdata double rfactors[aqo_K]; } OkNNrdata; +/* + * Auxiliary struct, used for passing arguments + * to aqo_data_store() function. + */ +typedef struct AqoDataArgs +{ + int rows; /* Number of filled rows in the matrix */ + int cols; /* Number of columns in the matrix */ + int nrels; /* Number of oids */ + + double **matrix; /* Pointer ot matrix array */ + double *targets; /* Pointer to array of 'targets' */ + double *rfactors; /* Pointer to array of 'rfactors' */ + Oid *oids; /* Array of relation OIDs */ +} AqoDataArgs; + extern OkNNrdata* OkNNr_allocate(int ncols); extern void OkNNr_free(OkNNrdata *data); diff --git a/postprocessing.c b/postprocessing.c index 165391dd..70688b1a 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -767,11 +767,21 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) if (query_context.collect_stat) { + /* + * aqo_stat_store() is used in 'append' mode. + * 'AqoStatArgs' fields execs_with_aqo, execs_without_aqo, + * cur_stat_slot, cur_stat_slot_aqo are not used in this + * mode and dummy values(0) are set in this case. 
+ */ + AqoStatArgs stat_arg = { 0, 0, 0, + &execution_time, &query_context.planning_time, &cardinality_error, + 0, + &execution_time, &query_context.planning_time, &cardinality_error}; + /* Write AQO statistics to the aqo_query_stat table */ stat = aqo_stat_store(query_context.query_hash, query_context.use_aqo, - query_context.planning_time, execution_time, - cardinality_error); + &stat_arg, true); if (stat != NULL) { diff --git a/preprocessing.c b/preprocessing.c index 32608425..53fe1323 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -303,7 +303,7 @@ aqo_planner(Query *parse, */ if (aqo_queries_store(query_context.query_hash, query_context.fspace_hash, query_context.learn_aqo, query_context.use_aqo, - query_context.auto_tuning)) + query_context.auto_tuning, &aqo_queries_nulls)) { /* * Add query text into the ML-knowledge base. Just for further diff --git a/regress_schedule b/regress_schedule index 418e14ec..76a2e00e 100644 --- a/regress_schedule +++ b/regress_schedule @@ -12,6 +12,7 @@ test: unsupported test: clean_aqo_data test: parallel_workers test: plancache +test: update_functions # Performance-dependent test. 
Can be ignored if executes in containers or on slow machines ignore: statement_timeout test: statement_timeout diff --git a/sql/plancache.sql b/sql/plancache.sql index 3b074b90..c9aabae7 100644 --- a/sql/plancache.sql +++ b/sql/plancache.sql @@ -44,4 +44,5 @@ SELECT * FROM f1(); DROP FUNCTION f1; DROP TABLE test CASCADE; -DROP EXTENSION aqo; \ No newline at end of file +SELECT true FROM aqo_reset(); +DROP EXTENSION aqo; diff --git a/sql/update_functions.sql b/sql/update_functions.sql new file mode 100644 index 00000000..85b711e6 --- /dev/null +++ b/sql/update_functions.sql @@ -0,0 +1,205 @@ +CREATE TABLE aqo_test1(a int, b int); +WITH RECURSIVE t(a, b) +AS ( + VALUES (1, 2) + UNION ALL + SELECT t.a + 1, t.b + 1 FROM t WHERE t.a < 20 +) INSERT INTO aqo_test1 (SELECT * FROM t); +CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); +ANALYZE aqo_test1; + +CREATE TABLE aqo_test2(a int); +WITH RECURSIVE t(a) +AS ( + VALUES (0) + UNION ALL + SELECT t.a + 1 FROM t WHERE t.a < 100000 +) INSERT INTO aqo_test2 (SELECT * FROM t); +CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); +ANALYZE aqo_test2; + +CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; + +SET aqo.mode='intelligent'; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b, aqo_test2 c WHERE a.a = b.a AND b.a = c.a; + +SET aqo.mode='learn'; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 10; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 10 and b.a > 200; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 11 and b.a > 200; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 12 and b.a > 200; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 14 and b.a > 200; + +SELECT 
count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); +SET aqo.mode='controlled'; + +CREATE TABLE aqo_query_texts_dump AS SELECT * FROM aqo_query_texts; +CREATE TABLE aqo_queries_dump AS SELECT * FROM aqo_queries; +CREATE TABLE aqo_query_stat_dump AS SELECT * FROM aqo_query_stat; +CREATE TABLE aqo_data_dump AS SELECT * FROM aqo_data; + +SELECT 1 FROM aqo_reset(); + +-- +-- aqo_query_texts_update() testing. +-- + +-- Populate aqo_query_texts with dump data. +SELECT aqo_query_texts_update(queryid, query_text) AS res +FROM aqo_query_texts_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_texts EXCEPT TABLE aqo_query_texts_dump) +UNION ALL +(TABLE aqo_query_texts_dump EXCEPT TABLE aqo_query_texts); + +-- Update aqo_query_texts with dump data. +SELECT aqo_query_texts_update(queryid, query_text) AS res +FROM aqo_query_texts_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_texts EXCEPT TABLE aqo_query_texts_dump) +UNION ALL +(TABLE aqo_query_texts_dump EXCEPT TABLE aqo_query_texts); + +-- +-- aqo_queries_update testing. +-- + +-- Populate aqo_queries with dump data. 
+SELECT aqo_queries_update(queryid, fs, learn_aqo, use_aqo, auto_tuning) AS res +FROM aqo_queries_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) +UNION ALL +(TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); + +-- Update aqo_queries with dump data. +SELECT aqo_queries_update(queryid, fs, learn_aqo, use_aqo, auto_tuning) AS res +FROM aqo_queries_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) +UNION ALL +(TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); + +-- +-- aqo_query_stat_update() testing. +-- + +-- Populate aqo_query_stat with dump data. +SELECT aqo_query_stat_update(queryid, execution_time_with_aqo, +execution_time_without_aqo, planning_time_with_aqo, planning_time_without_aqo, +cardinality_error_with_aqo, cardinality_error_without_aqo, executions_with_aqo, +executions_without_aqo) AS res +FROM aqo_query_stat_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_stat_dump EXCEPT TABLE aqo_query_stat) +UNION ALL +(TABLE aqo_query_stat EXCEPT TABLE aqo_query_stat_dump); + +-- Update aqo_query_stat with dump data. +SELECT aqo_query_stat_update(queryid, execution_time_with_aqo, +execution_time_without_aqo, planning_time_with_aqo, planning_time_without_aqo, +cardinality_error_with_aqo, cardinality_error_without_aqo, executions_with_aqo, +executions_without_aqo) AS res +FROM aqo_query_stat_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_stat_dump EXCEPT TABLE aqo_query_stat) +UNION ALL +(TABLE aqo_query_stat EXCEPT TABLE aqo_query_stat_dump); + +-- +-- aqo_data_update() testing. +-- + +-- Populate aqo_data with dump data. 
+SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res +FROM aqo_data_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_data_dump EXCEPT TABLE aqo_data) +UNION ALL +(TABLE aqo_data EXCEPT TABLE aqo_data_dump); + +-- Update aqo_data with dump data. +SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res +FROM aqo_data_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_data_dump EXCEPT TABLE aqo_data) +UNION ALL +(TABLE aqo_data EXCEPT TABLE aqo_data_dump); + + +-- Reject aqo_query_stat_update if there is NULL elements in array arg. +SELECT aqo_query_stat_update(1, '{NULL, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, 1); + +-- Reject aqo_query_stat_update if arrays don't have the same size. +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, 1); + +-- Reject aqo_query_stat_update if there are negative executions. +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', -1, 1); +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, -1); + +-- Reject aqo_query_data_update if number of matrix columns and nfeatures +-- are different. +SELECT aqo_data_update(1, 1, 0, '{{1}}', '{1, 1}', '{1, 1}', '{1, 2, 3}'); + +-- Reject aqo_query_data_update if there is NULL elements in array arg. +SELECT aqo_data_update(1, 1, 1, '{{NULL}}', '{1}', '{1}', '{1, 2, 3}'); +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{NULL}', '{1}', '{1, 2, 3}'); +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{NULL}', '{1, 2, 3}'); + +-- Reject aqo_query_data_update if Oids is NULL. +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1}', NULL); + +-- Reject aqo_query_data_update if arrays don't have the same number of rows. 
+SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1, 1}', '{1}', '{1, 2, 3}'); +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1, 1}', '{1, 2, 3}'); +SELECT aqo_data_update(1, 1, 1, '{{1}, {2}}', '{1}', '{1}', '{1, 2, 3}'); + +SET aqo.mode='disabled'; +SELECT 1 FROM aqo_reset(); +DROP EXTENSION aqo; + +DROP TABLE aqo_test1, aqo_test2; +DROP TABLE aqo_query_texts_dump, aqo_queries_dump, aqo_query_stat_dump, aqo_data_dump; diff --git a/storage.c b/storage.c index 32446d6c..02c8e0ca 100644 --- a/storage.c +++ b/storage.c @@ -78,6 +78,12 @@ HTAB *deactivated_queries = NULL; static const uint32 PGAQO_FILE_HEADER = 123467589; static const uint32 PGAQO_PG_MAJOR_VERSION = PG_VERSION_NUM / 100; +/* + * Used for internal aqo_queries_store() calls. + * No NULL arguments expected in this case. + */ +AqoQueriesNullArgs aqo_queries_nulls = { false, false, false, false }; + static ArrayType *form_matrix(double *matrix, int nrows, int ncols); static void dsa_init(void); @@ -105,6 +111,9 @@ PG_FUNCTION_INFO_V1(aqo_cleanup); PG_FUNCTION_INFO_V1(aqo_drop_class); PG_FUNCTION_INFO_V1(aqo_cardinality_error); PG_FUNCTION_INFO_V1(aqo_execution_time); +PG_FUNCTION_INFO_V1(aqo_query_texts_update); +PG_FUNCTION_INFO_V1(aqo_query_stat_update); +PG_FUNCTION_INFO_V1(aqo_data_update); bool @@ -116,7 +125,15 @@ load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids) bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids) { - return aqo_data_store(fs, fss, data, reloids); + /* + * 'reloids' explicitly passed to aqo_data_store(). + * So AqoDataArgs fields 'nrels' & 'oids' are + * set to 0 and NULL respectively. + */ + AqoDataArgs data_arg = + {data->rows, data->cols, 0, data->matrix, + data->targets, data->rfactors, NULL}; + return aqo_data_store(fs, fss, &data_arg, reloids); } /* @@ -210,8 +227,8 @@ add_deactivated_query(uint64 queryid) * If stat hash table is full, return NULL and log this fact.
*/ StatEntry * -aqo_stat_store(uint64 queryid, bool use_aqo, - double plan_time, double exec_time, double est_error) +aqo_stat_store(uint64 queryid, bool use_aqo, AqoStatArgs *stat_arg, + bool append_mode) { StatEntry *entry; bool found; @@ -250,6 +267,34 @@ aqo_stat_store(uint64 queryid, bool use_aqo, entry->queryid = qid; } + if (!append_mode) + { + size_t sz; + if (found) + { + memset(entry, 0, sizeof(StatEntry)); + entry->queryid = queryid; + } + + sz = stat_arg->cur_stat_slot_aqo * sizeof(entry->est_error_aqo[0]); + memcpy(entry->plan_time_aqo, stat_arg->plan_time_aqo, sz); + memcpy(entry->exec_time_aqo, stat_arg->exec_time_aqo, sz); + memcpy(entry->est_error_aqo, stat_arg->est_error_aqo, sz); + entry->execs_with_aqo = stat_arg->execs_with_aqo; + entry->cur_stat_slot_aqo = stat_arg->cur_stat_slot_aqo; + + sz = stat_arg->cur_stat_slot * sizeof(entry->est_error[0]); + memcpy(entry->plan_time, stat_arg->plan_time, sz); + memcpy(entry->exec_time, stat_arg->exec_time, sz); + memcpy(entry->est_error, stat_arg->est_error, sz); + entry->execs_without_aqo = stat_arg->execs_without_aqo; + entry->cur_stat_slot = stat_arg->cur_stat_slot; + + aqo_state->stat_changed = true; + LWLockRelease(&aqo_state->stat_lock); + return entry; + } + /* Update the entry data */ if (use_aqo) @@ -269,9 +314,9 @@ aqo_stat_store(uint64 queryid, bool use_aqo, } entry->execs_with_aqo++; - entry->plan_time_aqo[pos] = plan_time; - entry->exec_time_aqo[pos] = exec_time; - entry->est_error_aqo[pos] = est_error; + entry->plan_time_aqo[pos] = *stat_arg->plan_time_aqo; + entry->exec_time_aqo[pos] = *stat_arg->exec_time_aqo; + entry->est_error_aqo[pos] = *stat_arg->est_error_aqo; } else { @@ -290,9 +335,9 @@ aqo_stat_store(uint64 queryid, bool use_aqo, } entry->execs_without_aqo++; - entry->plan_time[pos] = plan_time; - entry->exec_time[pos] = exec_time; - entry->est_error[pos] = est_error; + entry->plan_time[pos] = *stat_arg->plan_time; + entry->exec_time[pos] = *stat_arg->exec_time; + 
entry->est_error[pos] = *stat_arg->est_error; } entry = memcpy(palloc(sizeof(StatEntry)), entry, sizeof(StatEntry)); @@ -865,7 +910,7 @@ aqo_queries_load(void) LWLockRelease(&aqo_state->queries_lock); if (!found) { - if (!aqo_queries_store(0, 0, 0, 0, 0)) + if (!aqo_queries_store(0, 0, 0, 0, 0, &aqo_queries_nulls)) elog(PANIC, "[AQO] aqo_queries initialization was unsuccessful"); } } @@ -1279,7 +1324,7 @@ _compute_data_dsa(const DataEntry *entry) * Return true if data was changed. */ bool -aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) +aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, List *reloids) { DataEntry *entry; bool found; @@ -1291,6 +1336,13 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) bool tblOverflow; HASHACTION action; bool result; + /* + * We should distinguish incoming data between internally + * passed structured data (reloids) and externally + * passed data (plain arrays) from aqo_data_update() function. + */ + bool is_raw_data = (reloids == NULL); + int nrels = is_raw_data ? data->nrels : list_length(reloids); Assert(!LWLockHeldByMe(&aqo_state->data_lock)); dsa_init(); @@ -1323,7 +1375,7 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) entry->cols = data->cols; entry->rows = data->rows; - entry->nrels = list_length(reloids); + entry->nrels = nrels; size = _compute_data_dsa(entry); entry->data_dp = dsa_allocate0(data_dsa, size); @@ -1342,7 +1394,7 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) Assert(DsaPointerIsValid(entry->data_dp)); - if (entry->cols != data->cols || entry->nrels != list_length(reloids)) + if (entry->cols != data->cols || entry->nrels != nrels) { /* Collision happened? */ elog(LOG, "[AQO] Does a collision happened? 
Check it if possible (fs: " @@ -1396,14 +1448,21 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) memcpy(ptr, data->rfactors, sizeof(double) * entry->rows); ptr += sizeof(double) * entry->rows; /* store list of relations. XXX: optimize ? */ - foreach(lc, reloids) + if (is_raw_data) { - Oid reloid = lfirst_oid(lc); - - memcpy(ptr, &reloid, sizeof(Oid)); - ptr += sizeof(Oid); + memcpy(ptr, data->oids, nrels * sizeof(Oid)); + ptr += nrels * sizeof(Oid); } + else + { + foreach(lc, reloids) + { + Oid reloid = lfirst_oid(lc); + memcpy(ptr, &reloid, sizeof(Oid)); + ptr += sizeof(Oid); + } + } aqo_state->data_changed = true; end: result = aqo_state->data_changed; @@ -1860,13 +1919,19 @@ aqo_queries(PG_FUNCTION_ARGS) bool aqo_queries_store(uint64 queryid, - uint64 fs, bool learn_aqo, bool use_aqo, bool auto_tuning) + uint64 fs, bool learn_aqo, bool use_aqo, bool auto_tuning, + AqoQueriesNullArgs *null_args) { QueriesEntry *entry; bool found; bool tblOverflow; HASHACTION action; + /* Insert is allowed if no args are NULL. */ + bool safe_insert = + (!null_args->fs_is_null && !null_args->learn_aqo_is_null && + !null_args->use_aqo_is_null && !null_args->auto_tuning_is_null); + Assert(queries_htab); /* Guard for default feature space */ @@ -1877,7 +1942,7 @@ aqo_queries_store(uint64 queryid, /* Check hash table overflow */ tblOverflow = hash_get_num_entries(queries_htab) < fs_max_items ? false : true; - action = tblOverflow ? HASH_FIND : HASH_ENTER; + action = (tblOverflow || !safe_insert) ? 
HASH_FIND : HASH_ENTER; entry = (QueriesEntry *) hash_search(queries_htab, &queryid, action, &found); @@ -1897,11 +1962,20 @@ aqo_queries_store(uint64 queryid, return false; } - entry->fs = fs; - entry->learn_aqo = learn_aqo; - entry->use_aqo = use_aqo; - entry->auto_tuning = auto_tuning; + if (!null_args->fs_is_null) + entry->fs = fs; + if (!null_args->learn_aqo_is_null) + entry->learn_aqo = learn_aqo; + if (!null_args->use_aqo_is_null) + entry->use_aqo = use_aqo; + if (!null_args->auto_tuning_is_null) + entry->auto_tuning = auto_tuning; + if (entry->learn_aqo || entry->use_aqo || entry->auto_tuning) + /* Remove the class from cache of deactivated queries */ + hash_search(deactivated_queries, &queryid, HASH_REMOVE, NULL); + + aqo_state->queries_changed = true; aqo_state->queries_changed = true; LWLockRelease(&aqo_state->queries_lock); return true; @@ -2030,32 +2104,37 @@ aqo_queries_find(uint64 queryid, QueryContextData *ctx) Datum aqo_queries_update(PG_FUNCTION_ARGS) { - QueriesEntry *entry; - uint64 queryid = PG_GETARG_INT64(AQ_QUERYID); - bool found; + uint64 queryid; + uint64 fs = 0; + bool learn_aqo = false; + bool use_aqo = false; + bool auto_tuning = false; - if (queryid == 0) - /* Do nothing for default feature space */ - PG_RETURN_BOOL(false); + AqoQueriesNullArgs null_args = + { PG_ARGISNULL(AQ_FS), PG_ARGISNULL(AQ_LEARN_AQO), + PG_ARGISNULL(AQ_USE_AQO), PG_ARGISNULL(AQ_AUTO_TUNING) }; - LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); - entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, - &found); - if (!PG_ARGISNULL(AQ_FS)) - entry->fs = PG_GETARG_INT64(AQ_FS); - if (!PG_ARGISNULL(AQ_LEARN_AQO)) - entry->learn_aqo = PG_GETARG_BOOL(AQ_LEARN_AQO); - if (!PG_ARGISNULL(AQ_USE_AQO)) - entry->use_aqo = PG_GETARG_BOOL(AQ_USE_AQO); - if (!PG_ARGISNULL(AQ_AUTO_TUNING)) - entry->auto_tuning = PG_GETARG_BOOL(AQ_AUTO_TUNING); + if (PG_ARGISNULL(AQ_QUERYID)) + PG_RETURN_BOOL(false); - /* Remove the class from cache of deactivated queries 
*/ - hash_search(deactivated_queries, &queryid, HASH_REMOVE, NULL); + queryid = PG_GETARG_INT64(AQ_QUERYID); + if (queryid == 0) + /* Do nothing for default feature space */ + PG_RETURN_BOOL(false); - LWLockRelease(&aqo_state->queries_lock); - PG_RETURN_BOOL(true); + if (!null_args.fs_is_null) + fs = PG_GETARG_INT64(AQ_FS); + if (!null_args.learn_aqo_is_null) + learn_aqo = PG_GETARG_BOOL(AQ_LEARN_AQO); + if (!null_args.use_aqo_is_null) + use_aqo = PG_GETARG_BOOL(AQ_USE_AQO); + if (!null_args.auto_tuning_is_null) + auto_tuning = PG_GETARG_BOOL(AQ_AUTO_TUNING); + + PG_RETURN_BOOL(aqo_queries_store(queryid, + fs, learn_aqo, use_aqo, auto_tuning, + &null_args)); } Datum @@ -2483,3 +2562,192 @@ aqo_execution_time(PG_FUNCTION_ARGS) tuplestore_donestoring(tupstore); return (Datum) 0; } + +/* + * Update AQO query text for a given queryid value. + * Return true if the operation has made some changes, + * false otherwise. + */ +Datum +aqo_query_texts_update(PG_FUNCTION_ARGS) +{ + uint64 queryid; + int str_len; + text *str; + char *str_buff; + bool res = false; + + /* Do nothing if any arguments are NULLs */ + if ((PG_ARGISNULL(QT_QUERYID) || PG_ARGISNULL(QT_QUERY_STRING))) + PG_RETURN_BOOL(false); + + if (!(queryid = PG_GETARG_INT64(QT_QUERYID))) + /* Do nothing for default feature space */ + PG_RETURN_BOOL(false); + + str = PG_GETARG_TEXT_PP(QT_QUERY_STRING); + str_len = VARSIZE_ANY_EXHDR(str) + 1; + if (str_len > querytext_max_size) + str_len = querytext_max_size; + + str_buff = (char*) palloc(str_len); + text_to_cstring_buffer(str, str_buff, str_len); + res = aqo_qtext_store(queryid, str_buff); + pfree(str_buff); + + PG_RETURN_BOOL(res); +} + +/* + * Check if incoming array is a one-dimensional array + * and array elements are not null. Init array field + * and return number of elements if check passed, + * otherwise return -1.
+ */ +static int init_dbl_array(double **dest, ArrayType *arr) +{ + if (ARR_NDIM(arr) > 1 || ARR_HASNULL(arr)) + return -1; + *dest = (double *) ARR_DATA_PTR(arr); + return ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); +} + +/* + * Update AQO query stat table for a given queryid value. + * Return true if operation have done some changes, + * false otherwize. + */ +Datum +aqo_query_stat_update(PG_FUNCTION_ARGS) +{ + uint64 queryid; + AqoStatArgs stat_arg; + + /* + * Arguments cannot be NULL. + */ + if (PG_ARGISNULL(QUERYID) || PG_ARGISNULL(NEXECS_AQO) || + PG_ARGISNULL(NEXECS) || PG_ARGISNULL(EXEC_TIME_AQO) || + PG_ARGISNULL(PLAN_TIME_AQO) || PG_ARGISNULL(EST_ERROR_AQO) || + PG_ARGISNULL(EXEC_TIME) || PG_ARGISNULL(PLAN_TIME) || + PG_ARGISNULL(EST_ERROR)) + PG_RETURN_BOOL(false); + + queryid = PG_GETARG_INT64(AQ_QUERYID); + stat_arg.execs_with_aqo = PG_GETARG_INT64(NEXECS_AQO); + stat_arg.execs_without_aqo = PG_GETARG_INT64(NEXECS); + if (queryid == 0 || stat_arg.execs_with_aqo < 0 || + stat_arg.execs_without_aqo < 0) + PG_RETURN_BOOL(false); + + /* + * Init 'with aqo' array fields for further update procedure and + * check that arrays have the same size. + */ + stat_arg.cur_stat_slot_aqo = + init_dbl_array(&stat_arg.exec_time_aqo, + PG_GETARG_ARRAYTYPE_P(EXEC_TIME_AQO)); + if (stat_arg.cur_stat_slot_aqo == -1 || + stat_arg.cur_stat_slot_aqo > STAT_SAMPLE_SIZE || + stat_arg.cur_stat_slot_aqo != + init_dbl_array(&stat_arg.plan_time_aqo, + PG_GETARG_ARRAYTYPE_P(PLAN_TIME_AQO)) || + stat_arg.cur_stat_slot_aqo != + init_dbl_array(&stat_arg.est_error_aqo, + PG_GETARG_ARRAYTYPE_P(EST_ERROR_AQO))) + PG_RETURN_BOOL(false); + + /* + * Init 'without aqo' array fields for further update procedure and + * check that arrays have the same size. 
+ */ + stat_arg.cur_stat_slot = init_dbl_array(&stat_arg.exec_time, + PG_GETARG_ARRAYTYPE_P(EXEC_TIME)); + if (stat_arg.cur_stat_slot == -1 || + stat_arg.cur_stat_slot > STAT_SAMPLE_SIZE || + stat_arg.cur_stat_slot != + init_dbl_array(&stat_arg.plan_time, + PG_GETARG_ARRAYTYPE_P(PLAN_TIME)) || + stat_arg.cur_stat_slot != + init_dbl_array(&stat_arg.est_error, + PG_GETARG_ARRAYTYPE_P(EST_ERROR))) + PG_RETURN_BOOL(false); + + PG_RETURN_BOOL(aqo_stat_store(queryid, false, + &stat_arg, false) != NULL); +} + +/* + * Update AQO data for a given {fs, fss} values. + * Return true if the operation has made some changes, + * false otherwise. + */ +Datum +aqo_data_update(PG_FUNCTION_ARGS) +{ + uint64 fs; + int fss; + double *features_arr[aqo_K]; + AqoDataArgs data_arg; + + ArrayType *arr; + + if (PG_ARGISNULL(AD_FS) || PG_ARGISNULL(AD_FSS) || + PG_ARGISNULL(AD_NFEATURES) || PG_ARGISNULL(AD_TARGETS) || + PG_ARGISNULL(AD_RELIABILITY) || PG_ARGISNULL(AD_OIDS)) + PG_RETURN_BOOL(false); + + fs = PG_GETARG_INT64(AD_FS); + fss = PG_GETARG_INT32(AD_FSS); + data_arg.cols = PG_GETARG_INT32(AD_NFEATURES); + + /* Init target & reliability arrays. */ + data_arg.rows = + init_dbl_array(&data_arg.targets, + PG_GETARG_ARRAYTYPE_P(AD_TARGETS)); + if (data_arg.rows == -1 || data_arg.rows > aqo_K || + data_arg.rows != init_dbl_array(&data_arg.rfactors, + PG_GETARG_ARRAYTYPE_P(AD_RELIABILITY))) + PG_RETURN_BOOL(false); + + /* Init matrix array. */ + if (data_arg.cols == 0 && !PG_ARGISNULL(AD_FEATURES)) + PG_RETURN_BOOL(false); + if (PG_ARGISNULL(AD_FEATURES)) + { + if (data_arg.cols != 0) + PG_RETURN_BOOL(false); + data_arg.matrix = NULL; + } + else + { + int i; + + arr = PG_GETARG_ARRAYTYPE_P(AD_FEATURES); + /* + * Features is a two-dimensional array. + * Number of rows should be the same as for + * target & reliability arrays. 
+ */ + if (ARR_HASNULL(arr) || ARR_NDIM(arr) != 2 || + data_arg.rows != ARR_DIMS(arr)[0] || + data_arg.cols != ARR_DIMS(arr)[1]) + PG_RETURN_BOOL(false); + + for (i = 0; i < ARR_DIMS(arr)[0]; i++) + { + features_arr[i] = (double *) ARR_DATA_PTR(arr) + + i * ARR_DIMS(arr)[1]; + } + data_arg.matrix = features_arr; + } + + /* Init oids array. */ + arr = PG_GETARG_ARRAYTYPE_P(AD_OIDS); + if (ARR_HASNULL(arr)) + PG_RETURN_BOOL(false); + data_arg.oids = (Oid *) ARR_DATA_PTR(arr); + data_arg.nrels = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); + + PG_RETURN_BOOL(aqo_data_store(fs, fss, &data_arg, NULL)); +} diff --git a/storage.h b/storage.h index 0e7745e1..dcc1eec8 100644 --- a/storage.h +++ b/storage.h @@ -36,6 +36,26 @@ typedef struct StatEntry double est_error_aqo[STAT_SAMPLE_SIZE]; } StatEntry; +/* + * Auxiliary struct, used for passing arguments + * to aqo_stat_store() function. + */ +typedef struct AqoStatArgs +{ + int64 execs_with_aqo; + int64 execs_without_aqo; + + int cur_stat_slot; + double *exec_time; + double *plan_time; + double *est_error; + + int cur_stat_slot_aqo; + double *exec_time_aqo; + double *plan_time_aqo; + double *est_error_aqo; +} AqoStatArgs; + /* * Storage entry for query texts. * Query strings may have very different sizes. So, in hash table we store only @@ -82,6 +102,24 @@ typedef struct QueriesEntry bool auto_tuning; } QueriesEntry; +/* + * Auxiliary struct, used for passing arg NULL signs + * to aqo_queries_store() function. + */ +typedef struct AqoQueriesNullArgs +{ + bool fs_is_null; + bool learn_aqo_is_null; + bool use_aqo_is_null; + bool auto_tuning_is_null; +} AqoQueriesNullArgs; + +/* + * Used for internal aqo_queries_store() calls. + * No NULL arguments expected in this case. 
+ */ +extern AqoQueriesNullArgs aqo_queries_nulls; + extern int querytext_max_size; extern int dsm_size_max; @@ -90,8 +128,8 @@ extern HTAB *qtexts_htab; extern HTAB *queries_htab; /* TODO */ extern HTAB *data_htab; /* TODO */ -extern StatEntry *aqo_stat_store(uint64 queryid, bool use_aqo, double plan_time, - double exec_time, double est_error); +extern StatEntry *aqo_stat_store(uint64 queryid, bool use_aqo, + AqoStatArgs *stat_arg, bool append_mode); extern void aqo_stat_flush(void); extern void aqo_stat_load(void); @@ -99,7 +137,8 @@ extern bool aqo_qtext_store(uint64 queryid, const char *query_string); extern void aqo_qtexts_flush(void); extern void aqo_qtexts_load(void); -extern bool aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids); +extern bool aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, + List *reloids); extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool wideSearch, double *features); extern void aqo_data_flush(void); @@ -107,7 +146,8 @@ extern void aqo_data_load(void); extern bool aqo_queries_find(uint64 queryid, QueryContextData *ctx); extern bool aqo_queries_store(uint64 queryid, uint64 fs, bool learn_aqo, - bool use_aqo, bool auto_tuning); + bool use_aqo, bool auto_tuning, + AqoQueriesNullArgs *null_args); extern void aqo_queries_flush(void); extern void aqo_queries_load(void); From f1826ab97f2231537f2f6aede8e3b13769703a86 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Mon, 30 Jan 2023 09:25:10 +0500 Subject: [PATCH 162/203] Add assertion on incorrect number of rows in storing AQO data record. --- storage.c | 1 + 1 file changed, 1 insertion(+) diff --git a/storage.c b/storage.c index 02c8e0ca..8bb6f28e 100644 --- a/storage.c +++ b/storage.c @@ -1345,6 +1345,7 @@ aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, List *reloids) int nrels = is_raw_data ? 
data->nrels : list_length(reloids); Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + Assert(data->rows > 0); dsa_init(); From c8b629cf7cacaa7ea5a44f4462678cf77e3b5d95 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Mon, 11 Jul 2022 11:54:01 +0300 Subject: [PATCH 163/203] Add smart statement timeout for learning aqo in special queries through manual retraining. AQO evaluates whether the query has been executed enough times by comparing the integral error value with its fixed value (0.1); also, if the integral error didn't change compared to previous iterations, the smart statement timeout value will be increased. Besides, the smart statement timeout value won't be increased if the limit value, namely statement timeout, is reached. The initial smart_statement_timeout value is the aqo statement timeout value or 0. The smart statement timeout value and the number of times it has been used are saved in aqo_queries. --- aqo--1.5--1.6.sql | 21 +++++++ aqo.c | 13 ++++ aqo.h | 11 ++++ auto_tuning.c | 4 +- expected/smart_statement_timeout.out | 94 ++++++++++++++++++++++++++++ expected/statement_timeout.out | 2 +- expected/update_functions.out | 8 +-- postprocessing.c | 51 +++++++++++++-- preprocessing.c | 2 + regress_schedule | 2 + sql/smart_statement_timeout.sql | 45 +++++++++++++ storage.c | 54 +++++++++++++++- storage.h | 5 ++ 13 files changed, 299 insertions(+), 13 deletions(-) create mode 100644 expected/smart_statement_timeout.out create mode 100644 sql/smart_statement_timeout.sql diff --git a/aqo--1.5--1.6.sql b/aqo--1.5--1.6.sql index 077f11b1..fa1b8bb7 100644 --- a/aqo--1.5--1.6.sql +++ b/aqo--1.5--1.6.sql @@ -3,9 +3,12 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "ALTER EXTENSION aqo UPDATE TO '1.6'" to load this file. 
\quit +DROP VIEW aqo_queries; + DROP FUNCTION aqo_enable_query; DROP FUNCTION aqo_disable_query; DROP FUNCTION aqo_cleanup; +DROP FUNCTION aqo_queries; CREATE FUNCTION aqo_enable_class(queryid bigint) RETURNS void @@ -77,3 +80,21 @@ CREATE FUNCTION aqo_data_update( RETURNS bool AS 'MODULE_PATHNAME', 'aqo_data_update' LANGUAGE C VOLATILE; + +/* + * VIEWs to discover AQO data. + */ +CREATE FUNCTION aqo_queries ( + OUT queryid bigint, + OUT fs bigint, + OUT learn_aqo boolean, + OUT use_aqo boolean, + OUT auto_tuning boolean, + OUT smart_timeout bigint, + OUT count_increase_timeout bigint +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_queries' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); diff --git a/aqo.c b/aqo.c index f5424e6a..743f9ef6 100644 --- a/aqo.c +++ b/aqo.c @@ -35,6 +35,7 @@ void _PG_init(void); int aqo_mode = AQO_MODE_CONTROLLED; bool force_collect_stat; bool aqo_predict_with_few_neighbors; +int aqo_statement_timeout; /* * Show special info in EXPLAIN mode. 
@@ -48,6 +49,7 @@ bool aqo_predict_with_few_neighbors; */ bool aqo_show_hash; bool aqo_show_details; +bool change_flex_timeout; /* GUC variables */ static const struct config_enum_entry format_options[] = { @@ -287,6 +289,17 @@ _PG_init(void) NULL, NULL ); + DefineCustomIntVariable("aqo.statement_timeout", + "Time limit on learning.", + NULL, + &aqo_statement_timeout, + 0, + 0, INT_MAX, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); DefineCustomIntVariable("aqo.min_neighbors_for_predicting", "Set how many neighbors the cardinality prediction will be calculated", diff --git a/aqo.h b/aqo.h index 9418646c..9600b136 100644 --- a/aqo.h +++ b/aqo.h @@ -199,8 +199,15 @@ typedef struct QueryContextData instr_time start_execution_time; double planning_time; + int64 smart_timeout; + int64 count_increase_timeout; } QueryContextData; +/* + * Indicator for using smart statement timeout for query + */ +extern bool change_flex_timeout; + struct StatEntry; extern double predicted_ppi_rows; @@ -250,6 +257,7 @@ extern ExplainOnePlan_hook_type prev_ExplainOnePlan_hook; extern ExplainOneNode_hook_type prev_ExplainOneNode_hook; extern void ppi_hook(ParamPathInfo *ppi); +extern int aqo_statement_timeout; /* Hash functions */ void get_eclasses(List *clauselist, int *nargs, int **args_hash, @@ -298,5 +306,8 @@ extern void selectivity_cache_clear(void); extern bool IsQueryDisabled(void); +extern bool update_query_timeout(uint64 queryid, int64 smart_timeout); +extern double get_mean(double *elems, int nelems); + extern List *cur_classes; #endif diff --git a/auto_tuning.c b/auto_tuning.c index cf96a2cf..b035a093 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -26,17 +26,15 @@ */ double auto_tuning_convergence_error = 0.01; -static double get_mean(double *elems, int nelems); static double get_estimation(double *elems, int nelems); static bool is_stable(double *elems, int nelems); static bool converged_cq(double *elems, int nelems); static bool is_in_infinite_loop_cq(double *elems, int 
nelems); - /* * Returns mean value of the array of doubles. */ -static double +double get_mean(double *elems, int nelems) { double sum = 0; diff --git a/expected/smart_statement_timeout.out b/expected/smart_statement_timeout.out new file mode 100644 index 00000000..7aacd184 --- /dev/null +++ b/expected/smart_statement_timeout.out @@ -0,0 +1,94 @@ +DROP TABLE IF EXISTS a,b CASCADE; +NOTICE: table "a" does not exist, skipping +NOTICE: table "b" does not exist, skipping +CREATE TABLE a (x1 int, x2 int, x3 int); +INSERT INTO a (x1, x2, x3) SELECT mod(ival,4), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; +CREATE TABLE b (y1 int, y2 int, y3 int); +INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,4), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,100) As ival; +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.join_threshold = 0; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'off'; +SET aqo.learn_statement_timeout = 'on'; +SET statement_timeout = 1500; -- [1.5s] +SET aqo.statement_timeout = 500; -- [0.5s] +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 0 +NOTICE: [AQO] Time limit for execution of the statement was increased. 
Current timeout is 1 + count | count +-------+------- + 62500 | 62500 +(1 row) + +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; + smart_timeout | count_increase_timeout +---------------+------------------------ + 1 | 1 +(1 row) + +SET aqo.learn_statement_timeout = 'off'; +SET aqo.statement_timeout = 1000; -- [1s] +INSERT INTO a (x1, x2, x3) SELECT mod(ival,20), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; +SET aqo.learn_statement_timeout = 'on'; +SET aqo.statement_timeout = 500; -- [0.5s] +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 1 +NOTICE: [AQO] Time limit for execution of the statement was increased. Current timeout is 6 + count | count +--------+-------- + 563300 | 562500 +(1 row) + +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; + smart_timeout | count_increase_timeout +---------------+------------------------ + 6 | 2 +(1 row) + +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 6 +NOTICE: [AQO] Time limit for execution of the statement was increased. 
Current timeout is 63 + count | count +--------+-------- + 563300 | 562500 +(1 row) + +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; + smart_timeout | count_increase_timeout +---------------+------------------------ + 63 | 3 +(1 row) + +SET statement_timeout = 100; -- [0.1s] +SET aqo.statement_timeout = 150; +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 63 +NOTICE: [AQO] Time limit for execution of the statement was increased. Current timeout is 1728 + count | count +--------+-------- + 563300 | 562500 +(1 row) + +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; + smart_timeout | count_increase_timeout +---------------+------------------------ + 1728 | 4 +(1 row) + +SELECT 1 FROM aqo_reset(); + ?column? 
+---------- + 1 +(1 row) + +DROP TABLE a; +DROP TABLE b; +DROP EXTENSION aqo; diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index 14b2f0dc..a12fe9dd 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -91,7 +91,7 @@ ERROR: canceling statement due to statement timeout SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); check_estimated_rows ---------------------- - 2 + 4 (1 row) SET statement_timeout = 800; diff --git a/expected/update_functions.out b/expected/update_functions.out index 6a6198e5..03a97fe7 100644 --- a/expected/update_functions.out +++ b/expected/update_functions.out @@ -211,8 +211,8 @@ ORDER BY res; (TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) UNION ALL (TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); - queryid | fs | learn_aqo | use_aqo | auto_tuning ----------+----+-----------+---------+------------- + queryid | fs | learn_aqo | use_aqo | auto_tuning | smart_timeout | count_increase_timeout +---------+----+-----------+---------+-------------+---------------+------------------------ (0 rows) -- Update aqo_queries with dump data. @@ -234,8 +234,8 @@ ORDER BY res; (TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) UNION ALL (TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); - queryid | fs | learn_aqo | use_aqo | auto_tuning ----------+----+-----------+---------+------------- + queryid | fs | learn_aqo | use_aqo | auto_tuning | smart_timeout | count_increase_timeout +---------+----+-----------+---------+-------------+---------------+------------------------ (0 rows) -- diff --git a/postprocessing.c b/postprocessing.c index 70688b1a..8a55a6cd 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -44,6 +44,8 @@ typedef struct static double cardinality_sum_errors; static int cardinality_num_objects; +static int64 max_timeout_value; +static int64 growth_rate = 3; /* * Store an AQO-related query data into the Query Environment structure. 
@@ -625,15 +627,46 @@ aqo_timeout_handler(void) ctx.learn = query_context.learn_aqo; ctx.isTimedOut = true; - elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data."); + if (aqo_statement_timeout == 0) + elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data."); + else + elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is %ld", max_timeout_value); + learnOnPlanState(timeoutCtl.queryDesc->planstate, (void *) &ctx); MemoryContextSwitchTo(oldctx); } +/* + * Function for updating smart statement timeout + */ +static int64 +increase_smart_timeout() +{ + int64 smart_timeout_fin_time = (query_context.smart_timeout + 1) * pow(growth_rate, query_context.count_increase_timeout); + + if (query_context.smart_timeout == max_timeout_value && !update_query_timeout(query_context.query_hash, smart_timeout_fin_time)) + elog(NOTICE, "[AQO] Timeout is not updated!"); + + return smart_timeout_fin_time; +} + static bool set_timeout_if_need(QueryDesc *queryDesc) { - TimestampTz fin_time; + int64 fintime = (int64) get_timeout_finish_time(STATEMENT_TIMEOUT)-1; + + if (aqo_learn_statement_timeout && aqo_statement_timeout > 0) + { + max_timeout_value = Min(query_context.smart_timeout, (int64) aqo_statement_timeout); + if (max_timeout_value > fintime) + { + max_timeout_value = fintime; + } + } + else + { + max_timeout_value = fintime; + } if (IsParallelWorker()) /* @@ -663,8 +696,7 @@ set_timeout_if_need(QueryDesc *queryDesc) else Assert(!get_timeout_active(timeoutCtl.id)); - fin_time = get_timeout_finish_time(STATEMENT_TIMEOUT); - enable_timeout_at(timeoutCtl.id, fin_time - 1); + enable_timeout_at(timeoutCtl.id, (TimestampTz) max_timeout_value); /* Save pointer to queryDesc to use at learning after a timeout interruption. 
*/ timeoutCtl.queryDesc = queryDesc; @@ -720,6 +752,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) instr_time endtime; EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); + double error = .0; cardinality_sum_errors = 0.; cardinality_num_objects = 0; @@ -788,6 +821,16 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) /* Store all learn data into the AQO service relations. */ if (!query_context.adding_query && query_context.auto_tuning) automatical_query_tuning(query_context.query_hash, stat); + + error = stat->est_error_aqo[stat->cur_stat_slot_aqo-1] - cardinality_sum_errors/(1 + cardinality_num_objects); + + if ( aqo_learn_statement_timeout && aqo_statement_timeout > 0 && error >= 0.1) + { + int64 fintime = increase_smart_timeout(); + elog(NOTICE, "[AQO] Time limit for execution of the statement was increased. Current timeout is %ld", fintime); + } + + pfree(stat); } } diff --git a/preprocessing.c b/preprocessing.c index 53fe1323..60e599ee 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -243,6 +243,8 @@ aqo_planner(Query *parse, elog(ERROR, "unrecognized mode in AQO: %d", aqo_mode); break; } + query_context.count_increase_timeout = 0; + query_context.smart_timeout = 0; } else /* Query class exists in a ML knowledge base. */ { diff --git a/regress_schedule b/regress_schedule index 76a2e00e..6c558e9a 100644 --- a/regress_schedule +++ b/regress_schedule @@ -15,9 +15,11 @@ test: plancache test: update_functions # Performance-dependent test. 
Can be ignored if executes in containers or on slow machines ignore: statement_timeout +ignore: smart_statement_timeout test: statement_timeout test: temp_tables test: top_queries test: relocatable test: look_a_like test: feature_subspace +test: smart_statement_timeout diff --git a/sql/smart_statement_timeout.sql b/sql/smart_statement_timeout.sql new file mode 100644 index 00000000..a0573dee --- /dev/null +++ b/sql/smart_statement_timeout.sql @@ -0,0 +1,45 @@ +DROP TABLE IF EXISTS a,b CASCADE; +CREATE TABLE a (x1 int, x2 int, x3 int); +INSERT INTO a (x1, x2, x3) SELECT mod(ival,4), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; + +CREATE TABLE b (y1 int, y2 int, y3 int); +INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,4), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,100) As ival; + +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.join_threshold = 0; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'off'; +SET aqo.learn_statement_timeout = 'on'; +SET statement_timeout = 1500; -- [1.5s] +SET aqo.statement_timeout = 500; -- [0.5s] + +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; + +SET aqo.learn_statement_timeout = 'off'; +SET aqo.statement_timeout = 1000; -- [1s] +INSERT INTO a (x1, x2, x3) SELECT mod(ival,20), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; +SET aqo.learn_statement_timeout = 'on'; +SET aqo.statement_timeout = 500; -- [0.5s] +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT 
JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; + +SET statement_timeout = 100; -- [0.1s] +SET aqo.statement_timeout = 150; +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; + +SELECT 1 FROM aqo_reset(); +DROP TABLE a; +DROP TABLE b; +DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index 8bb6f28e..5cb1ef76 100644 --- a/storage.c +++ b/storage.c @@ -55,7 +55,7 @@ typedef enum { } aqo_data_cols; typedef enum { - AQ_QUERYID = 0, AQ_FS, AQ_LEARN_AQO, AQ_USE_AQO, AQ_AUTO_TUNING, + AQ_QUERYID = 0, AQ_FS, AQ_LEARN_AQO, AQ_USE_AQO, AQ_AUTO_TUNING, AQ_SMART_TIMEOUT, AQ_COUNT_INCREASE_TIMEOUT, AQ_TOTAL_NCOLS } aqo_queries_cols; @@ -1910,6 +1910,8 @@ aqo_queries(PG_FUNCTION_ARGS) values[AQ_LEARN_AQO] = BoolGetDatum(entry->learn_aqo); values[AQ_USE_AQO] = BoolGetDatum(entry->use_aqo); values[AQ_AUTO_TUNING] = BoolGetDatum(entry->auto_tuning); + values[AQ_SMART_TIMEOUT] = Int64GetDatum(entry->smart_timeout); + values[AQ_COUNT_INCREASE_TIMEOUT] = Int64GetDatum(entry->count_increase_timeout); tuplestore_putvalues(tupstore, tupDesc, values, nulls); } @@ -1971,6 +1973,10 @@ aqo_queries_store(uint64 queryid, entry->use_aqo = use_aqo; if (!null_args->auto_tuning_is_null) entry->auto_tuning = auto_tuning; + if (!null_args->smart_timeout) + entry->smart_timeout = 0; + if 
(!null_args->count_increase_timeout) + entry->count_increase_timeout = 0; if (entry->learn_aqo || entry->use_aqo || entry->auto_tuning) /* Remove the class from cache of deactivated queries */ @@ -2091,11 +2097,57 @@ aqo_queries_find(uint64 queryid, QueryContextData *ctx) ctx->learn_aqo = entry->learn_aqo; ctx->use_aqo = entry->use_aqo; ctx->auto_tuning = entry->auto_tuning; + ctx->smart_timeout = entry->smart_timeout; + ctx->count_increase_timeout = entry->count_increase_timeout; } LWLockRelease(&aqo_state->queries_lock); return found; } +/* + * Function for update and save value of smart statement timeout + * for query in aqu_queries table + */ +bool +update_query_timeout(uint64 queryid, int64 smart_timeout) +{ + QueriesEntry *entry; + bool found; + bool tblOverflow; + HASHACTION action; + + Assert(queries_htab); + + /* Guard for default feature space */ + Assert(queryid != 0); + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + + /* Check hash table overflow */ + tblOverflow = hash_get_num_entries(queries_htab) < fs_max_items ? false : true; + action = tblOverflow ? HASH_FIND : HASH_ENTER; + + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, action, + &found); + + /* Initialize entry on first usage */ + if (!found && action == HASH_FIND) + { + /* + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit + */ + LWLockRelease(&aqo_state->queries_lock); + return false; + } + + entry->smart_timeout = smart_timeout; + entry->count_increase_timeout = entry->count_increase_timeout + 1; + + LWLockRelease(&aqo_state->queries_lock); + return true; +} + /* * Update AQO preferences for a given queryid value. * if incoming param is null - leave it unchanged. 
diff --git a/storage.h b/storage.h index dcc1eec8..35d94336 100644 --- a/storage.h +++ b/storage.h @@ -100,6 +100,9 @@ typedef struct QueriesEntry bool learn_aqo; bool use_aqo; bool auto_tuning; + + int64 smart_timeout; + int64 count_increase_timeout; } QueriesEntry; /* @@ -112,6 +115,8 @@ typedef struct AqoQueriesNullArgs bool learn_aqo_is_null; bool use_aqo_is_null; bool auto_tuning_is_null; + int64 smart_timeout; + int64 count_increase_timeout; } AqoQueriesNullArgs; /* From 6a431d51979ff1eea9cfc2421ce907feee6cb32c Mon Sep 17 00:00:00 2001 From: Andrey Kazarinov Date: Fri, 11 Nov 2022 17:52:52 +0300 Subject: [PATCH 164/203] Add function which shows memory usage. function memctx_htab_sizes outputs allocated sizes and used sizes of aqo's memory contexts and hash tables a.lepikhov: I've changed implementation because the VIEW on memory contexts introduced in the next version of core PostgreSQL. --- aqo--1.5--1.6.sql | 14 ++++++++++++++ t/001_pgbench.pl | 9 +++++++++ 2 files changed, 23 insertions(+) diff --git a/aqo--1.5--1.6.sql b/aqo--1.5--1.6.sql index fa1b8bb7..448b6023 100644 --- a/aqo--1.5--1.6.sql +++ b/aqo--1.5--1.6.sql @@ -98,3 +98,17 @@ AS 'MODULE_PATHNAME', 'aqo_queries' LANGUAGE C STRICT VOLATILE PARALLEL SAFE; CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); + +-- Show how much shared memory AQO are using at the moment +CREATE FUNCTION aqo_memory_usage( + OUT name text, + OUT allocated_size int, + OUT used_size int +) +RETURNS SETOF record +AS $$ + SELECT name, allocated_size, size FROM pg_shmem_allocations + WHERE name LIKE 'AQO%'; +$$ LANGUAGE SQL; +COMMENT ON FUNCTION aqo_memory_usage() IS +'Show how much shared memory AQO are using at the moment'; diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 3aa3b7b5..2374d83d 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -159,6 +159,9 @@ WHERE v.exec_time > 0."); is($res, 3); +$res = $node->safe_psql('postgres', "SELECT * FROM aqo_memory_usage() AS t1"); +note("MEMORY:\n$res\n"); + # 
############################################################################## # # pgbench on a database with AQO in 'learn' mode. @@ -183,6 +186,9 @@ "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], 'pgbench in frozen mode'); +$res = $node->safe_psql('postgres', "SELECT * FROM aqo_memory_usage() AS t1"); +note("MEMORY:\n$res\n"); + # ############################################################################## # # Check procedure of ML-knowledge data cleaning. @@ -298,6 +304,9 @@ is($new_stat_count == $stat_count - $pgb_stat_count, 1, 'Total number of samples in aqo_query_stat'); +$res = $node->safe_psql('postgres', "SELECT * FROM aqo_memory_usage() AS t1"); +note("MEMORY:\n$res\n"); + # ############################################################################## # # AQO works after moving to another schema From aff4ce7b50a1bff2eeb9eef4c674c16a47c20d5e Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 31 Jan 2023 11:19:10 +0500 Subject: [PATCH 165/203] Collect some artifacts of CI tests - initial commit --- .github/workflows/c-cpp.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 8ee5bbf0..ed3d1fea 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -41,3 +41,19 @@ jobs: ./configure $CONFIGURE_OPTS CFLAGS="-DAQO_DEBUG_PRINT -O0" make -j4 > /dev/null && make -j4 -C contrib > /dev/null env CLIENTS=50 THREADS=50 make -C contrib/aqo check + - name: Archive regression.diffs + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: regression_diffs + path: /home/runner/work/aqo/aqo/pg/contrib/aqo/regression.diffs + retention-days: 1 + - name: Archive TAP tests log files + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: tap_logs + path: | + log + retention-days: 1 + From fcb577a861eaac7ec0dd8df98357e29f064abdbd Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 31 Jan 2023 11:21:23 +0500 Subject: 
[PATCH 166/203] Remove regression tests on smart statement timeout. Should rethink test principles of time-dependent features to make them more stable. --- expected/smart_statement_timeout.out | 94 ---------------------------- postprocessing.c | 4 +- sql/smart_statement_timeout.sql | 45 ------------- 3 files changed, 2 insertions(+), 141 deletions(-) delete mode 100644 expected/smart_statement_timeout.out delete mode 100644 sql/smart_statement_timeout.sql diff --git a/expected/smart_statement_timeout.out b/expected/smart_statement_timeout.out deleted file mode 100644 index 7aacd184..00000000 --- a/expected/smart_statement_timeout.out +++ /dev/null @@ -1,94 +0,0 @@ -DROP TABLE IF EXISTS a,b CASCADE; -NOTICE: table "a" does not exist, skipping -NOTICE: table "b" does not exist, skipping -CREATE TABLE a (x1 int, x2 int, x3 int); -INSERT INTO a (x1, x2, x3) SELECT mod(ival,4), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; -CREATE TABLE b (y1 int, y2 int, y3 int); -INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,4), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,100) As ival; -CREATE EXTENSION IF NOT EXISTS aqo; -SET aqo.join_threshold = 0; -SET aqo.mode = 'learn'; -SET aqo.show_details = 'off'; -SET aqo.learn_statement_timeout = 'on'; -SET statement_timeout = 1500; -- [1.5s] -SET aqo.statement_timeout = 500; -- [0.5s] -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 0 -NOTICE: [AQO] Time limit for execution of the statement was increased.
Current timeout is 1 - count | count --------+------- - 62500 | 62500 -(1 row) - -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; - smart_timeout | count_increase_timeout ----------------+------------------------ - 1 | 1 -(1 row) - -SET aqo.learn_statement_timeout = 'off'; -SET aqo.statement_timeout = 1000; -- [1s] -INSERT INTO a (x1, x2, x3) SELECT mod(ival,20), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; -SET aqo.learn_statement_timeout = 'on'; -SET aqo.statement_timeout = 500; -- [0.5s] -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 1 -NOTICE: [AQO] Time limit for execution of the statement was increased. Current timeout is 6 - count | count ---------+-------- - 563300 | 562500 -(1 row) - -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; - smart_timeout | count_increase_timeout ----------------+------------------------ - 6 | 2 -(1 row) - -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 6 -NOTICE: [AQO] Time limit for execution of the statement was increased. 
Current timeout is 63 - count | count ---------+-------- - 563300 | 562500 -(1 row) - -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; - smart_timeout | count_increase_timeout ----------------+------------------------ - 63 | 3 -(1 row) - -SET statement_timeout = 100; -- [0.1s] -SET aqo.statement_timeout = 150; -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 63 -NOTICE: [AQO] Time limit for execution of the statement was increased. Current timeout is 1728 - count | count ---------+-------- - 563300 | 562500 -(1 row) - -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; - smart_timeout | count_increase_timeout ----------------+------------------------ - 1728 | 4 -(1 row) - -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - -DROP TABLE a; -DROP TABLE b; -DROP EXTENSION aqo; diff --git a/postprocessing.c b/postprocessing.c index 8a55a6cd..f6af5f48 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -630,7 +630,7 @@ aqo_timeout_handler(void) if (aqo_statement_timeout == 0) elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data."); else - elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is %ld", max_timeout_value); + elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. 
Timeout is "INT64_FORMAT, max_timeout_value); learnOnPlanState(timeoutCtl.queryDesc->planstate, (void *) &ctx); MemoryContextSwitchTo(oldctx); @@ -827,7 +827,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) if ( aqo_learn_statement_timeout && aqo_statement_timeout > 0 && error >= 0.1) { int64 fintime = increase_smart_timeout(); - elog(NOTICE, "[AQO] Time limit for execution of the statement was increased. Current timeout is %ld", fintime); + elog(NOTICE, "[AQO] Time limit for execution of the statement was increased. Current timeout is "UINT64_FORMAT, fintime); } pfree(stat); diff --git a/sql/smart_statement_timeout.sql b/sql/smart_statement_timeout.sql deleted file mode 100644 index a0573dee..00000000 --- a/sql/smart_statement_timeout.sql +++ /dev/null @@ -1,45 +0,0 @@ -DROP TABLE IF EXISTS a,b CASCADE; -CREATE TABLE a (x1 int, x2 int, x3 int); -INSERT INTO a (x1, x2, x3) SELECT mod(ival,4), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; - -CREATE TABLE b (y1 int, y2 int, y3 int); -INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,4), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,100) As ival; - -CREATE EXTENSION IF NOT EXISTS aqo; -SET aqo.join_threshold = 0; -SET aqo.mode = 'learn'; -SET aqo.show_details = 'off'; -SET aqo.learn_statement_timeout = 'on'; -SET statement_timeout = 1500; -- [1.5s] -SET aqo.statement_timeout = 500; -- [0.5s] - -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; - -SET aqo.learn_statement_timeout = 'off'; -SET aqo.statement_timeout = 1000; -- [1s] -INSERT INTO a (x1, x2, x3) SELECT mod(ival,20), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; -SET aqo.learn_statement_timeout = 'on'; -SET 
aqo.statement_timeout = 500; -- [0.5s] -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; - -SET statement_timeout = 100; -- [0.1s] -SET aqo.statement_timeout = 150; -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; - -SELECT 1 FROM aqo_reset(); -DROP TABLE a; -DROP TABLE b; -DROP EXTENSION aqo; From 71237661839d0bc9e095904017a012c7a64f5fc1 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 31 Jan 2023 11:24:55 +0500 Subject: [PATCH 167/203] Increase stability of the look_a_like test: clear learning data before the test. 
--- expected/look_a_like.out | 6 ++++++ regress_schedule | 3 --- sql/look_a_like.sql | 1 + 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index faa9b0fd..899ef271 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -1,4 +1,10 @@ CREATE EXTENSION aqo; +SELECT true FROM aqo_reset(); + bool +------ + t +(1 row) + SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; diff --git a/regress_schedule b/regress_schedule index 6c558e9a..2bcdaaf2 100644 --- a/regress_schedule +++ b/regress_schedule @@ -15,11 +15,8 @@ test: plancache test: update_functions # Performance-dependent test. Can be ignored if executes in containers or on slow machines ignore: statement_timeout -ignore: smart_statement_timeout -test: statement_timeout test: temp_tables test: top_queries test: relocatable test: look_a_like test: feature_subspace -test: smart_statement_timeout diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index 9705bf1a..b5e1f671 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -1,4 +1,5 @@ CREATE EXTENSION aqo; +SELECT true FROM aqo_reset(); SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; From 89c2e93957bf6aa05a2549cc317756e1825e93e0 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 31 Jan 2023 13:47:45 +0500 Subject: [PATCH 168/203] Bugfix. Initialization of kNN data structure was omitted in one newly added case. 
--- storage.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/storage.c b/storage.c index 5cb1ef76..21b8ca2f 100644 --- a/storage.c +++ b/storage.c @@ -1465,6 +1465,7 @@ aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, List *reloids) } } aqo_state->data_changed = true; + Assert(entry->rows > 0); end: result = aqo_state->data_changed; LWLockRelease(&aqo_state->data_lock); @@ -1505,13 +1506,15 @@ build_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data, double *features) if (features != NULL) { int old_rows = data->rows; - int k = old_rows; + int k = (old_rows < 0) ? 0 : old_rows; if (data->cols > 0) { int i; - for (i = 0; i < data->rows; i++) + Assert(data->cols == temp_data->cols); + + for (i = 0; i < temp_data->rows; i++) { if (k < aqo_K && !neirest_neighbor(data->matrix, old_rows, data->matrix[i], data->cols)) { @@ -1521,6 +1524,7 @@ build_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data, double *features) k++; } } + data->rows = k; } } else @@ -1605,11 +1609,13 @@ _fill_knn_data(const DataEntry *entry, List **reloids) } /* - * Return on feature subspace, unique defined by its class (fs) and hash value - * (fss). - * If reloids is NULL, skip loading of this list. + * By given feature space and subspace, build kNN data structure. + * * If wideSearch is true - make seqscan on the hash table to see for relevant * data across neighbours. + * If reloids is NULL - don't fill this list. + * + * Return false if the operation was unsuccessful. 
*/ bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, @@ -1634,7 +1640,7 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, goto end; /* One entry with all correctly filled fields is found */ - Assert(entry); + Assert(entry && entry->rows > 0); Assert(DsaPointerIsValid(entry->data_dp)); if (entry->cols != data->cols) @@ -1643,12 +1649,14 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, elog(LOG, "[AQO] Does a collision happened? Check it if possible " "(fs: "UINT64_FORMAT", fss: %d).", fs, fss); - found = false; + found = false; /* Sign of unsuccessful operation */ goto end; } temp_data = _fill_knn_data(entry, reloids); + Assert(temp_data->rows > 0); build_knn_matrix(data, temp_data, features); + Assert(data->rows > 0); } else /* Iterate across all elements of the table. XXX: Maybe slow. */ @@ -1662,6 +1670,8 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, { List *tmp_oids = NIL; + Assert(entry->rows > 0); + if (entry->key.fss != fss || entry->cols != data->cols) continue; From f17f5a91ee4eb9912888bf74df46b9d7f4edfa21 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 31 Jan 2023 15:33:09 +0500 Subject: [PATCH 169/203] Rewrite update_functions.sql to avoid dependency on internal logic of the optimizer which can vary on version of PG core. --- expected/update_functions.out | 78 ++++++----------------------------- sql/update_functions.sql | 16 ++++--- 2 files changed, 22 insertions(+), 72 deletions(-) diff --git a/expected/update_functions.out b/expected/update_functions.out index 03a97fe7..cf9cee8e 100644 --- a/expected/update_functions.out +++ b/expected/update_functions.out @@ -295,40 +295,10 @@ UNION ALL -- aqo_data_update() testing. -- -- Populate aqo_data with dump data. 
-SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res -FROM aqo_data_dump -ORDER BY res; - res ------ - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t -(27 rows) - +SELECT count(*) AS res1 FROM + aqo_data_dump, + LATERAL aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS ret +WHERE ret \gset -- Check if data is the same as in source, no result rows expected. (TABLE aqo_data_dump EXCEPT TABLE aqo_data) UNION ALL @@ -338,39 +308,15 @@ UNION ALL (0 rows) -- Update aqo_data with dump data. -SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res -FROM aqo_data_dump -ORDER BY res; - res ------ - t - t - t - t - t - t - t - t - t - t - t - t +SELECT count(*) AS res2 FROM + aqo_data_dump, + LATERAL aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS ret +WHERE ret \gset +SELECT :res1 = :res2 AS ml_sizes_are_equal; + ml_sizes_are_equal +-------------------- t - t - t - t - t - t - t - t - t - t - t - t - t - t - t -(27 rows) +(1 row) -- Check if data is the same as in source, no result rows expected. (TABLE aqo_data_dump EXCEPT TABLE aqo_data) diff --git a/sql/update_functions.sql b/sql/update_functions.sql index 85b711e6..84add94a 100644 --- a/sql/update_functions.sql +++ b/sql/update_functions.sql @@ -146,9 +146,10 @@ UNION ALL -- -- Populate aqo_data with dump data. -SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res -FROM aqo_data_dump -ORDER BY res; +SELECT count(*) AS res1 FROM + aqo_data_dump, + LATERAL aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS ret +WHERE ret \gset -- Check if data is the same as in source, no result rows expected. (TABLE aqo_data_dump EXCEPT TABLE aqo_data) @@ -156,9 +157,12 @@ UNION ALL (TABLE aqo_data EXCEPT TABLE aqo_data_dump); -- Update aqo_data with dump data. 
-SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res -FROM aqo_data_dump -ORDER BY res; +SELECT count(*) AS res2 FROM + aqo_data_dump, + LATERAL aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS ret +WHERE ret \gset + +SELECT :res1 = :res2 AS ml_sizes_are_equal; -- Check if data is the same as in source, no result rows expected. (TABLE aqo_data_dump EXCEPT TABLE aqo_data) From db8179b546c433dd7de6686bdbd3290f408f36a0 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 1 Feb 2023 14:36:46 +0500 Subject: [PATCH 170/203] Bugfix. Assertion on disabled query at the ExecutorEnd hook. In an extravagant situation: (mode=disabled, forced stat gathering = 'on') we can get into a situation when AQO is disabled for a query, but previously cached plan contains some AQO preferences. Even so, we should ignore the query at the end of execution. --- postprocessing.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/postprocessing.c b/postprocessing.c index f6af5f48..aa82a534 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -757,7 +757,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) cardinality_sum_errors = 0.; cardinality_num_objects = 0; - if (!ExtractFromQueryEnv(queryDesc)) + if (IsQueryDisabled() || !ExtractFromQueryEnv(queryDesc)) /* AQO keep all query-related preferences at the query context. * It is needed to prevent from possible recursive changes, at * preprocessing stage of subqueries. @@ -768,7 +768,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) njoins = (enr != NULL) ? *(int *) enr->reldata : -1; - Assert(!IsQueryDisabled()); Assert(!IsParallelWorker()); if (query_context.explain_only) From f85ccdf8a5787c04a4fe7ab00b2cc4e7c3b96ded Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Sun, 5 Feb 2023 14:05:11 +0500 Subject: [PATCH 171/203] Improvement. Clean a list of deactivated queries during the call of the aqo_reset() routine: we want to clean all the AQO internal state on reset. 
--- storage.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/storage.c b/storage.c index 21b8ca2f..64bc4373 100644 --- a/storage.c +++ b/storage.c @@ -195,7 +195,7 @@ init_deactivated_queries_storage(void) MemSet(&hash_ctl, 0, sizeof(hash_ctl)); hash_ctl.keysize = sizeof(uint64); hash_ctl.entrysize = sizeof(uint64); - deactivated_queries = hash_create("aqo_deactivated_queries", + deactivated_queries = hash_create("AQO deactivated queries", 128, /* start small and extend */ &hash_ctl, HASH_ELEM | HASH_BLOBS); @@ -207,7 +207,7 @@ query_is_deactivated(uint64 queryid) { bool found; - hash_search(deactivated_queries, &queryid, HASH_FIND, &found); + (void) hash_search(deactivated_queries, &queryid, HASH_FIND, &found); return found; } @@ -215,7 +215,21 @@ query_is_deactivated(uint64 queryid) void add_deactivated_query(uint64 queryid) { - hash_search(deactivated_queries, &queryid, HASH_ENTER, NULL); + (void) hash_search(deactivated_queries, &queryid, HASH_ENTER, NULL); +} + +static void +reset_deactivated_queries(void) +{ + HASH_SEQ_STATUS hash_seq; + uint64 *queryid; + + hash_seq_init(&hash_seq, deactivated_queries); + while ((queryid = hash_seq_search(&hash_seq)) != NULL) + { + if (!hash_search(deactivated_queries, queryid, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); + } } /* @@ -2177,7 +2191,6 @@ aqo_queries_update(PG_FUNCTION_ARGS) { PG_ARGISNULL(AQ_FS), PG_ARGISNULL(AQ_LEARN_AQO), PG_ARGISNULL(AQ_USE_AQO), PG_ARGISNULL(AQ_AUTO_TUNING) }; - if (PG_ARGISNULL(AQ_QUERYID)) PG_RETURN_BOOL(false); @@ -2209,6 +2222,10 @@ aqo_reset(PG_FUNCTION_ARGS) counter += aqo_qtexts_reset(); counter += aqo_data_reset(); counter += aqo_queries_reset(); + + /* Cleanup cache of deactivated queries */ + reset_deactivated_queries(); + PG_RETURN_INT64(counter); } From f84caae72663396214142fc2a5d18f873e925219 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 1 Feb 2023 09:32:08 +0500 Subject: [PATCH 172/203] One more 
step towards improving the stability of the AQO regression tests. Move GUCs, which can be changed at runtime, from the global regression tests conf to the first executed test 'aqo_disabled.sql'. There we set these values by ALTER SYSTEM/pg_reload_conf() and use them during the test. Also, we call aqo_reset() at the start of each test. And a bit more: 1. Avoid showing the number of records in AQO ML storage - it can depend on optimizer settings and is quite unstable (in progress). 2. Use aliases in query output to avoid instability in the naming of anonymous columns. --- Makefile | 6 +++ aqo.conf | 3 -- expected/aqo_controlled.out | 21 +++++----- expected/aqo_disabled.out | 25 ++++++------ expected/aqo_fdw.out | 29 ++++++++++---- expected/aqo_forced.out | 17 ++++---- expected/aqo_intelligent.out | 16 ++++---- expected/aqo_learn.out | 26 ++++++------- expected/clean_aqo_data.out | 35 +++++++++-------- expected/feature_subspace.out | 38 +++++++++--------- expected/forced_stat_collection.out | 15 ++++---- expected/gucs.out | 22 +++++++---- expected/look_a_like.out | 20 ++++------ expected/parallel_workers.out | 9 ++++- expected/plancache.out | 15 ++++---- expected/relocatable.out | 9 ++++- expected/schema.out | 9 +++-- expected/statement_timeout.out | 60 ++++++++++++++++------------- expected/temp_tables.out | 45 ++++++++++++---------- expected/top_queries.out | 16 ++++---- expected/unsupported.out | 27 +++++++------ expected/update_functions.out | 26 ++++++------- sql/aqo_controlled.sql | 14 +++---- sql/aqo_disabled.sql | 18 ++++----- sql/aqo_fdw.sql | 6 +-- sql/aqo_forced.sql | 11 ++---- sql/aqo_intelligent.sql | 9 ++--- sql/aqo_learn.sql | 11 ++---- sql/clean_aqo_data.sql | 15 ++++---- sql/feature_subspace.sql | 6 +-- sql/forced_stat_collection.sql | 7 ++-- sql/gucs.sql | 9 +++-- sql/look_a_like.sql | 14 ++++--- sql/parallel_workers.sql | 5 +-- sql/plancache.sql | 7 ++-- sql/relocatable.sql | 5 ++- sql/schema.sql | 3 +- sql/statement_timeout.sql | 36 +++++++++-------- sql/temp_tables.sql | 
19 +++++---- sql/top_queries.sql | 7 ++-- sql/unsupported.sql | 7 ++-- sql/update_functions.sql | 13 ++++--- t/001_pgbench.pl | 3 ++ t/002_pg_stat_statements_aqo.pl | 3 ++ 44 files changed, 382 insertions(+), 335 deletions(-) diff --git a/Makefile b/Makefile index d3aec440..ce9d00ba 100755 --- a/Makefile +++ b/Makefile @@ -16,6 +16,12 @@ TAP_TESTS = 1 REGRESS = aqo_dummy_test REGRESS_OPTS = --schedule=$(srcdir)/regress_schedule +# Set default values of some gucs to be stable on custom settings during +# a kind of installcheck +PGOPTIONS = --aqo.force_collect_stat=off --max_parallel_maintenance_workers=1 \ + --aqo.join_threshold=0 --max_parallel_workers_per_gather=1 +export PGOPTIONS + fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw stat_srcdir = $(top_srcdir)/contrib/pg_stat_statements PG_CPPFLAGS += -I$(libpq_srcdir) -I$(fdw_srcdir) -I$(stat_srcdir) diff --git a/aqo.conf b/aqo.conf index 06f3bf9c..705e3dde 100644 --- a/aqo.conf +++ b/aqo.conf @@ -1,5 +1,2 @@ autovacuum = off shared_preload_libraries = 'postgres_fdw, aqo' -max_parallel_maintenance_workers = 1 # switch off parallel workers because of unsteadiness -aqo.wide_search = 'on' - diff --git a/expected/aqo_controlled.out b/expected/aqo_controlled.out index cf88bf42..43d27d74 100644 --- a/expected/aqo_controlled.out +++ b/expected/aqo_controlled.out @@ -1,3 +1,10 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -25,8 +32,6 @@ AS ( ) INSERT INTO aqo_test2 (SELECT * FROM t); CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); ANALYZE aqo_test2; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 @@ -199,11 +204,12 @@ WHERE t1.a = t2.b AND t2.a = t3.b; SELECT count(*) FROM (SELECT queryid AS id FROM aqo_queries) AS q1, - LATERAL aqo_queries_update(q1.id, NULL, NULL, true, NULL) + LATERAL 
aqo_queries_update(q1.id, NULL, NULL, true, NULL) AS ret +WHERE NOT ret ; -- set use = true count ------- - 12 + 1 (1 row) EXPLAIN (COSTS FALSE) @@ -311,11 +317,4 @@ DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; DROP INDEX aqo_test2_idx_a; DROP TABLE aqo_test2; --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - DROP EXTENSION aqo; diff --git a/expected/aqo_disabled.out b/expected/aqo_disabled.out index 606d258e..cf12e2fb 100644 --- a/expected/aqo_disabled.out +++ b/expected/aqo_disabled.out @@ -1,3 +1,12 @@ +-- Create the extension. Drop all lumps which could survive from +-- previous pass (repeated installcheck as an example). +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -16,8 +25,6 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -151,11 +158,12 @@ SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero SET aqo.mode = 'controlled'; SELECT count(*) FROM (SELECT queryid AS id FROM aqo_queries) AS q1, - LATERAL aqo_queries_update(q1.id, NULL, true, true, false) + LATERAL aqo_queries_update(q1.id, NULL, true, true, false) AS ret +WHERE NOT ret ; -- Enable all disabled query classes count ------- - 5 + 1 (1 row) EXPLAIN SELECT * FROM aqo_test0 @@ -223,15 +231,8 @@ SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero 0 (1 row) --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - ?column? 
----------- - 1 -(1 row) - -DROP EXTENSION aqo; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; +DROP EXTENSION aqo; diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index e568e993..69c1b132 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -3,12 +3,17 @@ -- JOIN push-down (check push of baserestrictinfo and joininfo) -- Aggregate push-down -- Push-down of groupings with HAVING clause. -CREATE EXTENSION aqo; -CREATE EXTENSION postgres_fdw; +CREATE EXTENSION IF NOT EXISTS aqo; +CREATE EXTENSION IF NOT EXISTS postgres_fdw; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'learn'; SET aqo.show_details = 'true'; -- show AQO info for each node and entire query. SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. -SET aqo.join_threshold = 0; DO $d$ BEGIN EXECUTE $$CREATE SERVER loopback FOREIGN DATA WRAPPER postgres_fdw @@ -100,15 +105,23 @@ SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; ') AS str WHERE str NOT LIKE '%Sort Method%'; - str -------------------------------------------- - Foreign Scan (actual rows=1 loops=1) + str +------------------------------------------------------------ + Merge Join (actual rows=1 loops=1) AQO not used - Relations: (frgn a) INNER JOIN (frgn b) + Merge Cond: (a.x = b.x) + -> Sort (actual rows=1 loops=1) + Sort Key: a.x + -> Foreign Scan on frgn a (actual rows=1 loops=1) + AQO not used + -> Sort (actual rows=1 loops=1) + Sort Key: b.x + -> Foreign Scan on frgn b (actual rows=1 loops=1) + AQO not used Using aqo: true AQO mode: LEARN JOINS: 0 -(6 rows) +(14 rows) -- Should learn on postgres_fdw nodes SELECT str FROM expln(' diff --git a/expected/aqo_forced.out b/expected/aqo_forced.out index 091ead32..6d5d14a9 100644 --- a/expected/aqo_forced.out +++ b/expected/aqo_forced.out @@ -1,3 +1,11 @@ +-- Preliminaries +CREATE 
EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -16,8 +24,6 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 @@ -82,11 +88,4 @@ DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - DROP EXTENSION aqo; diff --git a/expected/aqo_intelligent.out b/expected/aqo_intelligent.out index 7ec943f5..1d407ea7 100644 --- a/expected/aqo_intelligent.out +++ b/expected/aqo_intelligent.out @@ -1,3 +1,10 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -16,8 +23,6 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -519,11 +524,4 @@ DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - ?column? 
----------- - 1 -(1 row) - DROP EXTENSION aqo; diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index db117a0c..9a5ca8dd 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -1,3 +1,10 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + -- The function just copied from stats_ext.sql create function check_estimated_rows(text) returns table (estimated int, actual int) language plpgsql as @@ -36,8 +43,6 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -236,10 +241,10 @@ SELECT count(*) FROM tmp1; (1 row) -- Remove data on some unneeded instances of tmp1 table. -SELECT * FROM aqo_cleanup(); - nfs | nfss ------+------ - 9 | 18 +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t (1 row) -- Result of the query below should be empty @@ -563,7 +568,7 @@ SELECT * FROM check_estimated_rows( 'SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); estimated | actual -----------+-------- - 19 | 19 + 20 | 19 (1 row) SELECT count(*) FROM @@ -716,11 +721,4 @@ DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - ?column? 
----------- - 1 -(1 row) - DROP EXTENSION aqo; diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index e66f274b..49b64832 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -1,5 +1,10 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'learn'; DROP TABLE IF EXISTS a; NOTICE: table "a" does not exist, skipping @@ -11,9 +16,9 @@ SELECT * FROM a; (0 rows) SELECT 'a'::regclass::oid AS a_oid \gset -SELECT true FROM aqo_cleanup(); - bool ------- +SELECT true AS success FROM aqo_cleanup(); + success +--------- t (1 row) @@ -54,9 +59,9 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) DROP TABLE a; -SELECT true FROM aqo_cleanup(); - bool ------- +SELECT true AS success FROM aqo_cleanup(); + success +--------- t (1 row) @@ -119,7 +124,7 @@ SELECT 'b'::regclass::oid AS b_oid \gset SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); count ------- - 2 + 3 (1 row) SELECT count(*) FROM aqo_queries WHERE @@ -175,9 +180,9 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) DROP TABLE a; -SELECT true FROM aqo_cleanup(); - bool ------- +SELECT true AS success FROM aqo_cleanup(); + success +--------- t (1 row) @@ -253,9 +258,9 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) DROP TABLE b; -SELECT true FROM aqo_cleanup(); - bool ------- +SELECT true AS success FROM aqo_cleanup(); + success +--------- t (1 row) diff --git a/expected/feature_subspace.out b/expected/feature_subspace.out index a49be254..a53b57e7 100644 --- a/expected/feature_subspace.out +++ b/expected/feature_subspace.out @@ -1,7 +1,12 @@ -- This test related to some issues on feature subspace calculation -CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'learn'; -SET aqo.join_threshold = 0; SET aqo.show_details = 'on'; CREATE 
TABLE a AS (SELECT gs AS x FROM generate_series(1,10) AS gs); CREATE TABLE b AS (SELECT gs AS x FROM generate_series(1,100) AS gs); @@ -46,20 +51,23 @@ SELECT str AS result FROM expln(' SELECT * FROM b LEFT JOIN a USING (x);') AS str WHERE str NOT LIKE '%Memory%'; - result ----------------------------------------------------- - Hash Left Join (actual rows=100 loops=1) - AQO: rows=10, error=-900% - Hash Cond: (b.x = a.x) - -> Seq Scan on b (actual rows=100 loops=1) - AQO: rows=100, error=0% - -> Hash (actual rows=10 loops=1) + result +----------------------------------------------------- + Merge Left Join (actual rows=100 loops=1) + AQO not used + Merge Cond: (b.x = a.x) + -> Sort (actual rows=100 loops=1) + Sort Key: b.x + -> Seq Scan on b (actual rows=100 loops=1) + AQO not used + -> Sort (actual rows=10 loops=1) + Sort Key: a.x -> Seq Scan on a (actual rows=10 loops=1) - AQO: rows=10, error=0% + AQO not used Using aqo: true AQO mode: LEARN JOINS: 0 -(11 rows) +(14 rows) -- Look into the reason: two JOINs from different classes have the same FSS. 
SELECT to_char(d1.targets[1], 'FM999.00') AS target FROM aqo_data d1 @@ -72,10 +80,4 @@ WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids) order by t (2 rows) DROP TABLE a,b CASCADE; -SELECT true FROM aqo_reset(); - bool ------- - t -(1 row) - DROP EXTENSION aqo; diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index f635fbcc..c5a6ac0e 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -1,5 +1,11 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + \set citizens 1000 -SET aqo.join_threshold = 0; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'off'; CREATE TABLE person ( @@ -19,7 +25,6 @@ INSERT INTO person (id,age,gender,passport) END FROM (SELECT *, 14+(id % 60) AS age FROM generate_series(1, :citizens) id) AS q1 ); -CREATE EXTENSION aqo; SET aqo.force_collect_stat = 'on'; SELECT count(*) FROM person WHERE age<18; count @@ -64,10 +69,4 @@ SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); (3 rows) DROP TABLE person; -SELECT 1 FROM aqo_reset(); -- Full remove of ML data before the end - ?column? ----------- - 1 -(1 row) - DROP EXTENSION aqo; diff --git a/expected/gucs.out b/expected/gucs.out index 6809df64..e6cd1692 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -1,4 +1,11 @@ -CREATE EXTENSION aqo; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + -- Utility tool. Allow to filter system-dependent strings from an explain output. 
CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ BEGIN @@ -7,15 +14,14 @@ BEGIN RETURN; END; $$ LANGUAGE PLPGSQL; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = true; CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; -SELECT true FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. - bool ------- +SELECT true AS success FROM aqo_reset(); + success +--------- t (1 row) @@ -124,9 +130,9 @@ SELECT count(*) FROM aqo_query_stat; 1 (1 row) -SELECT true FROM aqo_reset(); -- Remove one record from all tables - bool ------- +SELECT true AS success FROM aqo_reset(); + success +--------- t (1 row) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index 899ef271..fb76fdd6 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -1,11 +1,12 @@ -CREATE EXTENSION aqo; -SELECT true FROM aqo_reset(); - bool ------- +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- t (1 row) -SET aqo.join_threshold = 0; +SET aqo.wide_search = 'on'; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; set aqo.show_hash = 'off'; @@ -550,14 +551,9 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L JOINS: 1 (24 rows) -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - +RESET aqo.wide_search; +DROP EXTENSION aqo CASCADE; DROP TABLE a; DROP TABLE b; DROP TABLE c; DROP FUNCTION expln; -DROP EXTENSION aqo CASCADE; diff --git a/expected/parallel_workers.out b/expected/parallel_workers.out index fca67006..3e408f49 100644 --- a/expected/parallel_workers.out +++ b/expected/parallel_workers.out @@ -1,6 +1,12 @@ -- Specifically test AQO machinery for queries uses partial paths and executed -- with parallel workers. 
-CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + -- Utility tool. Allow to filter system-dependent strings from explain output. CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ BEGIN @@ -9,7 +15,6 @@ BEGIN RETURN; END; $$ LANGUAGE PLPGSQL; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = true; -- Be generous with a number parallel workers to test the machinery diff --git a/expected/plancache.out b/expected/plancache.out index 6874468a..88698463 100644 --- a/expected/plancache.out +++ b/expected/plancache.out @@ -1,6 +1,11 @@ -- Tests on interaction of AQO with cached plans. -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'intelligent'; SET aqo.show_details = 'on'; SET aqo.show_hash = 'off'; @@ -44,10 +49,4 @@ SELECT * FROM f1(); DROP FUNCTION f1; DROP TABLE test CASCADE; -SELECT true FROM aqo_reset(); - bool ------- - t -(1 row) - DROP EXTENSION aqo; diff --git a/expected/relocatable.out b/expected/relocatable.out index 949896f6..3d7f386f 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -1,5 +1,10 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'learn'; -- use this mode for unconditional learning CREATE TABLE test AS (SELECT id, 'payload' || id FROM generate_series(1,100) id); ANALYZE test; diff --git a/expected/schema.out b/expected/schema.out index 0b5a5c07..e712f407 100644 --- a/expected/schema.out +++ b/expected/schema.out @@ -1,5 +1,3 @@ -DROP EXTENSION IF EXISTS aqo CASCADE; -NOTICE: extension "aqo" does not exist, skipping DROP SCHEMA IF EXISTS test CASCADE; NOTICE: schema "test" does not exist, skipping -- Check Zero-schema 
path behaviour @@ -12,7 +10,12 @@ ERROR: no schema has been selected to create in CREATE SCHEMA IF NOT EXISTS test1; SET search_path TO test1, public; CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'intelligent'; CREATE TABLE test (id SERIAL, data TEXT); INSERT INTO test (data) VALUES ('string'); diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index a12fe9dd..39796549 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -17,37 +17,43 @@ BEGIN END IF; END LOOP; END; $$; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + CREATE TABLE t AS SELECT * FROM generate_series(1,50) AS x; ANALYZE t; DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. -CREATE EXTENSION IF NOT EXISTS aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'off'; SET aqo.learn_statement_timeout = 'on'; -SET statement_timeout = 100; -- [0.1s] +SET statement_timeout = 80; -- [0.1s] SELECT *, pg_sleep(0.1) FROM t; NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- haven't any partial data +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- haven't any partial data check_estimated_rows ---------------------- 50 (1 row) -- Don't learn because running node has smaller cardinality than an optimizer prediction -SET statement_timeout = 400; +SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. 
ERROR: canceling statement due to statement timeout -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- 50 (1 row) -- We have a real learning data. -SET statement_timeout = 8000; +SET statement_timeout = 800; SELECT *, pg_sleep(0.1) FROM t; x | pg_sleep ---+---------- @@ -58,7 +64,7 @@ SELECT *, pg_sleep(0.1) FROM t; 5 | (5 rows) -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- 5 @@ -68,33 +74,33 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); DELETE FROM t WHERE x > 2; ANALYZE t; INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 +SELECT true AS success FROM aqo_reset(); + success +--------- + t (1 row) -SET statement_timeout = 100; +SET statement_timeout = 80; SELECT *, pg_sleep(0.1) FROM t; -- Not learned NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- 2 (1 row) -SET statement_timeout = 500; +SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; -- Learn! NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. 
ERROR: canceling statement due to statement timeout -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- - 4 + 3 (1 row) -SET statement_timeout = 800; +SET statement_timeout = 550; SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data x | pg_sleep ---+---------- @@ -105,17 +111,17 @@ SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data 5 | (5 rows) -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- 5 (1 row) -- Interrupted query should immediately appear in aqo_data -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 +SELECT true AS success FROM aqo_reset(); + success +--------- + t (1 row) SET statement_timeout = 500; @@ -134,10 +140,10 @@ SELECT count(*) FROM aqo_data; -- Must be one 1 (1 row) -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 +SELECT true AS success FROM aqo_reset(); + success +--------- + t (1 row) DROP TABLE t; diff --git a/expected/temp_tables.out b/expected/temp_tables.out index cb1da23f..9fa20e7c 100644 --- a/expected/temp_tables.out +++ b/expected/temp_tables.out @@ -1,5 +1,12 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET aqo.wide_search = 'on'; SET aqo.mode = 'learn'; CREATE TEMP TABLE tt(); CREATE TABLE pt(); @@ -48,10 +55,10 @@ SELECT count(*) FROM aqo_data; -- Don't bother about false negatives because of (1 row) DROP TABLE tt; -SELECT * FROM aqo_cleanup(); - nfs | nfss ------+------ - 0 | 0 +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t (1 row) SELECT count(*) FROM aqo_data; -- Should return the same as previous call above @@ -61,10 +68,10 @@ SELECT count(*) FROM aqo_data; -- Should return the same as previous call above (1 
row) DROP TABLE pt; -SELECT * FROM aqo_cleanup(); - nfs | nfss ------+------ - 3 | 10 +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t (1 row) SELECT count(*) FROM aqo_data; -- Should be 0 @@ -133,10 +140,10 @@ SELECT * FROM check_estimated_rows(' SET aqo.mode = 'forced'; -- Now we use all fss records for each query DROP TABLE pt; -SELECT * FROM aqo_cleanup(); - nfs | nfss ------+------ - 2 | 5 +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t (1 row) CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; @@ -184,12 +191,8 @@ SELECT * FROM check_estimated_rows(' 100 | 0 (1 row) +-- Clear common parts of AQO state +RESET aqo.wide_search; +DROP EXTENSION aqo CASCADE; DROP TABLE pt CASCADE; -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - -DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; diff --git a/expected/top_queries.out b/expected/top_queries.out index ba72d7c8..62186efc 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -1,5 +1,11 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; -- @@ -95,10 +101,4 @@ ORDER BY (md5(query_text)); SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 (3 rows) -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - DROP EXTENSION aqo; diff --git a/expected/unsupported.out b/expected/unsupported.out index c42a3be5..a1a6f4ae 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -1,4 +1,10 @@ -CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + -- Utility tool. Allow to filter system-dependent strings from an explain output. 
CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ BEGIN @@ -7,7 +13,6 @@ BEGIN RETURN; END; $$ LANGUAGE PLPGSQL; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; DROP TABLE IF EXISTS t; @@ -52,7 +57,7 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) AQO not used Group Key: x -> Seq Scan on t (actual rows=801 loops=1) - AQO: rows=801, error=0% + AQO not used Filter: (x > 3) Rows Removed by Filter: 199 Using aqo: true @@ -406,7 +411,7 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) -> Aggregate (actual rows=1 loops=1000) AQO not used -> Seq Scan on t t0 (actual rows=50 loops=1000) - AQO: rows=50, error=0% + AQO not used Filter: (x = t.x) Rows Removed by Filter: 950 SubPlan 2 @@ -616,10 +621,10 @@ SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May 44 (1 row) -SELECT * FROM aqo_cleanup(); - nfs | nfss ------+------ - 13 | 44 +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t (1 row) SELECT count(*) FROM aqo_data; -- No one row should be returned @@ -637,10 +642,4 @@ ORDER BY (md5(query_text),error) DESC; -------+------------ (0 rows) -SELECT 1 FROM aqo_reset(); - ?column? 
----------- - 1 -(1 row) - DROP EXTENSION aqo; diff --git a/expected/update_functions.out b/expected/update_functions.out index cf9cee8e..74428a35 100644 --- a/expected/update_functions.out +++ b/expected/update_functions.out @@ -1,3 +1,11 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + CREATE TABLE aqo_test1(a int, b int); WITH RECURSIVE t(a, b) AS ( @@ -16,8 +24,6 @@ AS ( ) INSERT INTO aqo_test2 (SELECT * FROM t); CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); ANALYZE aqo_test2; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; SET aqo.mode='intelligent'; SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; count @@ -134,10 +140,10 @@ CREATE TABLE aqo_query_texts_dump AS SELECT * FROM aqo_query_texts; CREATE TABLE aqo_queries_dump AS SELECT * FROM aqo_queries; CREATE TABLE aqo_query_stat_dump AS SELECT * FROM aqo_query_stat; CREATE TABLE aqo_data_dump AS SELECT * FROM aqo_data; -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 +SELECT true AS success FROM aqo_reset(); + success +--------- + t (1 row) -- @@ -411,12 +417,6 @@ SELECT aqo_data_update(1, 1, 1, '{{1}, {2}}', '{1}', '{1}', '{1, 2, 3}'); (1 row) SET aqo.mode='disabled'; -SELECT 1 FROM aqo_reset(); - ?column? 
----------- - 1 -(1 row) - -DROP EXTENSION aqo; +DROP EXTENSION aqo CASCADE; DROP TABLE aqo_test1, aqo_test2; DROP TABLE aqo_query_texts_dump, aqo_queries_dump, aqo_query_stat_dump, aqo_data_dump; diff --git a/sql/aqo_controlled.sql b/sql/aqo_controlled.sql index 0ba88e56..8c8e5fb8 100644 --- a/sql/aqo_controlled.sql +++ b/sql/aqo_controlled.sql @@ -1,3 +1,6 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -28,9 +31,6 @@ AS ( CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); ANALYZE aqo_test2; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; - SET aqo.mode = 'controlled'; EXPLAIN (COSTS FALSE) @@ -111,7 +111,8 @@ WHERE t1.a = t2.b AND t2.a = t3.b; SELECT count(*) FROM (SELECT queryid AS id FROM aqo_queries) AS q1, - LATERAL aqo_queries_update(q1.id, NULL, NULL, true, NULL) + LATERAL aqo_queries_update(q1.id, NULL, NULL, true, NULL) AS ret +WHERE NOT ret ; -- set use = true EXPLAIN (COSTS FALSE) @@ -147,14 +148,9 @@ WHERE t1.a = t2.b AND t2.a = t3.b; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; - DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; - DROP INDEX aqo_test2_idx_a; DROP TABLE aqo_test2; --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - DROP EXTENSION aqo; diff --git a/sql/aqo_disabled.sql b/sql/aqo_disabled.sql index fd709cf3..8397f847 100644 --- a/sql/aqo_disabled.sql +++ b/sql/aqo_disabled.sql @@ -1,3 +1,8 @@ +-- Create the extension. Drop all lumps which could survive from +-- previous pass (repeated installcheck as an example). 
+CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -17,8 +22,6 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; @@ -77,7 +80,8 @@ SET aqo.mode = 'controlled'; SELECT count(*) FROM (SELECT queryid AS id FROM aqo_queries) AS q1, - LATERAL aqo_queries_update(q1.id, NULL, true, true, false) + LATERAL aqo_queries_update(q1.id, NULL, true, true, false) AS ret +WHERE NOT ret ; -- Enable all disabled query classes EXPLAIN SELECT * FROM aqo_test0 @@ -98,13 +102,9 @@ FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - -DROP EXTENSION aqo; - DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; - DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; + +DROP EXTENSION aqo; diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index bd211326..5425dcf4 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -4,13 +4,13 @@ -- Aggregate push-down -- Push-down of groupings with HAVING clause. -CREATE EXTENSION aqo; -CREATE EXTENSION postgres_fdw; +CREATE EXTENSION IF NOT EXISTS aqo; +CREATE EXTENSION IF NOT EXISTS postgres_fdw; +SELECT true AS success FROM aqo_reset(); SET aqo.mode = 'learn'; SET aqo.show_details = 'true'; -- show AQO info for each node and entire query. SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. 
-SET aqo.join_threshold = 0; DO $d$ BEGIN diff --git a/sql/aqo_forced.sql b/sql/aqo_forced.sql index 92a26564..34f97359 100644 --- a/sql/aqo_forced.sql +++ b/sql/aqo_forced.sql @@ -1,3 +1,7 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -18,9 +22,6 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; - SET aqo.mode = 'controlled'; EXPLAIN (COSTS FALSE) @@ -53,11 +54,7 @@ WHERE a < 5 AND b < 5 AND c < 5 AND d < 5; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; - DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - DROP EXTENSION aqo; diff --git a/sql/aqo_intelligent.sql b/sql/aqo_intelligent.sql index 545325c1..45ecaecc 100644 --- a/sql/aqo_intelligent.sql +++ b/sql/aqo_intelligent.sql @@ -1,3 +1,6 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -18,9 +21,6 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; - SET aqo.mode = 'intelligent'; EXPLAIN SELECT * FROM aqo_test0 @@ -215,7 +215,4 @@ DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; --- XXX: extension dropping doesn't clear file storage. Do it manually. 
-SELECT 1 FROM aqo_reset(); - DROP EXTENSION aqo; diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index 8b57972e..8acd2db7 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -1,3 +1,6 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + -- The function just copied from stats_ext.sql create function check_estimated_rows(text) returns table (estimated int, actual int) language plpgsql as @@ -39,9 +42,6 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; - SET aqo.mode = 'intelligent'; EXPLAIN SELECT * FROM aqo_test0 @@ -124,7 +124,7 @@ WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; SELECT count(*) FROM tmp1; -- Remove data on some unneeded instances of tmp1 table. -SELECT * FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); -- Result of the query below should be empty SELECT * FROM aqo_query_texts aqt1, aqo_query_texts aqt2 @@ -314,7 +314,4 @@ DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; --- XXX: extension dropping doesn't clear file storage. Do it manually. 
-SELECT 1 FROM aqo_reset(); - DROP EXTENSION aqo; diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index d2abeb93..3c504bdb 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -1,5 +1,6 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + SET aqo.mode = 'learn'; DROP TABLE IF EXISTS a; @@ -7,7 +8,7 @@ DROP TABLE IF EXISTS b; CREATE TABLE a(); SELECT * FROM a; SELECT 'a'::regclass::oid AS a_oid \gset -SELECT true FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); /* * lines with a_oid in aqo_data, @@ -27,7 +28,7 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); DROP TABLE a; -SELECT true FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); /* * lines with a_oid in aqo_data, @@ -79,7 +80,7 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); DROP TABLE a; -SELECT true FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); /* * lines corresponding to a_oid and both a_oid's fs deleted in aqo_data, @@ -115,7 +116,7 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fs = aqo_queries.queryid); DROP TABLE b; -SELECT true FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); -- lines corresponding to b_oid in theese tables deleted SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); @@ -131,4 +132,4 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fs = aqo_queries.queryid); -DROP EXTENSION aqo; \ No newline at end of file +DROP EXTENSION aqo; diff --git a/sql/feature_subspace.sql b/sql/feature_subspace.sql index 0176a700..c9463d55 100644 --- a/sql/feature_subspace.sql +++ b/sql/feature_subspace.sql @@ -1,9 +1,9 @@ -- This test related to some issues on feature subspace calculation 
-CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); SET aqo.mode = 'learn'; -SET aqo.join_threshold = 0; SET aqo.show_details = 'on'; CREATE TABLE a AS (SELECT gs AS x FROM generate_series(1,10) AS gs); @@ -41,5 +41,5 @@ JOIN aqo_data d2 ON (d1.fs <> d2.fs AND d1.fss = d2.fss) WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids) order by target; DROP TABLE a,b CASCADE; -SELECT true FROM aqo_reset(); + DROP EXTENSION aqo; diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index d9fac51a..cf3990fc 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -1,6 +1,8 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + \set citizens 1000 -SET aqo.join_threshold = 0; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'off'; @@ -23,7 +25,6 @@ INSERT INTO person (id,age,gender,passport) FROM (SELECT *, 14+(id % 60) AS age FROM generate_series(1, :citizens) id) AS q1 ); -CREATE EXTENSION aqo; SET aqo.force_collect_stat = 'on'; SELECT count(*) FROM person WHERE age<18; @@ -46,5 +47,5 @@ ORDER BY (cardinality_error_without_aqo); SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); DROP TABLE person; -SELECT 1 FROM aqo_reset(); -- Full remove of ML data before the end + DROP EXTENSION aqo; diff --git a/sql/gucs.sql b/sql/gucs.sql index f3772883..d23d7214 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -1,4 +1,6 @@ -CREATE EXTENSION aqo; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); -- Utility tool. Allow to filter system-dependent strings from an explain output. 
CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ @@ -9,7 +11,6 @@ BEGIN END; $$ LANGUAGE PLPGSQL; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = true; @@ -17,7 +18,7 @@ CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; -SELECT true FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. +SELECT true AS success FROM aqo_reset(); -- Check AQO addons to explain (the only stable data) SELECT regexp_replace( str,'Query Identifier: -?\m\d+\M','Query Identifier: N','g') as str FROM expln(' @@ -46,7 +47,7 @@ SELECT obj_description('aqo_reset'::regproc::oid); -- Check stat reset SELECT count(*) FROM aqo_query_stat; -SELECT true FROM aqo_reset(); -- Remove one record from all tables +SELECT true AS success FROM aqo_reset(); SELECT count(*) FROM aqo_query_stat; DROP EXTENSION aqo; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index b5e1f671..c9e59249 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -1,6 +1,9 @@ -CREATE EXTENSION aqo; -SELECT true FROM aqo_reset(); -SET aqo.join_threshold = 0; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +SET aqo.wide_search = 'on'; + SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; set aqo.show_hash = 'off'; @@ -136,9 +139,10 @@ FROM expln(' SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; -SELECT 1 FROM aqo_reset(); +RESET aqo.wide_search; +DROP EXTENSION aqo CASCADE; + DROP TABLE a; DROP TABLE b; DROP TABLE c; DROP FUNCTION expln; -DROP EXTENSION aqo CASCADE; diff --git a/sql/parallel_workers.sql b/sql/parallel_workers.sql index b544cf19..2cd04bc2 100644 --- a/sql/parallel_workers.sql +++ b/sql/parallel_workers.sql @@ -1,7 +1,8 @@ -- Specifically test AQO machinery for queries uses partial paths 
and executed -- with parallel workers. -CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); -- Utility tool. Allow to filter system-dependent strings from explain output. CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ @@ -12,7 +13,6 @@ BEGIN END; $$ LANGUAGE PLPGSQL; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = true; @@ -52,7 +52,6 @@ WHERE q1.id = q2.id;') AS str WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%' AND str NOT LIKE '%Gather Merge%'; - RESET parallel_tuple_cost; RESET parallel_setup_cost; RESET max_parallel_workers; diff --git a/sql/plancache.sql b/sql/plancache.sql index c9aabae7..b2d1c6d6 100644 --- a/sql/plancache.sql +++ b/sql/plancache.sql @@ -1,7 +1,8 @@ -- Tests on interaction of AQO with cached plans. -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + SET aqo.mode = 'intelligent'; SET aqo.show_details = 'on'; SET aqo.show_hash = 'off'; @@ -44,5 +45,5 @@ SELECT * FROM f1(); DROP FUNCTION f1; DROP TABLE test CASCADE; -SELECT true FROM aqo_reset(); + DROP EXTENSION aqo; diff --git a/sql/relocatable.sql b/sql/relocatable.sql index 780c385e..adf20983 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -1,5 +1,6 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + SET aqo.mode = 'learn'; -- use this mode for unconditional learning CREATE TABLE test AS (SELECT id, 'payload' || id FROM generate_series(1,100) id); diff --git a/sql/schema.sql b/sql/schema.sql index 6f5f4454..28185710 100644 --- a/sql/schema.sql +++ b/sql/schema.sql @@ -1,4 +1,3 @@ -DROP EXTENSION IF EXISTS aqo CASCADE; DROP SCHEMA IF EXISTS test CASCADE; -- Check Zero-schema path behaviour @@ -11,7 +10,7 @@ CREATE EXTENSION aqo; -- fail CREATE SCHEMA IF NOT EXISTS test1; SET search_path TO 
test1, public; CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +SELECT true AS success FROM aqo_reset(); SET aqo.mode = 'intelligent'; CREATE TABLE test (id SERIAL, data TEXT); diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql index b0ebb6ba..43dab39e 100644 --- a/sql/statement_timeout.sql +++ b/sql/statement_timeout.sql @@ -18,56 +18,58 @@ BEGIN END LOOP; END; $$; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + CREATE TABLE t AS SELECT * FROM generate_series(1,50) AS x; ANALYZE t; DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. -CREATE EXTENSION IF NOT EXISTS aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'off'; SET aqo.learn_statement_timeout = 'on'; -SET statement_timeout = 100; -- [0.1s] +SET statement_timeout = 80; -- [0.1s] SELECT *, pg_sleep(0.1) FROM t; -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- haven't any partial data +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- haven't any partial data -- Don't learn because running node has smaller cardinality than an optimizer prediction -SET statement_timeout = 400; +SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- We have a real learning data. 
-SET statement_timeout = 8000; +SET statement_timeout = 800; SELECT *, pg_sleep(0.1) FROM t; -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- Force to make an underestimated prediction DELETE FROM t WHERE x > 2; ANALYZE t; INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); -SELECT 1 FROM aqo_reset(); +SELECT true AS success FROM aqo_reset(); -SET statement_timeout = 100; +SET statement_timeout = 80; SELECT *, pg_sleep(0.1) FROM t; -- Not learned -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -SET statement_timeout = 500; +SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; -- Learn! -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -SET statement_timeout = 800; +SET statement_timeout = 550; SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- Interrupted query should immediately appear in aqo_data -SELECT 1 FROM aqo_reset(); +SELECT true AS success FROM aqo_reset(); SET statement_timeout = 500; SELECT count(*) FROM aqo_data; -- Must be zero SELECT x, pg_sleep(0.1) FROM t WHERE x > 0; SELECT count(*) FROM aqo_data; -- Must be one -SELECT 1 FROM aqo_reset(); +SELECT true AS success FROM aqo_reset(); DROP TABLE t; DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; diff --git a/sql/temp_tables.sql b/sql/temp_tables.sql index aba78aba..e7bc8fe5 100644 --- a/sql/temp_tables.sql +++ b/sql/temp_tables.sql @@ -1,5 +1,8 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +SET aqo.wide_search = 'on'; SET aqo.mode = 'learn'; CREATE TEMP TABLE tt(); @@ -17,10 +20,10 @@ SELECT count(*) FROM 
pt AS pt1, tt AS tt1, tt AS tt2, pt AS pt2; SELECT count(*) FROM aqo_data; -- Don't bother about false negatives because of trivial query plans DROP TABLE tt; -SELECT * FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); SELECT count(*) FROM aqo_data; -- Should return the same as previous call above DROP TABLE pt; -SELECT * FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); SELECT count(*) FROM aqo_data; -- Should be 0 SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt ON aq.queryid = aqt.queryid @@ -67,7 +70,7 @@ SELECT * FROM check_estimated_rows(' SET aqo.mode = 'forced'; -- Now we use all fss records for each query DROP TABLE pt; -SELECT * FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; CREATE TEMP TABLE ttd1 AS SELECT -(x*3) AS x, (x % 9) AS y1 FROM generate_series(1,100) AS x; @@ -91,7 +94,9 @@ SELECT * FROM check_estimated_rows(' SELECT pt.x, avg(pt.y) FROM pt,ttd1 WHERE pt.x = ttd1.x GROUP BY (pt.x); '); -- Don't use AQO for temp table because of different attname +-- Clear common parts of AQO state +RESET aqo.wide_search; +DROP EXTENSION aqo CASCADE; + DROP TABLE pt CASCADE; -SELECT 1 FROM aqo_reset(); -DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; diff --git a/sql/top_queries.sql b/sql/top_queries.sql index da3817a0..76000ac4 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -1,5 +1,7 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; @@ -51,5 +53,4 @@ FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt WHERE ce.id = aqt.queryid ORDER BY (md5(query_text)); -SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 808a19e1..8b36d721 100644 --- a/sql/unsupported.sql +++ 
b/sql/unsupported.sql @@ -1,4 +1,5 @@ -CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); -- Utility tool. Allow to filter system-dependent strings from an explain output. CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ @@ -9,7 +10,6 @@ BEGIN END; $$ LANGUAGE PLPGSQL; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; @@ -182,7 +182,7 @@ ORDER BY (md5(query_text),error) DESC; DROP TABLE t,t1 CASCADE; -- delete all tables used in the test SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May some false positives really bother us here? -SELECT * FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); SELECT count(*) FROM aqo_data; -- No one row should be returned -- Look for any remaining queries in the ML storage. @@ -191,5 +191,4 @@ FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.queryid = cef.id ORDER BY (md5(query_text),error) DESC; -SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; diff --git a/sql/update_functions.sql b/sql/update_functions.sql index 84add94a..e2773978 100644 --- a/sql/update_functions.sql +++ b/sql/update_functions.sql @@ -1,3 +1,7 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + CREATE TABLE aqo_test1(a int, b int); WITH RECURSIVE t(a, b) AS ( @@ -18,9 +22,6 @@ AS ( CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); ANALYZE aqo_test2; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; - SET aqo.mode='intelligent'; SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; @@ -61,7 +62,7 @@ CREATE TABLE aqo_queries_dump AS SELECT * FROM aqo_queries; CREATE TABLE aqo_query_stat_dump AS SELECT * FROM aqo_query_stat; CREATE TABLE aqo_data_dump AS SELECT * FROM aqo_data; -SELECT 1 FROM aqo_reset(); +SELECT true AS success FROM aqo_reset(); -- -- aqo_query_texts_update() testing. 
@@ -202,8 +203,8 @@ SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1, 1}', '{1, 2, 3}'); SELECT aqo_data_update(1, 1, 1, '{{1}, {2}}', '{1}', '{1}', '{1, 2, 3}'); SET aqo.mode='disabled'; -SELECT 1 FROM aqo_reset(); -DROP EXTENSION aqo; + +DROP EXTENSION aqo CASCADE; DROP TABLE aqo_test1, aqo_test2; DROP TABLE aqo_query_texts_dump, aqo_queries_dump, aqo_query_stat_dump, aqo_data_dump; diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 2374d83d..cb6b76de 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -20,6 +20,9 @@ my $CLIENTS = 10; my $THREADS = 10; +# Disable connection default settings, forced by PGOPTIONS in AQO Makefile +$ENV{PGOPTIONS}=""; + # Change pgbench parameters according to the environment variable. if (defined $ENV{TRANSACTIONS}) { diff --git a/t/002_pg_stat_statements_aqo.pl b/t/002_pg_stat_statements_aqo.pl index dfa84b3a..ae87efeb 100644 --- a/t/002_pg_stat_statements_aqo.pl +++ b/t/002_pg_stat_statements_aqo.pl @@ -23,6 +23,9 @@ my $THREADS = 10; my $query_id; +# Disable connection default settings, forced by PGOPTIONS in AQO Makefile +$ENV{PGOPTIONS}=""; + # General purpose variables. my $res; my $total_classes; From a14bf6dc68a9914202316da8a4458e6f6a84a989 Mon Sep 17 00:00:00 2001 From: "Andrey V. 
Lepikhov" Date: Sun, 19 Feb 2023 16:37:38 +0600 Subject: [PATCH 173/203] Generalize basic CI script reviewed-by: a.rybakina --- .github/workflows/c-cpp.yml | 82 +++++++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 30 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index ed3d1fea..0123a181 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -1,10 +1,7 @@ -name: 'C/C++ CI for the stable13' +name: 'AQO basic CI' on: - push: - branches: [ stable13 ] pull_request: - branches: [ stable13 ] env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} @@ -15,45 +12,70 @@ jobs: runs-on: ubuntu-latest steps: - - name: pg + - uses: actions/checkout@v3 + - name: "Define PostreSQL major version" run: | - sudo apt install libipc-run-perl + echo "$(ls -la)" + patch_name=$(ls aqo_*.patch|tail -1) + echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV + + # we can get number, otherwise set up master + vers_number=$(echo "$patch_name"|tr -d -c 0-9) + echo "PG_MAJOR_VERSION=$vers_number" >> $GITHUB_ENV + + branch_name="REL_${vers_number}_STABLE" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV + + echo "COPT=-Werror" >> $GITHUB_ENV + echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV + + - name: "Set master branch name, if needed" + if: env.PG_MAJOR_VERSION == '' + run: | + branch_name="master" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV - echo "Deploying to production server on branch" $BRANCH_NAME + - name: "Environment (debug output)" + if: ${{ always() }} + run: | + echo "Use PostgreSQL branch $PG_BRANCH (patch: $CORE_PATCH_NAME)" + echo "COPT: $COPT" + echo "CONFIGURE_OPTS: $CONFIGURE_OPTS" + echo "Deploying to production server on branch" $BRANCH_NAME "(PG $PG_BRANCH)" git config --global user.email "ci@postgrespro.ru" git config --global user.name "CI PgPro admin" - export COPT=-Werror - export CONFIGURE_OPTS="--prefix=`pwd`/tmp_install 
--enable-tap-tests --enable-cassert" - git clone https://github.com/postgres/postgres.git pg - cd pg - - git checkout REL_13_STABLE - git clone https://github.com/postgrespro/aqo.git contrib/aqo - git -C contrib/aqo checkout $BRANCH_NAME - patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg13.patch - ./configure $CONFIGURE_OPTS CFLAGS="-O2" + + - name: "Prepare PG directory" + run: | + git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg + cd $GITHUB_WORKSPACE/../pg + ls -la + cp -r ../aqo contrib/aqo + patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME + + - name: "make check" + run: | + sudo apt install libipc-run-perl + + cd $GITHUB_WORKSPACE/../pg + ./configure $CONFIGURE_OPTS CFLAGS="-O2" > /dev/null make -j4 > /dev/null && make -j4 -C contrib > /dev/null env CLIENTS=50 THREADS=50 make -C contrib/aqo check echo "Use AQO with debug code included" git clean -fdx git -C contrib/aqo clean -fdx - ./configure $CONFIGURE_OPTS CFLAGS="-DAQO_DEBUG_PRINT -O0" + ./configure $CONFIGURE_OPTS CFLAGS="-DAQO_DEBUG_PRINT -O0" > /dev/null make -j4 > /dev/null && make -j4 -C contrib > /dev/null env CLIENTS=50 THREADS=50 make -C contrib/aqo check - - name: Archive regression.diffs - if: ${{ always() }} - uses: actions/upload-artifact@v3 - with: - name: regression_diffs - path: /home/runner/work/aqo/aqo/pg/contrib/aqo/regression.diffs - retention-days: 1 - - name: Archive TAP tests log files + + - name: Archive artifacts if: ${{ always() }} uses: actions/upload-artifact@v3 with: - name: tap_logs + name: make_check_logs path: | - log - retention-days: 1 - + /home/runner/work/aqo/pg/contrib/aqo/regression.diffs + /home/runner/work/aqo/pg/contrib/aqo/log + /home/runner/work/aqo/pg/contrib/aqo/tmp_check/log + retention-days: 7 From 5addf94b5ab0414614d35b4003ab0300e17986a8 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 1 Feb 2023 
09:32:08 +0500 Subject: [PATCH 174/203] Add couple of github actions flows on each push event: - run make installcheck over an instance in different modes. - run JOB benchmark [1] on a self hosted runner. Utility scripts stores in the .github folder. Branch name is a key to define the name of suitable PostgreSQL core branch: use "stable[XX]" phrase in the name of git branch to trigger compiling and launch of this commit with REL_[XX]_STABLE branch of the core. If the branch name doesn't contain such a phrase, use master branch. TODO: ===== 1. Add 'long' JOB test (parallel strategy disabled). 2. Add JOB test which would be executed up to full convergency of learning on each query. 3. Add installchecks with reusage of existed database and the AQO extension installed (sanity checks will be definitely broken but still). 4. Additional queries [2] can be a marker for successful learning. [1] https://github.com/danolivo/jo-bench [2] https://github.com/RyanMarcus/imdb_pg_dataset --- .github/scripts/job/aqo_instance_launch.sh | 47 ++++++ .github/scripts/job/check_result.sh | 15 ++ .github/scripts/job/dump_knowledge.sh | 17 ++ .github/scripts/job/job_pass.sh | 58 +++++++ .github/scripts/job/load_imdb.sh | 5 + .github/scripts/job/set_test_conditions_1.sh | 41 +++++ .github/scripts/job/set_test_conditions_2.sh | 42 +++++ .github/scripts/job/set_test_conditions_3.sh | 42 +++++ .github/workflows/installchecks.yml | 153 ++++++++++++++++++ .github/workflows/job.yml | 157 +++++++++++++++++++ 10 files changed, 577 insertions(+) create mode 100755 .github/scripts/job/aqo_instance_launch.sh create mode 100755 .github/scripts/job/check_result.sh create mode 100755 .github/scripts/job/dump_knowledge.sh create mode 100755 .github/scripts/job/job_pass.sh create mode 100755 .github/scripts/job/load_imdb.sh create mode 100755 .github/scripts/job/set_test_conditions_1.sh create mode 100755 .github/scripts/job/set_test_conditions_2.sh create mode 
100755 .github/scripts/job/set_test_conditions_3.sh create mode 100644 .github/workflows/installchecks.yml create mode 100644 .github/workflows/job.yml diff --git a/.github/scripts/job/aqo_instance_launch.sh b/.github/scripts/job/aqo_instance_launch.sh new file mode 100755 index 00000000..f43d6b8e --- /dev/null +++ b/.github/scripts/job/aqo_instance_launch.sh @@ -0,0 +1,47 @@ +#!/bin/bash +ulimit -c unlimited + +# Kill all orphan processes +pkill -U `whoami` -9 -e postgres +pkill -U `whoami` -9 -e pgbench +pkill -U `whoami` -9 -e psql + +sleep 1 + +M=`pwd`/PGDATA +U=`whoami` + +rm -rf $M || true +mkdir $M +rm -rf logfile.log || true + +export LC_ALL=C +export LANGUAGE="en_US:en" +initdb -D $M --locale=C + +# PG Version-specific settings +ver=$(pg_ctl -V | egrep -o "[0-9]." | head -1) +echo "PostgreSQL version: $ver" +if [ $ver -gt 13 ] +then + echo "compute_query_id = 'regress'" >> $M/postgresql.conf +fi + +# Speed up the 'Join Order Benchmark' test +echo "shared_buffers = 1GB" >> $M/postgresql.conf +echo "work_mem = 128MB" >> $M/postgresql.conf +echo "fsync = off" >> $M/postgresql.conf +echo "autovacuum = 'off'" >> $M/postgresql.conf + +# AQO preferences +echo "shared_preload_libraries = 'aqo, pg_stat_statements'" >> $M/postgresql.conf +echo "aqo.mode = 'disabled'" >> $M/postgresql.conf +echo "aqo.join_threshold = 0" >> $M/postgresql.conf +echo "aqo.force_collect_stat = 'off'" >> $M/postgresql.conf +echo "aqo.fs_max_items = 10000" >> $M/postgresql.conf +echo "aqo.fss_max_items = 20000" >> $M/postgresql.conf + +pg_ctl -w -D $M -l logfile.log start +createdb $U +psql -c "CREATE EXTENSION aqo;" +psql -c "CREATE EXTENSION pg_stat_statements" diff --git a/.github/scripts/job/check_result.sh b/.github/scripts/job/check_result.sh new file mode 100755 index 00000000..ab194cfc --- /dev/null +++ b/.github/scripts/job/check_result.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# ############################################################################## +# +# +# 
############################################################################## + +# Show error delta (Negative result is a signal of possible issue) +result=$(psql -t -c "SELECT count(*) FROM aqo_cardinality_error(true) c JOIN aqo_cardinality_error(false) o USING (id) WHERE (o.error - c.error) < 0") + +if [ $result -gt 0 ]; then + exit 1; +fi + +exit 0; diff --git a/.github/scripts/job/dump_knowledge.sh b/.github/scripts/job/dump_knowledge.sh new file mode 100755 index 00000000..c5cb9736 --- /dev/null +++ b/.github/scripts/job/dump_knowledge.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +# ############################################################################## +# +# Make dump of a knowledge base +# +# ############################################################################## + +psql -c "CREATE TABLE aqo_data_dump AS SELECT * FROM aqo_data;" +psql -c "CREATE TABLE aqo_queries_dump AS SELECT * FROM aqo_queries;" +psql -c "CREATE TABLE aqo_query_texts_dump AS SELECT * FROM aqo_query_texts;" +psql -c "CREATE TABLE aqo_query_stat_dump AS SELECT * FROM aqo_query_stat;" + +pg_dump --table='aqo*' -f knowledge_base.dump $PGDATABASE + +psql -c "DROP TABLE aqo_data_dump, aqo_queries_dump, aqo_query_texts_dump, aqo_query_stat_dump" + diff --git a/.github/scripts/job/job_pass.sh b/.github/scripts/job/job_pass.sh new file mode 100755 index 00000000..1ad62fbd --- /dev/null +++ b/.github/scripts/job/job_pass.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# ############################################################################## +# +# Pass each JOB query over the DBMS instance. Use $1 to specify a number of +# iterations, if needed. 
+# +# Results: +# - explains.txt - explain of each query +# - job_onepass_aqo_stat.dat - short report on execution time +# - knowledge_base.dump - dump of the AQO knowledge base +# +# ############################################################################## + +echo "The Join Order Benchmark 1Pass" +echo -e "Query Number\tITER\tQuery Name\tExecution Time, ms" > report.txt +echo -e "Clear a file with explains" > explains.txt + +if [ $# -eq 0 ] +then + ITERS=1 +else + ITERS=$1 +fi + +echo "Execute JOB with the $ITERS iterations" + +filenum=1 +for file in $JOB_DIR/queries/*.sql +do + # Get filename + short_file=$(basename "$file") + + echo -n "EXPLAIN (ANALYZE, VERBOSE, FORMAT JSON) " > test.sql + cat $file >> test.sql + + for (( i=1; i<=$ITERS; i++ )) + do + result=$(psql -f test.sql) + echo -e $result >> explains.txt + exec_time=$(echo $result | sed -n 's/.*"Execution Time": \([0-9]*\.[0-9]*\).*/\1/p') + echo -e "$filenum\t$short_file\t$i\t$exec_time" >> report.txt + echo -e "$filenum\t$i\t$short_file\t$exec_time" + done +filenum=$((filenum+1)) +done + +# Show total optimizer error in the test +psql -c "SELECT sum(error) AS total_error FROM aqo_cardinality_error(false)" +psql -c "SELECT sum(error) AS total_error_aqo FROM aqo_cardinality_error(true)" + +# Show error delta (Negative result is a signal of possible issue) +psql -c " +SELECT id, (o.error - c.error) AS errdelta + FROM aqo_cardinality_error(true) c JOIN aqo_cardinality_error(false) o + USING (id) +" + diff --git a/.github/scripts/job/load_imdb.sh b/.github/scripts/job/load_imdb.sh new file mode 100755 index 00000000..3cb44fb2 --- /dev/null +++ b/.github/scripts/job/load_imdb.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +psql -f $JOB_DIR/schema.sql +psql -vdatadir="'$JOB_DIR'" -f $JOB_DIR/copy.sql + diff --git a/.github/scripts/job/set_test_conditions_1.sh b/.github/scripts/job/set_test_conditions_1.sh new file mode 100755 index 00000000..2140893d --- /dev/null +++ b/.github/scripts/job/set_test_conditions_1.sh @@ 
-0,0 +1,41 @@ +#!/bin/bash + +# ############################################################################## +# +# Test conditions No.1: Quick pass in 'disabled' mode with statistics and +# forced usage of a bunch of parallel workers. +# +# - Disabled mode with a stat gathering and AQO details in explain +# - Force usage of parallel workers aggressively +# - Enable pg_stat_statements statistics +# +# ############################################################################## + +# AQO specific settings +psql -c "ALTER SYSTEM SET aqo.mode = 'disabled'" +psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" +psql -c "ALTER SYSTEM SET aqo.show_details = 'on'" +psql -c "ALTER SYSTEM SET aqo.show_hash = 'on'" + +# Core settings: force parallel workers +psql -c "ALTER SYSTEM SET max_parallel_workers_per_gather = 16" +psql -c "ALTER SYSTEM SET force_parallel_mode = 'on'" +psql -c "ALTER SYSTEM SET from_collapse_limit = 20" +psql -c "ALTER SYSTEM SET join_collapse_limit = 20" +psql -c "ALTER SYSTEM SET parallel_setup_cost = 1.0" +psql -c "ALTER SYSTEM SET parallel_tuple_cost = 0.00001" +psql -c "ALTER SYSTEM SET min_parallel_table_scan_size = 0" +psql -c "ALTER SYSTEM SET min_parallel_index_scan_size = 0" + +# pg_stat_statements +psql -c "ALTER SYSTEM SET pg_stat_statements.track = 'all'" +psql -c "ALTER SYSTEM SET pg_stat_statements.track_planning = 'on'" + +psql -c "SELECT pg_reload_conf();" + +# Enable all previously executed queries which could be disabled +psql -c " + SELECT count(*) FROM aqo_queries, LATERAL aqo_disable_class(queryid) + WHERE queryid <> 0 +" + diff --git a/.github/scripts/job/set_test_conditions_2.sh b/.github/scripts/job/set_test_conditions_2.sh new file mode 100755 index 00000000..609b9624 --- /dev/null +++ b/.github/scripts/job/set_test_conditions_2.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# ############################################################################## +# +# Test conditions No.2: Learn mode with forced parallel workers +# +# - 
Disabled mode with a stat gathering and AQO details in explain +# - Force usage of parallel workers aggressively +# - Enable pg_stat_statements statistics +# +# ############################################################################## + +# AQO specific settings +psql -c "ALTER SYSTEM SET aqo.mode = 'learn'" +psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'off'" +psql -c "ALTER SYSTEM SET aqo.show_details = 'on'" +psql -c "ALTER SYSTEM SET aqo.show_hash = 'on'" +psql -c "ALTER SYSTEM SET aqo.join_threshold = 0" +psql -c "ALTER SYSTEM SET aqo.wide_search = 'off'" + +# Core settings: force parallel workers +psql -c "ALTER SYSTEM SET max_parallel_workers_per_gather = 16" +psql -c "ALTER SYSTEM SET force_parallel_mode = 'on'" +psql -c "ALTER SYSTEM SET from_collapse_limit = 20" +psql -c "ALTER SYSTEM SET join_collapse_limit = 20" +psql -c "ALTER SYSTEM SET parallel_setup_cost = 1.0" +psql -c "ALTER SYSTEM SET parallel_tuple_cost = 0.00001" +psql -c "ALTER SYSTEM SET min_parallel_table_scan_size = 0" +psql -c "ALTER SYSTEM SET min_parallel_index_scan_size = 0" + +# pg_stat_statements +psql -c "ALTER SYSTEM SET pg_stat_statements.track = 'all'" +psql -c "ALTER SYSTEM SET pg_stat_statements.track_planning = 'on'" + +psql -c "SELECT pg_reload_conf();" + +# Enable all previously executed queries which could be disabled +psql -c " + SELECT count(*) FROM aqo_queries, LATERAL aqo_enable_class(queryid) + WHERE queryid <> 0 +" + diff --git a/.github/scripts/job/set_test_conditions_3.sh b/.github/scripts/job/set_test_conditions_3.sh new file mode 100755 index 00000000..00f4dbf3 --- /dev/null +++ b/.github/scripts/job/set_test_conditions_3.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# ############################################################################## +# +# Test conditions No.3: Freeze ML base and forced parallel workers +# +# - Disabled mode with a stat gathering and AQO details in explain +# - Force usage of parallel workers aggressively +# - Enable pg_stat_statements 
statistics +# +# ############################################################################## + +# AQO specific settings +psql -c "ALTER SYSTEM SET aqo.mode = 'frozen'" +psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'off'" +psql -c "ALTER SYSTEM SET aqo.show_details = 'on'" +psql -c "ALTER SYSTEM SET aqo.show_hash = 'on'" +psql -c "ALTER SYSTEM SET aqo.join_threshold = 0" +psql -c "ALTER SYSTEM SET aqo.wide_search = 'off'" + +# Core settings: force parallel workers +psql -c "ALTER SYSTEM SET max_parallel_workers_per_gather = 16" +psql -c "ALTER SYSTEM SET force_parallel_mode = 'on'" +psql -c "ALTER SYSTEM SET from_collapse_limit = 20" +psql -c "ALTER SYSTEM SET join_collapse_limit = 20" +psql -c "ALTER SYSTEM SET parallel_setup_cost = 1.0" +psql -c "ALTER SYSTEM SET parallel_tuple_cost = 0.00001" +psql -c "ALTER SYSTEM SET min_parallel_table_scan_size = 0" +psql -c "ALTER SYSTEM SET min_parallel_index_scan_size = 0" + +# pg_stat_statements +psql -c "ALTER SYSTEM SET pg_stat_statements.track = 'all'" +psql -c "ALTER SYSTEM SET pg_stat_statements.track_planning = 'on'" + +psql -c "SELECT pg_reload_conf();" + +# Enable all previously executed queries which could be disabled +psql -c " + SELECT count(*) FROM aqo_queries, LATERAL aqo_enable_class(queryid) + WHERE queryid <> 0 +" + diff --git a/.github/workflows/installchecks.yml b/.github/workflows/installchecks.yml new file mode 100644 index 00000000..aeb976e4 --- /dev/null +++ b/.github/workflows/installchecks.yml @@ -0,0 +1,153 @@ +name: "InstallChecks" + +on: + push: + +env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + + # Set major PostgreSQL version for all underlying steps + - name: "Extract Postgres major version number" + run: | + PG_MAJOR_VERSION=$(echo "$BRANCH_NAME" | grep --only-matching 'stable[0-9].' 
| grep --only-matching '[0-9].') + + # Declare PG_MAJOR_VERSION as a environment variable + echo "PG_MAJOR_VERSION=$PG_MAJOR_VERSION" >> $GITHUB_ENV + echo "CORE_BRANCH_NAME=REL_${PG_MAJOR_VERSION}_STABLE" >> $GITHUB_ENV + echo "AQO_PATCH_NAME=aqo_pg$PG_MAJOR_VERSION.patch" >> $GITHUB_ENV + - name: "Set proper names for the master case" + if: env.PG_MAJOR_VERSION == '' + run: | + echo "PG_MAJOR_VERSION=master" >> $GITHUB_ENV + echo "CORE_BRANCH_NAME=master" >> $GITHUB_ENV + echo "AQO_PATCH_NAME=aqo_master.patch" >> $GITHUB_ENV + + - name: "Preparations" + run: | + sudo apt install libipc-run-perl libxml2-utils libxml2-dev xsltproc libxslt1-dev + + echo "Deploying to production server on branch" $BRANCH_NAME + git config --global user.email "ci@postgrespro.ru" + git config --global user.name "CI PgPro admin" + git clone https://github.com/postgres/postgres.git pg + cd pg + git checkout $CORE_BRANCH_NAME + git clone https://github.com/postgrespro/aqo.git contrib/aqo + git -C contrib/aqo checkout $BRANCH_NAME + patch -p1 --no-backup-if-mismatch < contrib/aqo/$AQO_PATCH_NAME + COPT="-Werror" + CONFIGURE_OPTS="--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" + echo "CONFIGURE_OPTS=$CONFIGURE_OPTS" >> $GITHUB_ENV + echo "COPT=$COPT" >> $GITHUB_ENV + + - name: "Paths" + run: | + echo "$GITHUB_WORKSPACE/pg/contrib/aqo/.github/scripts/job" >> $GITHUB_PATH + ls -la pg/contrib/aqo/.github/scripts/job + echo "$GITHUB_WORKSPACE/pg/tmp_install/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=$GITHUB_WORKSPACE/pg/tmp_install/lib" >> $GITHUB_ENV + echo "PGDATABASE=`whoami`" >> $GITHUB_ENV + echo "PGHOST=localhost" >> $GITHUB_ENV + echo "PGDATA=PGDATA" >> $GITHUB_ENV + echo "PGUSER=`whoami`" >> $GITHUB_ENV + echo "PGPORT=5432" >> $GITHUB_ENV + + - name: "Debug" + run: | + echo "paths: $PATH" + echo "PG_MAJOR_VERSION: $PG_MAJOR_VERSION, CORE_BRANCH_NAME: $CORE_BRANCH_NAME, AQO_PATCH_NAME: $AQO_PATCH_NAME, CONFIGURE_OPTS: 
$CONFIGURE_OPTS" + + - name: "Compilation" + run: | + cd pg + ./configure $CONFIGURE_OPTS CFLAGS="-O2" + make -j4 > /dev/null && make -j4 -C contrib > /dev/null + make install >> make.log && make -C contrib install > /dev/null + + - name: "Launch AQO instance" + run: | + cd pg + + # Launch an instance with AQO extension + aqo_instance_launch.sh + AQO_VERSION=$(psql -t -c "SELECT extversion FROM pg_extension WHERE extname='aqo'") + echo "AQO_VERSION=$AQO_VERSION" >> $GITHUB_ENV + echo "Use AQO v.$AQO_VERSION" + + # Pass installcheck in disabled mode + - name: installcheck_disabled + run: | + cd pg + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'off'" + psql -c "SELECT pg_reload_conf()" + make installcheck-world + + - name: installcheck_disabled_forced_stat + run: | + cd pg + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make installcheck-world + + - name: installcheck_frozen + run: | + cd pg + psql -c "ALTER SYSTEM SET aqo.mode = 'frozen'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make installcheck-world + + - name: installcheck_controlled + run: | + cd pg + psql -c "ALTER SYSTEM SET aqo.mode = 'controlled'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make installcheck-world + + - name: installcheck_learn + continue-on-error: true + run: | + cd pg + psql -c "ALTER SYSTEM SET aqo.mode = 'learn'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + learn_result=$(make -k installcheck-world) + + - name: installcheck_intelligent + continue-on-error: true + run: | + cd pg + psql -c "ALTER SYSTEM SET aqo.mode = 'intelligent'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make -k installcheck-world + + - name: installcheck_forced + continue-on-error: true + run: | + cd pg + psql -c "ALTER SYSTEM SET aqo.mode = 'forced'" 
+ psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make -k installcheck-world + + # Save Artifacts + - name: Archive artifacts + if: ${{ failure() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.AQO_VERSION }}-${{ env.CORE_BRANCH_NAME }}-${{ env.BRANCH_NAME }}-artifacts + path: | + pg/src/test/regress/regression.diffs + pg/logfile.log + pg/contrib/aqo/tmp_check/log + retention-days: 2 + diff --git a/.github/workflows/job.yml b/.github/workflows/job.yml new file mode 100644 index 00000000..682f4b42 --- /dev/null +++ b/.github/workflows/job.yml @@ -0,0 +1,157 @@ +name: 'Join Order Benchmark' + +env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + +# Trigger the workflow on each push +on: push + +jobs: + AQO_Tests: + + runs-on: self-hosted + + steps: + - name: "Set common paths" + run: | + echo "$HOME/aqo/.github/scripts/job" >> $GITHUB_PATH + echo "JOB_DIR=$HOME/jo-bench" >> $GITHUB_ENV + + # PostgreSQL-related environment variables + echo "$GITHUB_WORKSPACE/pg/tmp_install/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=$GITHUB_WORKSPACE/pg/tmp_install/lib" >> $GITHUB_ENV + echo "PGDATABASE=`whoami`" >> $GITHUB_ENV + echo "PGHOST=localhost" >> $GITHUB_ENV + echo "PGDATA=PGDATA" >> $GITHUB_ENV + echo "PGUSER=`whoami`" >> $GITHUB_ENV + echo "PGPORT=5432" >> $GITHUB_ENV + + # Set major PostgreSQL version for all underlying steps + - name: "Extract Postgres major version number" + run: | + PG_MAJOR_VERSION=$(echo "$BRANCH_NAME" | grep --only-matching 'stable[0-9].' 
| grep --only-matching '[0-9].') + + # Declare PG_MAJOR_VERSION as a environment variable + echo "PG_MAJOR_VERSION=$PG_MAJOR_VERSION" >> $GITHUB_ENV + echo "CORE_BRANCH_NAME=REL_${PG_MAJOR_VERSION}_STABLE" >> $GITHUB_ENV + echo "AQO_PATCH_NAME=aqo_pg$PG_MAJOR_VERSION.patch" >> $GITHUB_ENV + - name: "Set proper names for the master case" + if: env.PG_MAJOR_VERSION == '' + run: | + echo "PG_MAJOR_VERSION=master" >> $GITHUB_ENV + echo "CORE_BRANCH_NAME=master" >> $GITHUB_ENV + echo "AQO_PATCH_NAME=aqo_master.patch" >> $GITHUB_ENV + + # Just for debug + - name: "Print environment variables" + run: | + echo "Test data: $PG_MAJOR_VERSION; Core branch: $CORE_BRANCH_NAME, AQO patch: $AQO_PATCH_NAME" + echo "Paths: $PATH, JOB path: $JOB_DIR" + echo "PG Libs: $LD_LIBRARY_PATH" + echo "PG Environment: dbname: $PGDATABASE, host: $PGHOST, pgdata: $PGDATA, pguser: $PGUSER, pgport: $PGPORT" + + # Runner contains clone of postgres and AQO repositories. We must refresh them + - name: "Code pre-cleanup" + run: | + rm -rf pg + git -C ~/pg clean -fdx + git -C ~/pg pull + git -C ~/pg checkout $CORE_BRANCH_NAME + git -C ~/pg pull + + git -C ~/aqo clean -fdx + git -C ~/aqo pull + git -C ~/aqo checkout $BRANCH_NAME + git -C ~/aqo pull + + # Copy the codes into test folder, arrange code versions and do the patching + - name: "Prepare code directory" + run: | + cp -r ~/pg pg + cd pg + cp -r ~/aqo contrib/aqo + patch -p1 --no-backup-if-mismatch < contrib/aqo/$AQO_PATCH_NAME + + - name: "Compilation" + run: | + cd pg + export COPT=-Werror + export CONFIGURE_OPTS="--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" + ./configure $CONFIGURE_OPTS CFLAGS="-O0" + make clean > /dev/null + make -C contrib clean > /dev/null + make -j2 > /dev/null && make -j2 -C contrib > /dev/null + make install >> make.log + make -C contrib install >> make.log + make -C doc install > /dev/null + + - name: "Launch AQO instance" + run: | + cd pg + make -j2 > /dev/null && make -j2 -C contrib > /dev/null + 
make install > /dev/null && make -C contrib install > /dev/null + + # Launch an instance with AQO extension + aqo_instance_launch.sh + AQO_VERSION=$(psql -t -c "SELECT extversion FROM pg_extension WHERE extname='aqo'") + echo "AQO_VERSION=$AQO_VERSION" >> $GITHUB_ENV + + - name: "Load a dump of the test database" + run: | + cd pg + echo "AQO_VERSION: $AQO_VERSION" + load_imdb.sh + + # Quick pass in parallel mode with statistics + - name: "Test No.1: Gather statistics in disabled mode" + run: | + cd pg + set_test_conditions_1.sh + job_pass.sh + dump_knowledge.sh + + - name: "Archive JOB test results" + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.AQO_VERSION }}-${{ env.CORE_BRANCH_NAME }}-${{ env.BRANCH_NAME }}-result_base_stat + path: | + pg/explains.txt + pg/report.txt + pg/knowledge_base.dump + pg/logfile.log + retention-days: 1 + + # Test No.2: Learn on all incoming queries + - name: "Test No.2: Learning stage" + run: | + cd pg + set_test_conditions_2.sh + job_pass.sh 10 + check_result.sh + + # One pass on frozen AQO data, dump knowledge base, check total error + - name: "Test No.3: Frozen execution" + run: | + cd pg + set_test_conditions_3.sh + job_pass.sh + dump_knowledge.sh + + - name: "Archive JOB test results - frozen" + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.AQO_VERSION }}-${{ env.CORE_BRANCH_NAME }}-${{ env.BRANCH_NAME }}-result_frozen + path: | + pg/explains.txt + pg/report.txt + pg/knowledge_base.dump + pg/logfile.log + retention-days: 7 + + - name: "Cleanup" + run: | + cd pg + pg_ctl -D PGDATA stop + From 492699a5758eb9ac3a71ff7ca9e7fc71dbb0e496 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 1 Mar 2023 08:45:57 +0500 Subject: [PATCH 175/203] Improve the time-dependent statement_timeout test. Remember, each query can be executed longer than the timeout on ancient buildfarm machines. So, RESET this GUC whenever it isn't really needed for a test query.
--- expected/statement_timeout.out | 11 +++++++++-- sql/statement_timeout.sql | 19 +++++++++++++++++-- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index 39796549..1d957df7 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -35,6 +35,7 @@ SET statement_timeout = 80; -- [0.1s] SELECT *, pg_sleep(0.1) FROM t; NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- haven't any partial data check_estimated_rows ---------------------- @@ -46,6 +47,7 @@ SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- @@ -64,6 +66,7 @@ SELECT *, pg_sleep(0.1) FROM t; 5 | (5 rows) +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- @@ -84,6 +87,7 @@ SET statement_timeout = 80; SELECT *, pg_sleep(0.1) FROM t; -- Not learned NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- @@ -94,6 +98,7 @@ SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; -- Learn! NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. 
ERROR: canceling statement due to statement timeout +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- @@ -111,6 +116,7 @@ SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data 5 | (5 rows) +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- @@ -134,18 +140,19 @@ SELECT count(*) FROM aqo_data; -- Must be zero SELECT x, pg_sleep(0.1) FROM t WHERE x > 0; NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout +RESET statement_timeout; SELECT count(*) FROM aqo_data; -- Must be one count ------- 1 (1 row) +DROP TABLE t; +DROP FUNCTION check_estimated_rows; SELECT true AS success FROM aqo_reset(); success --------- t (1 row) -DROP TABLE t; DROP EXTENSION aqo; -DROP FUNCTION check_estimated_rows; diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql index 43dab39e..4ca9171f 100644 --- a/sql/statement_timeout.sql +++ b/sql/statement_timeout.sql @@ -32,16 +32,22 @@ SET aqo.learn_statement_timeout = 'on'; SET statement_timeout = 80; -- [0.1s] SELECT *, pg_sleep(0.1) FROM t; + +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- haven't any partial data -- Don't learn because running node has smaller cardinality than an optimizer prediction SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; + +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- We have a real learning data. 
SET statement_timeout = 800; SELECT *, pg_sleep(0.1) FROM t; + +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- Force to make an underestimated prediction @@ -52,14 +58,20 @@ SELECT true AS success FROM aqo_reset(); SET statement_timeout = 80; SELECT *, pg_sleep(0.1) FROM t; -- Not learned + +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; -- Learn! + +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); SET statement_timeout = 550; SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data + +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- Interrupted query should immediately appear in aqo_data @@ -67,9 +79,12 @@ SELECT true AS success FROM aqo_reset(); SET statement_timeout = 500; SELECT count(*) FROM aqo_data; -- Must be zero SELECT x, pg_sleep(0.1) FROM t WHERE x > 0; + +RESET statement_timeout; SELECT count(*) FROM aqo_data; -- Must be one -SELECT true AS success FROM aqo_reset(); DROP TABLE t; -DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; + +SELECT true AS success FROM aqo_reset(); +DROP EXTENSION aqo; From bd3585b1ad6794eb2231c072eddfaeb1c2fb16e8 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 1 Mar 2023 09:02:55 +0500 Subject: [PATCH 176/203] Improve basic CI and installcheck CI code. 
--- .github/workflows/c-cpp.yml | 4 +- .github/workflows/installchecks.yml | 90 +++++++++++++++-------------- regress_schedule | 1 + 3 files changed, 48 insertions(+), 47 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 0123a181..27f911cb 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -15,7 +15,6 @@ jobs: - uses: actions/checkout@v3 - name: "Define PostreSQL major version" run: | - echo "$(ls -la)" patch_name=$(ls aqo_*.patch|tail -1) echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV @@ -49,7 +48,6 @@ jobs: run: | git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg cd $GITHUB_WORKSPACE/../pg - ls -la cp -r ../aqo contrib/aqo patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME @@ -70,7 +68,7 @@ jobs: env CLIENTS=50 THREADS=50 make -C contrib/aqo check - name: Archive artifacts - if: ${{ always() }} + if: ${{ failure() }} uses: actions/upload-artifact@v3 with: name: make_check_logs diff --git a/.github/workflows/installchecks.yml b/.github/workflows/installchecks.yml index aeb976e4..94e38d6c 100644 --- a/.github/workflows/installchecks.yml +++ b/.github/workflows/installchecks.yml @@ -14,44 +14,48 @@ jobs: steps: # Set major PostgreSQL version for all underlying steps - - name: "Extract Postgres major version number" + - uses: actions/checkout@v3 + - name: "Define PostreSQL major version" run: | - PG_MAJOR_VERSION=$(echo "$BRANCH_NAME" | grep --only-matching 'stable[0-9].' 
| grep --only-matching '[0-9].') + patch_name=$(ls aqo_*.patch|tail -1) + echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV - # Declare PG_MAJOR_VERSION as a environment variable - echo "PG_MAJOR_VERSION=$PG_MAJOR_VERSION" >> $GITHUB_ENV - echo "CORE_BRANCH_NAME=REL_${PG_MAJOR_VERSION}_STABLE" >> $GITHUB_ENV - echo "AQO_PATCH_NAME=aqo_pg$PG_MAJOR_VERSION.patch" >> $GITHUB_ENV - - name: "Set proper names for the master case" + # we can get number, otherwise set up master + vers_number=$(echo "$patch_name"|tr -d -c 0-9) + echo "PG_MAJOR_VERSION=$vers_number" >> $GITHUB_ENV + + branch_name="REL_${vers_number}_STABLE" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV + + - name: "Set master branch name, if needed" if: env.PG_MAJOR_VERSION == '' run: | - echo "PG_MAJOR_VERSION=master" >> $GITHUB_ENV - echo "CORE_BRANCH_NAME=master" >> $GITHUB_ENV - echo "AQO_PATCH_NAME=aqo_master.patch" >> $GITHUB_ENV + branch_name="master" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV - - name: "Preparations" + - name: "Environment (debug output)" + if: ${{ always() }} run: | - sudo apt install libipc-run-perl libxml2-utils libxml2-dev xsltproc libxslt1-dev - - echo "Deploying to production server on branch" $BRANCH_NAME + echo "Use PostgreSQL branch $PG_BRANCH (patch: $CORE_PATCH_NAME)" + echo "Deploying to production server on branch" $BRANCH_NAME "(PG $PG_BRANCH)" git config --global user.email "ci@postgrespro.ru" git config --global user.name "CI PgPro admin" - git clone https://github.com/postgres/postgres.git pg - cd pg - git checkout $CORE_BRANCH_NAME - git clone https://github.com/postgrespro/aqo.git contrib/aqo - git -C contrib/aqo checkout $BRANCH_NAME - patch -p1 --no-backup-if-mismatch < contrib/aqo/$AQO_PATCH_NAME - COPT="-Werror" - CONFIGURE_OPTS="--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" - echo "CONFIGURE_OPTS=$CONFIGURE_OPTS" >> $GITHUB_ENV - echo "COPT=$COPT" >> $GITHUB_ENV + + - name: "Prepare PG 
directory" + run: | + sudo apt install libipc-run-perl libxml2-utils libxml2-dev xsltproc libxslt1-dev + git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg + cd $GITHUB_WORKSPACE/../pg + cp -r ../aqo contrib/aqo + patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME - name: "Paths" run: | - echo "$GITHUB_WORKSPACE/pg/contrib/aqo/.github/scripts/job" >> $GITHUB_PATH - ls -la pg/contrib/aqo/.github/scripts/job - echo "$GITHUB_WORKSPACE/pg/tmp_install/bin" >> $GITHUB_PATH + cd $GITHUB_WORKSPACE/../pg + echo "COPT=-Werror" >> $GITHUB_ENV + echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV + echo "$GITHUB_WORKSPACE/../pg/tmp_install/bin" >> $GITHUB_PATH + echo "$GITHUB_WORKSPACE/../pg/contrib/aqo/.github/scripts/job" >> $GITHUB_PATH echo "LD_LIBRARY_PATH=$GITHUB_WORKSPACE/pg/tmp_install/lib" >> $GITHUB_ENV echo "PGDATABASE=`whoami`" >> $GITHUB_ENV echo "PGHOST=localhost" >> $GITHUB_ENV @@ -59,21 +63,19 @@ jobs: echo "PGUSER=`whoami`" >> $GITHUB_ENV echo "PGPORT=5432" >> $GITHUB_ENV - - name: "Debug" - run: | - echo "paths: $PATH" - echo "PG_MAJOR_VERSION: $PG_MAJOR_VERSION, CORE_BRANCH_NAME: $CORE_BRANCH_NAME, AQO_PATCH_NAME: $AQO_PATCH_NAME, CONFIGURE_OPTS: $CONFIGURE_OPTS" - - name: "Compilation" run: | - cd pg - ./configure $CONFIGURE_OPTS CFLAGS="-O2" + cd $GITHUB_WORKSPACE/../pg + echo "paths: $PATH" + echo "COPT: $COPT" + echo "CONFIGURE_OPTS: $CONFIGURE_OPTS" + ./configure $CONFIGURE_OPTS CFLAGS="-O2" > /dev/null make -j4 > /dev/null && make -j4 -C contrib > /dev/null make install >> make.log && make -C contrib install > /dev/null - name: "Launch AQO instance" run: | - cd pg + cd $GITHUB_WORKSPACE/../pg # Launch an instance with AQO extension aqo_instance_launch.sh @@ -84,21 +86,21 @@ jobs: # Pass installcheck in disabled mode - name: installcheck_disabled run: | - cd pg + cd $GITHUB_WORKSPACE/../pg psql -c "ALTER 
SYSTEM SET aqo.force_collect_stat = 'off'" psql -c "SELECT pg_reload_conf()" make installcheck-world - name: installcheck_disabled_forced_stat run: | - cd pg + cd $GITHUB_WORKSPACE/../pg psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" make installcheck-world - name: installcheck_frozen run: | - cd pg + cd $GITHUB_WORKSPACE/../pg psql -c "ALTER SYSTEM SET aqo.mode = 'frozen'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -106,7 +108,7 @@ jobs: - name: installcheck_controlled run: | - cd pg + cd $GITHUB_WORKSPACE/../pg psql -c "ALTER SYSTEM SET aqo.mode = 'controlled'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -115,7 +117,7 @@ jobs: - name: installcheck_learn continue-on-error: true run: | - cd pg + cd $GITHUB_WORKSPACE/../pg psql -c "ALTER SYSTEM SET aqo.mode = 'learn'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -124,7 +126,7 @@ jobs: - name: installcheck_intelligent continue-on-error: true run: | - cd pg + cd $GITHUB_WORKSPACE/../pg psql -c "ALTER SYSTEM SET aqo.mode = 'intelligent'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -133,7 +135,7 @@ jobs: - name: installcheck_forced continue-on-error: true run: | - cd pg + cd $GITHUB_WORKSPACE/../pg psql -c "ALTER SYSTEM SET aqo.mode = 'forced'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -141,10 +143,10 @@ jobs: # Save Artifacts - name: Archive artifacts - if: ${{ failure() }} + if: ${{ always() }} uses: actions/upload-artifact@v3 with: - name: ${{ env.AQO_VERSION }}-${{ env.CORE_BRANCH_NAME }}-${{ env.BRANCH_NAME }}-artifacts + name: ${{ env.AQO_VERSION }}-${{ env.PG_BRANCH }}-${{ env.CORE_PATCH_NAME }}-artifacts path: | pg/src/test/regress/regression.diffs pg/logfile.log diff --git a/regress_schedule b/regress_schedule index 
2bcdaaf2..76a2e00e 100644 --- a/regress_schedule +++ b/regress_schedule @@ -15,6 +15,7 @@ test: plancache test: update_functions # Performance-dependent test. Can be ignored if executes in containers or on slow machines ignore: statement_timeout +test: statement_timeout test: temp_tables test: top_queries test: relocatable From 27f9b5bc489ca368adcb1da86ebd628bae1a02df Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 9 Mar 2023 13:20:02 +0500 Subject: [PATCH 177/203] CI Refactoring: Unify code of all three CI workflows --- .github/workflows/c-cpp.yml | 60 ++++++----- .github/workflows/installchecks.yml | 93 +++++++++-------- .github/workflows/job.yml | 150 +++++++++++++++------------- 3 files changed, 170 insertions(+), 133 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 27f911cb..74e90277 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -1,11 +1,15 @@ name: 'AQO basic CI' -on: - pull_request: - env: + # Use it just for a report BRANCH_NAME: ${{ github.head_ref || github.ref_name }} +# Trigger it each timeon push or pull request. Honestly, it will be redundant +# most of the time, but external pull-request checks don't be missed out. 
+on: + push: + pull_request: + jobs: build: @@ -15,6 +19,11 @@ jobs: - uses: actions/checkout@v3 - name: "Define PostreSQL major version" run: | + echo "The action workflow is triggered by the $BRANCH_NAME" + sudo apt install libipc-run-perl + git config --global user.email "ci@postgrespro.ru" + git config --global user.name "CI PgPro admin" + patch_name=$(ls aqo_*.patch|tail -1) echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV @@ -24,38 +33,43 @@ jobs: branch_name="REL_${vers_number}_STABLE" echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV - - echo "COPT=-Werror" >> $GITHUB_ENV - echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV - - name: "Set master branch name, if needed" if: env.PG_MAJOR_VERSION == '' run: | branch_name="master" echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV - - name: "Environment (debug output)" - if: ${{ always() }} + # Create workspace directory and environment variable. + # It is the second step because on the first we define versions and branches + - name: "Initial dir" run: | - echo "Use PostgreSQL branch $PG_BRANCH (patch: $CORE_PATCH_NAME)" - echo "COPT: $COPT" - echo "CONFIGURE_OPTS: $CONFIGURE_OPTS" - echo "Deploying to production server on branch" $BRANCH_NAME "(PG $PG_BRANCH)" - git config --global user.email "ci@postgrespro.ru" - git config --global user.name "CI PgPro admin" + git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg + + # Invent variable with full path to PG directory just because github + # actions don't like relative paths ... 
+ cd $GITHUB_WORKSPACE/../pg + echo PG_DIR=`pwd` >> $GITHUB_ENV - name: "Prepare PG directory" run: | - git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR cp -r ../aqo contrib/aqo patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME + echo "COPT=-Werror" >> $GITHUB_ENV + echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV - - name: "make check" + # Just for debug + - name: "Environment (debug output)" + if: ${{ always() }} run: | - sudo apt install libipc-run-perl + echo "PG_MAJOR_VERSION: $PG_MAJOR_VERSION" + echo "PG_DIR: $PG_DIR" + echo "PG_BRANCH: $PG_BRANCH" + echo "CORE_PATCH_NAME: $CORE_PATCH_NAME" - cd $GITHUB_WORKSPACE/../pg + - name: "make check" + run: | + cd $PG_DIR ./configure $CONFIGURE_OPTS CFLAGS="-O2" > /dev/null make -j4 > /dev/null && make -j4 -C contrib > /dev/null env CLIENTS=50 THREADS=50 make -C contrib/aqo check @@ -73,7 +87,7 @@ jobs: with: name: make_check_logs path: | - /home/runner/work/aqo/pg/contrib/aqo/regression.diffs - /home/runner/work/aqo/pg/contrib/aqo/log - /home/runner/work/aqo/pg/contrib/aqo/tmp_check/log + ${{ env.PG_DIR }}/contrib/aqo/regression.diffs + ${{ env.PG_DIR }}/contrib/aqo/log + ${{ env.PG_DIR }}/contrib/aqo/tmp_check/log retention-days: 7 diff --git a/.github/workflows/installchecks.yml b/.github/workflows/installchecks.yml index 94e38d6c..075034a0 100644 --- a/.github/workflows/installchecks.yml +++ b/.github/workflows/installchecks.yml @@ -1,22 +1,29 @@ name: "InstallChecks" -on: - push: - env: + # Use it just for a report BRANCH_NAME: ${{ github.head_ref || github.ref_name }} +# Trigger it each timeon push or pull request. Honestly, it will be redundant +# most of the time, but external pull-request checks don't be missed out. 
+on: + push: + pull_request: + jobs: build: runs-on: ubuntu-latest steps: - - # Set major PostgreSQL version for all underlying steps - uses: actions/checkout@v3 - - name: "Define PostreSQL major version" + - name: "Define PostreSQL major version and set basic environment" run: | + echo "The action workflow is triggered by the $BRANCH_NAME" + sudo apt install libipc-run-perl + git config --global user.email "ci@postgrespro.ru" + git config --global user.name "CI PgPro admin" + patch_name=$(ls aqo_*.patch|tail -1) echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV @@ -26,47 +33,51 @@ jobs: branch_name="REL_${vers_number}_STABLE" echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV - - name: "Set master branch name, if needed" if: env.PG_MAJOR_VERSION == '' run: | branch_name="master" echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV - - name: "Environment (debug output)" - if: ${{ always() }} + # Create workspace directory and environment variable. + # It is the second step because on the first we define versions and branches + - name: "Initial dir" run: | - echo "Use PostgreSQL branch $PG_BRANCH (patch: $CORE_PATCH_NAME)" - echo "Deploying to production server on branch" $BRANCH_NAME "(PG $PG_BRANCH)" - git config --global user.email "ci@postgrespro.ru" - git config --global user.name "CI PgPro admin" + git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg + + # Invent variable with full path to PG directory just because github + # actions don't like relative paths ... 
+ cd $GITHUB_WORKSPACE/../pg + echo PG_DIR=`pwd` >> $GITHUB_ENV - name: "Prepare PG directory" run: | - sudo apt install libipc-run-perl libxml2-utils libxml2-dev xsltproc libxslt1-dev - git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR cp -r ../aqo contrib/aqo patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME - - - name: "Paths" - run: | - cd $GITHUB_WORKSPACE/../pg echo "COPT=-Werror" >> $GITHUB_ENV echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV - echo "$GITHUB_WORKSPACE/../pg/tmp_install/bin" >> $GITHUB_PATH - echo "$GITHUB_WORKSPACE/../pg/contrib/aqo/.github/scripts/job" >> $GITHUB_PATH - echo "LD_LIBRARY_PATH=$GITHUB_WORKSPACE/pg/tmp_install/lib" >> $GITHUB_ENV - echo "PGDATABASE=`whoami`" >> $GITHUB_ENV - echo "PGHOST=localhost" >> $GITHUB_ENV - echo "PGDATA=PGDATA" >> $GITHUB_ENV - echo "PGUSER=`whoami`" >> $GITHUB_ENV - echo "PGPORT=5432" >> $GITHUB_ENV + + # Instance-related environment + echo "$PG_DIR/tmp_install/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=$PG_DIR/tmp_install/lib" >> $GITHUB_ENV + echo `pwd`/contrib/aqo/.github/scripts/job >> $GITHUB_PATH + + # Just for debug + - name: "Environment (debug output)" + if: ${{ always() }} + run: | + echo "PG_MAJOR_VERSION: $PG_MAJOR_VERSION" + echo "PG_DIR: $PG_DIR" + echo "PG_BRANCH: $PG_BRANCH" + echo "CORE_PATCH_NAME: $CORE_PATCH_NAME" + # See these paths to understand correctness of the instance initialization + echo "PATHs: $PATH" + echo "PG Libs: $LD_LIBRARY_PATH" - name: "Compilation" run: | - cd $GITHUB_WORKSPACE/../pg - echo "paths: $PATH" + cd $PG_DIR echo "COPT: $COPT" echo "CONFIGURE_OPTS: $CONFIGURE_OPTS" ./configure $CONFIGURE_OPTS CFLAGS="-O2" > /dev/null @@ -75,7 +86,7 @@ jobs: - name: "Launch AQO instance" run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR # Launch an instance with AQO extension 
aqo_instance_launch.sh @@ -86,21 +97,21 @@ jobs: # Pass installcheck in disabled mode - name: installcheck_disabled run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'off'" psql -c "SELECT pg_reload_conf()" make installcheck-world - name: installcheck_disabled_forced_stat run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" make installcheck-world - name: installcheck_frozen run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR psql -c "ALTER SYSTEM SET aqo.mode = 'frozen'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -108,7 +119,7 @@ jobs: - name: installcheck_controlled run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR psql -c "ALTER SYSTEM SET aqo.mode = 'controlled'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -117,7 +128,7 @@ jobs: - name: installcheck_learn continue-on-error: true run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR psql -c "ALTER SYSTEM SET aqo.mode = 'learn'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -126,7 +137,7 @@ jobs: - name: installcheck_intelligent continue-on-error: true run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR psql -c "ALTER SYSTEM SET aqo.mode = 'intelligent'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -135,7 +146,7 @@ jobs: - name: installcheck_forced continue-on-error: true run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR psql -c "ALTER SYSTEM SET aqo.mode = 'forced'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -148,8 +159,8 @@ jobs: with: name: ${{ env.AQO_VERSION }}-${{ env.PG_BRANCH }}-${{ env.CORE_PATCH_NAME }}-artifacts path: | - pg/src/test/regress/regression.diffs - pg/logfile.log - pg/contrib/aqo/tmp_check/log + ${{ env.PG_DIR 
}}/src/test/regress/regression.diffs + ${{ env.PG_DIR }}/logfile.log + ${{ env.PG_DIR }}/contrib/aqo/tmp_check/log retention-days: 2 diff --git a/.github/workflows/job.yml b/.github/workflows/job.yml index 682f4b42..817f0047 100644 --- a/.github/workflows/job.yml +++ b/.github/workflows/job.yml @@ -1,82 +1,94 @@ name: 'Join Order Benchmark' env: + # Use it just for a report BRANCH_NAME: ${{ github.head_ref || github.ref_name }} -# Trigger the workflow on each push -on: push +# Trigger the workflow on each release or on a manual action +on: + workflow_dispatch: + release: jobs: - AQO_Tests: + AQO_JOB_Benchmark: runs-on: self-hosted steps: - - name: "Set common paths" + - uses: actions/checkout@v3 + - name: "Define PostreSQL major version and set basic environment" run: | - echo "$HOME/aqo/.github/scripts/job" >> $GITHUB_PATH - echo "JOB_DIR=$HOME/jo-bench" >> $GITHUB_ENV + echo "The action workflow is triggered by the $BRANCH_NAME" + + # Cleanup, because of self-hosted runner + rm -rf $GITHUB_WORKSPACE/../pg + + patch_name=$(ls aqo_*.patch|tail -1) + echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV + + # we can get number, otherwise set up master + vers_number=$(echo "$patch_name"|tr -d -c 0-9) + echo "PG_MAJOR_VERSION=$vers_number" >> $GITHUB_ENV + + branch_name="REL_${vers_number}_STABLE" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV + - name: "Set master branch name, if needed" + if: env.PG_MAJOR_VERSION == '' + run: | + branch_name="master" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV + + # Create workspace directory and environment variable. 
+ # It is the second step because on the first we define versions and branches + - name: "Initial dir" + run: | + git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg - # PostgreSQL-related environment variables - echo "$GITHUB_WORKSPACE/pg/tmp_install/bin" >> $GITHUB_PATH - echo "LD_LIBRARY_PATH=$GITHUB_WORKSPACE/pg/tmp_install/lib" >> $GITHUB_ENV + # Invent variable with full path to PG directory just because github + # actions don't like relative paths ... + cd $GITHUB_WORKSPACE/../pg + echo PG_DIR=`pwd` >> $GITHUB_ENV + + - name: "Prepare PG directory" + run: | + cd $PG_DIR + cp -r ../aqo contrib/aqo + patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME + echo "COPT=-Werror" >> $GITHUB_ENV + echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV + + # Instance-related environment + echo "$PG_DIR/tmp_install/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=$PG_DIR/tmp_install/lib" >> $GITHUB_ENV + echo `pwd`/contrib/aqo/.github/scripts/job >> $GITHUB_PATH + + # JOB-specific environment + echo "JOB_DIR=$HOME/jo-bench" >> $GITHUB_ENV echo "PGDATABASE=`whoami`" >> $GITHUB_ENV echo "PGHOST=localhost" >> $GITHUB_ENV echo "PGDATA=PGDATA" >> $GITHUB_ENV echo "PGUSER=`whoami`" >> $GITHUB_ENV echo "PGPORT=5432" >> $GITHUB_ENV - # Set major PostgreSQL version for all underlying steps - - name: "Extract Postgres major version number" - run: | - PG_MAJOR_VERSION=$(echo "$BRANCH_NAME" | grep --only-matching 'stable[0-9].' 
| grep --only-matching '[0-9].') - - # Declare PG_MAJOR_VERSION as a environment variable - echo "PG_MAJOR_VERSION=$PG_MAJOR_VERSION" >> $GITHUB_ENV - echo "CORE_BRANCH_NAME=REL_${PG_MAJOR_VERSION}_STABLE" >> $GITHUB_ENV - echo "AQO_PATCH_NAME=aqo_pg$PG_MAJOR_VERSION.patch" >> $GITHUB_ENV - - name: "Set proper names for the master case" - if: env.PG_MAJOR_VERSION == '' - run: | - echo "PG_MAJOR_VERSION=master" >> $GITHUB_ENV - echo "CORE_BRANCH_NAME=master" >> $GITHUB_ENV - echo "AQO_PATCH_NAME=aqo_master.patch" >> $GITHUB_ENV - # Just for debug - - name: "Print environment variables" + - name: "Environment (debug output)" + if: ${{ always() }} run: | - echo "Test data: $PG_MAJOR_VERSION; Core branch: $CORE_BRANCH_NAME, AQO patch: $AQO_PATCH_NAME" - echo "Paths: $PATH, JOB path: $JOB_DIR" + echo "PG_MAJOR_VERSION: $PG_MAJOR_VERSION" + echo "PG_DIR: $PG_DIR" + echo "PG_BRANCH: $PG_BRANCH" + echo "CORE_PATCH_NAME: $CORE_PATCH_NAME" + # See these paths to understand correctness of the instance initialization + echo "PATHs: $PATH" echo "PG Libs: $LD_LIBRARY_PATH" - echo "PG Environment: dbname: $PGDATABASE, host: $PGHOST, pgdata: $PGDATA, pguser: $PGUSER, pgport: $PGPORT" - # Runner contains clone of postgres and AQO repositories. 
We must refresh them - - name: "Code pre-cleanup" - run: | - rm -rf pg - git -C ~/pg clean -fdx - git -C ~/pg pull - git -C ~/pg checkout $CORE_BRANCH_NAME - git -C ~/pg pull - - git -C ~/aqo clean -fdx - git -C ~/aqo pull - git -C ~/aqo checkout $BRANCH_NAME - git -C ~/aqo pull - - # Copy the codes into test folder, arrange code versions and do the patching - - name: "Prepare code directory" - run: | - cp -r ~/pg pg - cd pg - cp -r ~/aqo contrib/aqo - patch -p1 --no-backup-if-mismatch < contrib/aqo/$AQO_PATCH_NAME + # JOB-specific environment variable + echo "JOB path: $JOB_DIR" + echo "PG Environment: dbname: $PGDATABASE, host: $PGHOST, pgdata: $PGDATA, pguser: $PGUSER, pgport: $PGPORT" - name: "Compilation" run: | - cd pg - export COPT=-Werror - export CONFIGURE_OPTS="--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" + cd $PG_DIR ./configure $CONFIGURE_OPTS CFLAGS="-O0" make clean > /dev/null make -C contrib clean > /dev/null @@ -87,9 +99,7 @@ jobs: - name: "Launch AQO instance" run: | - cd pg - make -j2 > /dev/null && make -j2 -C contrib > /dev/null - make install > /dev/null && make -C contrib install > /dev/null + cd $PG_DIR # Launch an instance with AQO extension aqo_instance_launch.sh @@ -98,14 +108,14 @@ jobs: - name: "Load a dump of the test database" run: | - cd pg + cd $PG_DIR echo "AQO_VERSION: $AQO_VERSION" load_imdb.sh # Quick pass in parallel mode with statistics - name: "Test No.1: Gather statistics in disabled mode" run: | - cd pg + cd $PG_DIR set_test_conditions_1.sh job_pass.sh dump_knowledge.sh @@ -116,16 +126,17 @@ jobs: with: name: ${{ env.AQO_VERSION }}-${{ env.CORE_BRANCH_NAME }}-${{ env.BRANCH_NAME }}-result_base_stat path: | - pg/explains.txt - pg/report.txt - pg/knowledge_base.dump - pg/logfile.log + # Relative paths not allowed ... 
+ ${{ env.PG_DIR }}/explains.txt + ${{ env.PG_DIR }}/report.txt + ${{ env.PG_DIR }}/knowledge_base.dump + ${{ env.PG_DIR }}/logfile.log retention-days: 1 # Test No.2: Learn on all incoming queries - name: "Test No.2: Learning stage" run: | - cd pg + cd $PG_DIR set_test_conditions_2.sh job_pass.sh 10 check_result.sh @@ -133,7 +144,7 @@ jobs: # One pass on frozen AQO data, dump knowledge base, check total error - name: "Test No.3: Frozen execution" run: | - cd pg + cd $PG_DIR set_test_conditions_3.sh job_pass.sh dump_knowledge.sh @@ -144,14 +155,15 @@ jobs: with: name: ${{ env.AQO_VERSION }}-${{ env.CORE_BRANCH_NAME }}-${{ env.BRANCH_NAME }}-result_frozen path: | - pg/explains.txt - pg/report.txt - pg/knowledge_base.dump - pg/logfile.log + # Relative paths not allowed ... + ${{ env.PG_DIR }}/explains.txt + ${{ env.PG_DIR }}/report.txt + ${{ env.PG_DIR }}/knowledge_base.dump + ${{ env.PG_DIR }}/logfile.log retention-days: 7 - name: "Cleanup" run: | - cd pg + cd $PG_DIR pg_ctl -D PGDATA stop From 94bc12dcf77c5ac0bae87e56508705b6b2dcdb1c Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 30 Mar 2023 08:43:12 +0500 Subject: [PATCH 178/203] Add specific initial script for AQO 1.6. It mostly caused by desire of reducing number of failures 001_pgbench.pl test on WINDOWS OSes (it is related to speed of file descriptor allocations in the test, where we CREATE/DROP extensions competitively by several threads. Also, the aqo_CVE-2020-14350 test is corrected. 
--- Makefile | 2 +- aqo--1.6.sql | 210 ++++++++++++++++++++++++++++++++ expected/aqo_CVE-2020-14350.out | 138 +++++++-------------- sql/aqo_CVE-2020-14350.sql | 104 +++++----------- 4 files changed, 282 insertions(+), 172 deletions(-) create mode 100644 aqo--1.6.sql diff --git a/Makefile b/Makefile index ce9d00ba..1da2994c 100755 --- a/Makefile +++ b/Makefile @@ -30,7 +30,7 @@ EXTRA_INSTALL = contrib/postgres_fdw contrib/pg_stat_statements DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql \ aqo--1.2--1.3.sql aqo--1.3--1.4.sql aqo--1.4--1.5.sql \ - aqo--1.5--1.6.sql + aqo--1.5--1.6.sql aqo--1.6.sql ifdef USE_PGXS PG_CONFIG ?= pg_config diff --git a/aqo--1.6.sql b/aqo--1.6.sql new file mode 100644 index 00000000..bb44cf22 --- /dev/null +++ b/aqo--1.6.sql @@ -0,0 +1,210 @@ +/* contrib/aqo/aqo--1.6.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION aqo" to load this file. \quit + +-- +-- Get cardinality error of queries the last time they were executed. +-- IN: +-- controlled - show queries executed under a control of AQO (true); +-- executed without an AQO control, but AQO has a stat on the query (false). +-- +-- OUT: +-- num - sequental number. Smaller number corresponds to higher error. +-- id - ID of a query. +-- fshash - feature space. Usually equal to zero or ID. +-- error - AQO error that calculated on plan nodes of the query. +-- nexecs - number of executions of queries associated with this ID. +-- +CREATE FUNCTION aqo_cardinality_error(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, error double precision, nexecs bigint) +AS 'MODULE_PATHNAME', 'aqo_cardinality_error' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS +'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; + +-- +-- Remove unneeded rows from the AQO ML storage. 
+-- For common feature space, remove rows from aqo_data only. +-- For custom feature space - remove all rows related to the space from all AQO +-- tables even if only one oid for one feature subspace of the space is illegal. +-- Returns number of deleted rows from aqo_queries and aqo_data tables. +-- +CREATE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) +RETURNS record +AS 'MODULE_PATHNAME', 'aqo_cleanup' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cleanup() IS +'Remove unneeded rows from the AQO ML storage'; + +CREATE FUNCTION aqo_disable_class(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_disable_query' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_disable_class(bigint) IS +'Set learn_aqo, use_aqo and auto_tuning into false for a class of queries with specific queryid.'; + +-- +-- Remove query class settings, text, statistics and ML data from AQO storage. +-- Return number of FSS records, removed from the storage. +-- +CREATE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer +AS 'MODULE_PATHNAME', 'aqo_drop_class' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_drop_class(bigint) IS +'Remove info about an query class from AQO ML knowledge base.'; + +CREATE FUNCTION aqo_enable_class(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_enable_query' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_enable_class(bigint) IS +'Set learn_aqo, use_aqo and auto_tuning (in intelligent mode) into true for a class of queries with specific queryid.'; + +-- +-- Show execution time of queries, for which AQO has statistics. +-- controlled - show stat on executions where AQO was used for cardinality +-- estimations, or not used (controlled = false). +-- Last case is possible in disabled mode with aqo.force_collect_stat = 'on'. 
+-- +CREATE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) +AS 'MODULE_PATHNAME', 'aqo_execution_time' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_execution_time(boolean) IS +'Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions.'; + +CREATE FUNCTION aqo_memory_usage( + OUT name text, + OUT allocated_size int, + OUT used_size int +) +RETURNS SETOF record +AS $$ + SELECT name, total_bytes, used_bytes FROM pg_backend_memory_contexts + WHERE name LIKE 'AQO%' + UNION + SELECT name, allocated_size, size FROM pg_shmem_allocations + WHERE name LIKE 'AQO%'; +$$ LANGUAGE SQL; +COMMENT ON FUNCTION aqo_memory_usage() IS +'Show allocated sizes and used sizes of aqo`s memory contexts and hash tables'; + +-- +-- Update or insert an aqo_data +-- table record for given 'fs' & 'fss'. +-- + +CREATE FUNCTION aqo_data_update( + fs bigint, + fss integer, + nfeatures integer, + features double precision[][], + targets double precision[], + reliability double precision[], + oids Oid[]) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_data_update' +LANGUAGE C VOLATILE; + +CREATE FUNCTION aqo_queries_update( + queryid bigint, fs bigint, learn_aqo bool, use_aqo bool, auto_tuning bool) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_queries_update' +LANGUAGE C VOLATILE; + +-- +-- Update or insert an aqo_query_stat +-- table record for given 'queryid'. 
+-- +CREATE FUNCTION aqo_query_stat_update( + queryid bigint, + execution_time_with_aqo double precision[], + execution_time_without_aqo double precision[], + planning_time_with_aqo double precision[], + planning_time_without_aqo double precision[], + cardinality_error_with_aqo double precision[], + cardinality_error_without_aqo double precision[], + executions_with_aqo bigint, + executions_without_aqo bigint) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_query_stat_update' +LANGUAGE C VOLATILE; + +-- +-- Update or insert an aqo_query_texts +-- table record for given 'queryid'. +-- +CREATE FUNCTION aqo_query_texts_update( + queryid bigint, query_text text) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_query_texts_update' +LANGUAGE C VOLATILE; + +-- +-- Remove all records in the AQO storage. +-- Return number of rows removed. +-- +CREATE FUNCTION aqo_reset() RETURNS bigint +AS 'MODULE_PATHNAME', 'aqo_reset' +LANGUAGE C PARALLEL SAFE; +COMMENT ON FUNCTION aqo_reset() IS +'Reset all data gathered by AQO'; + +-- ----------------------------------------------------------------------------- +-- +-- VIEWs +-- +-- ----------------------------------------------------------------------------- + +CREATE FUNCTION aqo_data ( + OUT fs bigint, + OUT fss integer, + OUT nfeatures integer, + OUT features double precision[][], + OUT targets double precision[], + OUT reliability double precision[], + OUT oids Oid[] +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_data' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_queries ( + OUT queryid bigint, + OUT fs bigint, + OUT learn_aqo boolean, + OUT use_aqo boolean, + OUT auto_tuning boolean, + OUT smart_timeout bigint, + OUT count_increase_timeout bigint +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_queries' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_query_stat ( + OUT queryid bigint, + OUT execution_time_with_aqo double precision[], + OUT execution_time_without_aqo double precision[], + OUT 
planning_time_with_aqo double precision[], + OUT planning_time_without_aqo double precision[], + OUT cardinality_error_with_aqo double precision[], + OUT cardinality_error_without_aqo double precision[], + OUT executions_with_aqo bigint, + OUT executions_without_aqo bigint +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_query_stat' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_query_texts(OUT queryid bigint, OUT query_text text) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_query_texts' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE VIEW aqo_data AS SELECT * FROM aqo_data(); +CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); +CREATE VIEW aqo_query_stat AS SELECT * FROM aqo_query_stat(); +CREATE VIEW aqo_query_texts AS SELECT * FROM aqo_query_texts(); diff --git a/expected/aqo_CVE-2020-14350.out b/expected/aqo_CVE-2020-14350.out index 8685b935..5deb45ae 100644 --- a/expected/aqo_CVE-2020-14350.out +++ b/expected/aqo_CVE-2020-14350.out @@ -49,51 +49,32 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_status(hash bigint) -RETURNS TABLE ( - "learn" BOOL, - "use aqo" BOOL, - "auto tune" BOOL, - "fspace hash" bigINT, - "t_naqo" TEXT, - "err_naqo" TEXT, - "iters" BIGINT, - "t_aqo" TEXT, - "err_aqo" TEXT, - "iters_aqo" BIGINT -) +CREATE FUNCTION aqo_reset() +RETURNS bigint AS $$ BEGIN + RETURN 1; END $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_status" already exists with same argument types +ERROR: function "aqo_reset" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_status(hash bigint) -RETURNS TABLE ( - "learn" BOOL, - "use aqo" BOOL, - "auto tune" BOOL, - "fspace hash" bigINT, - "t_naqo" TEXT, - "err_naqo" TEXT, - "iters" BIGINT, - "t_aqo" TEXT, - "err_aqo" TEXT, - "iters_aqo" BIGINT -) +CREATE OR REPLACE FUNCTION aqo_reset() +RETURNS bigint AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; + RETURN 2; END $$ 
LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_status(42); - aqo_status ------------- -(0 rows) +SELECT aqo_reset(); + aqo_reset +----------- + 2 +(1 row) SET ROLE regress_hacker; SHOW is_superuser; @@ -103,7 +84,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_status(bigint); +DROP FUNCTION aqo_reset(); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 3 @@ -208,29 +189,31 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_clear_hist(hash bigint) -RETURNS VOID +CREATE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer AS $$ BEGIN + RETURN 1; END $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_clear_hist" already exists with same argument types +ERROR: function "aqo_drop_class" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_clear_hist(hash bigint) -RETURNS VOID +CREATE OR REPLACE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; + RETURN 2; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_clear_hist(42); - aqo_clear_hist +SELECT aqo_drop_class(42); + aqo_drop_class ---------------- - + 2 (1 row) SET ROLE regress_hacker; @@ -241,7 +224,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_clear_hist(bigint); +DROP FUNCTION aqo_drop_class(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 6 @@ -254,8 +237,8 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_drop(hash bigint) -RETURNS VOID +CREATE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) AS $$ BEGIN END @@ -263,21 +246,20 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_drop" already exists with same argument types +ERROR: function "aqo_execution_time" already exists with same 
argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_drop(hash bigint) -RETURNS VOID +CREATE OR REPLACE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_drop(42); - aqo_drop ----------- - -(1 row) +SELECT aqo_execution_time(true); + aqo_execution_time +-------------------- +(0 rows) SET ROLE regress_hacker; SHOW is_superuser; @@ -287,7 +269,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_drop(bigint); +DROP FUNCTION aqo_execution_time(boolean); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 7 @@ -300,8 +282,8 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_ne_queries() -RETURNS SETOF int +CREATE FUNCTION aqo_memory_usage(OUT name text, OUT allocated_size int, OUT used_size int) +RETURNS SETOF record AS $$ BEGIN END @@ -309,19 +291,19 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_ne_queries" already exists with same argument types +ERROR: function "aqo_memory_usage" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_ne_queries() -RETURNS SETOF int +CREATE OR REPLACE FUNCTION aqo_memory_usage(OUT name text, OUT allocated_size int, OUT used_size int) +RETURNS SETOF record AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_ne_queries(); - aqo_ne_queries ----------------- +SELECT aqo_memory_usage(); + aqo_memory_usage +------------------ (0 rows) SET ROLE regress_hacker; @@ -332,43 +314,9 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_ne_queries(); +DROP FUNCTION aqo_memory_usage(); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping --- Test 8 -RESET ROLE; -ALTER ROLE regress_hacker NOSUPERUSER; -SET 
ROLE regress_hacker; -SHOW is_superuser; - is_superuser --------------- - off -(1 row) - -CREATE FUNCTION aqo_migrate_to_1_1_get_pk(rel text) -RETURNS regclass -AS $$ -DECLARE - ret regclass; -BEGIN - ALTER ROLE regress_hacker SUPERUSER; - SELECT * FROM aqo_migrate_to_1_1_get_pk(rel::regclass) INTO ret; - RETURN ret; -END -$$ LANGUAGE plpgsql; -RESET ROLE; -CREATE EXTENSION aqo; --- Test result (must be 'off') -SET ROLE regress_hacker; -SHOW is_superuser; - is_superuser --------------- - off -(1 row) - -RESET ROLE; -DROP FUNCTION aqo_migrate_to_1_1_get_pk(text); -DROP EXTENSION IF EXISTS aqo; -- Cleanup RESET ROLE; DROP OWNED BY regress_hacker CASCADE; diff --git a/sql/aqo_CVE-2020-14350.sql b/sql/aqo_CVE-2020-14350.sql index 75833223..c4979344 100644 --- a/sql/aqo_CVE-2020-14350.sql +++ b/sql/aqo_CVE-2020-14350.sql @@ -44,21 +44,11 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_status(hash bigint) -RETURNS TABLE ( - "learn" BOOL, - "use aqo" BOOL, - "auto tune" BOOL, - "fspace hash" bigINT, - "t_naqo" TEXT, - "err_naqo" TEXT, - "iters" BIGINT, - "t_aqo" TEXT, - "err_aqo" TEXT, - "iters_aqo" BIGINT -) +CREATE FUNCTION aqo_reset() +RETURNS bigint AS $$ BEGIN + RETURN 1; END $$ LANGUAGE plpgsql; @@ -67,33 +57,23 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_status(hash bigint) -RETURNS TABLE ( - "learn" BOOL, - "use aqo" BOOL, - "auto tune" BOOL, - "fspace hash" bigINT, - "t_naqo" TEXT, - "err_naqo" TEXT, - "iters" BIGINT, - "t_aqo" TEXT, - "err_aqo" TEXT, - "iters_aqo" BIGINT -) +CREATE OR REPLACE FUNCTION aqo_reset() +RETURNS bigint AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; + RETURN 2; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_status(42); +SELECT aqo_reset(); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_status(bigint); +DROP FUNCTION aqo_reset(); DROP EXTENSION IF EXISTS aqo; -- Test 3 @@ -177,10 +157,11 @@ ALTER 
ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_clear_hist(hash bigint) -RETURNS VOID +CREATE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer AS $$ BEGIN + RETURN 1; END $$ LANGUAGE plpgsql; @@ -189,22 +170,23 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_clear_hist(hash bigint) -RETURNS VOID +CREATE OR REPLACE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; + RETURN 2; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_clear_hist(42); +SELECT aqo_drop_class(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_clear_hist(bigint); +DROP FUNCTION aqo_drop_class(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 6 @@ -214,8 +196,8 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_drop(hash bigint) -RETURNS VOID +CREATE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) AS $$ BEGIN END @@ -226,8 +208,8 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_drop(hash bigint) -RETURNS VOID +CREATE OR REPLACE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; @@ -235,13 +217,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_drop(42); +SELECT aqo_execution_time(true); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_drop(bigint); +DROP FUNCTION aqo_execution_time(boolean); DROP EXTENSION IF EXISTS aqo; -- Test 7 @@ -251,8 +233,8 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_ne_queries() -RETURNS SETOF int +CREATE FUNCTION aqo_memory_usage(OUT name text, OUT allocated_size 
int, OUT used_size int) +RETURNS SETOF record AS $$ BEGIN END @@ -263,52 +245,22 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_ne_queries() -RETURNS SETOF int -AS $$ -BEGIN - ALTER ROLE regress_hacker SUPERUSER; -END -$$ LANGUAGE plpgsql; - -RESET ROLE; -SELECT aqo_ne_queries(); - -SET ROLE regress_hacker; -SHOW is_superuser; - -RESET ROLE; -DROP FUNCTION aqo_ne_queries(); -DROP EXTENSION IF EXISTS aqo; - --- Test 8 -RESET ROLE; -ALTER ROLE regress_hacker NOSUPERUSER; - -SET ROLE regress_hacker; -SHOW is_superuser; - -CREATE FUNCTION aqo_migrate_to_1_1_get_pk(rel text) -RETURNS regclass +CREATE OR REPLACE FUNCTION aqo_memory_usage(OUT name text, OUT allocated_size int, OUT used_size int) +RETURNS SETOF record AS $$ -DECLARE - ret regclass; BEGIN ALTER ROLE regress_hacker SUPERUSER; - SELECT * FROM aqo_migrate_to_1_1_get_pk(rel::regclass) INTO ret; - RETURN ret; END $$ LANGUAGE plpgsql; RESET ROLE; -CREATE EXTENSION aqo; +SELECT aqo_memory_usage(); --- Test result (must be 'off') SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_migrate_to_1_1_get_pk(text); +DROP FUNCTION aqo_memory_usage(); DROP EXTENSION IF EXISTS aqo; -- Cleanup From 3207d62f4edb3b839f27c13b5a4ca074daa29716 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 24 Mar 2023 08:28:48 +0500 Subject: [PATCH 179/203] Bugfix. Do away with possible conflict of hooks, declared as 'extern' in different libraries. To avoid such a problem in future, refactor AQO interfaces: declare all hooks as static, reduce number of exporting functions and introduce concept of *_init() function for a module that needs some actions in the PG_init() routine. Reviewed by: @Anisimov-ds P.S.: being cherry-picked from stable14 the commit changed drastically because of huge differences between the PG cores... 
--- aqo--1.6.sql | 6 +- aqo.c | 57 ++------------ aqo.h | 56 ++------------ aqo_shared.c | 31 ++++++-- aqo_shared.h | 7 +- cardinality_hooks.c | 184 +++++++++++++++++--------------------------- cardinality_hooks.h | 31 -------- hash.h | 5 ++ path_utils.c | 55 ++++++++----- path_utils.h | 12 +-- postprocessing.c | 159 +++++++++++++++++++++----------------- preprocessing.c | 54 ++++--------- preprocessing.h | 12 --- storage.c | 2 +- storage.h | 6 ++ 15 files changed, 259 insertions(+), 418 deletions(-) delete mode 100644 cardinality_hooks.h delete mode 100644 preprocessing.h diff --git a/aqo--1.6.sql b/aqo--1.6.sql index bb44cf22..90d4fb06 100644 --- a/aqo--1.6.sql +++ b/aqo--1.6.sql @@ -75,6 +75,7 @@ LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_execution_time(boolean) IS 'Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions.'; +-- Show how much shared memory AQO are using at the moment CREATE FUNCTION aqo_memory_usage( OUT name text, OUT allocated_size int, @@ -82,14 +83,11 @@ CREATE FUNCTION aqo_memory_usage( ) RETURNS SETOF record AS $$ - SELECT name, total_bytes, used_bytes FROM pg_backend_memory_contexts - WHERE name LIKE 'AQO%' - UNION SELECT name, allocated_size, size FROM pg_shmem_allocations WHERE name LIKE 'AQO%'; $$ LANGUAGE SQL; COMMENT ON FUNCTION aqo_memory_usage() IS -'Show allocated sizes and used sizes of aqo`s memory contexts and hash tables'; +'Show how much shared memory AQO are using at the moment'; -- -- Update or insert an aqo_data diff --git a/aqo.c b/aqo.c index 743f9ef6..33b56737 100644 --- a/aqo.c +++ b/aqo.c @@ -19,9 +19,7 @@ #include "aqo.h" #include "aqo_shared.h" -#include "cardinality_hooks.h" #include "path_utils.h" -#include "preprocessing.h" #include "storage.h" @@ -98,19 +96,6 @@ MemoryContext AQOLearnMemCtx = NULL; /* Additional plan info */ int 
njoins; -/* Saved hook values */ -post_parse_analyze_hook_type prev_post_parse_analyze_hook; -planner_hook_type prev_planner_hook; -ExecutorStart_hook_type prev_ExecutorStart_hook; -ExecutorRun_hook_type prev_ExecutorRun; -ExecutorEnd_hook_type prev_ExecutorEnd_hook; -set_baserel_rows_estimate_hook_type prev_set_foreign_rows_estimate_hook; -set_baserel_rows_estimate_hook_type prev_set_baserel_rows_estimate_hook; -get_parameterized_baserel_size_hook_type prev_get_parameterized_baserel_size_hook; -set_joinrel_size_estimates_hook_type prev_set_joinrel_size_estimates_hook; -get_parameterized_joinrel_size_hook_type prev_get_parameterized_joinrel_size_hook; -ExplainOnePlan_hook_type prev_ExplainOnePlan_hook; -ExplainOneNode_hook_type prev_ExplainOneNode_hook; /***************************************************************************** * @@ -324,42 +309,11 @@ _PG_init(void) NULL, NULL); - prev_shmem_startup_hook = shmem_startup_hook; - shmem_startup_hook = aqo_init_shmem; - prev_planner_hook = planner_hook; - planner_hook = aqo_planner; - prev_ExecutorStart_hook = ExecutorStart_hook; - ExecutorStart_hook = aqo_ExecutorStart; - prev_ExecutorRun = ExecutorRun_hook; - ExecutorRun_hook = aqo_ExecutorRun; - prev_ExecutorEnd_hook = ExecutorEnd_hook; - ExecutorEnd_hook = aqo_ExecutorEnd; - - /* Cardinality prediction hooks. 
*/ - prev_set_baserel_rows_estimate_hook = set_baserel_rows_estimate_hook; - set_foreign_rows_estimate_hook = aqo_set_baserel_rows_estimate; - set_baserel_rows_estimate_hook = aqo_set_baserel_rows_estimate; - prev_get_parameterized_baserel_size_hook = get_parameterized_baserel_size_hook; - get_parameterized_baserel_size_hook = aqo_get_parameterized_baserel_size; - prev_set_joinrel_size_estimates_hook = set_joinrel_size_estimates_hook; - set_joinrel_size_estimates_hook = aqo_set_joinrel_size_estimates; - prev_get_parameterized_joinrel_size_hook = get_parameterized_joinrel_size_hook; - get_parameterized_joinrel_size_hook = aqo_get_parameterized_joinrel_size; - prev_estimate_num_groups_hook = estimate_num_groups_hook; - estimate_num_groups_hook = aqo_estimate_num_groups_hook; - parampathinfo_postinit_hook = ppi_hook; - - prev_create_plan_hook = create_plan_hook; - create_plan_hook = aqo_create_plan_hook; - - /* Service hooks. */ - prev_ExplainOnePlan_hook = ExplainOnePlan_hook; - ExplainOnePlan_hook = print_into_explain; - prev_ExplainOneNode_hook = ExplainOneNode_hook; - ExplainOneNode_hook = print_node_explain; - - prev_create_upper_paths_hook = create_upper_paths_hook; - create_upper_paths_hook = aqo_store_upper_signature_hook; + aqo_shmem_init(); + aqo_preprocessing_init(); + aqo_postprocessing_init(); + aqo_cardinality_hooks_init(); + aqo_path_utils_init(); init_deactivated_queries_storage(); @@ -394,7 +348,6 @@ _PG_init(void) RegisterAQOPlanNodeMethods(); EmitWarningsOnPlaceholders("aqo"); - RequestAddinShmemSpace(aqo_memsize()); } /* diff --git a/aqo.h b/aqo.h index 9600b136..6f57a4d1 100644 --- a/aqo.h +++ b/aqo.h @@ -132,7 +132,6 @@ #include "nodes/nodeFuncs.h" #include "optimizer/pathnode.h" #include "optimizer/planner.h" -#include "optimizer/cost.h" #include "parser/analyze.h" #include "parser/parsetree.h" #include "utils/builtins.h" @@ -140,11 +139,9 @@ #include "utils/hsearch.h" #include "utils/memutils.h" #include "utils/rel.h" -#include 
"utils/fmgroids.h" #include "utils/snapmgr.h" #include "machine_learning.h" -//#include "storage.h" /* Check PostgreSQL version (9.6.0 contains important changes in planner) */ #if PG_VERSION_NUM < 90600 @@ -237,58 +234,15 @@ extern MemoryContext AQOCacheMemCtx; extern MemoryContext AQOPredictMemCtx; extern MemoryContext AQOLearnMemCtx; -/* Saved hook values in case of unload */ -extern post_parse_analyze_hook_type prev_post_parse_analyze_hook; -extern planner_hook_type prev_planner_hook; -extern ExecutorStart_hook_type prev_ExecutorStart_hook; -extern ExecutorRun_hook_type prev_ExecutorRun; -extern ExecutorEnd_hook_type prev_ExecutorEnd_hook; -extern set_baserel_rows_estimate_hook_type - prev_set_foreign_rows_estimate_hook; -extern set_baserel_rows_estimate_hook_type - prev_set_baserel_rows_estimate_hook; -extern get_parameterized_baserel_size_hook_type - prev_get_parameterized_baserel_size_hook; -extern set_joinrel_size_estimates_hook_type - prev_set_joinrel_size_estimates_hook; -extern get_parameterized_joinrel_size_hook_type - prev_get_parameterized_joinrel_size_hook; -extern ExplainOnePlan_hook_type prev_ExplainOnePlan_hook; -extern ExplainOneNode_hook_type prev_ExplainOneNode_hook; - -extern void ppi_hook(ParamPathInfo *ppi); extern int aqo_statement_timeout; -/* Hash functions */ -void get_eclasses(List *clauselist, int *nargs, int **args_hash, - int **eclass_hash); -int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); - - -/* Storage interaction */ -extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids); -extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids); - -/* Query preprocessing hooks */ -extern void print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, - ExplainState *es, const char *queryString, - ParamListInfo params, - const instr_time *planduration, - QueryEnvironment *queryEnv); -extern void print_node_explain(ExplainState *es, PlanState *ps, Plan *plan); - /* 
Cardinality estimation */ extern double predict_for_relation(List *restrict_clauses, List *selectivities, List *relsigns, int *fss); -/* Query execution statistics collecting hooks */ -void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags); -void aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, - uint64 count, bool execute_once); -void aqo_ExecutorEnd(QueryDesc *queryDesc); - /* Automatic query tuning */ extern void automatical_query_tuning(uint64 query_hash, struct StatEntry *stat); +extern double get_mean(double *elems, int nelems); /* Utilities */ extern int int_cmp(const void *a, const void *b); @@ -306,8 +260,10 @@ extern void selectivity_cache_clear(void); extern bool IsQueryDisabled(void); -extern bool update_query_timeout(uint64 queryid, int64 smart_timeout); -extern double get_mean(double *elems, int nelems); - extern List *cur_classes; + +extern void aqo_cardinality_hooks_init(void); +extern void aqo_preprocessing_init(void); +extern void aqo_postprocessing_init(void); + #endif diff --git a/aqo_shared.c b/aqo_shared.c index 0a6a8db6..69918020 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -6,27 +6,29 @@ #include "lib/dshash.h" #include "miscadmin.h" +#include "storage/ipc.h" #include "storage/shmem.h" #include "aqo_shared.h" #include "storage.h" -shmem_startup_hook_type prev_shmem_startup_hook = NULL; AQOSharedState *aqo_state = NULL; int fs_max_items = 10000; /* Max number of different feature spaces in ML model */ int fss_max_items = 100000; /* Max number of different feature subspaces in ML model */ +static shmem_startup_hook_type aqo_shmem_startup_next = NULL; + static void on_shmem_shutdown(int code, Datum arg); -void +static void aqo_init_shmem(void) { bool found; HASHCTL info; - if (prev_shmem_startup_hook) - prev_shmem_startup_hook(); + if (aqo_shmem_startup_next) + aqo_shmem_startup_next(); aqo_state = NULL; stat_htab = NULL; @@ -116,10 +118,14 @@ on_shmem_shutdown(int code, Datum arg) return; } -Size -aqo_memsize(void) + +/* + * 
Requests any additional shared memory required for aqo. + */ +static void +aqo_shmem_request(void) { - Size size; + Size size; size = MAXALIGN(sizeof(AQOSharedState)); size = add_size(size, hash_estimate_size(fs_max_items, sizeof(AQOSharedState))); @@ -128,5 +134,14 @@ aqo_memsize(void) size = add_size(size, hash_estimate_size(fss_max_items, sizeof(DataEntry))); size = add_size(size, hash_estimate_size(fs_max_items, sizeof(QueriesEntry))); - return size; + RequestAddinShmemSpace(size); +} + +void +aqo_shmem_init(void) +{ + aqo_shmem_startup_next = shmem_startup_hook; + shmem_startup_hook = aqo_init_shmem; + + aqo_shmem_request(); } diff --git a/aqo_shared.h b/aqo_shared.h index e922fb1c..ee9e3087 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -1,9 +1,6 @@ #ifndef AQO_SHARED_H #define AQO_SHARED_H -#include "lib/dshash.h" -#include "storage/dsm.h" -#include "storage/ipc.h" #include "storage/lwlock.h" #include "utils/dsa.h" @@ -31,13 +28,11 @@ typedef struct AQOSharedState } AQOSharedState; -extern shmem_startup_hook_type prev_shmem_startup_hook; extern AQOSharedState *aqo_state; extern int fs_max_items; /* Max number of feature spaces that AQO can operate */ extern int fss_max_items; -extern Size aqo_memsize(void); -extern void aqo_init_shmem(void); +extern void aqo_shmem_init(void); #endif /* AQO_SHARED_H */ diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 049f674f..95785668 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -27,115 +27,31 @@ #include "postgres.h" +#include "optimizer/cost.h" +#include "utils/selfuncs.h" + #include "aqo.h" -#include "cardinality_hooks.h" #include "hash.h" #include "machine_learning.h" #include "path_utils.h" - -estimate_num_groups_hook_type prev_estimate_num_groups_hook = NULL; +#include "storage.h" double predicted_ppi_rows; double fss_ppi_hash; - -/* - * Calls standard set_baserel_rows_estimate or its previous hook. 
- */ -static void -default_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) -{ - if (prev_set_baserel_rows_estimate_hook) - prev_set_baserel_rows_estimate_hook(root, rel); - else - set_baserel_rows_estimate_standard(root, rel); -} - -/* - * Calls standard get_parameterized_baserel_size or its previous hook. - */ -static double -default_get_parameterized_baserel_size(PlannerInfo *root, - RelOptInfo *rel, - List *param_clauses) -{ - if (prev_get_parameterized_baserel_size_hook) - return prev_get_parameterized_baserel_size_hook(root, rel, param_clauses); - else - return get_parameterized_baserel_size_standard(root, rel, param_clauses); -} - -/* - * Calls standard get_parameterized_joinrel_size or its previous hook. - */ -static double -default_get_parameterized_joinrel_size(PlannerInfo *root, - RelOptInfo *rel, - Path *outer_path, - Path *inner_path, - SpecialJoinInfo *sjinfo, - List *restrict_clauses) -{ - if (prev_get_parameterized_joinrel_size_hook) - return prev_get_parameterized_joinrel_size_hook(root, rel, - outer_path, - inner_path, - sjinfo, - restrict_clauses); - else - return get_parameterized_joinrel_size_standard(root, rel, - outer_path, - inner_path, - sjinfo, - restrict_clauses); -} - -/* - * Calls standard set_joinrel_size_estimates or its previous hook. 
- */ -static void -default_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - SpecialJoinInfo *sjinfo, - List *restrictlist) -{ - if (prev_set_joinrel_size_estimates_hook) - prev_set_joinrel_size_estimates_hook(root, rel, - outer_rel, - inner_rel, - sjinfo, - restrictlist); - else - set_joinrel_size_estimates_standard(root, rel, - outer_rel, - inner_rel, - sjinfo, - restrictlist); -} - -static double -default_estimate_num_groups(PlannerInfo *root, List *groupExprs, - Path *subpath, RelOptInfo *grouped_rel, - List **pgset) -{ - double input_rows = subpath->rows; - - if (prev_estimate_num_groups_hook != NULL) - return (*prev_estimate_num_groups_hook)(root, groupExprs, - subpath, - grouped_rel, - pgset); - else - return estimate_num_groups(root, groupExprs, input_rows, pgset); -} +static set_baserel_rows_estimate_hook_type aqo_set_baserel_rows_estimate_next = NULL; +static get_parameterized_baserel_size_hook_type aqo_get_parameterized_baserel_size_next = NULL; +static set_joinrel_size_estimates_hook_type aqo_set_joinrel_size_estimates_next = NULL; +static get_parameterized_joinrel_size_hook_type aqo_get_parameterized_joinrel_size_next = NULL; +static set_parampathinfo_postinit_hook_type aqo_set_parampathinfo_postinit_next = NULL; +static estimate_num_groups_hook_type aqo_estimate_num_groups_next = NULL; /* * Our hook for setting baserel rows estimate. * Extracts clauses, their selectivities and list of relation relids and * passes them to predict_for_relation. 
*/ -void +static void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) { double predicted; @@ -187,13 +103,15 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) default_estimator: rel->predicted_cardinality = -1.; - default_set_baserel_rows_estimate(root, rel); + aqo_set_baserel_rows_estimate_next(root, rel); } - -void -ppi_hook(ParamPathInfo *ppi) +static void +aqo_parampathinfo_postinit(ParamPathInfo *ppi) { + if (aqo_set_parampathinfo_postinit_next) + (*aqo_set_parampathinfo_postinit_next)(ppi); + if (IsQueryDisabled()) return; @@ -206,7 +124,7 @@ ppi_hook(ParamPathInfo *ppi) * Extracts clauses (including parametrization ones), their selectivities * and list of relation relids and passes them to predict_for_relation. */ -double +static double aqo_get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, List *param_clauses) @@ -284,7 +202,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, return predicted; default_estimator: - return default_get_parameterized_baserel_size(root, rel, param_clauses); + return aqo_get_parameterized_baserel_size_next(root, rel, param_clauses); } /* @@ -292,7 +210,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, * Extracts clauses, their selectivities and list of relation relids and * passes them to predict_for_relation. 
*/ -void +static void aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *outer_rel, RelOptInfo *inner_rel, @@ -354,9 +272,8 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, default_estimator: rel->predicted_cardinality = -1; - default_set_joinrel_size_estimates(root, rel, - outer_rel, inner_rel, - sjinfo, restrictlist); + aqo_set_joinrel_size_estimates_next(root, rel, outer_rel, inner_rel, + sjinfo, restrictlist); } /* @@ -364,7 +281,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * Extracts clauses (including parametrization ones), their selectivities * and list of relation relids and passes them to predict_for_relation. */ -double +static double aqo_get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, Path *outer_path, @@ -421,7 +338,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, return predicted; default_estimator: - return default_get_parameterized_joinrel_size(root, rel, + return aqo_get_parameterized_joinrel_size_next(root, rel, outer_path, inner_path, sjinfo, clauses); } @@ -460,10 +377,10 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, return (prediction <= 0) ? 
-1 : prediction; } -double -aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, - Path *subpath, RelOptInfo *grouped_rel, - List **pgset) +static double +aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, + Path *subpath, RelOptInfo *grouped_rel, + List **pgset) { int fss; double predicted; @@ -476,7 +393,7 @@ aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, /* XXX: Don't support some GROUPING options */ goto default_estimator; - if (prev_estimate_num_groups_hook != NULL) + if (aqo_estimate_num_groups_next != NULL) elog(WARNING, "AQO replaced another estimator of a groups number"); if (groupExprs == NIL) @@ -503,6 +420,45 @@ aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, MemoryContextSwitchTo(old_ctx_m); default_estimator: - return default_estimate_num_groups(root, groupExprs, subpath, grouped_rel, - pgset); + if (aqo_estimate_num_groups_next) + return aqo_estimate_num_groups_next(root, groupExprs, subpath, + grouped_rel, pgset); + else + return estimate_num_groups(root, groupExprs, subpath->rows, + pgset); +} + +void +aqo_cardinality_hooks_init(void) +{ + + /* Cardinality prediction hooks. */ + aqo_set_baserel_rows_estimate_next = set_baserel_rows_estimate_hook ? + set_baserel_rows_estimate_hook : + set_baserel_rows_estimate_standard; + set_baserel_rows_estimate_hook = aqo_set_baserel_rows_estimate; + + /* XXX: we have a problem here. Should be redesigned later */ + set_foreign_rows_estimate_hook = aqo_set_baserel_rows_estimate; + + aqo_get_parameterized_baserel_size_next = get_parameterized_baserel_size_hook ? + get_parameterized_baserel_size_hook : + get_parameterized_baserel_size_standard; + get_parameterized_baserel_size_hook = aqo_get_parameterized_baserel_size; + + aqo_set_joinrel_size_estimates_next = set_joinrel_size_estimates_hook ? 
+ set_joinrel_size_estimates_hook : + set_joinrel_size_estimates_standard; + set_joinrel_size_estimates_hook = aqo_set_joinrel_size_estimates; + + aqo_get_parameterized_joinrel_size_next = get_parameterized_joinrel_size_hook ? + get_parameterized_joinrel_size_hook : + get_parameterized_joinrel_size_standard; + get_parameterized_joinrel_size_hook = aqo_get_parameterized_joinrel_size; + + aqo_set_parampathinfo_postinit_next = parampathinfo_postinit_hook; + parampathinfo_postinit_hook = aqo_parampathinfo_postinit; + + aqo_estimate_num_groups_next = estimate_num_groups_hook; + estimate_num_groups_hook = aqo_estimate_num_groups; } diff --git a/cardinality_hooks.h b/cardinality_hooks.h deleted file mode 100644 index 0e8c65c0..00000000 --- a/cardinality_hooks.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef CARDINALITY_HOOKS_H -#define CARDINALITY_HOOKS_H - -#include "optimizer/planner.h" -#include "utils/selfuncs.h" - -extern estimate_num_groups_hook_type prev_estimate_num_groups_hook; - - -/* Cardinality estimation hooks */ -extern void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel); -extern double aqo_get_parameterized_baserel_size(PlannerInfo *root, - RelOptInfo *rel, - List *param_clauses); -extern void aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - SpecialJoinInfo *sjinfo, - List *restrictlist); -extern double aqo_get_parameterized_joinrel_size(PlannerInfo *root, - RelOptInfo *rel, - Path *outer_path, - Path *inner_path, - SpecialJoinInfo *sjinfo, - List *restrict_clauses); -extern double aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, - Path *subpath, - RelOptInfo *grouped_rel, - List **pgset); - -#endif /* CARDINALITY_HOOKS_H */ diff --git a/hash.h b/hash.h index 01c90bed..419941f6 100644 --- a/hash.h +++ b/hash.h @@ -14,4 +14,9 @@ extern int get_fss_for_object(List *relsigns, List *clauselist, extern int get_int_array_hash(int *arr, int len); extern int 
get_grouped_exprs_hash(int fss, List *group_exprs); +/* Hash functions */ +void get_eclasses(List *clauselist, int *nargs, int **args_hash, + int **eclass_hash); +int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); + #endif /* AQO_HASH_H */ \ No newline at end of file diff --git a/path_utils.c b/path_utils.c index 3ad90065..942f1552 100644 --- a/path_utils.c +++ b/path_utils.c @@ -15,8 +15,11 @@ #include "access/relation.h" #include "nodes/readfuncs.h" +#include "optimizer/cost.h" #include "optimizer/optimizer.h" +#include "optimizer/planmain.h" #include "path_utils.h" +#include "storage/lmgr.h" #include "utils/syscache.h" #include "utils/lsyscache.h" @@ -30,13 +33,6 @@ expression_tree_mutator(node, mutator, context, 0) #endif -/* - * Hook on creation of a plan node. We need to store AQO-specific data to - * support learning stage. - */ -create_plan_hook_type prev_create_plan_hook = NULL; - -create_upper_paths_hook_type prev_create_upper_paths_hook = NULL; static AQOPlanNode DefaultAQOPlanNode = { @@ -54,6 +50,15 @@ static AQOPlanNode DefaultAQOPlanNode = .prediction = -1 }; +/* + * Hook on creation of a plan node. We need to store AQO-specific data to + * support learning stage. + */ +static create_plan_hook_type aqo_create_plan_next = NULL; + +static create_upper_paths_hook_type aqo_create_upper_paths_next = NULL; + + static AQOPlanNode * create_aqo_plan_node() { @@ -180,8 +185,6 @@ hashTempTupleDesc(TupleDesc desc) return s; } -#include "storage/lmgr.h" - /* * Get list of relation indexes and prepare list of permanent table reloids, * list of temporary table reloids (can be changed between query launches) and @@ -531,15 +534,15 @@ is_appropriate_path(Path *path) * store AQO prediction in the same context, as the plan. So, explicitly free * all unneeded data. 
*/ -void -aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) +static void +aqo_create_plan(PlannerInfo *root, Path *src, Plan **dest) { bool is_join_path; Plan *plan = *dest; AQOPlanNode *node; - if (prev_create_plan_hook) - prev_create_plan_hook(root, src, dest); + if (aqo_create_plan_next) + aqo_create_plan_next(root, src, dest); if (!query_context.use_aqo && !query_context.learn_aqo && !query_context.collect_stat) @@ -784,20 +787,20 @@ RegisterAQOPlanNodeMethods(void) * * Assume, that we are last in the chain of path creators. */ -void -aqo_store_upper_signature_hook(PlannerInfo *root, - UpperRelationKind stage, - RelOptInfo *input_rel, - RelOptInfo *output_rel, - void *extra) +static void +aqo_store_upper_signature(PlannerInfo *root, + UpperRelationKind stage, + RelOptInfo *input_rel, + RelOptInfo *output_rel, + void *extra) { A_Const *fss_node = makeNode(A_Const); RelSortOut rels = {NIL, NIL}; List *clauses; List *selectivities; - if (prev_create_upper_paths_hook) - (*prev_create_upper_paths_hook)(root, stage, input_rel, output_rel, extra); + if (aqo_create_upper_paths_next) + (*aqo_create_upper_paths_next)(root, stage, input_rel, output_rel, extra); if (!query_context.use_aqo && !query_context.learn_aqo && !force_collect_stat) /* Includes 'disabled query' state. 
*/ @@ -816,3 +819,13 @@ aqo_store_upper_signature_hook(PlannerInfo *root, NULL, NULL); output_rel->ext_nodes = lappend(output_rel->ext_nodes, (void *) fss_node); } + +void +aqo_path_utils_init(void) +{ + aqo_create_plan_next = create_plan_hook; + create_plan_hook = aqo_create_plan; + + aqo_create_upper_paths_next = create_upper_paths_hook; + create_upper_paths_hook = aqo_store_upper_signature; +} diff --git a/path_utils.h b/path_utils.h index 1803e08d..cbe83da0 100644 --- a/path_utils.h +++ b/path_utils.h @@ -3,7 +3,6 @@ #include "nodes/extensible.h" #include "nodes/pathnodes.h" -#include "optimizer/planmain.h" #include "optimizer/planner.h" #define AQO_PLAN_NODE "AQOPlanNode" @@ -52,8 +51,6 @@ typedef struct AQOPlanNode #define booltostr(x) ((x) ? "true" : "false") -extern create_plan_hook_type prev_create_plan_hook; - /* Extracting path information utilities */ extern List *get_selectivities(PlannerInfo *root, List *clauses, @@ -67,16 +64,11 @@ extern List *get_path_clauses(Path *path, PlannerInfo *root, List **selectivities); -extern void aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest); extern AQOPlanNode *get_aqo_plan_node(Plan *plan, bool create); extern void RegisterAQOPlanNodeMethods(void); -extern create_upper_paths_hook_type prev_create_upper_paths_hook; -extern void aqo_store_upper_signature_hook(PlannerInfo *root, - UpperRelationKind stage, - RelOptInfo *input_rel, - RelOptInfo *output_rel, - void *extra); extern List *aqo_get_clauses(PlannerInfo *root, List *restrictlist); +void aqo_path_utils_init(void); + #endif /* PATH_UTILS_H */ diff --git a/postprocessing.c b/postprocessing.c index aa82a534..d4763955 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -27,7 +27,6 @@ #include "hash.h" #include "path_utils.h" #include "machine_learning.h" -#include "preprocessing.h" #include "storage.h" @@ -58,6 +57,13 @@ static int64 growth_rate = 3; static char *AQOPrivateData = "AQOPrivateData"; static char *PlanStateInfo = "PlanStateInfo"; +/* 
Saved hooks */ +static ExecutorStart_hook_type aqo_ExecutorStart_next = NULL; +static ExecutorRun_hook_type aqo_ExecutorRun_next = NULL; +static ExecutorEnd_hook_type aqo_ExecutorEnd_next = NULL; +static ExplainOnePlan_hook_type aqo_ExplainOnePlan_next = NULL; +static ExplainOneNode_hook_type aqo_ExplainOneNode_next = NULL; + /* Query execution statistics collecting utilities */ static void atomic_fss_learn_step(uint64 fhash, int fss, OkNNrdata *data, @@ -542,7 +548,7 @@ learnOnPlanState(PlanState *p, void *context) /* * Set up flags to store cardinality statistics. */ -void +static void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags) { instr_time now; @@ -594,10 +600,7 @@ aqo_ExecutorStart(QueryDesc *queryDesc, int eflags) StoreToQueryEnv(queryDesc); } - if (prev_ExecutorStart_hook) - prev_ExecutorStart_hook(queryDesc, eflags); - else - standard_ExecutorStart(queryDesc, eflags); + aqo_ExecutorStart_next(queryDesc, eflags); if (use_aqo) StorePlanInternals(queryDesc); @@ -706,7 +709,7 @@ set_timeout_if_need(QueryDesc *queryDesc) /* * ExecutorRun hook. */ -void +static void aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once) { @@ -722,10 +725,7 @@ aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, PG_TRY(); { - if (prev_ExecutorRun) - prev_ExecutorRun(queryDesc, direction, count, execute_once); - else - standard_ExecutorRun(queryDesc, direction, count, execute_once); + aqo_ExecutorRun_next(queryDesc, direction, count, execute_once); } PG_FINALLY(); { @@ -743,7 +743,7 @@ aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, * cardinality statistics. * Also it updates query execution statistics in aqo_query_stat. 
*/ -void +static void aqo_ExecutorEnd(QueryDesc *queryDesc) { double execution_time; @@ -841,10 +841,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) MemoryContextSwitchTo(oldctx); MemoryContextReset(AQOLearnMemCtx); - if (prev_ExecutorEnd_hook) - prev_ExecutorEnd_hook(queryDesc); - else - standard_ExecutorEnd(queryDesc); + aqo_ExecutorEnd_next(queryDesc); /* * standard_ExecutorEnd clears the queryDesc->planstate. After this point no @@ -975,7 +972,64 @@ ExtractFromQueryEnv(QueryDesc *queryDesc) return true; } -void +/* + * Prints if the plan was constructed with AQO. + */ +static void +print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, + ExplainState *es, const char *queryString, + ParamListInfo params, const instr_time *planduration, + QueryEnvironment *queryEnv) +{ + if (aqo_ExplainOnePlan_next) + aqo_ExplainOnePlan_next(plannedstmt, into, es, queryString, + params, planduration, queryEnv); + + if (IsQueryDisabled() || !aqo_show_details) + return; + + /* Report to user about aqo state only in verbose mode */ + ExplainPropertyBool("Using aqo", query_context.use_aqo, es); + + switch (aqo_mode) + { + case AQO_MODE_INTELLIGENT: + ExplainPropertyText("AQO mode", "INTELLIGENT", es); + break; + case AQO_MODE_FORCED: + ExplainPropertyText("AQO mode", "FORCED", es); + break; + case AQO_MODE_CONTROLLED: + ExplainPropertyText("AQO mode", "CONTROLLED", es); + break; + case AQO_MODE_LEARN: + ExplainPropertyText("AQO mode", "LEARN", es); + break; + case AQO_MODE_FROZEN: + ExplainPropertyText("AQO mode", "FROZEN", es); + break; + case AQO_MODE_DISABLED: + ExplainPropertyText("AQO mode", "DISABLED", es); + break; + default: + elog(ERROR, "Bad AQO state"); + break; + } + + /* + * Query class provides an user the conveniently use of the AQO + * auxiliary functions. 
+ */ + if (aqo_mode != AQO_MODE_DISABLED || force_collect_stat) + { + if (aqo_show_hash) + ExplainPropertyInteger("Query hash", NULL, + query_context.query_hash, es); + ExplainPropertyInteger("JOINS", NULL, njoins, es); + } +} + +static void print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) { int wrkrs = 1; @@ -983,8 +1037,8 @@ print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) AQOPlanNode *aqo_node; /* Extension, which took a hook early can be executed early too. */ - if (prev_ExplainOneNode_hook) - prev_ExplainOneNode_hook(es, ps, plan); + if (aqo_ExplainOneNode_next) + aqo_ExplainOneNode_next(es, ps, plan); if (IsQueryDisabled() || !plan || es->format != EXPLAIN_FORMAT_TEXT) return; @@ -1042,59 +1096,20 @@ print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) appendStringInfo(es->str, ", fss=%d", aqo_node->fss); } -/* - * Prints if the plan was constructed with AQO. - */ void -print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, - ExplainState *es, const char *queryString, - ParamListInfo params, const instr_time *planduration, - QueryEnvironment *queryEnv) +aqo_postprocessing_init(void) { - if (prev_ExplainOnePlan_hook) - prev_ExplainOnePlan_hook(plannedstmt, into, es, queryString, - params, planduration, queryEnv); - - if (IsQueryDisabled() || !aqo_show_details) - return; - - /* Report to user about aqo state only in verbose mode */ - ExplainPropertyBool("Using aqo", query_context.use_aqo, es); - - switch (aqo_mode) - { - case AQO_MODE_INTELLIGENT: - ExplainPropertyText("AQO mode", "INTELLIGENT", es); - break; - case AQO_MODE_FORCED: - ExplainPropertyText("AQO mode", "FORCED", es); - break; - case AQO_MODE_CONTROLLED: - ExplainPropertyText("AQO mode", "CONTROLLED", es); - break; - case AQO_MODE_LEARN: - ExplainPropertyText("AQO mode", "LEARN", es); - break; - case AQO_MODE_FROZEN: - ExplainPropertyText("AQO mode", "FROZEN", es); - break; - case AQO_MODE_DISABLED: - ExplainPropertyText("AQO mode", "DISABLED", es); 
- break; - default: - elog(ERROR, "Bad AQO state"); - break; - } - - /* - * Query class provides an user the conveniently use of the AQO - * auxiliary functions. - */ - if (aqo_mode != AQO_MODE_DISABLED || force_collect_stat) - { - if (aqo_show_hash) - ExplainPropertyInteger("Query hash", NULL, - query_context.query_hash, es); - ExplainPropertyInteger("JOINS", NULL, njoins, es); - } + /* Executor hooks */ + aqo_ExecutorStart_next = ExecutorStart_hook ? ExecutorStart_hook : standard_ExecutorStart; + ExecutorStart_hook = aqo_ExecutorStart; + aqo_ExecutorRun_next = ExecutorRun_hook ? ExecutorRun_hook : standard_ExecutorRun; + ExecutorRun_hook = aqo_ExecutorRun; + aqo_ExecutorEnd_next = ExecutorEnd_hook ? ExecutorEnd_hook : standard_ExecutorEnd; + ExecutorEnd_hook = aqo_ExecutorEnd; + + /* Service hooks. */ + aqo_ExplainOnePlan_next = ExplainOnePlan_hook; + ExplainOnePlan_hook = print_into_explain; + aqo_ExplainOneNode_next = ExplainOneNode_hook; + ExplainOneNode_hook = print_node_explain; } diff --git a/preprocessing.c b/preprocessing.c index 60e599ee..714b06a8 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -64,7 +64,6 @@ #include "parser/scansup.h" #include "aqo.h" #include "hash.h" -#include "preprocessing.h" #include "storage.h" /* List of feature spaces, that are processing in this backend. */ @@ -72,30 +71,12 @@ List *cur_classes = NIL; int aqo_join_threshold = 0; +static planner_hook_type aqo_planner_next = NULL; + +static void disable_aqo_for_query(void); static bool isQueryUsingSystemRelation(Query *query); static bool isQueryUsingSystemRelation_walker(Node *node, void *context); -/* - * Calls standard query planner or its previous hook. 
- */ -static PlannedStmt * -call_default_planner(Query *parse, - const char *query_string, - int cursorOptions, - ParamListInfo boundParams) -{ - if (prev_planner_hook) - return prev_planner_hook(parse, - query_string, - cursorOptions, - boundParams); - else - return standard_planner(parse, - query_string, - cursorOptions, - boundParams); -} - /* * Can AQO be used for the query? */ @@ -119,10 +100,8 @@ aqoIsEnabled(Query *parse) * Creates an entry in aqo_queries for new type of query if it is * necessary, i. e. AQO mode is "intelligent". */ -PlannedStmt * -aqo_planner(Query *parse, - const char *query_string, - int cursorOptions, +static PlannedStmt * +aqo_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams) { bool query_is_stored = false; @@ -149,10 +128,7 @@ aqo_planner(Query *parse, MemoryContextSwitchTo(oldctx); disable_aqo_for_query(); - return call_default_planner(parse, - query_string, - cursorOptions, - boundParams); + return aqo_planner_next(parse, query_string, cursorOptions, boundParams); } selectivity_cache_clear(); @@ -173,10 +149,7 @@ aqo_planner(Query *parse, MemoryContextSwitchTo(oldctx); disable_aqo_for_query(); - return call_default_planner(parse, - query_string, - cursorOptions, - boundParams); + return aqo_planner_next(parse, query_string, cursorOptions, boundParams); } elog(DEBUG1, "AQO will be used for query '%s', class "UINT64_FORMAT, @@ -345,9 +318,9 @@ aqo_planner(Query *parse, INSTR_TIME_SET_CURRENT(query_context.start_planning_time); { PlannedStmt *stmt; + MemoryContextSwitchTo(oldctx); - stmt = call_default_planner(parse, query_string, - cursorOptions, boundParams); + stmt = aqo_planner_next(parse, query_string, cursorOptions, boundParams); /* Release the memory, allocated for AQO predictions */ MemoryContextReset(AQOPredictMemCtx); @@ -358,7 +331,7 @@ aqo_planner(Query *parse, /* * Turn off all AQO functionality for the current query. 
*/ -void +static void disable_aqo_for_query(void) { query_context.learn_aqo = false; @@ -507,3 +480,10 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) isQueryUsingSystemRelation_walker, context); } + +void +aqo_preprocessing_init(void) +{ + aqo_planner_next = planner_hook ? planner_hook : standard_planner; + planner_hook = aqo_planner; +} \ No newline at end of file diff --git a/preprocessing.h b/preprocessing.h deleted file mode 100644 index f27deb91..00000000 --- a/preprocessing.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef __PREPROCESSING_H__ -#define __PREPROCESSING_H__ - -#include "nodes/pathnodes.h" -#include "nodes/plannodes.h" -extern PlannedStmt *aqo_planner(Query *parse, - const char *query_string, - int cursorOptions, - ParamListInfo boundParams); -extern void disable_aqo_for_query(void); - -#endif /* __PREPROCESSING_H__ */ diff --git a/storage.c b/storage.c index 64bc4373..9e6cf306 100644 --- a/storage.c +++ b/storage.c @@ -22,11 +22,11 @@ #include "funcapi.h" #include "miscadmin.h" #include "pgstat.h" +#include "storage/ipc.h" #include "aqo.h" #include "aqo_shared.h" #include "machine_learning.h" -#include "preprocessing.h" #include "storage.h" diff --git a/storage.h b/storage.h index 35d94336..2b4e4cdd 100644 --- a/storage.h +++ b/storage.h @@ -164,4 +164,10 @@ extern void init_deactivated_queries_storage(void); extern bool query_is_deactivated(uint64 query_hash); extern void add_deactivated_query(uint64 query_hash); +/* Storage interaction */ +extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids); +extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids); + +extern bool update_query_timeout(uint64 queryid, int64 smart_timeout); + #endif /* STORAGE_H */ From e1761d4d837dff6d8ab08cbb081c69693c4dd6d0 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Sat, 25 Mar 2023 22:13:15 +0500 Subject: [PATCH 180/203] Enhancement. 
Report if someone external inserted a hook into the chain of AQO prediction hooks. It isn't a strict rule, but we should know about that. --- cardinality_hooks.c | 100 ++++++++++++++++++++++++++++++-------------- 1 file changed, 68 insertions(+), 32 deletions(-) diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 95785668..1dfe6b65 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -39,6 +39,12 @@ double predicted_ppi_rows; double fss_ppi_hash; +/* + * Cardinality prediction hooks. + * It isn't clear what to do if someone else tries to live in this chain. + * Of course, someone may want to just report some stat or something like that. + * So, it can be legal, sometimes. So far, we only report this fact. + */ static set_baserel_rows_estimate_hook_type aqo_set_baserel_rows_estimate_next = NULL; static get_parameterized_baserel_size_hook_type aqo_get_parameterized_baserel_size_next = NULL; static set_joinrel_size_estimates_hook_type aqo_set_joinrel_size_estimates_next = NULL; @@ -94,12 +100,17 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) /* Return to the caller's memory context. */ MemoryContextSwitchTo(old_ctx_m); - if (predicted >= 0) - { - rel->rows = predicted; - rel->predicted_cardinality = predicted; - return; - } + if (predicted < 0) + goto default_estimator; + + if ((aqo_set_baserel_rows_estimate_next != set_baserel_rows_estimate_standard || + set_baserel_rows_estimate_hook != aqo_set_baserel_rows_estimate)) + /* It is unclear what to do in a situation of such kind.
Just report it */ + elog(WARNING, "AQO is in the middle of the set_baserel_rows_estimate_hook chain"); + + rel->rows = predicted; + rel->predicted_cardinality = predicted; + return; default_estimator: rel->predicted_cardinality = -1.; @@ -115,6 +126,11 @@ aqo_parampathinfo_postinit(ParamPathInfo *ppi) if (IsQueryDisabled()) return; + if ((aqo_set_parampathinfo_postinit_next != NULL || + parampathinfo_postinit_hook != aqo_parampathinfo_postinit)) + /* It is unclear what to do in a situation of such kind. Just report it */ + elog(WARNING, "AQO is in the middle of the parampathinfo_postinit_hook chain"); + ppi->predicted_ppi_rows = predicted_ppi_rows; ppi->fss_ppi_hash = fss_ppi_hash; } @@ -198,8 +214,15 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, predicted_ppi_rows = predicted; fss_ppi_hash = fss; - if (predicted >= 0) - return predicted; + if (predicted < 0) + goto default_estimator; + + if ((aqo_get_parameterized_baserel_size_next != get_parameterized_baserel_size_standard || + get_parameterized_baserel_size_hook != aqo_get_parameterized_baserel_size)) + /* It is unclear what to do in a situation of such kind. Just report it */ + elog(WARNING, "AQO is in the middle of the aqo_get_parameterized_baserel_size_next chain"); + + return predicted; default_estimator: return aqo_get_parameterized_baserel_size_next(root, rel, param_clauses); @@ -263,12 +286,17 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, rel->fss_hash = fss; - if (predicted >= 0) - { - rel->predicted_cardinality = predicted; - rel->rows = predicted; - return; - } + if (predicted < 0) + goto default_estimator; + + if ((aqo_set_joinrel_size_estimates_next != set_joinrel_size_estimates_standard || + set_joinrel_size_estimates_hook != aqo_set_joinrel_size_estimates)) + /* It is unclear what to do in a situation of such kind.
Just report it */ + elog(WARNING, "AQO is in the middle of the set_joinrel_size_estimates_hook chain"); + + rel->predicted_cardinality = predicted; + rel->rows = predicted; + return; default_estimator: rel->predicted_cardinality = -1; @@ -334,8 +362,15 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, predicted_ppi_rows = predicted; fss_ppi_hash = fss; - if (predicted >= 0) - return predicted; + if (predicted < 0) + goto default_estimator; + + if ((aqo_get_parameterized_joinrel_size_next != get_parameterized_joinrel_size_standard || + get_parameterized_joinrel_size_hook != aqo_get_parameterized_joinrel_size)) + /* It is unclear what to do in a situation of such kind. Just report it */ + elog(WARNING, "AQO is in the middle of the get_parameterized_joinrel_size_hook chain"); + + return predicted; default_estimator: return aqo_get_parameterized_joinrel_size_next(root, rel, @@ -393,8 +428,10 @@ aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, /* XXX: Don't support some GROUPING options */ goto default_estimator; - if (aqo_estimate_num_groups_next != NULL) - elog(WARNING, "AQO replaced another estimator of a groups number"); + if (aqo_estimate_num_groups_next != NULL || + estimate_num_groups_hook != aqo_estimate_num_groups) + /* It is unclear what to do in a situation of such kind. Just report it */ + elog(WARNING, "AQO is in the middle of the estimate_num_groups_hook chain"); if (groupExprs == NIL) return 1.0; @@ -431,29 +468,28 @@ void aqo_cardinality_hooks_init(void) { - - /* Cardinality prediction hooks. */ - aqo_set_baserel_rows_estimate_next = set_baserel_rows_estimate_hook ?
- set_baserel_rows_estimate_hook : - set_baserel_rows_estimate_standard; + if (set_baserel_rows_estimate_hook || + set_foreign_rows_estimate_hook || + get_parameterized_baserel_size_hook || + set_joinrel_size_estimates_hook || + get_parameterized_joinrel_size_hook || + parampathinfo_postinit_hook || + estimate_num_groups_hook) + elog(ERROR, "AQO estimation hooks shouldn't be intercepted"); + + aqo_set_baserel_rows_estimate_next = set_baserel_rows_estimate_standard; set_baserel_rows_estimate_hook = aqo_set_baserel_rows_estimate; /* XXX: we have a problem here. Should be redesigned later */ set_foreign_rows_estimate_hook = aqo_set_baserel_rows_estimate; - aqo_get_parameterized_baserel_size_next = get_parameterized_baserel_size_hook ? - get_parameterized_baserel_size_hook : - get_parameterized_baserel_size_standard; + aqo_get_parameterized_baserel_size_next = get_parameterized_baserel_size_standard; get_parameterized_baserel_size_hook = aqo_get_parameterized_baserel_size; - aqo_set_joinrel_size_estimates_next = set_joinrel_size_estimates_hook ? - set_joinrel_size_estimates_hook : - set_joinrel_size_estimates_standard; + aqo_set_joinrel_size_estimates_next = set_joinrel_size_estimates_standard; set_joinrel_size_estimates_hook = aqo_set_joinrel_size_estimates; - aqo_get_parameterized_joinrel_size_next = get_parameterized_joinrel_size_hook ? - get_parameterized_joinrel_size_hook : - get_parameterized_joinrel_size_standard; + aqo_get_parameterized_joinrel_size_next = get_parameterized_joinrel_size_standard; get_parameterized_joinrel_size_hook = aqo_get_parameterized_joinrel_size; aqo_set_parampathinfo_postinit_next = parampathinfo_postinit_hook; From 5b215f224ed34b6234e5eddaa9ff346fc88bfd01 Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Tue, 28 Mar 2023 12:23:02 +0700 Subject: [PATCH 181/203] Fix. Conventionally use of hooks. 
Also, some arrangement for stable14 added by a.lepikhov --- aqo_shared.c | 2 +- cardinality_hooks.c | 12 ++++++------ path_utils.c | 2 +- postprocessing.c | 10 +++++----- preprocessing.c | 6 +++--- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/aqo_shared.c b/aqo_shared.c index 69918020..b7cfced8 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -28,7 +28,7 @@ aqo_init_shmem(void) HASHCTL info; if (aqo_shmem_startup_next) - aqo_shmem_startup_next(); + (*aqo_shmem_startup_next)(); aqo_state = NULL; stat_htab = NULL; diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 1dfe6b65..1520b4e7 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -114,7 +114,7 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) default_estimator: rel->predicted_cardinality = -1.; - aqo_set_baserel_rows_estimate_next(root, rel); + (*aqo_set_baserel_rows_estimate_next)(root, rel); } static void @@ -225,7 +225,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, return predicted; default_estimator: - return aqo_get_parameterized_baserel_size_next(root, rel, param_clauses); + return (*aqo_get_parameterized_baserel_size_next)(root, rel, param_clauses); } /* @@ -300,7 +300,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, default_estimator: rel->predicted_cardinality = -1; - aqo_set_joinrel_size_estimates_next(root, rel, outer_rel, inner_rel, + (*aqo_set_joinrel_size_estimates_next)(root, rel, outer_rel, inner_rel, sjinfo, restrictlist); } @@ -373,7 +373,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, return predicted; default_estimator: - return aqo_get_parameterized_joinrel_size_next(root, rel, + return (*aqo_get_parameterized_joinrel_size_next)(root, rel, outer_path, inner_path, sjinfo, clauses); } @@ -458,8 +458,8 @@ aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, default_estimator: if (aqo_estimate_num_groups_next) - return aqo_estimate_num_groups_next(root, groupExprs, subpath, - 
grouped_rel, pgset); + return (*aqo_estimate_num_groups_next)(root, groupExprs, subpath, + grouped_rel, pgset); else return estimate_num_groups(root, groupExprs, subpath->rows, pgset); diff --git a/path_utils.c b/path_utils.c index 942f1552..93b42d98 100644 --- a/path_utils.c +++ b/path_utils.c @@ -542,7 +542,7 @@ aqo_create_plan(PlannerInfo *root, Path *src, Plan **dest) AQOPlanNode *node; if (aqo_create_plan_next) - aqo_create_plan_next(root, src, dest); + (*aqo_create_plan_next)(root, src, dest); if (!query_context.use_aqo && !query_context.learn_aqo && !query_context.collect_stat) diff --git a/postprocessing.c b/postprocessing.c index d4763955..ba2e19e0 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -600,7 +600,7 @@ aqo_ExecutorStart(QueryDesc *queryDesc, int eflags) StoreToQueryEnv(queryDesc); } - aqo_ExecutorStart_next(queryDesc, eflags); + (*aqo_ExecutorStart_next)(queryDesc, eflags); if (use_aqo) StorePlanInternals(queryDesc); @@ -725,7 +725,7 @@ aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, PG_TRY(); { - aqo_ExecutorRun_next(queryDesc, direction, count, execute_once); + (*aqo_ExecutorRun_next)(queryDesc, direction, count, execute_once); } PG_FINALLY(); { @@ -841,7 +841,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) MemoryContextSwitchTo(oldctx); MemoryContextReset(AQOLearnMemCtx); - aqo_ExecutorEnd_next(queryDesc); + (*aqo_ExecutorEnd_next)(queryDesc); /* * standard_ExecutorEnd clears the queryDesc->planstate. After this point no @@ -982,7 +982,7 @@ print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, QueryEnvironment *queryEnv) { if (aqo_ExplainOnePlan_next) - aqo_ExplainOnePlan_next(plannedstmt, into, es, queryString, + (*aqo_ExplainOnePlan_next)(plannedstmt, into, es, queryString, params, planduration, queryEnv); if (IsQueryDisabled() || !aqo_show_details) @@ -1038,7 +1038,7 @@ print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) /* Extension, which took a hook early can be executed early too. 
*/ if (aqo_ExplainOneNode_next) - aqo_ExplainOneNode_next(es, ps, plan); + (*aqo_ExplainOneNode_next)(es, ps, plan); if (IsQueryDisabled() || !plan || es->format != EXPLAIN_FORMAT_TEXT) return; diff --git a/preprocessing.c b/preprocessing.c index 714b06a8..01b28a32 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -128,7 +128,7 @@ aqo_planner(Query *parse, const char *query_string, int cursorOptions, MemoryContextSwitchTo(oldctx); disable_aqo_for_query(); - return aqo_planner_next(parse, query_string, cursorOptions, boundParams); + return (*aqo_planner_next)(parse, query_string, cursorOptions, boundParams); } selectivity_cache_clear(); @@ -149,7 +149,7 @@ aqo_planner(Query *parse, const char *query_string, int cursorOptions, MemoryContextSwitchTo(oldctx); disable_aqo_for_query(); - return aqo_planner_next(parse, query_string, cursorOptions, boundParams); + return (*aqo_planner_next)(parse, query_string, cursorOptions, boundParams); } elog(DEBUG1, "AQO will be used for query '%s', class "UINT64_FORMAT, @@ -320,7 +320,7 @@ aqo_planner(Query *parse, const char *query_string, int cursorOptions, PlannedStmt *stmt; MemoryContextSwitchTo(oldctx); - stmt = aqo_planner_next(parse, query_string, cursorOptions, boundParams); + stmt = (*aqo_planner_next)(parse, query_string, cursorOptions, boundParams); /* Release the memory, allocated for AQO predictions */ MemoryContextReset(AQOPredictMemCtx); From 1b33907fe4dcfbca7ef33edb9aca5d38d1a5501b Mon Sep 17 00:00:00 2001 From: "Andrey V. 
Lepikhov" Date: Thu, 13 Apr 2023 15:31:17 +0500 Subject: [PATCH 182/203] Skip 'DROP EXTENSION' test in 001_pgbench.pl because of instability on Windows --- t/001_pgbench.pl | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index cb6b76de..def7786e 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -403,8 +403,16 @@ "); $node->restart(); -$node->command_ok([ 'pgbench', '-T', +# Some specifics of core PostgreSQL pgbench code don't allow this test to pass +# stably on Windows OS. +# See https://www.postgresql.org/message-id/flat/8225e78650dd69f69c8cff37ecce9a09%40postgrespro.ru +SKIP: +{ + skip "Socket allocation issues. ", 1 + if ($windows_os); + $node->command_ok([ 'pgbench', '-T', "50", '-c', "$CLIENTS", '-j', "$THREADS" , '-f', "$bank"], 'Conflicts with an AQO dropping command.'); +} $node->stop(); From efff72473760a19f766b1a0dcda5abff875a8a2f Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 20 Apr 2023 13:43:48 +0500 Subject: [PATCH 183/203] Enhancement. Buildfarm has detected curious instability in the parallel_workers test: EXPLAIN of Partial Aggregate sometimes showed 0 rows instead of 1. It is a race: parallel workers ran when the main process had read all underlying tuples. Use explain without analyze to avoid such a problem. As I see, we don't lose anything important. --- expected/parallel_workers.out | 37 +++++++++++++++++------------------ sql/parallel_workers.sql | 5 ++--- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/expected/parallel_workers.out b/expected/parallel_workers.out index 3e408f49..c64aed61 100644 --- a/expected/parallel_workers.out +++ b/expected/parallel_workers.out @@ -68,53 +68,52 @@ WHERE q1.id = q2.id; -- Learning stage -- XXX: Why grouping prediction isn't working here?
SELECT str FROM expln(' -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) FROM (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, (SELECT max(id) AS id, payload FROM t WHERE id % 101 = 0 GROUP BY (payload)) AS q2 WHERE q1.id = q2.id;') AS str -WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%' - AND str NOT LIKE '%Gather Merge%'; +WHERE str NOT LIKE '%Workers%'; str -------------------------------------------------------------------------------------------------- - Aggregate (actual rows=1 loops=1) + Aggregate AQO not used - -> Merge Join (actual rows=0 loops=1) + -> Merge Join AQO not used Merge Cond: (q2.id = t_1.id) - -> Sort (actual rows=1 loops=1) + -> Sort Sort Key: q2.id - -> Subquery Scan on q2 (actual rows=1 loops=1) + -> Subquery Scan on q2 AQO not used - -> Finalize GroupAggregate (actual rows=1 loops=1) + -> Finalize GroupAggregate AQO not used Group Key: t.payload + -> Gather Merge AQO not used - -> Partial GroupAggregate (actual rows=1 loops=3) + -> Partial GroupAggregate AQO not used Group Key: t.payload - -> Sort (actual rows=330 loops=3) + -> Sort AQO not used Sort Key: t.payload - -> Parallel Seq Scan on t (actual rows=330 loops=3) - AQO: rows=991, error=0% + -> Parallel Seq Scan on t + AQO: rows=991 Filter: ((id % '101'::numeric) = '0'::numeric) - Rows Removed by Filter: 33003 - -> Group (actual rows=1000 loops=1) + -> Group AQO not used Group Key: t_1.id + -> Gather Merge AQO not used - -> Group (actual rows=333 loops=3) + -> Group AQO not used Group Key: t_1.id - -> Sort (actual rows=333 loops=3) + -> Sort AQO not used Sort Key: t_1.id - -> Parallel Seq Scan on t t_1 (actual rows=333 loops=3) - AQO: rows=991, error=-1% + -> Parallel Seq Scan on t t_1 + AQO: rows=991 Filter: ((id % '100'::numeric) = '0'::numeric) - Rows Removed by Filter: 33000 Using aqo: true AQO mode: LEARN JOINS: 1 diff --git a/sql/parallel_workers.sql b/sql/parallel_workers.sql index 
2cd04bc2..419f23e6 100644 --- a/sql/parallel_workers.sql +++ b/sql/parallel_workers.sql @@ -43,14 +43,13 @@ SELECT count(*) FROM WHERE q1.id = q2.id; -- Learning stage -- XXX: Why grouping prediction isn't working here? SELECT str FROM expln(' -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) FROM (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, (SELECT max(id) AS id, payload FROM t WHERE id % 101 = 0 GROUP BY (payload)) AS q2 WHERE q1.id = q2.id;') AS str -WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%' - AND str NOT LIKE '%Gather Merge%'; +WHERE str NOT LIKE '%Workers%'; RESET parallel_tuple_cost; RESET parallel_setup_cost; From 8f42e12ec3a57418448b4032e89e19754753ad38 Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Tue, 11 Apr 2023 01:16:24 +0700 Subject: [PATCH 184/203] Bugfix. Correctly use of a routine for joins counting. --- expected/aqo_fdw.out | 2 +- expected/feature_subspace.out | 4 ++-- expected/look_a_like.out | 20 ++++++++++---------- expected/unsupported.out | 2 +- postprocessing.c | 2 +- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index 69c1b132..ca69fab4 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -120,7 +120,7 @@ SELECT str FROM expln(' AQO not used Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (14 rows) -- Should learn on postgres_fdw nodes diff --git a/expected/feature_subspace.out b/expected/feature_subspace.out index a53b57e7..eceb0eb1 100644 --- a/expected/feature_subspace.out +++ b/expected/feature_subspace.out @@ -43,7 +43,7 @@ WHERE str NOT LIKE '%Memory%'; AQO not used Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (14 rows) -- TODO: Using method of other classes neighbours we get a bad estimation. 
@@ -66,7 +66,7 @@ WHERE str NOT LIKE '%Memory%'; AQO not used Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (14 rows) -- Look into the reason: two JOINs from different classes have the same FSS. diff --git a/expected/look_a_like.out b/expected/look_a_like.out index fb76fdd6..9e3dc286 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -56,7 +56,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Rows Removed by Filter: 900 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (16 rows) SELECT str AS result @@ -83,7 +83,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Rows Removed by Filter: 900 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (19 rows) SELECT str AS result @@ -108,7 +108,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Rows Removed by Filter: 500 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (17 rows) --query contains nodes that have already been predicted @@ -134,7 +134,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Rows Removed by Filter: 500 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (17 rows) SELECT str AS result @@ -159,7 +159,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Rows Removed by Filter: 300 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (17 rows) SELECT str AS result @@ -184,7 +184,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Rows Removed by Filter: 600 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (17 rows) SELECT str AS result @@ -209,7 +209,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Rows Removed by Filter: 500 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (17 rows) --query contains nodes that have already been predicted @@ -235,7 +235,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT 
LIKE '%Memory%' and str NOT L Rows Removed by Filter: 600 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (17 rows) SELECT str AS result @@ -516,7 +516,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN - JOINS: 1 + JOINS: 2 (24 rows) SELECT str AS result @@ -548,7 +548,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Output: c.z1, c.z2, c.z3 Using aqo: true AQO mode: LEARN - JOINS: 1 + JOINS: 2 (24 rows) RESET aqo.wide_search; diff --git a/expected/unsupported.out b/expected/unsupported.out index a1a6f4ae..6e45dcd8 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -487,7 +487,7 @@ SELECT * FROM Filter: (x > 20) Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (13 rows) -- AQO needs to predict total fetched tuples in a table. diff --git a/postprocessing.c b/postprocessing.c index ba2e19e0..6850cde4 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -916,7 +916,7 @@ StorePlanInternals(QueryDesc *queryDesc) MemoryContext oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); njoins = 0; - planstate_tree_walker(queryDesc->planstate, calculateJoinNum, &njoins); + calculateJoinNum(queryDesc->planstate, &njoins); if (queryDesc->queryEnv == NULL) queryDesc->queryEnv = create_queryEnv(); From baa4043733280b1e416ad562680e13e34d49cdca Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Thu, 20 Apr 2023 13:49:32 +0700 Subject: [PATCH 185/203] Add the routine for safe update. 
Reviewed by: @Alena0704 --- storage.c | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/storage.c b/storage.c index 9e6cf306..6c0dc77a 100644 --- a/storage.c +++ b/storage.c @@ -74,8 +74,12 @@ HTAB *data_htab = NULL; dsa_area *data_dsa = NULL; HTAB *deactivated_queries = NULL; -/* Used to check data file consistency */ -static const uint32 PGAQO_FILE_HEADER = 123467589; +/* + * Used to check data file consistency + * When changing data structures, PGAQO_FILE_HEADER should also be changed. + * In this case, all AQO file storages will be reset. + */ +static const uint32 PGAQO_FILE_HEADER = 0x20230330; static const uint32 PGAQO_PG_MAJOR_VERSION = PG_VERSION_NUM / 100; /* @@ -374,7 +378,7 @@ aqo_query_stat(PG_FUNCTION_ARGS) Datum values[TOTAL_NCOLS + 1]; bool nulls[TOTAL_NCOLS + 1]; HASH_SEQ_STATUS hash_seq; - StatEntry *entry; + StatEntry *entry; /* check to see if caller supports us returning a tuplestore */ if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) @@ -393,7 +397,9 @@ aqo_query_stat(PG_FUNCTION_ARGS) /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == TOTAL_NCOLS); + + if (tupDesc->natts != TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; @@ -1160,7 +1166,9 @@ aqo_query_texts(PG_FUNCTION_ARGS) /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == QT_TOTAL_NCOLS); + + if (tupDesc->natts != QT_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; @@ -1754,7 +1762,9 @@ 
aqo_data(PG_FUNCTION_ARGS) /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == AD_TOTAL_NCOLS); + + if (tupDesc->natts != AD_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; @@ -1914,7 +1924,9 @@ aqo_queries(PG_FUNCTION_ARGS) /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == AQ_TOTAL_NCOLS); + + if (tupDesc->natts != AQ_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; @@ -2377,7 +2389,8 @@ aqo_cleanup(PG_FUNCTION_ARGS) if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == 2); + if (tupDesc->natts != 2) + elog(ERROR, "[AQO] Incorrect number of output arguments"); /* * Make forced cleanup: if at least one fss isn't actual, remove parent FS @@ -2488,7 +2501,9 @@ aqo_cardinality_error(PG_FUNCTION_ARGS) /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == AQE_TOTAL_NCOLS); + + if (tupDesc->natts != AQE_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; @@ -2556,8 +2571,8 @@ aqo_execution_time(PG_FUNCTION_ARGS) MemoryContext per_query_ctx; MemoryContext oldcontext; Tuplestorestate *tupstore; - Datum values[AQE_TOTAL_NCOLS]; - bool nulls[AQE_TOTAL_NCOLS]; + Datum values[ET_TOTAL_NCOLS]; + bool 
nulls[ET_TOTAL_NCOLS]; HASH_SEQ_STATUS hash_seq; QueriesEntry *qentry; StatEntry *sentry; @@ -2580,7 +2595,9 @@ aqo_execution_time(PG_FUNCTION_ARGS) /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == ET_TOTAL_NCOLS); + + if (tupDesc->natts != ET_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; @@ -2713,7 +2730,7 @@ aqo_query_stat_update(PG_FUNCTION_ARGS) PG_ARGISNULL(EST_ERROR)) PG_RETURN_BOOL(false); - queryid = PG_GETARG_INT64(AQ_QUERYID); + queryid = PG_GETARG_INT64(QUERYID); stat_arg.execs_with_aqo = PG_GETARG_INT64(NEXECS_AQO); stat_arg.execs_without_aqo = PG_GETARG_INT64(NEXECS); if (queryid == 0 || stat_arg.execs_with_aqo < 0 || From e658efebb798f6f18cd0d78b5e1963b98cdefd6e Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Thu, 20 Apr 2023 13:56:21 +0700 Subject: [PATCH 186/203] Add small bugfixes and refactoring. 
Reviewed by: @Alena0704 --- aqo.c | 1 - aqo.h | 1 - hash.c | 28 ++++++++++++++-------------- postprocessing.c | 4 ++-- preprocessing.c | 2 +- storage.c | 38 +++++++++++++++++++------------------- 6 files changed, 36 insertions(+), 38 deletions(-) diff --git a/aqo.c b/aqo.c index 33b56737..b3e176fb 100644 --- a/aqo.c +++ b/aqo.c @@ -61,7 +61,6 @@ static const struct config_enum_entry format_options[] = { }; /* Parameters of autotuning */ -int aqo_stat_size = STAT_SAMPLE_SIZE; int auto_tuning_window_size = 5; double auto_tuning_exploration = 0.1; int auto_tuning_max_iterations = 50; diff --git a/aqo.h b/aqo.h index 6f57a4d1..85c3f3b2 100644 --- a/aqo.h +++ b/aqo.h @@ -211,7 +211,6 @@ extern double predicted_ppi_rows; extern double fss_ppi_hash; /* Parameters of autotuning */ -extern int aqo_stat_size; extern int auto_tuning_window_size; extern double auto_tuning_exploration; extern int auto_tuning_max_iterations; diff --git a/hash.c b/hash.c index a7f7f9c1..9d7470a7 100644 --- a/hash.c +++ b/hash.c @@ -344,7 +344,7 @@ get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash) /* * Computes hash for given string. */ -int +static int get_str_hash(const char *str) { return DatumGetInt32(hash_any((const unsigned char *) str, @@ -381,7 +381,7 @@ get_int_array_hash(int *arr, int len) * Sorts given array in-place to compute hash. * The hash is order-insensitive. */ -int +static int get_unsorted_unsafe_int_array_hash(int *arr, int len) { qsort(arr, len, sizeof(*arr), int_cmp); @@ -396,7 +396,7 @@ get_unsorted_unsafe_int_array_hash(int *arr, int len) * using 'hash_any'. * Frees allocated memory before returning hash. */ -int +static int get_unordered_int_list_hash(List *lst) { int i = 0; @@ -448,7 +448,7 @@ replace_patterns(const char *str, const char *start_pattern, * Computes hash for given feature subspace. * Hash is supposed to be clause-order-insensitive. 
*/ -int +static int get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash) { int hashes[3]; @@ -517,7 +517,7 @@ remove_locations(const char *str) * Returns index of given value in given sorted integer array * or -1 if not found. */ -int +static int get_id_in_sorted_int_array(int val, int n, int *arr) { int *i; @@ -536,7 +536,7 @@ get_id_in_sorted_int_array(int val, int n, int *arr) * Returns class of equivalence for given argument hash or 0 if such hash * does not belong to any equivalence class. */ -int +static int get_arg_eclass(int arg_hash, int nargs, int *args_hash, int *eclass_hash) { int di = get_id_in_sorted_int_array(arg_hash, nargs, args_hash); @@ -551,7 +551,7 @@ get_arg_eclass(int arg_hash, int nargs, int *args_hash, int *eclass_hash) * Builds list of non-constant arguments of equivalence clauses * of given clauselist. */ -void +static void get_clauselist_args(List *clauselist, int *nargs, int **args_hash) { RestrictInfo *rinfo; @@ -597,7 +597,7 @@ get_clauselist_args(List *clauselist, int *nargs, int **args_hash) /* * Returns class of an object in disjoint set. */ -int +static int disjoint_set_get_parent(int *p, int v) { if (p[v] == -1) @@ -609,7 +609,7 @@ disjoint_set_get_parent(int *p, int v) /* * Merges two equivalence classes in disjoint set. */ -void +static void disjoint_set_merge_eclasses(int *p, int v1, int v2) { int p1, @@ -629,7 +629,7 @@ disjoint_set_merge_eclasses(int *p, int v1, int v2) /* * Constructs disjoint set on arguments. */ -int * +static int * perform_eclasses_join(List *clauselist, int nargs, int *args_hash) { RestrictInfo *rinfo; @@ -706,7 +706,7 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) /* * Checks whether the given char is brace, i. e. '{' or '}'. */ -bool +static bool is_brace(char ch) { return ch == '{' || ch == '}'; @@ -715,7 +715,7 @@ is_brace(char ch) /* * Returns whether arguments list contain constants. 
*/ -bool +static bool has_consts(List *lst) { ListCell *l; @@ -729,7 +729,7 @@ has_consts(List *lst) /* * Returns pointer on the args list in clause or NULL. */ -List ** +static List ** get_clause_args_ptr(Expr *clause) { switch (clause->type) @@ -755,7 +755,7 @@ get_clause_args_ptr(Expr *clause) /* * Returns whether the clause is an equivalence clause. */ -bool +static bool clause_is_eq_clause(Expr *clause) { /* TODO: fix this horrible mess */ diff --git a/postprocessing.c b/postprocessing.c index 6850cde4..66aca901 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -173,7 +173,7 @@ learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, * For given node specified by clauselist, relidslist and join_type restores * the same selectivities of clauses as were used at query optimization stage. */ -List * +static List * restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, bool was_parametrized) { @@ -336,7 +336,7 @@ should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, "predicted rows: %.0lf, updated prediction: %.0lf", query_context.query_hash, node->fss, predicted, nrows); - *rfactor = 0.9 * (RELIABILITY_MAX - RELIABILITY_MIN); + *rfactor = RELIABILITY_MIN + 0.9 * (RELIABILITY_MAX - RELIABILITY_MIN); return true; } } diff --git a/preprocessing.c b/preprocessing.c index 01b28a32..ced4ffab 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -69,7 +69,7 @@ /* List of feature spaces, that are processing in this backend. 
*/ List *cur_classes = NIL; -int aqo_join_threshold = 0; +int aqo_join_threshold = 3; static planner_hook_type aqo_planner_next = NULL; diff --git a/storage.c b/storage.c index 6c0dc77a..17f97555 100644 --- a/storage.c +++ b/storage.c @@ -100,7 +100,7 @@ static bool _aqo_stat_remove(uint64 queryid); static bool _aqo_queries_remove(uint64 queryid); static bool _aqo_qtexts_remove(uint64 queryid); static bool _aqo_data_remove(data_key *key); -static bool neirest_neighbor(double **matrix, int old_rows, double *neighbor, int cols); +static bool nearest_neighbor(double **matrix, int old_rows, double *neighbor, int cols); static double fs_distance(double *a, double *b, int len); PG_FUNCTION_INFO_V1(aqo_query_stat); @@ -143,7 +143,7 @@ update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids) /* * Forms ArrayType object for storage from simple C-array matrix. */ -ArrayType * +static ArrayType * form_matrix(double *matrix, int nrows, int ncols) { Datum *elems; @@ -375,8 +375,8 @@ aqo_query_stat(PG_FUNCTION_ARGS) MemoryContext per_query_ctx; MemoryContext oldcontext; Tuplestorestate *tupstore; - Datum values[TOTAL_NCOLS + 1]; - bool nulls[TOTAL_NCOLS + 1]; + Datum values[TOTAL_NCOLS]; + bool nulls[TOTAL_NCOLS]; HASH_SEQ_STATUS hash_seq; StatEntry *entry; @@ -408,13 +408,11 @@ aqo_query_stat(PG_FUNCTION_ARGS) MemoryContextSwitchTo(oldcontext); - memset(nulls, 0, TOTAL_NCOLS + 1); + memset(nulls, 0, TOTAL_NCOLS); LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); hash_seq_init(&hash_seq, stat_htab); while ((entry = hash_seq_search(&hash_seq)) != NULL) { - memset(nulls, 0, TOTAL_NCOLS + 1); - values[QUERYID] = Int64GetDatum(entry->queryid); values[NEXECS] = Int64GetDatum(entry->execs_without_aqo); values[NEXECS_AQO] = Int64GetDatum(entry->execs_with_aqo); @@ -1507,8 +1505,8 @@ fs_distance(double *a, double *b, int len) return res; } -bool -neirest_neighbor(double **matrix, int old_rows, double *neibour, int cols) +static bool +nearest_neighbor(double **matrix, int 
old_rows, double *neibour, int cols) { int i; for (i=0; irows; i++) { - if (k < aqo_K && !neirest_neighbor(data->matrix, old_rows, data->matrix[i], data->cols)) + if (k < aqo_K && !nearest_neighbor(data->matrix, old_rows, + temp_data->matrix[i], + data->cols)) { memcpy(data->matrix[k], temp_data->matrix[i], data->cols * sizeof(double)); data->rfactors[k] = temp_data->rfactors[i]; @@ -1902,8 +1902,8 @@ aqo_queries(PG_FUNCTION_ARGS) MemoryContext per_query_ctx; MemoryContext oldcontext; Tuplestorestate *tupstore; - Datum values[AQ_TOTAL_NCOLS + 1]; - bool nulls[AQ_TOTAL_NCOLS + 1]; + Datum values[AQ_TOTAL_NCOLS]; + bool nulls[AQ_TOTAL_NCOLS]; HASH_SEQ_STATUS hash_seq; QueriesEntry *entry; @@ -1935,12 +1935,12 @@ aqo_queries(PG_FUNCTION_ARGS) MemoryContextSwitchTo(oldcontext); + memset(nulls, 0, AQ_TOTAL_NCOLS); + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); hash_seq_init(&hash_seq, queries_htab); while ((entry = hash_seq_search(&hash_seq)) != NULL) { - memset(nulls, 0, AQ_TOTAL_NCOLS + 1); - values[AQ_QUERYID] = Int64GetDatum(entry->queryid); values[AQ_FS] = Int64GetDatum(entry->fs); values[AQ_LEARN_AQO] = BoolGetDatum(entry->learn_aqo); @@ -2142,7 +2142,7 @@ aqo_queries_find(uint64 queryid, QueryContextData *ctx) /* * Function for update and save value of smart statement timeout - * for query in aqu_queries table + * for query in aqo_queries table */ bool update_query_timeout(uint64 queryid, int64 smart_timeout) @@ -2515,6 +2515,8 @@ aqo_cardinality_error(PG_FUNCTION_ARGS) LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + memset(nulls, 0, AQE_TOTAL_NCOLS * sizeof(nulls[0])); + hash_seq_init(&hash_seq, queries_htab); while ((qentry = hash_seq_search(&hash_seq)) != NULL) { @@ -2523,8 +2525,6 @@ aqo_cardinality_error(PG_FUNCTION_ARGS) int64 nexecs; int nvals; - memset(nulls, 0, AQE_TOTAL_NCOLS * sizeof(nulls[0])); - sentry = (StatEntry *) hash_search(stat_htab, &qentry->queryid, HASH_FIND, &found); if (!found) 
@@ -2609,6 +2609,8 @@ aqo_execution_time(PG_FUNCTION_ARGS) LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + memset(nulls, 0, ET_TOTAL_NCOLS * sizeof(nulls[0])); + hash_seq_init(&hash_seq, queries_htab); while ((qentry = hash_seq_search(&hash_seq)) != NULL) { @@ -2618,8 +2620,6 @@ aqo_execution_time(PG_FUNCTION_ARGS) int nvals; double tm = 0; - memset(nulls, 0, ET_TOTAL_NCOLS * sizeof(nulls[0])); - sentry = (StatEntry *) hash_search(stat_htab, &qentry->queryid, HASH_FIND, &found); if (!found) From d09465cd93338330df70baacbc2b08c941f1a71f Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Wed, 26 Apr 2023 18:37:59 +0300 Subject: [PATCH 187/203] Set size one of table to 100 to ensure that it is choosen plan with only right side hash join. --- expected/look_a_like.out | 246 +++++++++++++++++++-------------------- sql/look_a_like.sql | 2 +- 2 files changed, 124 insertions(+), 124 deletions(-) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index 9e3dc286..dc339ffa 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -19,7 +19,7 @@ NOTICE: table "a" does not exist, skipping NOTICE: table "b" does not exist, skipping -- Create tables with correlated datas in columns CREATE TABLE a (x1 int, x2 int, x3 int); -INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; +INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; CREATE TABLE b (y1 int, y2 int, y3 int); INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; -- @@ -39,17 +39,17 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result --------------------------------------------------------- 
- Nested Loop (actual rows=10000 loops=1) + result +------------------------------------------------------- + Nested Loop (actual rows=1000 loops=1) AQO not used Output: a.x1, b.y1 - -> Seq Scan on public.a (actual rows=100 loops=1) + -> Seq Scan on public.a (actual rows=10 loops=1) AQO not used Output: a.x1, a.x2, a.x3 Filter: ((a.x1 = 5) AND (a.x2 = 5)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=100 loops=100) + Rows Removed by Filter: 90 + -> Seq Scan on public.b (actual rows=100 loops=10) AQO not used Output: b.y1, b.y2, b.y3 Filter: (b.y1 = 5) @@ -63,24 +63,24 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A LEFT JOIN b ON A.x1 = B.y1 WHERE x1 = 5 AND x2 = 5;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result ------------------------------------------------------------- - Hash Left Join (actual rows=10000 loops=1) + result +----------------------------------------------------------- + Hash Right Join (actual rows=1000 loops=1) AQO not used Output: a.x1, b.y1 - Hash Cond: (a.x1 = b.y1) - -> Seq Scan on public.a (actual rows=100 loops=1) + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=100 loops=1) AQO: rows=100, error=0% - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 = 5) AND (a.x2 = 5)) + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 = 5) Rows Removed by Filter: 900 - -> Hash (actual rows=100 loops=1) - Output: b.y1 - -> Seq Scan on public.b (actual rows=100 loops=1) - AQO: rows=100, error=0% - Output: b.y1 - Filter: (b.y1 = 5) - Rows Removed by Filter: 900 + -> Hash (actual rows=10 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=10 loops=1) + AQO: rows=10, error=0% + Output: a.x1 + Filter: ((a.x1 = 5) AND (a.x2 = 5)) + Rows Removed by Filter: 90 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -90,22 +90,22 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str WHERE str NOT LIKE 'Query 
Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result ------------------------------------------------------------- - Hash Join (actual rows=50000 loops=1) + result +----------------------------------------------------------- + Hash Join (actual rows=5000 loops=1) AQO not used Output: a.x1, b.y1 Hash Cond: (b.y1 = a.x1) -> Seq Scan on public.b (actual rows=1000 loops=1) AQO not used Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=500 loops=1) + -> Hash (actual rows=50 loops=1) Output: a.x1 - -> Seq Scan on public.a (actual rows=500 loops=1) + -> Seq Scan on public.a (actual rows=50 loops=1) AQO not used Output: a.x1 Filter: ((a.x1 < 5) AND (a.x2 < 5)) - Rows Removed by Filter: 500 + Rows Removed by Filter: 50 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -116,22 +116,22 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result ------------------------------------------------------------- - Hash Join (actual rows=50000 loops=1) - AQO: rows=50000, error=0% + result +----------------------------------------------------------- + Hash Join (actual rows=5000 loops=1) + AQO: rows=5000, error=0% Output: a.x1, b.y1 Hash Cond: (b.y1 = a.x1) -> Seq Scan on public.b (actual rows=1000 loops=1) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=500 loops=1) + -> Hash (actual rows=50 loops=1) Output: a.x1 - -> Seq Scan on public.a (actual rows=500 loops=1) - AQO: rows=500, error=0% + -> Seq Scan on public.a (actual rows=50 loops=1) + AQO: rows=50, error=0% Output: a.x1 Filter: ((a.x1 < 10) AND (a.x2 < 5)) - Rows Removed by Filter: 500 + Rows Removed by Filter: 50 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -141,22 +141,22 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 AND A.x1 = B.y1;') AS str WHERE str NOT LIKE 'Query Identifier%' and 
str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result ------------------------------------------------------------- - Hash Join (actual rows=70000 loops=1) + result +----------------------------------------------------------- + Hash Join (actual rows=7000 loops=1) AQO not used Output: a.x1, b.y1 Hash Cond: (b.y1 = a.x1) -> Seq Scan on public.b (actual rows=1000 loops=1) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=700 loops=1) + -> Hash (actual rows=70 loops=1) Output: a.x1 - -> Seq Scan on public.a (actual rows=700 loops=1) + -> Seq Scan on public.a (actual rows=70 loops=1) AQO not used Output: a.x1 Filter: ((a.x1 > 2) AND (a.x2 > 2)) - Rows Removed by Filter: 300 + Rows Removed by Filter: 30 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -168,20 +168,20 @@ SELECT x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------------- - Hash Join (actual rows=40000 loops=1) + Hash Join (actual rows=4000 loops=1) AQO not used Output: a.x1, b.y1 Hash Cond: (b.y1 = a.x1) -> Seq Scan on public.b (actual rows=1000 loops=1) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=400 loops=1) + -> Hash (actual rows=40 loops=1) Output: a.x1 - -> Seq Scan on public.a (actual rows=400 loops=1) + -> Seq Scan on public.a (actual rows=40 loops=1) AQO not used Output: a.x1 Filter: ((a.x1 > 5) AND (a.x2 > 5) AND (a.x3 < 10)) - Rows Removed by Filter: 600 + Rows Removed by Filter: 60 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -193,20 +193,20 @@ SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------------- - Hash Join (actual rows=50000 loops=1) + Hash Join 
(actual rows=5000 loops=1) AQO not used Output: a.x1, b.y1 Hash Cond: (b.y1 = a.x1) -> Seq Scan on public.b (actual rows=1000 loops=1) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=500 loops=1) + -> Hash (actual rows=50 loops=1) Output: a.x1 - -> Seq Scan on public.a (actual rows=500 loops=1) + -> Seq Scan on public.a (actual rows=50 loops=1) AQO not used Output: a.x1 Filter: ((a.x1 < 5) AND (a.x2 < 5) AND (a.x3 < 10)) - Rows Removed by Filter: 500 + Rows Removed by Filter: 50 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -219,20 +219,20 @@ SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS s WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------------ - Hash Join (actual rows=40000 loops=1) - AQO: rows=50000, error=20% + Hash Join (actual rows=4000 loops=1) + AQO: rows=5000, error=20% Output: a.x1, b.y1 Hash Cond: (b.y1 = a.x1) -> Seq Scan on public.b (actual rows=1000 loops=1) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=400 loops=1) + -> Hash (actual rows=40 loops=1) Output: a.x1 - -> Seq Scan on public.a (actual rows=400 loops=1) - AQO: rows=500, error=20% + -> Seq Scan on public.a (actual rows=40 loops=1) + AQO: rows=50, error=20% Output: a.x1 Filter: ((a.x1 < 5) AND (a.x2 < 4) AND (a.x3 < 5)) - Rows Removed by Filter: 600 + Rows Removed by Filter: 60 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -242,25 +242,25 @@ SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result ---------------------------------------------------------------------- + result +-------------------------------------------------------------------- Group (actual rows=2 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=200000 
loops=1) + -> Sort (actual rows=20000 loops=1) AQO not used Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) + -> Nested Loop (actual rows=20000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) + -> Seq Scan on public.a (actual rows=20 loops=1) AQO not used Output: a.x1, a.x2, a.x3 Filter: ((a.x1 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=1000 loops=20) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 Using aqo: true @@ -273,25 +273,25 @@ SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result ---------------------------------------------------------------------- + result +-------------------------------------------------------------------- Group (actual rows=2 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=200000 loops=1) + -> Sort (actual rows=20000 loops=1) AQO not used Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) - AQO: rows=200000, error=0% + -> Nested Loop (actual rows=20000 loops=1) + AQO: rows=20000, error=0% Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) - AQO: rows=200, error=0% + -> Seq Scan on public.a (actual rows=20 loops=1) + AQO: rows=20, error=0% Output: a.x1, a.x2, a.x3 Filter: ((a.x1 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=1000 loops=20) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 Using aqo: true @@ -303,25 +303,25 @@ SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT 
LIKE '%Sort Method%'; - result ---------------------------------------------------------------------- + result +-------------------------------------------------------------------- Group (actual rows=1 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=100000 loops=1) + -> Sort (actual rows=10000 loops=1) AQO not used Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=100000 loops=1) - AQO: rows=200000, error=50% + -> Nested Loop (actual rows=10000 loops=1) + AQO: rows=20000, error=50% Output: a.x1 - -> Seq Scan on public.a (actual rows=100 loops=1) - AQO: rows=200, error=50% + -> Seq Scan on public.a (actual rows=10 loops=1) + AQO: rows=20, error=50% Output: a.x1, a.x2, a.x3 Filter: ((a.x1 < 4) AND (a.x3 > 2)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=1000 loops=100) + Rows Removed by Filter: 90 + -> Seq Scan on public.b (actual rows=1000 loops=10) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 Using aqo: true @@ -339,19 +339,19 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=100000 loops=1) + -> Sort (actual rows=10000 loops=1) AQO not used Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=100000 loops=1) + -> Nested Loop (actual rows=10000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=100 loops=1) + -> Seq Scan on public.a (actual rows=10 loops=1) AQO not used Output: a.x1, a.x2, a.x3 Filter: ((a.x1 < 3) AND (a.x2 < 5) AND (a.x3 > 1)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=1000 loops=100) + Rows Removed by Filter: 90 + -> Seq Scan on public.b (actual rows=1000 loops=10) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 Using aqo: true @@ -369,19 +369,19 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=200000 loops=1) + -> Sort (actual 
rows=20000 loops=1) AQO not used Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) + -> Nested Loop (actual rows=20000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) + -> Seq Scan on public.a (actual rows=20 loops=1) AQO not used Output: a.x1, a.x2, a.x3 Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=1000 loops=20) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 Using aqo: true @@ -399,19 +399,19 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=200000 loops=1) + -> Sort (actual rows=20000 loops=1) AQO not used Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) + -> Nested Loop (actual rows=20000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) + -> Seq Scan on public.a (actual rows=20 loops=1) AQO not used Output: a.x1, a.x2, a.x3 Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 < 5)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=1000 loops=20) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 Using aqo: true @@ -429,19 +429,19 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=140000 loops=1) + -> Sort (actual rows=14000 loops=1) AQO not used Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=140000 loops=1) + -> Nested Loop (actual rows=14000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) - AQO: rows=100, error=-100% + -> Seq Scan on public.a (actual rows=20 loops=1) + AQO: rows=10, error=-100% Output: a.x1, a.x2, a.x3 Filter: ((a.x1 < 4) AND 
(a.x2 < 5) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=700 loops=200) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=700 loops=20) AQO not used Output: b.y1, b.y2, b.y3 Filter: (b.y1 > 2) @@ -462,19 +462,19 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=70000 loops=1) + -> Sort (actual rows=7000 loops=1) AQO not used Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=70000 loops=1) - AQO: rows=140000, error=50% + -> Nested Loop (actual rows=7000 loops=1) + AQO: rows=14000, error=50% Output: a.x1 - -> Seq Scan on public.a (actual rows=100 loops=1) - AQO: rows=200, error=50% + -> Seq Scan on public.a (actual rows=10 loops=1) + AQO: rows=20, error=50% Output: a.x1, a.x2, a.x3 Filter: ((a.x1 < 3) AND (a.x2 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=700 loops=100) + Rows Removed by Filter: 90 + -> Seq Scan on public.b (actual rows=700 loops=10) AQO: rows=700, error=0% Output: b.y1, b.y2, b.y3 Filter: (b.y1 > 2) @@ -501,7 +501,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L AQO not used Output: a.x1, a.x2, a.x3 Hash Cond: (a.x1 = c.z1) - -> Seq Scan on public.a (actual rows=1000 loops=1) + -> Seq Scan on public.a (actual rows=100 loops=1) AQO not used Output: a.x1, a.x2, a.x3 -> Hash (actual rows=1000 loops=1) @@ -523,29 +523,29 @@ SELECT str AS result FROM expln(' SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result -------------------------------------------------------------------------- - Hash Right Join (actual rows=10000000 loops=1) - AQO: rows=1, error=-999999900% + result +------------------------------------------------------------------------ + Hash Right Join (actual 
rows=1000000 loops=1) + AQO: rows=1, error=-99999900% Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3, c.z1, c.z2, c.z3 Hash Cond: (b.y1 = a.x1) -> Seq Scan on public.b (actual rows=1000 loops=1) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=100000 loops=1) + -> Hash (actual rows=10000 loops=1) Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 - -> Hash Left Join (actual rows=100000 loops=1) - AQO: rows=1, error=-9999900% + -> Hash Right Join (actual rows=10000 loops=1) + AQO: rows=1, error=-999900% Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 - Hash Cond: (a.x1 = c.z1) - -> Seq Scan on public.a (actual rows=1000 loops=1) + Hash Cond: (c.z1 = a.x1) + -> Seq Scan on public.c (actual rows=1000 loops=1) AQO: rows=1000, error=0% - Output: a.x1, a.x2, a.x3 - -> Hash (actual rows=1000 loops=1) Output: c.z1, c.z2, c.z3 - -> Seq Scan on public.c (actual rows=1000 loops=1) - AQO: rows=1000, error=0% - Output: c.z1, c.z2, c.z3 + -> Hash (actual rows=100 loops=1) + Output: a.x1, a.x2, a.x3 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: a.x1, a.x2, a.x3 Using aqo: true AQO mode: LEARN JOINS: 2 diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index c9e59249..5edef7bb 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -16,7 +16,7 @@ DROP TABLE IF EXISTS a,b CASCADE; -- Create tables with correlated datas in columns CREATE TABLE a (x1 int, x2 int, x3 int); -INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; +INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; CREATE TABLE b (y1 int, y2 int, y3 int); INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; From c80987852866bb0ae64188f9a18bcd2889ca7e13 Mon Sep 17 00:00:00 2001 From: Alexandra Date: Fri, 28 Apr 2023 15:11:42 +0300 Subject: [PATCH 188/203] Fix dsa_allocate for 
aqo_qtext_store to avoid segfault when out of memory (#166) --- storage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage.c b/storage.c index 17f97555..bf004199 100644 --- a/storage.c +++ b/storage.c @@ -1111,7 +1111,7 @@ aqo_qtext_store(uint64 queryid, const char *query_string) entry->queryid = queryid; size = size > querytext_max_size ? querytext_max_size : size; - entry->qtext_dp = dsa_allocate(qtext_dsa, size); + entry->qtext_dp = dsa_allocate0(qtext_dsa, size); if (!_check_dsa_validity(entry->qtext_dp)) { From 4ae351af9770df96bea1582ddd1e2c94f8dfb4f2 Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Thu, 25 May 2023 16:17:11 +0700 Subject: [PATCH 189/203] Try reducing the memory overhead. Free some allocated memory right after use. Reset AQOPredictMemCtx as soon as posible. Remove learning attempts on SubPlan nodes. Bugfix. Free allocated memory on save/load data. Add memory context for storage. Change copyright to 2016-2023. --- aqo.c | 11 +++++++- aqo.h | 3 +- auto_tuning.c | 2 +- cardinality_estimation.c | 2 +- cardinality_hooks.c | 16 +++++++++-- expected/unsupported.out | 61 ++++++++++++++++++++++++++++++++++++++-- hash.c | 30 ++++++++++++++++---- machine_learning.c | 2 +- path_utils.c | 37 +++++++++++------------- postprocessing.c | 10 +++++-- preprocessing.c | 2 +- sql/unsupported.sql | 10 +++++++ storage.c | 46 ++++++++++++++++++++++-------- 13 files changed, 183 insertions(+), 49 deletions(-) diff --git a/aqo.c b/aqo.c index b3e176fb..12051b6e 100644 --- a/aqo.c +++ b/aqo.c @@ -2,7 +2,7 @@ * aqo.c * Adaptive query optimization extension * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/aqo.c @@ -92,6 +92,9 @@ MemoryContext AQOPredictMemCtx = NULL; /* Is released at the end of learning */ MemoryContext AQOLearnMemCtx = NULL; +/* Is released at the end of load/store routines */ +MemoryContext AQOStorageMemCtx = NULL; + /* Additional plan info */ 
int njoins; @@ -343,6 +346,12 @@ _PG_init(void) AQOLearnMemCtx = AllocSetContextCreate(AQOTopMemCtx, "AQOLearnMemoryContext", ALLOCSET_DEFAULT_SIZES); + /* + * AQOStorageMemoryContext containe data for load/store routines. + */ + AQOStorageMemCtx = AllocSetContextCreate(AQOTopMemCtx, + "AQOStorageMemoryContext", + ALLOCSET_DEFAULT_SIZES); RegisterResourceReleaseCallback(aqo_free_callback, NULL); RegisterAQOPlanNodeMethods(); diff --git a/aqo.h b/aqo.h index 85c3f3b2..f3275003 100644 --- a/aqo.h +++ b/aqo.h @@ -105,7 +105,7 @@ * Module storage.c is responsible for storage query settings and models * (i. e. all information which is used in extension). * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/aqo.h @@ -232,6 +232,7 @@ extern MemoryContext AQOTopMemCtx; extern MemoryContext AQOCacheMemCtx; extern MemoryContext AQOPredictMemCtx; extern MemoryContext AQOLearnMemCtx; +extern MemoryContext AQOStorageMemCtx; extern int aqo_statement_timeout; diff --git a/auto_tuning.c b/auto_tuning.c index b035a093..36dfe2ef 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/auto_tuning.c diff --git a/cardinality_estimation.c b/cardinality_estimation.c index f93e0905..8ab98f3c 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/cardinality_estimation.c diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 1520b4e7..da25e02c 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -18,7 +18,7 @@ * 
******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/cardinality_hooks.c @@ -81,6 +81,7 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) if (!query_context.use_aqo) { MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); goto default_estimator; } @@ -99,6 +100,7 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) /* Return to the caller's memory context. */ MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); if (predicted < 0) goto default_estimator; @@ -190,12 +192,15 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, cache_selectivity(current_hash, rel->relid, rte->relid, *((double *) lfirst(l2))); } + + pfree(args_hash); + pfree(eclass_hash); } if (!query_context.use_aqo) { MemoryContextSwitchTo(oldctx); - + MemoryContextReset(AQOPredictMemCtx); goto default_estimator; } @@ -210,6 +215,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, /* Return to the caller's memory context */ MemoryContextSwitchTo(oldctx); + MemoryContextReset(AQOPredictMemCtx); predicted_ppi_rows = predicted; fss_ppi_hash = fss; @@ -264,6 +270,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, if (!query_context.use_aqo) { MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); goto default_estimator; } @@ -283,6 +290,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, /* Return to the caller's memory context */ MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); rel->fss_hash = fss; @@ -342,6 +350,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, if (!query_context.use_aqo) { MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); goto default_estimator; } @@ -358,6 +367,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, &fss); /* 
Return to the caller's memory context */ MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); predicted_ppi_rows = predicted; fss_ppi_hash = fss; @@ -445,6 +455,7 @@ aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, grouped_rel->rows = predicted; grouped_rel->fss_hash = fss; MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); return predicted; } else @@ -455,6 +466,7 @@ aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, grouped_rel->predicted_cardinality = -1; MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); default_estimator: if (aqo_estimate_num_groups_next) diff --git a/expected/unsupported.out b/expected/unsupported.out index 6e45dcd8..9db07618 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -311,6 +311,59 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) JOINS: 0 (23 rows) +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); + QUERY PLAN +----------------------------------------------------------- + Seq Scan on t (actual rows=1000 loops=1) + AQO not used + Filter: ((x = (SubPlan 1)) AND (SubPlan 2)) + SubPlan 1 + -> Limit (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=1 loops=1000) + AQO not used + Filter: (x = t.x) + Rows Removed by Filter: 475 + SubPlan 2 + -> Seq Scan on t t0_1 (actual rows=1 loops=1000) + AQO not used + Filter: (x = t.x) + Rows Removed by Filter: 475 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(18 rows) + +-- No prediction for top SeqScan, because it fss is changed +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); + QUERY PLAN +----------------------------------------------------------- + Seq Scan on t (actual rows=1000 loops=1) + AQO 
not used + Filter: ((SubPlan 2) AND (x = (SubPlan 1))) + SubPlan 2 + -> Seq Scan on t t0_1 (actual rows=1 loops=1000) + AQO: rows=1, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 475 + SubPlan 1 + -> Limit (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=1 loops=1000) + AQO: rows=1, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 475 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(18 rows) + -- It's OK to use the knowledge for a query with different constants. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT count(*) FROM t WHERE @@ -580,6 +633,10 @@ ORDER BY (md5(query_text),error) DESC; -------+------------------------------------------------------------------------------------------------ 0.768 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; 0.070 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + 1.554 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT * FROM t WHERE + + | x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + + | x IN (SELECT x FROM t t0 WHERE t0.x = t.x); 0.000 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; 0.000 | SELECT * FROM + | (SELECT * FROM t WHERE x < 0) AS t0 + @@ -612,13 +669,13 @@ ORDER BY (md5(query_text),error) DESC; | JOIN + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + | ON q1.x = q2.x+1; -(13 rows) +(14 rows) DROP TABLE t,t1 CASCADE; -- delete all tables used in the test SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May some false positives really bother us here? 
count ------- - 44 + 48 (1 row) SELECT true AS success FROM aqo_cleanup(); diff --git a/hash.c b/hash.c index 9d7470a7..55ce8b6a 100644 --- a/hash.c +++ b/hash.c @@ -12,7 +12,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/hash.c @@ -175,6 +175,8 @@ get_grouped_exprs_hash(int child_fss, List *group_exprs) final_hashes[0] = child_fss; final_hashes[1] = get_int_array_hash(hashes, i); + pfree(hashes); + return get_int_array_hash(final_hashes, 2); } @@ -242,6 +244,7 @@ get_fss_for_object(List *relsigns, List *clauselist, clause_has_consts[i] = (args != NULL && has_consts(*args)); i++; } + pfree(args_hash); idx = argsort(clause_hashes, n, sizeof(*clause_hashes), int_cmp); inverse_idx = inverse_permutation(idx, n); @@ -252,6 +255,7 @@ get_fss_for_object(List *relsigns, List *clauselist, sorted_clauses[inverse_idx[i]] = clause_hashes[i]; i++; } + pfree(clause_hashes); i = 0; foreach(lc, selectivities) @@ -267,6 +271,7 @@ get_fss_for_object(List *relsigns, List *clauselist, } i++; } + pfree(inverse_idx); for (i = 0; i < n;) { @@ -290,6 +295,8 @@ get_fss_for_object(List *relsigns, List *clauselist, sizeof(**features), double_cmp); i = j; } + pfree(idx); + pfree(clause_has_consts); /* * Generate feature subspace hash. 
@@ -299,6 +306,8 @@ get_fss_for_object(List *relsigns, List *clauselist, eclasses_hash = get_int_array_hash(eclass_hash, nargs); relations_hash = get_relations_hash(relsigns); fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relations_hash); + pfree(sorted_clauses); + pfree(eclass_hash); if (nfeatures != NULL) { @@ -358,11 +367,17 @@ static int get_node_hash(Node *node) { char *str; + char *no_consts; + char *no_locations; int hash; - str = remove_locations(remove_consts(nodeToString(node))); - hash = get_str_hash(str); + str = nodeToString(node); + no_consts = remove_consts(str); pfree(str); + no_locations = remove_locations(no_consts); + pfree(no_consts); + hash = get_str_hash(no_locations); + pfree(no_locations); return hash; } @@ -485,6 +500,7 @@ get_relations_hash(List *relsigns) result = DatumGetInt32(hash_any((const unsigned char *) hashes, nhashes * sizeof(uint32))); + pfree(hashes); return result; } @@ -497,9 +513,11 @@ static char * remove_consts(const char *str) { char *res; + char *tmp; - res = replace_patterns(str, "{CONST", is_brace); - res = replace_patterns(res, ":stmt_len", is_brace); + tmp = replace_patterns(str, "{CONST", is_brace); + res = replace_patterns(tmp, ":stmt_len", is_brace); + pfree(tmp); return res; } @@ -701,6 +719,8 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) for (i = 0; i < *nargs; ++i) (*eclass_hash)[i] = e_hashes[disjoint_set_get_parent(p, i)]; + + pfree(e_hashes); } /* diff --git a/machine_learning.c b/machine_learning.c index d4f5cbee..bfdf0aaa 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -12,7 +12,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/machine_learning.c diff --git a/path_utils.c b/path_utils.c index 93b42d98..67f5919b 100644 --- a/path_utils.c +++ b/path_utils.c @@ -5,7 +5,7 @@ * 
******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/path_utils.c @@ -56,7 +56,7 @@ static AQOPlanNode DefaultAQOPlanNode = */ static create_plan_hook_type aqo_create_plan_next = NULL; -static create_upper_paths_hook_type aqo_create_upper_paths_next = NULL; +/*static create_upper_paths_hook_type aqo_create_upper_paths_next = NULL;*/ static AQOPlanNode * @@ -265,7 +265,7 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) /* * Search for any subplans or initplans. - * if subplan is found, replace it by the feature space value of this subplan. + * if subplan is found, replace it by zero Const. */ static Node * subplan_hunter(Node *node, void *context) @@ -276,21 +276,13 @@ subplan_hunter(Node *node, void *context) if (IsA(node, SubPlan)) { - SubPlan *splan = (SubPlan *) node; - PlannerInfo *root = (PlannerInfo *) context; - PlannerInfo *subroot; - RelOptInfo *upper_rel; - A_Const *fss; + A_Const *fss = makeNode(A_Const); - subroot = (PlannerInfo *) list_nth(root->glob->subroots, - splan->plan_id - 1); - upper_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL); + fss->val.type = T_Integer; + fss->location = -1; + fss->val.val.ival = 0; + return (Node *) fss; - Assert(list_length(upper_rel->ext_nodes) == 1); - Assert(IsA((Node *) linitial(upper_rel->ext_nodes), A_Const)); - - fss = (A_Const *) linitial(upper_rel->ext_nodes); - return (Node *) copyObject(fss); } return expression_tree_mutator(node, subplan_hunter, context); } @@ -783,11 +775,14 @@ RegisterAQOPlanNodeMethods(void) } /* + * Warning! This function does not word properly. + * Because value of Const nodes removed by hash routine. + * * Hook for create_upper_paths_hook * * Assume, that we are last in the chain of path creators. 
*/ -static void +/*static void aqo_store_upper_signature(PlannerInfo *root, UpperRelationKind stage, RelOptInfo *input_rel, @@ -803,7 +798,7 @@ aqo_store_upper_signature(PlannerInfo *root, (*aqo_create_upper_paths_next)(root, stage, input_rel, output_rel, extra); if (!query_context.use_aqo && !query_context.learn_aqo && !force_collect_stat) - /* Includes 'disabled query' state. */ + / * Includes 'disabled query' state. * / return; if (stage != UPPERREL_FINAL) @@ -818,7 +813,7 @@ aqo_store_upper_signature(PlannerInfo *root, fss_node->val.val.ival = get_fss_for_object(rels.signatures, clauses, NIL, NULL, NULL); output_rel->ext_nodes = lappend(output_rel->ext_nodes, (void *) fss_node); -} +}*/ void aqo_path_utils_init(void) @@ -826,6 +821,6 @@ aqo_path_utils_init(void) aqo_create_plan_next = create_plan_hook; create_plan_hook = aqo_create_plan; - aqo_create_upper_paths_next = create_upper_paths_hook; - create_upper_paths_hook = aqo_store_upper_signature; + /*aqo_create_upper_paths_next = create_upper_paths_hook; + create_upper_paths_hook = aqo_store_upper_signature;*/ } diff --git a/postprocessing.c b/postprocessing.c index 66aca901..a6b6d030 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -9,7 +9,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/postprocessing.c @@ -224,6 +224,12 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, lst = lappend(lst, cur_sel); } + if (parametrized_sel) + { + pfree(args_hash); + pfree(eclass_hash); + } + return lst; } @@ -833,11 +839,11 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) } } - selectivity_cache_clear(); cur_classes = ldelete_uint64(cur_classes, query_context.query_hash); end: /* Release all AQO-specific memory, allocated during learning procedure */ + selectivity_cache_clear(); MemoryContextSwitchTo(oldctx); 
MemoryContextReset(AQOLearnMemCtx); diff --git a/preprocessing.c b/preprocessing.c index ced4ffab..81d1dd81 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -49,7 +49,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/preprocessing.c diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 8b36d721..e5853306 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -98,6 +98,16 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); +-- No prediction for top SeqScan, because it fss is changed +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); + -- It's OK to use the knowledge for a query with different constants. 
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT count(*) FROM t WHERE diff --git a/storage.c b/storage.c index bf004199..f71f5207 100644 --- a/storage.c +++ b/storage.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/storage.c @@ -666,11 +666,12 @@ static int data_store(const char *filename, form_record_t callback, long nrecs, void *ctx) { - FILE *file; - size_t size; - uint32 counter = 0; - void *data; - char *tmpfile; + FILE *file; + size_t size; + uint32 counter = 0; + void *data; + char *tmpfile; + MemoryContext old_context = MemoryContextSwitchTo(AQOStorageMemCtx); tmpfile = psprintf("%s.tmp", filename); file = AllocateFile(tmpfile, PG_BINARY_W); @@ -687,7 +688,11 @@ data_store(const char *filename, form_record_t callback, /* TODO: Add CRC code ? */ if (fwrite(&size, sizeof(size), 1, file) != 1 || fwrite(data, size, 1, file) != 1) + { + pfree(data); goto error; + } + pfree(data); counter++; } @@ -701,6 +706,9 @@ data_store(const char *filename, form_record_t callback, /* Parallel (re)writing into a file haven't happen. 
*/ (void) durable_rename(tmpfile, filename, PANIC); elog(LOG, "[AQO] %d records stored in file %s.", counter, filename); + + MemoryContextSwitchTo(old_context); + MemoryContextReset(AQOStorageMemCtx); return 0; error: @@ -712,6 +720,9 @@ data_store(const char *filename, form_record_t callback, FreeFile(file); unlink(tmpfile); pfree(tmpfile); + + MemoryContextSwitchTo(old_context); + MemoryContextReset(AQOStorageMemCtx); return -1; } @@ -936,17 +947,20 @@ aqo_queries_load(void) static void data_load(const char *filename, deform_record_t callback, void *ctx) { - FILE *file; - long i; - uint32 header; - int32 pgver; - long num; + FILE *file; + long i; + uint32 header; + int32 pgver; + long num; + MemoryContext old_context = MemoryContextSwitchTo(AQOStorageMemCtx); file = AllocateFile(filename, PG_BINARY_R); if (file == NULL) { if (errno != ENOENT) goto read_error; + + MemoryContextSwitchTo(old_context); return; } @@ -968,8 +982,12 @@ data_load(const char *filename, deform_record_t callback, void *ctx) goto read_error; data = palloc(size); if (fread(data, size, 1, file) != 1) + { + pfree(data); goto read_error; + } res = callback(data, size); + pfree(data); if (!res) { @@ -983,6 +1001,9 @@ data_load(const char *filename, deform_record_t callback, void *ctx) FreeFile(file); elog(LOG, "[AQO] %ld records loaded from file %s.", num, filename); + + MemoryContextSwitchTo(old_context); + MemoryContextReset(AQOStorageMemCtx); return; read_error: @@ -998,6 +1019,9 @@ data_load(const char *filename, deform_record_t callback, void *ctx) if (file) FreeFile(file); unlink(filename); + + MemoryContextSwitchTo(old_context); + MemoryContextReset(AQOStorageMemCtx); } static void From 470e7093454dcb16a2997ffb8a1c00c0286436c5 Mon Sep 17 00:00:00 2001 From: Andrey Kazarinov Date: Wed, 21 Jun 2023 21:51:33 +0300 Subject: [PATCH 190/203] cancel aqo timeout action in the critical section --- aqo.h | 1 + postprocessing.c | 13 ++++++--- preprocessing.c | 22 ++++++++++++++- 
t/003_assertion_error.pl | 59 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 90 insertions(+), 5 deletions(-) create mode 100644 t/003_assertion_error.pl diff --git a/aqo.h b/aqo.h index f3275003..04d9b8b3 100644 --- a/aqo.h +++ b/aqo.h @@ -172,6 +172,7 @@ extern bool aqo_show_details; extern int aqo_join_threshold; extern bool use_wide_search; extern bool aqo_learn_statement_timeout; +extern bool aqo_learn_statement_timeout_enable; /* Parameters for current query */ typedef struct QueryContextData diff --git a/postprocessing.c b/postprocessing.c index a6b6d030..7df0a253 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -22,6 +22,7 @@ #include "optimizer/optimizer.h" #include "postgres_fdw.h" #include "utils/queryenvironment.h" +#include "miscadmin.h" #include "aqo.h" #include "hash.h" @@ -628,8 +629,12 @@ aqo_timeout_handler(void) MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); aqo_obj_stat ctx = {NIL, NIL, NIL, false, false}; - if (!timeoutCtl.queryDesc || !ExtractFromQueryEnv(timeoutCtl.queryDesc)) + if (CritSectionCount > 0 || !timeoutCtl.queryDesc || + !ExtractFromQueryEnv(timeoutCtl.queryDesc)) + { + MemoryContextSwitchTo(oldctx); return; + } /* Now we can analyze execution state of the query. 
*/ @@ -664,7 +669,7 @@ set_timeout_if_need(QueryDesc *queryDesc) { int64 fintime = (int64) get_timeout_finish_time(STATEMENT_TIMEOUT)-1; - if (aqo_learn_statement_timeout && aqo_statement_timeout > 0) + if (aqo_learn_statement_timeout_enable && aqo_statement_timeout > 0) { max_timeout_value = Min(query_context.smart_timeout, (int64) aqo_statement_timeout); if (max_timeout_value > fintime) @@ -684,7 +689,7 @@ set_timeout_if_need(QueryDesc *queryDesc) */ return false; - if (!get_timeout_active(STATEMENT_TIMEOUT) || !aqo_learn_statement_timeout) + if (!get_timeout_active(STATEMENT_TIMEOUT) || !aqo_learn_statement_timeout_enable) return false; if (!ExtractFromQueryEnv(queryDesc)) @@ -829,7 +834,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) error = stat->est_error_aqo[stat->cur_stat_slot_aqo-1] - cardinality_sum_errors/(1 + cardinality_num_objects); - if ( aqo_learn_statement_timeout && aqo_statement_timeout > 0 && error >= 0.1) + if ( aqo_learn_statement_timeout_enable && aqo_statement_timeout > 0 && error >= 0.1) { int64 fintime = increase_smart_timeout(); elog(NOTICE, "[AQO] Time limit for execution of the statement was increased. Current timeout is "UINT64_FORMAT, fintime); diff --git a/preprocessing.c b/preprocessing.c index 81d1dd81..a97e131b 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -71,7 +71,10 @@ List *cur_classes = NIL; int aqo_join_threshold = 3; +bool aqo_learn_statement_timeout_enable = false; + static planner_hook_type aqo_planner_next = NULL; +static post_parse_analyze_hook_type aqo_post_parse_analyze_hook = NULL; static void disable_aqo_for_query(void); static bool isQueryUsingSystemRelation(Query *query); @@ -481,9 +484,26 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) context); } +static void +aqo_post_parse_analyze(ParseState *pstate, Query *query) +{ + aqo_learn_statement_timeout_enable = false; + /* + * Enable learn_statement_timeout for + * the top level SELECT statement only. 
+ */ + if (query->commandType == CMD_SELECT) + aqo_learn_statement_timeout_enable = aqo_learn_statement_timeout; + + if (aqo_post_parse_analyze_hook) + aqo_post_parse_analyze_hook(pstate, query); +} + void aqo_preprocessing_init(void) { aqo_planner_next = planner_hook ? planner_hook : standard_planner; planner_hook = aqo_planner; -} \ No newline at end of file + aqo_post_parse_analyze_hook = post_parse_analyze_hook; + post_parse_analyze_hook = aqo_post_parse_analyze; +} diff --git a/t/003_assertion_error.pl b/t/003_assertion_error.pl new file mode 100644 index 00000000..e85206ff --- /dev/null +++ b/t/003_assertion_error.pl @@ -0,0 +1,59 @@ +use strict; +use warnings; + +use Config; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use Test::More tests => 1; + +my $node = PostgreSQL::Test::Cluster->new('aqotest'); +$node->init; +$node->append_conf('postgresql.conf', qq{ + shared_preload_libraries = 'aqo' + aqo.join_threshold = 0 + aqo.mode = 'learn' + aqo.show_details = 'off' + aqo.learn_statement_timeout = 'on' + }); + +# Test constants. Default values. +my $TRANSACTIONS = 100; + +# Disable connection default settings, forced by PGOPTIONS in AQO Makefile +# $ENV{PGOPTIONS}=""; + +# Change pgbench parameters according to the environment variable. 
+if (defined $ENV{TRANSACTIONS}) +{ + $TRANSACTIONS = $ENV{TRANSACTIONS}; +} + +my $query_string = ' +CREATE TABLE IF NOT EXISTS aqo_test1(a int, b int); +WITH RECURSIVE t(a, b) +AS ( + VALUES (1, 2) + UNION ALL + SELECT t.a + 1, t.b + 1 FROM t WHERE t.a < 10 +) INSERT INTO aqo_test1 (SELECT * FROM t); + +SET statement_timeout = 10; + +CREATE TABLE tmp1 AS SELECT t1.a AS a, t2.a AS b, t3.a AS c +FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 +WHERE t1.a = t2.b AND t2.a = t3.b; +DROP TABLE tmp1; +'; + +$node->start(); + +$node->safe_psql('postgres', 'CREATE EXTENSION IF NOT EXISTS aqo;'); + +for (1..$TRANSACTIONS) { + $node->psql('postgres', $query_string); +} + +ok(1, "There are no segfaults"); + +$node->stop(); From 240ba054e9ab7918edb41d6cac4626100f352203 Mon Sep 17 00:00:00 2001 From: Alexandra Date: Tue, 29 Aug 2023 16:17:43 +0300 Subject: [PATCH 191/203] Fix aqo.dsm_max_size segfault (#178) Fix aqo.dsm_max_size segfault Add test for dsm_max_size --------- Co-authored-by: Alexandra Pervushina --- aqo.c | 6 ++-- aqo_shared.c | 2 ++ preprocessing.c | 15 +++++++-- storage.c | 65 +++++++++++++++++++++++++++++++----- storage.h | 3 +- t/004_dsm_size_max.pl | 76 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 152 insertions(+), 15 deletions(-) create mode 100644 t/004_dsm_size_max.pl diff --git a/aqo.c b/aqo.c index 12051b6e..efc911db 100644 --- a/aqo.c +++ b/aqo.c @@ -270,8 +270,8 @@ _PG_init(void) &dsm_size_max, 100, 0, INT_MAX, - PGC_SUSET, - 0, + PGC_POSTMASTER, + GUC_UNIT_MB, NULL, NULL, NULL @@ -383,5 +383,5 @@ PG_FUNCTION_INFO_V1(invalidate_deactivated_queries_cache); Datum invalidate_deactivated_queries_cache(PG_FUNCTION_ARGS) { - PG_RETURN_POINTER(NULL); + PG_RETURN_POINTER(NULL); } diff --git a/aqo_shared.c b/aqo_shared.c index b7cfced8..9b478552 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -97,6 +97,8 @@ aqo_init_shmem(void) /* Doesn't use DSA, so can be loaded in postmaster */ aqo_stat_load(); aqo_queries_load(); + + 
check_dsa_file_size(); } } diff --git a/preprocessing.c b/preprocessing.c index a97e131b..95371631 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -283,14 +283,23 @@ aqo_planner(Query *parse, const char *query_string, int cursorOptions, query_context.learn_aqo, query_context.use_aqo, query_context.auto_tuning, &aqo_queries_nulls)) { + bool dsa_valid = true; /* * Add query text into the ML-knowledge base. Just for further * analysis. In the case of cached plans we may have NULL query text. */ - if (!aqo_qtext_store(query_context.query_hash, query_string)) + if (!aqo_qtext_store(query_context.query_hash, query_string, &dsa_valid)) { - Assert(0); /* panic only on debug installation */ - elog(ERROR, "[AQO] Impossible situation was detected. Maybe not enough of shared memory?"); + if (!dsa_valid) + { + disable_aqo_for_query(); + elog(WARNING, "[AQO] Not enough DSA. AQO was disabled for this query"); + } + else + { + Assert(0); /* panic only on debug installation */ + elog(ERROR, "[AQO] Impossible situation was detected. 
Maybe not enough of shared memory?"); + } } } else diff --git a/storage.c b/storage.c index f71f5207..a11f16f4 100644 --- a/storage.c +++ b/storage.c @@ -507,7 +507,7 @@ _form_qtext_record_cb(void *ctx, size_t *size) { HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; QueryTextEntry *entry; - void *data; + void *data; char *query_string; char *ptr; @@ -784,7 +784,7 @@ _deform_qtexts_record_cb(void *data, size_t size) HASH_ENTER, &found); Assert(!found); - entry->qtext_dp = dsa_allocate(qtext_dsa, len); + entry->qtext_dp = dsa_allocate_extended(qtext_dsa, len, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); if (!_check_dsa_validity(entry->qtext_dp)) { /* @@ -829,7 +829,7 @@ aqo_qtexts_load(void) if (!found) { - if (!aqo_qtext_store(0, "COMMON feature space (do not delete!)")) + if (!aqo_qtext_store(0, "COMMON feature space (do not delete!)", NULL)) elog(PANIC, "[AQO] DSA Initialization was unsuccessful"); } } @@ -944,6 +944,49 @@ aqo_queries_load(void) } } +static long +aqo_get_file_size(const char *filename) +{ + FILE *file; + long size = 0; + + file = AllocateFile(filename, PG_BINARY_R); + if (file == NULL) + { + if (errno != ENOENT) + goto read_error; + return size; + } + + fseek(file, 0L, SEEK_END); + size = ftell(file); + + FreeFile(file); + return size; + +read_error: + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", filename))); + if (file) + FreeFile(file); + unlink(filename); + return -1; +} + +void +check_dsa_file_size(void) +{ + long qtext_size = aqo_get_file_size(PGAQO_TEXT_FILE); + long data_size = aqo_get_file_size(PGAQO_DATA_FILE); + + if (qtext_size == -1 || data_size == -1 || + qtext_size + data_size >= dsm_size_max * 1024 * 1024) + { + elog(ERROR, "aqo.dsm_size_max is too small"); + } +} + static void data_load(const char *filename, deform_record_t callback, void *ctx) { @@ -1090,13 +1133,16 @@ dsa_init() * XXX: Maybe merge with aqo_queries ? 
*/ bool -aqo_qtext_store(uint64 queryid, const char *query_string) +aqo_qtext_store(uint64 queryid, const char *query_string, bool *dsa_valid) { QueryTextEntry *entry; bool found; bool tblOverflow; HASHACTION action; + if (dsa_valid) + *dsa_valid = true; + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); if (query_string == NULL || querytext_max_size == 0) @@ -1135,7 +1181,7 @@ aqo_qtext_store(uint64 queryid, const char *query_string) entry->queryid = queryid; size = size > querytext_max_size ? querytext_max_size : size; - entry->qtext_dp = dsa_allocate0(qtext_dsa, size); + entry->qtext_dp = dsa_allocate_extended(qtext_dsa, size, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); if (!_check_dsa_validity(entry->qtext_dp)) { @@ -1144,7 +1190,10 @@ aqo_qtext_store(uint64 queryid, const char *query_string) * that caller recognize it and don't try to call us more. */ (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, NULL); + _aqo_queries_remove(queryid); LWLockRelease(&aqo_state->qtexts_lock); + if (dsa_valid) + *dsa_valid = false; return false; } @@ -1423,7 +1472,7 @@ aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, List *reloids) entry->nrels = nrels; size = _compute_data_dsa(entry); - entry->data_dp = dsa_allocate0(data_dsa, size); + entry->data_dp = dsa_allocate_extended(data_dsa, size, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); if (!_check_dsa_validity(entry->data_dp)) { @@ -1455,7 +1504,7 @@ aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, List *reloids) /* Need to re-allocate DSA chunk */ dsa_free(data_dsa, entry->data_dp); - entry->data_dp = dsa_allocate0(data_dsa, size); + entry->data_dp = dsa_allocate_extended(data_dsa, size, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); if (!_check_dsa_validity(entry->data_dp)) { @@ -2713,7 +2762,7 @@ aqo_query_texts_update(PG_FUNCTION_ARGS) str_buff = (char*) palloc(str_len); text_to_cstring_buffer(str, str_buff, str_len); - res = aqo_qtext_store(queryid, str_buff); + res = aqo_qtext_store(queryid, str_buff, NULL); pfree(str_buff); 
PG_RETURN_BOOL(res); diff --git a/storage.h b/storage.h index 2b4e4cdd..9491e33e 100644 --- a/storage.h +++ b/storage.h @@ -138,7 +138,7 @@ extern StatEntry *aqo_stat_store(uint64 queryid, bool use_aqo, extern void aqo_stat_flush(void); extern void aqo_stat_load(void); -extern bool aqo_qtext_store(uint64 queryid, const char *query_string); +extern bool aqo_qtext_store(uint64 queryid, const char *query_string, bool *dsa_valid); extern void aqo_qtexts_flush(void); extern void aqo_qtexts_load(void); @@ -156,6 +156,7 @@ extern bool aqo_queries_store(uint64 queryid, uint64 fs, bool learn_aqo, extern void aqo_queries_flush(void); extern void aqo_queries_load(void); +extern void check_dsa_file_size(void); /* * Machinery for deactivated queries cache. * TODO: Should live in a custom memory context diff --git a/t/004_dsm_size_max.pl b/t/004_dsm_size_max.pl new file mode 100644 index 00000000..26898b79 --- /dev/null +++ b/t/004_dsm_size_max.pl @@ -0,0 +1,76 @@ +use strict; +use warnings; + +use Config; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use Test::More tests => 5; + +my $node = PostgreSQL::Test::Cluster->new('aqotest'); +$node->init; +$node->append_conf('postgresql.conf', qq{ +shared_preload_libraries = 'aqo' +aqo.mode = 'learn' +log_statement = 'ddl' +aqo.join_threshold = 0 +aqo.dsm_size_max = 4 +aqo.fs_max_items = 30000 +aqo.querytext_max_size = 1000000 +}); + +# Disable connection default settings, forced by PGOPTIONS in AQO Makefile +$ENV{PGOPTIONS}=""; + +# General purpose variables. +my $long_string = 'a' x 1000000; + +$node->start(); +$node->psql('postgres', 'CREATE EXTENSION aqo;'); + +for my $i (1 .. 3) { + $node->psql('postgres', "select aqo_query_texts_update(" . $i . ", \'" . $long_string . 
"\');"); +} +$node->stop(); + +$node->append_conf('postgresql.conf', 'aqo.dsm_size_max = 1'); +is($node->start(fail_ok => 1), + 0, "node fails to start"); + +$node->append_conf('postgresql.conf', 'aqo.dsm_size_max = 4'); +is($node->start(), + 1, "node starts"); +$node->psql('postgres', 'select * from aqo_reset();'); + +$long_string = '1, ' x 10000; +for my $i (1 .. 30) { + $node->psql('postgres', "select aqo_data_update(" . $i . ", 1, 1, '{{1}}', '{1}', '{1}', '{" . $long_string . " 1}');"); +} +$node->stop(); + +$node->append_conf('postgresql.conf', 'aqo.dsm_size_max = 1'); +is($node->start(fail_ok => 1), + 0, "node fails to start"); + +$node->append_conf('postgresql.conf', 'aqo.dsm_size_max = 4'); +is($node->start(), + 1, "node starts"); +$node->psql('postgres', 'select * from aqo_reset();'); +$node->stop(); + +my $regex; +$long_string = 'a' x 100000; +$regex = qr/.*WARNING: \[AQO\] Not enough DSA\. AQO was disabled for this query/; +$node->append_conf('postgresql.conf', 'aqo.dsm_size_max = 1'); +$node->start(); +my ($stdout, $stderr); +for my $i (1 .. 20) { + $node->psql('postgres', "create table a as select s, md5(random()::text) from generate_Series(1,100) s;"); + $node->psql('postgres', + "SELECT a.s FROM a CROSS JOIN ( SELECT '" . $long_string . "' as long_string) AS extra_rows;", + stdout => \$stdout, stderr => \$stderr); + $node->psql('postgres', "drop table a"); +} +like($stderr, $regex, 'warning for exceeding the dsa limit'); +$node->stop; +done_testing(); From fc7e5ff30babcc31c98c2a707d4d36c924970f3f Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Tue, 24 Oct 2023 00:54:37 +0700 Subject: [PATCH 192/203] Change aqo.querytext_max_size lower limit to 1. 
--- aqo.c | 2 +- expected/update_functions.out | 27 +++++++++++++++++++++++++++ sql/update_functions.sql | 8 ++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/aqo.c b/aqo.c index efc911db..e84f89b0 100644 --- a/aqo.c +++ b/aqo.c @@ -256,7 +256,7 @@ _PG_init(void) NULL, &querytext_max_size, 1000, - 0, INT_MAX, + 1, INT_MAX, PGC_SUSET, 0, NULL, diff --git a/expected/update_functions.out b/expected/update_functions.out index 74428a35..d2e7c84c 100644 --- a/expected/update_functions.out +++ b/expected/update_functions.out @@ -417,6 +417,33 @@ SELECT aqo_data_update(1, 1, 1, '{{1}, {2}}', '{1}', '{1}', '{1, 2, 3}'); (1 row) SET aqo.mode='disabled'; +-- Testing the minimum querytext size +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET aqo.querytext_max_size = 0; +ERROR: 0 is outside the valid range for parameter "aqo.querytext_max_size" (1 .. 2147483647) +SET aqo.querytext_max_size = 1; +SELECT queryid, query_text FROM aqo_query_texts WHERE queryid > 0; + queryid | query_text +---------+------------ +(0 rows) + +SELECT aqo_query_texts_update(1, 'test'); + aqo_query_texts_update +------------------------ + t +(1 row) + +SELECT queryid, query_text FROM aqo_query_texts WHERE queryid > 0; + queryid | query_text +---------+------------ + 1 | +(1 row) + DROP EXTENSION aqo CASCADE; DROP TABLE aqo_test1, aqo_test2; DROP TABLE aqo_query_texts_dump, aqo_queries_dump, aqo_query_stat_dump, aqo_data_dump; diff --git a/sql/update_functions.sql b/sql/update_functions.sql index e2773978..4c7fee53 100644 --- a/sql/update_functions.sql +++ b/sql/update_functions.sql @@ -204,6 +204,14 @@ SELECT aqo_data_update(1, 1, 1, '{{1}, {2}}', '{1}', '{1}', '{1, 2, 3}'); SET aqo.mode='disabled'; +-- Testing the minimum querytext size +SELECT true AS success FROM aqo_reset(); +SET aqo.querytext_max_size = 0; +SET aqo.querytext_max_size = 1; +SELECT queryid, query_text FROM aqo_query_texts WHERE queryid > 0; +SELECT aqo_query_texts_update(1, 
'test'); +SELECT queryid, query_text FROM aqo_query_texts WHERE queryid > 0; + DROP EXTENSION aqo CASCADE; DROP TABLE aqo_test1, aqo_test2; From 52d586ea8ed0441f593f3ff7c2455ea56661f77b Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Wed, 11 Oct 2023 13:34:54 +0700 Subject: [PATCH 193/203] Bugfix of look_a_like test. Add ANALYZE after creating tables to stabilize results of the test. --- expected/look_a_like.out | 82 ++++++++++++++++++++-------------------- sql/look_a_like.sql | 3 ++ 2 files changed, 45 insertions(+), 40 deletions(-) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index dc339ffa..594f017e 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -22,6 +22,7 @@ CREATE TABLE a (x1 int, x2 int, x3 int); INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; CREATE TABLE b (y1 int, y2 int, y3 int); INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; +ANALYZE a, b; -- -- Returns string-by-string explain of a query. Made for removing some strings -- from the explain output. 
@@ -90,22 +91,22 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result ------------------------------------------------------------ + result +------------------------------------------------------------- Hash Join (actual rows=5000 loops=1) AQO not used Output: a.x1, b.y1 - Hash Cond: (b.y1 = a.x1) - -> Seq Scan on public.b (actual rows=1000 loops=1) + Hash Cond: (a.x1 = b.y1) + -> Seq Scan on public.a (actual rows=50 loops=1) AQO not used - Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=50 loops=1) - Output: a.x1 - -> Seq Scan on public.a (actual rows=50 loops=1) + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 5) AND (a.x2 < 5)) + Rows Removed by Filter: 50 + -> Hash (actual rows=1000 loops=1) + Output: b.y1 + -> Seq Scan on public.b (actual rows=1000 loops=1) AQO not used - Output: a.x1 - Filter: ((a.x1 < 5) AND (a.x2 < 5)) - Rows Removed by Filter: 50 + Output: b.y1 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -191,22 +192,22 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result -------------------------------------------------------------------- + result +------------------------------------------------------------- Hash Join (actual rows=5000 loops=1) AQO not used Output: a.x1, b.y1 - Hash Cond: (b.y1 = a.x1) - -> Seq Scan on public.b (actual rows=1000 loops=1) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=50 loops=1) - Output: a.x1 - -> Seq Scan on public.a (actual rows=50 loops=1) - AQO not used - Output: a.x1 - Filter: ((a.x1 < 5) AND (a.x2 < 5) AND (a.x3 < 10)) - Rows Removed by Filter: 50 + Hash Cond: (a.x1 = b.y1) + -> Seq Scan on public.a (actual rows=50 loops=1) + AQO not used + Output: 
a.x1, a.x2, a.x3 + Filter: ((a.x1 < 5) AND (a.x2 < 5) AND (a.x3 < 10)) + Rows Removed by Filter: 50 + -> Hash (actual rows=1000 loops=1) + Output: b.y1 + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -486,34 +487,35 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L CREATE TABLE c (z1 int, z2 int, z3 int); INSERT INTO c (z1, z2, z3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; +ANALYZE c; SELECT str AS result FROM expln(' SELECT * FROM (a LEFT JOIN b ON a.x1 = b.y1) sc WHERE not exists (SELECT z1 FROM c WHERE sc.x1=c.z1 );') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result -------------------------------------------------------------------- - Hash Left Join (actual rows=0 loops=1) + result +------------------------------------------------------------------------- + Hash Right Join (actual rows=0 loops=1) AQO not used Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3 - Hash Cond: (a.x1 = b.y1) - -> Hash Anti Join (actual rows=0 loops=1) - AQO not used + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (never executed) + AQO: rows=1000 + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=0 loops=1) Output: a.x1, a.x2, a.x3 - Hash Cond: (a.x1 = c.z1) - -> Seq Scan on public.a (actual rows=100 loops=1) + -> Hash Anti Join (actual rows=0 loops=1) AQO not used Output: a.x1, a.x2, a.x3 - -> Hash (actual rows=1000 loops=1) - Output: c.z1 - -> Seq Scan on public.c (actual rows=1000 loops=1) + Hash Cond: (a.x1 = c.z1) + -> Seq Scan on public.a (actual rows=100 loops=1) AQO not used + Output: a.x1, a.x2, a.x3 + -> Hash (actual rows=1000 loops=1) Output: c.z1 - -> Hash (never executed) - Output: b.y1, b.y2, b.y3 - -> Seq Scan on public.b (never executed) - AQO: rows=1000 - Output: b.y1, b.y2, b.y3 + -> Seq Scan on public.c (actual rows=1000 
loops=1) + AQO not used + Output: c.z1 Using aqo: true AQO mode: LEARN JOINS: 2 diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index 5edef7bb..f50e4e55 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -21,6 +21,7 @@ INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM CREATE TABLE b (y1 int, y2 int, y3 int); INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; +ANALYZE a, b; -- -- Returns string-by-string explain of a query. Made for removing some strings @@ -128,6 +129,8 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L CREATE TABLE c (z1 int, z2 int, z3 int); INSERT INTO c (z1, z2, z3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; +ANALYZE c; + SELECT str AS result FROM expln(' SELECT * FROM (a LEFT JOIN b ON a.x1 = b.y1) sc WHERE From a3e8286969dd4c6b9381c68ba645649ef390879b Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Mon, 22 Jan 2024 08:50:18 +0300 Subject: [PATCH 194/203] Fix testing with WRITE_READ_PARSE_PLAN_TREES. Change RestrictInfo to AQOClause. Add AQOConstNode to use it instead of useless nodes. Serialize/deserialize all AQOPlanNode and AQOConstNode fields. 
--- aqo_pg13.patch | 66 ++++--- cardinality_hooks.c | 3 +- hash.c | 33 ++-- path_utils.c | 411 +++++++++++++++++++++++++++++++++++--------- path_utils.h | 38 +++- postprocessing.c | 20 +-- 6 files changed, 430 insertions(+), 141 deletions(-) diff --git a/aqo_pg13.patch b/aqo_pg13.patch index 406e3e0e..d7ecb41c 100644 --- a/aqo_pg13.patch +++ b/aqo_pg13.patch @@ -1,5 +1,5 @@ diff --git a/contrib/Makefile b/contrib/Makefile -index 1846d415b6..95519ac11d 100644 +index 1846d415b6f..95519ac11de 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -7,6 +7,7 @@ include $(top_builddir)/src/Makefile.global @@ -11,7 +11,7 @@ index 1846d415b6..95519ac11d 100644 auto_explain \ bloom \ diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index bc05c96b4c..b6a3abe0d2 100644 +index bc05c96b4ce..b6a3abe0d2b 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -24,6 +24,7 @@ @@ -57,7 +57,7 @@ index bc05c96b4c..b6a3abe0d2 100644 if (es->format == EXPLAIN_FORMAT_TEXT) appendStringInfoChar(es->str, '\n'); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index 692b6c1559..580d04d784 100644 +index 692b6c1559f..580d04d7844 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -132,6 +132,7 @@ CopyPlanFields(const Plan *from, Plan *newnode) @@ -69,35 +69,31 @@ index 692b6c1559..580d04d784 100644 /* diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c -index 21ececf0c2..a0e7a7ebca 100644 +index 21ececf0c2f..ebfd3ba86de 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -342,6 +342,7 @@ _outPlanInfo(StringInfo str, const Plan *node) WRITE_NODE_FIELD(initPlan); WRITE_BITMAPSET_FIELD(extParam); WRITE_BITMAPSET_FIELD(allParam); -+ /*WRITE_NODE_FIELD(ext_nodes); */ ++ WRITE_NODE_FIELD(ext_nodes); } /* diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c -index 7976b369ba..604314e0b3 100644 +index 
7976b369ba8..2e47bd8d950 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c -@@ -1580,6 +1580,11 @@ ReadCommonPlan(Plan *local_node) +@@ -1580,6 +1580,7 @@ ReadCommonPlan(Plan *local_node) READ_NODE_FIELD(initPlan); READ_BITMAPSET_FIELD(extParam); READ_BITMAPSET_FIELD(allParam); -+ local_node->ext_nodes = NIL; -+ /* READ_NODE_FIELD(ext_nodes); -+ * Don't serialize this field. It is required to serialize RestrictInfo and -+ * EqualenceClass. -+ */ ++ READ_NODE_FIELD(ext_nodes); } /* diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index 4edc859cb5..988f2e6ab7 100644 +index 4edc859cb57..988f2e6ab75 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -98,6 +98,12 @@ @@ -363,7 +359,7 @@ index 4edc859cb5..988f2e6ab7 100644 { double parallel_divisor = path->parallel_workers; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 917713c163..5b7bf1cec6 100644 +index 917713c1633..5b7bf1cec69 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -70,6 +70,7 @@ @@ -394,7 +390,7 @@ index 917713c163..5b7bf1cec6 100644 /* diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c -index 27c665ac12..f599fba755 100644 +index 821693c60ee..fa627f472f9 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -145,7 +145,8 @@ static List *extract_rollup_sets(List *groupingSets); @@ -407,7 +403,7 @@ index 27c665ac12..f599fba755 100644 grouping_sets_data *gd, List *target_list); static RelOptInfo *create_grouping_paths(PlannerInfo *root, -@@ -3686,7 +3687,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) +@@ -3682,7 +3683,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) */ static double get_number_of_groups(PlannerInfo *root, @@ -417,7 +413,7 @@ index 27c665ac12..f599fba755 100644 
grouping_sets_data *gd, List *target_list) { -@@ -3723,7 +3725,7 @@ get_number_of_groups(PlannerInfo *root, +@@ -3719,7 +3721,7 @@ get_number_of_groups(PlannerInfo *root, GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); double numGroups = estimate_num_groups(root, groupExprs, @@ -426,7 +422,7 @@ index 27c665ac12..f599fba755 100644 &gset); gs->numGroups = numGroups; -@@ -3748,7 +3750,7 @@ get_number_of_groups(PlannerInfo *root, +@@ -3744,7 +3746,7 @@ get_number_of_groups(PlannerInfo *root, GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); double numGroups = estimate_num_groups(root, groupExprs, @@ -435,7 +431,7 @@ index 27c665ac12..f599fba755 100644 &gset); gs->numGroups = numGroups; -@@ -3764,8 +3766,8 @@ get_number_of_groups(PlannerInfo *root, +@@ -3760,8 +3762,8 @@ get_number_of_groups(PlannerInfo *root, groupExprs = get_sortgrouplist_exprs(parse->groupClause, target_list); @@ -446,7 +442,7 @@ index 27c665ac12..f599fba755 100644 } } else if (parse->groupingSets) -@@ -4151,7 +4153,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, +@@ -4147,7 +4149,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, * Estimate number of groups. 
*/ dNumGroups = get_number_of_groups(root, @@ -456,7 +452,7 @@ index 27c665ac12..f599fba755 100644 gd, extra->targetList); -@@ -6935,13 +6938,15 @@ create_partial_grouping_paths(PlannerInfo *root, +@@ -6931,13 +6934,15 @@ create_partial_grouping_paths(PlannerInfo *root, if (cheapest_total_path != NULL) dNumPartialGroups = get_number_of_groups(root, @@ -475,7 +471,7 @@ index 27c665ac12..f599fba755 100644 extra->targetList); diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c -index a203e6f1ff..d31bf5bae6 100644 +index a203e6f1ff5..d31bf5bae63 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -258,6 +258,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) @@ -541,7 +537,7 @@ index a203e6f1ff..d31bf5bae6 100644 return ppi; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c -index 37458da096..248a1875a1 100644 +index 37458da096d..248a1875a18 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -147,6 +147,7 @@ @@ -573,7 +569,7 @@ index 37458da096..248a1875a1 100644 * estimate_num_groups - Estimate number of groups in a grouped query * diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h -index ba661d32a6..09d0abe58b 100644 +index ba661d32a63..09d0abe58be 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h @@ -75,6 +75,18 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; @@ -596,7 +592,7 @@ index ba661d32a6..09d0abe58b 100644 extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, ParamListInfo params, DestReceiver *dest); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h -index 5ebf070979..5b2acd7de2 100644 +index d2b4271de9d..559b9db7121 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -739,6 +739,10 @@ typedef struct RelOptInfo @@ -635,7 +631,7 @@ index 
5ebf070979..5b2acd7de2 100644 diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 90f02ce6fd..88c332164d 100644 +index 90f02ce6fdd..88c332164dd 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -159,6 +159,9 @@ typedef struct Plan @@ -649,7 +645,7 @@ index 90f02ce6fd..88c332164d 100644 /* ---------------- diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h -index 6141654e47..e6b28cbb05 100644 +index 6141654e478..e6b28cbb05f 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -39,6 +39,37 @@ typedef enum @@ -733,7 +729,7 @@ index 6141654e47..e6b28cbb05 100644 #endif /* COST_H */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h -index 3bd7072ae8..21bbaba11c 100644 +index 3bd7072ae8c..21bbaba11c8 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -18,6 +18,10 @@ @@ -748,7 +744,7 @@ index 3bd7072ae8..21bbaba11c 100644 * prototypes for pathnode.c */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h -index 8ce60e202e..75415102c2 100644 +index 8ce60e202e5..75415102c2e 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -24,6 +24,12 @@ extern double cursor_tuple_fraction; @@ -765,7 +761,7 @@ index 8ce60e202e..75415102c2 100644 * prototypes for plan/planmain.c */ diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h -index 7ac4a06391..def3522881 100644 +index 7ac4a063915..74fe91b89f1 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -127,6 +127,12 @@ typedef bool (*get_index_stats_hook_type) (PlannerInfo *root, @@ -781,13 +777,13 @@ index 7ac4a06391..def3522881 100644 /* Functions in selfuncs.c */ -@@ -195,6 +201,9 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause, +@@ -193,6 +199,9 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause, + Selectivity 
*leftstart, Selectivity *leftend, + Selectivity *rightstart, Selectivity *rightend); - extern double estimate_num_groups(PlannerInfo *root, List *groupExprs, - double input_rows, List **pgset); +extern double estimate_num_groups_ext(PlannerInfo *root, List *groupExprs, + Path *subpath, RelOptInfo *grouped_rel, + List **pgset); + extern double estimate_num_groups(PlannerInfo *root, List *groupExprs, + double input_rows, List **pgset); - extern void estimate_hash_bucket_stats(PlannerInfo *root, - Node *hashkey, double nbuckets, diff --git a/cardinality_hooks.c b/cardinality_hooks.c index da25e02c..0c23d391 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -186,8 +186,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, forboth(l, allclauses, l2, selectivities) { - current_hash = get_clause_hash( - ((RestrictInfo *) lfirst(l))->clause, + current_hash = get_clause_hash(((AQOClause *) lfirst(l))->clause, nargs, args_hash, eclass_hash); cache_selectivity(current_hash, rel->relid, rte->relid, *((double *) lfirst(l2))); diff --git a/hash.c b/hash.c index 55ce8b6a..397fe11c 100644 --- a/hash.c +++ b/hash.c @@ -27,6 +27,7 @@ #include "aqo.h" #include "hash.h" +#include "path_utils.h" static int get_str_hash(const char *str); static int get_node_hash(Node *node); @@ -236,11 +237,11 @@ get_fss_for_object(List *relsigns, List *clauselist, i = 0; foreach(lc, clauselist) { - RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + AQOClause *clause = (AQOClause *) lfirst(lc); - clause_hashes[i] = get_clause_hash(rinfo->clause, + clause_hashes[i] = get_clause_hash(clause->clause, nargs, args_hash, eclass_hash); - args = get_clause_args_ptr(rinfo->clause); + args = get_clause_args_ptr(clause->clause); clause_has_consts[i] = (args != NULL && has_consts(*args)); i++; } @@ -335,14 +336,14 @@ get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash) cclause = copyObject(clause); args = get_clause_args_ptr(cclause); + /* XXX: Why does it work even if 
this loop is removed? */ foreach(l, *args) { arg_eclass = get_arg_eclass(get_node_hash(lfirst(l)), nargs, args_hash, eclass_hash); if (arg_eclass != 0) { - lfirst(l) = makeNode(Param); - ((Param *) lfirst(l))->paramid = arg_eclass; + lfirst(l) = create_aqo_const_node(AQO_NODE_EXPR, arg_eclass); } } if (!clause_is_eq_clause(clause) || has_consts(*args)) @@ -572,7 +573,7 @@ get_arg_eclass(int arg_hash, int nargs, int *args_hash, int *eclass_hash) static void get_clauselist_args(List *clauselist, int *nargs, int **args_hash) { - RestrictInfo *rinfo; + AQOClause *clause; List **args; ListCell *l; ListCell *l2; @@ -582,9 +583,9 @@ get_clauselist_args(List *clauselist, int *nargs, int **args_hash) foreach(l, clauselist) { - rinfo = (RestrictInfo *) lfirst(l); - args = get_clause_args_ptr(rinfo->clause); - if (args != NULL && clause_is_eq_clause(rinfo->clause)) + clause = (AQOClause *) lfirst(l); + args = get_clause_args_ptr(clause->clause); + if (args != NULL && clause_is_eq_clause(clause->clause)) foreach(l2, *args) if (!IsA(lfirst(l2), Const)) cnt++; @@ -593,9 +594,9 @@ get_clauselist_args(List *clauselist, int *nargs, int **args_hash) *args_hash = palloc(cnt * sizeof(**args_hash)); foreach(l, clauselist) { - rinfo = (RestrictInfo *) lfirst(l); - args = get_clause_args_ptr(rinfo->clause); - if (args != NULL && clause_is_eq_clause(rinfo->clause)) + clause = (AQOClause *) lfirst(l); + args = get_clause_args_ptr(clause->clause); + if (args != NULL && clause_is_eq_clause(clause->clause)) foreach(l2, *args) if (!IsA(lfirst(l2), Const)) (*args_hash)[i++] = get_node_hash(lfirst(l2)); @@ -650,7 +651,7 @@ disjoint_set_merge_eclasses(int *p, int v1, int v2) static int * perform_eclasses_join(List *clauselist, int nargs, int *args_hash) { - RestrictInfo *rinfo; + AQOClause *clause; int *p; ListCell *l, *l2; @@ -664,9 +665,9 @@ perform_eclasses_join(List *clauselist, int nargs, int *args_hash) foreach(l, clauselist) { - rinfo = (RestrictInfo *) lfirst(l); - args = 
get_clause_args_ptr(rinfo->clause); - if (args != NULL && clause_is_eq_clause(rinfo->clause)) + clause = (AQOClause *) lfirst(l); + args = get_clause_args_ptr(clause->clause); + if (args != NULL && clause_is_eq_clause(clause->clause)) { i3 = -1; foreach(l2, *args) diff --git a/path_utils.c b/path_utils.c index 67f5919b..3f8f2895 100644 --- a/path_utils.c +++ b/path_utils.c @@ -22,6 +22,7 @@ #include "storage/lmgr.h" #include "utils/syscache.h" #include "utils/lsyscache.h" +#include "common/shortest_dec.h" #include "aqo.h" #include "hash.h" @@ -39,7 +40,8 @@ static AQOPlanNode DefaultAQOPlanNode = .node.type = T_ExtensibleNode, .node.extnodename = AQO_PLAN_NODE, .had_path = false, - .rels = NULL, + .rels.hrels = NIL, + .rels.signatures = NIL, .clauses = NIL, .selectivities = NIL, .grouping_exprs = NIL, @@ -47,18 +49,39 @@ static AQOPlanNode DefaultAQOPlanNode = .parallel_divisor = -1., .was_parametrized = false, .fss = INT_MAX, - .prediction = -1 + .prediction = -1. }; /* * Hook on creation of a plan node. We need to store AQO-specific data to * support learning stage. 
*/ -static create_plan_hook_type aqo_create_plan_next = NULL; +static create_plan_hook_type aqo_create_plan_next = NULL; -/*static create_upper_paths_hook_type aqo_create_upper_paths_next = NULL;*/ +/*static create_upper_paths_hook_type aqo_create_upper_paths_next = NULL;*/ +/* Return a copy of the given list of AQOClause structs */ +static List * +copy_aqo_clauses(List *src) +{ + List *result = NIL; + ListCell *lc; + + foreach(lc, src) + { + AQOClause *old = (AQOClause *) lfirst(lc); + AQOClause *new = palloc(sizeof(AQOClause)); + + memcpy(new, old, sizeof(AQOClause)); + new->clause = copyObject(old->clause); + + result = lappend(result, (void *) new); + } + + return result; +} + static AQOPlanNode * create_aqo_plan_node() { @@ -66,12 +89,20 @@ create_aqo_plan_node() T_ExtensibleNode); Assert(node != NULL); memcpy(node, &DefaultAQOPlanNode, sizeof(AQOPlanNode)); - node->rels = palloc(sizeof(RelSortOut)); - node->rels->hrels = NIL; - node->rels->signatures = NIL; return node; } +AQOConstNode * +create_aqo_const_node(AQOConstType type, int fss) +{ + AQOConstNode *node = (AQOConstNode *) newNode(sizeof(AQOConstNode), + T_ExtensibleNode); + Assert(node != NULL); + node->node.extnodename = AQO_CONST_NODE; + node->type = type; + node->fss = fss; + return node; +} /* Ensure that it's postgres_fdw's foreign server oid */ static bool @@ -276,13 +307,8 @@ subplan_hunter(Node *node, void *context) if (IsA(node, SubPlan)) { - A_Const *fss = makeNode(A_Const); - - fss->val.type = T_Integer; - fss->location = -1; - fss->val.val.ival = 0; - return (Node *) fss; - + /* TODO: use fss of SubPlan here */ + return (Node *) create_aqo_const_node(AQO_NODE_SUBPLAN, 0); } return expression_tree_mutator(node, subplan_hunter, context); } @@ -292,8 +318,8 @@ subplan_hunter(Node *node, void *context) * During this operation clauses could be changed and we couldn't walk across * this list next. 
*/ -List * -aqo_get_clauses(PlannerInfo *root, List *restrictlist) +static List * +aqo_get_raw_clauses(PlannerInfo *root, List *restrictlist) { List *clauses = NIL; ListCell *lc; @@ -311,14 +337,49 @@ aqo_get_clauses(PlannerInfo *root, List *restrictlist) return clauses; } +static List * +copy_aqo_clauses_from_rinfo(List *src) +{ + List *result = NIL; + ListCell *lc; + + foreach(lc, src) + { + RestrictInfo *old = (RestrictInfo *) lfirst(lc); + AQOClause *new = palloc(sizeof(AQOClause)); + + new->clause = copyObject(old->clause); + new->norm_selec = old->norm_selec; + new->outer_selec = old->outer_selec; + + result = lappend(result, (void *) new); + } + + return result; +} + /* - * For given path returns the list of all clauses used in it. - * Also returns selectivities for the clauses throw the selectivities variable. - * Both clauses and selectivities returned lists are copies and therefore - * may be modified without corruption of the input data. + * Return copy of clauses returned from the aqo_get_raw_clause() routine + * and convert it into AQOClause struct. */ List * -get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) +aqo_get_clauses(PlannerInfo *root, List *restrictlist) +{ + List *clauses = aqo_get_raw_clauses(root, restrictlist); + List *result = copy_aqo_clauses_from_rinfo(clauses); + + list_free_deep(clauses); + return result; +} + +/* + * Returns a list of all used clauses for the given path. + * Also returns selectivities for the clauses to 'selectivities' variable. + * The returned list of the selectivities is a copy and therefore + * may be modified without corruption of the input data. 
+ */ +static List * +get_path_clauses_recurse(Path *path, PlannerInfo *root, List **selectivities) { List *inner; List *inner_sel = NIL; @@ -338,89 +399,89 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) case T_NestPath: case T_MergePath: case T_HashPath: - cur = ((JoinPath *) path)->joinrestrictinfo; + cur = list_concat(cur, ((JoinPath *) path)->joinrestrictinfo); /* Not quite correct to avoid sjinfo, but we believe in caching */ cur_sel = get_selectivities(root, cur, 0, ((JoinPath *) path)->jointype, NULL); - outer = get_path_clauses(((JoinPath *) path)->outerjoinpath, root, + outer = get_path_clauses_recurse(((JoinPath *) path)->outerjoinpath, root, &outer_sel); - inner = get_path_clauses(((JoinPath *) path)->innerjoinpath, root, + inner = get_path_clauses_recurse(((JoinPath *) path)->innerjoinpath, root, &inner_sel); *selectivities = list_concat(cur_sel, list_concat(outer_sel, inner_sel)); - return list_concat(list_copy(cur), list_concat(outer, inner)); + return list_concat(cur, list_concat(outer, inner)); break; case T_UniquePath: - return get_path_clauses(((UniquePath *) path)->subpath, root, + return get_path_clauses_recurse(((UniquePath *) path)->subpath, root, selectivities); break; case T_GatherPath: case T_GatherMergePath: - return get_path_clauses(((GatherPath *) path)->subpath, root, + return get_path_clauses_recurse(((GatherPath *) path)->subpath, root, selectivities); break; case T_MaterialPath: - return get_path_clauses(((MaterialPath *) path)->subpath, root, + return get_path_clauses_recurse(((MaterialPath *) path)->subpath, root, selectivities); break; case T_ProjectionPath: - return get_path_clauses(((ProjectionPath *) path)->subpath, root, + return get_path_clauses_recurse(((ProjectionPath *) path)->subpath, root, selectivities); break; case T_ProjectSetPath: - return get_path_clauses(((ProjectSetPath *) path)->subpath, root, + return get_path_clauses_recurse(((ProjectSetPath *) path)->subpath, root, selectivities); 
break; case T_SortPath: - return get_path_clauses(((SortPath *) path)->subpath, root, + return get_path_clauses_recurse(((SortPath *) path)->subpath, root, selectivities); break; case T_IncrementalSortPath: { IncrementalSortPath *p = (IncrementalSortPath *) path; - return get_path_clauses(p->spath.subpath, root, + return get_path_clauses_recurse(p->spath.subpath, root, selectivities); } break; case T_GroupPath: - return get_path_clauses(((GroupPath *) path)->subpath, root, + return get_path_clauses_recurse(((GroupPath *) path)->subpath, root, selectivities); break; case T_UpperUniquePath: - return get_path_clauses(((UpperUniquePath *) path)->subpath, root, + return get_path_clauses_recurse(((UpperUniquePath *) path)->subpath, root, selectivities); break; case T_AggPath: - return get_path_clauses(((AggPath *) path)->subpath, root, + return get_path_clauses_recurse(((AggPath *) path)->subpath, root, selectivities); break; case T_GroupingSetsPath: - return get_path_clauses(((GroupingSetsPath *) path)->subpath, root, + return get_path_clauses_recurse(((GroupingSetsPath *) path)->subpath, root, selectivities); break; case T_WindowAggPath: - return get_path_clauses(((WindowAggPath *) path)->subpath, root, + return get_path_clauses_recurse(((WindowAggPath *) path)->subpath, root, selectivities); break; case T_SetOpPath: - return get_path_clauses(((SetOpPath *) path)->subpath, root, + return get_path_clauses_recurse(((SetOpPath *) path)->subpath, root, selectivities); break; case T_LockRowsPath: - return get_path_clauses(((LockRowsPath *) path)->subpath, root, + return get_path_clauses_recurse(((LockRowsPath *) path)->subpath, root, selectivities); break; case T_LimitPath: - return get_path_clauses(((LimitPath *) path)->subpath, root, + return get_path_clauses_recurse(((LimitPath *) path)->subpath, root, selectivities); break; case T_SubqueryScanPath: /* Recursing into Subquery we must use subroot */ Assert(path->parent->subroot != NULL); - return 
get_path_clauses(((SubqueryScanPath *) path)->subpath, + return get_path_clauses_recurse(((SubqueryScanPath *) path)->subpath, path->parent->subroot, selectivities); break; @@ -432,11 +493,11 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) { Path *subpath = lfirst(lc); - cur = list_concat(cur, list_copy( - get_path_clauses(subpath, root, selectivities))); + cur = list_concat(cur, + get_path_clauses_recurse(subpath, root, selectivities)); cur_sel = list_concat(cur_sel, *selectivities); } - cur = list_concat(cur, aqo_get_clauses(root, + cur = list_concat(cur, aqo_get_raw_clauses(root, path->parent->baserestrictinfo)); *selectivities = list_concat(cur_sel, get_selectivities(root, @@ -459,11 +520,11 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) { Path *subpath = lfirst(lc); - cur = list_concat(cur, list_copy( - get_path_clauses(subpath, root, selectivities))); + cur = list_concat(cur, + get_path_clauses_recurse(subpath, root, selectivities)); cur_sel = list_concat(cur_sel, *selectivities); } - cur = list_concat(cur, aqo_get_clauses(root, + cur = list_concat(cur, aqo_get_raw_clauses(root, path->parent->baserestrictinfo)); *selectivities = list_concat(cur_sel, get_selectivities(root, @@ -475,7 +536,7 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) case T_ForeignPath: /* The same as in the default case */ default: - cur = list_concat(list_copy(path->parent->baserestrictinfo), + cur = list_concat(list_concat(cur, path->parent->baserestrictinfo), path->param_info ? 
path->param_info->ppi_clauses : NIL); if (path->param_info) @@ -484,12 +545,26 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) else cur_sel = get_selectivities(root, cur, 0, JOIN_INNER, NULL); *selectivities = cur_sel; - cur = aqo_get_clauses(root, cur); + cur = aqo_get_raw_clauses(root, cur); return cur; break; } } +/* + * Returns a list of AQOClauses for the given path, which is a copy + * of the clauses returned from the get_path_clauses_recurse() routine. + * Also returns selectivities for the clauses to 'selectivities' variable. + * Both returned lists are copies and therefore may be modified without + * corruption of the input data. + */ +List * +get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) +{ + return copy_aqo_clauses_from_rinfo( + get_path_clauses_recurse(path, root, selectivities)); +} + /* * Some of paths are kind of utility path. I mean, It isn't corresponding to * specific RelOptInfo node. So, it should be omitted in process of clauses @@ -595,7 +670,7 @@ aqo_create_plan(PlannerInfo *root, Path *src, Plan **dest) (*dest)->lefttree->targetlist); /* Copy bare expressions for further AQO learning case. 
*/ node->grouping_exprs = copyObject(groupExprs); - get_list_of_relids(root, ap->subpath->parent->relids, node->rels); + get_list_of_relids(root, ap->subpath->parent->relids, &node->rels); node->jointype = JOIN_INNER; } else if (is_appropriate_path(src)) @@ -606,7 +681,7 @@ aqo_create_plan(PlannerInfo *root, Path *src, Plan **dest) node->jointype = JOIN_INNER; } - get_list_of_relids(root, src->parent->relids, node->rels); + get_list_of_relids(root, src->parent->relids, &node->rels); if (src->parallel_workers > 0) node->parallel_divisor = get_parallel_divisor(src); @@ -641,15 +716,19 @@ AQOnodeCopy(struct ExtensibleNode *enew, const struct ExtensibleNode *eold) Assert(strcmp(old->node.extnodename, AQO_PLAN_NODE) == 0); Assert(new && old); - /* Copy static fields in one command */ - memcpy(new, old, sizeof(AQOPlanNode)); + /* + * Copy static fields in one command. + * But do not copy fields of the old->node. + * Otherwise, we can use pointers that will be freed. + * For example, it is old->node.extnodename. + */ + memcpy(&new->had_path, &old->had_path, sizeof(AQOPlanNode) - offsetof(AQOPlanNode, had_path)); /* These lists couldn't contain AQO nodes. 
Use basic machinery */ - new->rels = palloc(sizeof(RelSortOut)); - new->rels->hrels = list_copy(old->rels->hrels); - new->rels->signatures = list_copy(old->rels->signatures); + new->rels.hrels = list_copy(old->rels.hrels); + new->rels.signatures = list_copy(old->rels.signatures); - new->clauses = copyObject(old->clauses); + new->clauses = copy_aqo_clauses(old->clauses); new->grouping_exprs = copyObject(old->grouping_exprs); new->selectivities = copyObject(old->selectivities); enew = (ExtensibleNode *) new; @@ -661,6 +740,39 @@ AQOnodeEqual(const struct ExtensibleNode *a, const struct ExtensibleNode *b) return false; } +static void +AQOconstCopy(struct ExtensibleNode *enew, const struct ExtensibleNode *eold) +{ + AQOConstNode *new = (AQOConstNode *) enew; + AQOConstNode *old = (AQOConstNode *) eold; + + Assert(IsA(old, ExtensibleNode)); + Assert(strcmp(old->node.extnodename, AQO_CONST_NODE) == 0); + Assert(new && old); + + new->type = old->type; + new->fss = old->fss; + enew = (ExtensibleNode *) new; +} + +static bool +AQOconstEqual(const struct ExtensibleNode *a, const struct ExtensibleNode *b) +{ + return false; +} + +/* + * Convert a double value, attempting to ensure the value is preserved exactly. 
+ */ +static void +outDouble(StringInfo str, double d) +{ + char buf[DOUBLE_SHORTEST_DECIMAL_LEN]; + + double_to_shortest_decimal_buf(d, buf); + appendStringInfoString(str, buf); +} + #define WRITE_INT_FIELD(fldname) \ appendStringInfo(str, " :" CppAsString(fldname) " %d", node->fldname) @@ -678,17 +790,57 @@ AQOnodeEqual(const struct ExtensibleNode *a, const struct ExtensibleNode *b) appendStringInfo(str, " :" CppAsString(fldname) " %d", \ (int) node->fldname) -/* Write a float field --- caller must give format to define precision */ -#define WRITE_FLOAT_FIELD(fldname,format) \ - appendStringInfo(str, " :" CppAsString(fldname) " " format, node->fldname) +/* Write a float field */ +#define WRITE_FLOAT_FIELD(fldname) \ + (appendStringInfo(str, " :" CppAsString(fldname) " "), \ + outDouble(str, node->fldname)) + +/* The start part of a custom list writer */ +#define WRITE_CUSTOM_LIST_START(fldname) \ + { \ + appendStringInfo(str, " :N_" CppAsString(fldname) " %d ", \ + list_length(node->fldname)); \ + /* Serialize this list like an array */ \ + if (list_length(node->fldname)) \ + { \ + ListCell *lc; \ + appendStringInfo(str, "("); \ + foreach (lc, node->fldname) + +/* The end part of a custom list writer */ +#define WRITE_CUSTOM_LIST_END() \ + appendStringInfo(str, " )"); \ + } \ + else \ + appendStringInfo(str, "<>"); \ + } + +/* Write a list of int values */ +#define WRITE_INT_LIST(fldname) \ + WRITE_CUSTOM_LIST_START(fldname) \ + { \ + int val = lfirst_int(lc); \ + appendStringInfo(str, " %d", val); \ + } \ + WRITE_CUSTOM_LIST_END() + +/* Write a list of AQOClause values */ +#define WRITE_AQOCLAUSE_LIST(fldname) \ + WRITE_CUSTOM_LIST_START(clauses) \ + { \ + AQOClause *node = lfirst(lc); \ + /* Serialize this struct like a node */ \ + appendStringInfo(str, " {"); \ + WRITE_NODE_FIELD(clause); \ + WRITE_FLOAT_FIELD(norm_selec); \ + WRITE_FLOAT_FIELD(outer_selec); \ + appendStringInfo(str, " }"); \ + } \ + WRITE_CUSTOM_LIST_END() /* * Serialize AQO plan node to a 
string. * - * Right now we can't correctly serialize all fields of the node. Taking into - * account that this action needed when a plan moves into parallel workers or - * just during debugging, we serialize it only partially, just for debug - * purposes. * Some extensions may manipulate by parts of serialized plan too. */ static void @@ -696,9 +848,36 @@ AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) { AQOPlanNode *node = (AQOPlanNode *) enode; - /* For Adaptive optimization DEBUG purposes */ + WRITE_BOOL_FIELD(had_path); + + WRITE_NODE_FIELD(rels.hrels); + WRITE_INT_LIST(rels.signatures); + + WRITE_AQOCLAUSE_LIST(clauses); + + WRITE_NODE_FIELD(selectivities); + WRITE_NODE_FIELD(grouping_exprs); + WRITE_ENUM_FIELD(jointype, JoinType); + + WRITE_FLOAT_FIELD(parallel_divisor); + WRITE_BOOL_FIELD(was_parametrized); + + WRITE_INT_FIELD(fss); + WRITE_FLOAT_FIELD(prediction); +} + +/* + * Serialize AQO const node to a string. + * + * Some extensions may manipulate by parts of serialized plan too. 
+ */ +static void +AQOconstOut(struct StringInfoData *str, const struct ExtensibleNode *enode) +{ + AQOConstNode *node = (AQOConstNode *) enode; + + WRITE_ENUM_FIELD(type, AQOConstType); WRITE_INT_FIELD(fss); - WRITE_FLOAT_FIELD(prediction, "%.0f"); } /* Read an integer field (anything written as ":fldname %d") */ @@ -731,6 +910,54 @@ AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) (void) token; /* in case not used elsewhere */ \ local_node->fldname = nodeRead(NULL, 0) +/* The start part of a custom list reader */ +#define READ_CUSTOM_LIST_START() \ + { \ + int counter; \ + token = pg_strtok(&length); /* skip the name */ \ + token = pg_strtok(&length); \ + counter = atoi(token); \ + token = pg_strtok(&length); /* left bracket "(" */ \ + if (length) \ + { \ + for (int i = 0; i < counter; i++) + +/* The end part of a custom list reader */ +#define READ_CUSTOM_LIST_END(fldname) \ + token = pg_strtok(&length); /* right bracket ")" */ \ + } \ + else \ + local_node->fldname = NIL; \ + } + +/* Read a list of int values */ +#define READ_INT_LIST(fldname) \ + READ_CUSTOM_LIST_START() \ + { \ + int val; \ + token = pg_strtok(&length); \ + val = atoi(token); \ + local_node->fldname = lappend_int( \ + local_node->fldname, val); \ + } \ + READ_CUSTOM_LIST_END(fldname) + +/* Read a list of AQOClause values */ +#define READ_AQOCLAUSE_LIST(fldname) \ + READ_CUSTOM_LIST_START() \ + { \ + /* copy to use in the inner blocks of code */ \ + AQOPlanNode *node_copy = local_node; \ + AQOClause *local_node = palloc(sizeof(AQOClause)); \ + token = pg_strtok(&length); /* left bracket "{" */ \ + READ_NODE_FIELD(clause); \ + READ_FLOAT_FIELD(norm_selec); \ + READ_FLOAT_FIELD(outer_selec); \ + token = pg_strtok(&length); /* right bracket "}" */ \ + node_copy->fldname = lappend(node_copy->fldname, local_node); \ + } \ + READ_CUSTOM_LIST_END(fldname) + /* * Deserialize AQO plan node from a string to internal representation. 
* @@ -743,22 +970,41 @@ AQOnodeRead(struct ExtensibleNode *enode) const char *token; int length; - local_node->had_path = false; - local_node->jointype = 0; - local_node->parallel_divisor = 1.0; - local_node->was_parametrized = false; + READ_BOOL_FIELD(had_path); + + READ_NODE_FIELD(rels.hrels); + READ_INT_LIST(rels.signatures); + + READ_AQOCLAUSE_LIST(clauses); + + READ_NODE_FIELD(selectivities); + READ_NODE_FIELD(grouping_exprs); + READ_ENUM_FIELD(jointype, JoinType); - local_node->rels = palloc0(sizeof(RelSortOut)); - local_node->clauses = NIL; - local_node->selectivities = NIL; - local_node->grouping_exprs = NIL; + READ_FLOAT_FIELD(parallel_divisor); + READ_BOOL_FIELD(was_parametrized); - /* For Adaptive optimization DEBUG purposes */ READ_INT_FIELD(fss); READ_FLOAT_FIELD(prediction); } -static const ExtensibleNodeMethods method = +/* + * Deserialize AQO const node from a string to internal representation. + * + * Should work in coherence with AQOconstOut(). + */ +static void +AQOconstRead(struct ExtensibleNode *enode) +{ + AQOConstNode *local_node = (AQOConstNode *) enode; + const char *token; + int length; + + READ_ENUM_FIELD(type, AQOConstType); + READ_INT_FIELD(fss); +} + +static const ExtensibleNodeMethods aqo_node_method = { .extnodename = AQO_PLAN_NODE, .node_size = sizeof(AQOPlanNode), @@ -768,10 +1014,21 @@ static const ExtensibleNodeMethods method = .nodeRead = AQOnodeRead }; +static const ExtensibleNodeMethods aqo_const_method = +{ + .extnodename = AQO_CONST_NODE, + .node_size = sizeof(AQOConstNode), + .nodeCopy = AQOconstCopy, + .nodeEqual = AQOconstEqual, + .nodeOut = AQOconstOut, + .nodeRead = AQOconstRead +}; + void RegisterAQOPlanNodeMethods(void) { - RegisterExtensibleNodeMethods(&method); + RegisterExtensibleNodeMethods(&aqo_node_method); + RegisterExtensibleNodeMethods(&aqo_const_method); } /* diff --git a/path_utils.h b/path_utils.h index cbe83da0..0d5d68bd 100644 --- a/path_utils.h +++ b/path_utils.h @@ -6,6 +6,7 @@ #include 
"optimizer/planner.h" #define AQO_PLAN_NODE "AQOPlanNode" +#define AQO_CONST_NODE "AQOConstNode" /* * Find and sort out relations that used in the query: @@ -20,6 +21,20 @@ typedef struct * table or on a table structure for temp table */ } RelSortOut; +/* + * Fields of the RestrictInfo needed in the AQOPlanNode + */ +typedef struct AQOClause +{ + /* the represented clause of WHERE or JOIN */ + Expr *clause; + /* selectivity for "normal" (JOIN_INNER) semantics; -1 if not yet set */ + Selectivity norm_selec; + /* selectivity for outer join semantics; -1 if not yet set */ + Selectivity outer_selec; + +} AQOClause; + /* * information for adaptive query optimization */ @@ -27,7 +42,7 @@ typedef struct AQOPlanNode { ExtensibleNode node; bool had_path; - RelSortOut *rels; + RelSortOut rels; List *clauses; List *selectivities; @@ -43,6 +58,25 @@ typedef struct AQOPlanNode double prediction; } AQOPlanNode; +/* + * The type of a node that is replaced by AQOConstNode. + */ +typedef enum AQOConstType +{ + AQO_NODE_EXPR = 0, + AQO_NODE_SUBPLAN +} AQOConstType; + +/* + * A custom node that is used to calculate a fss instead of regular node, + * such as SubPlan or Expr. + */ +typedef struct AQOConstNode +{ + ExtensibleNode node; + AQOConstType type; /* The type of the replaced node */ + int fss; /* The fss of the replaced node */ +} AQOConstNode; #define strtobool(x) ((*(x) == 't') ? 
true : false) @@ -64,6 +98,8 @@ extern List *get_path_clauses(Path *path, PlannerInfo *root, List **selectivities); +extern AQOConstNode *create_aqo_const_node(AQOConstType type, int fss); + extern AQOPlanNode *get_aqo_plan_node(Plan *plan, bool create); extern void RegisterAQOPlanNodeMethods(void); diff --git a/postprocessing.c b/postprocessing.c index 7df0a253..e166f84c 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -197,12 +197,12 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, foreach(l, clauselist) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - Selectivity *cur_sel = NULL; + AQOClause *clause = (AQOClause *) lfirst(l); + Selectivity *cur_sel = NULL; if (parametrized_sel) { - cur_hash = get_clause_hash(rinfo->clause, nargs, + cur_hash = get_clause_hash(clause->clause, nargs, args_hash, eclass_hash); cur_sel = selectivity_cache_find_global_relid(cur_hash, cur_relid); } @@ -212,9 +212,9 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, cur_sel = palloc(sizeof(double)); if (join_type == JOIN_INNER) - *cur_sel = rinfo->norm_selec; + *cur_sel = clause->norm_selec; else - *cur_sel = rinfo->outer_selec; + *cur_sel = clause->outer_selec; if (*cur_sel < 0) *cur_sel = 0; @@ -500,7 +500,7 @@ learnOnPlanState(PlanState *p, void *context) List *cur_selectivities; cur_selectivities = restore_selectivities(aqo_node->clauses, - aqo_node->rels->hrels, + aqo_node->rels.hrels, aqo_node->jointype, aqo_node->was_parametrized); SubplanCtx.selectivities = list_concat(SubplanCtx.selectivities, @@ -508,14 +508,14 @@ learnOnPlanState(PlanState *p, void *context) SubplanCtx.clauselist = list_concat(SubplanCtx.clauselist, list_copy(aqo_node->clauses)); - if (aqo_node->rels->hrels != NIL) + if (aqo_node->rels.hrels != NIL) { /* * This plan can be stored as a cached plan. In the case we will have * bogus path_relids field (changed by list_concat routine) at the * next usage (and aqo-learn) of this plan. 
*/ - ctx->relidslist = list_copy(aqo_node->rels->hrels); + ctx->relidslist = list_copy(aqo_node->rels.hrels); if (p->instrument) { @@ -527,12 +527,12 @@ learnOnPlanState(PlanState *p, void *context) { if (IsA(p, AggState)) learn_agg_sample(&SubplanCtx, - aqo_node->rels, learn_rows, rfactor, + &aqo_node->rels, learn_rows, rfactor, p->plan, notExecuted); else learn_sample(&SubplanCtx, - aqo_node->rels, learn_rows, rfactor, + &aqo_node->rels, learn_rows, rfactor, p->plan, notExecuted); } } From 865c20aeec27e0e15280e76c66d700921fcf33ef Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Wed, 29 Nov 2023 14:34:23 +0700 Subject: [PATCH 195/203] Change the logic for equivalence classes. It now uses EquivalenceClass structures to indicate which clauses are equivalent. --- cardinality_hooks.c | 2 +- expected/eclasses.out | 1085 +++++++++++++++++++++++++++++++++ expected/eclasses_mchar.out | 6 + expected/eclasses_mchar_1.out | 181 ++++++ hash.c | 264 +++----- hash.h | 4 +- path_utils.c | 55 ++ path_utils.h | 9 + postprocessing.c | 4 +- regress_schedule | 2 + sql/eclasses.sql | 394 ++++++++++++ sql/eclasses_mchar.sql | 73 +++ 12 files changed, 1894 insertions(+), 185 deletions(-) create mode 100644 expected/eclasses.out create mode 100644 expected/eclasses_mchar.out create mode 100644 expected/eclasses_mchar_1.out create mode 100644 sql/eclasses.sql create mode 100644 sql/eclasses_mchar.sql diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 0c23d391..71b26726 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -186,7 +186,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, forboth(l, allclauses, l2, selectivities) { - current_hash = get_clause_hash(((AQOClause *) lfirst(l))->clause, + current_hash = get_clause_hash((AQOClause *) lfirst(l), nargs, args_hash, eclass_hash); cache_selectivity(current_hash, rel->relid, rte->relid, *((double *) lfirst(l2))); diff --git a/expected/eclasses.out b/expected/eclasses.out new file mode 100644 index 
00000000..01650286 --- /dev/null +++ b/expected/eclasses.out @@ -0,0 +1,1085 @@ +-- Testing for working with equivalence classes +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +SET aqo.mode = 'forced'; +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +-- Integer fields +CREATE TABLE aqo_test_int(a int, b int, c int); +INSERT INTO aqo_test_int SELECT (x/10)::int, (x/100)::int, (x/1000)::int +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_int; +CREATE TABLE aqo_test_int1(a int, b int, c int); +INSERT INTO aqo_test_int1 SELECT (x/10)::int, (x/10)::int, (x/10)::int +FROM generate_series(0, 999) x; +ANALYZE aqo_test_int1; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on 
aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((b = c) AND (a = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND a = any('{0, 1, 2}'::int[]); + QUERY PLAN +---------------------------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((b = a) AND (a = c) AND (a = ANY ('{0,1,2}'::integer[]))) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND a = all('{0, 1, 2}'::int[]); + QUERY PLAN +---------------------------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=0 loops=1) + AQO not used + Filter: ((b = a) AND (a = c) AND (a = ALL ('{0,1,2}'::integer[]))) + Rows Removed by Filter: 10000 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 5 +SELECT count(*) FROM aqo_data; + count +------- + 5 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + 
AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = c AND b = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND b = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND c = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND b = 0 AND c = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND 0 = a AND 0 = b AND 0 = c; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((b = 0) AND (a = 0) AND (c = 0)) + 
Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 1 +SELECT count(*) FROM aqo_data; + count +------- + 1 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Tests with JOIN clauses. +-- Case 1. +-- 4 cols in 1 eclass, all of them is 0. +-- 3 nodes with unique FSS. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + result +----------------------------------------------------------- + Nested Loop (actual rows=100 loops=1) + AQO not used + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Seq Scan on aqo_test_int1 (actual rows=10 loops=10) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 990 + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(13 rows) + +-- Case 2. +-- 4 cols in 2 eclasses, 2 is 0 and 2 is 1. +-- The top node must be unique, but all of nodes like in a query of case 1. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON 1 = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + result +---------------------------------------------------------------- + Nested Loop (actual rows=100 loops=1) + AQO not used + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Materialize (actual rows=10 loops=10) + AQO: rows=10, error=0% + -> Seq Scan on aqo_test_int1 (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 1) AND (b = 1)) + Rows Removed by Filter: 990 + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(15 rows) + +-- Case 3. 
+-- 4 cols in 2 eclasses, 2 is 0 and 2 is equal but not a const. +-- 1 scan node with FSS like in case 2 and 2 nodes with unique FSS. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = 0') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Nested Loop (actual rows=10000 loops=1) + AQO not used + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Materialize (actual rows=1000 loops=10) + AQO not used + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO not used + Filter: (a = b) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +-- Case 4. +-- 4 cols in 1 eclass, all of them is 0. +-- 3 nodes with unique FSS. This is not case 1, because it is SEMI-JOIN. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +WHERE EXISTS ( + SELECT * FROM aqo_test_int1 + WHERE a = b AND t1.a = a)') AS str +WHERE str NOT LIKE '%Memory%'; + result +---------------------------------------------------------- + Nested Loop Semi Join (actual rows=10 loops=1) + AQO not used + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Seq Scan on aqo_test_int1 (actual rows=1 loops=10) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(12 rows) + +-- Case 5. +-- 4 cols in 1 eclass, all of them is 0. +-- The top node with unique FSS. Leaf nodes like in the case 4. 
+SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +WHERE NOT EXISTS ( + SELECT * FROM aqo_test_int1 + WHERE a = b AND t1.a = a)') AS str +WHERE str NOT LIKE '%Memory%'; + result +--------------------------------------------------------------- + Nested Loop Anti Join (actual rows=0 loops=1) + AQO not used + Join Filter: (aqo_test_int.a = aqo_test_int1.a) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Materialize (actual rows=1 loops=10) + AQO: rows=1, error=0% + -> Seq Scan on aqo_test_int1 (actual rows=1 loops=1) + AQO: rows=1, error=0% + Filter: ((a = 0) AND (b = 0)) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(15 rows) + +-- Must be 10 rows. +SELECT count(*) FROM aqo_data; + count +------- + 13 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Case 6. +-- 4 cols in 1 eclass. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE b = a) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE b = a) t2 +ON t1.a = t2.b') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=100 loops=1) + AQO not used + Hash Cond: (aqo_test_int.b = aqo_test_int1.b) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: (b = a) + Rows Removed by Filter: 9990 + -> Hash (actual rows=1000 loops=1) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO not used + Filter: (b = a) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE b = a) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE b = a) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join 
(actual rows=100 loops=1) + AQO: rows=100, error=0% + Hash Cond: (aqo_test_int.b = aqo_test_int1.b) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: (b = a) + Rows Removed by Filter: 9990 + -> Hash (actual rows=1000 loops=1) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (b = a) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=100 loops=1) + AQO: rows=100, error=0% + Hash Cond: (aqo_test_int.a = aqo_test_int1.a) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: (a = b) + Rows Removed by Filter: 9990 + -> Hash (actual rows=1000 loops=1) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (a = b) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.b = t2.b') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=100 loops=1) + AQO: rows=100, error=0% + Hash Cond: (aqo_test_int.a = aqo_test_int1.a) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: (a = b) + Rows Removed by Filter: 9990 + -> Hash (actual rows=1000 loops=1) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (a = b) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( 
+ SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.b::text = t2.b::text') AS str +WHERE str NOT LIKE '%Memory%'; + result +----------------------------------------------------------------- + Hash Join (actual rows=100 loops=1) + AQO not used + Hash Cond: ((aqo_test_int1.b)::text = (aqo_test_int.b)::text) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (a = b) + -> Hash (actual rows=10 loops=1) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: (a = b) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +-- Must be 4 rows. +SELECT count(*) FROM aqo_data; + count +------- + 4 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Text fields +CREATE TABLE aqo_test_text(a text, b text, c text); +INSERT INTO aqo_test_text +SELECT (x/10)::text, (x/100)::text, (x/1000)::text +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_text; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = '0'; + QUERY PLAN +---------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::text) AND (b = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = c AND a = '0'; + QUERY PLAN +---------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE b = c AND a = '0'; + QUERY 
PLAN +---------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO not used + Filter: ((b = c) AND (a = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 3 +SELECT count(*) FROM aqo_data; + count +------- + 3 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = c AND a = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = c AND b = c AND a = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND b = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on 
aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND c = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND b = '0' AND c = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 1 +SELECT count(*) FROM aqo_data; + count +------- + 1 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- JSONB fields +CREATE TABLE aqo_test_jsonb(a jsonb, b jsonb, c jsonb); +INSERT INTO aqo_test_jsonb SELECT +to_jsonb(x/10), to_jsonb(x/100), to_jsonb(x/1000) +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_jsonb; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = '0'::jsonb; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + 
JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = c AND a = '0'::jsonb; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE b = c AND a = '0'::jsonb; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO not used + Filter: ((b = c) AND (a = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 3 +SELECT count(*) FROM aqo_data; + count +------- + 3 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = c AND a = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = c AND b 
= c AND a = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND b = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND c = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND b = '0'::jsonb AND c = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 1 +SELECT count(*) FROM aqo_data; + count +------- + 1 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- BOX fields +CREATE TABLE aqo_test_box(a 
box, b box, c box); +INSERT INTO aqo_test_box SELECT +('((0,0), ('||(x/10)||', '||(x/10)||'))')::box, +('((0,0), ('||(x/100)||', '||(x/100)||'))')::box, +('((0,0), ('||(x/1000)||', '||(x/1000)||'))')::box +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_box; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND a = c AND a = '((0,0), (0,0))'::box; + QUERY PLAN +-------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (a = c) AND (a = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box; + QUERY PLAN +-------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (b = c) AND (a = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND a = c AND b = c AND a = '((0,0), (0,0))'::box; + QUERY PLAN +-------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (a = c) AND (b = c) AND (a = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND b = '((0,0), (0,0))'::box; + QUERY PLAN +------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box 
(actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (b = c) AND (a = '(0,0),(0,0)'::box) AND (b = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND c = '((0,0), (0,0))'::box; + QUERY PLAN +------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (b = c) AND (a = '(0,0),(0,0)'::box) AND (c = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND b = '((0,0), (0,0))'::box AND c = '((0,0), (0,0))'::box; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------ + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (b = c) AND (a = '(0,0),(0,0)'::box) AND (b = '(0,0),(0,0)'::box) AND (c = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 6 +SELECT count(*) FROM aqo_data; + count +------- + 6 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries too +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND a ~= c AND a ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (a ~= c) AND (a ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, 
COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND a ~= c AND b ~= c AND a ~= '((0,0), (0,0))'::box; + QUERY PLAN +------------------------------------------------------------------------------ + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (a ~= c) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND b ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box) AND (b ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND c ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box) AND (c ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, 
SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND b ~= '((0,0), (0,0))'::box AND c ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box) AND (b ~= '(0,0),(0,0)'::box) AND (c ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 6 +SELECT count(*) FROM aqo_data; + count +------- + 6 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +DROP TABLE aqo_test_int; +DROP TABLE aqo_test_text; +DROP TABLE aqo_test_jsonb; +DROP TABLE aqo_test_box; +DROP EXTENSION aqo; diff --git a/expected/eclasses_mchar.out b/expected/eclasses_mchar.out new file mode 100644 index 00000000..5593e045 --- /dev/null +++ b/expected/eclasses_mchar.out @@ -0,0 +1,6 @@ +-- Testing for working with equivalence classes for mchar type +-- Skip test if mchar extension does not exist +SELECT count(*) = 0 AS skip_test +FROM pg_available_extensions WHERE name = 'mchar' \gset +\if :skip_test +\quit diff --git a/expected/eclasses_mchar_1.out b/expected/eclasses_mchar_1.out new file mode 100644 index 00000000..a50422cb --- /dev/null +++ b/expected/eclasses_mchar_1.out @@ -0,0 +1,181 @@ +-- Testing for working with equivalence classes for mchar type +-- Skip test if mchar extension does not exist +SELECT count(*) = 0 AS skip_test +FROM pg_available_extensions WHERE name = 'mchar' \gset +\if :skip_test +\quit +\endif +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +SET aqo.mode = 'forced'; +-- MCHAR fields +CREATE EXTENSION MCHAR; +CREATE TABLE aqo_test_mchar(a mchar, b mchar, c mchar); +INSERT INTO aqo_test_mchar +SELECT (x/10)::text::mchar, 
(x/100)::text::mchar, (x/1000)::text::mchar +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_mchar; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = '0'; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::mchar) AND (b = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = c AND a = '0'; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE b = c AND a = '0'; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO not used + Filter: ((b = c) AND (a = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 3 +SELECT count(*) FROM aqo_data; + count +------- + 3 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = c AND a = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN 
(ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = c AND b = c AND a = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND b = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND c = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND b = '0' AND c = '0'; + QUERY PLAN 
+------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 1 +SELECT count(*) FROM aqo_data; + count +------- + 1 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +DROP TABLE aqo_test_mchar; +DROP EXTENSION mchar; +DROP EXTENSION aqo; diff --git a/hash.c b/hash.c index 397fe11c..598f0038 100644 --- a/hash.c +++ b/hash.c @@ -47,15 +47,11 @@ static int get_id_in_sorted_int_array(int val, int n, int *arr); static int get_arg_eclass(int arg_hash, int nargs, int *args_hash, int *eclass_hash); -static void get_clauselist_args(List *clauselist, int *nargs, int **args_hash); -static int disjoint_set_get_parent(int *p, int v); -static void disjoint_set_merge_eclasses(int *p, int v1, int v2); -static int *perform_eclasses_join(List *clauselist, int nargs, int *args_hash); +static int *get_clauselist_args(List *clauselist, int *nargs, int **args_hash); static bool is_brace(char ch); static bool has_consts(List *lst); static List **get_clause_args_ptr(Expr *clause); -static bool clause_is_eq_clause(Expr *clause); /* * Computes hash for given query.Query Identifier: = @@ -239,8 +235,8 @@ get_fss_for_object(List *relsigns, List *clauselist, { AQOClause *clause = (AQOClause *) lfirst(lc); - clause_hashes[i] = get_clause_hash(clause->clause, - nargs, args_hash, eclass_hash); + clause_hashes[i] = get_clause_hash(clause, nargs, args_hash, + eclass_hash); args = get_clause_args_ptr(clause->clause); clause_has_consts[i] = (args != NULL && has_consts(*args)); i++; @@ -324,19 +320,19 @@ get_fss_for_object(List *relsigns, List *clauselist, * Also args-order-insensitiveness for equal clause is required. 
*/ int -get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash) +get_clause_hash(AQOClause *clause, int nargs, int *args_hash, int *eclass_hash) { Expr *cclause; - List **args = get_clause_args_ptr(clause); + List **args = get_clause_args_ptr(clause->clause); int arg_eclass; ListCell *l; if (args == NULL) - return get_node_hash((Node *) clause); + return get_node_hash((Node *) clause->clause); - cclause = copyObject(clause); + cclause = copyObject(clause->clause); args = get_clause_args_ptr(cclause); - /* XXX: Why does it work even if this loop is removed? */ + foreach(l, *args) { arg_eclass = get_arg_eclass(get_node_hash(lfirst(l)), @@ -346,7 +342,7 @@ get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash) lfirst(l) = create_aqo_const_node(AQO_NODE_EXPR, arg_eclass); } } - if (!clause_is_eq_clause(clause) || has_consts(*args)) + if (!clause->is_eq_clause || has_consts(*args)) return get_node_hash((Node *) cclause); return get_node_hash((Node *) linitial(*args)); } @@ -570,121 +566,98 @@ get_arg_eclass(int arg_hash, int nargs, int *args_hash, int *eclass_hash) * Builds list of non-constant arguments of equivalence clauses * of given clauselist. 
*/ -static void +static int * get_clauselist_args(List *clauselist, int *nargs, int **args_hash) { AQOClause *clause; List **args; ListCell *l; - ListCell *l2; int i = 0; int sh = 0; int cnt = 0; + int *p; + int *p_sorted; + int *args_hash_sorted; + int *idx; + + /* Not more than 2 args in each clause from clauselist */ + *args_hash = palloc(2 * list_length(clauselist) * sizeof(**args_hash)); + p = palloc(2 * list_length(clauselist) * sizeof(*p)); foreach(l, clauselist) { + Expr *e; + clause = (AQOClause *) lfirst(l); args = get_clause_args_ptr(clause->clause); - if (args != NULL && clause_is_eq_clause(clause->clause)) - foreach(l2, *args) - if (!IsA(lfirst(l2), Const)) - cnt++; + if (args == NULL || !clause->is_eq_clause) + continue; + + /* Left argument */ + e = (args != NULL && list_length(*args) ? linitial(*args) : NULL); + if (e && !IsA(e, Const)) + { + (*args_hash)[cnt] = get_node_hash((Node *) e); + p[cnt++] = clause->left_ec; + } + + /* Right argument */ + e = (args != NULL && list_length(*args) >= 2 ? 
lsecond(*args) : NULL); + if (e && !IsA(e, Const)) + { + (*args_hash)[cnt] = get_node_hash((Node *) e); + p[cnt++] = clause->right_ec; + } } - *args_hash = palloc(cnt * sizeof(**args_hash)); - foreach(l, clauselist) + /* Use argsort for simultaneous sorting of args_hash and p arrays */ + idx = argsort(*args_hash, cnt, sizeof(**args_hash), int_cmp); + + args_hash_sorted = palloc(cnt * sizeof(*args_hash_sorted)); + p_sorted = palloc(cnt * sizeof(*p_sorted)); + + for (i = 0; i < cnt; ++i) { - clause = (AQOClause *) lfirst(l); - args = get_clause_args_ptr(clause->clause); - if (args != NULL && clause_is_eq_clause(clause->clause)) - foreach(l2, *args) - if (!IsA(lfirst(l2), Const)) - (*args_hash)[i++] = get_node_hash(lfirst(l2)); + args_hash_sorted[i] = (*args_hash)[idx[i]]; + p_sorted[i] = p[idx[i]]; } - qsort(*args_hash, cnt, sizeof(**args_hash), int_cmp); + pfree(idx); + pfree(p); + pfree(*args_hash); + *args_hash = args_hash_sorted; + + /* Remove duplicates of the hashes */ for (i = 1; i < cnt; ++i) if ((*args_hash)[i - 1] == (*args_hash)[i]) sh++; else + { (*args_hash)[i - sh] = (*args_hash)[i]; + p_sorted[i - sh] = p_sorted[i]; + } *nargs = cnt - sh; *args_hash = repalloc(*args_hash, (*nargs) * sizeof(**args_hash)); -} - -/* - * Returns class of an object in disjoint set. - */ -static int -disjoint_set_get_parent(int *p, int v) -{ - if (p[v] == -1) - return v; - else - return p[v] = disjoint_set_get_parent(p, p[v]); -} - -/* - * Merges two equivalence classes in disjoint set. - */ -static void -disjoint_set_merge_eclasses(int *p, int v1, int v2) -{ - int p1, - p2; - - p1 = disjoint_set_get_parent(p, v1); - p2 = disjoint_set_get_parent(p, v2); - if (p1 != p2) - { - if ((v1 + v2) % 2) - p[p1] = p2; - else - p[p2] = p1; - } -} - -/* - * Constructs disjoint set on arguments. 
- */ -static int * -perform_eclasses_join(List *clauselist, int nargs, int *args_hash) -{ - AQOClause *clause; - int *p; - ListCell *l, - *l2; - List **args; - int h2; - int i2, - i3; - - p = palloc(nargs * sizeof(*p)); - memset(p, -1, nargs * sizeof(*p)); + p_sorted = repalloc(p_sorted, (*nargs) * sizeof(*p_sorted)); - foreach(l, clauselist) + /* Compress the values of eclasses */ + if (*nargs > 0) { - clause = (AQOClause *) lfirst(l); - args = get_clause_args_ptr(clause->clause); - if (args != NULL && clause_is_eq_clause(clause->clause)) + int prev = p_sorted[0]; + p_sorted[0] = 0; + for (i = 1; i < *nargs; i++) { - i3 = -1; - foreach(l2, *args) - { - if (!IsA(lfirst(l2), Const)) - { - h2 = get_node_hash(lfirst(l2)); - i2 = get_id_in_sorted_int_array(h2, nargs, args_hash); - if (i3 != -1) - disjoint_set_merge_eclasses(p, i2, i3); - i3 = i2; - } - } + int cur = p_sorted[i]; + if (cur == prev) + p_sorted[i] = p_sorted[i-1]; + else + p_sorted[i] = p_sorted[i-1] + 1; + prev = cur; } } - return p; + return p_sorted; } /* @@ -696,30 +669,31 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) { int *p; List **lsts; - int i, - v; + int i; + /* + * An auxiliary array of equivalence clauses hashes + * used to improve performance. + */ int *e_hashes; - get_clauselist_args(clauselist, nargs, args_hash); + p = get_clauselist_args(clauselist, nargs, args_hash); *eclass_hash = palloc((*nargs) * sizeof(**eclass_hash)); - p = perform_eclasses_join(clauselist, *nargs, *args_hash); - lsts = palloc((*nargs) * sizeof(*lsts)); + lsts = palloc0((*nargs) * sizeof(*lsts)); e_hashes = palloc((*nargs) * sizeof(*e_hashes)); + /* Combine args hashes corresponding to the same eclass into one list. */ for (i = 0; i < *nargs; ++i) - lsts[i] = NIL; + lsts[p[i]] = lappend_int(lsts[p[i]], (*args_hash)[i]); + /* Precompute eclasses hashes only once per eclass. 
*/ for (i = 0; i < *nargs; ++i) - { - v = disjoint_set_get_parent(p, i); - lsts[v] = lappend_int(lsts[v], (*args_hash)[i]); - } - for (i = 0; i < *nargs; ++i) - e_hashes[i] = get_unordered_int_list_hash(lsts[i]); + if (lsts[i] != NIL) + e_hashes[i] = get_unordered_int_list_hash(lsts[i]); + /* Determine the hashes of each eclass. */ for (i = 0; i < *nargs; ++i) - (*eclass_hash)[i] = e_hashes[disjoint_set_get_parent(p, i)]; + (*eclass_hash)[i] = e_hashes[p[i]]; pfree(e_hashes); } @@ -772,75 +746,3 @@ get_clause_args_ptr(Expr *clause) break; } } - -/* - * Returns whether the clause is an equivalence clause. - */ -static bool -clause_is_eq_clause(Expr *clause) -{ - /* TODO: fix this horrible mess */ - return ( - clause->type == T_OpExpr || - clause->type == T_DistinctExpr || - clause->type == T_NullIfExpr || - clause->type == T_ScalarArrayOpExpr - ) && ( - ((OpExpr *) clause)->opno == Int4EqualOperator || - ((OpExpr *) clause)->opno == BooleanEqualOperator || - ((OpExpr *) clause)->opno == TextEqualOperator || - ((OpExpr *) clause)->opno == TIDEqualOperator || - ((OpExpr *) clause)->opno == ARRAY_EQ_OP || - ((OpExpr *) clause)->opno == RECORD_EQ_OP || - ((OpExpr *) clause)->opno == 15 || - ((OpExpr *) clause)->opno == 92 || - ((OpExpr *) clause)->opno == 93 || - ((OpExpr *) clause)->opno == 94 || - ((OpExpr *) clause)->opno == 352 || - ((OpExpr *) clause)->opno == 353 || - ((OpExpr *) clause)->opno == 385 || - ((OpExpr *) clause)->opno == 386 || - ((OpExpr *) clause)->opno == 410 || - ((OpExpr *) clause)->opno == 416 || - ((OpExpr *) clause)->opno == 503 || - ((OpExpr *) clause)->opno == 532 || - ((OpExpr *) clause)->opno == 533 || - ((OpExpr *) clause)->opno == 560 || - ((OpExpr *) clause)->opno == 566 || - ((OpExpr *) clause)->opno == 607 || - ((OpExpr *) clause)->opno == 649 || - ((OpExpr *) clause)->opno == 620 || - ((OpExpr *) clause)->opno == 670 || - ((OpExpr *) clause)->opno == 792 || - ((OpExpr *) clause)->opno == 811 || - ((OpExpr *) clause)->opno == 900 || - 
((OpExpr *) clause)->opno == 1093 || - ((OpExpr *) clause)->opno == 1108 || - ((OpExpr *) clause)->opno == 1550 || - ((OpExpr *) clause)->opno == 1120 || - ((OpExpr *) clause)->opno == 1130 || - ((OpExpr *) clause)->opno == 1320 || - ((OpExpr *) clause)->opno == 1330 || - ((OpExpr *) clause)->opno == 1500 || - ((OpExpr *) clause)->opno == 1535 || - ((OpExpr *) clause)->opno == 1616 || - ((OpExpr *) clause)->opno == 1220 || - ((OpExpr *) clause)->opno == 1201 || - ((OpExpr *) clause)->opno == 1752 || - ((OpExpr *) clause)->opno == 1784 || - ((OpExpr *) clause)->opno == 1804 || - ((OpExpr *) clause)->opno == 1862 || - ((OpExpr *) clause)->opno == 1868 || - ((OpExpr *) clause)->opno == 1955 || - ((OpExpr *) clause)->opno == 2060 || - ((OpExpr *) clause)->opno == 2542 || - ((OpExpr *) clause)->opno == 2972 || - ((OpExpr *) clause)->opno == 3222 || - ((OpExpr *) clause)->opno == 3516 || - ((OpExpr *) clause)->opno == 3629 || - ((OpExpr *) clause)->opno == 3676 || - ((OpExpr *) clause)->opno == 3882 || - ((OpExpr *) clause)->opno == 3240 || - ((OpExpr *) clause)->opno == 3240 - ); -} diff --git a/hash.h b/hash.h index 419941f6..d9d3cbfd 100644 --- a/hash.h +++ b/hash.h @@ -2,6 +2,7 @@ #define AQO_HASH_H #include "nodes/pg_list.h" +#include "path_utils.h" extern uint64 get_query_hash(Query *parse, const char *query_text); extern bool list_member_uint64(const List *list, uint64 datum); @@ -17,6 +18,7 @@ extern int get_grouped_exprs_hash(int fss, List *group_exprs); /* Hash functions */ void get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash); -int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); +int get_clause_hash(AQOClause *clause, int nargs, int *args_hash, + int *eclass_hash); #endif /* AQO_HASH_H */ \ No newline at end of file diff --git a/path_utils.c b/path_utils.c index 3f8f2895..97aa3443 100644 --- a/path_utils.c +++ b/path_utils.c @@ -52,6 +52,14 @@ static AQOPlanNode DefaultAQOPlanNode = .prediction = -1. 
}; + +/* + * Auxiliary list for relabeling equivalence classes + * from pointers to the serial numbers - indexes of this list. + * Maybe it is worth using some smart data structure such as an HTAB? + */ +List *eclass_collector = NIL; + /* * Hook on creation of a plan node. We need to store AQO-specific data to * support learning stage. @@ -337,6 +345,42 @@ aqo_get_raw_clauses(PlannerInfo *root, List *restrictlist) return clauses; } +void +eclass_collector_free(void) +{ + list_free(eclass_collector); + eclass_collector = NIL; +} + +static int +get_eclass_index(EquivalenceClass *ec) +{ + ListCell *lc; + int i = 0; + MemoryContext old_ctx; + + if (ec == NULL) + return -1; + + /* Get the top of merged eclasses */ + while(ec->ec_merged) + ec = ec->ec_merged; + + foreach (lc, eclass_collector) + { + if (lfirst(lc) == ec) + break; + i++; + } + + old_ctx = MemoryContextSwitchTo(AQOCacheMemCtx); + if (i == list_length(eclass_collector)) + eclass_collector = lappend(eclass_collector, ec); + MemoryContextSwitchTo(old_ctx); + + return i; +} + static List * copy_aqo_clauses_from_rinfo(List *src) { @@ -352,6 +396,11 @@ copy_aqo_clauses_from_rinfo(List *src) new->norm_selec = old->norm_selec; new->outer_selec = old->outer_selec; + new->left_ec = get_eclass_index(old->left_ec); + new->right_ec = get_eclass_index(old->right_ec); + + new->is_eq_clause = (old->left_ec != NULL || old->right_ec != NULL); + result = lappend(result, (void *) new); } @@ -834,6 +883,9 @@ outDouble(StringInfo str, double d) WRITE_NODE_FIELD(clause); \ WRITE_FLOAT_FIELD(norm_selec); \ WRITE_FLOAT_FIELD(outer_selec); \ + WRITE_INT_FIELD(left_ec); \ + WRITE_INT_FIELD(right_ec); \ + WRITE_BOOL_FIELD(is_eq_clause); \ appendStringInfo(str, " }"); \ } \ WRITE_CUSTOM_LIST_END() @@ -953,6 +1005,9 @@ AQOconstOut(struct StringInfoData *str, const struct ExtensibleNode *enode) READ_NODE_FIELD(clause); \ READ_FLOAT_FIELD(norm_selec); \ READ_FLOAT_FIELD(outer_selec); \ + READ_INT_FIELD(left_ec); \ + READ_INT_FIELD(right_ec); \ + 
READ_BOOL_FIELD(is_eq_clause); \ token = pg_strtok(&length); /* right bracket "}" */ \ node_copy->fldname = lappend(node_copy->fldname, local_node); \ } \ diff --git a/path_utils.h b/path_utils.h index 0d5d68bd..a6c65bfc 100644 --- a/path_utils.h +++ b/path_utils.h @@ -33,6 +33,14 @@ typedef struct AQOClause /* selectivity for outer join semantics; -1 if not yet set */ Selectivity outer_selec; + /* Serial number of EquivalenceClass containing lefthand */ + int left_ec; + /* Serial number of EquivalenceClass containing righthand */ + int right_ec; + /* Quick check for equivalence class */ + bool is_eq_clause; + + EquivalenceClass *ec; } AQOClause; /* @@ -106,5 +114,6 @@ extern void RegisterAQOPlanNodeMethods(void); extern List *aqo_get_clauses(PlannerInfo *root, List *restrictlist); void aqo_path_utils_init(void); +void eclass_collector_free(void); #endif /* PATH_UTILS_H */ diff --git a/postprocessing.c b/postprocessing.c index e166f84c..b8a70faf 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -202,8 +202,7 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, if (parametrized_sel) { - cur_hash = get_clause_hash(clause->clause, nargs, - args_hash, eclass_hash); + cur_hash = get_clause_hash(clause, nargs, args_hash, eclass_hash); cur_sel = selectivity_cache_find_global_relid(cur_hash, cur_relid); } @@ -849,6 +848,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) end: /* Release all AQO-specific memory, allocated during learning procedure */ selectivity_cache_clear(); + eclass_collector_free(); MemoryContextSwitchTo(oldctx); MemoryContextReset(AQOLearnMemCtx); diff --git a/regress_schedule b/regress_schedule index 76a2e00e..96b2cb93 100644 --- a/regress_schedule +++ b/regress_schedule @@ -21,3 +21,5 @@ test: top_queries test: relocatable test: look_a_like test: feature_subspace +test: eclasses +test: eclasses_mchar diff --git a/sql/eclasses.sql b/sql/eclasses.sql new file mode 100644 index 00000000..a041d2cb --- /dev/null +++ 
b/sql/eclasses.sql @@ -0,0 +1,394 @@ +-- Testing for working with equivalence classes + +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +SET aqo.mode = 'forced'; + +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +-- Integer fields +CREATE TABLE aqo_test_int(a int, b int, c int); +INSERT INTO aqo_test_int SELECT (x/10)::int, (x/100)::int, (x/1000)::int +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_int; + +CREATE TABLE aqo_test_int1(a int, b int, c int); +INSERT INTO aqo_test_int1 SELECT (x/10)::int, (x/10)::int, (x/10)::int +FROM generate_series(0, 999) x; +ANALYZE aqo_test_int1; + +SELECT true AS success FROM aqo_reset(); + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND a = any('{0, 1, 2}'::int[]); + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND a = all('{0, 1, 2}'::int[]); +-- Must be 5 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS 
OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = c AND b = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND b = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND c = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND b = 0 AND c = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND 0 = a AND 0 = b AND 0 = c; + +-- Must be 1 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + + +-- Tests with JOIN clauses. + +-- Case 1. +-- 4 cols in 1 eclass, all of them is 0. +-- 3 nodes with unique FSS. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Case 2. +-- 4 cols in 2 eclasses, 2 is 0 and 2 is 1. +-- The top node must be unique, but all of nodes like in a query of case 1. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON 1 = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Case 3. +-- 4 cols in 2 eclasses, 2 is 0 and 2 is equal but not a const. +-- 1 scan node with FSS like in case 2 and 2 nodes with unique FSS. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = 0') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Case 4. +-- 4 cols in 1 eclass, all of them is 0. +-- 3 nodes with unique FSS. This is not case 1, because it is SEMI-JOIN. 
+SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +WHERE EXISTS ( + SELECT * FROM aqo_test_int1 + WHERE a = b AND t1.a = a)') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Case 5. +-- 4 cols in 1 eclass, all of them is 0. +-- The top node with unique FSS. Leaf nodes like in the case 4. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +WHERE NOT EXISTS ( + SELECT * FROM aqo_test_int1 + WHERE a = b AND t1.a = a)') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Must be 10 rows. +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Case 6. +-- 4 cols in 1 eclass. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE b = a) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE b = a) t2 +ON t1.a = t2.b') AS str +WHERE str NOT LIKE '%Memory%'; + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE b = a) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE b = a) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.b = t2.b') AS str +WHERE str NOT LIKE '%Memory%'; + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.b::text = t2.b::text') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Must be 4 rows. 
+SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + + +-- Text fields +CREATE TABLE aqo_test_text(a text, b text, c text); +INSERT INTO aqo_test_text +SELECT (x/10)::text, (x/100)::text, (x/1000)::text +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_text; + +SELECT true AS success FROM aqo_reset(); +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE b = c AND a = '0'; +-- Must be 3 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = c AND b = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND b = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND c = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND b = '0' AND c = '0'; +-- Must be 1 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + + +-- JSONB fields +CREATE TABLE aqo_test_jsonb(a jsonb, b jsonb, c jsonb); +INSERT INTO aqo_test_jsonb SELECT +to_jsonb(x/10), to_jsonb(x/100), to_jsonb(x/1000) +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_jsonb; + +SELECT true AS success FROM aqo_reset(); +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS 
OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = c AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE b = c AND a = '0'::jsonb; +-- Must be 3 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = c AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = c AND b = c AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND b = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND c = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND b = '0'::jsonb AND c = '0'::jsonb; +-- Must be 1 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + + +-- BOX fields +CREATE TABLE aqo_test_box(a box, b box, c box); +INSERT INTO aqo_test_box SELECT +('((0,0), ('||(x/10)||', '||(x/10)||'))')::box, +('((0,0), ('||(x/100)||', '||(x/100)||'))')::box, +('((0,0), ('||(x/1000)||', '||(x/1000)||'))')::box +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_box; + +SELECT true AS success FROM aqo_reset(); +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND a = c AND a = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM 
aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND a = c AND b = c AND a = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND b = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND c = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND b = '((0,0), (0,0))'::box AND c = '((0,0), (0,0))'::box; +-- Must be 6 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Not equivalent queries too +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND a ~= c AND a ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND a ~= c AND b ~= c AND a ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND b ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND c ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND b ~= '((0,0), (0,0))'::box AND c ~= '((0,0), (0,0))'::box; +-- Must be 6 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +DROP TABLE aqo_test_int; +DROP TABLE 
aqo_test_text; +DROP TABLE aqo_test_jsonb; +DROP TABLE aqo_test_box; + +DROP EXTENSION aqo; diff --git a/sql/eclasses_mchar.sql b/sql/eclasses_mchar.sql new file mode 100644 index 00000000..62e10802 --- /dev/null +++ b/sql/eclasses_mchar.sql @@ -0,0 +1,73 @@ +-- Testing for working with equivalence classes for mchar type + +-- Skip test if mchar extension does not exist +SELECT count(*) = 0 AS skip_test +FROM pg_available_extensions WHERE name = 'mchar' \gset + +\if :skip_test +\quit +\endif + +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +SET aqo.mode = 'forced'; + +-- MCHAR fields +CREATE EXTENSION MCHAR; +CREATE TABLE aqo_test_mchar(a mchar, b mchar, c mchar); +INSERT INTO aqo_test_mchar +SELECT (x/10)::text::mchar, (x/100)::text::mchar, (x/1000)::text::mchar +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_mchar; + +SELECT true AS success FROM aqo_reset(); +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE b = c AND a = '0'; + +-- Must be 3 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = c AND b = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND b = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM 
aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND c = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND b = '0' AND c = '0'; +-- Must be 1 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +DROP TABLE aqo_test_mchar; + +DROP EXTENSION mchar; +DROP EXTENSION aqo; From e4999b75f7bad1db67575e6021a1f9a250f4b0f5 Mon Sep 17 00:00:00 2001 From: Andrey Kazarinov Date: Mon, 30 Oct 2023 16:44:11 +0300 Subject: [PATCH 196/203] assign fss without conditions in estimation of group number --- cardinality_hooks.c | 2 +- t/005_display_groupby_fss.pl | 79 ++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 t/005_display_groupby_fss.pl diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 71b26726..97ed96f3 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -448,11 +448,11 @@ aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); predicted = predict_num_groups(root, subpath, groupExprs, &fss); + grouped_rel->fss_hash = fss; if (predicted > 0.) 
{ grouped_rel->predicted_cardinality = predicted; grouped_rel->rows = predicted; - grouped_rel->fss_hash = fss; MemoryContextSwitchTo(old_ctx_m); MemoryContextReset(AQOPredictMemCtx); return predicted; diff --git a/t/005_display_groupby_fss.pl b/t/005_display_groupby_fss.pl new file mode 100644 index 00000000..6f663f0c --- /dev/null +++ b/t/005_display_groupby_fss.pl @@ -0,0 +1,79 @@ +use strict; +use warnings; + +use Config; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use Test::More tests => 2; + +my $node = PostgreSQL::Test::Cluster->new('aqotest'); +$node->init; +$node->append_conf('postgresql.conf', qq{ + shared_preload_libraries = 'aqo' + log_statement = 'ddl' + aqo.join_threshold = 0 + aqo.mode = 'learn' + aqo.show_details = 'on' + aqo.show_hash = 'on' + aqo.min_neighbors_for_predicting = 1 + enable_nestloop = 'off' + enable_mergejoin = 'off' + enable_material = 'off' + }); + +$node->start(); +$node->safe_psql('postgres', 'CREATE EXTENSION aqo'); + +# Create tables with correlated datas in columns + +$node->safe_psql('postgres', 'CREATE TABLE a (x1 int, x2 int, x3 int); +INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival'); + +$node->safe_psql('postgres', 'CREATE TABLE b (y1 int, y2 int, y3 int); +INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival'); + +my $result; + +my $plan = $node->safe_psql('postgres', 'EXPLAIN (analyze true, verbose true) +SELECT a.x1, b.y1, COUNT(*) FROM a, b WHERE a.x2 = b.y2 GROUP BY a.x1, b.y1;'); +my @fss = $plan =~ /fss=(-?\d+)/g; + +$result = $node->safe_psql('postgres', 'SELECT count(*) FROM aqo_data;'); +is($result, 4); + +$result = $node->safe_psql('postgres', 'SELECT fss FROM aqo_data;'); + +my @storage = split(/\n/, $result); + +# compare fss from plan and fss from storage +my $test2 = 1; +if (scalar @fss == scalar @storage) { + foreach my $numb1 (@fss) { + my $found = 0; 
+ + # check fss not zero + if ($numb1 == 0) { + $test2 = 0; + last; + } + + foreach my $numb2 (@storage) { + if ($numb2 == $numb1) { + $found = 1; + last; + } + } + + if (!$found) { + $test2 = 0; + last; + } + } +} else { + $test2 = 0; +} + +is($test2, 1); + +$node->stop(); \ No newline at end of file From 33a64e0ae52f7e9b72c7fdac012b6f777275f797 Mon Sep 17 00:00:00 2001 From: Alexandra Pervushina Date: Mon, 25 Sep 2023 02:46:24 +0300 Subject: [PATCH 197/203] Print aqo details regardless of IsQueryDisabled --- aqo.c | 2 +- postprocessing.c | 16 +++++++--------- preprocessing.c | 5 +++++ 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/aqo.c b/aqo.c index e84f89b0..f3707702 100644 --- a/aqo.c +++ b/aqo.c @@ -96,7 +96,7 @@ MemoryContext AQOLearnMemCtx = NULL; MemoryContext AQOStorageMemCtx = NULL; /* Additional plan info */ -int njoins; +int njoins = -1; /***************************************************************************** diff --git a/postprocessing.c b/postprocessing.c index b8a70faf..0f2f7f57 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -85,7 +85,6 @@ static void StoreToQueryEnv(QueryDesc *queryDesc); static void StorePlanInternals(QueryDesc *queryDesc); static bool ExtractFromQueryEnv(QueryDesc *queryDesc); - /* * This is the critical section: only one runner is allowed to be inside this * function for one feature subspace. @@ -766,6 +765,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) cardinality_sum_errors = 0.; cardinality_num_objects = 0; + njoins = -1; if (IsQueryDisabled() || !ExtractFromQueryEnv(queryDesc)) /* AQO keep all query-related preferences at the query context. 
@@ -996,7 +996,8 @@ print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, (*aqo_ExplainOnePlan_next)(plannedstmt, into, es, queryString, params, planduration, queryEnv); - if (IsQueryDisabled() || !aqo_show_details) + if (!(aqo_mode != AQO_MODE_DISABLED || force_collect_stat) || + !aqo_show_details) return; /* Report to user about aqo state only in verbose mode */ @@ -1031,13 +1032,10 @@ print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, * Query class provides an user the conveniently use of the AQO * auxiliary functions. */ - if (aqo_mode != AQO_MODE_DISABLED || force_collect_stat) - { - if (aqo_show_hash) - ExplainPropertyInteger("Query hash", NULL, - query_context.query_hash, es); - ExplainPropertyInteger("JOINS", NULL, njoins, es); - } + if (aqo_show_hash) + ExplainPropertyInteger("Query hash", NULL, + (int64) query_context.query_hash, es); + ExplainPropertyInteger("JOINS", NULL, njoins, es); } static void diff --git a/preprocessing.c b/preprocessing.c index 95371631..62bd94d5 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -130,6 +130,7 @@ aqo_planner(Query *parse, const char *query_string, int cursorOptions, */ MemoryContextSwitchTo(oldctx); disable_aqo_for_query(); + query_context.query_hash = 0; return (*aqo_planner_next)(parse, query_string, cursorOptions, boundParams); } @@ -233,7 +234,11 @@ aqo_planner(Query *parse, const char *query_string, int cursorOptions, */ if (!query_context.learn_aqo && !query_context.use_aqo && !query_context.auto_tuning && !force_collect_stat) + { add_deactivated_query(query_context.query_hash); + disable_aqo_for_query(); + goto ignore_query_settings; + } /* * That we can do if query exists in database. From 65877801d7a8ad1e83afdd067789f8d0834c0265 Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Mon, 19 Feb 2024 16:50:44 +0700 Subject: [PATCH 198/203] Fix collecting eclasses routine. 
--- aqo.c | 1 + hash.c | 22 ++++++++++++++++------ path_utils.c | 19 ++++++------------- path_utils.h | 3 ++- postprocessing.c | 1 - 5 files changed, 25 insertions(+), 21 deletions(-) diff --git a/aqo.c b/aqo.c index f3707702..3e8796cd 100644 --- a/aqo.c +++ b/aqo.c @@ -118,6 +118,7 @@ aqo_free_callback(ResourceReleasePhase phase, { MemoryContextReset(AQOCacheMemCtx); cur_classes = NIL; + aqo_eclass_collector = NIL; } } diff --git a/hash.c b/hash.c index 598f0038..2b0d3675 100644 --- a/hash.c +++ b/hash.c @@ -641,18 +641,28 @@ get_clauselist_args(List *clauselist, int *nargs, int **args_hash) *args_hash = repalloc(*args_hash, (*nargs) * sizeof(**args_hash)); p_sorted = repalloc(p_sorted, (*nargs) * sizeof(*p_sorted)); - /* Compress the values of eclasses */ + /* + * Compress the values of eclasses. + * It is only sorted in order of args_hash. + * Get the indexes in ascending order of the elements. + */ + idx = argsort(p_sorted, *nargs, sizeof(*p_sorted), int_cmp); + + /* + * Remove the holes from given array. + * Later we can use it as indexes of args_hash. + */ if (*nargs > 0) { - int prev = p_sorted[0]; - p_sorted[0] = 0; + int prev = p_sorted[idx[0]]; + p_sorted[idx[0]] = 0; for (i = 1; i < *nargs; i++) { - int cur = p_sorted[i]; + int cur = p_sorted[idx[i]]; if (cur == prev) - p_sorted[i] = p_sorted[i-1]; + p_sorted[idx[i]] = p_sorted[idx[i-1]]; else - p_sorted[i] = p_sorted[i-1] + 1; + p_sorted[idx[i]] = p_sorted[idx[i-1]] + 1; prev = cur; } } diff --git a/path_utils.c b/path_utils.c index 97aa3443..15cf20ad 100644 --- a/path_utils.c +++ b/path_utils.c @@ -52,13 +52,13 @@ static AQOPlanNode DefaultAQOPlanNode = .prediction = -1. }; - /* * Auxiliary list for relabel equivalence classes * from pointers to the serial numbers - indexes of this list. - * Maybe it's need to use some smart data structure such a HTAB? + * XXX: Maybe it's need to use some smart data structure such a HTAB? + * It must be allocated in AQOCacheMemCtx. 
*/ -List *eclass_collector = NIL; +List *aqo_eclass_collector = NIL; /* * Hook on creation of a plan node. We need to store AQO-specific data to @@ -345,13 +345,6 @@ aqo_get_raw_clauses(PlannerInfo *root, List *restrictlist) return clauses; } -void -eclass_collector_free(void) -{ - list_free(eclass_collector); - eclass_collector = NIL; -} - static int get_eclass_index(EquivalenceClass *ec) { @@ -366,7 +359,7 @@ get_eclass_index(EquivalenceClass *ec) while(ec->ec_merged) ec = ec->ec_merged; - foreach (lc, eclass_collector) + foreach (lc, aqo_eclass_collector) { if (lfirst(lc) == ec) break; @@ -374,8 +367,8 @@ get_eclass_index(EquivalenceClass *ec) } old_ctx = MemoryContextSwitchTo(AQOCacheMemCtx); - if (i == list_length(eclass_collector)) - eclass_collector = lappend(eclass_collector, ec); + if (i == list_length(aqo_eclass_collector)) + aqo_eclass_collector = lappend(aqo_eclass_collector, ec); MemoryContextSwitchTo(old_ctx); return i; diff --git a/path_utils.h b/path_utils.h index a6c65bfc..dec9eb1e 100644 --- a/path_utils.h +++ b/path_utils.h @@ -8,6 +8,8 @@ #define AQO_PLAN_NODE "AQOPlanNode" #define AQO_CONST_NODE "AQOConstNode" +extern List *aqo_eclass_collector; + /* * Find and sort out relations that used in the query: * Use oids of relations to store dependency of ML row on a set of tables. 
@@ -114,6 +116,5 @@ extern void RegisterAQOPlanNodeMethods(void); extern List *aqo_get_clauses(PlannerInfo *root, List *restrictlist); void aqo_path_utils_init(void); -void eclass_collector_free(void); #endif /* PATH_UTILS_H */ diff --git a/postprocessing.c b/postprocessing.c index 0f2f7f57..b902e064 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -848,7 +848,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) end: /* Release all AQO-specific memory, allocated during learning procedure */ selectivity_cache_clear(); - eclass_collector_free(); MemoryContextSwitchTo(oldctx); MemoryContextReset(AQOLearnMemCtx); From 1e2e081caf4c1a3a1b7c75ac5fb2b5ecd63a1350 Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Thu, 22 Feb 2024 20:24:30 +0700 Subject: [PATCH 199/203] Refactor the comparator functions. --- utils.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/utils.c b/utils.c index c44b3a64..13908783 100644 --- a/utils.c +++ b/utils.c @@ -32,11 +32,14 @@ static int argsort_cmp(const void *a, const void *b); * Function for qsorting an integer arrays */ int -int_cmp(const void *a, const void *b) +int_cmp(const void *arg1, const void *arg2) { - if (*(int *) a < *(int *) b) + int v1 = *((const int *) arg1); + int v2 = *((const int *) arg2); + + if (v1 < v2) return -1; - else if (*(int *) a > *(int *) b) + else if (v1 > v2) return 1; else return 0; @@ -46,11 +49,14 @@ int_cmp(const void *a, const void *b) * Function for qsorting an double arrays */ int -double_cmp(const void *a, const void *b) +double_cmp(const void *arg1, const void *arg2) { - if (*(double *) a < *(double *) b) + double v1 = *((const double *) arg1); + double v2 = *((const double *) arg2); + + if (v1 < v2) return -1; - else if (*(double *) a > *(double *) b) + else if (v1 > v2) return 1; else return 0; @@ -60,12 +66,14 @@ double_cmp(const void *a, const void *b) * Compares elements for two given indexes */ int -argsort_cmp(const void *a, const void *b) 
+argsort_cmp(const void *arg1, const void *arg2) { - return (*argsort_value_cmp) ((char *) argsort_a + - *((int *) a) * argsort_es, - (char *) argsort_a + - *((int *) b) * argsort_es); + int idx1 = *((const int *) arg1); + int idx2 = *((const int *) arg2); + char *arr = (char *) argsort_a; + + return (*argsort_value_cmp) (&arr[idx1 * argsort_es], + &arr[idx2 * argsort_es]); } /* From 87ed6b9f8cf06bdaa14899c4ae64279763987fca Mon Sep 17 00:00:00 2001 From: Timur Magomedov Date: Thu, 4 Apr 2024 20:01:24 +0300 Subject: [PATCH 200/203] Reset aqo mode to frozen in case of shmem overflow --- preprocessing.c | 7 ++++--- t/006_overflow.pl | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 3 deletions(-) create mode 100644 t/006_overflow.pl diff --git a/preprocessing.c b/preprocessing.c index 62bd94d5..5d7053ae 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -316,10 +316,11 @@ aqo_planner(Query *parse, const char *query_string, int cursorOptions, disable_aqo_for_query(); /* - * Switch AQO to controlled mode. In this mode we wouldn't add new - * query classes, just use and learn on existed set. + * Switch AQO to frozen mode. In this mode we wouldn't collect + * any new data, just read collected statistics for already + * known query classes. */ - aqo_mode = AQO_MODE_CONTROLLED; + aqo_mode = AQO_MODE_FROZEN; } } diff --git a/t/006_overflow.pl b/t/006_overflow.pl new file mode 100644 index 00000000..eb2d71b9 --- /dev/null +++ b/t/006_overflow.pl @@ -0,0 +1,47 @@ +use strict; +use warnings; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More tests => 4; + +my $node = PostgreSQL::Test::Cluster->new('aqotest'); +$node->init; +$node->append_conf('postgresql.conf', qq{ + shared_preload_libraries = 'aqo' + aqo.join_threshold = 0 + aqo.mode = 'frozen' + aqo.show_details = 'on' + aqo.dsm_size_max = 10 + aqo.force_collect_stat = 'on' + aqo.fs_max_items = 3 + aqo.fss_max_items = 10 +}); + +# General purpose variables. 
+my $res; +my $mode; + +# Disable default settings, forced by PGOPTIONS in AQO Makefile +$ENV{PGOPTIONS}=""; + +$node->start(); +$node->safe_psql('postgres', 'CREATE EXTENSION aqo'); + +$mode = $node->safe_psql('postgres',"show aqo.mode"); +like($mode, qr/frozen/); + +$node->safe_psql('postgres', 'CREATE TABLE a (x int); +INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival'); + +$res = $node->safe_psql('postgres', 'EXPLAIN ANALYZE SELECT x FROM a WHERE x < 5;'); +like($res, qr/AQO mode: FROZEN/); + +$res = $node->safe_psql('postgres', 'EXPLAIN ANALYZE SELECT count(x) FROM a WHERE x > 5;'); +like($res, qr/AQO mode: FROZEN/); + +$mode = $node->safe_psql('postgres',"show aqo.mode"); +like($mode, qr/frozen/); + +$node->stop(); +done_testing(); From 631b34130c8c0ce5123249fe964c0b4b250eebe6 Mon Sep 17 00:00:00 2001 From: Artem Fadeev Date: Fri, 23 Aug 2024 14:46:03 +0300 Subject: [PATCH 201/203] Fix svace warnings Fixed arithmetics in check_dsa_file_size to avoid server startup failure when aqo.dsm_size_max in bytes overflows signed integer. Updated corresponding tap-test. Two unreachable paths were removed. (cherry-picked from stable14) --- cardinality_hooks.c | 3 --- storage.c | 4 +--- t/004_dsm_size_max.pl | 8 +++++++- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 97ed96f3..ccb020b5 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -442,9 +442,6 @@ aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, /* It is unclear that to do in situation of such kind. 
Just report it */ elog(WARNING, "AQO is in the middle of the estimate_num_groups_hook chain"); - if (groupExprs == NIL) - return 1.0; - old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); predicted = predict_num_groups(root, subpath, groupExprs, &fss); diff --git a/storage.c b/storage.c index a11f16f4..10b7cfc6 100644 --- a/storage.c +++ b/storage.c @@ -968,8 +968,6 @@ aqo_get_file_size(const char *filename) ereport(LOG, (errcode_for_file_access(), errmsg("could not read file \"%s\": %m", filename))); - if (file) - FreeFile(file); unlink(filename); return -1; } @@ -981,7 +979,7 @@ check_dsa_file_size(void) long data_size = aqo_get_file_size(PGAQO_DATA_FILE); if (qtext_size == -1 || data_size == -1 || - qtext_size + data_size >= dsm_size_max * 1024 * 1024) + ((unsigned long) qtext_size + (unsigned long) data_size) >> 20 >= dsm_size_max) { elog(ERROR, "aqo.dsm_size_max is too small"); } diff --git a/t/004_dsm_size_max.pl b/t/004_dsm_size_max.pl index 26898b79..8b7f8e62 100644 --- a/t/004_dsm_size_max.pl +++ b/t/004_dsm_size_max.pl @@ -5,7 +5,7 @@ use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; -use Test::More tests => 5; +use Test::More tests => 6; my $node = PostgreSQL::Test::Cluster->new('aqotest'); $node->init; @@ -58,6 +58,12 @@ $node->psql('postgres', 'select * from aqo_reset();'); $node->stop(); +# 3000mb (more than 2^31 bytes) overflows 4-byte signed int +$node->append_conf('postgresql.conf', 'aqo.dsm_size_max = 3000'); +is($node->start(fail_ok => 1), 1, "Large aqo.dsm_size_max doesn't cause integer overflow"); +$node->stop(); + + my $regex; $long_string = 'a' x 100000; $regex = qr/.*WARNING: \[AQO\] Not enough DSA\. 
AQO was disabled for this query/; From a2b2ed6d8c69c66773cde5820a7ae153de50d9be Mon Sep 17 00:00:00 2001 From: Artem Fadeev Date: Mon, 30 Sep 2024 11:17:29 +0300 Subject: [PATCH 202/203] Fix build_knn_matrix (now called update_knn_matrix) Previous version of build_knn_matrix had an unreachable branch (`if (features!=NULL)`), which led to use_wide_search having no effect. There was also a memory bug of copying a memory area into itself. predict_for_relation was fixed with interoperation of use_wide_search and predict_with_few_neighbors features in mind. Additions to the look_a_like regression test reflect those changes. This commit also removes unused arguments from several functions and fixes a couple of typos. --- cardinality_estimation.c | 23 ++++--- cardinality_hooks.c | 2 +- expected/gucs.out | 1 + expected/look_a_like.out | 125 ++++++++++++++++++++++++++++++++++++++- expected/unsupported.out | 1 + machine_learning.c | 1 + postprocessing.c | 2 +- sql/gucs.sql | 1 + sql/look_a_like.sql | 66 ++++++++++++++++++++- storage.c | 99 +++++++++++++------------------ storage.h | 4 +- 11 files changed, 248 insertions(+), 77 deletions(-) diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 8ab98f3c..f0cca328 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -81,8 +81,17 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, &ncols, &features); data = OkNNr_allocate(ncols); - if (load_fss_ext(query_context.fspace_hash, *fss, data, NULL)) + if (load_aqo_data(query_context.fspace_hash, *fss, data, false) && + data->rows >= (aqo_predict_with_few_neighbors ? 
1 : aqo_k)) result = OkNNr_predict(data, features); + /* Try to search in surrounding feature spaces for the same node */ + else if (use_wide_search && load_aqo_data(query_context.fspace_hash, *fss, data, true)) + { + elog(DEBUG5, "[AQO] Make prediction for fss "INT64_FORMAT" by a neighbour " + "includes %d feature(s) and %d fact(s).", + (int64) *fss, data->cols, data->rows); + result = OkNNr_predict(data, features); + } else { /* @@ -91,17 +100,7 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, * small part of paths was used for AQO learning and stored into * the AQO knowledge base. */ - - /* Try to search in surrounding feature spaces for the same node */ - if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, use_wide_search, features)) - result = -1; - else - { - elog(DEBUG5, "[AQO] Make prediction for fss %d by a neighbour " - "includes %d feature(s) and %d fact(s).", - *fss, data->cols, data->rows); - result = OkNNr_predict(data, features); - } + result = -1; } #ifdef AQO_DEBUG_PRINT diff --git a/cardinality_hooks.c b/cardinality_hooks.c index ccb020b5..6546499c 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -413,7 +413,7 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, *fss = get_grouped_exprs_hash(child_fss, group_exprs); memset(&data, 0, sizeof(OkNNrdata)); - if (!load_fss_ext(query_context.fspace_hash, *fss, &data, NULL)) + if (!load_aqo_data(query_context.fspace_hash, *fss, &data, false)) return -1; Assert(data.rows == 1); diff --git a/expected/gucs.out b/expected/gucs.out index e6cd1692..a31219df 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -142,4 +142,5 @@ SELECT count(*) FROM aqo_query_stat; 0 (1 row) +DROP TABLE t; DROP EXTENSION aqo; diff --git a/expected/look_a_like.out b/expected/look_a_like.out index 594f017e..854bb852 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -9,8 +9,9 @@ SELECT true AS success FROM aqo_reset(); SET 
aqo.wide_search = 'on'; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; -set aqo.show_hash = 'off'; +SET aqo.show_hash = 'off'; SET aqo.min_neighbors_for_predicting = 1; +SET aqo.predict_with_few_neighbors = 'off'; SET enable_nestloop = 'off'; SET enable_mergejoin = 'off'; SET enable_material = 'off'; @@ -553,9 +554,131 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L JOINS: 2 (24 rows) +-- Next few test cases focus on fss corresponding to (x1 > ? AND x2 < ? AND x3 < ?). We will denote +-- it by fss0. At this moment there is exactly one fs with (fs, fss0, dbid) record in aqo_data. We'll +-- refer to it as fs0. +-- Let's create another fs for fss0. We'll call this fs fs1. Since aqo.wide_search='on', +-- aqo.min_neighbors_for_predicting=1, and there is (fs0, fss0, dbid) data record, AQO must be used here. +SELECT str AS result +FROM expln(' +SELECT * FROM A WHERE x1 > -100 AND x2 < 10 AND x3 < 10;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +---------------------------------------------------------------------- + Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=20, error=-400% + Output: x1, x2, x3 + Filter: ((a.x1 > '-100'::integer) AND (a.x2 < 10) AND (a.x3 < 10)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(7 rows) + +-- Now there are 2 data records for fss0: one for (fs0, fss0, dbid) and one for (fs1, fss0, dbid) +-- We repeat previous query, but set aqo.min_neighbors_for_predicting to 2. Since aqo.predict_with_few_neighbors +-- is 'off', AQO is obliged to use both data records for fss0. 
+SET aqo.min_neighbors_for_predicting = 2; +SELECT str AS result +FROM expln(' +SELECT * FROM A WHERE x1 > 1 AND x2 < 10 AND x3 < 10;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +-------------------------------------------------------- + Seq Scan on public.a (actual rows=80 loops=1) + AQO: rows=77, error=-4% + Output: x1, x2, x3 + Filter: ((a.x1 > 1) AND (a.x2 < 10) AND (a.x3 < 10)) + Rows Removed by Filter: 20 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(8 rows) + +-- Now there are 3 data records for fss0: 1 for (fs0, fss0, dbid) and 2 for (fs1, fss0, dbid) +-- Lastly, we run invoke query with previously unseen fs with fss0 feature subspace. AQO must use +-- three data records from two neighbors for this one. +SET aqo.min_neighbors_for_predicting = 3; +SELECT str AS result +FROM expln(' +SELECT x2 FROM A WHERE x1 > 3 AND x2 < 10 AND x3 < 10 GROUP BY(x2);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +-------------------------------------------------------------- + HashAggregate (actual rows=6 loops=1) + AQO not used + Output: x2 + Group Key: a.x2 + -> Seq Scan on public.a (actual rows=60 loops=1) + AQO: rows=71, error=15% + Output: x1, x2, x3 + Filter: ((a.x1 > 3) AND (a.x2 < 10) AND (a.x3 < 10)) + Rows Removed by Filter: 40 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(12 rows) + +----- +DROP TABLE IF EXISTS t; +NOTICE: table "t" does not exist, skipping +CREATE TABLE t AS SELECT x, x AS y, x AS z FROM generate_series(1, 10000) x; +ANALYZE t; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Test that when there are less records than aqo.min_neighbors_for_predicting for given (fs, fss, dbid) +-- and aqo.predict_with_few_neighbors is off, those records have higher precedence for cardinality estimation +-- than neighbors' records. 
+SELECT str AS result +FROM expln(' +select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------ + Seq Scan on public.t (actual rows=10000 loops=1) + AQO not used + Output: x, y, z + Filter: ((t.x <= 10000) AND (t.y <= 10000) AND (t.z <= 10000)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(7 rows) + +DO +$$ +BEGIN + for counter in 1..20 loop + EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= 1 and z <= %L;', 10 * counter); + EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= %L and z <= 1;', 10 * counter); + EXECUTE format('explain analyze select *, 1 from t where x <= %L and y <= 1 and z <= 1;', 10 * counter); + end loop; +END; +$$ LANGUAGE PLPGSQL; +-- AQO should predict ~1000 rows to indicate that the record from previous invocation was used. +SELECT str AS result +FROM expln(' +select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------ + Seq Scan on public.t (actual rows=10000 loops=1) + AQO: rows=9987, error=-0% + Output: x, y, z + Filter: ((t.x <= 10000) AND (t.y <= 10000) AND (t.z <= 10000)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(7 rows) + RESET aqo.wide_search; +RESET aqo.predict_with_few_neighbors; +RESET aqo.min_neighbors_for_predicting; DROP EXTENSION aqo CASCADE; DROP TABLE a; DROP TABLE b; DROP TABLE c; +DROP TABLE t; DROP FUNCTION expln; diff --git a/expected/unsupported.out b/expected/unsupported.out index 9db07618..a088a47c 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -16,6 +16,7 @@ $$ LANGUAGE PLPGSQL; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; DROP TABLE IF EXISTS t; 
+NOTICE: table "t" does not exist, skipping CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; ANALYZE t; CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y diff --git a/machine_learning.c b/machine_learning.c index bfdf0aaa..d7520a94 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -150,6 +150,7 @@ OkNNr_predict(OkNNrdata *data, double *features) if (!aqo_predict_with_few_neighbors && data->rows < aqo_k) return -1.; + Assert(data->rows > 0); for (i = 0; i < data->rows; ++i) distances[i] = fs_distance(data->matrix[i], features, data->cols); diff --git a/postprocessing.c b/postprocessing.c index b902e064..cae61e98 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -95,7 +95,7 @@ atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, double *features, double target, double rfactor, List *reloids) { - if (!load_fss_ext(fs, fss, data, NULL)) + if (!load_aqo_data(fs, fss, data, false)) data->rows = 0; data->rows = OkNNr_learn(data, features, target, rfactor); diff --git a/sql/gucs.sql b/sql/gucs.sql index d23d7214..f949bbec 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -50,4 +50,5 @@ SELECT count(*) FROM aqo_query_stat; SELECT true AS success FROM aqo_reset(); SELECT count(*) FROM aqo_query_stat; +DROP TABLE t; DROP EXTENSION aqo; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index f50e4e55..5eb47a65 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -6,8 +6,9 @@ SET aqo.wide_search = 'on'; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; -set aqo.show_hash = 'off'; +SET aqo.show_hash = 'off'; SET aqo.min_neighbors_for_predicting = 1; +SET aqo.predict_with_few_neighbors = 'off'; SET enable_nestloop = 'off'; SET enable_mergejoin = 'off'; SET enable_material = 'off'; @@ -142,10 +143,73 @@ FROM expln(' SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +-- 
Next few test cases focus on fss corresponding to (x1 > ? AND x2 < ? AND x3 < ?). We will denote +-- it by fss0. At this moment there is exactly one fs with (fs, fss0, dbid) record in aqo_data. We'll +-- refer to it as fs0. + +-- Let's create another fs for fss0. We'll call this fs fs1. Since aqo.wide_search='on', +-- aqo.min_neighbors_for_predicting=1, and there is (fs0, fss0, dbid) data record, AQO must be used here. +SELECT str AS result +FROM expln(' +SELECT * FROM A WHERE x1 > -100 AND x2 < 10 AND x3 < 10;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; +-- Now there are 2 data records for fss0: one for (fs0, fss0, dbid) and one for (fs1, fss0, dbid) + +-- We repeat previous query, but set aqo.min_neighbors_for_predicting to 2. Since aqo.predict_with_few_neighbors +-- is 'off', AQO is obliged to use both data records for fss0. +SET aqo.min_neighbors_for_predicting = 2; +SELECT str AS result +FROM expln(' +SELECT * FROM A WHERE x1 > 1 AND x2 < 10 AND x3 < 10;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; +-- Now there are 3 data records for fss0: 1 for (fs0, fss0, dbid) and 2 for (fs1, fss0, dbid) + +-- Lastly, we run invoke query with previously unseen fs with fss0 feature subspace. AQO must use +-- three data records from two neighbors for this one. 
+SET aqo.min_neighbors_for_predicting = 3; +SELECT str AS result +FROM expln(' +SELECT x2 FROM A WHERE x1 > 3 AND x2 < 10 AND x3 < 10 GROUP BY(x2);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +----- +DROP TABLE IF EXISTS t; +CREATE TABLE t AS SELECT x, x AS y, x AS z FROM generate_series(1, 10000) x; +ANALYZE t; +SELECT true AS success FROM aqo_reset(); + +-- Test that when there are less records than aqo.min_neighbors_for_predicting for given (fs, fss, dbid) +-- and aqo.predict_with_few_neighbors is off, those records have higher precedence for cardinality estimation +-- than neighbors' records. +SELECT str AS result +FROM expln(' +select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; +DO +$$ +BEGIN + for counter in 1..20 loop + EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= 1 and z <= %L;', 10 * counter); + EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= %L and z <= 1;', 10 * counter); + EXECUTE format('explain analyze select *, 1 from t where x <= %L and y <= 1 and z <= 1;', 10 * counter); + end loop; +END; +$$ LANGUAGE PLPGSQL; +-- AQO should predict ~1000 rows to indicate that the record from previous invocation was used. 
+SELECT str AS result +FROM expln(' +select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + + RESET aqo.wide_search; +RESET aqo.predict_with_few_neighbors; +RESET aqo.min_neighbors_for_predicting; DROP EXTENSION aqo CASCADE; DROP TABLE a; DROP TABLE b; DROP TABLE c; +DROP TABLE t; DROP FUNCTION expln; diff --git a/storage.c b/storage.c index 10b7cfc6..79b1b11d 100644 --- a/storage.c +++ b/storage.c @@ -120,12 +120,6 @@ PG_FUNCTION_INFO_V1(aqo_query_stat_update); PG_FUNCTION_INFO_V1(aqo_data_update); -bool -load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids) -{ - return load_aqo_data(fs, fss, data, reloids, false, NULL); -} - bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids) { @@ -1577,66 +1571,53 @@ fs_distance(double *a, double *b, int len) } static bool -nearest_neighbor(double **matrix, int old_rows, double *neibour, int cols) +nearest_neighbor(double **matrix, int old_rows, double *neighbor, int cols) { int i; for (i=0; irows is kept <= aqo_K. + */ static void -build_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data, double *features) +update_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data) { + int k = (data->rows < 0) ? 0 : data->rows; + int i; + Assert(data->cols == temp_data->cols); Assert(data->matrix); - if (features != NULL) + if (data->cols > 0) { - int old_rows = data->rows; - int k = (old_rows < 0) ? 
0 : old_rows; - - if (data->cols > 0) + for (i = 0; i < temp_data->rows && k < aqo_K; i++) { - int i; - - Assert(data->cols == temp_data->cols); - - for (i = 0; i < temp_data->rows; i++) + if (!nearest_neighbor(data->matrix, k, temp_data->matrix[i], data->cols)) { - if (k < aqo_K && !nearest_neighbor(data->matrix, old_rows, - temp_data->matrix[i], - data->cols)) - { - memcpy(data->matrix[k], temp_data->matrix[i], data->cols * sizeof(double)); - data->rfactors[k] = temp_data->rfactors[i]; - data->targets[k] = temp_data->targets[i]; - k++; - } + memcpy(data->matrix[k], temp_data->matrix[i], data->cols * sizeof(double)); + data->rfactors[k] = temp_data->rfactors[i]; + data->targets[k] = temp_data->targets[i]; + k++; } - data->rows = k; } } - else + /* Data has no columns. Only one record can be added */ + else if (k == 0 && temp_data->rows > 0) { - if (data->rows > 0) - /* trivial strategy - use first suitable record and ignore others */ - return; - memcpy(data, temp_data, sizeof(OkNNrdata)); - if (data->cols > 0) - { - int i; - - for (i = 0; i < data->rows; i++) - { - Assert(data->matrix[i]); - memcpy(data->matrix[i], temp_data->matrix[i], data->cols * sizeof(double)); - } - } + data->rfactors[0] = temp_data->rfactors[0]; + data->targets[0] = temp_data->targets[0]; + k = 1; } + data->rows = k; + + Assert(data->rows >= 0 && data->rows <= aqo_K); } static OkNNrdata * @@ -1706,13 +1687,11 @@ _fill_knn_data(const DataEntry *entry, List **reloids) * * If wideSearch is true - make seqscan on the hash table to see for relevant * data across neighbours. - * If reloids is NULL - don't fill this list. * * Return false if the operation was unsuccessful. 
*/ bool -load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, - bool wideSearch, double *features) +load_aqo_data(uint64 fs, int fss, OkNNrdata *data, bool wideSearch) { DataEntry *entry; bool found; @@ -1720,6 +1699,7 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, OkNNrdata *temp_data; Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + Assert(wideSearch || data->rows <= 0); dsa_init(); @@ -1739,16 +1719,16 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, if (entry->cols != data->cols) { /* Collision happened? */ - elog(LOG, "[AQO] Does a collision happened? Check it if possible " + elog(LOG, "[AQO] Did a collision happen? Check it if possible " "(fs: "UINT64_FORMAT", fss: %d).", fs, fss); found = false; /* Sign of unsuccessful operation */ goto end; } - temp_data = _fill_knn_data(entry, reloids); + temp_data = _fill_knn_data(entry, NULL); Assert(temp_data->rows > 0); - build_knn_matrix(data, temp_data, features); + update_knn_matrix(data, temp_data); Assert(data->rows > 0); } else @@ -1770,28 +1750,31 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, temp_data = _fill_knn_data(entry, &tmp_oids); - if (data->rows > 0 && list_length(tmp_oids) != noids) + if (noids >= 0 && list_length(tmp_oids) != noids) { /* Dubious case. 
So log it and skip these data */ elog(LOG, "[AQO] different number depended oids for the same fss %d: " "%d and %d correspondingly.", fss, list_length(tmp_oids), noids); - Assert(noids >= 0); list_free(tmp_oids); continue; } noids = list_length(tmp_oids); + list_free(tmp_oids); - if (reloids != NULL && *reloids == NIL) - *reloids = tmp_oids; - else - list_free(tmp_oids); - - build_knn_matrix(data, temp_data, NULL); + update_knn_matrix(data, temp_data); found = true; + + /* Abort if data is full */ + if (data->rows == aqo_K || (data->cols == 0 && data->rows == 1)) + { + hash_seq_term(&hash_seq); + break; + } } + } Assert(!found || (data->rows > 0 && data->rows <= aqo_K)); diff --git a/storage.h b/storage.h index 9491e33e..692014c3 100644 --- a/storage.h +++ b/storage.h @@ -144,8 +144,7 @@ extern void aqo_qtexts_load(void); extern bool aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, List *reloids); -extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, - bool wideSearch, double *features); +extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, bool wideSearch); extern void aqo_data_flush(void); extern void aqo_data_load(void); @@ -166,7 +165,6 @@ extern bool query_is_deactivated(uint64 query_hash); extern void add_deactivated_query(uint64 query_hash); /* Storage interaction */ -extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids); extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids); extern bool update_query_timeout(uint64 queryid, int64 smart_timeout); From eff2fec91b970a3ec23db3138a552d6ba4d75aff Mon Sep 17 00:00:00 2001 From: Artem Fadeev Date: Mon, 30 Sep 2024 14:53:15 +0300 Subject: [PATCH 203/203] Fix smart statement timeout update logic and aqo_stat_store Note: due to a mix of absolute and relative time in set_timeout_if_need function, smart statement timeout feature doesn't currently work since its timeouts are set in the past. 
This commit changes checked precondition for smart statement timeout change to fix array indexing bug, but the feature itself remains broken. This commit also fixes arithmetic errors in aqo_stat_store in the case of fully filled arrays. --- expected/aqo_query_stat.out | 155 ++++++++++++++++++++++++++++++++++++ postprocessing.c | 21 +++-- regress_schedule | 1 + sql/aqo_query_stat.sql | 74 +++++++++++++++++ storage.c | 18 +++-- 5 files changed, 254 insertions(+), 15 deletions(-) create mode 100644 expected/aqo_query_stat.out create mode 100644 sql/aqo_query_stat.sql diff --git a/expected/aqo_query_stat.out b/expected/aqo_query_stat.out new file mode 100644 index 00000000..2478b4e5 --- /dev/null +++ b/expected/aqo_query_stat.out @@ -0,0 +1,155 @@ +-- Testing aqo_query_stat update logic +-- Note: this test assumes STAT_SAMPLE_SIZE to be 20. +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +DROP TABLE IF EXISTS A; +NOTICE: table "a" does not exist, skipping +CREATE TABLE A AS SELECT x FROM generate_series(1, 20) as x; +ANALYZE A; +DROP TABLE IF EXISTS B; +NOTICE: table "b" does not exist, skipping +CREATE TABLE B AS SELECT y FROM generate_series(1, 10) as y; +ANALYZE B; +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$ +SET aqo.mode = 'learn'; +SET aqo.force_collect_stat = 'on'; +SET aqo.min_neighbors_for_predicting = 1; +-- First test: adding real records +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; + count +------- + 20 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; + count +------- + 20 +(1 row) + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + aqo_enable_class +------------------ + +(1 row) + +SELECT count(*) FROM A JOIN 
B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; + count +------- + 18 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; + count +------- + 14 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 19 AND B.y < 9; + count +------- + 8 +(1 row) + +-- Ignore unstable time-related columns +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + error_aqo | error_no_aqo | executions_with_aqo | executions_without_aqo +--------------------+--------------+---------------------+------------------------ + {0.22,0.362,0.398} | {0.392,0.21} | 3 | 2 +(1 row) + +SELECT true AS success from aqo_reset(); + success +--------- + t +(1 row) + +-- Second test: fake data in aqo_query_stat +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 5 AND B.y < 100; + count +------- + 135 +(1 row) + +SELECT aqo_query_stat_update( + queryid, + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + 100, 50) +FROM aqo_query_stat; + aqo_query_stat_update +----------------------- + t +(1 row) + +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + error_aqo | error_no_aqo | executions_with_aqo | executions_without_aqo +------------------------------------------------------+------------------------------------------------------+---------------------+------------------------ + {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20} | 
{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20} | 100 | 50 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 10 AND B.y < 100; + count +------- + 100 +(1 row) + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + aqo_enable_class +------------------ + +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; + count +------- + 20 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; + count +------- + 20 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; + count +------- + 18 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; + count +------- + 14 +(1 row) + +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + error_aqo | error_no_aqo | executions_with_aqo | executions_without_aqo +---------------------------------------------------------------------+----------------------------------------------------------+---------------------+------------------------ + {5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,0.392,0.344,0.34,0.362} | {2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,0.218} | 104 | 51 +(1 row) + +SET aqo.mode TO DEFAULT; +SET aqo.force_collect_stat TO DEFAULT; +SET aqo.min_neighbors_for_predicting TO DEFAULT; +DROP FUNCTION round_array; +DROP TABLE A; +DROP TABLE B; +DROP EXTENSION aqo CASCADE; diff --git a/postprocessing.c b/postprocessing.c index cae61e98..50d27624 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -30,6 +30,8 @@ #include "machine_learning.h" #include "storage.h" +#define SMART_TIMEOUT_ERROR_THRESHOLD (0.1) + bool aqo_learn_statement_timeout = false; @@ -761,7 +763,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) instr_time endtime; EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); 
MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); - double error = .0; cardinality_sum_errors = 0.; cardinality_num_objects = 0; @@ -827,18 +828,22 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) if (stat != NULL) { - /* Store all learn data into the AQO service relations. */ - if (!query_context.adding_query && query_context.auto_tuning) - automatical_query_tuning(query_context.query_hash, stat); - - error = stat->est_error_aqo[stat->cur_stat_slot_aqo-1] - cardinality_sum_errors/(1 + cardinality_num_objects); - - if ( aqo_learn_statement_timeout_enable && aqo_statement_timeout > 0 && error >= 0.1) + Assert(!query_context.use_aqo || stat->cur_stat_slot_aqo > 0); + /* If query used aqo, increase smart timeout if needed */ + if (query_context.use_aqo && + aqo_learn_statement_timeout_enable && + aqo_statement_timeout > 0 && + stat->est_error_aqo[stat->cur_stat_slot_aqo-1] - + cardinality_sum_errors/(1 + cardinality_num_objects) >= SMART_TIMEOUT_ERROR_THRESHOLD) { int64 fintime = increase_smart_timeout(); elog(NOTICE, "[AQO] Time limit for execution of the statement was increased. Current timeout is "UINT64_FORMAT, fintime); } + /* Store all learn data into the AQO service relations. */ + if (!query_context.adding_query && query_context.auto_tuning) + automatical_query_tuning(query_context.query_hash, stat); + pfree(stat); } } diff --git a/regress_schedule b/regress_schedule index 96b2cb93..f3084fc8 100644 --- a/regress_schedule +++ b/regress_schedule @@ -23,3 +23,4 @@ test: look_a_like test: feature_subspace test: eclasses test: eclasses_mchar +test: aqo_query_stat diff --git a/sql/aqo_query_stat.sql b/sql/aqo_query_stat.sql new file mode 100644 index 00000000..a9228b5e --- /dev/null +++ b/sql/aqo_query_stat.sql @@ -0,0 +1,74 @@ +-- Testing aqo_query_stat update logic +-- Note: this test assumes STAT_SAMPLE_SIZE to be 20. 
+CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +DROP TABLE IF EXISTS A; +CREATE TABLE A AS SELECT x FROM generate_series(1, 20) as x; +ANALYZE A; + +DROP TABLE IF EXISTS B; +CREATE TABLE B AS SELECT y FROM generate_series(1, 10) as y; +ANALYZE B; + +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$ + +SET aqo.mode = 'learn'; +SET aqo.force_collect_stat = 'on'; +SET aqo.min_neighbors_for_predicting = 1; + +-- First test: adding real records +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 19 AND B.y < 9; +-- Ignore unstable time-related columns +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + +SELECT true AS success from aqo_reset(); + + +-- Second test: fake data in aqo_query_stat +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 5 AND B.y < 100; +SELECT aqo_query_stat_update( + queryid, + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + 100, 50) +FROM aqo_query_stat; +SELECT 
round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 10 AND B.y < 100; + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + + +SET aqo.mode TO DEFAULT; +SET aqo.force_collect_stat TO DEFAULT; +SET aqo.min_neighbors_for_predicting TO DEFAULT; + +DROP FUNCTION round_array; +DROP TABLE A; +DROP TABLE B; +DROP EXTENSION aqo CASCADE; diff --git a/storage.c b/storage.c index 79b1b11d..a65ce463 100644 --- a/storage.c +++ b/storage.c @@ -233,7 +233,9 @@ reset_deactivated_queries(void) /* * Update AQO statistics. * - * Add a record (or update an existed) to stat storage for the query class. + * In append mode, append one element to exec_time, plan_time, est_error arrays + * (or their *_aqo counterparts, if use_aqo is true). Without append mode, add a + * record (or overwrite an existing) to stat storage for the query class. * Returns a copy of stat entry, allocated in current memory context. Caller is * in charge to free this struct after usage. * If stat hash table is full, return NULL and log this fact. 
@@ -312,19 +314,20 @@ aqo_stat_store(uint64 queryid, bool use_aqo, AqoStatArgs *stat_arg, if (use_aqo) { Assert(entry->cur_stat_slot_aqo >= 0); - pos = entry->cur_stat_slot_aqo; - if (entry->cur_stat_slot_aqo < STAT_SAMPLE_SIZE - 1) + if (entry->cur_stat_slot_aqo < STAT_SAMPLE_SIZE) entry->cur_stat_slot_aqo++; else { size_t sz = (STAT_SAMPLE_SIZE - 1) * sizeof(entry->est_error_aqo[0]); - Assert(entry->cur_stat_slot_aqo = STAT_SAMPLE_SIZE - 1); + Assert(entry->cur_stat_slot_aqo == STAT_SAMPLE_SIZE); + memmove(entry->plan_time_aqo, &entry->plan_time_aqo[1], sz); memmove(entry->exec_time_aqo, &entry->exec_time_aqo[1], sz); memmove(entry->est_error_aqo, &entry->est_error_aqo[1], sz); } + pos = entry->cur_stat_slot_aqo - 1; entry->execs_with_aqo++; entry->plan_time_aqo[pos] = *stat_arg->plan_time_aqo; entry->exec_time_aqo[pos] = *stat_arg->exec_time_aqo; @@ -333,19 +336,20 @@ aqo_stat_store(uint64 queryid, bool use_aqo, AqoStatArgs *stat_arg, else { Assert(entry->cur_stat_slot >= 0); - pos = entry->cur_stat_slot; - if (entry->cur_stat_slot < STAT_SAMPLE_SIZE - 1) + if (entry->cur_stat_slot < STAT_SAMPLE_SIZE) entry->cur_stat_slot++; else { size_t sz = (STAT_SAMPLE_SIZE - 1) * sizeof(entry->est_error[0]); - Assert(entry->cur_stat_slot = STAT_SAMPLE_SIZE - 1); + Assert(entry->cur_stat_slot == STAT_SAMPLE_SIZE); + memmove(entry->plan_time, &entry->plan_time[1], sz); memmove(entry->exec_time, &entry->exec_time[1], sz); memmove(entry->est_error, &entry->est_error[1], sz); } + pos = entry->cur_stat_slot - 1; entry->execs_without_aqo++; entry->plan_time[pos] = *stat_arg->plan_time; entry->exec_time[pos] = *stat_arg->exec_time;